Browse Source

Revision 0.6 with myriad-groestl and jackpot coin

2upstream
Christian Buchner 10 years ago
parent
commit
07cbafe96c
  1. 0
      JHA/.deps/.dirstamp
  2. 0
      JHA/.dirstamp
  3. 572
      JHA/cuda_jha_keccak512.cu
  4. 173
      JHA/jackpotcoin.cu
  5. 622
      cuda_myriadgroestl.cu
  6. 106
      myriadgroestl.cpp
  7. 0
      quark/.deps/.dirstamp
  8. 0
      quark/.dirstamp
  9. 107
      quark/cuda_quark_checkhash.cu
  10. 392
      sph/aes_helper.c
  11. 0
      sph/blake.c
  12. 957
      sph/bmw.c
  13. 723
      sph/cubehash.c
  14. 1031
      sph/echo.c
  15. 0
      sph/fugue.c
  16. 0
      sph/groestl.c
  17. 1107
      sph/jh.c
  18. 0
      sph/keccak.c
  19. 1426
      sph/luffa.c
  20. 1764
      sph/shavite.c
  21. 1799
      sph/simd.c
  22. 1244
      sph/skein.c
  23. 0
      sph/sph_blake.h
  24. 320
      sph/sph_bmw.h
  25. 292
      sph/sph_cubehash.h
  26. 320
      sph/sph_echo.h
  27. 0
      sph/sph_fugue.h
  28. 0
      sph/sph_groestl.h
  29. 290
      sph/sph_jh.h
  30. 0
      sph/sph_keccak.h
  31. 296
      sph/sph_luffa.h
  32. 314
      sph/sph_shavite.h
  33. 309
      sph/sph_simd.h
  34. 290
      sph/sph_skein.h
  35. 0
      sph/sph_types.h

0
JHA/.deps/.dirstamp vendored

0
JHA/.dirstamp

572
JHA/cuda_jha_keccak512.cu

@ -0,0 +1,572 @@ @@ -0,0 +1,572 @@
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <memory.h>
// Folgende Definitionen später durch header ersetzen
typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
typedef unsigned long long uint64_t;
// aus heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
__constant__ uint64_t c_State[25];
__constant__ uint32_t c_PaddedMessage[18];
static __device__ uint32_t cuda_swab32(uint32_t x)
{
return __byte_perm(x, 0, 0x0123);
}
// diese 64 Bit Rotates werden unter Compute 3.5 (und besser) mit dem Funnel Shifter beschleunigt
#if __CUDA_ARCH__ >= 350
__forceinline__ __device__ uint64_t ROTL64(const uint64_t value, const int offset) {
uint2 result;
if(offset >= 32) {
asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value))), "r"(offset));
asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(__double2hiint(__longlong_as_double(value))), "r"(__double2loint(__longlong_as_double(value))), "r"(offset));
} else {
asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(__double2hiint(__longlong_as_double(value))), "r"(__double2loint(__longlong_as_double(value))), "r"(offset));
asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value))), "r"(offset));
}
return __double_as_longlong(__hiloint2double(result.y, result.x));
}
#else
#define ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
#endif
#define U32TO64_LE(p) \
(((uint64_t)(*p)) | (((uint64_t)(*(p + 1))) << 32))
#define U64TO32_LE(p, v) \
*p = (uint32_t)((v)); *(p+1) = (uint32_t)((v) >> 32);
static const uint64_t host_keccak_round_constants[24] = {
0x0000000000000001ull, 0x0000000000008082ull,
0x800000000000808aull, 0x8000000080008000ull,
0x000000000000808bull, 0x0000000080000001ull,
0x8000000080008081ull, 0x8000000000008009ull,
0x000000000000008aull, 0x0000000000000088ull,
0x0000000080008009ull, 0x000000008000000aull,
0x000000008000808bull, 0x800000000000008bull,
0x8000000000008089ull, 0x8000000000008003ull,
0x8000000000008002ull, 0x8000000000000080ull,
0x000000000000800aull, 0x800000008000000aull,
0x8000000080008081ull, 0x8000000000008080ull,
0x0000000080000001ull, 0x8000000080008008ull
};
__constant__ uint64_t c_keccak_round_constants[24];
static __device__ __forceinline__ void
keccak_block(uint64_t *s, const uint32_t *in, const uint64_t *keccak_round_constants) {
size_t i;
uint64_t t[5], u[5], v, w;
/* absorb input */
#pragma unroll 9
for (i = 0; i < 72 / 8; i++, in += 2)
s[i] ^= U32TO64_LE(in);
for (i = 0; i < 24; i++) {
/* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */
t[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20];
t[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21];
t[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22];
t[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23];
t[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24];
/* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */
u[0] = t[4] ^ ROTL64(t[1], 1);
u[1] = t[0] ^ ROTL64(t[2], 1);
u[2] = t[1] ^ ROTL64(t[3], 1);
u[3] = t[2] ^ ROTL64(t[4], 1);
u[4] = t[3] ^ ROTL64(t[0], 1);
/* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */
s[0] ^= u[0]; s[5] ^= u[0]; s[10] ^= u[0]; s[15] ^= u[0]; s[20] ^= u[0];
s[1] ^= u[1]; s[6] ^= u[1]; s[11] ^= u[1]; s[16] ^= u[1]; s[21] ^= u[1];
s[2] ^= u[2]; s[7] ^= u[2]; s[12] ^= u[2]; s[17] ^= u[2]; s[22] ^= u[2];
s[3] ^= u[3]; s[8] ^= u[3]; s[13] ^= u[3]; s[18] ^= u[3]; s[23] ^= u[3];
s[4] ^= u[4]; s[9] ^= u[4]; s[14] ^= u[4]; s[19] ^= u[4]; s[24] ^= u[4];
/* rho pi: b[..] = rotl(a[..], ..) */
v = s[ 1];
s[ 1] = ROTL64(s[ 6], 44);
s[ 6] = ROTL64(s[ 9], 20);
s[ 9] = ROTL64(s[22], 61);
s[22] = ROTL64(s[14], 39);
s[14] = ROTL64(s[20], 18);
s[20] = ROTL64(s[ 2], 62);
s[ 2] = ROTL64(s[12], 43);
s[12] = ROTL64(s[13], 25);
s[13] = ROTL64(s[19], 8);
s[19] = ROTL64(s[23], 56);
s[23] = ROTL64(s[15], 41);
s[15] = ROTL64(s[ 4], 27);
s[ 4] = ROTL64(s[24], 14);
s[24] = ROTL64(s[21], 2);
s[21] = ROTL64(s[ 8], 55);
s[ 8] = ROTL64(s[16], 45);
s[16] = ROTL64(s[ 5], 36);
s[ 5] = ROTL64(s[ 3], 28);
s[ 3] = ROTL64(s[18], 21);
s[18] = ROTL64(s[17], 15);
s[17] = ROTL64(s[11], 10);
s[11] = ROTL64(s[ 7], 6);
s[ 7] = ROTL64(s[10], 3);
s[10] = ROTL64( v, 1);
/* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */
v = s[ 0]; w = s[ 1]; s[ 0] ^= (~w) & s[ 2]; s[ 1] ^= (~s[ 2]) & s[ 3]; s[ 2] ^= (~s[ 3]) & s[ 4]; s[ 3] ^= (~s[ 4]) & v; s[ 4] ^= (~v) & w;
v = s[ 5]; w = s[ 6]; s[ 5] ^= (~w) & s[ 7]; s[ 6] ^= (~s[ 7]) & s[ 8]; s[ 7] ^= (~s[ 8]) & s[ 9]; s[ 8] ^= (~s[ 9]) & v; s[ 9] ^= (~v) & w;
v = s[10]; w = s[11]; s[10] ^= (~w) & s[12]; s[11] ^= (~s[12]) & s[13]; s[12] ^= (~s[13]) & s[14]; s[13] ^= (~s[14]) & v; s[14] ^= (~v) & w;
v = s[15]; w = s[16]; s[15] ^= (~w) & s[17]; s[16] ^= (~s[17]) & s[18]; s[17] ^= (~s[18]) & s[19]; s[18] ^= (~s[19]) & v; s[19] ^= (~v) & w;
v = s[20]; w = s[21]; s[20] ^= (~w) & s[22]; s[21] ^= (~s[22]) & s[23]; s[22] ^= (~s[23]) & s[24]; s[23] ^= (~s[24]) & v; s[24] ^= (~v) & w;
/* iota: a[0,0] ^= round constant */
s[0] ^= keccak_round_constants[i];
}
}
__global__ void jackpot_keccak512_gpu_hash_88(int threads, uint32_t startNounce, uint64_t *g_hash)
{
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
{
uint32_t nounce = startNounce + thread;
int hashPosition = nounce - startNounce;
// Nachricht kopieren
uint32_t message[18];
#pragma unroll 18
for(int i=0;i<18;i++)
message[i] = c_PaddedMessage[i];
// die individuelle Nounce einsetzen
message[1] = cuda_swab32(nounce);
// State initialisieren
uint64_t keccak_gpu_state[25];
#pragma unroll 25
for (int i=0; i<25; i++)
keccak_gpu_state[i] = c_State[i];
// den Block einmal gut durchschütteln
keccak_block(keccak_gpu_state, message, c_keccak_round_constants);
// das Hash erzeugen
uint32_t hash[16];
#pragma unroll 8
for (size_t i = 0; i < 64; i += 8) {
U64TO32_LE((&hash[i/4]), keccak_gpu_state[i / 8]);
}
// fertig
uint32_t *outpHash = (uint32_t*)&g_hash[8 * hashPosition];
#pragma unroll 16
for(int i=0;i<16;i++)
outpHash[i] = hash[i];
}
}
// Setup-Funktionen
__host__ void jackpot_keccak512_cpu_init(int thr_id, int threads)
{
// Kopiere die Hash-Tabellen in den GPU-Speicher
cudaMemcpyToSymbol( c_keccak_round_constants,
host_keccak_round_constants,
sizeof(host_keccak_round_constants),
0, cudaMemcpyHostToDevice);
}
#define cKeccakB 1600
#define cKeccakR 576
#define cKeccakR_SizeInBytes (cKeccakR / 8)
#define crypto_hash_BYTES 64
#if (cKeccakB == 1600)
typedef unsigned long long UINT64;
typedef UINT64 tKeccakLane;
#define cKeccakNumberOfRounds 24
#endif
#define cKeccakLaneSizeInBits (sizeof(tKeccakLane) * 8)
#define ROL(a, offset) ((((tKeccakLane)a) << ((offset) % cKeccakLaneSizeInBits)) ^ (((tKeccakLane)a) >> (cKeccakLaneSizeInBits-((offset) % cKeccakLaneSizeInBits))))
#if ((cKeccakB/25) == 8)
#define ROL_mult8(a, offset) ((tKeccakLane)a)
#else
#define ROL_mult8(a, offset) ROL(a, offset)
#endif
void KeccakF( tKeccakLane * state, const tKeccakLane *in, int laneCount );
const tKeccakLane KeccakF_RoundConstants[cKeccakNumberOfRounds] =
{
(tKeccakLane)0x0000000000000001ULL,
(tKeccakLane)0x0000000000008082ULL,
(tKeccakLane)0x800000000000808aULL,
(tKeccakLane)0x8000000080008000ULL,
(tKeccakLane)0x000000000000808bULL,
(tKeccakLane)0x0000000080000001ULL,
(tKeccakLane)0x8000000080008081ULL,
(tKeccakLane)0x8000000000008009ULL,
(tKeccakLane)0x000000000000008aULL,
(tKeccakLane)0x0000000000000088ULL,
(tKeccakLane)0x0000000080008009ULL,
(tKeccakLane)0x000000008000000aULL,
(tKeccakLane)0x000000008000808bULL,
(tKeccakLane)0x800000000000008bULL,
(tKeccakLane)0x8000000000008089ULL,
(tKeccakLane)0x8000000000008003ULL,
(tKeccakLane)0x8000000000008002ULL,
(tKeccakLane)0x8000000000000080ULL
#if (cKeccakB >= 400)
, (tKeccakLane)0x000000000000800aULL,
(tKeccakLane)0x800000008000000aULL
#if (cKeccakB >= 800)
, (tKeccakLane)0x8000000080008081ULL,
(tKeccakLane)0x8000000000008080ULL
#if (cKeccakB == 1600)
, (tKeccakLane)0x0000000080000001ULL,
(tKeccakLane)0x8000000080008008ULL
#endif
#endif
#endif
};
void KeccakF( tKeccakLane * state, const tKeccakLane *in, int laneCount )
{
{
while ( --laneCount >= 0 )
{
state[laneCount] ^= in[laneCount];
}
}
{
tKeccakLane Aba, Abe, Abi, Abo, Abu;
tKeccakLane Aga, Age, Agi, Ago, Agu;
tKeccakLane Aka, Ake, Aki, Ako, Aku;
tKeccakLane Ama, Ame, Ami, Amo, Amu;
tKeccakLane Asa, Ase, Asi, Aso, Asu;
tKeccakLane BCa, BCe, BCi, BCo, BCu;
tKeccakLane Da, De, Di, Do, Du;
tKeccakLane Eba, Ebe, Ebi, Ebo, Ebu;
tKeccakLane Ega, Ege, Egi, Ego, Egu;
tKeccakLane Eka, Eke, Eki, Eko, Eku;
tKeccakLane Ema, Eme, Emi, Emo, Emu;
tKeccakLane Esa, Ese, Esi, Eso, Esu;
#define round laneCount
//copyFromState(A, state)
Aba = state[ 0];
Abe = state[ 1];
Abi = state[ 2];
Abo = state[ 3];
Abu = state[ 4];
Aga = state[ 5];
Age = state[ 6];
Agi = state[ 7];
Ago = state[ 8];
Agu = state[ 9];
Aka = state[10];
Ake = state[11];
Aki = state[12];
Ako = state[13];
Aku = state[14];
Ama = state[15];
Ame = state[16];
Ami = state[17];
Amo = state[18];
Amu = state[19];
Asa = state[20];
Ase = state[21];
Asi = state[22];
Aso = state[23];
Asu = state[24];
for( round = 0; round < cKeccakNumberOfRounds; round += 2 )
{
// prepareTheta
BCa = Aba^Aga^Aka^Ama^Asa;
BCe = Abe^Age^Ake^Ame^Ase;
BCi = Abi^Agi^Aki^Ami^Asi;
BCo = Abo^Ago^Ako^Amo^Aso;
BCu = Abu^Agu^Aku^Amu^Asu;
//thetaRhoPiChiIotaPrepareTheta(round , A, E)
Da = BCu^ROL(BCe, 1);
De = BCa^ROL(BCi, 1);
Di = BCe^ROL(BCo, 1);
Do = BCi^ROL(BCu, 1);
Du = BCo^ROL(BCa, 1);
Aba ^= Da;
BCa = Aba;
Age ^= De;
BCe = ROL(Age, 44);
Aki ^= Di;
BCi = ROL(Aki, 43);
Amo ^= Do;
BCo = ROL(Amo, 21);
Asu ^= Du;
BCu = ROL(Asu, 14);
Eba = BCa ^((~BCe)& BCi );
Eba ^= (tKeccakLane)KeccakF_RoundConstants[round];
Ebe = BCe ^((~BCi)& BCo );
Ebi = BCi ^((~BCo)& BCu );
Ebo = BCo ^((~BCu)& BCa );
Ebu = BCu ^((~BCa)& BCe );
Abo ^= Do;
BCa = ROL(Abo, 28);
Agu ^= Du;
BCe = ROL(Agu, 20);
Aka ^= Da;
BCi = ROL(Aka, 3);
Ame ^= De;
BCo = ROL(Ame, 45);
Asi ^= Di;
BCu = ROL(Asi, 61);
Ega = BCa ^((~BCe)& BCi );
Ege = BCe ^((~BCi)& BCo );
Egi = BCi ^((~BCo)& BCu );
Ego = BCo ^((~BCu)& BCa );
Egu = BCu ^((~BCa)& BCe );
Abe ^= De;
BCa = ROL(Abe, 1);
Agi ^= Di;
BCe = ROL(Agi, 6);
Ako ^= Do;
BCi = ROL(Ako, 25);
Amu ^= Du;
BCo = ROL_mult8(Amu, 8);
Asa ^= Da;
BCu = ROL(Asa, 18);
Eka = BCa ^((~BCe)& BCi );
Eke = BCe ^((~BCi)& BCo );
Eki = BCi ^((~BCo)& BCu );
Eko = BCo ^((~BCu)& BCa );
Eku = BCu ^((~BCa)& BCe );
Abu ^= Du;
BCa = ROL(Abu, 27);
Aga ^= Da;
BCe = ROL(Aga, 36);
Ake ^= De;
BCi = ROL(Ake, 10);
Ami ^= Di;
BCo = ROL(Ami, 15);
Aso ^= Do;
BCu = ROL_mult8(Aso, 56);
Ema = BCa ^((~BCe)& BCi );
Eme = BCe ^((~BCi)& BCo );
Emi = BCi ^((~BCo)& BCu );
Emo = BCo ^((~BCu)& BCa );
Emu = BCu ^((~BCa)& BCe );
Abi ^= Di;
BCa = ROL(Abi, 62);
Ago ^= Do;
BCe = ROL(Ago, 55);
Aku ^= Du;
BCi = ROL(Aku, 39);
Ama ^= Da;
BCo = ROL(Ama, 41);
Ase ^= De;
BCu = ROL(Ase, 2);
Esa = BCa ^((~BCe)& BCi );
Ese = BCe ^((~BCi)& BCo );
Esi = BCi ^((~BCo)& BCu );
Eso = BCo ^((~BCu)& BCa );
Esu = BCu ^((~BCa)& BCe );
// prepareTheta
BCa = Eba^Ega^Eka^Ema^Esa;
BCe = Ebe^Ege^Eke^Eme^Ese;
BCi = Ebi^Egi^Eki^Emi^Esi;
BCo = Ebo^Ego^Eko^Emo^Eso;
BCu = Ebu^Egu^Eku^Emu^Esu;
//thetaRhoPiChiIotaPrepareTheta(round+1, E, A)
Da = BCu^ROL(BCe, 1);
De = BCa^ROL(BCi, 1);
Di = BCe^ROL(BCo, 1);
Do = BCi^ROL(BCu, 1);
Du = BCo^ROL(BCa, 1);
Eba ^= Da;
BCa = Eba;
Ege ^= De;
BCe = ROL(Ege, 44);
Eki ^= Di;
BCi = ROL(Eki, 43);
Emo ^= Do;
BCo = ROL(Emo, 21);
Esu ^= Du;
BCu = ROL(Esu, 14);
Aba = BCa ^((~BCe)& BCi );
Aba ^= (tKeccakLane)KeccakF_RoundConstants[round+1];
Abe = BCe ^((~BCi)& BCo );
Abi = BCi ^((~BCo)& BCu );
Abo = BCo ^((~BCu)& BCa );
Abu = BCu ^((~BCa)& BCe );
Ebo ^= Do;
BCa = ROL(Ebo, 28);
Egu ^= Du;
BCe = ROL(Egu, 20);
Eka ^= Da;
BCi = ROL(Eka, 3);
Eme ^= De;
BCo = ROL(Eme, 45);
Esi ^= Di;
BCu = ROL(Esi, 61);
Aga = BCa ^((~BCe)& BCi );
Age = BCe ^((~BCi)& BCo );
Agi = BCi ^((~BCo)& BCu );
Ago = BCo ^((~BCu)& BCa );
Agu = BCu ^((~BCa)& BCe );
Ebe ^= De;
BCa = ROL(Ebe, 1);
Egi ^= Di;
BCe = ROL(Egi, 6);
Eko ^= Do;
BCi = ROL(Eko, 25);
Emu ^= Du;
BCo = ROL_mult8(Emu, 8);
Esa ^= Da;
BCu = ROL(Esa, 18);
Aka = BCa ^((~BCe)& BCi );
Ake = BCe ^((~BCi)& BCo );
Aki = BCi ^((~BCo)& BCu );
Ako = BCo ^((~BCu)& BCa );
Aku = BCu ^((~BCa)& BCe );
Ebu ^= Du;
BCa = ROL(Ebu, 27);
Ega ^= Da;
BCe = ROL(Ega, 36);
Eke ^= De;
BCi = ROL(Eke, 10);
Emi ^= Di;
BCo = ROL(Emi, 15);
Eso ^= Do;
BCu = ROL_mult8(Eso, 56);
Ama = BCa ^((~BCe)& BCi );
Ame = BCe ^((~BCi)& BCo );
Ami = BCi ^((~BCo)& BCu );
Amo = BCo ^((~BCu)& BCa );
Amu = BCu ^((~BCa)& BCe );
Ebi ^= Di;
BCa = ROL(Ebi, 62);
Ego ^= Do;
BCe = ROL(Ego, 55);
Eku ^= Du;
BCi = ROL(Eku, 39);
Ema ^= Da;
BCo = ROL(Ema, 41);
Ese ^= De;
BCu = ROL(Ese, 2);
Asa = BCa ^((~BCe)& BCi );
Ase = BCe ^((~BCi)& BCo );
Asi = BCi ^((~BCo)& BCu );
Aso = BCo ^((~BCu)& BCa );
Asu = BCu ^((~BCa)& BCe );
}
//copyToState(state, A)
state[ 0] = Aba;
state[ 1] = Abe;
state[ 2] = Abi;
state[ 3] = Abo;
state[ 4] = Abu;
state[ 5] = Aga;
state[ 6] = Age;
state[ 7] = Agi;
state[ 8] = Ago;
state[ 9] = Agu;
state[10] = Aka;
state[11] = Ake;
state[12] = Aki;
state[13] = Ako;
state[14] = Aku;
state[15] = Ama;
state[16] = Ame;
state[17] = Ami;
state[18] = Amo;
state[19] = Amu;
state[20] = Asa;
state[21] = Ase;
state[22] = Asi;
state[23] = Aso;
state[24] = Asu;
#undef round
}
}
__host__ void jackpot_keccak512_cpu_setBlock_88(void *pdata)
{
unsigned long long inlen = 88;
const unsigned char *in = (const unsigned char*)pdata;
tKeccakLane state[5 * 5];
unsigned char temp[cKeccakR_SizeInBytes];
memset( state, 0, sizeof(state) );
for ( /* empty */; inlen >= cKeccakR_SizeInBytes; inlen -= cKeccakR_SizeInBytes, in += cKeccakR_SizeInBytes )
{
KeccakF( state, (const tKeccakLane*)in, cKeccakR_SizeInBytes / sizeof(tKeccakLane) );
}
// Kopiere den state nach der ersten Runde (nach Absorption von 72 Bytes Inputdaten)
// ins Constant Memory
cudaMemcpyToSymbol( c_State,
state,
sizeof(state),
0, cudaMemcpyHostToDevice);
// padding
memcpy( temp, in, (size_t)inlen );
temp[inlen++] = 1;
memset( temp+inlen, 0, cKeccakR_SizeInBytes - (size_t)inlen );
temp[cKeccakR_SizeInBytes-1] |= 0x80;
// Kopiere den Rest der Message und das Padding ins Constant Memory
cudaMemcpyToSymbol( c_PaddedMessage,
temp,
cKeccakR_SizeInBytes,
0, cudaMemcpyHostToDevice);
}
__host__ void jackpot_keccak512_cpu_hash_88(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order)
{
const int threadsperblock = 256;
// berechne wie viele Thread Blocks wir brauchen
dim3 grid((threads + threadsperblock-1)/threadsperblock);
dim3 block(threadsperblock);
// Größe des dynamischen Shared Memory Bereichs
size_t shared_size = 0;
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
jackpot_keccak512_gpu_hash_88<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash);
MyStreamSynchronize(NULL, order, thr_id);
}

173
JHA/jackpotcoin.cu

@ -0,0 +1,173 @@ @@ -0,0 +1,173 @@
extern "C"
{
#include "sph/sph_keccak.h"
#include "sph/sph_blake.h"
#include "sph/sph_groestl.h"
#include "sph/sph_jh.h"
#include "sph/sph_skein.h"
}
#include "miner.h"
#include <stdint.h>
// aus cpu-miner.c
extern int device_map[8];
extern bool opt_benchmark;
// Speicher für Input/Output der verketteten Hashfunktionen
static uint32_t *d_hash[8];
extern void jackpot_keccak512_cpu_init(int thr_id, int threads);
extern void jackpot_keccak512_cpu_setBlock_88(void *pdata);
extern void jackpot_keccak512_cpu_hash_88(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void quark_check_cpu_init(int thr_id, int threads);
extern void quark_check_cpu_setTarget(const void *ptarget);
extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
// Original jackpothash Funktion aus einem miner Quelltext
inline unsigned int jackpothash(void *state, const void *input)
{
sph_blake512_context ctx_blake;
sph_groestl512_context ctx_groestl;
sph_jh512_context ctx_jh;
sph_keccak512_context ctx_keccak;
sph_skein512_context ctx_skein;
uint32_t hash[16];
sph_keccak512_init(&ctx_keccak);
sph_keccak512 (&ctx_keccak, input, 88);
sph_keccak512_close(&ctx_keccak, hash);
unsigned int round_mask = (
(unsigned int)(((unsigned char *)input)[84]) << 0 |
(unsigned int)(((unsigned char *)input)[85]) << 8 |
(unsigned int)(((unsigned char *)input)[86]) << 16 |
(unsigned int)(((unsigned char *)input)[87]) << 24 );
unsigned int round_max = hash[0] & round_mask;
unsigned int round;
for (round = 0; round < round_max; round++) {
switch (hash[0] & 3) {
case 0:
sph_blake512_init(&ctx_blake);
sph_blake512 (&ctx_blake, hash, 64);
sph_blake512_close(&ctx_blake, hash);
break;
case 1:
sph_groestl512_init(&ctx_groestl);
sph_groestl512 (&ctx_groestl, hash, 64);
sph_groestl512_close(&ctx_groestl, hash);
break;
case 2:
sph_jh512_init(&ctx_jh);
sph_jh512 (&ctx_jh, hash, 64);
sph_jh512_close(&ctx_jh, hash);
break;
case 3:
sph_skein512_init(&ctx_skein);
sph_skein512 (&ctx_skein, hash, 64);
sph_skein512_close(&ctx_skein, hash);
break;
}
}
memcpy(state, hash, 32);
return round_max;
}
static int bit_population(uint32_t n){
int c =0;
while(n){
c += n&1;
n = n>>1;
}
return c;
}
extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done)
{
const uint32_t first_nonce = pdata[19];
// TODO: entfernen für eine Release! Ist nur zum Testen!
if (opt_benchmark) {
((uint32_t*)ptarget)[7] = 0x00000f;
((uint32_t*)pdata)[21] = 0x07000000; // round_mask von 7 vorgeben
}
const uint32_t Htarg = ptarget[7];
const int throughput = 256*4096; // 100;
static bool init[8] = {0,0,0,0,0,0,0,0};
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
// Konstanten kopieren, Speicher belegen
cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
jackpot_keccak512_cpu_init(thr_id, throughput);
quark_check_cpu_init(thr_id, throughput);
init[thr_id] = true;
}
uint32_t endiandata[22];
for (int k=0; k < 22; k++)
be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
unsigned int round_mask = (
(unsigned int)(((unsigned char *)endiandata)[84]) << 0 |
(unsigned int)(((unsigned char *)endiandata)[85]) << 8 |
(unsigned int)(((unsigned char *)endiandata)[86]) << 16 |
(unsigned int)(((unsigned char *)endiandata)[87]) << 24 );
// Zählen wie viele Bits in round_mask gesetzt sind
int bitcount = bit_population(round_mask);
jackpot_keccak512_cpu_setBlock_88((void*)endiandata);
quark_check_cpu_setTarget(ptarget);
do {
int order = 0;
// erstes Blake512 Hash mit CUDA
jackpot_keccak512_cpu_hash_88(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
// TODO: hier fehlen jetzt natürlich noch die anderen Hashrunden.
// bei round_mask=7 haben wir eine 1:8 Chance, dass das Hash dennoch
// die Kriterien erfüllt wenn hash[0] & round_mask zufällig 0 ist.
// Scan nach Gewinner Hashes auf der GPU
uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
if (foundNonce != 0xffffffff)
{
uint32_t vhash64[8];
be32enc(&endiandata[19], foundNonce);
// diese jackpothash Funktion gibt die Zahl der zusätzlichen Runden zurück
unsigned int rounds = jackpothash(vhash64, endiandata);
// wir akzeptieren nur solche Hashes wo ausschliesslich Keccak verwendet wurde
if (rounds == 0) {
if ((vhash64[7]<=Htarg) && fulltest(vhash64, ptarget)) {
pdata[19] = foundNonce;
*hashes_done = (foundNonce - first_nonce + 1) / (1 << bitcount);
return 1;
} else {
applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU (%d rounds)!", thr_id, foundNonce, rounds);
}
}
}
pdata[19] += throughput;
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
*hashes_done = (pdata[19] - first_nonce + 1) / (1 << bitcount);
return 0;
}

622
cuda_myriadgroestl.cu

@ -0,0 +1,622 @@ @@ -0,0 +1,622 @@
// Auf Myriadcoin spezialisierte Version von Groestl inkl. Bitslice
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <memory.h>
// it's unfortunate that this is a compile time constant.
#define MAXWELL_OR_FERMI 0
// aus cpu-miner.c
extern int device_map[8];
// aus heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
// Folgende Definitionen später durch header ersetzen
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
// diese Struktur wird in der Init Funktion angefordert
static cudaDeviceProp props;
// globaler Speicher für alle HeftyHashes aller Threads
__constant__ uint32_t pTarget[8]; // Single GPU
extern uint32_t *d_resultNonce[8];
__constant__ uint32_t myriadgroestl_gpu_msg[32];
// muss expandiert werden
__constant__ uint32_t myr_sha256_gpu_constantTable[64];
__constant__ uint32_t myr_sha256_gpu_hashTable[8];
uint32_t myr_sha256_cpu_hashTable[] = {
0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 };
uint32_t myr_sha256_cpu_constantTable[] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
};
#if __CUDA_ARCH__ < 350
// Kepler (Compute 3.0)
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
#else
// Kepler (Compute 3.5)
#define ROTR32(x, n) __funnelshift_r( (x), (x), (n) )
#endif
#define R(x, n) ((x) >> (n))
#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
#define Maj(x, y, z) ((x & (y | z)) | (y & z))
#define S0(x) (ROTR32(x, 2) ^ ROTR32(x, 13) ^ ROTR32(x, 22))
#define S1(x) (ROTR32(x, 6) ^ ROTR32(x, 11) ^ ROTR32(x, 25))
#define s0(x) (ROTR32(x, 7) ^ ROTR32(x, 18) ^ R(x, 3))
#define s1(x) (ROTR32(x, 17) ^ ROTR32(x, 19) ^ R(x, 10))
#define SWAB32(x) ( ((x & 0x000000FF) << 24) | ((x & 0x0000FF00) << 8) | ((x & 0x00FF0000) >> 8) | ((x & 0xFF000000) >> 24) )
__device__ void myriadgroestl_gpu_sha256(uint32_t *message)
{
uint32_t W1[16];
uint32_t W2[16];
// Initialisiere die register a bis h mit der Hash-Tabelle
uint32_t regs[8];
uint32_t hash[8];
// pre
#pragma unroll 8
for (int k=0; k < 8; k++)
{
regs[k] = myr_sha256_gpu_hashTable[k];
hash[k] = regs[k];
}
#pragma unroll 16
for(int k=0;k<16;k++)
W1[k] = SWAB32(message[k]);
// Progress W1
#pragma unroll 16
for(int j=0;j<16;j++)
{
uint32_t T1, T2;
T1 = regs[7] + S1(regs[4]) + Ch(regs[4], regs[5], regs[6]) + myr_sha256_gpu_constantTable[j] + W1[j];
T2 = S0(regs[0]) + Maj(regs[0], regs[1], regs[2]);
#pragma unroll 7
for (int k=6; k >= 0; k--) regs[k+1] = regs[k];
regs[0] = T1 + T2;
regs[4] += T1;
}
// Progress W2...W3
#pragma unroll 3
for(int k=0;k<3;k++)
{
#pragma unroll 2
for(int j=0;j<2;j++)
W2[j] = s1(W1[14+j]) + W1[9+j] + s0(W1[1+j]) + W1[j];
#pragma unroll 5
for(int j=2;j<7;j++)
W2[j] = s1(W2[j-2]) + W1[9+j] + s0(W1[1+j]) + W1[j];
#pragma unroll 8
for(int j=7;j<15;j++)
W2[j] = s1(W2[j-2]) + W2[j-7] + s0(W1[1+j]) + W1[j];
W2[15] = s1(W2[13]) + W2[8] + s0(W2[0]) + W1[15];
// Rundenfunktion
#pragma unroll 16
for(int j=0;j<16;j++)
{
uint32_t T1, T2;
T1 = regs[7] + S1(regs[4]) + Ch(regs[4], regs[5], regs[6]) + myr_sha256_gpu_constantTable[j + 16 * (k+1)] + W2[j];
T2 = S0(regs[0]) + Maj(regs[0], regs[1], regs[2]);
#pragma unroll 7
for (int l=6; l >= 0; l--) regs[l+1] = regs[l];
regs[0] = T1 + T2;
regs[4] += T1;
}
#pragma unroll 16
for(int j=0;j<16;j++)
W1[j] = W2[j];
}
#pragma unroll 8
for(int k=0;k<8;k++)
hash[k] += regs[k];
/////
///// Zweite Runde (wegen Msg-Padding)
/////
#pragma unroll 8
for(int k=0;k<8;k++)
regs[k] = hash[k];
W1[0] = SWAB32(0x80);
#pragma unroll 14
for(int k=1;k<15;k++)
W1[k] = 0;
W1[15] = 512;
// Progress W1
#pragma unroll 16
for(int j=0;j<16;j++)
{
uint32_t T1, T2;
T1 = regs[7] + S1(regs[4]) + Ch(regs[4], regs[5], regs[6]) + myr_sha256_gpu_constantTable[j] + W1[j];
T2 = S0(regs[0]) + Maj(regs[0], regs[1], regs[2]);
#pragma unroll 7
for (int k=6; k >= 0; k--) regs[k+1] = regs[k];
regs[0] = T1 + T2;
regs[4] += T1;
}
// Progress W2...W3
#pragma unroll 3
for(int k=0;k<3;k++)
{
#pragma unroll 2
for(int j=0;j<2;j++)
W2[j] = s1(W1[14+j]) + W1[9+j] + s0(W1[1+j]) + W1[j];
#pragma unroll 5
for(int j=2;j<7;j++)
W2[j] = s1(W2[j-2]) + W1[9+j] + s0(W1[1+j]) + W1[j];
#pragma unroll 8
for(int j=7;j<15;j++)
W2[j] = s1(W2[j-2]) + W2[j-7] + s0(W1[1+j]) + W1[j];
W2[15] = s1(W2[13]) + W2[8] + s0(W2[0]) + W1[15];
// Rundenfunktion
#pragma unroll 16
for(int j=0;j<16;j++)
{
uint32_t T1, T2;
T1 = regs[7] + S1(regs[4]) + Ch(regs[4], regs[5], regs[6]) + myr_sha256_gpu_constantTable[j + 16 * (k+1)] + W2[j];
T2 = S0(regs[0]) + Maj(regs[0], regs[1], regs[2]);
#pragma unroll 7
for (int l=6; l >= 0; l--) regs[l+1] = regs[l];
regs[0] = T1 + T2;
regs[4] += T1;
}
#pragma unroll 16
for(int j=0;j<16;j++)
W1[j] = W2[j];
}
#pragma unroll 8
for(int k=0;k<8;k++)
hash[k] += regs[k];
//// FERTIG
#pragma unroll 8
for(int k=0;k<8;k++)
message[k] = SWAB32(hash[k]);
}
#define SPH_C32(x) ((uint32_t)(x ## U))
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
#define PC32up(j, r) ((uint32_t)((j) + (r)))
#define PC32dn(j, r) 0
#define QC32up(j, r) 0xFFFFFFFF
#define QC32dn(j, r) (((uint32_t)(r) << 24) ^ SPH_T32(~((uint32_t)(j) << 24)))
#define B32_0(x) __byte_perm(x, 0, 0x4440)
//((x) & 0xFF)
#define B32_1(x) __byte_perm(x, 0, 0x4441)
//(((x) >> 8) & 0xFF)
#define B32_2(x) __byte_perm(x, 0, 0x4442)
//(((x) >> 16) & 0xFF)
#define B32_3(x) __byte_perm(x, 0, 0x4443)
//((x) >> 24)
#if MAXWELL_OR_FEMRI
#define USE_SHARED 1
// Maxwell and Fermi cards get the best speed with SHARED access it seems.
#if USE_SHARED
#define T0up(x) (*((uint32_t*)mixtabs + ( (x))))
#define T0dn(x) (*((uint32_t*)mixtabs + (256+(x))))
#define T1up(x) (*((uint32_t*)mixtabs + (512+(x))))
#define T1dn(x) (*((uint32_t*)mixtabs + (768+(x))))
#define T2up(x) (*((uint32_t*)mixtabs + (1024+(x))))
#define T2dn(x) (*((uint32_t*)mixtabs + (1280+(x))))
#define T3up(x) (*((uint32_t*)mixtabs + (1536+(x))))
#define T3dn(x) (*((uint32_t*)mixtabs + (1792+(x))))
#else
#define T0up(x) tex1Dfetch(t0up1, x)
#define T0dn(x) tex1Dfetch(t0dn1, x)
#define T1up(x) tex1Dfetch(t1up1, x)
#define T1dn(x) tex1Dfetch(t1dn1, x)
#define T2up(x) tex1Dfetch(t2up1, x)
#define T2dn(x) tex1Dfetch(t2dn1, x)
#define T3up(x) tex1Dfetch(t3up1, x)
#define T3dn(x) tex1Dfetch(t3dn1, x)
#endif
#else
#define USE_SHARED 1
// a healthy mix between shared and textured access provides the highest speed on Compute 3.0 and 3.5!
#define T0up(x) (*((uint32_t*)mixtabs + ( (x))))
#define T0dn(x) tex1Dfetch(t0dn1, x)
#define T1up(x) tex1Dfetch(t1up1, x)
#define T1dn(x) (*((uint32_t*)mixtabs + (768+(x))))
#define T2up(x) tex1Dfetch(t2up1, x)
#define T2dn(x) (*((uint32_t*)mixtabs + (1280+(x))))
#define T3up(x) (*((uint32_t*)mixtabs + (1536+(x))))
#define T3dn(x) tex1Dfetch(t3dn1, x)
#endif
texture<unsigned int, 1, cudaReadModeElementType> t0up1;
texture<unsigned int, 1, cudaReadModeElementType> t0dn1;
texture<unsigned int, 1, cudaReadModeElementType> t1up1;
texture<unsigned int, 1, cudaReadModeElementType> t1dn1;
texture<unsigned int, 1, cudaReadModeElementType> t2up1;
texture<unsigned int, 1, cudaReadModeElementType> t2dn1;
texture<unsigned int, 1, cudaReadModeElementType> t3up1;
texture<unsigned int, 1, cudaReadModeElementType> t3dn1;
extern uint32_t T0up_cpu[];
extern uint32_t T0dn_cpu[];
extern uint32_t T1up_cpu[];
extern uint32_t T1dn_cpu[];
extern uint32_t T2up_cpu[];
extern uint32_t T2dn_cpu[];
extern uint32_t T3up_cpu[];
extern uint32_t T3dn_cpu[];
#define SWAB32(x) ( ((x & 0x000000FF) << 24) | ((x & 0x0000FF00) << 8) | ((x & 0x00FF0000) >> 8) | ((x & 0xFF000000) >> 24) )
__device__ __forceinline__ void myriadgroestl_perm_P(uint32_t *a, char *mixtabs)
{
uint32_t t[32];
//#pragma unroll 14
for(int r=0;r<14;r++)
{
switch(r)
{
case 0:
#pragma unroll 16
for(int k=0;k<16;k++) a[(k*2)+0] ^= PC32up(k * 0x10, 0); break;
case 1:
#pragma unroll 16
for(int k=0;k<16;k++) a[(k*2)+0] ^= PC32up(k * 0x10, 1); break;
case 2:
#pragma unroll 16
for(int k=0;k<16;k++) a[(k*2)+0] ^= PC32up(k * 0x10, 2); break;
case 3:
#pragma unroll 16
for(int k=0;k<16;k++) a[(k*2)+0] ^= PC32up(k * 0x10, 3); break;
case 4:
#pragma unroll 16
for(int k=0;k<16;k++) a[(k*2)+0] ^= PC32up(k * 0x10, 4); break;
case 5:
#pragma unroll 16
for(int k=0;k<16;k++) a[(k*2)+0] ^= PC32up(k * 0x10, 5); break;
case 6:
#pragma unroll 16
for(int k=0;k<16;k++) a[(k*2)+0] ^= PC32up(k * 0x10, 6); break;
case 7:
#pragma unroll 16
for(int k=0;k<16;k++) a[(k*2)+0] ^= PC32up(k * 0x10, 7); break;
case 8:
#pragma unroll 16
for(int k=0;k<16;k++) a[(k*2)+0] ^= PC32up(k * 0x10, 8); break;
case 9:
#pragma unroll 16
for(int k=0;k<16;k++) a[(k*2)+0] ^= PC32up(k * 0x10, 9); break;
case 10:
#pragma unroll 16
for(int k=0;k<16;k++) a[(k*2)+0] ^= PC32up(k * 0x10, 10); break;
case 11:
#pragma unroll 16
for(int k=0;k<16;k++) a[(k*2)+0] ^= PC32up(k * 0x10, 11); break;
case 12:
#pragma unroll 16
for(int k=0;k<16;k++) a[(k*2)+0] ^= PC32up(k * 0x10, 12); break;
case 13:
#pragma unroll 16
for(int k=0;k<16;k++) a[(k*2)+0] ^= PC32up(k * 0x10, 13); break;
}
// RBTT
#pragma unroll 16
for(int k=0;k<32;k+=2)
{
uint32_t t0_0 = B32_0(a[(k ) & 0x1f]), t9_0 = B32_0(a[(k + 9) & 0x1f]);
uint32_t t2_1 = B32_1(a[(k + 2) & 0x1f]), t11_1 = B32_1(a[(k + 11) & 0x1f]);
uint32_t t4_2 = B32_2(a[(k + 4) & 0x1f]), t13_2 = B32_2(a[(k + 13) & 0x1f]);
uint32_t t6_3 = B32_3(a[(k + 6) & 0x1f]), t23_3 = B32_3(a[(k + 23) & 0x1f]);
t[k + 0] = T0up( t0_0 ) ^ T1up( t2_1 ) ^ T2up( t4_2 ) ^ T3up( t6_3 ) ^
T0dn( t9_0 ) ^ T1dn( t11_1 ) ^ T2dn( t13_2 ) ^ T3dn( t23_3 );
t[k + 1] = T0dn( t0_0 ) ^ T1dn( t2_1 ) ^ T2dn( t4_2 ) ^ T3dn( t6_3 ) ^
T0up( t9_0 ) ^ T1up( t11_1 ) ^ T2up( t13_2 ) ^ T3up( t23_3 );
}
#pragma unroll 32
for(int k=0;k<32;k++)
a[k] = t[k];
}
}
__device__ __forceinline__ void myriadgroestl_perm_Q(uint32_t *a, char *mixtabs)
{
//#pragma unroll 14
for(int r=0;r<14;r++)
{
uint32_t t[32];
switch(r)
{
case 0:
#pragma unroll 16
for(int k=0;k<16;k++) { a[(k*2)+0] ^= QC32up(k * 0x10, 0); a[(k*2)+1] ^= QC32dn(k * 0x10, 0);} break;
case 1:
#pragma unroll 16
for(int k=0;k<16;k++) { a[(k*2)+0] ^= QC32up(k * 0x10, 1); a[(k*2)+1] ^= QC32dn(k * 0x10, 1);} break;
case 2:
#pragma unroll 16
for(int k=0;k<16;k++) { a[(k*2)+0] ^= QC32up(k * 0x10, 2); a[(k*2)+1] ^= QC32dn(k * 0x10, 2);} break;
case 3:
#pragma unroll 16
for(int k=0;k<16;k++) { a[(k*2)+0] ^= QC32up(k * 0x10, 3); a[(k*2)+1] ^= QC32dn(k * 0x10, 3);} break;
case 4:
#pragma unroll 16
for(int k=0;k<16;k++) { a[(k*2)+0] ^= QC32up(k * 0x10, 4); a[(k*2)+1] ^= QC32dn(k * 0x10, 4);} break;
case 5:
#pragma unroll 16
for(int k=0;k<16;k++) { a[(k*2)+0] ^= QC32up(k * 0x10, 5); a[(k*2)+1] ^= QC32dn(k * 0x10, 5);} break;
case 6:
#pragma unroll 16
for(int k=0;k<16;k++) { a[(k*2)+0] ^= QC32up(k * 0x10, 6); a[(k*2)+1] ^= QC32dn(k * 0x10, 6);} break;
case 7:
#pragma unroll 16
for(int k=0;k<16;k++) { a[(k*2)+0] ^= QC32up(k * 0x10, 7); a[(k*2)+1] ^= QC32dn(k * 0x10, 7);} break;
case 8:
#pragma unroll 16
for(int k=0;k<16;k++) { a[(k*2)+0] ^= QC32up(k * 0x10, 8); a[(k*2)+1] ^= QC32dn(k * 0x10, 8);} break;
case 9:
#pragma unroll 16
for(int k=0;k<16;k++) { a[(k*2)+0] ^= QC32up(k * 0x10, 9); a[(k*2)+1] ^= QC32dn(k * 0x10, 9);} break;
case 10:
#pragma unroll 16
for(int k=0;k<16;k++) { a[(k*2)+0] ^= QC32up(k * 0x10, 10); a[(k*2)+1] ^= QC32dn(k * 0x10, 10);} break;
case 11:
#pragma unroll 16
for(int k=0;k<16;k++) { a[(k*2)+0] ^= QC32up(k * 0x10, 11); a[(k*2)+1] ^= QC32dn(k * 0x10, 11);} break;
case 12:
#pragma unroll 16
for(int k=0;k<16;k++) { a[(k*2)+0] ^= QC32up(k * 0x10, 12); a[(k*2)+1] ^= QC32dn(k * 0x10, 12);} break;
case 13:
#pragma unroll 16
for(int k=0;k<16;k++) { a[(k*2)+0] ^= QC32up(k * 0x10, 13); a[(k*2)+1] ^= QC32dn(k * 0x10, 13);} break;
}
// RBTT
#pragma unroll 16
for(int k=0;k<32;k+=2)
{
uint32_t t2_0 = B32_0(a[(k + 2) & 0x1f]), t1_0 = B32_0(a[(k + 1) & 0x1f]);
uint32_t t6_1 = B32_1(a[(k + 6) & 0x1f]), t5_1 = B32_1(a[(k + 5) & 0x1f]);
uint32_t t10_2 = B32_2(a[(k + 10) & 0x1f]), t9_2 = B32_2(a[(k + 9) & 0x1f]);
uint32_t t22_3 = B32_3(a[(k + 22) & 0x1f]), t13_3 = B32_3(a[(k + 13) & 0x1f]);
t[k + 0] = T0up( t2_0 ) ^ T1up( t6_1 ) ^ T2up( t10_2 ) ^ T3up( t22_3 ) ^
T0dn( t1_0 ) ^ T1dn( t5_1 ) ^ T2dn( t9_2 ) ^ T3dn( t13_3 );
t[k + 1] = T0dn( t2_0 ) ^ T1dn( t6_1 ) ^ T2dn( t10_2 ) ^ T3dn( t22_3 ) ^
T0up( t1_0 ) ^ T1up( t5_1 ) ^ T2up( t9_2 ) ^ T3up( t13_3 );
}
#pragma unroll 32
for(int k=0;k<32;k++)
a[k] = t[k];
}
}
__global__ void
myriadgroestl_gpu_hash(int threads, uint32_t startNounce, uint32_t *resNounce)
{
#if USE_SHARED
extern __shared__ char mixtabs[];
if (threadIdx.x < 256)
{
*((uint32_t*)mixtabs + ( threadIdx.x)) = tex1Dfetch(t0up1, threadIdx.x);
*((uint32_t*)mixtabs + (256+threadIdx.x)) = tex1Dfetch(t0dn1, threadIdx.x);
*((uint32_t*)mixtabs + (512+threadIdx.x)) = tex1Dfetch(t1up1, threadIdx.x);
*((uint32_t*)mixtabs + (768+threadIdx.x)) = tex1Dfetch(t1dn1, threadIdx.x);
*((uint32_t*)mixtabs + (1024+threadIdx.x)) = tex1Dfetch(t2up1, threadIdx.x);
*((uint32_t*)mixtabs + (1280+threadIdx.x)) = tex1Dfetch(t2dn1, threadIdx.x);
*((uint32_t*)mixtabs + (1536+threadIdx.x)) = tex1Dfetch(t3up1, threadIdx.x);
*((uint32_t*)mixtabs + (1792+threadIdx.x)) = tex1Dfetch(t3dn1, threadIdx.x);
}
__syncthreads();
#endif
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
{
// GROESTL
uint32_t message[32];
uint32_t state[32];
#pragma unroll 32
for(int k=0;k<32;k++) message[k] = myriadgroestl_gpu_msg[k];
uint32_t nounce = startNounce + thread;
message[19] = SWAB32(nounce);
#pragma unroll 32
for(int u=0;u<32;u++) state[u] = message[u];
state[31] ^= 0x20000;
// Perm
#if USE_SHARED
myriadgroestl_perm_P(state, mixtabs);
state[31] ^= 0x20000;
myriadgroestl_perm_Q(message, mixtabs);
#else
myriadgroestl_perm_P(state, NULL);
state[31] ^= 0x20000;
myriadgroestl_perm_Q(message, NULL);
#endif
#pragma unroll 32
for(int u=0;u<32;u++) state[u] ^= message[u];
#pragma unroll 32
for(int u=0;u<32;u++) message[u] = state[u];
#if USE_SHARED
myriadgroestl_perm_P(message, mixtabs);
#else
myriadgroestl_perm_P(message, NULL);
#endif
#pragma unroll 32
for(int u=0;u<32;u++) state[u] ^= message[u];
uint32_t out_state[16];
#pragma unroll 16
for(int u=0;u<16;u++) out_state[u] = state[u+16];
myriadgroestl_gpu_sha256(out_state);
int i, position = -1;
bool rc = true;
#pragma unroll 8
for (i = 7; i >= 0; i--) {
if (out_state[i] > pTarget[i]) {
if(position < i) {
position = i;
rc = false;
}
}
if (out_state[i] < pTarget[i]) {
if(position < i) {
position = i;
rc = true;
}
}
}
if(rc == true)
if(resNounce[0] > nounce)
resNounce[0] = nounce;
}
}
#define texDef(texname, texmem, texsource, texsize) \
unsigned int *texmem; \
cudaMalloc(&texmem, texsize); \
cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \
texname.normalized = 0; \
texname.filterMode = cudaFilterModePoint; \
texname.addressMode[0] = cudaAddressModeClamp; \
{ cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned int>(); \
cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); } \
// Setup-Funktionen
__host__ void myriadgroestl_cpu_init(int thr_id, int threads)
{
cudaSetDevice(device_map[thr_id]);
cudaMemcpyToSymbol( myr_sha256_gpu_hashTable,
myr_sha256_cpu_hashTable,
sizeof(uint32_t) * 8 );
cudaMemcpyToSymbol( myr_sha256_gpu_constantTable,
myr_sha256_cpu_constantTable,
sizeof(uint32_t) * 64 );
cudaGetDeviceProperties(&props, device_map[thr_id]);
// Texturen mit obigem Makro initialisieren
texDef(t0up1, d_T0up, T0up_cpu, sizeof(uint32_t)*256);
texDef(t0dn1, d_T0dn, T0dn_cpu, sizeof(uint32_t)*256);
texDef(t1up1, d_T1up, T1up_cpu, sizeof(uint32_t)*256);
texDef(t1dn1, d_T1dn, T1dn_cpu, sizeof(uint32_t)*256);
texDef(t2up1, d_T2up, T2up_cpu, sizeof(uint32_t)*256);
texDef(t2dn1, d_T2dn, T2dn_cpu, sizeof(uint32_t)*256);
texDef(t3up1, d_T3up, T3up_cpu, sizeof(uint32_t)*256);
texDef(t3dn1, d_T3dn, T3dn_cpu, sizeof(uint32_t)*256);
// Speicher für Gewinner-Nonce belegen
cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t));
}
__host__ void myriadgroestl_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
{
// Nachricht expandieren und setzen
uint32_t msgBlock[32];
memset(msgBlock, 0, sizeof(uint32_t) * 32);
memcpy(&msgBlock[0], data, 80);
// Erweitere die Nachricht auf den Nachrichtenblock (padding)
// Unsere Nachricht hat 80 Byte
msgBlock[20] = 0x80;
msgBlock[31] = 0x01000000;
// groestl512 braucht hierfür keinen CPU-Code (die einzige Runde wird
// auf der GPU ausgeführt)
// Blockheader setzen (korrekte Nonce und Hefty Hash fehlen da drin noch)
cudaMemcpyToSymbol( myriadgroestl_gpu_msg,
msgBlock,
128);
cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));
cudaMemcpyToSymbol( pTarget,
pTargetIn,
sizeof(uint32_t) * 8 );
}
__host__ void myriadgroestl_cpu_hash(int thr_id, int threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce)
{
// Compute 3.x und 5.x Geräte am besten mit 768 Threads ansteuern,
// alle anderen mit 512 Threads.
int threadsperblock = (props.major >= 3) ? 768 : 512;
// berechne wie viele Thread Blocks wir brauchen
dim3 grid((threads + threadsperblock-1)/threadsperblock);
dim3 block(threadsperblock);
// Größe des dynamischen Shared Memory Bereichs
#if USE_SHARED
size_t shared_size = 8 * 256 * sizeof(uint32_t);
#else
size_t shared_size = 0;
#endif
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
//fprintf(stderr, "ThrID: %d\n", thr_id);
cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));
myriadgroestl_gpu_hash<<<grid, block, shared_size>>>(threads, startNounce, d_resultNonce[thr_id]);
// Strategisches Sleep Kommando zur Senkung der CPU Last
MyStreamSynchronize(NULL, 0, thr_id);
cudaMemcpy(nounce, d_resultNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
}

106
myriadgroestl.cpp

@ -0,0 +1,106 @@ @@ -0,0 +1,106 @@
#include "uint256.h"
#include "sph/sph_groestl.h"
#include "cpuminer-config.h"
#include "miner.h"
#include <string.h>
#include <stdint.h>
#include <openssl/sha.h>
extern bool opt_benchmark;
void myriadgroestl_cpu_init(int thr_id, int threads);
void myriadgroestl_cpu_setBlock(int thr_id, void *data, void *pTargetIn);
void myriadgroestl_cpu_hash(int thr_id, int threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce);
#define SWAP32(x) \
((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) | \
(((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu))
static void myriadhash(void *state, const void *input)
{
sph_groestl512_context ctx_groestl;
uint32_t hashA[16], hashB[16];
sph_groestl512_init(&ctx_groestl);
sph_groestl512 (&ctx_groestl, input, 80);
sph_groestl512_close(&ctx_groestl, hashA);
SHA256_CTX sha256;
SHA256_Init(&sha256);
SHA256_Update(&sha256,(unsigned char *)hashA, 64);
SHA256_Final((unsigned char *)hashB, &sha256);
memcpy(state, hashB, 32);
}
extern "C" int scanhash_myriad(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
uint32_t max_nonce, unsigned long *hashes_done)
{
uint32_t start_nonce = pdata[19]++;
const uint32_t throughPut = 128 * 1024;
// const uint32_t throughPut = 1;
uint32_t *outputHash = (uint32_t*)malloc(throughPut * 16 * sizeof(uint32_t));
// TODO: entfernen für eine Release! Ist nur zum Testen!
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x0000ff;
const uint32_t Htarg = ptarget[7];
// init
static bool init[8] = { false, false, false, false, false, false, false, false };
if(!init[thr_id])
{
#if BIG_DEBUG
#else
myriadgroestl_cpu_init(thr_id, throughPut);
#endif
init[thr_id] = true;
}
uint32_t endiandata[32];
for (int kk=0; kk < 32; kk++)
be32enc(&endiandata[kk], pdata[kk]);
// Context mit dem Endian gedrehten Blockheader vorbereiten (Nonce wird später ersetzt)
myriadgroestl_cpu_setBlock(thr_id, endiandata, (void*)ptarget);
do {
// GPU
uint32_t foundNounce = 0xFFFFFFFF;
myriadgroestl_cpu_hash(thr_id, throughPut, pdata[19], outputHash, &foundNounce);
if(foundNounce < 0xffffffff)
{
uint32_t tmpHash[8];
endiandata[19] = SWAP32(foundNounce);
myriadhash(tmpHash, endiandata);
if (tmpHash[7] <= Htarg &&
fulltest(tmpHash, ptarget)) {
pdata[19] = foundNounce;
*hashes_done = foundNounce - start_nonce;
free(outputHash);
return true;
} else {
applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNounce);
}
foundNounce = 0xffffffff;
}
if (pdata[19] + throughPut < pdata[19])
pdata[19] = max_nonce;
else pdata[19] += throughPut;
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
*hashes_done = pdata[19] - start_nonce;
free(outputHash);
return 0;
}

0
quark/.deps/.dirstamp vendored

0
quark/.dirstamp

107
quark/cuda_quark_checkhash.cu

@ -0,0 +1,107 @@ @@ -0,0 +1,107 @@
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <memory.h>
// Folgende Definitionen später durch header ersetzen
typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
typedef unsigned long long uint64_t;
// das Hash Target gegen das wir testen sollen
__constant__ uint32_t pTarget[8];
uint32_t *d_resNounce[8];
uint32_t *h_resNounce[8];
// aus heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
__global__ void quark_check_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_nonceVector, uint32_t *g_hash, uint32_t *resNounce)
{
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
{
// bestimme den aktuellen Zähler
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
int hashPosition = nounce - startNounce;
uint32_t *inpHash = &g_hash[16 * hashPosition];
uint32_t hash[8];
#pragma unroll 8
for (int i=0; i < 8; i++)
hash[i] = inpHash[i];
// kopiere Ergebnis
int i, position = -1;
bool rc = true;
#pragma unroll 8
for (i = 7; i >= 0; i--) {
if (hash[i] > pTarget[i]) {
if(position < i) {
position = i;
rc = false;
}
}
if (hash[i] < pTarget[i]) {
if(position < i) {
position = i;
rc = true;
}
}
}
if(rc == true)
if(resNounce[0] > nounce)
resNounce[0] = nounce;
}
}
// Setup-Funktionen
__host__ void quark_check_cpu_init(int thr_id, int threads)
{
cudaMallocHost(&h_resNounce[thr_id], 1*sizeof(uint32_t));
cudaMalloc(&d_resNounce[thr_id], 1*sizeof(uint32_t));
}
// Target Difficulty setzen
__host__ void quark_check_cpu_setTarget(const void *ptarget)
{
// die Message zur Berechnung auf der GPU
cudaMemcpyToSymbol( pTarget, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice);
}
__host__ uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order)
{
uint32_t result = 0xffffffff;
cudaMemset(d_resNounce[thr_id], 0xff, sizeof(uint32_t));
const int threadsperblock = 256;
// berechne wie viele Thread Blocks wir brauchen
dim3 grid((threads + threadsperblock-1)/threadsperblock);
dim3 block(threadsperblock);
// Größe des dynamischen Shared Memory Bereichs
size_t shared_size = 0;
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
quark_check_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, d_nonceVector, d_inputHash, d_resNounce[thr_id]);
// Strategisches Sleep Kommando zur Senkung der CPU Last
MyStreamSynchronize(NULL, order, thr_id);
// Ergebnis zum Host kopieren (in page locked memory, damits schneller geht)
cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
// cudaMemcpy() ist asynchron!
cudaThreadSynchronize();
result = *h_resNounce[thr_id];
return result;
}

392
sph/aes_helper.c

@ -0,0 +1,392 @@ @@ -0,0 +1,392 @@
/* $Id: aes_helper.c 220 2010-06-09 09:21:50Z tp $ */
/*
* AES tables. This file is not meant to be compiled by itself; it
* is included by some hash function implementations. It contains
* the precomputed tables and helper macros for evaluating an AES
* round, optionally with a final XOR with a subkey.
*
* By default, this file defines the tables and macros for little-endian
* processing (i.e. it is assumed that the input bytes have been read
* from memory and assembled with the little-endian convention). If
* the 'AES_BIG_ENDIAN' macro is defined (to a non-zero integer value)
* when this file is included, then the tables and macros for big-endian
* processing are defined instead. The big-endian tables and macros have
* names distinct from the little-endian tables and macros, hence it is
* possible to have both simultaneously, by including this file twice
* (with and without the AES_BIG_ENDIAN macro).
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#include "sph_types.h"
#ifdef __cplusplus
extern "C"{
#endif
#if AES_BIG_ENDIAN
#define AESx(x) ( ((SPH_C32(x) >> 24) & SPH_C32(0x000000FF)) \
| ((SPH_C32(x) >> 8) & SPH_C32(0x0000FF00)) \
| ((SPH_C32(x) << 8) & SPH_C32(0x00FF0000)) \
| ((SPH_C32(x) << 24) & SPH_C32(0xFF000000)))
#define AES0 AES0_BE
#define AES1 AES1_BE
#define AES2 AES2_BE
#define AES3 AES3_BE
#define AES_ROUND_BE(X0, X1, X2, X3, K0, K1, K2, K3, Y0, Y1, Y2, Y3) do { \
(Y0) = AES0[((X0) >> 24) & 0xFF] \
^ AES1[((X1) >> 16) & 0xFF] \
^ AES2[((X2) >> 8) & 0xFF] \
^ AES3[(X3) & 0xFF] ^ (K0); \
(Y1) = AES0[((X1) >> 24) & 0xFF] \
^ AES1[((X2) >> 16) & 0xFF] \
^ AES2[((X3) >> 8) & 0xFF] \
^ AES3[(X0) & 0xFF] ^ (K1); \
(Y2) = AES0[((X2) >> 24) & 0xFF] \
^ AES1[((X3) >> 16) & 0xFF] \
^ AES2[((X0) >> 8) & 0xFF] \
^ AES3[(X1) & 0xFF] ^ (K2); \
(Y3) = AES0[((X3) >> 24) & 0xFF] \
^ AES1[((X0) >> 16) & 0xFF] \
^ AES2[((X1) >> 8) & 0xFF] \
^ AES3[(X2) & 0xFF] ^ (K3); \
} while (0)
#define AES_ROUND_NOKEY_BE(X0, X1, X2, X3, Y0, Y1, Y2, Y3) \
AES_ROUND_BE(X0, X1, X2, X3, 0, 0, 0, 0, Y0, Y1, Y2, Y3)
#else
#define AESx(x) SPH_C32(x)
#define AES0 AES0_LE
#define AES1 AES1_LE
#define AES2 AES2_LE
#define AES3 AES3_LE
#define AES_ROUND_LE(X0, X1, X2, X3, K0, K1, K2, K3, Y0, Y1, Y2, Y3) do { \
(Y0) = AES0[(X0) & 0xFF] \
^ AES1[((X1) >> 8) & 0xFF] \
^ AES2[((X2) >> 16) & 0xFF] \
^ AES3[((X3) >> 24) & 0xFF] ^ (K0); \
(Y1) = AES0[(X1) & 0xFF] \
^ AES1[((X2) >> 8) & 0xFF] \
^ AES2[((X3) >> 16) & 0xFF] \
^ AES3[((X0) >> 24) & 0xFF] ^ (K1); \
(Y2) = AES0[(X2) & 0xFF] \
^ AES1[((X3) >> 8) & 0xFF] \
^ AES2[((X0) >> 16) & 0xFF] \
^ AES3[((X1) >> 24) & 0xFF] ^ (K2); \
(Y3) = AES0[(X3) & 0xFF] \
^ AES1[((X0) >> 8) & 0xFF] \
^ AES2[((X1) >> 16) & 0xFF] \
^ AES3[((X2) >> 24) & 0xFF] ^ (K3); \
} while (0)
#define AES_ROUND_NOKEY_LE(X0, X1, X2, X3, Y0, Y1, Y2, Y3) \
AES_ROUND_LE(X0, X1, X2, X3, 0, 0, 0, 0, Y0, Y1, Y2, Y3)
#endif
/*
* The AES*[] tables allow us to perform a fast evaluation of an AES
* round; table AESi[] combines SubBytes for a byte at row i, and
* MixColumns for the column where that byte goes after ShiftRows.
*/
static const sph_u32 AES0[256] = {
AESx(0xA56363C6), AESx(0x847C7CF8), AESx(0x997777EE), AESx(0x8D7B7BF6),
AESx(0x0DF2F2FF), AESx(0xBD6B6BD6), AESx(0xB16F6FDE), AESx(0x54C5C591),
AESx(0x50303060), AESx(0x03010102), AESx(0xA96767CE), AESx(0x7D2B2B56),
AESx(0x19FEFEE7), AESx(0x62D7D7B5), AESx(0xE6ABAB4D), AESx(0x9A7676EC),
AESx(0x45CACA8F), AESx(0x9D82821F), AESx(0x40C9C989), AESx(0x877D7DFA),
AESx(0x15FAFAEF), AESx(0xEB5959B2), AESx(0xC947478E), AESx(0x0BF0F0FB),
AESx(0xECADAD41), AESx(0x67D4D4B3), AESx(0xFDA2A25F), AESx(0xEAAFAF45),
AESx(0xBF9C9C23), AESx(0xF7A4A453), AESx(0x967272E4), AESx(0x5BC0C09B),
AESx(0xC2B7B775), AESx(0x1CFDFDE1), AESx(0xAE93933D), AESx(0x6A26264C),
AESx(0x5A36366C), AESx(0x413F3F7E), AESx(0x02F7F7F5), AESx(0x4FCCCC83),
AESx(0x5C343468), AESx(0xF4A5A551), AESx(0x34E5E5D1), AESx(0x08F1F1F9),
AESx(0x937171E2), AESx(0x73D8D8AB), AESx(0x53313162), AESx(0x3F15152A),
AESx(0x0C040408), AESx(0x52C7C795), AESx(0x65232346), AESx(0x5EC3C39D),
AESx(0x28181830), AESx(0xA1969637), AESx(0x0F05050A), AESx(0xB59A9A2F),
AESx(0x0907070E), AESx(0x36121224), AESx(0x9B80801B), AESx(0x3DE2E2DF),
AESx(0x26EBEBCD), AESx(0x6927274E), AESx(0xCDB2B27F), AESx(0x9F7575EA),
AESx(0x1B090912), AESx(0x9E83831D), AESx(0x742C2C58), AESx(0x2E1A1A34),
AESx(0x2D1B1B36), AESx(0xB26E6EDC), AESx(0xEE5A5AB4), AESx(0xFBA0A05B),
AESx(0xF65252A4), AESx(0x4D3B3B76), AESx(0x61D6D6B7), AESx(0xCEB3B37D),
AESx(0x7B292952), AESx(0x3EE3E3DD), AESx(0x712F2F5E), AESx(0x97848413),
AESx(0xF55353A6), AESx(0x68D1D1B9), AESx(0x00000000), AESx(0x2CEDEDC1),
AESx(0x60202040), AESx(0x1FFCFCE3), AESx(0xC8B1B179), AESx(0xED5B5BB6),
AESx(0xBE6A6AD4), AESx(0x46CBCB8D), AESx(0xD9BEBE67), AESx(0x4B393972),
AESx(0xDE4A4A94), AESx(0xD44C4C98), AESx(0xE85858B0), AESx(0x4ACFCF85),
AESx(0x6BD0D0BB), AESx(0x2AEFEFC5), AESx(0xE5AAAA4F), AESx(0x16FBFBED),
AESx(0xC5434386), AESx(0xD74D4D9A), AESx(0x55333366), AESx(0x94858511),
AESx(0xCF45458A), AESx(0x10F9F9E9), AESx(0x06020204), AESx(0x817F7FFE),
AESx(0xF05050A0), AESx(0x443C3C78), AESx(0xBA9F9F25), AESx(0xE3A8A84B),
AESx(0xF35151A2), AESx(0xFEA3A35D), AESx(0xC0404080), AESx(0x8A8F8F05),
AESx(0xAD92923F), AESx(0xBC9D9D21), AESx(0x48383870), AESx(0x04F5F5F1),
AESx(0xDFBCBC63), AESx(0xC1B6B677), AESx(0x75DADAAF), AESx(0x63212142),
AESx(0x30101020), AESx(0x1AFFFFE5), AESx(0x0EF3F3FD), AESx(0x6DD2D2BF),
AESx(0x4CCDCD81), AESx(0x140C0C18), AESx(0x35131326), AESx(0x2FECECC3),
AESx(0xE15F5FBE), AESx(0xA2979735), AESx(0xCC444488), AESx(0x3917172E),
AESx(0x57C4C493), AESx(0xF2A7A755), AESx(0x827E7EFC), AESx(0x473D3D7A),
AESx(0xAC6464C8), AESx(0xE75D5DBA), AESx(0x2B191932), AESx(0x957373E6),
AESx(0xA06060C0), AESx(0x98818119), AESx(0xD14F4F9E), AESx(0x7FDCDCA3),
AESx(0x66222244), AESx(0x7E2A2A54), AESx(0xAB90903B), AESx(0x8388880B),
AESx(0xCA46468C), AESx(0x29EEEEC7), AESx(0xD3B8B86B), AESx(0x3C141428),
AESx(0x79DEDEA7), AESx(0xE25E5EBC), AESx(0x1D0B0B16), AESx(0x76DBDBAD),
AESx(0x3BE0E0DB), AESx(0x56323264), AESx(0x4E3A3A74), AESx(0x1E0A0A14),
AESx(0xDB494992), AESx(0x0A06060C), AESx(0x6C242448), AESx(0xE45C5CB8),
AESx(0x5DC2C29F), AESx(0x6ED3D3BD), AESx(0xEFACAC43), AESx(0xA66262C4),
AESx(0xA8919139), AESx(0xA4959531), AESx(0x37E4E4D3), AESx(0x8B7979F2),
AESx(0x32E7E7D5), AESx(0x43C8C88B), AESx(0x5937376E), AESx(0xB76D6DDA),
AESx(0x8C8D8D01), AESx(0x64D5D5B1), AESx(0xD24E4E9C), AESx(0xE0A9A949),
AESx(0xB46C6CD8), AESx(0xFA5656AC), AESx(0x07F4F4F3), AESx(0x25EAEACF),
AESx(0xAF6565CA), AESx(0x8E7A7AF4), AESx(0xE9AEAE47), AESx(0x18080810),
AESx(0xD5BABA6F), AESx(0x887878F0), AESx(0x6F25254A), AESx(0x722E2E5C),
AESx(0x241C1C38), AESx(0xF1A6A657), AESx(0xC7B4B473), AESx(0x51C6C697),
AESx(0x23E8E8CB), AESx(0x7CDDDDA1), AESx(0x9C7474E8), AESx(0x211F1F3E),
AESx(0xDD4B4B96), AESx(0xDCBDBD61), AESx(0x868B8B0D), AESx(0x858A8A0F),
AESx(0x907070E0), AESx(0x423E3E7C), AESx(0xC4B5B571), AESx(0xAA6666CC),
AESx(0xD8484890), AESx(0x05030306), AESx(0x01F6F6F7), AESx(0x120E0E1C),
AESx(0xA36161C2), AESx(0x5F35356A), AESx(0xF95757AE), AESx(0xD0B9B969),
AESx(0x91868617), AESx(0x58C1C199), AESx(0x271D1D3A), AESx(0xB99E9E27),
AESx(0x38E1E1D9), AESx(0x13F8F8EB), AESx(0xB398982B), AESx(0x33111122),
AESx(0xBB6969D2), AESx(0x70D9D9A9), AESx(0x898E8E07), AESx(0xA7949433),
AESx(0xB69B9B2D), AESx(0x221E1E3C), AESx(0x92878715), AESx(0x20E9E9C9),
AESx(0x49CECE87), AESx(0xFF5555AA), AESx(0x78282850), AESx(0x7ADFDFA5),
AESx(0x8F8C8C03), AESx(0xF8A1A159), AESx(0x80898909), AESx(0x170D0D1A),
AESx(0xDABFBF65), AESx(0x31E6E6D7), AESx(0xC6424284), AESx(0xB86868D0),
AESx(0xC3414182), AESx(0xB0999929), AESx(0x772D2D5A), AESx(0x110F0F1E),
AESx(0xCBB0B07B), AESx(0xFC5454A8), AESx(0xD6BBBB6D), AESx(0x3A16162C)
};
static const sph_u32 AES1[256] = {
AESx(0x6363C6A5), AESx(0x7C7CF884), AESx(0x7777EE99), AESx(0x7B7BF68D),
AESx(0xF2F2FF0D), AESx(0x6B6BD6BD), AESx(0x6F6FDEB1), AESx(0xC5C59154),
AESx(0x30306050), AESx(0x01010203), AESx(0x6767CEA9), AESx(0x2B2B567D),
AESx(0xFEFEE719), AESx(0xD7D7B562), AESx(0xABAB4DE6), AESx(0x7676EC9A),
AESx(0xCACA8F45), AESx(0x82821F9D), AESx(0xC9C98940), AESx(0x7D7DFA87),
AESx(0xFAFAEF15), AESx(0x5959B2EB), AESx(0x47478EC9), AESx(0xF0F0FB0B),
AESx(0xADAD41EC), AESx(0xD4D4B367), AESx(0xA2A25FFD), AESx(0xAFAF45EA),
AESx(0x9C9C23BF), AESx(0xA4A453F7), AESx(0x7272E496), AESx(0xC0C09B5B),
AESx(0xB7B775C2), AESx(0xFDFDE11C), AESx(0x93933DAE), AESx(0x26264C6A),
AESx(0x36366C5A), AESx(0x3F3F7E41), AESx(0xF7F7F502), AESx(0xCCCC834F),
AESx(0x3434685C), AESx(0xA5A551F4), AESx(0xE5E5D134), AESx(0xF1F1F908),
AESx(0x7171E293), AESx(0xD8D8AB73), AESx(0x31316253), AESx(0x15152A3F),
AESx(0x0404080C), AESx(0xC7C79552), AESx(0x23234665), AESx(0xC3C39D5E),
AESx(0x18183028), AESx(0x969637A1), AESx(0x05050A0F), AESx(0x9A9A2FB5),
AESx(0x07070E09), AESx(0x12122436), AESx(0x80801B9B), AESx(0xE2E2DF3D),
AESx(0xEBEBCD26), AESx(0x27274E69), AESx(0xB2B27FCD), AESx(0x7575EA9F),
AESx(0x0909121B), AESx(0x83831D9E), AESx(0x2C2C5874), AESx(0x1A1A342E),
AESx(0x1B1B362D), AESx(0x6E6EDCB2), AESx(0x5A5AB4EE), AESx(0xA0A05BFB),
AESx(0x5252A4F6), AESx(0x3B3B764D), AESx(0xD6D6B761), AESx(0xB3B37DCE),
AESx(0x2929527B), AESx(0xE3E3DD3E), AESx(0x2F2F5E71), AESx(0x84841397),
AESx(0x5353A6F5), AESx(0xD1D1B968), AESx(0x00000000), AESx(0xEDEDC12C),
AESx(0x20204060), AESx(0xFCFCE31F), AESx(0xB1B179C8), AESx(0x5B5BB6ED),
AESx(0x6A6AD4BE), AESx(0xCBCB8D46), AESx(0xBEBE67D9), AESx(0x3939724B),
AESx(0x4A4A94DE), AESx(0x4C4C98D4), AESx(0x5858B0E8), AESx(0xCFCF854A),
AESx(0xD0D0BB6B), AESx(0xEFEFC52A), AESx(0xAAAA4FE5), AESx(0xFBFBED16),
AESx(0x434386C5), AESx(0x4D4D9AD7), AESx(0x33336655), AESx(0x85851194),
AESx(0x45458ACF), AESx(0xF9F9E910), AESx(0x02020406), AESx(0x7F7FFE81),
AESx(0x5050A0F0), AESx(0x3C3C7844), AESx(0x9F9F25BA), AESx(0xA8A84BE3),
AESx(0x5151A2F3), AESx(0xA3A35DFE), AESx(0x404080C0), AESx(0x8F8F058A),
AESx(0x92923FAD), AESx(0x9D9D21BC), AESx(0x38387048), AESx(0xF5F5F104),
AESx(0xBCBC63DF), AESx(0xB6B677C1), AESx(0xDADAAF75), AESx(0x21214263),
AESx(0x10102030), AESx(0xFFFFE51A), AESx(0xF3F3FD0E), AESx(0xD2D2BF6D),
AESx(0xCDCD814C), AESx(0x0C0C1814), AESx(0x13132635), AESx(0xECECC32F),
AESx(0x5F5FBEE1), AESx(0x979735A2), AESx(0x444488CC), AESx(0x17172E39),
AESx(0xC4C49357), AESx(0xA7A755F2), AESx(0x7E7EFC82), AESx(0x3D3D7A47),
AESx(0x6464C8AC), AESx(0x5D5DBAE7), AESx(0x1919322B), AESx(0x7373E695),
AESx(0x6060C0A0), AESx(0x81811998), AESx(0x4F4F9ED1), AESx(0xDCDCA37F),
AESx(0x22224466), AESx(0x2A2A547E), AESx(0x90903BAB), AESx(0x88880B83),
AESx(0x46468CCA), AESx(0xEEEEC729), AESx(0xB8B86BD3), AESx(0x1414283C),
AESx(0xDEDEA779), AESx(0x5E5EBCE2), AESx(0x0B0B161D), AESx(0xDBDBAD76),
AESx(0xE0E0DB3B), AESx(0x32326456), AESx(0x3A3A744E), AESx(0x0A0A141E),
AESx(0x494992DB), AESx(0x06060C0A), AESx(0x2424486C), AESx(0x5C5CB8E4),
AESx(0xC2C29F5D), AESx(0xD3D3BD6E), AESx(0xACAC43EF), AESx(0x6262C4A6),
AESx(0x919139A8), AESx(0x959531A4), AESx(0xE4E4D337), AESx(0x7979F28B),
AESx(0xE7E7D532), AESx(0xC8C88B43), AESx(0x37376E59), AESx(0x6D6DDAB7),
AESx(0x8D8D018C), AESx(0xD5D5B164), AESx(0x4E4E9CD2), AESx(0xA9A949E0),
AESx(0x6C6CD8B4), AESx(0x5656ACFA), AESx(0xF4F4F307), AESx(0xEAEACF25),
AESx(0x6565CAAF), AESx(0x7A7AF48E), AESx(0xAEAE47E9), AESx(0x08081018),
AESx(0xBABA6FD5), AESx(0x7878F088), AESx(0x25254A6F), AESx(0x2E2E5C72),
AESx(0x1C1C3824), AESx(0xA6A657F1), AESx(0xB4B473C7), AESx(0xC6C69751),
AESx(0xE8E8CB23), AESx(0xDDDDA17C), AESx(0x7474E89C), AESx(0x1F1F3E21),
AESx(0x4B4B96DD), AESx(0xBDBD61DC), AESx(0x8B8B0D86), AESx(0x8A8A0F85),
AESx(0x7070E090), AESx(0x3E3E7C42), AESx(0xB5B571C4), AESx(0x6666CCAA),
AESx(0x484890D8), AESx(0x03030605), AESx(0xF6F6F701), AESx(0x0E0E1C12),
AESx(0x6161C2A3), AESx(0x35356A5F), AESx(0x5757AEF9), AESx(0xB9B969D0),
AESx(0x86861791), AESx(0xC1C19958), AESx(0x1D1D3A27), AESx(0x9E9E27B9),
AESx(0xE1E1D938), AESx(0xF8F8EB13), AESx(0x98982BB3), AESx(0x11112233),
AESx(0x6969D2BB), AESx(0xD9D9A970), AESx(0x8E8E0789), AESx(0x949433A7),
AESx(0x9B9B2DB6), AESx(0x1E1E3C22), AESx(0x87871592), AESx(0xE9E9C920),
AESx(0xCECE8749), AESx(0x5555AAFF), AESx(0x28285078), AESx(0xDFDFA57A),
AESx(0x8C8C038F), AESx(0xA1A159F8), AESx(0x89890980), AESx(0x0D0D1A17),
AESx(0xBFBF65DA), AESx(0xE6E6D731), AESx(0x424284C6), AESx(0x6868D0B8),
AESx(0x414182C3), AESx(0x999929B0), AESx(0x2D2D5A77), AESx(0x0F0F1E11),
AESx(0xB0B07BCB), AESx(0x5454A8FC), AESx(0xBBBB6DD6), AESx(0x16162C3A)
};
static const sph_u32 AES2[256] = {
AESx(0x63C6A563), AESx(0x7CF8847C), AESx(0x77EE9977), AESx(0x7BF68D7B),
AESx(0xF2FF0DF2), AESx(0x6BD6BD6B), AESx(0x6FDEB16F), AESx(0xC59154C5),
AESx(0x30605030), AESx(0x01020301), AESx(0x67CEA967), AESx(0x2B567D2B),
AESx(0xFEE719FE), AESx(0xD7B562D7), AESx(0xAB4DE6AB), AESx(0x76EC9A76),
AESx(0xCA8F45CA), AESx(0x821F9D82), AESx(0xC98940C9), AESx(0x7DFA877D),
AESx(0xFAEF15FA), AESx(0x59B2EB59), AESx(0x478EC947), AESx(0xF0FB0BF0),
AESx(0xAD41ECAD), AESx(0xD4B367D4), AESx(0xA25FFDA2), AESx(0xAF45EAAF),
AESx(0x9C23BF9C), AESx(0xA453F7A4), AESx(0x72E49672), AESx(0xC09B5BC0),
AESx(0xB775C2B7), AESx(0xFDE11CFD), AESx(0x933DAE93), AESx(0x264C6A26),
AESx(0x366C5A36), AESx(0x3F7E413F), AESx(0xF7F502F7), AESx(0xCC834FCC),
AESx(0x34685C34), AESx(0xA551F4A5), AESx(0xE5D134E5), AESx(0xF1F908F1),
AESx(0x71E29371), AESx(0xD8AB73D8), AESx(0x31625331), AESx(0x152A3F15),
AESx(0x04080C04), AESx(0xC79552C7), AESx(0x23466523), AESx(0xC39D5EC3),
AESx(0x18302818), AESx(0x9637A196), AESx(0x050A0F05), AESx(0x9A2FB59A),
AESx(0x070E0907), AESx(0x12243612), AESx(0x801B9B80), AESx(0xE2DF3DE2),
AESx(0xEBCD26EB), AESx(0x274E6927), AESx(0xB27FCDB2), AESx(0x75EA9F75),
AESx(0x09121B09), AESx(0x831D9E83), AESx(0x2C58742C), AESx(0x1A342E1A),
AESx(0x1B362D1B), AESx(0x6EDCB26E), AESx(0x5AB4EE5A), AESx(0xA05BFBA0),
AESx(0x52A4F652), AESx(0x3B764D3B), AESx(0xD6B761D6), AESx(0xB37DCEB3),
AESx(0x29527B29), AESx(0xE3DD3EE3), AESx(0x2F5E712F), AESx(0x84139784),
AESx(0x53A6F553), AESx(0xD1B968D1), AESx(0x00000000), AESx(0xEDC12CED),
AESx(0x20406020), AESx(0xFCE31FFC), AESx(0xB179C8B1), AESx(0x5BB6ED5B),
AESx(0x6AD4BE6A), AESx(0xCB8D46CB), AESx(0xBE67D9BE), AESx(0x39724B39),
AESx(0x4A94DE4A), AESx(0x4C98D44C), AESx(0x58B0E858), AESx(0xCF854ACF),
AESx(0xD0BB6BD0), AESx(0xEFC52AEF), AESx(0xAA4FE5AA), AESx(0xFBED16FB),
AESx(0x4386C543), AESx(0x4D9AD74D), AESx(0x33665533), AESx(0x85119485),
AESx(0x458ACF45), AESx(0xF9E910F9), AESx(0x02040602), AESx(0x7FFE817F),
AESx(0x50A0F050), AESx(0x3C78443C), AESx(0x9F25BA9F), AESx(0xA84BE3A8),
AESx(0x51A2F351), AESx(0xA35DFEA3), AESx(0x4080C040), AESx(0x8F058A8F),
AESx(0x923FAD92), AESx(0x9D21BC9D), AESx(0x38704838), AESx(0xF5F104F5),
AESx(0xBC63DFBC), AESx(0xB677C1B6), AESx(0xDAAF75DA), AESx(0x21426321),
AESx(0x10203010), AESx(0xFFE51AFF), AESx(0xF3FD0EF3), AESx(0xD2BF6DD2),
AESx(0xCD814CCD), AESx(0x0C18140C), AESx(0x13263513), AESx(0xECC32FEC),
AESx(0x5FBEE15F), AESx(0x9735A297), AESx(0x4488CC44), AESx(0x172E3917),
AESx(0xC49357C4), AESx(0xA755F2A7), AESx(0x7EFC827E), AESx(0x3D7A473D),
AESx(0x64C8AC64), AESx(0x5DBAE75D), AESx(0x19322B19), AESx(0x73E69573),
AESx(0x60C0A060), AESx(0x81199881), AESx(0x4F9ED14F), AESx(0xDCA37FDC),
AESx(0x22446622), AESx(0x2A547E2A), AESx(0x903BAB90), AESx(0x880B8388),
AESx(0x468CCA46), AESx(0xEEC729EE), AESx(0xB86BD3B8), AESx(0x14283C14),
AESx(0xDEA779DE), AESx(0x5EBCE25E), AESx(0x0B161D0B), AESx(0xDBAD76DB),
AESx(0xE0DB3BE0), AESx(0x32645632), AESx(0x3A744E3A), AESx(0x0A141E0A),
AESx(0x4992DB49), AESx(0x060C0A06), AESx(0x24486C24), AESx(0x5CB8E45C),
AESx(0xC29F5DC2), AESx(0xD3BD6ED3), AESx(0xAC43EFAC), AESx(0x62C4A662),
AESx(0x9139A891), AESx(0x9531A495), AESx(0xE4D337E4), AESx(0x79F28B79),
AESx(0xE7D532E7), AESx(0xC88B43C8), AESx(0x376E5937), AESx(0x6DDAB76D),
AESx(0x8D018C8D), AESx(0xD5B164D5), AESx(0x4E9CD24E), AESx(0xA949E0A9),
AESx(0x6CD8B46C), AESx(0x56ACFA56), AESx(0xF4F307F4), AESx(0xEACF25EA),
AESx(0x65CAAF65), AESx(0x7AF48E7A), AESx(0xAE47E9AE), AESx(0x08101808),
AESx(0xBA6FD5BA), AESx(0x78F08878), AESx(0x254A6F25), AESx(0x2E5C722E),
AESx(0x1C38241C), AESx(0xA657F1A6), AESx(0xB473C7B4), AESx(0xC69751C6),
AESx(0xE8CB23E8), AESx(0xDDA17CDD), AESx(0x74E89C74), AESx(0x1F3E211F),
AESx(0x4B96DD4B), AESx(0xBD61DCBD), AESx(0x8B0D868B), AESx(0x8A0F858A),
AESx(0x70E09070), AESx(0x3E7C423E), AESx(0xB571C4B5), AESx(0x66CCAA66),
AESx(0x4890D848), AESx(0x03060503), AESx(0xF6F701F6), AESx(0x0E1C120E),
AESx(0x61C2A361), AESx(0x356A5F35), AESx(0x57AEF957), AESx(0xB969D0B9),
AESx(0x86179186), AESx(0xC19958C1), AESx(0x1D3A271D), AESx(0x9E27B99E),
AESx(0xE1D938E1), AESx(0xF8EB13F8), AESx(0x982BB398), AESx(0x11223311),
AESx(0x69D2BB69), AESx(0xD9A970D9), AESx(0x8E07898E), AESx(0x9433A794),
AESx(0x9B2DB69B), AESx(0x1E3C221E), AESx(0x87159287), AESx(0xE9C920E9),
AESx(0xCE8749CE), AESx(0x55AAFF55), AESx(0x28507828), AESx(0xDFA57ADF),
AESx(0x8C038F8C), AESx(0xA159F8A1), AESx(0x89098089), AESx(0x0D1A170D),
AESx(0xBF65DABF), AESx(0xE6D731E6), AESx(0x4284C642), AESx(0x68D0B868),
AESx(0x4182C341), AESx(0x9929B099), AESx(0x2D5A772D), AESx(0x0F1E110F),
AESx(0xB07BCBB0), AESx(0x54A8FC54), AESx(0xBB6DD6BB), AESx(0x162C3A16)
};
static const sph_u32 AES3[256] = {
AESx(0xC6A56363), AESx(0xF8847C7C), AESx(0xEE997777), AESx(0xF68D7B7B),
AESx(0xFF0DF2F2), AESx(0xD6BD6B6B), AESx(0xDEB16F6F), AESx(0x9154C5C5),
AESx(0x60503030), AESx(0x02030101), AESx(0xCEA96767), AESx(0x567D2B2B),
AESx(0xE719FEFE), AESx(0xB562D7D7), AESx(0x4DE6ABAB), AESx(0xEC9A7676),
AESx(0x8F45CACA), AESx(0x1F9D8282), AESx(0x8940C9C9), AESx(0xFA877D7D),
AESx(0xEF15FAFA), AESx(0xB2EB5959), AESx(0x8EC94747), AESx(0xFB0BF0F0),
AESx(0x41ECADAD), AESx(0xB367D4D4), AESx(0x5FFDA2A2), AESx(0x45EAAFAF),
AESx(0x23BF9C9C), AESx(0x53F7A4A4), AESx(0xE4967272), AESx(0x9B5BC0C0),
AESx(0x75C2B7B7), AESx(0xE11CFDFD), AESx(0x3DAE9393), AESx(0x4C6A2626),
AESx(0x6C5A3636), AESx(0x7E413F3F), AESx(0xF502F7F7), AESx(0x834FCCCC),
AESx(0x685C3434), AESx(0x51F4A5A5), AESx(0xD134E5E5), AESx(0xF908F1F1),
AESx(0xE2937171), AESx(0xAB73D8D8), AESx(0x62533131), AESx(0x2A3F1515),
AESx(0x080C0404), AESx(0x9552C7C7), AESx(0x46652323), AESx(0x9D5EC3C3),
AESx(0x30281818), AESx(0x37A19696), AESx(0x0A0F0505), AESx(0x2FB59A9A),
AESx(0x0E090707), AESx(0x24361212), AESx(0x1B9B8080), AESx(0xDF3DE2E2),
AESx(0xCD26EBEB), AESx(0x4E692727), AESx(0x7FCDB2B2), AESx(0xEA9F7575),
AESx(0x121B0909), AESx(0x1D9E8383), AESx(0x58742C2C), AESx(0x342E1A1A),
AESx(0x362D1B1B), AESx(0xDCB26E6E), AESx(0xB4EE5A5A), AESx(0x5BFBA0A0),
AESx(0xA4F65252), AESx(0x764D3B3B), AESx(0xB761D6D6), AESx(0x7DCEB3B3),
AESx(0x527B2929), AESx(0xDD3EE3E3), AESx(0x5E712F2F), AESx(0x13978484),
AESx(0xA6F55353), AESx(0xB968D1D1), AESx(0x00000000), AESx(0xC12CEDED),
AESx(0x40602020), AESx(0xE31FFCFC), AESx(0x79C8B1B1), AESx(0xB6ED5B5B),
AESx(0xD4BE6A6A), AESx(0x8D46CBCB), AESx(0x67D9BEBE), AESx(0x724B3939),
AESx(0x94DE4A4A), AESx(0x98D44C4C), AESx(0xB0E85858), AESx(0x854ACFCF),
AESx(0xBB6BD0D0), AESx(0xC52AEFEF), AESx(0x4FE5AAAA), AESx(0xED16FBFB),
AESx(0x86C54343), AESx(0x9AD74D4D), AESx(0x66553333), AESx(0x11948585),
AESx(0x8ACF4545), AESx(0xE910F9F9), AESx(0x04060202), AESx(0xFE817F7F),
AESx(0xA0F05050), AESx(0x78443C3C), AESx(0x25BA9F9F), AESx(0x4BE3A8A8),
AESx(0xA2F35151), AESx(0x5DFEA3A3), AESx(0x80C04040), AESx(0x058A8F8F),
AESx(0x3FAD9292), AESx(0x21BC9D9D), AESx(0x70483838), AESx(0xF104F5F5),
AESx(0x63DFBCBC), AESx(0x77C1B6B6), AESx(0xAF75DADA), AESx(0x42632121),
AESx(0x20301010), AESx(0xE51AFFFF), AESx(0xFD0EF3F3), AESx(0xBF6DD2D2),
AESx(0x814CCDCD), AESx(0x18140C0C), AESx(0x26351313), AESx(0xC32FECEC),
AESx(0xBEE15F5F), AESx(0x35A29797), AESx(0x88CC4444), AESx(0x2E391717),
AESx(0x9357C4C4), AESx(0x55F2A7A7), AESx(0xFC827E7E), AESx(0x7A473D3D),
AESx(0xC8AC6464), AESx(0xBAE75D5D), AESx(0x322B1919), AESx(0xE6957373),
AESx(0xC0A06060), AESx(0x19988181), AESx(0x9ED14F4F), AESx(0xA37FDCDC),
AESx(0x44662222), AESx(0x547E2A2A), AESx(0x3BAB9090), AESx(0x0B838888),
AESx(0x8CCA4646), AESx(0xC729EEEE), AESx(0x6BD3B8B8), AESx(0x283C1414),
AESx(0xA779DEDE), AESx(0xBCE25E5E), AESx(0x161D0B0B), AESx(0xAD76DBDB),
AESx(0xDB3BE0E0), AESx(0x64563232), AESx(0x744E3A3A), AESx(0x141E0A0A),
AESx(0x92DB4949), AESx(0x0C0A0606), AESx(0x486C2424), AESx(0xB8E45C5C),
AESx(0x9F5DC2C2), AESx(0xBD6ED3D3), AESx(0x43EFACAC), AESx(0xC4A66262),
AESx(0x39A89191), AESx(0x31A49595), AESx(0xD337E4E4), AESx(0xF28B7979),
AESx(0xD532E7E7), AESx(0x8B43C8C8), AESx(0x6E593737), AESx(0xDAB76D6D),
AESx(0x018C8D8D), AESx(0xB164D5D5), AESx(0x9CD24E4E), AESx(0x49E0A9A9),
AESx(0xD8B46C6C), AESx(0xACFA5656), AESx(0xF307F4F4), AESx(0xCF25EAEA),
AESx(0xCAAF6565), AESx(0xF48E7A7A), AESx(0x47E9AEAE), AESx(0x10180808),
AESx(0x6FD5BABA), AESx(0xF0887878), AESx(0x4A6F2525), AESx(0x5C722E2E),
AESx(0x38241C1C), AESx(0x57F1A6A6), AESx(0x73C7B4B4), AESx(0x9751C6C6),
AESx(0xCB23E8E8), AESx(0xA17CDDDD), AESx(0xE89C7474), AESx(0x3E211F1F),
AESx(0x96DD4B4B), AESx(0x61DCBDBD), AESx(0x0D868B8B), AESx(0x0F858A8A),
AESx(0xE0907070), AESx(0x7C423E3E), AESx(0x71C4B5B5), AESx(0xCCAA6666),
AESx(0x90D84848), AESx(0x06050303), AESx(0xF701F6F6), AESx(0x1C120E0E),
AESx(0xC2A36161), AESx(0x6A5F3535), AESx(0xAEF95757), AESx(0x69D0B9B9),
AESx(0x17918686), AESx(0x9958C1C1), AESx(0x3A271D1D), AESx(0x27B99E9E),
AESx(0xD938E1E1), AESx(0xEB13F8F8), AESx(0x2BB39898), AESx(0x22331111),
AESx(0xD2BB6969), AESx(0xA970D9D9), AESx(0x07898E8E), AESx(0x33A79494),
AESx(0x2DB69B9B), AESx(0x3C221E1E), AESx(0x15928787), AESx(0xC920E9E9),
AESx(0x8749CECE), AESx(0xAAFF5555), AESx(0x50782828), AESx(0xA57ADFDF),
AESx(0x038F8C8C), AESx(0x59F8A1A1), AESx(0x09808989), AESx(0x1A170D0D),
AESx(0x65DABFBF), AESx(0xD731E6E6), AESx(0x84C64242), AESx(0xD0B86868),
AESx(0x82C34141), AESx(0x29B09999), AESx(0x5A772D2D), AESx(0x1E110F0F),
AESx(0x7BCBB0B0), AESx(0xA8FC5454), AESx(0x6DD6BBBB), AESx(0x2C3A1616)
};
#ifdef __cplusplus
}
#endif

0
blake.c → sph/blake.c

957
sph/bmw.c

@ -0,0 +1,957 @@ @@ -0,0 +1,957 @@
/* $Id: bmw.c 227 2010-06-16 17:28:38Z tp $ */
/*
* BMW implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#include <stddef.h>
#include <string.h>
#include <limits.h>
#include "sph_bmw.h"
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_BMW
#define SPH_SMALL_FOOTPRINT_BMW 1
#endif
#ifdef _MSC_VER
#pragma warning (disable: 4146)
#endif
static const sph_u32 IV224[] = {
SPH_C32(0x00010203), SPH_C32(0x04050607),
SPH_C32(0x08090A0B), SPH_C32(0x0C0D0E0F),
SPH_C32(0x10111213), SPH_C32(0x14151617),
SPH_C32(0x18191A1B), SPH_C32(0x1C1D1E1F),
SPH_C32(0x20212223), SPH_C32(0x24252627),
SPH_C32(0x28292A2B), SPH_C32(0x2C2D2E2F),
SPH_C32(0x30313233), SPH_C32(0x34353637),
SPH_C32(0x38393A3B), SPH_C32(0x3C3D3E3F)
};
static const sph_u32 IV256[] = {
SPH_C32(0x40414243), SPH_C32(0x44454647),
SPH_C32(0x48494A4B), SPH_C32(0x4C4D4E4F),
SPH_C32(0x50515253), SPH_C32(0x54555657),
SPH_C32(0x58595A5B), SPH_C32(0x5C5D5E5F),
SPH_C32(0x60616263), SPH_C32(0x64656667),
SPH_C32(0x68696A6B), SPH_C32(0x6C6D6E6F),
SPH_C32(0x70717273), SPH_C32(0x74757677),
SPH_C32(0x78797A7B), SPH_C32(0x7C7D7E7F)
};
#if SPH_64
static const sph_u64 IV384[] = {
SPH_C64(0x0001020304050607), SPH_C64(0x08090A0B0C0D0E0F),
SPH_C64(0x1011121314151617), SPH_C64(0x18191A1B1C1D1E1F),
SPH_C64(0x2021222324252627), SPH_C64(0x28292A2B2C2D2E2F),
SPH_C64(0x3031323334353637), SPH_C64(0x38393A3B3C3D3E3F),
SPH_C64(0x4041424344454647), SPH_C64(0x48494A4B4C4D4E4F),
SPH_C64(0x5051525354555657), SPH_C64(0x58595A5B5C5D5E5F),
SPH_C64(0x6061626364656667), SPH_C64(0x68696A6B6C6D6E6F),
SPH_C64(0x7071727374757677), SPH_C64(0x78797A7B7C7D7E7F)
};
static const sph_u64 IV512[] = {
SPH_C64(0x8081828384858687), SPH_C64(0x88898A8B8C8D8E8F),
SPH_C64(0x9091929394959697), SPH_C64(0x98999A9B9C9D9E9F),
SPH_C64(0xA0A1A2A3A4A5A6A7), SPH_C64(0xA8A9AAABACADAEAF),
SPH_C64(0xB0B1B2B3B4B5B6B7), SPH_C64(0xB8B9BABBBCBDBEBF),
SPH_C64(0xC0C1C2C3C4C5C6C7), SPH_C64(0xC8C9CACBCCCDCECF),
SPH_C64(0xD0D1D2D3D4D5D6D7), SPH_C64(0xD8D9DADBDCDDDEDF),
SPH_C64(0xE0E1E2E3E4E5E6E7), SPH_C64(0xE8E9EAEBECEDEEEF),
SPH_C64(0xF0F1F2F3F4F5F6F7), SPH_C64(0xF8F9FAFBFCFDFEFF)
};
#endif
#define XCAT(x, y) XCAT_(x, y)
#define XCAT_(x, y) x ## y
#define LPAR (
#define I16_16 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
#define I16_17 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
#define I16_18 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
#define I16_19 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18
#define I16_20 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
#define I16_21 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20
#define I16_22 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
#define I16_23 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22
#define I16_24 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23
#define I16_25 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24
#define I16_26 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
#define I16_27 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
#define I16_28 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27
#define I16_29 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28
#define I16_30 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29
#define I16_31 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30
#define M16_16 0, 1, 3, 4, 7, 10, 11
#define M16_17 1, 2, 4, 5, 8, 11, 12
#define M16_18 2, 3, 5, 6, 9, 12, 13
#define M16_19 3, 4, 6, 7, 10, 13, 14
#define M16_20 4, 5, 7, 8, 11, 14, 15
#define M16_21 5, 6, 8, 9, 12, 15, 16
#define M16_22 6, 7, 9, 10, 13, 0, 1
#define M16_23 7, 8, 10, 11, 14, 1, 2
#define M16_24 8, 9, 11, 12, 15, 2, 3
#define M16_25 9, 10, 12, 13, 0, 3, 4
#define M16_26 10, 11, 13, 14, 1, 4, 5
#define M16_27 11, 12, 14, 15, 2, 5, 6
#define M16_28 12, 13, 15, 16, 3, 6, 7
#define M16_29 13, 14, 0, 1, 4, 7, 8
#define M16_30 14, 15, 1, 2, 5, 8, 9
#define M16_31 15, 16, 2, 3, 6, 9, 10
#define ss0(x) (((x) >> 1) ^ SPH_T32((x) << 3) \
^ SPH_ROTL32(x, 4) ^ SPH_ROTL32(x, 19))
#define ss1(x) (((x) >> 1) ^ SPH_T32((x) << 2) \
^ SPH_ROTL32(x, 8) ^ SPH_ROTL32(x, 23))
#define ss2(x) (((x) >> 2) ^ SPH_T32((x) << 1) \
^ SPH_ROTL32(x, 12) ^ SPH_ROTL32(x, 25))
#define ss3(x) (((x) >> 2) ^ SPH_T32((x) << 2) \
^ SPH_ROTL32(x, 15) ^ SPH_ROTL32(x, 29))
#define ss4(x) (((x) >> 1) ^ (x))
#define ss5(x) (((x) >> 2) ^ (x))
#define rs1(x) SPH_ROTL32(x, 3)
#define rs2(x) SPH_ROTL32(x, 7)
#define rs3(x) SPH_ROTL32(x, 13)
#define rs4(x) SPH_ROTL32(x, 16)
#define rs5(x) SPH_ROTL32(x, 19)
#define rs6(x) SPH_ROTL32(x, 23)
#define rs7(x) SPH_ROTL32(x, 27)
#define Ks(j) SPH_T32((sph_u32)(j) * SPH_C32(0x05555555))
#define add_elt_s(mf, hf, j0m, j1m, j3m, j4m, j7m, j10m, j11m, j16) \
(SPH_T32(SPH_ROTL32(mf(j0m), j1m) + SPH_ROTL32(mf(j3m), j4m) \
- SPH_ROTL32(mf(j10m), j11m) + Ks(j16)) ^ hf(j7m))
#define expand1s_inner(qf, mf, hf, i16, \
i0, i1, i2, i3, i4, i5, i6, i7, i8, \
i9, i10, i11, i12, i13, i14, i15, \
i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
SPH_T32(ss1(qf(i0)) + ss2(qf(i1)) + ss3(qf(i2)) + ss0(qf(i3)) \
+ ss1(qf(i4)) + ss2(qf(i5)) + ss3(qf(i6)) + ss0(qf(i7)) \
+ ss1(qf(i8)) + ss2(qf(i9)) + ss3(qf(i10)) + ss0(qf(i11)) \
+ ss1(qf(i12)) + ss2(qf(i13)) + ss3(qf(i14)) + ss0(qf(i15)) \
+ add_elt_s(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
#define expand1s(qf, mf, hf, i16) \
expand1s_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
#define expand1s_(qf, mf, hf, i16, ix, iy) \
expand1s_inner LPAR qf, mf, hf, i16, ix, iy)
#define expand2s_inner(qf, mf, hf, i16, \
i0, i1, i2, i3, i4, i5, i6, i7, i8, \
i9, i10, i11, i12, i13, i14, i15, \
i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
SPH_T32(qf(i0) + rs1(qf(i1)) + qf(i2) + rs2(qf(i3)) \
+ qf(i4) + rs3(qf(i5)) + qf(i6) + rs4(qf(i7)) \
+ qf(i8) + rs5(qf(i9)) + qf(i10) + rs6(qf(i11)) \
+ qf(i12) + rs7(qf(i13)) + ss4(qf(i14)) + ss5(qf(i15)) \
+ add_elt_s(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
#define expand2s(qf, mf, hf, i16) \
expand2s_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
#define expand2s_(qf, mf, hf, i16, ix, iy) \
expand2s_inner LPAR qf, mf, hf, i16, ix, iy)
#if SPH_64
#define sb0(x) (((x) >> 1) ^ SPH_T64((x) << 3) \
^ SPH_ROTL64(x, 4) ^ SPH_ROTL64(x, 37))
#define sb1(x) (((x) >> 1) ^ SPH_T64((x) << 2) \
^ SPH_ROTL64(x, 13) ^ SPH_ROTL64(x, 43))
#define sb2(x) (((x) >> 2) ^ SPH_T64((x) << 1) \
^ SPH_ROTL64(x, 19) ^ SPH_ROTL64(x, 53))
#define sb3(x) (((x) >> 2) ^ SPH_T64((x) << 2) \
^ SPH_ROTL64(x, 28) ^ SPH_ROTL64(x, 59))
#define sb4(x) (((x) >> 1) ^ (x))
#define sb5(x) (((x) >> 2) ^ (x))
#define rb1(x) SPH_ROTL64(x, 5)
#define rb2(x) SPH_ROTL64(x, 11)
#define rb3(x) SPH_ROTL64(x, 27)
#define rb4(x) SPH_ROTL64(x, 32)
#define rb5(x) SPH_ROTL64(x, 37)
#define rb6(x) SPH_ROTL64(x, 43)
#define rb7(x) SPH_ROTL64(x, 53)
#define Kb(j) SPH_T64((sph_u64)(j) * SPH_C64(0x0555555555555555))
#if SPH_SMALL_FOOTPRINT_BMW
static const sph_u64 Kb_tab[] = {
Kb(16), Kb(17), Kb(18), Kb(19), Kb(20), Kb(21), Kb(22), Kb(23),
Kb(24), Kb(25), Kb(26), Kb(27), Kb(28), Kb(29), Kb(30), Kb(31)
};
#define rol_off(mf, j, off) \
SPH_ROTL64(mf(((j) + (off)) & 15), (((j) + (off)) & 15) + 1)
#define add_elt_b(mf, hf, j) \
(SPH_T64(rol_off(mf, j, 0) + rol_off(mf, j, 3) \
- rol_off(mf, j, 10) + Kb_tab[j]) ^ hf(((j) + 7) & 15))
#define expand1b(qf, mf, hf, i) \
SPH_T64(sb1(qf((i) - 16)) + sb2(qf((i) - 15)) \
+ sb3(qf((i) - 14)) + sb0(qf((i) - 13)) \
+ sb1(qf((i) - 12)) + sb2(qf((i) - 11)) \
+ sb3(qf((i) - 10)) + sb0(qf((i) - 9)) \
+ sb1(qf((i) - 8)) + sb2(qf((i) - 7)) \
+ sb3(qf((i) - 6)) + sb0(qf((i) - 5)) \
+ sb1(qf((i) - 4)) + sb2(qf((i) - 3)) \
+ sb3(qf((i) - 2)) + sb0(qf((i) - 1)) \
+ add_elt_b(mf, hf, (i) - 16))
#define expand2b(qf, mf, hf, i) \
SPH_T64(qf((i) - 16) + rb1(qf((i) - 15)) \
+ qf((i) - 14) + rb2(qf((i) - 13)) \
+ qf((i) - 12) + rb3(qf((i) - 11)) \
+ qf((i) - 10) + rb4(qf((i) - 9)) \
+ qf((i) - 8) + rb5(qf((i) - 7)) \
+ qf((i) - 6) + rb6(qf((i) - 5)) \
+ qf((i) - 4) + rb7(qf((i) - 3)) \
+ sb4(qf((i) - 2)) + sb5(qf((i) - 1)) \
+ add_elt_b(mf, hf, (i) - 16))
#else
#define add_elt_b(mf, hf, j0m, j1m, j3m, j4m, j7m, j10m, j11m, j16) \
(SPH_T64(SPH_ROTL64(mf(j0m), j1m) + SPH_ROTL64(mf(j3m), j4m) \
- SPH_ROTL64(mf(j10m), j11m) + Kb(j16)) ^ hf(j7m))
#define expand1b_inner(qf, mf, hf, i16, \
i0, i1, i2, i3, i4, i5, i6, i7, i8, \
i9, i10, i11, i12, i13, i14, i15, \
i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
SPH_T64(sb1(qf(i0)) + sb2(qf(i1)) + sb3(qf(i2)) + sb0(qf(i3)) \
+ sb1(qf(i4)) + sb2(qf(i5)) + sb3(qf(i6)) + sb0(qf(i7)) \
+ sb1(qf(i8)) + sb2(qf(i9)) + sb3(qf(i10)) + sb0(qf(i11)) \
+ sb1(qf(i12)) + sb2(qf(i13)) + sb3(qf(i14)) + sb0(qf(i15)) \
+ add_elt_b(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
#define expand1b(qf, mf, hf, i16) \
expand1b_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
#define expand1b_(qf, mf, hf, i16, ix, iy) \
expand1b_inner LPAR qf, mf, hf, i16, ix, iy)
#define expand2b_inner(qf, mf, hf, i16, \
i0, i1, i2, i3, i4, i5, i6, i7, i8, \
i9, i10, i11, i12, i13, i14, i15, \
i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
SPH_T64(qf(i0) + rb1(qf(i1)) + qf(i2) + rb2(qf(i3)) \
+ qf(i4) + rb3(qf(i5)) + qf(i6) + rb4(qf(i7)) \
+ qf(i8) + rb5(qf(i9)) + qf(i10) + rb6(qf(i11)) \
+ qf(i12) + rb7(qf(i13)) + sb4(qf(i14)) + sb5(qf(i15)) \
+ add_elt_b(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
#define expand2b(qf, mf, hf, i16) \
expand2b_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
#define expand2b_(qf, mf, hf, i16, ix, iy) \
expand2b_inner LPAR qf, mf, hf, i16, ix, iy)
#endif
#endif
#define MAKE_W(tt, i0, op01, i1, op12, i2, op23, i3, op34, i4) \
tt((M(i0) ^ H(i0)) op01 (M(i1) ^ H(i1)) op12 (M(i2) ^ H(i2)) \
op23 (M(i3) ^ H(i3)) op34 (M(i4) ^ H(i4)))
#define Ws0 MAKE_W(SPH_T32, 5, -, 7, +, 10, +, 13, +, 14)
#define Ws1 MAKE_W(SPH_T32, 6, -, 8, +, 11, +, 14, -, 15)
#define Ws2 MAKE_W(SPH_T32, 0, +, 7, +, 9, -, 12, +, 15)
#define Ws3 MAKE_W(SPH_T32, 0, -, 1, +, 8, -, 10, +, 13)
#define Ws4 MAKE_W(SPH_T32, 1, +, 2, +, 9, -, 11, -, 14)
#define Ws5 MAKE_W(SPH_T32, 3, -, 2, +, 10, -, 12, +, 15)
#define Ws6 MAKE_W(SPH_T32, 4, -, 0, -, 3, -, 11, +, 13)
#define Ws7 MAKE_W(SPH_T32, 1, -, 4, -, 5, -, 12, -, 14)
#define Ws8 MAKE_W(SPH_T32, 2, -, 5, -, 6, +, 13, -, 15)
#define Ws9 MAKE_W(SPH_T32, 0, -, 3, +, 6, -, 7, +, 14)
#define Ws10 MAKE_W(SPH_T32, 8, -, 1, -, 4, -, 7, +, 15)
#define Ws11 MAKE_W(SPH_T32, 8, -, 0, -, 2, -, 5, +, 9)
#define Ws12 MAKE_W(SPH_T32, 1, +, 3, -, 6, -, 9, +, 10)
#define Ws13 MAKE_W(SPH_T32, 2, +, 4, +, 7, +, 10, +, 11)
#define Ws14 MAKE_W(SPH_T32, 3, -, 5, +, 8, -, 11, -, 12)
#define Ws15 MAKE_W(SPH_T32, 12, -, 4, -, 6, -, 9, +, 13)
#if SPH_SMALL_FOOTPRINT_BMW
#define MAKE_Qas do { \
unsigned u; \
sph_u32 Ws[16]; \
Ws[ 0] = Ws0; \
Ws[ 1] = Ws1; \
Ws[ 2] = Ws2; \
Ws[ 3] = Ws3; \
Ws[ 4] = Ws4; \
Ws[ 5] = Ws5; \
Ws[ 6] = Ws6; \
Ws[ 7] = Ws7; \
Ws[ 8] = Ws8; \
Ws[ 9] = Ws9; \
Ws[10] = Ws10; \
Ws[11] = Ws11; \
Ws[12] = Ws12; \
Ws[13] = Ws13; \
Ws[14] = Ws14; \
Ws[15] = Ws15; \
for (u = 0; u < 15; u += 5) { \
qt[u + 0] = SPH_T32(ss0(Ws[u + 0]) + H(u + 1)); \
qt[u + 1] = SPH_T32(ss1(Ws[u + 1]) + H(u + 2)); \
qt[u + 2] = SPH_T32(ss2(Ws[u + 2]) + H(u + 3)); \
qt[u + 3] = SPH_T32(ss3(Ws[u + 3]) + H(u + 4)); \
qt[u + 4] = SPH_T32(ss4(Ws[u + 4]) + H(u + 5)); \
} \
qt[15] = SPH_T32(ss0(Ws[15]) + H(0)); \
} while (0)
#define MAKE_Qbs do { \
qt[16] = expand1s(Qs, M, H, 16); \
qt[17] = expand1s(Qs, M, H, 17); \
qt[18] = expand2s(Qs, M, H, 18); \
qt[19] = expand2s(Qs, M, H, 19); \
qt[20] = expand2s(Qs, M, H, 20); \
qt[21] = expand2s(Qs, M, H, 21); \
qt[22] = expand2s(Qs, M, H, 22); \
qt[23] = expand2s(Qs, M, H, 23); \
qt[24] = expand2s(Qs, M, H, 24); \
qt[25] = expand2s(Qs, M, H, 25); \
qt[26] = expand2s(Qs, M, H, 26); \
qt[27] = expand2s(Qs, M, H, 27); \
qt[28] = expand2s(Qs, M, H, 28); \
qt[29] = expand2s(Qs, M, H, 29); \
qt[30] = expand2s(Qs, M, H, 30); \
qt[31] = expand2s(Qs, M, H, 31); \
} while (0)
#else
#define MAKE_Qas do { \
qt[ 0] = SPH_T32(ss0(Ws0 ) + H( 1)); \
qt[ 1] = SPH_T32(ss1(Ws1 ) + H( 2)); \
qt[ 2] = SPH_T32(ss2(Ws2 ) + H( 3)); \
qt[ 3] = SPH_T32(ss3(Ws3 ) + H( 4)); \
qt[ 4] = SPH_T32(ss4(Ws4 ) + H( 5)); \
qt[ 5] = SPH_T32(ss0(Ws5 ) + H( 6)); \
qt[ 6] = SPH_T32(ss1(Ws6 ) + H( 7)); \
qt[ 7] = SPH_T32(ss2(Ws7 ) + H( 8)); \
qt[ 8] = SPH_T32(ss3(Ws8 ) + H( 9)); \
qt[ 9] = SPH_T32(ss4(Ws9 ) + H(10)); \
qt[10] = SPH_T32(ss0(Ws10) + H(11)); \
qt[11] = SPH_T32(ss1(Ws11) + H(12)); \
qt[12] = SPH_T32(ss2(Ws12) + H(13)); \
qt[13] = SPH_T32(ss3(Ws13) + H(14)); \
qt[14] = SPH_T32(ss4(Ws14) + H(15)); \
qt[15] = SPH_T32(ss0(Ws15) + H( 0)); \
} while (0)
#define MAKE_Qbs do { \
qt[16] = expand1s(Qs, M, H, 16); \
qt[17] = expand1s(Qs, M, H, 17); \
qt[18] = expand2s(Qs, M, H, 18); \
qt[19] = expand2s(Qs, M, H, 19); \
qt[20] = expand2s(Qs, M, H, 20); \
qt[21] = expand2s(Qs, M, H, 21); \
qt[22] = expand2s(Qs, M, H, 22); \
qt[23] = expand2s(Qs, M, H, 23); \
qt[24] = expand2s(Qs, M, H, 24); \
qt[25] = expand2s(Qs, M, H, 25); \
qt[26] = expand2s(Qs, M, H, 26); \
qt[27] = expand2s(Qs, M, H, 27); \
qt[28] = expand2s(Qs, M, H, 28); \
qt[29] = expand2s(Qs, M, H, 29); \
qt[30] = expand2s(Qs, M, H, 30); \
qt[31] = expand2s(Qs, M, H, 31); \
} while (0)
#endif
#define MAKE_Qs do { \
MAKE_Qas; \
MAKE_Qbs; \
} while (0)
#define Qs(j) (qt[j])
#if SPH_64
#define Wb0 MAKE_W(SPH_T64, 5, -, 7, +, 10, +, 13, +, 14)
#define Wb1 MAKE_W(SPH_T64, 6, -, 8, +, 11, +, 14, -, 15)
#define Wb2 MAKE_W(SPH_T64, 0, +, 7, +, 9, -, 12, +, 15)
#define Wb3 MAKE_W(SPH_T64, 0, -, 1, +, 8, -, 10, +, 13)
#define Wb4 MAKE_W(SPH_T64, 1, +, 2, +, 9, -, 11, -, 14)
#define Wb5 MAKE_W(SPH_T64, 3, -, 2, +, 10, -, 12, +, 15)
#define Wb6 MAKE_W(SPH_T64, 4, -, 0, -, 3, -, 11, +, 13)
#define Wb7 MAKE_W(SPH_T64, 1, -, 4, -, 5, -, 12, -, 14)
#define Wb8 MAKE_W(SPH_T64, 2, -, 5, -, 6, +, 13, -, 15)
#define Wb9 MAKE_W(SPH_T64, 0, -, 3, +, 6, -, 7, +, 14)
#define Wb10 MAKE_W(SPH_T64, 8, -, 1, -, 4, -, 7, +, 15)
#define Wb11 MAKE_W(SPH_T64, 8, -, 0, -, 2, -, 5, +, 9)
#define Wb12 MAKE_W(SPH_T64, 1, +, 3, -, 6, -, 9, +, 10)
#define Wb13 MAKE_W(SPH_T64, 2, +, 4, +, 7, +, 10, +, 11)
#define Wb14 MAKE_W(SPH_T64, 3, -, 5, +, 8, -, 11, -, 12)
#define Wb15 MAKE_W(SPH_T64, 12, -, 4, -, 6, -, 9, +, 13)
#if SPH_SMALL_FOOTPRINT_BMW
#define MAKE_Qab do { \
unsigned u; \
sph_u64 Wb[16]; \
Wb[ 0] = Wb0; \
Wb[ 1] = Wb1; \
Wb[ 2] = Wb2; \
Wb[ 3] = Wb3; \
Wb[ 4] = Wb4; \
Wb[ 5] = Wb5; \
Wb[ 6] = Wb6; \
Wb[ 7] = Wb7; \
Wb[ 8] = Wb8; \
Wb[ 9] = Wb9; \
Wb[10] = Wb10; \
Wb[11] = Wb11; \
Wb[12] = Wb12; \
Wb[13] = Wb13; \
Wb[14] = Wb14; \
Wb[15] = Wb15; \
for (u = 0; u < 15; u += 5) { \
qt[u + 0] = SPH_T64(sb0(Wb[u + 0]) + H(u + 1)); \
qt[u + 1] = SPH_T64(sb1(Wb[u + 1]) + H(u + 2)); \
qt[u + 2] = SPH_T64(sb2(Wb[u + 2]) + H(u + 3)); \
qt[u + 3] = SPH_T64(sb3(Wb[u + 3]) + H(u + 4)); \
qt[u + 4] = SPH_T64(sb4(Wb[u + 4]) + H(u + 5)); \
} \
qt[15] = SPH_T64(sb0(Wb[15]) + H(0)); \
} while (0)
#define MAKE_Qbb do { \
unsigned u; \
for (u = 16; u < 18; u ++) \
qt[u] = expand1b(Qb, M, H, u); \
for (u = 18; u < 32; u ++) \
qt[u] = expand2b(Qb, M, H, u); \
} while (0)
#else
#define MAKE_Qab do { \
qt[ 0] = SPH_T64(sb0(Wb0 ) + H( 1)); \
qt[ 1] = SPH_T64(sb1(Wb1 ) + H( 2)); \
qt[ 2] = SPH_T64(sb2(Wb2 ) + H( 3)); \
qt[ 3] = SPH_T64(sb3(Wb3 ) + H( 4)); \
qt[ 4] = SPH_T64(sb4(Wb4 ) + H( 5)); \
qt[ 5] = SPH_T64(sb0(Wb5 ) + H( 6)); \
qt[ 6] = SPH_T64(sb1(Wb6 ) + H( 7)); \
qt[ 7] = SPH_T64(sb2(Wb7 ) + H( 8)); \
qt[ 8] = SPH_T64(sb3(Wb8 ) + H( 9)); \
qt[ 9] = SPH_T64(sb4(Wb9 ) + H(10)); \
qt[10] = SPH_T64(sb0(Wb10) + H(11)); \
qt[11] = SPH_T64(sb1(Wb11) + H(12)); \
qt[12] = SPH_T64(sb2(Wb12) + H(13)); \
qt[13] = SPH_T64(sb3(Wb13) + H(14)); \
qt[14] = SPH_T64(sb4(Wb14) + H(15)); \
qt[15] = SPH_T64(sb0(Wb15) + H( 0)); \
} while (0)
#define MAKE_Qbb do { \
qt[16] = expand1b(Qb, M, H, 16); \
qt[17] = expand1b(Qb, M, H, 17); \
qt[18] = expand2b(Qb, M, H, 18); \
qt[19] = expand2b(Qb, M, H, 19); \
qt[20] = expand2b(Qb, M, H, 20); \
qt[21] = expand2b(Qb, M, H, 21); \
qt[22] = expand2b(Qb, M, H, 22); \
qt[23] = expand2b(Qb, M, H, 23); \
qt[24] = expand2b(Qb, M, H, 24); \
qt[25] = expand2b(Qb, M, H, 25); \
qt[26] = expand2b(Qb, M, H, 26); \
qt[27] = expand2b(Qb, M, H, 27); \
qt[28] = expand2b(Qb, M, H, 28); \
qt[29] = expand2b(Qb, M, H, 29); \
qt[30] = expand2b(Qb, M, H, 30); \
qt[31] = expand2b(Qb, M, H, 31); \
} while (0)
#endif
#define MAKE_Qb do { \
MAKE_Qab; \
MAKE_Qbb; \
} while (0)
#define Qb(j) (qt[j])
#endif
#define FOLD(type, mkQ, tt, rol, mf, qf, dhf) do { \
type qt[32], xl, xh; \
mkQ; \
xl = qf(16) ^ qf(17) ^ qf(18) ^ qf(19) \
^ qf(20) ^ qf(21) ^ qf(22) ^ qf(23); \
xh = xl ^ qf(24) ^ qf(25) ^ qf(26) ^ qf(27) \
^ qf(28) ^ qf(29) ^ qf(30) ^ qf(31); \
dhf( 0) = tt(((xh << 5) ^ (qf(16) >> 5) ^ mf( 0)) \
+ (xl ^ qf(24) ^ qf( 0))); \
dhf( 1) = tt(((xh >> 7) ^ (qf(17) << 8) ^ mf( 1)) \
+ (xl ^ qf(25) ^ qf( 1))); \
dhf( 2) = tt(((xh >> 5) ^ (qf(18) << 5) ^ mf( 2)) \
+ (xl ^ qf(26) ^ qf( 2))); \
dhf( 3) = tt(((xh >> 1) ^ (qf(19) << 5) ^ mf( 3)) \
+ (xl ^ qf(27) ^ qf( 3))); \
dhf( 4) = tt(((xh >> 3) ^ (qf(20) << 0) ^ mf( 4)) \
+ (xl ^ qf(28) ^ qf( 4))); \
dhf( 5) = tt(((xh << 6) ^ (qf(21) >> 6) ^ mf( 5)) \
+ (xl ^ qf(29) ^ qf( 5))); \
dhf( 6) = tt(((xh >> 4) ^ (qf(22) << 6) ^ mf( 6)) \
+ (xl ^ qf(30) ^ qf( 6))); \
dhf( 7) = tt(((xh >> 11) ^ (qf(23) << 2) ^ mf( 7)) \
+ (xl ^ qf(31) ^ qf( 7))); \
dhf( 8) = tt(rol(dhf(4), 9) + (xh ^ qf(24) ^ mf( 8)) \
+ ((xl << 8) ^ qf(23) ^ qf( 8))); \
dhf( 9) = tt(rol(dhf(5), 10) + (xh ^ qf(25) ^ mf( 9)) \
+ ((xl >> 6) ^ qf(16) ^ qf( 9))); \
dhf(10) = tt(rol(dhf(6), 11) + (xh ^ qf(26) ^ mf(10)) \
+ ((xl << 6) ^ qf(17) ^ qf(10))); \
dhf(11) = tt(rol(dhf(7), 12) + (xh ^ qf(27) ^ mf(11)) \
+ ((xl << 4) ^ qf(18) ^ qf(11))); \
dhf(12) = tt(rol(dhf(0), 13) + (xh ^ qf(28) ^ mf(12)) \
+ ((xl >> 3) ^ qf(19) ^ qf(12))); \
dhf(13) = tt(rol(dhf(1), 14) + (xh ^ qf(29) ^ mf(13)) \
+ ((xl >> 4) ^ qf(20) ^ qf(13))); \
dhf(14) = tt(rol(dhf(2), 15) + (xh ^ qf(30) ^ mf(14)) \
+ ((xl >> 7) ^ qf(21) ^ qf(14))); \
dhf(15) = tt(rol(dhf(3), 16) + (xh ^ qf(31) ^ mf(15)) \
+ ((xl >> 2) ^ qf(22) ^ qf(15))); \
} while (0)
#define FOLDs FOLD(sph_u32, MAKE_Qs, SPH_T32, SPH_ROTL32, M, Qs, dH)
#if SPH_64
#define FOLDb FOLD(sph_u64, MAKE_Qb, SPH_T64, SPH_ROTL64, M, Qb, dH)
#endif
static void
compress_small(const unsigned char *data, const sph_u32 h[16], sph_u32 dh[16])
{
#if SPH_LITTLE_FAST
#define M(x) sph_dec32le_aligned(data + 4 * (x))
#else
sph_u32 mv[16];
mv[ 0] = sph_dec32le_aligned(data + 0);
mv[ 1] = sph_dec32le_aligned(data + 4);
mv[ 2] = sph_dec32le_aligned(data + 8);
mv[ 3] = sph_dec32le_aligned(data + 12);
mv[ 4] = sph_dec32le_aligned(data + 16);
mv[ 5] = sph_dec32le_aligned(data + 20);
mv[ 6] = sph_dec32le_aligned(data + 24);
mv[ 7] = sph_dec32le_aligned(data + 28);
mv[ 8] = sph_dec32le_aligned(data + 32);
mv[ 9] = sph_dec32le_aligned(data + 36);
mv[10] = sph_dec32le_aligned(data + 40);
mv[11] = sph_dec32le_aligned(data + 44);
mv[12] = sph_dec32le_aligned(data + 48);
mv[13] = sph_dec32le_aligned(data + 52);
mv[14] = sph_dec32le_aligned(data + 56);
mv[15] = sph_dec32le_aligned(data + 60);
#define M(x) (mv[x])
#endif
#define H(x) (h[x])
#define dH(x) (dh[x])
FOLDs;
#undef M
#undef H
#undef dH
}
static const sph_u32 final_s[16] = {
SPH_C32(0xaaaaaaa0), SPH_C32(0xaaaaaaa1), SPH_C32(0xaaaaaaa2),
SPH_C32(0xaaaaaaa3), SPH_C32(0xaaaaaaa4), SPH_C32(0xaaaaaaa5),
SPH_C32(0xaaaaaaa6), SPH_C32(0xaaaaaaa7), SPH_C32(0xaaaaaaa8),
SPH_C32(0xaaaaaaa9), SPH_C32(0xaaaaaaaa), SPH_C32(0xaaaaaaab),
SPH_C32(0xaaaaaaac), SPH_C32(0xaaaaaaad), SPH_C32(0xaaaaaaae),
SPH_C32(0xaaaaaaaf)
};
static void
bmw32_init(sph_bmw_small_context *sc, const sph_u32 *iv)
{
memcpy(sc->H, iv, sizeof sc->H);
sc->ptr = 0;
#if SPH_64
sc->bit_count = 0;
#else
sc->bit_count_high = 0;
sc->bit_count_low = 0;
#endif
}
static void
bmw32(sph_bmw_small_context *sc, const void *data, size_t len)
{
unsigned char *buf;
size_t ptr;
sph_u32 htmp[16];
sph_u32 *h1, *h2;
#if !SPH_64
sph_u32 tmp;
#endif
#if SPH_64
sc->bit_count += (sph_u64)len << 3;
#else
tmp = sc->bit_count_low;
sc->bit_count_low = SPH_T32(tmp + ((sph_u32)len << 3));
if (sc->bit_count_low < tmp)
sc->bit_count_high ++;
sc->bit_count_high += len >> 29;
#endif
buf = sc->buf;
ptr = sc->ptr;
h1 = sc->H;
h2 = htmp;
while (len > 0) {
size_t clen;
clen = (sizeof sc->buf) - ptr;
if (clen > len)
clen = len;
memcpy(buf + ptr, data, clen);
data = (const unsigned char *)data + clen;
len -= clen;
ptr += clen;
if (ptr == sizeof sc->buf) {
sph_u32 *ht;
compress_small(buf, h1, h2);
ht = h1;
h1 = h2;
h2 = ht;
ptr = 0;
}
}
sc->ptr = ptr;
if (h1 != sc->H)
memcpy(sc->H, h1, sizeof sc->H);
}
static void
bmw32_close(sph_bmw_small_context *sc, unsigned ub, unsigned n,
void *dst, size_t out_size_w32)
{
unsigned char *buf, *out;
size_t ptr, u, v;
unsigned z;
sph_u32 h1[16], h2[16], *h;
buf = sc->buf;
ptr = sc->ptr;
z = 0x80 >> n;
buf[ptr ++] = ((ub & -z) | z) & 0xFF;
h = sc->H;
if (ptr > (sizeof sc->buf) - 8) {
memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
compress_small(buf, h, h1);
ptr = 0;
h = h1;
}
memset(buf + ptr, 0, (sizeof sc->buf) - 8 - ptr);
#if SPH_64
sph_enc64le_aligned(buf + (sizeof sc->buf) - 8,
SPH_T64(sc->bit_count + n));
#else
sph_enc32le_aligned(buf + (sizeof sc->buf) - 8,
sc->bit_count_low + n);
sph_enc32le_aligned(buf + (sizeof sc->buf) - 4,
SPH_T32(sc->bit_count_high));
#endif
compress_small(buf, h, h2);
for (u = 0; u < 16; u ++)
sph_enc32le_aligned(buf + 4 * u, h2[u]);
compress_small(buf, final_s, h1);
out = dst;
for (u = 0, v = 16 - out_size_w32; u < out_size_w32; u ++, v ++)
sph_enc32le(out + 4 * u, h1[v]);
}
#if SPH_64
static void
compress_big(const unsigned char *data, const sph_u64 h[16], sph_u64 dh[16])
{
#if SPH_LITTLE_FAST
#define M(x) sph_dec64le_aligned(data + 8 * (x))
#else
sph_u64 mv[16];
mv[ 0] = sph_dec64le_aligned(data + 0);
mv[ 1] = sph_dec64le_aligned(data + 8);
mv[ 2] = sph_dec64le_aligned(data + 16);
mv[ 3] = sph_dec64le_aligned(data + 24);
mv[ 4] = sph_dec64le_aligned(data + 32);
mv[ 5] = sph_dec64le_aligned(data + 40);
mv[ 6] = sph_dec64le_aligned(data + 48);
mv[ 7] = sph_dec64le_aligned(data + 56);
mv[ 8] = sph_dec64le_aligned(data + 64);
mv[ 9] = sph_dec64le_aligned(data + 72);
mv[10] = sph_dec64le_aligned(data + 80);
mv[11] = sph_dec64le_aligned(data + 88);
mv[12] = sph_dec64le_aligned(data + 96);
mv[13] = sph_dec64le_aligned(data + 104);
mv[14] = sph_dec64le_aligned(data + 112);
mv[15] = sph_dec64le_aligned(data + 120);
#define M(x) (mv[x])
#endif
#define H(x) (h[x])
#define dH(x) (dh[x])
FOLDb;
#undef M
#undef H
#undef dH
}
static const sph_u64 final_b[16] = {
SPH_C64(0xaaaaaaaaaaaaaaa0), SPH_C64(0xaaaaaaaaaaaaaaa1),
SPH_C64(0xaaaaaaaaaaaaaaa2), SPH_C64(0xaaaaaaaaaaaaaaa3),
SPH_C64(0xaaaaaaaaaaaaaaa4), SPH_C64(0xaaaaaaaaaaaaaaa5),
SPH_C64(0xaaaaaaaaaaaaaaa6), SPH_C64(0xaaaaaaaaaaaaaaa7),
SPH_C64(0xaaaaaaaaaaaaaaa8), SPH_C64(0xaaaaaaaaaaaaaaa9),
SPH_C64(0xaaaaaaaaaaaaaaaa), SPH_C64(0xaaaaaaaaaaaaaaab),
SPH_C64(0xaaaaaaaaaaaaaaac), SPH_C64(0xaaaaaaaaaaaaaaad),
SPH_C64(0xaaaaaaaaaaaaaaae), SPH_C64(0xaaaaaaaaaaaaaaaf)
};
static void
bmw64_init(sph_bmw_big_context *sc, const sph_u64 *iv)
{
memcpy(sc->H, iv, sizeof sc->H);
sc->ptr = 0;
sc->bit_count = 0;
}
static void
bmw64(sph_bmw_big_context *sc, const void *data, size_t len)
{
unsigned char *buf;
size_t ptr;
sph_u64 htmp[16];
sph_u64 *h1, *h2;
sc->bit_count += (sph_u64)len << 3;
buf = sc->buf;
ptr = sc->ptr;
h1 = sc->H;
h2 = htmp;
while (len > 0) {
size_t clen;
clen = (sizeof sc->buf) - ptr;
if (clen > len)
clen = len;
memcpy(buf + ptr, data, clen);
data = (const unsigned char *)data + clen;
len -= clen;
ptr += clen;
if (ptr == sizeof sc->buf) {
sph_u64 *ht;
compress_big(buf, h1, h2);
ht = h1;
h1 = h2;
h2 = ht;
ptr = 0;
}
}
sc->ptr = ptr;
if (h1 != sc->H)
memcpy(sc->H, h1, sizeof sc->H);
}
static void
bmw64_close(sph_bmw_big_context *sc, unsigned ub, unsigned n,
void *dst, size_t out_size_w64)
{
unsigned char *buf, *out;
size_t ptr, u, v;
unsigned z;
sph_u64 h1[16], h2[16], *h;
buf = sc->buf;
ptr = sc->ptr;
z = 0x80 >> n;
buf[ptr ++] = ((ub & -z) | z) & 0xFF;
h = sc->H;
if (ptr > (sizeof sc->buf) - 8) {
memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
compress_big(buf, h, h1);
ptr = 0;
h = h1;
}
memset(buf + ptr, 0, (sizeof sc->buf) - 8 - ptr);
sph_enc64le_aligned(buf + (sizeof sc->buf) - 8,
SPH_T64(sc->bit_count + n));
compress_big(buf, h, h2);
for (u = 0; u < 16; u ++)
sph_enc64le_aligned(buf + 8 * u, h2[u]);
compress_big(buf, final_b, h1);
out = dst;
for (u = 0, v = 16 - out_size_w64; u < out_size_w64; u ++, v ++)
sph_enc64le(out + 8 * u, h1[v]);
}
#endif
/* see sph_bmw.h */
void
sph_bmw224_init(void *cc)
{
bmw32_init(cc, IV224);
}
/* see sph_bmw.h */
void
sph_bmw224(void *cc, const void *data, size_t len)
{
bmw32(cc, data, len);
}
/* see sph_bmw.h */
void
sph_bmw224_close(void *cc, void *dst)
{
sph_bmw224_addbits_and_close(cc, 0, 0, dst);
}
/* see sph_bmw.h */
void
sph_bmw224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
bmw32_close(cc, ub, n, dst, 7);
sph_bmw224_init(cc);
}
/* see sph_bmw.h */
void
sph_bmw256_init(void *cc)
{
bmw32_init(cc, IV256);
}
/* see sph_bmw.h */
void
sph_bmw256(void *cc, const void *data, size_t len)
{
bmw32(cc, data, len);
}
/* see sph_bmw.h */
void
sph_bmw256_close(void *cc, void *dst)
{
sph_bmw256_addbits_and_close(cc, 0, 0, dst);
}
/* see sph_bmw.h */
void
sph_bmw256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
bmw32_close(cc, ub, n, dst, 8);
sph_bmw256_init(cc);
}
#if SPH_64
/* see sph_bmw.h */
void
sph_bmw384_init(void *cc)
{
bmw64_init(cc, IV384);
}
/* see sph_bmw.h */
void
sph_bmw384(void *cc, const void *data, size_t len)
{
bmw64(cc, data, len);
}
/* see sph_bmw.h */
void
sph_bmw384_close(void *cc, void *dst)
{
sph_bmw384_addbits_and_close(cc, 0, 0, dst);
}
/* see sph_bmw.h */
void
sph_bmw384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
bmw64_close(cc, ub, n, dst, 6);
sph_bmw384_init(cc);
}
/* see sph_bmw.h */
void
sph_bmw512_init(void *cc)
{
bmw64_init(cc, IV512);
}
/* see sph_bmw.h */
void
sph_bmw512(void *cc, const void *data, size_t len)
{
bmw64(cc, data, len);
}
/* see sph_bmw.h */
void
sph_bmw512_close(void *cc, void *dst)
{
sph_bmw512_addbits_and_close(cc, 0, 0, dst);
}
/* see sph_bmw.h */
void
sph_bmw512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
bmw64_close(cc, ub, n, dst, 8);
sph_bmw512_init(cc);
}
#endif

723
sph/cubehash.c

@ -0,0 +1,723 @@ @@ -0,0 +1,723 @@
/* $Id: cubehash.c 227 2010-06-16 17:28:38Z tp $ */
/*
* CubeHash implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#include <stddef.h>
#include <string.h>
#include <limits.h>
#include "sph_cubehash.h"
#ifdef __cplusplus
extern "C"{
#endif
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_CUBEHASH
#define SPH_SMALL_FOOTPRINT_CUBEHASH 1
#endif
/*
* Some tests were conducted on an Intel Core2 Q6600 (32-bit and 64-bit
* mode), a PowerPC G3, and a MIPS-compatible CPU (Broadcom BCM3302).
* It appears that the optimal settings are:
* -- full unroll, no state copy on the "big" systems (x86, PowerPC)
* -- unroll to 4 or 8, state copy on the "small" system (MIPS)
*/
#if SPH_SMALL_FOOTPRINT_CUBEHASH
#if !defined SPH_CUBEHASH_UNROLL
#define SPH_CUBEHASH_UNROLL 4
#endif
#if !defined SPH_CUBEHASH_NOCOPY
#define SPH_CUBEHASH_NOCOPY 1
#endif
#else
#if !defined SPH_CUBEHASH_UNROLL
#define SPH_CUBEHASH_UNROLL 0
#endif
#if !defined SPH_CUBEHASH_NOCOPY
#define SPH_CUBEHASH_NOCOPY 0
#endif
#endif
#ifdef _MSC_VER
#pragma warning (disable: 4146)
#endif
static const sph_u32 IV224[] = {
SPH_C32(0xB0FC8217), SPH_C32(0x1BEE1A90), SPH_C32(0x829E1A22),
SPH_C32(0x6362C342), SPH_C32(0x24D91C30), SPH_C32(0x03A7AA24),
SPH_C32(0xA63721C8), SPH_C32(0x85B0E2EF), SPH_C32(0xF35D13F3),
SPH_C32(0x41DA807D), SPH_C32(0x21A70CA6), SPH_C32(0x1F4E9774),
SPH_C32(0xB3E1C932), SPH_C32(0xEB0A79A8), SPH_C32(0xCDDAAA66),
SPH_C32(0xE2F6ECAA), SPH_C32(0x0A713362), SPH_C32(0xAA3080E0),
SPH_C32(0xD8F23A32), SPH_C32(0xCEF15E28), SPH_C32(0xDB086314),
SPH_C32(0x7F709DF7), SPH_C32(0xACD228A4), SPH_C32(0x704D6ECE),
SPH_C32(0xAA3EC95F), SPH_C32(0xE387C214), SPH_C32(0x3A6445FF),
SPH_C32(0x9CAB81C3), SPH_C32(0xC73D4B98), SPH_C32(0xD277AEBE),
SPH_C32(0xFD20151C), SPH_C32(0x00CB573E)
};
static const sph_u32 IV256[] = {
SPH_C32(0xEA2BD4B4), SPH_C32(0xCCD6F29F), SPH_C32(0x63117E71),
SPH_C32(0x35481EAE), SPH_C32(0x22512D5B), SPH_C32(0xE5D94E63),
SPH_C32(0x7E624131), SPH_C32(0xF4CC12BE), SPH_C32(0xC2D0B696),
SPH_C32(0x42AF2070), SPH_C32(0xD0720C35), SPH_C32(0x3361DA8C),
SPH_C32(0x28CCECA4), SPH_C32(0x8EF8AD83), SPH_C32(0x4680AC00),
SPH_C32(0x40E5FBAB), SPH_C32(0xD89041C3), SPH_C32(0x6107FBD5),
SPH_C32(0x6C859D41), SPH_C32(0xF0B26679), SPH_C32(0x09392549),
SPH_C32(0x5FA25603), SPH_C32(0x65C892FD), SPH_C32(0x93CB6285),
SPH_C32(0x2AF2B5AE), SPH_C32(0x9E4B4E60), SPH_C32(0x774ABFDD),
SPH_C32(0x85254725), SPH_C32(0x15815AEB), SPH_C32(0x4AB6AAD6),
SPH_C32(0x9CDAF8AF), SPH_C32(0xD6032C0A)
};
static const sph_u32 IV384[] = {
SPH_C32(0xE623087E), SPH_C32(0x04C00C87), SPH_C32(0x5EF46453),
SPH_C32(0x69524B13), SPH_C32(0x1A05C7A9), SPH_C32(0x3528DF88),
SPH_C32(0x6BDD01B5), SPH_C32(0x5057B792), SPH_C32(0x6AA7A922),
SPH_C32(0x649C7EEE), SPH_C32(0xF426309F), SPH_C32(0xCB629052),
SPH_C32(0xFC8E20ED), SPH_C32(0xB3482BAB), SPH_C32(0xF89E5E7E),
SPH_C32(0xD83D4DE4), SPH_C32(0x44BFC10D), SPH_C32(0x5FC1E63D),
SPH_C32(0x2104E6CB), SPH_C32(0x17958F7F), SPH_C32(0xDBEAEF70),
SPH_C32(0xB4B97E1E), SPH_C32(0x32C195F6), SPH_C32(0x6184A8E4),
SPH_C32(0x796C2543), SPH_C32(0x23DE176D), SPH_C32(0xD33BBAEC),
SPH_C32(0x0C12E5D2), SPH_C32(0x4EB95A7B), SPH_C32(0x2D18BA01),
SPH_C32(0x04EE475F), SPH_C32(0x1FC5F22E)
};
static const sph_u32 IV512[] = {
SPH_C32(0x2AEA2A61), SPH_C32(0x50F494D4), SPH_C32(0x2D538B8B),
SPH_C32(0x4167D83E), SPH_C32(0x3FEE2313), SPH_C32(0xC701CF8C),
SPH_C32(0xCC39968E), SPH_C32(0x50AC5695), SPH_C32(0x4D42C787),
SPH_C32(0xA647A8B3), SPH_C32(0x97CF0BEF), SPH_C32(0x825B4537),
SPH_C32(0xEEF864D2), SPH_C32(0xF22090C4), SPH_C32(0xD0E5CD33),
SPH_C32(0xA23911AE), SPH_C32(0xFCD398D9), SPH_C32(0x148FE485),
SPH_C32(0x1B017BEF), SPH_C32(0xB6444532), SPH_C32(0x6A536159),
SPH_C32(0x2FF5781C), SPH_C32(0x91FA7934), SPH_C32(0x0DBADEA9),
SPH_C32(0xD65C8A2B), SPH_C32(0xA5A70E75), SPH_C32(0xB1C62456),
SPH_C32(0xBC796576), SPH_C32(0x1921C8F7), SPH_C32(0xE7989AF1),
SPH_C32(0x7795D246), SPH_C32(0xD43E3B44)
};
#define T32 SPH_T32
#define ROTL32 SPH_ROTL32
#if SPH_CUBEHASH_NOCOPY
#define DECL_STATE
#define READ_STATE(cc)
#define WRITE_STATE(cc)
#define x0 ((sc)->state[ 0])
#define x1 ((sc)->state[ 1])
#define x2 ((sc)->state[ 2])
#define x3 ((sc)->state[ 3])
#define x4 ((sc)->state[ 4])
#define x5 ((sc)->state[ 5])
#define x6 ((sc)->state[ 6])
#define x7 ((sc)->state[ 7])
#define x8 ((sc)->state[ 8])
#define x9 ((sc)->state[ 9])
#define xa ((sc)->state[10])
#define xb ((sc)->state[11])
#define xc ((sc)->state[12])
#define xd ((sc)->state[13])
#define xe ((sc)->state[14])
#define xf ((sc)->state[15])
#define xg ((sc)->state[16])
#define xh ((sc)->state[17])
#define xi ((sc)->state[18])
#define xj ((sc)->state[19])
#define xk ((sc)->state[20])
#define xl ((sc)->state[21])
#define xm ((sc)->state[22])
#define xn ((sc)->state[23])
#define xo ((sc)->state[24])
#define xp ((sc)->state[25])
#define xq ((sc)->state[26])
#define xr ((sc)->state[27])
#define xs ((sc)->state[28])
#define xt ((sc)->state[29])
#define xu ((sc)->state[30])
#define xv ((sc)->state[31])
#else
#define DECL_STATE \
sph_u32 x0, x1, x2, x3, x4, x5, x6, x7; \
sph_u32 x8, x9, xa, xb, xc, xd, xe, xf; \
sph_u32 xg, xh, xi, xj, xk, xl, xm, xn; \
sph_u32 xo, xp, xq, xr, xs, xt, xu, xv;
#define READ_STATE(cc) do { \
x0 = (cc)->state[ 0]; \
x1 = (cc)->state[ 1]; \
x2 = (cc)->state[ 2]; \
x3 = (cc)->state[ 3]; \
x4 = (cc)->state[ 4]; \
x5 = (cc)->state[ 5]; \
x6 = (cc)->state[ 6]; \
x7 = (cc)->state[ 7]; \
x8 = (cc)->state[ 8]; \
x9 = (cc)->state[ 9]; \
xa = (cc)->state[10]; \
xb = (cc)->state[11]; \
xc = (cc)->state[12]; \
xd = (cc)->state[13]; \
xe = (cc)->state[14]; \
xf = (cc)->state[15]; \
xg = (cc)->state[16]; \
xh = (cc)->state[17]; \
xi = (cc)->state[18]; \
xj = (cc)->state[19]; \
xk = (cc)->state[20]; \
xl = (cc)->state[21]; \
xm = (cc)->state[22]; \
xn = (cc)->state[23]; \
xo = (cc)->state[24]; \
xp = (cc)->state[25]; \
xq = (cc)->state[26]; \
xr = (cc)->state[27]; \
xs = (cc)->state[28]; \
xt = (cc)->state[29]; \
xu = (cc)->state[30]; \
xv = (cc)->state[31]; \
} while (0)
#define WRITE_STATE(cc) do { \
(cc)->state[ 0] = x0; \
(cc)->state[ 1] = x1; \
(cc)->state[ 2] = x2; \
(cc)->state[ 3] = x3; \
(cc)->state[ 4] = x4; \
(cc)->state[ 5] = x5; \
(cc)->state[ 6] = x6; \
(cc)->state[ 7] = x7; \
(cc)->state[ 8] = x8; \
(cc)->state[ 9] = x9; \
(cc)->state[10] = xa; \
(cc)->state[11] = xb; \
(cc)->state[12] = xc; \
(cc)->state[13] = xd; \
(cc)->state[14] = xe; \
(cc)->state[15] = xf; \
(cc)->state[16] = xg; \
(cc)->state[17] = xh; \
(cc)->state[18] = xi; \
(cc)->state[19] = xj; \
(cc)->state[20] = xk; \
(cc)->state[21] = xl; \
(cc)->state[22] = xm; \
(cc)->state[23] = xn; \
(cc)->state[24] = xo; \
(cc)->state[25] = xp; \
(cc)->state[26] = xq; \
(cc)->state[27] = xr; \
(cc)->state[28] = xs; \
(cc)->state[29] = xt; \
(cc)->state[30] = xu; \
(cc)->state[31] = xv; \
} while (0)
#endif
#define INPUT_BLOCK do { \
x0 ^= sph_dec32le_aligned(buf + 0); \
x1 ^= sph_dec32le_aligned(buf + 4); \
x2 ^= sph_dec32le_aligned(buf + 8); \
x3 ^= sph_dec32le_aligned(buf + 12); \
x4 ^= sph_dec32le_aligned(buf + 16); \
x5 ^= sph_dec32le_aligned(buf + 20); \
x6 ^= sph_dec32le_aligned(buf + 24); \
x7 ^= sph_dec32le_aligned(buf + 28); \
} while (0)
#define ROUND_EVEN do { \
xg = T32(x0 + xg); \
x0 = ROTL32(x0, 7); \
xh = T32(x1 + xh); \
x1 = ROTL32(x1, 7); \
xi = T32(x2 + xi); \
x2 = ROTL32(x2, 7); \
xj = T32(x3 + xj); \
x3 = ROTL32(x3, 7); \
xk = T32(x4 + xk); \
x4 = ROTL32(x4, 7); \
xl = T32(x5 + xl); \
x5 = ROTL32(x5, 7); \
xm = T32(x6 + xm); \
x6 = ROTL32(x6, 7); \
xn = T32(x7 + xn); \
x7 = ROTL32(x7, 7); \
xo = T32(x8 + xo); \
x8 = ROTL32(x8, 7); \
xp = T32(x9 + xp); \
x9 = ROTL32(x9, 7); \
xq = T32(xa + xq); \
xa = ROTL32(xa, 7); \
xr = T32(xb + xr); \
xb = ROTL32(xb, 7); \
xs = T32(xc + xs); \
xc = ROTL32(xc, 7); \
xt = T32(xd + xt); \
xd = ROTL32(xd, 7); \
xu = T32(xe + xu); \
xe = ROTL32(xe, 7); \
xv = T32(xf + xv); \
xf = ROTL32(xf, 7); \
x8 ^= xg; \
x9 ^= xh; \
xa ^= xi; \
xb ^= xj; \
xc ^= xk; \
xd ^= xl; \
xe ^= xm; \
xf ^= xn; \
x0 ^= xo; \
x1 ^= xp; \
x2 ^= xq; \
x3 ^= xr; \
x4 ^= xs; \
x5 ^= xt; \
x6 ^= xu; \
x7 ^= xv; \
xi = T32(x8 + xi); \
x8 = ROTL32(x8, 11); \
xj = T32(x9 + xj); \
x9 = ROTL32(x9, 11); \
xg = T32(xa + xg); \
xa = ROTL32(xa, 11); \
xh = T32(xb + xh); \
xb = ROTL32(xb, 11); \
xm = T32(xc + xm); \
xc = ROTL32(xc, 11); \
xn = T32(xd + xn); \
xd = ROTL32(xd, 11); \
xk = T32(xe + xk); \
xe = ROTL32(xe, 11); \
xl = T32(xf + xl); \
xf = ROTL32(xf, 11); \
xq = T32(x0 + xq); \
x0 = ROTL32(x0, 11); \
xr = T32(x1 + xr); \
x1 = ROTL32(x1, 11); \
xo = T32(x2 + xo); \
x2 = ROTL32(x2, 11); \
xp = T32(x3 + xp); \
x3 = ROTL32(x3, 11); \
xu = T32(x4 + xu); \
x4 = ROTL32(x4, 11); \
xv = T32(x5 + xv); \
x5 = ROTL32(x5, 11); \
xs = T32(x6 + xs); \
x6 = ROTL32(x6, 11); \
xt = T32(x7 + xt); \
x7 = ROTL32(x7, 11); \
xc ^= xi; \
xd ^= xj; \
xe ^= xg; \
xf ^= xh; \
x8 ^= xm; \
x9 ^= xn; \
xa ^= xk; \
xb ^= xl; \
x4 ^= xq; \
x5 ^= xr; \
x6 ^= xo; \
x7 ^= xp; \
x0 ^= xu; \
x1 ^= xv; \
x2 ^= xs; \
x3 ^= xt; \
} while (0)
#define ROUND_ODD do { \
xj = T32(xc + xj); \
xc = ROTL32(xc, 7); \
xi = T32(xd + xi); \
xd = ROTL32(xd, 7); \
xh = T32(xe + xh); \
xe = ROTL32(xe, 7); \
xg = T32(xf + xg); \
xf = ROTL32(xf, 7); \
xn = T32(x8 + xn); \
x8 = ROTL32(x8, 7); \
xm = T32(x9 + xm); \
x9 = ROTL32(x9, 7); \
xl = T32(xa + xl); \
xa = ROTL32(xa, 7); \
xk = T32(xb + xk); \
xb = ROTL32(xb, 7); \
xr = T32(x4 + xr); \
x4 = ROTL32(x4, 7); \
xq = T32(x5 + xq); \
x5 = ROTL32(x5, 7); \
xp = T32(x6 + xp); \
x6 = ROTL32(x6, 7); \
xo = T32(x7 + xo); \
x7 = ROTL32(x7, 7); \
xv = T32(x0 + xv); \
x0 = ROTL32(x0, 7); \
xu = T32(x1 + xu); \
x1 = ROTL32(x1, 7); \
xt = T32(x2 + xt); \
x2 = ROTL32(x2, 7); \
xs = T32(x3 + xs); \
x3 = ROTL32(x3, 7); \
x4 ^= xj; \
x5 ^= xi; \
x6 ^= xh; \
x7 ^= xg; \
x0 ^= xn; \
x1 ^= xm; \
x2 ^= xl; \
x3 ^= xk; \
xc ^= xr; \
xd ^= xq; \
xe ^= xp; \
xf ^= xo; \
x8 ^= xv; \
x9 ^= xu; \
xa ^= xt; \
xb ^= xs; \
xh = T32(x4 + xh); \
x4 = ROTL32(x4, 11); \
xg = T32(x5 + xg); \
x5 = ROTL32(x5, 11); \
xj = T32(x6 + xj); \
x6 = ROTL32(x6, 11); \
xi = T32(x7 + xi); \
x7 = ROTL32(x7, 11); \
xl = T32(x0 + xl); \
x0 = ROTL32(x0, 11); \
xk = T32(x1 + xk); \
x1 = ROTL32(x1, 11); \
xn = T32(x2 + xn); \
x2 = ROTL32(x2, 11); \
xm = T32(x3 + xm); \
x3 = ROTL32(x3, 11); \
xp = T32(xc + xp); \
xc = ROTL32(xc, 11); \
xo = T32(xd + xo); \
xd = ROTL32(xd, 11); \
xr = T32(xe + xr); \
xe = ROTL32(xe, 11); \
xq = T32(xf + xq); \
xf = ROTL32(xf, 11); \
xt = T32(x8 + xt); \
x8 = ROTL32(x8, 11); \
xs = T32(x9 + xs); \
x9 = ROTL32(x9, 11); \
xv = T32(xa + xv); \
xa = ROTL32(xa, 11); \
xu = T32(xb + xu); \
xb = ROTL32(xb, 11); \
x0 ^= xh; \
x1 ^= xg; \
x2 ^= xj; \
x3 ^= xi; \
x4 ^= xl; \
x5 ^= xk; \
x6 ^= xn; \
x7 ^= xm; \
x8 ^= xp; \
x9 ^= xo; \
xa ^= xr; \
xb ^= xq; \
xc ^= xt; \
xd ^= xs; \
xe ^= xv; \
xf ^= xu; \
} while (0)
/*
* There is no need to unroll all 16 rounds. The word-swapping permutation
* is an involution, so we need to unroll an even number of rounds. On
* "big" systems, unrolling 4 rounds yields about 97% of the speed
* achieved with full unrolling; and it keeps the code more compact
* for small architectures.
*/
#if SPH_CUBEHASH_UNROLL == 2
#define SIXTEEN_ROUNDS do { \
int j; \
for (j = 0; j < 8; j ++) { \
ROUND_EVEN; \
ROUND_ODD; \
} \
} while (0)
#elif SPH_CUBEHASH_UNROLL == 4
#define SIXTEEN_ROUNDS do { \
int j; \
for (j = 0; j < 4; j ++) { \
ROUND_EVEN; \
ROUND_ODD; \
ROUND_EVEN; \
ROUND_ODD; \
} \
} while (0)
#elif SPH_CUBEHASH_UNROLL == 8
#define SIXTEEN_ROUNDS do { \
int j; \
for (j = 0; j < 2; j ++) { \
ROUND_EVEN; \
ROUND_ODD; \
ROUND_EVEN; \
ROUND_ODD; \
ROUND_EVEN; \
ROUND_ODD; \
ROUND_EVEN; \
ROUND_ODD; \
} \
} while (0)
#else
#define SIXTEEN_ROUNDS do { \
ROUND_EVEN; \
ROUND_ODD; \
ROUND_EVEN; \
ROUND_ODD; \
ROUND_EVEN; \
ROUND_ODD; \
ROUND_EVEN; \
ROUND_ODD; \
ROUND_EVEN; \
ROUND_ODD; \
ROUND_EVEN; \
ROUND_ODD; \
ROUND_EVEN; \
ROUND_ODD; \
ROUND_EVEN; \
ROUND_ODD; \
} while (0)
#endif
static void
cubehash_init(sph_cubehash_context *sc, const sph_u32 *iv)
{
memcpy(sc->state, iv, sizeof sc->state);
sc->ptr = 0;
}
static void
cubehash_core(sph_cubehash_context *sc, const void *data, size_t len)
{
unsigned char *buf;
size_t ptr;
DECL_STATE
buf = sc->buf;
ptr = sc->ptr;
if (len < (sizeof sc->buf) - ptr) {
memcpy(buf + ptr, data, len);
ptr += len;
sc->ptr = ptr;
return;
}
READ_STATE(sc);
while (len > 0) {
size_t clen;
clen = (sizeof sc->buf) - ptr;
if (clen > len)
clen = len;
memcpy(buf + ptr, data, clen);
ptr += clen;
data = (const unsigned char *)data + clen;
len -= clen;
if (ptr == sizeof sc->buf) {
INPUT_BLOCK;
SIXTEEN_ROUNDS;
ptr = 0;
}
}
WRITE_STATE(sc);
sc->ptr = ptr;
}
static void
cubehash_close(sph_cubehash_context *sc, unsigned ub, unsigned n,
void *dst, size_t out_size_w32)
{
unsigned char *buf, *out;
size_t ptr;
unsigned z;
int i;
DECL_STATE
buf = sc->buf;
ptr = sc->ptr;
z = 0x80 >> n;
buf[ptr ++] = ((ub & -z) | z) & 0xFF;
memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
READ_STATE(sc);
INPUT_BLOCK;
for (i = 0; i < 11; i ++) {
SIXTEEN_ROUNDS;
if (i == 0)
xv ^= SPH_C32(1);
}
WRITE_STATE(sc);
out = dst;
for (z = 0; z < out_size_w32; z ++)
sph_enc32le(out + (z << 2), sc->state[z]);
}
/* see sph_cubehash.h */
void
sph_cubehash224_init(void *cc)
{
cubehash_init(cc, IV224);
}
/* see sph_cubehash.h */
void
sph_cubehash224(void *cc, const void *data, size_t len)
{
cubehash_core(cc, data, len);
}
/* see sph_cubehash.h */
void
sph_cubehash224_close(void *cc, void *dst)
{
sph_cubehash224_addbits_and_close(cc, 0, 0, dst);
}
/* see sph_cubehash.h */
void
sph_cubehash224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
cubehash_close(cc, ub, n, dst, 7);
sph_cubehash224_init(cc);
}
/* see sph_cubehash.h */
void
sph_cubehash256_init(void *cc)
{
cubehash_init(cc, IV256);
}
/* see sph_cubehash.h */
void
sph_cubehash256(void *cc, const void *data, size_t len)
{
cubehash_core(cc, data, len);
}
/* see sph_cubehash.h */
void
sph_cubehash256_close(void *cc, void *dst)
{
sph_cubehash256_addbits_and_close(cc, 0, 0, dst);
}
/* see sph_cubehash.h */
void
sph_cubehash256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
cubehash_close(cc, ub, n, dst, 8);
sph_cubehash256_init(cc);
}
/* see sph_cubehash.h */
void
sph_cubehash384_init(void *cc)
{
cubehash_init(cc, IV384);
}
/* see sph_cubehash.h */
void
sph_cubehash384(void *cc, const void *data, size_t len)
{
cubehash_core(cc, data, len);
}
/* see sph_cubehash.h */
void
sph_cubehash384_close(void *cc, void *dst)
{
sph_cubehash384_addbits_and_close(cc, 0, 0, dst);
}
/* see sph_cubehash.h */
void
sph_cubehash384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
cubehash_close(cc, ub, n, dst, 12);
sph_cubehash384_init(cc);
}
/* see sph_cubehash.h */
void
sph_cubehash512_init(void *cc)
{
cubehash_init(cc, IV512);
}
/* see sph_cubehash.h */
void
sph_cubehash512(void *cc, const void *data, size_t len)
{
cubehash_core(cc, data, len);
}
/* see sph_cubehash.h */
void
sph_cubehash512_close(void *cc, void *dst)
{
sph_cubehash512_addbits_and_close(cc, 0, 0, dst);
}
/* see sph_cubehash.h */
void
sph_cubehash512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
cubehash_close(cc, ub, n, dst, 16);
sph_cubehash512_init(cc);
}
#ifdef __cplusplus
}
#endif

1031
sph/echo.c

File diff suppressed because it is too large Load Diff

0
fugue.c → sph/fugue.c

0
groestl.c → sph/groestl.c

1107
sph/jh.c

File diff suppressed because it is too large Load Diff

0
keccak.c → sph/keccak.c

1426
sph/luffa.c

File diff suppressed because it is too large Load Diff

1764
sph/shavite.c

File diff suppressed because it is too large Load Diff

1799
sph/simd.c

File diff suppressed because it is too large Load Diff

1244
sph/skein.c

File diff suppressed because it is too large Load Diff

0
sph_blake.h → sph/sph_blake.h

320
sph/sph_bmw.h

@ -0,0 +1,320 @@ @@ -0,0 +1,320 @@
/* $Id: sph_bmw.h 216 2010-06-08 09:46:57Z tp $ */
/**
* BMW interface. BMW (aka "Blue Midnight Wish") is a family of
* functions which differ by their output size; this implementation
* defines BMW for output sizes 224, 256, 384 and 512 bits.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file sph_bmw.h
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifndef SPH_BMW_H__
#define SPH_BMW_H__
#include <stddef.h>
#include "sph_types.h"
/**
* Output size (in bits) for BMW-224.
*/
#define SPH_SIZE_bmw224 224
/**
* Output size (in bits) for BMW-256.
*/
#define SPH_SIZE_bmw256 256
#if SPH_64
/**
* Output size (in bits) for BMW-384.
*/
#define SPH_SIZE_bmw384 384
/**
* Output size (in bits) for BMW-512.
*/
#define SPH_SIZE_bmw512 512
#endif
/**
* This structure is a context for BMW-224 and BMW-256 computations:
* it contains the intermediate values and some data from the last
* entered block. Once a BMW computation has been performed, the
* context can be reused for another computation.
*
* The contents of this structure are private. A running BMW
* computation can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char buf[64]; /* first field, for alignment */
size_t ptr;
sph_u32 H[16];
#if SPH_64
sph_u64 bit_count;
#else
sph_u32 bit_count_high, bit_count_low;
#endif
#endif
} sph_bmw_small_context;
/**
* This structure is a context for BMW-224 computations. It is
* identical to the common <code>sph_bmw_small_context</code>.
*/
typedef sph_bmw_small_context sph_bmw224_context;
/**
* This structure is a context for BMW-256 computations. It is
* identical to the common <code>sph_bmw_small_context</code>.
*/
typedef sph_bmw_small_context sph_bmw256_context;
#if SPH_64
/**
* This structure is a context for BMW-384 and BMW-512 computations:
* it contains the intermediate values and some data from the last
* entered block. Once a BMW computation has been performed, the
* context can be reused for another computation.
*
* The contents of this structure are private. A running BMW
* computation can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char buf[128]; /* first field, for alignment */
size_t ptr;
sph_u64 H[16];
sph_u64 bit_count;
#endif
} sph_bmw_big_context;
/**
* This structure is a context for BMW-384 computations. It is
* identical to the common <code>sph_bmw_small_context</code>.
*/
typedef sph_bmw_big_context sph_bmw384_context;
/**
* This structure is a context for BMW-512 computations. It is
* identical to the common <code>sph_bmw_small_context</code>.
*/
typedef sph_bmw_big_context sph_bmw512_context;
#endif
/**
* Initialize a BMW-224 context. This process performs no memory allocation.
*
* @param cc the BMW-224 context (pointer to a
* <code>sph_bmw224_context</code>)
*/
void sph_bmw224_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the BMW-224 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_bmw224(void *cc, const void *data, size_t len);
/**
* Terminate the current BMW-224 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (28 bytes). The context is automatically
* reinitialized.
*
* @param cc the BMW-224 context
* @param dst the destination buffer
*/
void sph_bmw224_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (28 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the BMW-224 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_bmw224_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a BMW-256 context. This process performs no memory allocation.
*
* @param cc the BMW-256 context (pointer to a
* <code>sph_bmw256_context</code>)
*/
void sph_bmw256_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the BMW-256 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_bmw256(void *cc, const void *data, size_t len);
/**
* Terminate the current BMW-256 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (32 bytes). The context is automatically
* reinitialized.
*
* @param cc the BMW-256 context
* @param dst the destination buffer
*/
void sph_bmw256_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (32 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the BMW-256 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_bmw256_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
#if SPH_64
/**
* Initialize a BMW-384 context. This process performs no memory allocation.
*
* @param cc the BMW-384 context (pointer to a
* <code>sph_bmw384_context</code>)
*/
void sph_bmw384_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the BMW-384 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_bmw384(void *cc, const void *data, size_t len);
/**
* Terminate the current BMW-384 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (48 bytes). The context is automatically
* reinitialized.
*
* @param cc the BMW-384 context
* @param dst the destination buffer
*/
void sph_bmw384_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (48 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the BMW-384 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_bmw384_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a BMW-512 context. This process performs no memory allocation.
*
* @param cc the BMW-512 context (pointer to a
* <code>sph_bmw512_context</code>)
*/
void sph_bmw512_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the BMW-512 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_bmw512(void *cc, const void *data, size_t len);
/**
* Terminate the current BMW-512 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (64 bytes). The context is automatically
* reinitialized.
*
* @param cc the BMW-512 context
* @param dst the destination buffer
*/
void sph_bmw512_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (64 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the BMW-512 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_bmw512_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
#endif
#endif

292
sph/sph_cubehash.h

@ -0,0 +1,292 @@ @@ -0,0 +1,292 @@
/* $Id: sph_cubehash.h 180 2010-05-08 02:29:25Z tp $ */
/**
* CubeHash interface. CubeHash is a family of functions which differ by
* their output size; this implementation defines CubeHash for output
* sizes 224, 256, 384 and 512 bits, with the "standard parameters"
* (CubeHash16/32 with the CubeHash specification notations).
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file sph_cubehash.h
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifndef SPH_CUBEHASH_H__
#define SPH_CUBEHASH_H__
#ifdef __cplusplus
extern "C"{
#endif
#include <stddef.h>
#include "sph_types.h"
/**
* Output size (in bits) for CubeHash-224.
*/
#define SPH_SIZE_cubehash224 224
/**
* Output size (in bits) for CubeHash-256.
*/
#define SPH_SIZE_cubehash256 256
/**
* Output size (in bits) for CubeHash-384.
*/
#define SPH_SIZE_cubehash384 384
/**
* Output size (in bits) for CubeHash-512.
*/
#define SPH_SIZE_cubehash512 512
/**
* This structure is a context for CubeHash computations: it contains the
* intermediate values and some data from the last entered block. Once
* a CubeHash computation has been performed, the context can be reused for
* another computation.
*
* The contents of this structure are private. A running CubeHash computation
* can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char buf[32]; /* first field, for alignment */
size_t ptr;
sph_u32 state[32];
#endif
} sph_cubehash_context;
/**
* Type for a CubeHash-224 context (identical to the common context).
*/
typedef sph_cubehash_context sph_cubehash224_context;
/**
* Type for a CubeHash-256 context (identical to the common context).
*/
typedef sph_cubehash_context sph_cubehash256_context;
/**
* Type for a CubeHash-384 context (identical to the common context).
*/
typedef sph_cubehash_context sph_cubehash384_context;
/**
* Type for a CubeHash-512 context (identical to the common context).
*/
typedef sph_cubehash_context sph_cubehash512_context;
/**
* Initialize a CubeHash-224 context. This process performs no memory
* allocation.
*
* @param cc the CubeHash-224 context (pointer to a
* <code>sph_cubehash224_context</code>)
*/
void sph_cubehash224_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the CubeHash-224 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_cubehash224(void *cc, const void *data, size_t len);
/**
* Terminate the current CubeHash-224 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (28 bytes). The context is automatically
* reinitialized.
*
* @param cc the CubeHash-224 context
* @param dst the destination buffer
*/
void sph_cubehash224_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (28 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the CubeHash-224 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_cubehash224_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a CubeHash-256 context. This process performs no memory
* allocation.
*
* @param cc the CubeHash-256 context (pointer to a
* <code>sph_cubehash256_context</code>)
*/
void sph_cubehash256_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the CubeHash-256 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_cubehash256(void *cc, const void *data, size_t len);
/**
* Terminate the current CubeHash-256 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (32 bytes). The context is automatically
* reinitialized.
*
* @param cc the CubeHash-256 context
* @param dst the destination buffer
*/
void sph_cubehash256_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (32 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the CubeHash-256 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_cubehash256_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a CubeHash-384 context. This process performs no memory
* allocation.
*
* @param cc the CubeHash-384 context (pointer to a
* <code>sph_cubehash384_context</code>)
*/
void sph_cubehash384_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the CubeHash-384 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_cubehash384(void *cc, const void *data, size_t len);
/**
* Terminate the current CubeHash-384 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (48 bytes). The context is automatically
* reinitialized.
*
* @param cc the CubeHash-384 context
* @param dst the destination buffer
*/
void sph_cubehash384_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (48 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the CubeHash-384 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_cubehash384_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a CubeHash-512 context. This process performs no memory
* allocation.
*
* @param cc the CubeHash-512 context (pointer to a
* <code>sph_cubehash512_context</code>)
*/
void sph_cubehash512_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the CubeHash-512 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_cubehash512(void *cc, const void *data, size_t len);
/**
* Terminate the current CubeHash-512 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (64 bytes). The context is automatically
* reinitialized.
*
* @param cc the CubeHash-512 context
* @param dst the destination buffer
*/
void sph_cubehash512_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (64 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the CubeHash-512 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_cubehash512_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
#ifdef __cplusplus
}
#endif
#endif

320
sph/sph_echo.h

@ -0,0 +1,320 @@ @@ -0,0 +1,320 @@
/* $Id: sph_echo.h 216 2010-06-08 09:46:57Z tp $ */
/**
* ECHO interface. ECHO is a family of functions which differ by
* their output size; this implementation defines ECHO for output
* sizes 224, 256, 384 and 512 bits.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file sph_echo.h
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifndef SPH_ECHO_H__
#define SPH_ECHO_H__
#ifdef __cplusplus
extern "C"{
#endif
#include <stddef.h>
#include "sph_types.h"
/**
* Output size (in bits) for ECHO-224.
*/
#define SPH_SIZE_echo224 224
/**
* Output size (in bits) for ECHO-256.
*/
#define SPH_SIZE_echo256 256
/**
* Output size (in bits) for ECHO-384.
*/
#define SPH_SIZE_echo384 384
/**
* Output size (in bits) for ECHO-512.
*/
#define SPH_SIZE_echo512 512
/**
* This structure is a context for ECHO computations: it contains the
* intermediate values and some data from the last entered block. Once
* an ECHO computation has been performed, the context can be reused for
* another computation. This specific structure is used for ECHO-224
* and ECHO-256.
*
* The contents of this structure are private. A running ECHO computation
* can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char buf[192]; /* first field, for alignment */
size_t ptr;
union {
sph_u32 Vs[4][4];
#if SPH_64
sph_u64 Vb[4][2];
#endif
} u;
sph_u32 C0, C1, C2, C3;
#endif
} sph_echo_small_context;
/**
* This structure is a context for ECHO computations: it contains the
* intermediate values and some data from the last entered block. Once
* an ECHO computation has been performed, the context can be reused for
* another computation. This specific structure is used for ECHO-384
* and ECHO-512.
*
* The contents of this structure are private. A running ECHO computation
* can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char buf[128]; /* first field, for alignment */
size_t ptr;
union {
sph_u32 Vs[8][4];
#if SPH_64
sph_u64 Vb[8][2];
#endif
} u;
sph_u32 C0, C1, C2, C3;
#endif
} sph_echo_big_context;
/**
* Type for a ECHO-224 context (identical to the common "small" context).
*/
typedef sph_echo_small_context sph_echo224_context;
/**
* Type for a ECHO-256 context (identical to the common "small" context).
*/
typedef sph_echo_small_context sph_echo256_context;
/**
* Type for a ECHO-384 context (identical to the common "big" context).
*/
typedef sph_echo_big_context sph_echo384_context;
/**
* Type for a ECHO-512 context (identical to the common "big" context).
*/
typedef sph_echo_big_context sph_echo512_context;
/**
* Initialize an ECHO-224 context. This process performs no memory allocation.
*
* @param cc the ECHO-224 context (pointer to a
* <code>sph_echo224_context</code>)
*/
void sph_echo224_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the ECHO-224 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_echo224(void *cc, const void *data, size_t len);
/**
* Terminate the current ECHO-224 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (28 bytes). The context is automatically
* reinitialized.
*
* @param cc the ECHO-224 context
* @param dst the destination buffer
*/
void sph_echo224_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (28 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the ECHO-224 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_echo224_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize an ECHO-256 context. This process performs no memory allocation.
*
* @param cc the ECHO-256 context (pointer to a
* <code>sph_echo256_context</code>)
*/
void sph_echo256_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the ECHO-256 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_echo256(void *cc, const void *data, size_t len);
/**
* Terminate the current ECHO-256 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (32 bytes). The context is automatically
* reinitialized.
*
* @param cc the ECHO-256 context
* @param dst the destination buffer
*/
void sph_echo256_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (32 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the ECHO-256 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_echo256_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize an ECHO-384 context. This process performs no memory allocation.
*
* @param cc the ECHO-384 context (pointer to a
* <code>sph_echo384_context</code>)
*/
void sph_echo384_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the ECHO-384 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_echo384(void *cc, const void *data, size_t len);
/**
* Terminate the current ECHO-384 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (48 bytes). The context is automatically
* reinitialized.
*
* @param cc the ECHO-384 context
* @param dst the destination buffer
*/
void sph_echo384_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (48 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the ECHO-384 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_echo384_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize an ECHO-512 context. This process performs no memory allocation.
*
* @param cc the ECHO-512 context (pointer to a
* <code>sph_echo512_context</code>)
*/
void sph_echo512_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the ECHO-512 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_echo512(void *cc, const void *data, size_t len);
/**
* Terminate the current ECHO-512 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (64 bytes). The context is automatically
* reinitialized.
*
* @param cc the ECHO-512 context
* @param dst the destination buffer
*/
void sph_echo512_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (64 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the ECHO-512 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_echo512_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
#ifdef __cplusplus
}
#endif
#endif

0
sph_fugue.h → sph/sph_fugue.h

0
sph_groestl.h → sph/sph_groestl.h

290
sph/sph_jh.h

@ -0,0 +1,290 @@ @@ -0,0 +1,290 @@
/* $Id: sph_jh.h 216 2010-06-08 09:46:57Z tp $ */
/**
* JH interface. JH is a family of functions which differ by
* their output size; this implementation defines JH for output
* sizes 224, 256, 384 and 512 bits.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file sph_jh.h
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifndef SPH_JH_H__
#define SPH_JH_H__
#include <stddef.h>
#include "sph_types.h"
/**
* Output size (in bits) for JH-224.
*/
#define SPH_SIZE_jh224 224
/**
* Output size (in bits) for JH-256.
*/
#define SPH_SIZE_jh256 256
/**
* Output size (in bits) for JH-384.
*/
#define SPH_SIZE_jh384 384
/**
* Output size (in bits) for JH-512.
*/
#define SPH_SIZE_jh512 512
/**
* This structure is a context for JH computations: it contains the
* intermediate values and some data from the last entered block. Once
* a JH computation has been performed, the context can be reused for
* another computation.
*
* The contents of this structure are private. A running JH computation
* can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char buf[64]; /* first field, for alignment */
size_t ptr;
union {
#if SPH_64
sph_u64 wide[16];
#endif
sph_u32 narrow[32];
} H;
#if SPH_64
sph_u64 block_count;
#else
sph_u32 block_count_high, block_count_low;
#endif
#endif
} sph_jh_context;
/**
* Type for a JH-224 context (identical to the common context).
*/
typedef sph_jh_context sph_jh224_context;
/**
* Type for a JH-256 context (identical to the common context).
*/
typedef sph_jh_context sph_jh256_context;
/**
* Type for a JH-384 context (identical to the common context).
*/
typedef sph_jh_context sph_jh384_context;
/**
* Type for a JH-512 context (identical to the common context).
*/
typedef sph_jh_context sph_jh512_context;
/**
* Initialize a JH-224 context. This process performs no memory allocation.
*
* @param cc the JH-224 context (pointer to a
* <code>sph_jh224_context</code>)
*/
void sph_jh224_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the JH-224 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_jh224(void *cc, const void *data, size_t len);
/**
* Terminate the current JH-224 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (28 bytes). The context is automatically
* reinitialized.
*
* @param cc the JH-224 context
* @param dst the destination buffer
*/
void sph_jh224_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (28 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the JH-224 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_jh224_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a JH-256 context. This process performs no memory allocation.
*
* @param cc the JH-256 context (pointer to a
* <code>sph_jh256_context</code>)
*/
void sph_jh256_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the JH-256 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_jh256(void *cc, const void *data, size_t len);
/**
* Terminate the current JH-256 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (32 bytes). The context is automatically
* reinitialized.
*
* @param cc the JH-256 context
* @param dst the destination buffer
*/
void sph_jh256_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (32 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the JH-256 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_jh256_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a JH-384 context. This process performs no memory allocation.
*
* @param cc the JH-384 context (pointer to a
* <code>sph_jh384_context</code>)
*/
void sph_jh384_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the JH-384 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_jh384(void *cc, const void *data, size_t len);
/**
* Terminate the current JH-384 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (48 bytes). The context is automatically
* reinitialized.
*
* @param cc the JH-384 context
* @param dst the destination buffer
*/
void sph_jh384_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (48 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the JH-384 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_jh384_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a JH-512 context. This process performs no memory allocation.
*
* @param cc the JH-512 context (pointer to a
* <code>sph_jh512_context</code>)
*/
void sph_jh512_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the JH-512 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_jh512(void *cc, const void *data, size_t len);
/**
* Terminate the current JH-512 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (64 bytes). The context is automatically
* reinitialized.
*
* @param cc the JH-512 context
* @param dst the destination buffer
*/
void sph_jh512_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (64 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the JH-512 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_jh512_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
#endif

0
sph_keccak.h → sph/sph_keccak.h

296
sph/sph_luffa.h

@ -0,0 +1,296 @@ @@ -0,0 +1,296 @@
/* $Id: sph_luffa.h 154 2010-04-26 17:00:24Z tp $ */
/**
* Luffa interface. Luffa is a family of functions which differ by
* their output size; this implementation defines Luffa for output
* sizes 224, 256, 384 and 512 bits.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file sph_luffa.h
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifndef SPH_LUFFA_H__
#define SPH_LUFFA_H__
#ifdef __cplusplus
extern "C"{
#endif
#include <stddef.h>
#include "sph_types.h"
/**
* Output size (in bits) for Luffa-224.
*/
#define SPH_SIZE_luffa224 224
/**
* Output size (in bits) for Luffa-256.
*/
#define SPH_SIZE_luffa256 256
/**
* Output size (in bits) for Luffa-384.
*/
#define SPH_SIZE_luffa384 384
/**
* Output size (in bits) for Luffa-512.
*/
#define SPH_SIZE_luffa512 512
/**
* This structure is a context for Luffa-224 computations: it contains
* the intermediate values and some data from the last entered block.
* Once a Luffa computation has been performed, the context can be
* reused for another computation.
*
* The contents of this structure are private. A running Luffa
* computation can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char buf[32]; /* first field, for alignment */
size_t ptr;
sph_u32 V[3][8];
#endif
} sph_luffa224_context;
/**
* This structure is a context for Luffa-256 computations. It is
* identical to <code>sph_luffa224_context</code>.
*/
typedef sph_luffa224_context sph_luffa256_context;
/**
* This structure is a context for Luffa-384 computations.
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char buf[32]; /* first field, for alignment */
size_t ptr;
sph_u32 V[4][8];
#endif
} sph_luffa384_context;
/**
* This structure is a context for Luffa-512 computations.
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char buf[32]; /* first field, for alignment */
size_t ptr;
sph_u32 V[5][8];
#endif
} sph_luffa512_context;
/**
* Initialize a Luffa-224 context. This process performs no memory allocation.
*
* @param cc the Luffa-224 context (pointer to a
* <code>sph_luffa224_context</code>)
*/
void sph_luffa224_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the Luffa-224 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_luffa224(void *cc, const void *data, size_t len);
/**
* Terminate the current Luffa-224 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (28 bytes). The context is automatically
* reinitialized.
*
* @param cc the Luffa-224 context
* @param dst the destination buffer
*/
void sph_luffa224_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (28 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the Luffa-224 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_luffa224_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a Luffa-256 context. This process performs no memory allocation.
*
* @param cc the Luffa-256 context (pointer to a
* <code>sph_luffa256_context</code>)
*/
void sph_luffa256_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the Luffa-256 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_luffa256(void *cc, const void *data, size_t len);
/**
* Terminate the current Luffa-256 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (32 bytes). The context is automatically
* reinitialized.
*
* @param cc the Luffa-256 context
* @param dst the destination buffer
*/
void sph_luffa256_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (32 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the Luffa-256 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_luffa256_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a Luffa-384 context. This process performs no memory allocation.
*
* @param cc the Luffa-384 context (pointer to a
* <code>sph_luffa384_context</code>)
*/
void sph_luffa384_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the Luffa-384 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_luffa384(void *cc, const void *data, size_t len);
/**
* Terminate the current Luffa-384 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (48 bytes). The context is automatically
* reinitialized.
*
* @param cc the Luffa-384 context
* @param dst the destination buffer
*/
void sph_luffa384_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (48 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the Luffa-384 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_luffa384_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a Luffa-512 context. This process performs no memory allocation.
*
* @param cc the Luffa-512 context (pointer to a
* <code>sph_luffa512_context</code>)
*/
void sph_luffa512_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the Luffa-512 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_luffa512(void *cc, const void *data, size_t len);
/**
* Terminate the current Luffa-512 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (64 bytes). The context is automatically
* reinitialized.
*
* @param cc the Luffa-512 context
* @param dst the destination buffer
*/
void sph_luffa512_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (64 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the Luffa-512 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_luffa512_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
#ifdef __cplusplus
}
#endif
#endif

314
sph/sph_shavite.h

@ -0,0 +1,314 @@ @@ -0,0 +1,314 @@
/* $Id: sph_shavite.h 208 2010-06-02 20:33:00Z tp $ */
/**
* SHAvite-3 interface. This code implements SHAvite-3 with the
* recommended parameters for SHA-3, with outputs of 224, 256, 384 and
* 512 bits. In the following, we call the function "SHAvite" (without
* the "-3" suffix), thus "SHAvite-224" is "SHAvite-3 with a 224-bit
* output".
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file sph_shavite.h
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifndef SPH_SHAVITE_H__
#define SPH_SHAVITE_H__
#include <stddef.h>
#include "sph_types.h"
#ifdef __cplusplus
extern "C"{
#endif
/**
* Output size (in bits) for SHAvite-224.
*/
#define SPH_SIZE_shavite224 224
/**
* Output size (in bits) for SHAvite-256.
*/
#define SPH_SIZE_shavite256 256
/**
* Output size (in bits) for SHAvite-384.
*/
#define SPH_SIZE_shavite384 384
/**
* Output size (in bits) for SHAvite-512.
*/
#define SPH_SIZE_shavite512 512
/**
* This structure is a context for SHAvite-224 and SHAvite-256 computations:
* it contains the intermediate values and some data from the last
* entered block. Once a SHAvite computation has been performed, the
* context can be reused for another computation.
*
* The contents of this structure are private. A running SHAvite
* computation can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char buf[64]; /* first field, for alignment */
size_t ptr;
sph_u32 h[8];
sph_u32 count0, count1;
#endif
} sph_shavite_small_context;
/**
* This structure is a context for SHAvite-224 computations. It is
* identical to the common <code>sph_shavite_small_context</code>.
*/
typedef sph_shavite_small_context sph_shavite224_context;
/**
* This structure is a context for SHAvite-256 computations. It is
* identical to the common <code>sph_shavite_small_context</code>.
*/
typedef sph_shavite_small_context sph_shavite256_context;
/**
* This structure is a context for SHAvite-384 and SHAvite-512 computations:
* it contains the intermediate values and some data from the last
* entered block. Once a SHAvite computation has been performed, the
* context can be reused for another computation.
*
* The contents of this structure are private. A running SHAvite
* computation can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char buf[128]; /* first field, for alignment */
size_t ptr;
sph_u32 h[16];
sph_u32 count0, count1, count2, count3;
#endif
} sph_shavite_big_context;
/**
* This structure is a context for SHAvite-384 computations. It is
* identical to the common <code>sph_shavite_small_context</code>.
*/
typedef sph_shavite_big_context sph_shavite384_context;
/**
* This structure is a context for SHAvite-512 computations. It is
* identical to the common <code>sph_shavite_small_context</code>.
*/
typedef sph_shavite_big_context sph_shavite512_context;
/**
* Initialize a SHAvite-224 context. This process performs no memory allocation.
*
* @param cc the SHAvite-224 context (pointer to a
* <code>sph_shavite224_context</code>)
*/
void sph_shavite224_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the SHAvite-224 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_shavite224(void *cc, const void *data, size_t len);
/**
* Terminate the current SHAvite-224 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (28 bytes). The context is automatically
* reinitialized.
*
* @param cc the SHAvite-224 context
* @param dst the destination buffer
*/
void sph_shavite224_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (28 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the SHAvite-224 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_shavite224_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a SHAvite-256 context. This process performs no memory allocation.
*
* @param cc the SHAvite-256 context (pointer to a
* <code>sph_shavite256_context</code>)
*/
void sph_shavite256_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the SHAvite-256 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_shavite256(void *cc, const void *data, size_t len);
/**
* Terminate the current SHAvite-256 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (32 bytes). The context is automatically
* reinitialized.
*
* @param cc the SHAvite-256 context
* @param dst the destination buffer
*/
void sph_shavite256_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (32 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the SHAvite-256 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_shavite256_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a SHAvite-384 context. This process performs no memory allocation.
*
* @param cc the SHAvite-384 context (pointer to a
* <code>sph_shavite384_context</code>)
*/
void sph_shavite384_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the SHAvite-384 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_shavite384(void *cc, const void *data, size_t len);
/**
* Terminate the current SHAvite-384 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (48 bytes). The context is automatically
* reinitialized.
*
* @param cc the SHAvite-384 context
* @param dst the destination buffer
*/
void sph_shavite384_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (48 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the SHAvite-384 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_shavite384_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a SHAvite-512 context. This process performs no memory allocation.
*
* @param cc the SHAvite-512 context (pointer to a
* <code>sph_shavite512_context</code>)
*/
void sph_shavite512_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the SHAvite-512 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_shavite512(void *cc, const void *data, size_t len);
/**
* Terminate the current SHAvite-512 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (64 bytes). The context is automatically
* reinitialized.
*
* @param cc the SHAvite-512 context
* @param dst the destination buffer
*/
void sph_shavite512_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (64 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the SHAvite-512 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_shavite512_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
#ifdef __cplusplus
}
#endif
#endif

309
sph/sph_simd.h

@ -0,0 +1,309 @@ @@ -0,0 +1,309 @@
/* $Id: sph_simd.h 154 2010-04-26 17:00:24Z tp $ */
/**
* SIMD interface. SIMD is a family of functions which differ by
* their output size; this implementation defines SIMD for output
* sizes 224, 256, 384 and 512 bits.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file sph_simd.h
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifndef SPH_SIMD_H__
#define SPH_SIMD_H__
#ifdef __cplusplus
extern "C"{
#endif
#include <stddef.h>
#include "sph_types.h"
/**
* Output size (in bits) for SIMD-224.
*/
#define SPH_SIZE_simd224 224
/**
* Output size (in bits) for SIMD-256.
*/
#define SPH_SIZE_simd256 256
/**
* Output size (in bits) for SIMD-384.
*/
#define SPH_SIZE_simd384 384
/**
* Output size (in bits) for SIMD-512.
*/
#define SPH_SIZE_simd512 512
/**
* This structure is a context for SIMD computations: it contains the
* intermediate values and some data from the last entered block. Once
* an SIMD computation has been performed, the context can be reused for
* another computation. This specific structure is used for SIMD-224
* and SIMD-256.
*
* The contents of this structure are private. A running SIMD computation
* can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char buf[64]; /* first field, for alignment */
size_t ptr;
sph_u32 state[16];
sph_u32 count_low, count_high;
#endif
} sph_simd_small_context;
/**
* This structure is a context for SIMD computations: it contains the
* intermediate values and some data from the last entered block. Once
* an SIMD computation has been performed, the context can be reused for
* another computation. This specific structure is used for SIMD-384
* and SIMD-512.
*
* The contents of this structure are private. A running SIMD computation
* can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char buf[128]; /* first field, for alignment */
size_t ptr;
sph_u32 state[32];
sph_u32 count_low, count_high;
#endif
} sph_simd_big_context;
/**
* Type for a SIMD-224 context (identical to the common "small" context).
*/
typedef sph_simd_small_context sph_simd224_context;
/**
* Type for a SIMD-256 context (identical to the common "small" context).
*/
typedef sph_simd_small_context sph_simd256_context;
/**
* Type for a SIMD-384 context (identical to the common "big" context).
*/
typedef sph_simd_big_context sph_simd384_context;
/**
* Type for a SIMD-512 context (identical to the common "big" context).
*/
typedef sph_simd_big_context sph_simd512_context;
/**
* Initialize an SIMD-224 context. This process performs no memory allocation.
*
* @param cc the SIMD-224 context (pointer to a
* <code>sph_simd224_context</code>)
*/
void sph_simd224_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the SIMD-224 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_simd224(void *cc, const void *data, size_t len);
/**
* Terminate the current SIMD-224 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (28 bytes). The context is automatically
* reinitialized.
*
* @param cc the SIMD-224 context
* @param dst the destination buffer
*/
void sph_simd224_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (28 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the SIMD-224 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_simd224_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize an SIMD-256 context. This process performs no memory allocation.
*
* @param cc the SIMD-256 context (pointer to a
* <code>sph_simd256_context</code>)
*/
void sph_simd256_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the SIMD-256 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_simd256(void *cc, const void *data, size_t len);
/**
* Terminate the current SIMD-256 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (32 bytes). The context is automatically
* reinitialized.
*
* @param cc the SIMD-256 context
* @param dst the destination buffer
*/
void sph_simd256_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (32 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the SIMD-256 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_simd256_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize an SIMD-384 context. This process performs no memory allocation.
*
* @param cc the SIMD-384 context (pointer to a
* <code>sph_simd384_context</code>)
*/
void sph_simd384_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the SIMD-384 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_simd384(void *cc, const void *data, size_t len);
/**
* Terminate the current SIMD-384 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (48 bytes). The context is automatically
* reinitialized.
*
* @param cc the SIMD-384 context
* @param dst the destination buffer
*/
void sph_simd384_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (48 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the SIMD-384 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_simd384_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize an SIMD-512 context. This process performs no memory allocation.
*
* @param cc the SIMD-512 context (pointer to a
* <code>sph_simd512_context</code>)
*/
void sph_simd512_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the SIMD-512 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_simd512(void *cc, const void *data, size_t len);
/**
* Terminate the current SIMD-512 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (64 bytes). The context is automatically
* reinitialized.
*
* @param cc the SIMD-512 context
* @param dst the destination buffer
*/
void sph_simd512_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (64 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the SIMD-512 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_simd512_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
#ifdef __cplusplus
}
#endif
#endif

290
sph/sph_skein.h

@ -0,0 +1,290 @@ @@ -0,0 +1,290 @@
/* $Id: sph_skein.h 253 2011-06-07 18:33:10Z tp $ */
/**
* Skein interface. The Skein specification defines three main
* functions, called Skein-256, Skein-512 and Skein-1024, which can be
* further parameterized with an output length. For the SHA-3
* competition, Skein-512 is used for output sizes of 224, 256, 384 and
* 512 bits; this is what this code implements. Thus, we hereafter call
* Skein-224, Skein-256, Skein-384 and Skein-512 what the Skein
* specification defines as Skein-512-224, Skein-512-256, Skein-512-384
* and Skein-512-512, respectively.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file sph_skein.h
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifndef SPH_SKEIN_H__
#define SPH_SKEIN_H__
#include <stddef.h>
#include "sph_types.h"
#if SPH_64
/**
* Output size (in bits) for Skein-224.
*/
#define SPH_SIZE_skein224 224
/**
* Output size (in bits) for Skein-256.
*/
#define SPH_SIZE_skein256 256
/**
* Output size (in bits) for Skein-384.
*/
#define SPH_SIZE_skein384 384
/**
* Output size (in bits) for Skein-512.
*/
#define SPH_SIZE_skein512 512
/**
* This structure is a context for Skein computations (with a 384- or
* 512-bit output): it contains the intermediate values and some data
* from the last entered block. Once a Skein computation has been
* performed, the context can be reused for another computation.
*
* The contents of this structure are private. A running Skein computation
* can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char buf[64]; /* first field, for alignment */
size_t ptr;
sph_u64 h0, h1, h2, h3, h4, h5, h6, h7;
sph_u64 bcount;
#endif
} sph_skein_big_context;
/**
* Type for a Skein-224 context (identical to the common "big" context).
*/
typedef sph_skein_big_context sph_skein224_context;
/**
* Type for a Skein-256 context (identical to the common "big" context).
*/
typedef sph_skein_big_context sph_skein256_context;
/**
* Type for a Skein-384 context (identical to the common "big" context).
*/
typedef sph_skein_big_context sph_skein384_context;
/**
* Type for a Skein-512 context (identical to the common "big" context).
*/
typedef sph_skein_big_context sph_skein512_context;
/**
* Initialize a Skein-224 context. This process performs no memory allocation.
*
* @param cc the Skein-224 context (pointer to a
* <code>sph_skein224_context</code>)
*/
void sph_skein224_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the Skein-224 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_skein224(void *cc, const void *data, size_t len);
/**
* Terminate the current Skein-224 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (28 bytes). The context is automatically
* reinitialized.
*
* @param cc the Skein-224 context
* @param dst the destination buffer
*/
void sph_skein224_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (28 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the Skein-224 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_skein224_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a Skein-256 context. This process performs no memory allocation.
*
* @param cc the Skein-256 context (pointer to a
* <code>sph_skein256_context</code>)
*/
void sph_skein256_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the Skein-256 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_skein256(void *cc, const void *data, size_t len);
/**
* Terminate the current Skein-256 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (32 bytes). The context is automatically
* reinitialized.
*
* @param cc the Skein-256 context
* @param dst the destination buffer
*/
void sph_skein256_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (32 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the Skein-256 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_skein256_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a Skein-384 context. This process performs no memory allocation.
*
* @param cc the Skein-384 context (pointer to a
* <code>sph_skein384_context</code>)
*/
void sph_skein384_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the Skein-384 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_skein384(void *cc, const void *data, size_t len);
/**
* Terminate the current Skein-384 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (48 bytes). The context is automatically
* reinitialized.
*
* @param cc the Skein-384 context
* @param dst the destination buffer
*/
void sph_skein384_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (48 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the Skein-384 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_skein384_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a Skein-512 context. This process performs no memory allocation.
*
* @param cc the Skein-512 context (pointer to a
* <code>sph_skein512_context</code>)
*/
void sph_skein512_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the Skein-512 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_skein512(void *cc, const void *data, size_t len);
/**
* Terminate the current Skein-512 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (64 bytes). The context is automatically
* reinitialized.
*
* @param cc the Skein-512 context
* @param dst the destination buffer
*/
void sph_skein512_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (64 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the Skein-512 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_skein512_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
#endif
#endif

0
sph_types.h → sph/sph_types.h

Loading…
Cancel
Save