mirror of https://github.com/GOSTSec/ccminer
Christian Buchner
11 years ago
35 changed files with 14454 additions and 0 deletions
@ -0,0 +1,572 @@
@@ -0,0 +1,572 @@
|
||||
#include <cuda.h> |
||||
#include "cuda_runtime.h" |
||||
#include "device_launch_parameters.h" |
||||
|
||||
#include <stdio.h> |
||||
#include <memory.h> |
||||
|
||||
// TODO: replace the following definitions with a header later
||||
typedef unsigned char uint8_t; |
||||
typedef unsigned int uint32_t; |
||||
typedef unsigned long long uint64_t; |
||||
|
||||
// from heavy.cu
||||
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); |
||||
|
||||
__constant__ uint64_t c_State[25]; |
||||
__constant__ uint32_t c_PaddedMessage[18]; |
||||
|
||||
// Reverse the byte order of a 32-bit word on the device.
static __device__ uint32_t cuda_swab32(uint32_t x)
{
    // Selector 0x0123 places source byte 3 in result byte 0, byte 2 in
    // byte 1, and so on. The second operand is never selected, so 0 is passed.
    const uint32_t swapped = __byte_perm(x, 0, 0x0123);
    return swapped;
}
||||
|
||||
// These 64-bit rotates are accelerated with the funnel shifter on compute
// capability 3.5 (and better); other targets use the plain shift macro below.
#if __CUDA_ARCH__ >= 350
// Rotate `value` left by `offset` bits using two 32-bit funnel shifts.
__forceinline__ __device__ uint64_t ROTL64(const uint64_t value, const int offset) {
    // Split the operand into its 32-bit halves once.
    const int lo = __double2loint(__longlong_as_double(value));
    const int hi = __double2hiint(__longlong_as_double(value));
    unsigned int out_lo, out_hi;

    if (offset < 32) {
        asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(out_lo) : "r"(hi), "r"(lo), "r"(offset));
        asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(out_hi) : "r"(lo), "r"(hi), "r"(offset));
    } else {
        // For offsets of 32 or more the roles of the two halves are exchanged.
        // NOTE(review): relies on the .wrap mode reducing the shift amount
        // modulo 32 -- confirm against the PTX ISA description of shf.
        asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(out_lo) : "r"(lo), "r"(hi), "r"(offset));
        asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(out_hi) : "r"(hi), "r"(lo), "r"(offset));
    }

    return __double_as_longlong(__hiloint2double(out_hi, out_lo));
}
#else
// Generic rotate; n must not be 0 (that would shift by 64 bits).
#define ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
#endif
||||
|
||||
// Build a 64-bit value from two consecutive 32-bit words at p:
// p[0] supplies the low half, p[1] the high half.
// The argument is parenthesized so pointer expressions such as `buf + 2` work.
#define U32TO64_LE(p) \
    (((uint64_t)(*(p))) | (((uint64_t)(*((p) + 1))) << 32))

// Store v as two 32-bit words: low half to p[0], high half to p[1].
// Wrapped in do/while(0) so the macro behaves as one statement (safe after
// `if` without braces). Both p and v are evaluated twice, so avoid arguments
// with side effects.
#define U64TO32_LE(p, v) \
    do { *(p) = (uint32_t)((v)); *((p) + 1) = (uint32_t)((v) >> 32); } while (0)
||||
|
||||
// Host copy of the 24 round constants XORed into lane 0 at the end of each
// round of keccak_block; uploaded to c_keccak_round_constants at init time.
static const uint64_t host_keccak_round_constants[24] = {
    0x0000000000000001ull, 0x0000000000008082ull, 0x800000000000808aull,
    0x8000000080008000ull, 0x000000000000808bull, 0x0000000080000001ull,
    0x8000000080008081ull, 0x8000000000008009ull, 0x000000000000008aull,
    0x0000000000000088ull, 0x0000000080008009ull, 0x000000008000000aull,
    0x000000008000808bull, 0x800000000000008bull, 0x8000000000008089ull,
    0x8000000000008003ull, 0x8000000000008002ull, 0x8000000000000080ull,
    0x000000000000800aull, 0x800000008000000aull, 0x8000000080008081ull,
    0x8000000000008080ull, 0x0000000080000001ull, 0x8000000080008008ull
};
||||
|
||||
__constant__ uint64_t c_keccak_round_constants[24]; |
||||
|
||||
/*
 * Absorb one 72-byte block into the 25-lane state and permute it in place.
 *
 *   s                       25 x 64-bit state lanes, updated in place
 *   in                      18 x 32-bit input words (9 lanes, low word first)
 *   keccak_round_constants  24 round constants, one per round
 */
static __device__ __forceinline__ void
keccak_block(uint64_t *s, const uint32_t *in, const uint64_t *keccak_round_constants) {
    uint64_t parity[5], delta[5];

    /* absorb input: XOR nine lanes, each built from two consecutive words */
    #pragma unroll 9
    for (int lane = 0; lane < 72 / 8; lane++) {
        const uint32_t *pair = in + 2 * lane;
        s[lane] ^= U32TO64_LE(pair);
    }

    for (int r = 0; r < 24; r++) {
        /* theta: c[x] = a[0,x] ^ a[1,x] ^ .. a[4,x] */
        #pragma unroll 5
        for (int x = 0; x < 5; x++)
            parity[x] = s[x] ^ s[x + 5] ^ s[x + 10] ^ s[x + 15] ^ s[x + 20];

        /* theta: d[x] = c[x+4] ^ rotl(c[x+1], 1), indices taken modulo 5 */
        #pragma unroll 5
        for (int x = 0; x < 5; x++)
            delta[x] = parity[(x + 4) % 5] ^ ROTL64(parity[(x + 1) % 5], 1);

        /* theta: every lane of column x ^= d[x] */
        #pragma unroll 25
        for (int k = 0; k < 25; k++)
            s[k] ^= delta[k % 5];

        /* rho pi: in-place cycle through the lanes; the statement order matters */
        const uint64_t first = s[ 1];
        s[ 1] = ROTL64(s[ 6], 44);
        s[ 6] = ROTL64(s[ 9], 20);
        s[ 9] = ROTL64(s[22], 61);
        s[22] = ROTL64(s[14], 39);
        s[14] = ROTL64(s[20], 18);
        s[20] = ROTL64(s[ 2], 62);
        s[ 2] = ROTL64(s[12], 43);
        s[12] = ROTL64(s[13], 25);
        s[13] = ROTL64(s[19],  8);
        s[19] = ROTL64(s[23], 56);
        s[23] = ROTL64(s[15], 41);
        s[15] = ROTL64(s[ 4], 27);
        s[ 4] = ROTL64(s[24], 14);
        s[24] = ROTL64(s[21],  2);
        s[21] = ROTL64(s[ 8], 55);
        s[ 8] = ROTL64(s[16], 45);
        s[16] = ROTL64(s[ 5], 36);
        s[ 5] = ROTL64(s[ 3], 28);
        s[ 3] = ROTL64(s[18], 21);
        s[18] = ROTL64(s[17], 15);
        s[17] = ROTL64(s[11], 10);
        s[11] = ROTL64(s[ 7],  6);
        s[ 7] = ROTL64(s[10],  3);
        s[10] = ROTL64(first,  1);

        /* chi: within each row of five, a[j] ^= ~a[j+1] & a[j+2] */
        #pragma unroll 5
        for (int row = 0; row < 25; row += 5) {
            /* the first two lanes are overwritten before their last use */
            const uint64_t a0 = s[row];
            const uint64_t a1 = s[row + 1];
            s[row]     ^= (~a1) & s[row + 2];
            s[row + 1] ^= (~s[row + 2]) & s[row + 3];
            s[row + 2] ^= (~s[row + 3]) & s[row + 4];
            s[row + 3] ^= (~s[row + 4]) & a0;
            s[row + 4] ^= (~a0) & a1;
        }

        /* iota: a[0,0] ^= round constant */
        s[0] ^= keccak_round_constants[r];
    }
}
||||
|
||||
/*
 * One thread per nonce: finishes the Keccak-512 of the 88-byte block header
 * prepared by jackpot_keccak512_cpu_setBlock_88 and stores 64 bytes per thread.
 *
 *   threads      number of nonces to process; surplus threads exit early
 *   startNounce  nonce handled by thread 0
 *   g_hash       output buffer, 8 x uint64_t per thread
 *
 * Expects a 1D grid of 1D blocks; reads c_PaddedMessage, c_State and
 * c_keccak_round_constants from constant memory.
 */
__global__ void jackpot_keccak512_gpu_hash_88(int threads, uint32_t startNounce, uint64_t *g_hash)
{
    const int thread = (blockDim.x * blockIdx.x + threadIdx.x);
    if (thread >= threads)
        return;

    const uint32_t nounce = startNounce + thread;

    // index of this thread's output slot
    const int hashPosition = nounce - startNounce;

    // private copy of the padded remainder of the message
    uint32_t message[18];
    #pragma unroll 18
    for (int w = 0; w < 18; w++)
        message[w] = c_PaddedMessage[w];

    // insert this thread's nonce (byte-swapped)
    message[1] = cuda_swab32(nounce);

    // start from the state precomputed on the host
    uint64_t keccak_gpu_state[25];
    #pragma unroll 25
    for (int lane = 0; lane < 25; lane++)
        keccak_gpu_state[lane] = c_State[lane];

    // absorb the final block and permute
    keccak_block(keccak_gpu_state, message, c_keccak_round_constants);

    // write the first eight lanes as sixteen 32-bit words, low word first
    uint32_t *outpHash = (uint32_t*)&g_hash[8 * hashPosition];
    #pragma unroll 8
    for (int lane = 0; lane < 8; lane++) {
        const uint64_t v = keccak_gpu_state[lane];
        outpHash[2 * lane]     = (uint32_t)v;
        outpHash[2 * lane + 1] = (uint32_t)(v >> 32);
    }
}
||||
|
||||
// Setup functions
||||
// Upload the Keccak round constants to constant memory on the current device.
//   thr_id   miner thread index; only used to tag the error message
//   threads  unused
// A failed upload is reported on stderr instead of being silently ignored.
__host__ void jackpot_keccak512_cpu_init(int thr_id, int threads)
{
    // copy the hash tables into GPU memory
    cudaError_t err = cudaMemcpyToSymbol( c_keccak_round_constants,
                                          host_keccak_round_constants,
                                          sizeof(host_keccak_round_constants),
                                          0, cudaMemcpyHostToDevice);
    if (err != cudaSuccess)
        fprintf(stderr, "GPU #%d: uploading Keccak round constants failed: %s\n",
                thr_id, cudaGetErrorString(err));
}
||||
|
||||
// Parameters of the host-side reference permutation:
// permutation width and rate, both in bits.
#define cKeccakB    1600
#define cKeccakR    576

#define cKeccakR_SizeInBytes    (cKeccakR / 8)
#define crypto_hash_BYTES       64

// Only the 1600-bit permutation (64-bit lanes, 24 rounds) is configured.
#if     (cKeccakB   == 1600)
    typedef unsigned long long  UINT64;
    typedef UINT64 tKeccakLane;
    #define cKeccakNumberOfRounds   24
#endif

#define cKeccakLaneSizeInBits   (sizeof(tKeccakLane) * 8)

// Rotate lane `a` left by `offset` bits. The argument is parenthesized before
// the cast so that a compound expression is converted as a whole.
// `offset` must not be a multiple of the lane size (that would shift by the
// full lane width).
#define ROL(a, offset) ((((tKeccakLane)(a)) << ((offset) % cKeccakLaneSizeInBits)) ^ (((tKeccakLane)(a)) >> (cKeccakLaneSizeInBits-((offset) % cKeccakLaneSizeInBits))))
#if ((cKeccakB/25) == 8)
    // With 8-bit lanes a rotation by a multiple of 8 is the identity.
    #define ROL_mult8(a, offset) ((tKeccakLane)(a))
#else
    #define ROL_mult8(a, offset) ROL(a, offset)
#endif
void KeccakF( tKeccakLane * state, const tKeccakLane *in, int laneCount );
||||
|
||||
// Round constants consumed by KeccakF, one per round. The tail of the table
// is included step by step depending on the configured permutation width.
const tKeccakLane KeccakF_RoundConstants[cKeccakNumberOfRounds] =
{
    0x0000000000000001ULL, 0x0000000000008082ULL,
    0x800000000000808aULL, 0x8000000080008000ULL,
    0x000000000000808bULL, 0x0000000080000001ULL,
    0x8000000080008081ULL, 0x8000000000008009ULL,
    0x000000000000008aULL, 0x0000000000000088ULL,
    0x0000000080008009ULL, 0x000000008000000aULL,
    0x000000008000808bULL, 0x800000000000008bULL,
    0x8000000000008089ULL, 0x8000000000008003ULL,
    0x8000000000008002ULL, 0x8000000000000080ULL
#if     (cKeccakB   >= 400)
  , 0x000000000000800aULL, 0x800000008000000aULL
#endif
#if     (cKeccakB   >= 800)
  , 0x8000000080008081ULL, 0x8000000000008080ULL
#endif
#if     (cKeccakB   == 1600)
  , 0x0000000080000001ULL, 0x8000000080008008ULL
#endif
};
||||
|
||||
/*
 * Host-side reference permutation with input absorption.
 *
 * XORs the first `laneCount` lanes of `in` into `state`, then applies
 * cKeccakNumberOfRounds rounds (theta, rho, pi, chi, iota) to the 25 lanes of
 * `state` in place. Lane (x, y) lives at index x + 5*y.
 */
void KeccakF( tKeccakLane * state, const tKeccakLane *in, int laneCount )
{
    // For every destination lane, the lane it is taken from in the pi step.
    static const int piSource[25] = {
         0,  6, 12, 18, 24,
         3,  9, 10, 16, 22,
         1,  7, 13, 19, 20,
         4,  5, 11, 17, 23,
         2,  8, 14, 15, 21
    };
    // Left-rotation applied to each lane in the rho step, indexed by source lane.
    static const int rhoOffset[25] = {
         0,  1, 62, 28, 27,
        36, 44,  6, 55, 20,
         3, 10, 43, 25, 39,
        41, 45, 15, 21,  8,
        18,  2, 61, 56, 14
    };

    // absorb the input lanes
    for ( int lane = laneCount - 1; lane >= 0; lane-- )
        state[lane] ^= in[lane];

    tKeccakLane parity[5];
    tKeccakLane delta[5];
    tKeccakLane moved[25];

    for ( int rnd = 0; rnd < cKeccakNumberOfRounds; rnd++ )
    {
        // theta: column parities, then fold the neighbouring columns into every lane
        for ( int x = 0; x < 5; x++ )
            parity[x] = state[x] ^ state[x + 5] ^ state[x + 10] ^ state[x + 15] ^ state[x + 20];
        for ( int x = 0; x < 5; x++ )
            delta[x] = parity[(x + 4) % 5] ^ ROL(parity[(x + 1) % 5], 1);
        for ( int i = 0; i < 25; i++ )
            state[i] ^= delta[i % 5];

        // rho + pi: rotate each lane and move it to its new position.
        // Lane 0 keeps its place and is not rotated (ROL must not be used with 0).
        moved[0] = state[0];
        for ( int dst = 1; dst < 25; dst++ )
        {
            const int src = piSource[dst];
            moved[dst] = ROL(state[src], rhoOffset[src]);
        }

        // chi: combine each lane with the next two lanes of its row
        for ( int row = 0; row < 25; row += 5 )
            for ( int x = 0; x < 5; x++ )
                state[row + x] = moved[row + x] ^ ((~moved[row + (x + 1) % 5]) & moved[row + (x + 2) % 5]);

        // iota: mix the round constant into lane 0
        state[0] ^= (tKeccakLane)KeccakF_RoundConstants[rnd];
    }
}
||||
|
||||
/*
 * Prepare the per-work-unit constants for jackpot_keccak512_gpu_hash_88.
 *
 *   pdata  88 bytes of block header data
 *
 * The first full rate block (72 bytes) is absorbed on the host and the
 * resulting state is uploaded to c_State. The remaining 16 bytes are padded to
 * a full block and uploaded to c_PaddedMessage, where each GPU thread later
 * inserts its own nonce. Failed uploads are reported on stderr.
 */
__host__ void jackpot_keccak512_cpu_setBlock_88(void *pdata)
{
    unsigned long long inlen = 88;
    const unsigned char *in = (const unsigned char*)pdata;

    tKeccakLane    state[5 * 5];
    unsigned char  temp[cKeccakR_SizeInBytes];

    memset( state, 0, sizeof(state) );

    for ( /* empty */; inlen >= cKeccakR_SizeInBytes; inlen -= cKeccakR_SizeInBytes, in += cKeccakR_SizeInBytes )
    {
        // Copy the block into properly aligned lanes instead of reading 64-bit
        // values through a cast byte pointer (the caller's buffer is not
        // guaranteed to be 8-byte aligned).
        tKeccakLane lanes[cKeccakR_SizeInBytes / sizeof(tKeccakLane)];
        memcpy( lanes, in, sizeof(lanes) );
        KeccakF( state, lanes, (int)(cKeccakR_SizeInBytes / sizeof(tKeccakLane)) );
    }

    // copy the state after the first round (after absorbing 72 bytes of
    // input data) into constant memory
    cudaError_t err = cudaMemcpyToSymbol( c_State,
                                          state,
                                          sizeof(state),
                                          0, cudaMemcpyHostToDevice);
    if (err != cudaSuccess)
        fprintf(stderr, "jackpot_keccak512_cpu_setBlock_88: uploading the Keccak state failed: %s\n",
                cudaGetErrorString(err));

    // padding: a 0x01 byte right after the message, 0x80 or-ed into the last byte
    memcpy( temp, in, (size_t)inlen );
    temp[inlen++] = 1;
    memset( temp+inlen, 0, cKeccakR_SizeInBytes - (size_t)inlen );
    temp[cKeccakR_SizeInBytes-1] |= 0x80;

    // copy the rest of the message and the padding into constant memory
    err = cudaMemcpyToSymbol( c_PaddedMessage,
                              temp,
                              cKeccakR_SizeInBytes,
                              0, cudaMemcpyHostToDevice);
    if (err != cudaSuccess)
        fprintf(stderr, "jackpot_keccak512_cpu_setBlock_88: uploading the padded message failed: %s\n",
                cudaGetErrorString(err));
}
||||
|
||||
/*
 * Launch the Keccak-512 kernel for `threads` consecutive nonces.
 *
 *   thr_id       miner thread index (passed on to MyStreamSynchronize)
 *   threads      number of nonces to hash
 *   startNounce  first nonce of the range
 *   d_hash       device output buffer, 64 bytes per nonce
 *   order        passed on to MyStreamSynchronize as its `situation` argument
 *
 * Launch-configuration errors are reported on stderr right after the launch.
 */
__host__ void jackpot_keccak512_cpu_hash_88(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order)
{
    const int threadsperblock = 256;

    // compute how many thread blocks we need (rounded up)
    dim3 grid((threads + threadsperblock-1)/threadsperblock);
    dim3 block(threadsperblock);

    // size of the dynamic shared memory region
    size_t shared_size = 0;

//  fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);

    jackpot_keccak512_gpu_hash_88<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash);

    // a kernel launch returns no status itself; fetch it explicitly
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess)
        fprintf(stderr, "GPU #%d: jackpot_keccak512_gpu_hash_88 launch failed: %s\n",
                thr_id, cudaGetErrorString(err));

    MyStreamSynchronize(NULL, order, thr_id);
}
@ -0,0 +1,173 @@
@@ -0,0 +1,173 @@
|
||||
|
||||
extern "C" |
||||
{ |
||||
#include "sph/sph_keccak.h" |
||||
#include "sph/sph_blake.h" |
||||
#include "sph/sph_groestl.h" |
||||
#include "sph/sph_jh.h" |
||||
#include "sph/sph_skein.h" |
||||
} |
||||
|
||||
#include "miner.h" |
||||
#include <stdint.h> |
||||
|
||||
// from cpu-miner.c
||||
extern int device_map[8]; |
||||
extern bool opt_benchmark; |
||||
|
||||
// Memory for the input/output of the chained hash functions
||||
static uint32_t *d_hash[8]; |
||||
|
||||
extern void jackpot_keccak512_cpu_init(int thr_id, int threads); |
||||
extern void jackpot_keccak512_cpu_setBlock_88(void *pdata); |
||||
extern void jackpot_keccak512_cpu_hash_88(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); |
||||
|
||||
extern void quark_check_cpu_init(int thr_id, int threads); |
||||
extern void quark_check_cpu_setTarget(const void *ptarget); |
||||
extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order); |
||||
|
||||
// Original jackpothash function, taken from a miner's source code
||||
inline unsigned int jackpothash(void *state, const void *input) |
||||
{ |
||||
sph_blake512_context ctx_blake; |
||||
sph_groestl512_context ctx_groestl; |
||||
sph_jh512_context ctx_jh; |
||||
sph_keccak512_context ctx_keccak; |
||||
sph_skein512_context ctx_skein; |
||||
|
||||
uint32_t hash[16]; |
||||
|
||||
sph_keccak512_init(&ctx_keccak); |
||||
sph_keccak512 (&ctx_keccak, input, 88); |
||||
sph_keccak512_close(&ctx_keccak, hash); |
||||
|
||||
unsigned int round_mask = ( |
||||
(unsigned int)(((unsigned char *)input)[84]) << 0 | |
||||
(unsigned int)(((unsigned char *)input)[85]) << 8 | |
||||
(unsigned int)(((unsigned char *)input)[86]) << 16 | |
||||
(unsigned int)(((unsigned char *)input)[87]) << 24 ); |
||||
unsigned int round_max = hash[0] & round_mask; |
||||
unsigned int round; |
||||
for (round = 0; round < round_max; round++) { |
||||
switch (hash[0] & 3) { |
||||
case 0: |
||||
sph_blake512_init(&ctx_blake); |
||||
sph_blake512 (&ctx_blake, hash, 64); |
||||
sph_blake512_close(&ctx_blake, hash); |
||||
break; |
||||
case 1: |
||||
sph_groestl512_init(&ctx_groestl); |
||||
sph_groestl512 (&ctx_groestl, hash, 64); |
||||
sph_groestl512_close(&ctx_groestl, hash); |
||||
break; |
||||
case 2: |
||||
sph_jh512_init(&ctx_jh); |
||||
sph_jh512 (&ctx_jh, hash, 64); |
||||
sph_jh512_close(&ctx_jh, hash); |
||||
break; |
||||
case 3: |
||||
sph_skein512_init(&ctx_skein); |
||||
sph_skein512 (&ctx_skein, hash, 64); |
||||
sph_skein512_close(&ctx_skein, hash); |
||||
break; |
||||
} |
||||
} |
||||
memcpy(state, hash, 32); |
||||
|
||||
return round_max; |
||||
} |
||||
|
||||
|
||||
// Return the number of set bits in n.
static int bit_population(uint32_t n)
{
    int count = 0;
    // each iteration clears the lowest set bit
    for (; n != 0; n &= n - 1)
        ++count;
    return count;
}
||||
|
||||
extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata, |
||||
const uint32_t *ptarget, uint32_t max_nonce, |
||||
unsigned long *hashes_done) |
||||
{ |
||||
const uint32_t first_nonce = pdata[19]; |
||||
|
||||
// TODO: entfernen für eine Release! Ist nur zum Testen! |
||||
if (opt_benchmark) { |
||||
((uint32_t*)ptarget)[7] = 0x00000f; |
||||
((uint32_t*)pdata)[21] = 0x07000000; // round_mask von 7 vorgeben |
||||
} |
||||
|
||||
const uint32_t Htarg = ptarget[7]; |
||||
|
||||
const int throughput = 256*4096; // 100; |
||||
|
||||
static bool init[8] = {0,0,0,0,0,0,0,0}; |
||||
if (!init[thr_id]) |
||||
{ |
||||
cudaSetDevice(device_map[thr_id]); |
||||
|
||||
// Konstanten kopieren, Speicher belegen |
||||
cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput); |
||||
jackpot_keccak512_cpu_init(thr_id, throughput); |
||||
quark_check_cpu_init(thr_id, throughput); |
||||
init[thr_id] = true; |
||||
} |
||||
|
||||
uint32_t endiandata[22]; |
||||
for (int k=0; k < 22; k++) |
||||
be32enc(&endiandata[k], ((uint32_t*)pdata)[k]); |
||||
|
||||
unsigned int round_mask = ( |
||||
(unsigned int)(((unsigned char *)endiandata)[84]) << 0 | |
||||
(unsigned int)(((unsigned char *)endiandata)[85]) << 8 | |
||||
(unsigned int)(((unsigned char *)endiandata)[86]) << 16 | |
||||
(unsigned int)(((unsigned char *)endiandata)[87]) << 24 ); |
||||
|
||||
// Zählen wie viele Bits in round_mask gesetzt sind |
||||
int bitcount = bit_population(round_mask); |
||||
|
||||
jackpot_keccak512_cpu_setBlock_88((void*)endiandata); |
||||
quark_check_cpu_setTarget(ptarget); |
||||
|
||||
do { |
||||
int order = 0; |
||||
|
||||
// erstes Blake512 Hash mit CUDA |
||||
jackpot_keccak512_cpu_hash_88(thr_id, throughput, pdata[19], d_hash[thr_id], order++); |
||||
|
||||
// TODO: hier fehlen jetzt natürlich noch die anderen Hashrunden. |
||||
// bei round_mask=7 haben wir eine 1:8 Chance, dass das Hash dennoch |
||||
// die Kriterien erfüllt wenn hash[0] & round_mask zufällig 0 ist. |
||||
|
||||
// Scan nach Gewinner Hashes auf der GPU |
||||
uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); |
||||
if (foundNonce != 0xffffffff) |
||||
{ |
||||
uint32_t vhash64[8]; |
||||
be32enc(&endiandata[19], foundNonce); |
||||
|
||||
// diese jackpothash Funktion gibt die Zahl der zusätzlichen Runden zurück |
||||
unsigned int rounds = jackpothash(vhash64, endiandata); |
||||
|
||||
// wir akzeptieren nur solche Hashes wo ausschliesslich Keccak verwendet wurde |
||||
if (rounds == 0) { |
||||
if ((vhash64[7]<=Htarg) && fulltest(vhash64, ptarget)) { |
||||
|
||||
pdata[19] = foundNonce; |
||||
*hashes_done = (foundNonce - first_nonce + 1) / (1 << bitcount); |
||||
return 1; |
||||
} else { |
||||
applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU (%d rounds)!", thr_id, foundNonce, rounds); |
||||
} |
||||
} |
||||
} |
||||
|
||||
pdata[19] += throughput; |
||||
|
||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart); |
||||
|
||||
*hashes_done = (pdata[19] - first_nonce + 1) / (1 << bitcount); |
||||
return 0; |
||||
} |
@ -0,0 +1,622 @@
@@ -0,0 +1,622 @@
|
||||
// Version of Groestl specialized for Myriadcoin, incl. bitslice
||||
|
||||
#include <cuda.h> |
||||
#include "cuda_runtime.h" |
||||
#include "device_launch_parameters.h" |
||||
|
||||
#include <stdio.h> |
||||
#include <memory.h> |
||||
|
||||
// it's unfortunate that this is a compile time constant. |
||||
#define MAXWELL_OR_FERMI 0 |
||||
|
||||
// aus cpu-miner.c |
||||
extern int device_map[8]; |
||||
|
||||
// aus heavy.cu |
||||
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); |
||||
|
||||
// TODO: replace the following definitions with a header later
||||
typedef unsigned char uint8_t; |
||||
typedef unsigned short uint16_t; |
||||
typedef unsigned int uint32_t; |
||||
|
||||
// this structure is requested in the init function
||||
static cudaDeviceProp props; |
||||
|
||||
// globaler Speicher für alle HeftyHashes aller Threads |
||||
__constant__ uint32_t pTarget[8]; // Single GPU |
||||
extern uint32_t *d_resultNonce[8]; |
||||
|
||||
__constant__ uint32_t myriadgroestl_gpu_msg[32]; |
||||
|
||||
// must be expanded
||||
__constant__ uint32_t myr_sha256_gpu_constantTable[64]; |
||||
__constant__ uint32_t myr_sha256_gpu_hashTable[8]; |
||||
|
||||
// Host copy of the eight SHA-256 starting values; the device copy
// (myr_sha256_gpu_hashTable) seeds the registers in myriadgroestl_gpu_sha256.
uint32_t myr_sha256_cpu_hashTable[] = {
    0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
    0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
};

// Host copy of the 64 SHA-256 round constants, in round order.
uint32_t myr_sha256_cpu_constantTable[] = {
    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
};
||||
|
||||
// 32-bit rotate right. n must be in 1..31 for the shift-based variant.
#if __CUDA_ARCH__ < 350
    // Kepler (Compute 3.0)
    #define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
#else
    // Kepler (Compute 3.5)
    #define ROTR32(x, n) __funnelshift_r( (x), (x), (n) )
#endif

// SHA-256 building blocks. Every macro argument is parenthesized so that
// compound expressions can be passed safely.
#define R(x, n)       ((x) >> (n))
#define Ch(x, y, z)   (((x) & ((y) ^ (z))) ^ (z))
#define Maj(x, y, z)  (((x) & ((y) | (z))) | ((y) & (z)))
#define S0(x)         (ROTR32(x, 2) ^ ROTR32(x, 13) ^ ROTR32(x, 22))
#define S1(x)         (ROTR32(x, 6) ^ ROTR32(x, 11) ^ ROTR32(x, 25))
#define s0(x)         (ROTR32(x, 7) ^ ROTR32(x, 18) ^ R(x, 3))
#define s1(x)         (ROTR32(x, 17) ^ ROTR32(x, 19) ^ R(x, 10))

// Byte-swap a 32-bit value. The argument is converted to uint32_t first so
// that a plain int such as 0x80 is not shifted into the sign bit.
#define SWAB32(x)     ( (((uint32_t)(x) & 0x000000FFu) << 24) | (((uint32_t)(x) & 0x0000FF00u) << 8) | (((uint32_t)(x) & 0x00FF0000u) >> 8) | (((uint32_t)(x) & 0xFF000000u) >> 24) )
||||
|
||||
// One SHA-256 round: update the eight working registers with round constant k
// and message-schedule word w.
static __device__ __forceinline__ void myriadgroestl_sha256_round(uint32_t *regs, uint32_t k, uint32_t w)
{
    uint32_t T1, T2;
    T1 = regs[7] + S1(regs[4]) + Ch(regs[4], regs[5], regs[6]) + k + w;
    T2 = S0(regs[0]) + Maj(regs[0], regs[1], regs[2]);

    // shift the registers down by one position (a..g -> b..h)
    #pragma unroll 7
    for (int l=6; l >= 0; l--) regs[l+1] = regs[l];
    regs[0] = T1 + T2;
    regs[4] += T1;
}

// Compress one 16-word block into the chaining value `hash` (64 rounds).
// W1 holds the block on entry and is overwritten by the message schedule.
static __device__ __forceinline__ void myriadgroestl_sha256_transform(uint32_t *W1, uint32_t *hash)
{
    uint32_t W2[16];
    uint32_t regs[8];

    #pragma unroll 8
    for(int k=0;k<8;k++)
        regs[k] = hash[k];

    // rounds 0..15 use the block words directly
    #pragma unroll 16
    for(int j=0;j<16;j++)
        myriadgroestl_sha256_round(regs, myr_sha256_gpu_constantTable[j], W1[j]);

    // rounds 16..63: three passes, each expanding the next 16 schedule words
    #pragma unroll 3
    for(int k=0;k<3;k++)
    {
        #pragma unroll 2
        for(int j=0;j<2;j++)
            W2[j] = s1(W1[14+j]) + W1[9+j] + s0(W1[1+j]) + W1[j];
        #pragma unroll 5
        for(int j=2;j<7;j++)
            W2[j] = s1(W2[j-2]) + W1[9+j] + s0(W1[1+j]) + W1[j];

        #pragma unroll 8
        for(int j=7;j<15;j++)
            W2[j] = s1(W2[j-2]) + W2[j-7] + s0(W1[1+j]) + W1[j];

        W2[15] = s1(W2[13]) + W2[8] + s0(W2[0]) + W1[15];

        // round function
        #pragma unroll 16
        for(int j=0;j<16;j++)
            myriadgroestl_sha256_round(regs, myr_sha256_gpu_constantTable[j + 16 * (k+1)], W2[j]);

        // the freshly expanded words become the input of the next pass
        #pragma unroll 16
        for(int j=0;j<16;j++)
            W1[j] = W2[j];
    }

    #pragma unroll 8
    for(int k=0;k<8;k++)
        hash[k] += regs[k];
}

// SHA-256 of a 64-byte message, computed in place.
//   message  16 words of input; on return the first 8 words hold the digest.
// Input words and digest words are byte-swapped with SWAB32 on the way in and
// out. The two compressions (data block, then padding block) share one helper.
__device__ void myriadgroestl_gpu_sha256(uint32_t *message)
{
    uint32_t W1[16];
    uint32_t hash[8];

    // start from the initial hash values
    #pragma unroll 8
    for (int k=0; k < 8; k++)
        hash[k] = myr_sha256_gpu_hashTable[k];

    // first block: the message itself
    #pragma unroll 16
    for(int k=0;k<16;k++)
        W1[k] = SWAB32(message[k]);

    myriadgroestl_sha256_transform(W1, hash);

    // second block (because of the message padding): the terminating bit,
    // zeros, and the message length of 512 bits in the last word
    W1[0] = SWAB32(0x80);
    #pragma unroll 14
    for(int k=1;k<15;k++)
        W1[k] = 0;
    W1[15] = 512;

    myriadgroestl_sha256_transform(W1, hash);

    // done: write the digest back over the start of the message
    #pragma unroll 8
    for(int k=0;k<8;k++)
        message[k] = SWAB32(hash[k]);
}
||||
|
||||
// ---------------------------------------------------------------------------
// Small helper macros for the Groestl permutations below.
// ---------------------------------------------------------------------------

// Turn an integer literal into an unsigned 32-bit constant (appends the U suffix).
#define SPH_C32(x) ((uint32_t)(x ## U))
// Truncate a value to its low 32 bits.
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))

// Round constants. `j` is the column tag (callers pass column * 0x10) and `r`
// is the round number. The "up" value is XORed into the even-indexed state
// word of a column, the "dn" value into the odd-indexed one.
//   P: up = j + r,        dn = 0 (never applied)
//   Q: up = 0xFFFFFFFF,   dn = (r << 24) ^ ~(j << 24)
#define PC32up(j, r) ((uint32_t)((j) + (r)))
#define PC32dn(j, r) 0
#define QC32up(j, r) 0xFFFFFFFF
#define QC32dn(j, r) (((uint32_t)(r) << 24) ^ SPH_T32(~((uint32_t)(j) << 24)))

// Byte extraction via the __byte_perm intrinsic; B32_n(x) yields byte n of x,
// zero-extended to 32 bits. The equivalent shift/mask form is shown alongside.
#define B32_0(x) __byte_perm(x, 0, 0x4440)   // ((x) & 0xFF)
#define B32_1(x) __byte_perm(x, 0, 0x4441)   // (((x) >> 8) & 0xFF)
#define B32_2(x) __byte_perm(x, 0, 0x4442)   // (((x) >> 16) & 0xFF)
#define B32_3(x) __byte_perm(x, 0, 0x4443)   // ((x) >> 24)
||||
|
||||
// ---------------------------------------------------------------------------
// Lookup-table access macros T0up .. T3dn.
//
// Each table has 256 32-bit entries. A table is read either from the shared
// memory image (table n starts at word offset n * 256, in the order
// T0up, T0dn, T1up, T1dn, T2up, T2dn, T3up, T3dn) or through its texture
// reference. The shared-memory variants refer to a variable literally named
// `mixtabs`, which therefore has to be in scope wherever the macros are used.
//
// NOTE(review): the architecture switch is spelled MAXWELL_OR_FEMRI here; it
// has to match the spelling used where the macro is defined (not visible in
// this part of the file), otherwise the #if silently evaluates to 0.
// ---------------------------------------------------------------------------

// 32-bit word `idx` of the shared-memory table image.
#define MIXTAB_WORD(idx) (*((uint32_t*)mixtabs + (idx)))

#if MAXWELL_OR_FEMRI
#define USE_SHARED 1
// Maxwell and Fermi cards get the best speed with SHARED access it seems.
#if USE_SHARED
#define T0up(x) MIXTAB_WORD(     (x))
#define T0dn(x) MIXTAB_WORD(256+(x))
#define T1up(x) MIXTAB_WORD(512+(x))
#define T1dn(x) MIXTAB_WORD(768+(x))
#define T2up(x) MIXTAB_WORD(1024+(x))
#define T2dn(x) MIXTAB_WORD(1280+(x))
#define T3up(x) MIXTAB_WORD(1536+(x))
#define T3dn(x) MIXTAB_WORD(1792+(x))
#else
// Pure texture access (only reachable if USE_SHARED above is set to 0).
#define T0up(x) tex1Dfetch(t0up1, x)
#define T0dn(x) tex1Dfetch(t0dn1, x)
#define T1up(x) tex1Dfetch(t1up1, x)
#define T1dn(x) tex1Dfetch(t1dn1, x)
#define T2up(x) tex1Dfetch(t2up1, x)
#define T2dn(x) tex1Dfetch(t2dn1, x)
#define T3up(x) tex1Dfetch(t3up1, x)
#define T3dn(x) tex1Dfetch(t3dn1, x)
#endif
#else
#define USE_SHARED 1
// a healthy mix between shared and textured access provides the highest speed on Compute 3.0 and 3.5!
#define T0up(x) MIXTAB_WORD(     (x))
#define T0dn(x) tex1Dfetch(t0dn1, x)
#define T1up(x) tex1Dfetch(t1up1, x)
#define T1dn(x) MIXTAB_WORD(768+(x))
#define T2up(x) tex1Dfetch(t2up1, x)
#define T2dn(x) MIXTAB_WORD(1280+(x))
#define T3up(x) MIXTAB_WORD(1536+(x))
#define T3dn(x) tex1Dfetch(t3dn1, x)
#endif
||||
|
||||
// Texture references, one per lookup table (T0..T3, "up" and "dn" halves).
// They are read in device code through tex1Dfetch().
texture<unsigned int, 1, cudaReadModeElementType> t0up1, t0dn1;
texture<unsigned int, 1, cudaReadModeElementType> t1up1, t1dn1;
texture<unsigned int, 1, cudaReadModeElementType> t2up1, t2dn1;
texture<unsigned int, 1, cudaReadModeElementType> t3up1, t3dn1;

// Host-side copies of the same tables; defined in another translation unit.
extern uint32_t T0up_cpu[], T0dn_cpu[];
extern uint32_t T1up_cpu[], T1dn_cpu[];
extern uint32_t T2up_cpu[], T2dn_cpu[];
extern uint32_t T3up_cpu[], T3dn_cpu[];
||||
|
||||
// Reverse the byte order of a 32-bit value (endianness swap).
//
// Every use of the argument is parenthesised so that compound arguments such
// as `a | b` or `a + b` expand correctly. The argument is evaluated four
// times, so it must not have side effects.
//
// SWAB32 is already used earlier in this file, so a previous definition must
// exist; the #undef avoids an "incompatible redefinition" diagnostic.
#undef SWAB32
#define SWAB32(x) ( (((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | (((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24) )
||||
|
||||
|
||||
// "P" permutation, applied in place to the 32-word state `a`.
//
//   a        32 x uint32_t state; overwritten with the permuted state.
//   mixtabs  shared-memory table image. The parameter is not referenced by
//            name in the code below, but the T*up/T*dn macros expand to
//            accesses through an identifier called `mixtabs` in the
//            configurations that read tables from shared memory, so the
//            parameter has to keep exactly this name.
//
// Each of the 14 rounds XORs a round constant into the even-indexed words and
// then recombines the state through the eight lookup tables ("RBTT").
__device__ __forceinline__ void myriadgroestl_perm_P(uint32_t *a, char *mixtabs)
{
    uint32_t mixed[32];

    // The round loop is intentionally left rolled (unroll hint disabled).
    //#pragma unroll 14
    for (int round = 0; round < 14; round++)
    {
        // Round constant: word 2*col ^= (col * 0x10 + round); odd words are untouched.
#pragma unroll 16
        for (int col = 0; col < 16; col++)
            a[2 * col] ^= PC32up(col * 0x10, round);

        // RBTT: each pair of output words (k, k+1) is built from one byte of
        // eight input words; indices wrap around modulo 32.
#pragma unroll 16
        for (int k = 0; k < 32; k += 2)
        {
            const uint32_t e0 = B32_0(a[(k     ) & 0x1f]);
            const uint32_t e1 = B32_1(a[(k +  2) & 0x1f]);
            const uint32_t e2 = B32_2(a[(k +  4) & 0x1f]);
            const uint32_t e3 = B32_3(a[(k +  6) & 0x1f]);

            const uint32_t o0 = B32_0(a[(k +  9) & 0x1f]);
            const uint32_t o1 = B32_1(a[(k + 11) & 0x1f]);
            const uint32_t o2 = B32_2(a[(k + 13) & 0x1f]);
            const uint32_t o3 = B32_3(a[(k + 23) & 0x1f]);

            mixed[k + 0] = T0up( e0 ) ^ T1up( e1 ) ^ T2up( e2 ) ^ T3up( e3 ) ^
                           T0dn( o0 ) ^ T1dn( o1 ) ^ T2dn( o2 ) ^ T3dn( o3 );

            mixed[k + 1] = T0dn( e0 ) ^ T1dn( e1 ) ^ T2dn( e2 ) ^ T3dn( e3 ) ^
                           T0up( o0 ) ^ T1up( o1 ) ^ T2up( o2 ) ^ T3up( o3 );
        }

        // The round output becomes the next round's input.
#pragma unroll 32
        for (int w = 0; w < 32; w++)
            a[w] = mixed[w];
    }
}
||||
|
||||
// "Q" permutation, applied in place to the 32-word state `a`.
//
//   a        32 x uint32_t state; overwritten with the permuted state.
//   mixtabs  shared-memory table image. The T*up/T*dn macros expand to
//            accesses through an identifier called `mixtabs` in the
//            configurations that read tables from shared memory, so the
//            parameter has to keep exactly this name.
//
// Same structure as the P permutation, but with different round constants
// (both words of every column are modified) and different word offsets in
// the table-recombination step.
__device__ __forceinline__ void myriadgroestl_perm_Q(uint32_t *a, char *mixtabs)
{
    // The round loop is intentionally left rolled (unroll hint disabled).
    //#pragma unroll 14
    for (int round = 0; round < 14; round++)
    {
        uint32_t mixed[32];

        // Round constant: even word ^= 0xFFFFFFFF,
        // odd word ^= (round << 24) ^ ~((col * 0x10) << 24).
#pragma unroll 16
        for (int col = 0; col < 16; col++)
        {
            a[2 * col    ] ^= QC32up(col * 0x10, round);
            a[2 * col + 1] ^= QC32dn(col * 0x10, round);
        }

        // RBTT: each pair of output words (k, k+1) is built from one byte of
        // eight input words; indices wrap around modulo 32.
#pragma unroll 16
        for (int k = 0; k < 32; k += 2)
        {
            const uint32_t e0 = B32_0(a[(k +  2) & 0x1f]);
            const uint32_t e1 = B32_1(a[(k +  6) & 0x1f]);
            const uint32_t e2 = B32_2(a[(k + 10) & 0x1f]);
            const uint32_t e3 = B32_3(a[(k + 22) & 0x1f]);

            const uint32_t o0 = B32_0(a[(k +  1) & 0x1f]);
            const uint32_t o1 = B32_1(a[(k +  5) & 0x1f]);
            const uint32_t o2 = B32_2(a[(k +  9) & 0x1f]);
            const uint32_t o3 = B32_3(a[(k + 13) & 0x1f]);

            mixed[k + 0] = T0up( e0 ) ^ T1up( e1 ) ^ T2up( e2 ) ^ T3up( e3 ) ^
                           T0dn( o0 ) ^ T1dn( o1 ) ^ T2dn( o2 ) ^ T3dn( o3 );

            mixed[k + 1] = T0dn( e0 ) ^ T1dn( e1 ) ^ T2dn( e2 ) ^ T3dn( e3 ) ^
                           T0up( o0 ) ^ T1up( o1 ) ^ T2up( o2 ) ^ T3up( o3 );
        }

        // The round output becomes the next round's input.
#pragma unroll 32
        for (int w = 0; w < 32; w++)
            a[w] = mixed[w];
    }
}
||||
|
||||
// Kernel: hash one candidate nonce per thread and report a nonce whose
// result does not exceed the target.
//
//   threads      number of nonces to test; threads with a global index
//                >= threads do no hashing (they still help load the tables).
//   startNounce  nonce tested by global thread 0; thread i tests startNounce + i.
//   resNounce    device pointer to one uint32_t. The smallest qualifying nonce
//                is stored there with atomicMin, so the caller must preset it
//                to 0xFFFFFFFF ("nothing found") before the launch.
//
// Launch layout: 1-D grid of 1-D blocks. When USE_SHARED is set the launch
// must provide 8 * 256 * sizeof(uint32_t) bytes of dynamic shared memory.
__global__ void
myriadgroestl_gpu_hash(int threads, uint32_t startNounce, uint32_t *resNounce)
{
#if USE_SHARED
    extern __shared__ char mixtabs[];
    uint32_t *sharedTabs = (uint32_t*)mixtabs;

    // Copy the eight 256-entry tables from texture to shared memory.
    // A block-stride loop is used so that every entry is filled even when
    // the block has fewer than 256 threads.
    for (int i = threadIdx.x; i < 256; i += blockDim.x)
    {
        sharedTabs[        i] = tex1Dfetch(t0up1, i);
        sharedTabs[ 256 + i] = tex1Dfetch(t0dn1, i);
        sharedTabs[ 512 + i] = tex1Dfetch(t1up1, i);
        sharedTabs[ 768 + i] = tex1Dfetch(t1dn1, i);
        sharedTabs[1024 + i] = tex1Dfetch(t2up1, i);
        sharedTabs[1280 + i] = tex1Dfetch(t2dn1, i);
        sharedTabs[1536 + i] = tex1Dfetch(t3up1, i);
        sharedTabs[1792 + i] = tex1Dfetch(t3dn1, i);
    }

    // Reached by every thread of the block: tables must be complete before use.
    __syncthreads();

    char *tabs = mixtabs;
#else
    char *tabs = NULL;
#endif

    int thread = (blockDim.x * blockIdx.x + threadIdx.x);
    if (thread < threads)
    {
        // GROESTL
        uint32_t message[32];
        uint32_t state[32];

        // Start from the prepared message block and insert this thread's nonce.
#pragma unroll 32
        for (int k = 0; k < 32; k++) message[k] = myriadgroestl_gpu_msg[k];

        uint32_t nounce = startNounce + thread;
        message[19] = SWAB32(nounce);

#pragma unroll 32
        for (int u = 0; u < 32; u++) state[u] = message[u];
        state[31] ^= 0x20000;

        // First step: state = P(state) ^ Q(message), with the 0x20000 tweak
        // applied to word 31 before and after P.
        myriadgroestl_perm_P(state, tabs);
        state[31] ^= 0x20000;
        myriadgroestl_perm_Q(message, tabs);

#pragma unroll 32
        for (int u = 0; u < 32; u++) state[u] ^= message[u];

        // Second step: state ^= P(state).
#pragma unroll 32
        for (int u = 0; u < 32; u++) message[u] = state[u];

        myriadgroestl_perm_P(message, tabs);

#pragma unroll 32
        for (int u = 0; u < 32; u++) state[u] ^= message[u];

        // The upper 16 words are fed to SHA-256, which writes its 8-word
        // result back into out_state[0..7].
        uint32_t out_state[16];
#pragma unroll 16
        for (int u = 0; u < 16; u++) out_state[u] = state[u + 16];
        myriadgroestl_gpu_sha256(out_state);

        // Compare against the target starting at word 7: the first word that
        // differs decides; equality on all words counts as a hit.
        bool rc = true;
#pragma unroll 8
        for (int i = 7; i >= 0; i--) {
            if (out_state[i] != pTarget[i]) {
                rc = (out_state[i] < pTarget[i]);
                break;
            }
        }

        // Many threads may qualify at the same time; atomicMin makes the
        // "keep the smallest nonce" update race-free.
        if (rc)
            atomicMin(resNounce, nounce);
    }
}
||||
|
||||
// texDef(texname, texmem, texsource, texsize)
//
// Declares a device pointer `texmem` in the enclosing scope, allocates
// `texsize` bytes for it, uploads `texsource` (host memory) into it and binds
// it to the texture reference `texname` (unnormalised coordinates, point
// filtering, clamped addressing, unsigned-int channel format).
// The macro expands to several statements and a declaration, so it can only
// be used where a declaration is allowed and once per `texmem` name per scope.
// None of the CUDA return codes are checked and the allocation is not freed.
#define texDef(texname, texmem, texsource, texsize) \
    unsigned int *texmem; \
    texname.normalized = 0; \
    texname.filterMode = cudaFilterModePoint; \
    texname.addressMode[0] = cudaAddressModeClamp; \
    cudaMalloc(&texmem, texsize); \
    cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \
    { cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned int>(); \
      cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); }
||||
|
||||
// Setup functions

// One-time per-GPU-thread initialisation.
//
//   thr_id   index of the mining thread; selects the CUDA device through
//            device_map[] and the slot in d_resultNonce[].
//   threads  not used by this function.
//
// Uploads the SHA-256 tables to their device symbols, queries the device
// properties into `props`, creates and binds the eight lookup-table textures
// and allocates the device word that receives the winning nonce.
// CUDA return codes are not checked.
__host__ void myriadgroestl_cpu_init(int thr_id, int threads)
{
    cudaSetDevice(device_map[thr_id]);

    // SHA-256 initial hash values (8 words) and round constants (64 words)
    cudaMemcpyToSymbol(myr_sha256_gpu_hashTable, myr_sha256_cpu_hashTable, 8 * sizeof(uint32_t));
    cudaMemcpyToSymbol(myr_sha256_gpu_constantTable, myr_sha256_cpu_constantTable, 64 * sizeof(uint32_t));

    cudaGetDeviceProperties(&props, device_map[thr_id]);

    // Initialise the textures with the texDef macro; every table has 256 entries.
    const size_t tableBytes = 256 * sizeof(uint32_t);
    texDef(t0up1, d_T0up, T0up_cpu, tableBytes);
    texDef(t0dn1, d_T0dn, T0dn_cpu, tableBytes);
    texDef(t1up1, d_T1up, T1up_cpu, tableBytes);
    texDef(t1dn1, d_T1dn, T1dn_cpu, tableBytes);
    texDef(t2up1, d_T2up, T2up_cpu, tableBytes);
    texDef(t2dn1, d_T2dn, T2dn_cpu, tableBytes);
    texDef(t3up1, d_T3up, T3up_cpu, tableBytes);
    texDef(t3dn1, d_T3dn, T3dn_cpu, tableBytes);

    // Device memory for the winning nonce
    cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t));
}
||||
|
||||
// Prepare the device for a new block header.
//
//   thr_id     index of the mining thread (selects the result slot).
//   data       host pointer to the 80-byte block header.
//   pTargetIn  host pointer to the 8-word target.
//
// Builds the padded 128-byte message block, uploads it and the target to
// their device symbols and resets the result nonce to 0xFFFFFFFF.
// The nonce inside the header is replaced per thread on the GPU.
__host__ void myriadgroestl_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
{
    // Expand the message: 80 header bytes followed by zero padding.
    uint32_t paddedHeader[32] = { 0 };
    memcpy(paddedHeader, data, 80);

    // Padding for an 80-byte message: a 0x80 marker right after the data ...
    paddedHeader[20] = 0x80;
    // ... and 0x01000000 in the last word (presumably the block count of 1
    // stored big-endian - confirm against the Groestl padding rules).
    paddedHeader[31] = 0x01000000;

    // No CPU-side hashing is needed here; the single compression step runs
    // on the GPU.
    cudaMemcpyToSymbol(myriadgroestl_gpu_msg, paddedHeader, sizeof(paddedHeader));

    // "nothing found yet"
    cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));

    cudaMemcpyToSymbol(pTarget, pTargetIn, 8 * sizeof(uint32_t));
}
||||
|
||||
// Launch the hashing kernel for `threads` consecutive nonces and fetch the result.
//
//   thr_id        index of the mining thread (selects the result slot).
//   threads       number of nonces to test.
//   startNounce   first nonce of the range.
//   outputHashes  not used by this function.
//   nounce        host pointer that receives the result word: a qualifying
//                 nonce, or 0xFFFFFFFF if none was found.
//
// CUDA return codes are not checked.
__host__ void myriadgroestl_cpu_hash(int thr_id, int threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce)
{
    // Compute 3.x and 5.x devices work best with 768 threads per block,
    // all others with 512.
    int threadsperblock;
    if (props.major >= 3)
        threadsperblock = 768;
    else
        threadsperblock = 512;

    // Number of blocks needed to cover all threads (rounded up)
    dim3 block(threadsperblock);
    dim3 grid((threads + threadsperblock - 1) / threadsperblock);

    // Size of the dynamic shared memory area: eight 256-entry tables
#if USE_SHARED
    size_t shared_size = 8 * 256 * sizeof(uint32_t);
#else
    size_t shared_size = 0;
#endif

    // Reset the result to "nothing found", then run the kernel.
    cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));
    myriadgroestl_gpu_hash<<<grid, block, shared_size>>>(threads, startNounce, d_resultNonce[thr_id]);

    // Strategic sleep command to reduce the CPU load
    MyStreamSynchronize(NULL, 0, thr_id);

    cudaMemcpy(nounce, d_resultNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
}
@ -0,0 +1,106 @@
@@ -0,0 +1,106 @@
|
||||
#include "uint256.h" |
||||
#include "sph/sph_groestl.h" |
||||
|
||||
#include "cpuminer-config.h" |
||||
#include "miner.h" |
||||
|
||||
#include <string.h> |
||||
#include <stdint.h> |
||||
#include <openssl/sha.h> |
||||
|
||||
extern bool opt_benchmark; |
||||
|
||||
void myriadgroestl_cpu_init(int thr_id, int threads); |
||||
void myriadgroestl_cpu_setBlock(int thr_id, void *data, void *pTargetIn); |
||||
void myriadgroestl_cpu_hash(int thr_id, int threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce); |
||||
|
||||
// Reverse the byte order of a 32-bit value. The argument is evaluated four
// times, so it must not have side effects.
#define SWAP32(x) \
    ((((x) >> 24) & 0x000000ffu) | (((x) >> 8) & 0x0000ff00u) | \
     (((x) << 8) & 0x00ff0000u) | (((x) << 24) & 0xff000000u))
||||
|
||||
static void myriadhash(void *state, const void *input) |
||||
{ |
||||
sph_groestl512_context ctx_groestl; |
||||
|
||||
uint32_t hashA[16], hashB[16]; |
||||
|
||||
sph_groestl512_init(&ctx_groestl); |
||||
sph_groestl512 (&ctx_groestl, input, 80); |
||||
sph_groestl512_close(&ctx_groestl, hashA); |
||||
|
||||
SHA256_CTX sha256; |
||||
SHA256_Init(&sha256); |
||||
SHA256_Update(&sha256,(unsigned char *)hashA, 64); |
||||
SHA256_Final((unsigned char *)hashB, &sha256); |
||||
memcpy(state, hashB, 32); |
||||
} |
||||
|
||||
|
||||
|
||||
extern "C" int scanhash_myriad(int thr_id, uint32_t *pdata, const uint32_t *ptarget, |
||||
uint32_t max_nonce, unsigned long *hashes_done) |
||||
{ |
||||
uint32_t start_nonce = pdata[19]++; |
||||
const uint32_t throughPut = 128 * 1024; |
||||
// const uint32_t throughPut = 1;
|
||||
uint32_t *outputHash = (uint32_t*)malloc(throughPut * 16 * sizeof(uint32_t)); |
||||
|
||||
// TODO: entfernen für eine Release! Ist nur zum Testen!
|
||||
if (opt_benchmark) |
||||
((uint32_t*)ptarget)[7] = 0x0000ff; |
||||
|
||||
const uint32_t Htarg = ptarget[7]; |
||||
|
||||
// init
|
||||
static bool init[8] = { false, false, false, false, false, false, false, false }; |
||||
if(!init[thr_id]) |
||||
{ |
||||
#if BIG_DEBUG |
||||
#else |
||||
myriadgroestl_cpu_init(thr_id, throughPut); |
||||
#endif |
||||
init[thr_id] = true; |
||||
} |
||||
|
||||
uint32_t endiandata[32]; |
||||
for (int kk=0; kk < 32; kk++) |
||||
be32enc(&endiandata[kk], pdata[kk]); |
||||
|
||||
// Context mit dem Endian gedrehten Blockheader vorbereiten (Nonce wird später ersetzt)
|
||||
myriadgroestl_cpu_setBlock(thr_id, endiandata, (void*)ptarget); |
||||
|
||||
do { |
||||
// GPU
|
||||
uint32_t foundNounce = 0xFFFFFFFF; |
||||
|
||||
myriadgroestl_cpu_hash(thr_id, throughPut, pdata[19], outputHash, &foundNounce); |
||||
|
||||
if(foundNounce < 0xffffffff) |
||||
{ |
||||
uint32_t tmpHash[8]; |
||||
endiandata[19] = SWAP32(foundNounce); |
||||
myriadhash(tmpHash, endiandata); |
||||
if (tmpHash[7] <= Htarg && |
||||
fulltest(tmpHash, ptarget)) { |
||||
pdata[19] = foundNounce; |
||||
*hashes_done = foundNounce - start_nonce; |
||||
free(outputHash); |
||||
return true; |
||||
} else { |
||||
applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNounce); |
||||
} |
||||
|
||||
foundNounce = 0xffffffff; |
||||
} |
||||
|
||||
if (pdata[19] + throughPut < pdata[19]) |
||||
pdata[19] = max_nonce; |
||||
else pdata[19] += throughPut; |
||||
|
||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart); |
||||
|
||||
*hashes_done = pdata[19] - start_nonce; |
||||
free(outputHash); |
||||
return 0; |
||||
} |
||||
|
@ -0,0 +1,107 @@
@@ -0,0 +1,107 @@
|
||||
#include <cuda.h> |
||||
#include "cuda_runtime.h" |
||||
#include "device_launch_parameters.h" |
||||
|
||||
#include <stdio.h> |
||||
#include <memory.h> |
||||
|
||||
// Folgende Definitionen später durch header ersetzen |
||||
typedef unsigned char uint8_t; |
||||
typedef unsigned int uint32_t; |
||||
typedef unsigned long long uint64_t; |
||||
|
||||
// the hash target that candidate hashes are tested against
||||
__constant__ uint32_t pTarget[8]; |
||||
|
||||
uint32_t *d_resNounce[8]; |
||||
uint32_t *h_resNounce[8]; |
||||
|
||||
// from heavy.cu
||||
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); |
||||
|
||||
// Kernel: test precomputed hashes against the target and report the smallest
// qualifying nonce.
//
//   threads        number of entries to check.
//   startNounce    nonce that corresponds to hash slot 0.
//   g_nonceVector  optional list of nonces, one per thread; when NULL,
//                  thread i checks nonce startNounce + i.
//   g_hash         hash storage with a stride of 16 words per nonce; the
//                  first 8 words of a slot are compared.
//   resNounce      device pointer to one uint32_t. Updated with atomicMin, so
//                  the caller must preset it to 0xFFFFFFFF before the launch.
//
// Launch layout: 1-D grid of 1-D blocks, no shared memory.
__global__ void quark_check_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_nonceVector, uint32_t *g_hash, uint32_t *resNounce)
{
    int thread = (blockDim.x * blockIdx.x + threadIdx.x);
    if (thread < threads)
    {
        // Determine the nonce handled by this thread
        uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);

        // Locate the hash that belongs to this nonce
        int hashPosition = nounce - startNounce;
        uint32_t *inpHash = &g_hash[16 * hashPosition];

        uint32_t hash[8];
#pragma unroll 8
        for (int i = 0; i < 8; i++)
            hash[i] = inpHash[i];

        // Compare against the target starting at word 7: the first word that
        // differs decides; equality on all words counts as a hit.
        bool rc = true;
#pragma unroll 8
        for (int i = 7; i >= 0; i--) {
            if (hash[i] != pTarget[i]) {
                rc = (hash[i] < pTarget[i]);
                break;
            }
        }

        // Several threads may qualify concurrently; atomicMin makes the
        // "keep the smallest nonce" update race-free.
        if (rc)
            atomicMin(resNounce, nounce);
    }
}
||||
|
||||
// Setup-Funktionen |
||||
// Allocate the per-thread result word: one copy in device memory and one in
// page-locked host memory. `threads` is not used. Return codes are not checked.
__host__ void quark_check_cpu_init(int thr_id, int threads)
{
    const size_t nonceBytes = sizeof(uint32_t);

    cudaMalloc(&d_resNounce[thr_id], nonceBytes);
    cudaMallocHost(&h_resNounce[thr_id], nonceBytes);
}
||||
|
||||
// Set the target difficulty: upload the 8-word target at `ptarget` (host
// memory) into the __constant__ array pTarget used by the check kernel.
__host__ void quark_check_cpu_setTarget(const void *ptarget)
{
    const size_t targetBytes = 8 * sizeof(uint32_t);

    cudaMemcpyToSymbol(pTarget, ptarget, targetBytes, 0, cudaMemcpyHostToDevice);
}
||||
|
||||
// Run the target check over `threads` hashes and return the result word.
//
//   thr_id         index of the mining thread (selects the result buffers).
//   threads        number of hashes to check.
//   startNounce    nonce that corresponds to hash slot 0.
//   d_nonceVector  optional device list of nonces (may be NULL).
//   d_inputHash    device hash storage, 16 words per nonce.
//   order          passed through to MyStreamSynchronize().
//
// Returns the smallest nonce whose hash meets the target, or 0xffffffff if
// there is none. CUDA return codes are not checked.
__host__ uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order)
{
    // "nothing found yet"
    cudaMemset(d_resNounce[thr_id], 0xff, sizeof(uint32_t));

    const int threadsperblock = 256;

    // Number of blocks needed to cover all threads (rounded up)
    dim3 grid((threads + threadsperblock-1)/threadsperblock);
    dim3 block(threadsperblock);

    // Size of the dynamic shared memory area
    size_t shared_size = 0;

    quark_check_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, d_nonceVector, d_inputHash, d_resNounce[thr_id]);

    // Strategic sleep command to reduce the CPU load
    MyStreamSynchronize(NULL, order, thr_id);

    // Copy the result to the host (into page-locked memory, which is faster).
    // A device-to-host cudaMemcpy() only returns once the copy has finished,
    // so no additional synchronisation is required before reading the value;
    // the former call to the deprecated cudaThreadSynchronize() was redundant.
    cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);

    return *h_resNounce[thr_id];
}
@ -0,0 +1,392 @@
@@ -0,0 +1,392 @@
|
||||
/* $Id: aes_helper.c 220 2010-06-09 09:21:50Z tp $ */ |
||||
/*
|
||||
* AES tables. This file is not meant to be compiled by itself; it |
||||
* is included by some hash function implementations. It contains |
||||
* the precomputed tables and helper macros for evaluating an AES |
||||
* round, optionally with a final XOR with a subkey. |
||||
* |
||||
* By default, this file defines the tables and macros for little-endian |
||||
* processing (i.e. it is assumed that the input bytes have been read |
||||
* from memory and assembled with the little-endian convention). If |
||||
* the 'AES_BIG_ENDIAN' macro is defined (to a non-zero integer value) |
||||
* when this file is included, then the tables and macros for big-endian |
||||
* processing are defined instead. The big-endian tables and macros have |
||||
* names distinct from the little-endian tables and macros, hence it is |
||||
* possible to have both simultaneously, by including this file twice |
||||
* (with and without the AES_BIG_ENDIAN macro). |
||||
* |
||||
* ==========================(LICENSE BEGIN)============================ |
||||
* |
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR |
||||
* |
||||
* Permission is hereby granted, free of charge, to any person obtaining |
||||
* a copy of this software and associated documentation files (the |
||||
* "Software"), to deal in the Software without restriction, including |
||||
* without limitation the rights to use, copy, modify, merge, publish, |
||||
* distribute, sublicense, and/or sell copies of the Software, and to |
||||
* permit persons to whom the Software is furnished to do so, subject to |
||||
* the following conditions: |
||||
* |
||||
* The above copyright notice and this permission notice shall be |
||||
* included in all copies or substantial portions of the Software. |
||||
* |
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||
* |
||||
* ===========================(LICENSE END)============================= |
||||
* |
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com> |
||||
*/ |
||||
|
||||
#include "sph_types.h" |
||||
#ifdef __cplusplus |
||||
extern "C"{ |
||||
#endif |
||||
#if AES_BIG_ENDIAN

/* Table constants are written in little-endian form; byte-swap them. */
#define AESx(x)   ( ((SPH_C32(x) >> 24) & SPH_C32(0x000000FF)) \
                  | ((SPH_C32(x) >>  8) & SPH_C32(0x0000FF00)) \
                  | ((SPH_C32(x) <<  8) & SPH_C32(0x00FF0000)) \
                  | ((SPH_C32(x) << 24) & SPH_C32(0xFF000000)))

#define AES0      AES0_BE
#define AES1      AES1_BE
#define AES2      AES2_BE
#define AES3      AES3_BE

/*
 * One output word of a big-endian AES round: table i is indexed with
 * byte (3 - i) of its input word, and the subkey word K is XORed in last.
 */
#define AES_COLUMN_BE(A, B, C, D, K) \
	( AES0[((A) >> 24) & 0xFF] \
	^ AES1[((B) >> 16) & 0xFF] \
	^ AES2[((C) >> 8) & 0xFF] \
	^ AES3[(D) & 0xFF] ^ (K) )

/*
 * Full round: inputs X0..X3, subkey K0..K3, outputs Y0..Y3. The input words
 * are rotated by one position for each successive output word. The outputs
 * must not alias the inputs, since the inputs are read again after Y0 has
 * been written.
 */
#define AES_ROUND_BE(X0, X1, X2, X3, K0, K1, K2, K3, Y0, Y1, Y2, Y3)   do { \
		(Y0) = AES_COLUMN_BE(X0, X1, X2, X3, K0); \
		(Y1) = AES_COLUMN_BE(X1, X2, X3, X0, K1); \
		(Y2) = AES_COLUMN_BE(X2, X3, X0, X1, K2); \
		(Y3) = AES_COLUMN_BE(X3, X0, X1, X2, K3); \
	} while (0)

/* Round without subkey (all-zero key words). */
#define AES_ROUND_NOKEY_BE(X0, X1, X2, X3, Y0, Y1, Y2, Y3) \
	AES_ROUND_BE(X0, X1, X2, X3, 0, 0, 0, 0, Y0, Y1, Y2, Y3)

#else

/* Table constants are used as written. */
#define AESx(x)   SPH_C32(x)
#define AES0      AES0_LE
#define AES1      AES1_LE
#define AES2      AES2_LE
#define AES3      AES3_LE

/*
 * One output word of a little-endian AES round: table i is indexed with
 * byte i of its input word, and the subkey word K is XORed in last.
 */
#define AES_COLUMN_LE(A, B, C, D, K) \
	( AES0[(A) & 0xFF] \
	^ AES1[((B) >> 8) & 0xFF] \
	^ AES2[((C) >> 16) & 0xFF] \
	^ AES3[((D) >> 24) & 0xFF] ^ (K) )

/*
 * Full round: inputs X0..X3, subkey K0..K3, outputs Y0..Y3. The input words
 * are rotated by one position for each successive output word. The outputs
 * must not alias the inputs, since the inputs are read again after Y0 has
 * been written.
 */
#define AES_ROUND_LE(X0, X1, X2, X3, K0, K1, K2, K3, Y0, Y1, Y2, Y3)   do { \
		(Y0) = AES_COLUMN_LE(X0, X1, X2, X3, K0); \
		(Y1) = AES_COLUMN_LE(X1, X2, X3, X0, K1); \
		(Y2) = AES_COLUMN_LE(X2, X3, X0, X1, K2); \
		(Y3) = AES_COLUMN_LE(X3, X0, X1, X2, K3); \
	} while (0)

/* Round without subkey (all-zero key words). */
#define AES_ROUND_NOKEY_LE(X0, X1, X2, X3, Y0, Y1, Y2, Y3) \
	AES_ROUND_LE(X0, X1, X2, X3, 0, 0, 0, 0, Y0, Y1, Y2, Y3)

#endif
||||
|
||||
/*
|
||||
* The AES*[] tables allow us to perform a fast evaluation of an AES |
||||
* round; table AESi[] combines SubBytes for a byte at row i, and |
||||
* MixColumns for the column where that byte goes after ShiftRows. |
||||
*/ |
||||
|
||||
static const sph_u32 AES0[256] = { |
||||
AESx(0xA56363C6), AESx(0x847C7CF8), AESx(0x997777EE), AESx(0x8D7B7BF6), |
||||
AESx(0x0DF2F2FF), AESx(0xBD6B6BD6), AESx(0xB16F6FDE), AESx(0x54C5C591), |
||||
AESx(0x50303060), AESx(0x03010102), AESx(0xA96767CE), AESx(0x7D2B2B56), |
||||
AESx(0x19FEFEE7), AESx(0x62D7D7B5), AESx(0xE6ABAB4D), AESx(0x9A7676EC), |
||||
AESx(0x45CACA8F), AESx(0x9D82821F), AESx(0x40C9C989), AESx(0x877D7DFA), |
||||
AESx(0x15FAFAEF), AESx(0xEB5959B2), AESx(0xC947478E), AESx(0x0BF0F0FB), |
||||
AESx(0xECADAD41), AESx(0x67D4D4B3), AESx(0xFDA2A25F), AESx(0xEAAFAF45), |
||||
AESx(0xBF9C9C23), AESx(0xF7A4A453), AESx(0x967272E4), AESx(0x5BC0C09B), |
||||
AESx(0xC2B7B775), AESx(0x1CFDFDE1), AESx(0xAE93933D), AESx(0x6A26264C), |
||||
AESx(0x5A36366C), AESx(0x413F3F7E), AESx(0x02F7F7F5), AESx(0x4FCCCC83), |
||||
AESx(0x5C343468), AESx(0xF4A5A551), AESx(0x34E5E5D1), AESx(0x08F1F1F9), |
||||
AESx(0x937171E2), AESx(0x73D8D8AB), AESx(0x53313162), AESx(0x3F15152A), |
||||
AESx(0x0C040408), AESx(0x52C7C795), AESx(0x65232346), AESx(0x5EC3C39D), |
||||
AESx(0x28181830), AESx(0xA1969637), AESx(0x0F05050A), AESx(0xB59A9A2F), |
||||
AESx(0x0907070E), AESx(0x36121224), AESx(0x9B80801B), AESx(0x3DE2E2DF), |
||||
AESx(0x26EBEBCD), AESx(0x6927274E), AESx(0xCDB2B27F), AESx(0x9F7575EA), |
||||
AESx(0x1B090912), AESx(0x9E83831D), AESx(0x742C2C58), AESx(0x2E1A1A34), |
||||
AESx(0x2D1B1B36), AESx(0xB26E6EDC), AESx(0xEE5A5AB4), AESx(0xFBA0A05B), |
||||
AESx(0xF65252A4), AESx(0x4D3B3B76), AESx(0x61D6D6B7), AESx(0xCEB3B37D), |
||||
AESx(0x7B292952), AESx(0x3EE3E3DD), AESx(0x712F2F5E), AESx(0x97848413), |
||||
AESx(0xF55353A6), AESx(0x68D1D1B9), AESx(0x00000000), AESx(0x2CEDEDC1), |
||||
AESx(0x60202040), AESx(0x1FFCFCE3), AESx(0xC8B1B179), AESx(0xED5B5BB6), |
||||
AESx(0xBE6A6AD4), AESx(0x46CBCB8D), AESx(0xD9BEBE67), AESx(0x4B393972), |
||||
AESx(0xDE4A4A94), AESx(0xD44C4C98), AESx(0xE85858B0), AESx(0x4ACFCF85), |
||||
AESx(0x6BD0D0BB), AESx(0x2AEFEFC5), AESx(0xE5AAAA4F), AESx(0x16FBFBED), |
||||
AESx(0xC5434386), AESx(0xD74D4D9A), AESx(0x55333366), AESx(0x94858511), |
||||
AESx(0xCF45458A), AESx(0x10F9F9E9), AESx(0x06020204), AESx(0x817F7FFE), |
||||
AESx(0xF05050A0), AESx(0x443C3C78), AESx(0xBA9F9F25), AESx(0xE3A8A84B), |
||||
AESx(0xF35151A2), AESx(0xFEA3A35D), AESx(0xC0404080), AESx(0x8A8F8F05), |
||||
AESx(0xAD92923F), AESx(0xBC9D9D21), AESx(0x48383870), AESx(0x04F5F5F1), |
||||
AESx(0xDFBCBC63), AESx(0xC1B6B677), AESx(0x75DADAAF), AESx(0x63212142), |
||||
AESx(0x30101020), AESx(0x1AFFFFE5), AESx(0x0EF3F3FD), AESx(0x6DD2D2BF), |
||||
AESx(0x4CCDCD81), AESx(0x140C0C18), AESx(0x35131326), AESx(0x2FECECC3), |
||||
AESx(0xE15F5FBE), AESx(0xA2979735), AESx(0xCC444488), AESx(0x3917172E), |
||||
AESx(0x57C4C493), AESx(0xF2A7A755), AESx(0x827E7EFC), AESx(0x473D3D7A), |
||||
AESx(0xAC6464C8), AESx(0xE75D5DBA), AESx(0x2B191932), AESx(0x957373E6), |
||||
AESx(0xA06060C0), AESx(0x98818119), AESx(0xD14F4F9E), AESx(0x7FDCDCA3), |
||||
AESx(0x66222244), AESx(0x7E2A2A54), AESx(0xAB90903B), AESx(0x8388880B), |
||||
AESx(0xCA46468C), AESx(0x29EEEEC7), AESx(0xD3B8B86B), AESx(0x3C141428), |
||||
AESx(0x79DEDEA7), AESx(0xE25E5EBC), AESx(0x1D0B0B16), AESx(0x76DBDBAD), |
||||
AESx(0x3BE0E0DB), AESx(0x56323264), AESx(0x4E3A3A74), AESx(0x1E0A0A14), |
||||
AESx(0xDB494992), AESx(0x0A06060C), AESx(0x6C242448), AESx(0xE45C5CB8), |
||||
AESx(0x5DC2C29F), AESx(0x6ED3D3BD), AESx(0xEFACAC43), AESx(0xA66262C4), |
||||
AESx(0xA8919139), AESx(0xA4959531), AESx(0x37E4E4D3), AESx(0x8B7979F2), |
||||
AESx(0x32E7E7D5), AESx(0x43C8C88B), AESx(0x5937376E), AESx(0xB76D6DDA), |
||||
AESx(0x8C8D8D01), AESx(0x64D5D5B1), AESx(0xD24E4E9C), AESx(0xE0A9A949), |
||||
AESx(0xB46C6CD8), AESx(0xFA5656AC), AESx(0x07F4F4F3), AESx(0x25EAEACF), |
||||
AESx(0xAF6565CA), AESx(0x8E7A7AF4), AESx(0xE9AEAE47), AESx(0x18080810), |
||||
AESx(0xD5BABA6F), AESx(0x887878F0), AESx(0x6F25254A), AESx(0x722E2E5C), |
||||
AESx(0x241C1C38), AESx(0xF1A6A657), AESx(0xC7B4B473), AESx(0x51C6C697), |
||||
AESx(0x23E8E8CB), AESx(0x7CDDDDA1), AESx(0x9C7474E8), AESx(0x211F1F3E), |
||||
AESx(0xDD4B4B96), AESx(0xDCBDBD61), AESx(0x868B8B0D), AESx(0x858A8A0F), |
||||
AESx(0x907070E0), AESx(0x423E3E7C), AESx(0xC4B5B571), AESx(0xAA6666CC), |
||||
AESx(0xD8484890), AESx(0x05030306), AESx(0x01F6F6F7), AESx(0x120E0E1C), |
||||
AESx(0xA36161C2), AESx(0x5F35356A), AESx(0xF95757AE), AESx(0xD0B9B969), |
||||
AESx(0x91868617), AESx(0x58C1C199), AESx(0x271D1D3A), AESx(0xB99E9E27), |
||||
AESx(0x38E1E1D9), AESx(0x13F8F8EB), AESx(0xB398982B), AESx(0x33111122), |
||||
AESx(0xBB6969D2), AESx(0x70D9D9A9), AESx(0x898E8E07), AESx(0xA7949433), |
||||
AESx(0xB69B9B2D), AESx(0x221E1E3C), AESx(0x92878715), AESx(0x20E9E9C9), |
||||
AESx(0x49CECE87), AESx(0xFF5555AA), AESx(0x78282850), AESx(0x7ADFDFA5), |
||||
AESx(0x8F8C8C03), AESx(0xF8A1A159), AESx(0x80898909), AESx(0x170D0D1A), |
||||
AESx(0xDABFBF65), AESx(0x31E6E6D7), AESx(0xC6424284), AESx(0xB86868D0), |
||||
AESx(0xC3414182), AESx(0xB0999929), AESx(0x772D2D5A), AESx(0x110F0F1E), |
||||
AESx(0xCBB0B07B), AESx(0xFC5454A8), AESx(0xD6BBBB6D), AESx(0x3A16162C) |
||||
}; |
||||
|
||||
/*
 * AES1: 256-entry table of 32-bit words, every entry written through the
 * AESx() macro (AESx is defined elsewhere; what it does to the literal is
 * not visible here).
 *
 * Each literal has the byte layout b, b, X, Y (e.g. 0x6363C6A5 at index 0).
 * Spot checks show the literal at a given index is the literal of the table
 * that precedes this one in the file rotated left by 8 bits (index 255:
 * 0x3A16162C there, 0x16162C3A here), and that AES2/AES3 continue the same
 * rotation.
 * NOTE(review): this looks like the usual AES round lookup table ("T-table")
 * layout - confirm against the AES helper that indexes these tables.
 */
static const sph_u32 AES1[256] = { |
||||
AESx(0x6363C6A5), AESx(0x7C7CF884), AESx(0x7777EE99), AESx(0x7B7BF68D), |
||||
AESx(0xF2F2FF0D), AESx(0x6B6BD6BD), AESx(0x6F6FDEB1), AESx(0xC5C59154), |
||||
AESx(0x30306050), AESx(0x01010203), AESx(0x6767CEA9), AESx(0x2B2B567D), |
||||
AESx(0xFEFEE719), AESx(0xD7D7B562), AESx(0xABAB4DE6), AESx(0x7676EC9A), |
||||
AESx(0xCACA8F45), AESx(0x82821F9D), AESx(0xC9C98940), AESx(0x7D7DFA87), |
||||
AESx(0xFAFAEF15), AESx(0x5959B2EB), AESx(0x47478EC9), AESx(0xF0F0FB0B), |
||||
AESx(0xADAD41EC), AESx(0xD4D4B367), AESx(0xA2A25FFD), AESx(0xAFAF45EA), |
||||
AESx(0x9C9C23BF), AESx(0xA4A453F7), AESx(0x7272E496), AESx(0xC0C09B5B), |
||||
AESx(0xB7B775C2), AESx(0xFDFDE11C), AESx(0x93933DAE), AESx(0x26264C6A), |
||||
AESx(0x36366C5A), AESx(0x3F3F7E41), AESx(0xF7F7F502), AESx(0xCCCC834F), |
||||
AESx(0x3434685C), AESx(0xA5A551F4), AESx(0xE5E5D134), AESx(0xF1F1F908), |
||||
AESx(0x7171E293), AESx(0xD8D8AB73), AESx(0x31316253), AESx(0x15152A3F), |
||||
AESx(0x0404080C), AESx(0xC7C79552), AESx(0x23234665), AESx(0xC3C39D5E), |
||||
AESx(0x18183028), AESx(0x969637A1), AESx(0x05050A0F), AESx(0x9A9A2FB5), |
||||
AESx(0x07070E09), AESx(0x12122436), AESx(0x80801B9B), AESx(0xE2E2DF3D), |
||||
AESx(0xEBEBCD26), AESx(0x27274E69), AESx(0xB2B27FCD), AESx(0x7575EA9F), |
||||
AESx(0x0909121B), AESx(0x83831D9E), AESx(0x2C2C5874), AESx(0x1A1A342E), |
||||
AESx(0x1B1B362D), AESx(0x6E6EDCB2), AESx(0x5A5AB4EE), AESx(0xA0A05BFB), |
||||
AESx(0x5252A4F6), AESx(0x3B3B764D), AESx(0xD6D6B761), AESx(0xB3B37DCE), |
||||
AESx(0x2929527B), AESx(0xE3E3DD3E), AESx(0x2F2F5E71), AESx(0x84841397), |
||||
AESx(0x5353A6F5), AESx(0xD1D1B968), AESx(0x00000000), AESx(0xEDEDC12C), |
||||
AESx(0x20204060), AESx(0xFCFCE31F), AESx(0xB1B179C8), AESx(0x5B5BB6ED), |
||||
AESx(0x6A6AD4BE), AESx(0xCBCB8D46), AESx(0xBEBE67D9), AESx(0x3939724B), |
||||
AESx(0x4A4A94DE), AESx(0x4C4C98D4), AESx(0x5858B0E8), AESx(0xCFCF854A), |
||||
AESx(0xD0D0BB6B), AESx(0xEFEFC52A), AESx(0xAAAA4FE5), AESx(0xFBFBED16), |
||||
AESx(0x434386C5), AESx(0x4D4D9AD7), AESx(0x33336655), AESx(0x85851194), |
||||
AESx(0x45458ACF), AESx(0xF9F9E910), AESx(0x02020406), AESx(0x7F7FFE81), |
||||
AESx(0x5050A0F0), AESx(0x3C3C7844), AESx(0x9F9F25BA), AESx(0xA8A84BE3), |
||||
AESx(0x5151A2F3), AESx(0xA3A35DFE), AESx(0x404080C0), AESx(0x8F8F058A), |
||||
AESx(0x92923FAD), AESx(0x9D9D21BC), AESx(0x38387048), AESx(0xF5F5F104), |
||||
AESx(0xBCBC63DF), AESx(0xB6B677C1), AESx(0xDADAAF75), AESx(0x21214263), |
||||
AESx(0x10102030), AESx(0xFFFFE51A), AESx(0xF3F3FD0E), AESx(0xD2D2BF6D), |
||||
AESx(0xCDCD814C), AESx(0x0C0C1814), AESx(0x13132635), AESx(0xECECC32F), |
||||
AESx(0x5F5FBEE1), AESx(0x979735A2), AESx(0x444488CC), AESx(0x17172E39), |
||||
AESx(0xC4C49357), AESx(0xA7A755F2), AESx(0x7E7EFC82), AESx(0x3D3D7A47), |
||||
AESx(0x6464C8AC), AESx(0x5D5DBAE7), AESx(0x1919322B), AESx(0x7373E695), |
||||
AESx(0x6060C0A0), AESx(0x81811998), AESx(0x4F4F9ED1), AESx(0xDCDCA37F), |
||||
AESx(0x22224466), AESx(0x2A2A547E), AESx(0x90903BAB), AESx(0x88880B83), |
||||
AESx(0x46468CCA), AESx(0xEEEEC729), AESx(0xB8B86BD3), AESx(0x1414283C), |
||||
AESx(0xDEDEA779), AESx(0x5E5EBCE2), AESx(0x0B0B161D), AESx(0xDBDBAD76), |
||||
AESx(0xE0E0DB3B), AESx(0x32326456), AESx(0x3A3A744E), AESx(0x0A0A141E), |
||||
AESx(0x494992DB), AESx(0x06060C0A), AESx(0x2424486C), AESx(0x5C5CB8E4), |
||||
AESx(0xC2C29F5D), AESx(0xD3D3BD6E), AESx(0xACAC43EF), AESx(0x6262C4A6), |
||||
AESx(0x919139A8), AESx(0x959531A4), AESx(0xE4E4D337), AESx(0x7979F28B), |
||||
AESx(0xE7E7D532), AESx(0xC8C88B43), AESx(0x37376E59), AESx(0x6D6DDAB7), |
||||
AESx(0x8D8D018C), AESx(0xD5D5B164), AESx(0x4E4E9CD2), AESx(0xA9A949E0), |
||||
AESx(0x6C6CD8B4), AESx(0x5656ACFA), AESx(0xF4F4F307), AESx(0xEAEACF25), |
||||
AESx(0x6565CAAF), AESx(0x7A7AF48E), AESx(0xAEAE47E9), AESx(0x08081018), |
||||
AESx(0xBABA6FD5), AESx(0x7878F088), AESx(0x25254A6F), AESx(0x2E2E5C72), |
||||
AESx(0x1C1C3824), AESx(0xA6A657F1), AESx(0xB4B473C7), AESx(0xC6C69751), |
||||
AESx(0xE8E8CB23), AESx(0xDDDDA17C), AESx(0x7474E89C), AESx(0x1F1F3E21), |
||||
AESx(0x4B4B96DD), AESx(0xBDBD61DC), AESx(0x8B8B0D86), AESx(0x8A8A0F85), |
||||
AESx(0x7070E090), AESx(0x3E3E7C42), AESx(0xB5B571C4), AESx(0x6666CCAA), |
||||
AESx(0x484890D8), AESx(0x03030605), AESx(0xF6F6F701), AESx(0x0E0E1C12), |
||||
AESx(0x6161C2A3), AESx(0x35356A5F), AESx(0x5757AEF9), AESx(0xB9B969D0), |
||||
AESx(0x86861791), AESx(0xC1C19958), AESx(0x1D1D3A27), AESx(0x9E9E27B9), |
||||
AESx(0xE1E1D938), AESx(0xF8F8EB13), AESx(0x98982BB3), AESx(0x11112233), |
||||
AESx(0x6969D2BB), AESx(0xD9D9A970), AESx(0x8E8E0789), AESx(0x949433A7), |
||||
AESx(0x9B9B2DB6), AESx(0x1E1E3C22), AESx(0x87871592), AESx(0xE9E9C920), |
||||
AESx(0xCECE8749), AESx(0x5555AAFF), AESx(0x28285078), AESx(0xDFDFA57A), |
||||
AESx(0x8C8C038F), AESx(0xA1A159F8), AESx(0x89890980), AESx(0x0D0D1A17), |
||||
AESx(0xBFBF65DA), AESx(0xE6E6D731), AESx(0x424284C6), AESx(0x6868D0B8), |
||||
AESx(0x414182C3), AESx(0x999929B0), AESx(0x2D2D5A77), AESx(0x0F0F1E11), |
||||
AESx(0xB0B07BCB), AESx(0x5454A8FC), AESx(0xBBBB6DD6), AESx(0x16162C3A) |
||||
}; |
||||
|
||||
/*
 * AES2: 256-entry table of 32-bit words, every entry written through the
 * AESx() macro (defined elsewhere; its effect on the literal is not
 * visible here).
 *
 * Each literal has the byte layout b, X, Y, b (e.g. 0x63C6A563 at index 0).
 * Spot checks (indices 0, 128, 255) show the literal equals the same-index
 * AES1 literal rotated left by 8 bits: 0x6363C6A5 -> 0x63C6A563.
 * NOTE(review): presumably the third of the four rotated AES round tables -
 * confirm against the AES helper that indexes these tables.
 */
static const sph_u32 AES2[256] = { |
||||
AESx(0x63C6A563), AESx(0x7CF8847C), AESx(0x77EE9977), AESx(0x7BF68D7B), |
||||
AESx(0xF2FF0DF2), AESx(0x6BD6BD6B), AESx(0x6FDEB16F), AESx(0xC59154C5), |
||||
AESx(0x30605030), AESx(0x01020301), AESx(0x67CEA967), AESx(0x2B567D2B), |
||||
AESx(0xFEE719FE), AESx(0xD7B562D7), AESx(0xAB4DE6AB), AESx(0x76EC9A76), |
||||
AESx(0xCA8F45CA), AESx(0x821F9D82), AESx(0xC98940C9), AESx(0x7DFA877D), |
||||
AESx(0xFAEF15FA), AESx(0x59B2EB59), AESx(0x478EC947), AESx(0xF0FB0BF0), |
||||
AESx(0xAD41ECAD), AESx(0xD4B367D4), AESx(0xA25FFDA2), AESx(0xAF45EAAF), |
||||
AESx(0x9C23BF9C), AESx(0xA453F7A4), AESx(0x72E49672), AESx(0xC09B5BC0), |
||||
AESx(0xB775C2B7), AESx(0xFDE11CFD), AESx(0x933DAE93), AESx(0x264C6A26), |
||||
AESx(0x366C5A36), AESx(0x3F7E413F), AESx(0xF7F502F7), AESx(0xCC834FCC), |
||||
AESx(0x34685C34), AESx(0xA551F4A5), AESx(0xE5D134E5), AESx(0xF1F908F1), |
||||
AESx(0x71E29371), AESx(0xD8AB73D8), AESx(0x31625331), AESx(0x152A3F15), |
||||
AESx(0x04080C04), AESx(0xC79552C7), AESx(0x23466523), AESx(0xC39D5EC3), |
||||
AESx(0x18302818), AESx(0x9637A196), AESx(0x050A0F05), AESx(0x9A2FB59A), |
||||
AESx(0x070E0907), AESx(0x12243612), AESx(0x801B9B80), AESx(0xE2DF3DE2), |
||||
AESx(0xEBCD26EB), AESx(0x274E6927), AESx(0xB27FCDB2), AESx(0x75EA9F75), |
||||
AESx(0x09121B09), AESx(0x831D9E83), AESx(0x2C58742C), AESx(0x1A342E1A), |
||||
AESx(0x1B362D1B), AESx(0x6EDCB26E), AESx(0x5AB4EE5A), AESx(0xA05BFBA0), |
||||
AESx(0x52A4F652), AESx(0x3B764D3B), AESx(0xD6B761D6), AESx(0xB37DCEB3), |
||||
AESx(0x29527B29), AESx(0xE3DD3EE3), AESx(0x2F5E712F), AESx(0x84139784), |
||||
AESx(0x53A6F553), AESx(0xD1B968D1), AESx(0x00000000), AESx(0xEDC12CED), |
||||
AESx(0x20406020), AESx(0xFCE31FFC), AESx(0xB179C8B1), AESx(0x5BB6ED5B), |
||||
AESx(0x6AD4BE6A), AESx(0xCB8D46CB), AESx(0xBE67D9BE), AESx(0x39724B39), |
||||
AESx(0x4A94DE4A), AESx(0x4C98D44C), AESx(0x58B0E858), AESx(0xCF854ACF), |
||||
AESx(0xD0BB6BD0), AESx(0xEFC52AEF), AESx(0xAA4FE5AA), AESx(0xFBED16FB), |
||||
AESx(0x4386C543), AESx(0x4D9AD74D), AESx(0x33665533), AESx(0x85119485), |
||||
AESx(0x458ACF45), AESx(0xF9E910F9), AESx(0x02040602), AESx(0x7FFE817F), |
||||
AESx(0x50A0F050), AESx(0x3C78443C), AESx(0x9F25BA9F), AESx(0xA84BE3A8), |
||||
AESx(0x51A2F351), AESx(0xA35DFEA3), AESx(0x4080C040), AESx(0x8F058A8F), |
||||
AESx(0x923FAD92), AESx(0x9D21BC9D), AESx(0x38704838), AESx(0xF5F104F5), |
||||
AESx(0xBC63DFBC), AESx(0xB677C1B6), AESx(0xDAAF75DA), AESx(0x21426321), |
||||
AESx(0x10203010), AESx(0xFFE51AFF), AESx(0xF3FD0EF3), AESx(0xD2BF6DD2), |
||||
AESx(0xCD814CCD), AESx(0x0C18140C), AESx(0x13263513), AESx(0xECC32FEC), |
||||
AESx(0x5FBEE15F), AESx(0x9735A297), AESx(0x4488CC44), AESx(0x172E3917), |
||||
AESx(0xC49357C4), AESx(0xA755F2A7), AESx(0x7EFC827E), AESx(0x3D7A473D), |
||||
AESx(0x64C8AC64), AESx(0x5DBAE75D), AESx(0x19322B19), AESx(0x73E69573), |
||||
AESx(0x60C0A060), AESx(0x81199881), AESx(0x4F9ED14F), AESx(0xDCA37FDC), |
||||
AESx(0x22446622), AESx(0x2A547E2A), AESx(0x903BAB90), AESx(0x880B8388), |
||||
AESx(0x468CCA46), AESx(0xEEC729EE), AESx(0xB86BD3B8), AESx(0x14283C14), |
||||
AESx(0xDEA779DE), AESx(0x5EBCE25E), AESx(0x0B161D0B), AESx(0xDBAD76DB), |
||||
AESx(0xE0DB3BE0), AESx(0x32645632), AESx(0x3A744E3A), AESx(0x0A141E0A), |
||||
AESx(0x4992DB49), AESx(0x060C0A06), AESx(0x24486C24), AESx(0x5CB8E45C), |
||||
AESx(0xC29F5DC2), AESx(0xD3BD6ED3), AESx(0xAC43EFAC), AESx(0x62C4A662), |
||||
AESx(0x9139A891), AESx(0x9531A495), AESx(0xE4D337E4), AESx(0x79F28B79), |
||||
AESx(0xE7D532E7), AESx(0xC88B43C8), AESx(0x376E5937), AESx(0x6DDAB76D), |
||||
AESx(0x8D018C8D), AESx(0xD5B164D5), AESx(0x4E9CD24E), AESx(0xA949E0A9), |
||||
AESx(0x6CD8B46C), AESx(0x56ACFA56), AESx(0xF4F307F4), AESx(0xEACF25EA), |
||||
AESx(0x65CAAF65), AESx(0x7AF48E7A), AESx(0xAE47E9AE), AESx(0x08101808), |
||||
AESx(0xBA6FD5BA), AESx(0x78F08878), AESx(0x254A6F25), AESx(0x2E5C722E), |
||||
AESx(0x1C38241C), AESx(0xA657F1A6), AESx(0xB473C7B4), AESx(0xC69751C6), |
||||
AESx(0xE8CB23E8), AESx(0xDDA17CDD), AESx(0x74E89C74), AESx(0x1F3E211F), |
||||
AESx(0x4B96DD4B), AESx(0xBD61DCBD), AESx(0x8B0D868B), AESx(0x8A0F858A), |
||||
AESx(0x70E09070), AESx(0x3E7C423E), AESx(0xB571C4B5), AESx(0x66CCAA66), |
||||
AESx(0x4890D848), AESx(0x03060503), AESx(0xF6F701F6), AESx(0x0E1C120E), |
||||
AESx(0x61C2A361), AESx(0x356A5F35), AESx(0x57AEF957), AESx(0xB969D0B9), |
||||
AESx(0x86179186), AESx(0xC19958C1), AESx(0x1D3A271D), AESx(0x9E27B99E), |
||||
AESx(0xE1D938E1), AESx(0xF8EB13F8), AESx(0x982BB398), AESx(0x11223311), |
||||
AESx(0x69D2BB69), AESx(0xD9A970D9), AESx(0x8E07898E), AESx(0x9433A794), |
||||
AESx(0x9B2DB69B), AESx(0x1E3C221E), AESx(0x87159287), AESx(0xE9C920E9), |
||||
AESx(0xCE8749CE), AESx(0x55AAFF55), AESx(0x28507828), AESx(0xDFA57ADF), |
||||
AESx(0x8C038F8C), AESx(0xA159F8A1), AESx(0x89098089), AESx(0x0D1A170D), |
||||
AESx(0xBF65DABF), AESx(0xE6D731E6), AESx(0x4284C642), AESx(0x68D0B868), |
||||
AESx(0x4182C341), AESx(0x9929B099), AESx(0x2D5A772D), AESx(0x0F1E110F), |
||||
AESx(0xB07BCBB0), AESx(0x54A8FC54), AESx(0xBB6DD6BB), AESx(0x162C3A16) |
||||
}; |
||||
|
||||
/*
 * AES3: 256-entry table of 32-bit words, every entry written through the
 * AESx() macro (defined elsewhere; its effect on the literal is not
 * visible here).
 *
 * Each literal has the byte layout X, Y, b, b (e.g. 0xC6A56363 at index 0).
 * Spot checks (indices 0, 128, 255) show the literal equals the same-index
 * AES2 literal rotated left by 8 bits: 0x63C6A563 -> 0xC6A56363.
 * NOTE(review): presumably the last of the four rotated AES round tables -
 * confirm against the AES helper that indexes these tables.
 */
static const sph_u32 AES3[256] = { |
||||
AESx(0xC6A56363), AESx(0xF8847C7C), AESx(0xEE997777), AESx(0xF68D7B7B), |
||||
AESx(0xFF0DF2F2), AESx(0xD6BD6B6B), AESx(0xDEB16F6F), AESx(0x9154C5C5), |
||||
AESx(0x60503030), AESx(0x02030101), AESx(0xCEA96767), AESx(0x567D2B2B), |
||||
AESx(0xE719FEFE), AESx(0xB562D7D7), AESx(0x4DE6ABAB), AESx(0xEC9A7676), |
||||
AESx(0x8F45CACA), AESx(0x1F9D8282), AESx(0x8940C9C9), AESx(0xFA877D7D), |
||||
AESx(0xEF15FAFA), AESx(0xB2EB5959), AESx(0x8EC94747), AESx(0xFB0BF0F0), |
||||
AESx(0x41ECADAD), AESx(0xB367D4D4), AESx(0x5FFDA2A2), AESx(0x45EAAFAF), |
||||
AESx(0x23BF9C9C), AESx(0x53F7A4A4), AESx(0xE4967272), AESx(0x9B5BC0C0), |
||||
AESx(0x75C2B7B7), AESx(0xE11CFDFD), AESx(0x3DAE9393), AESx(0x4C6A2626), |
||||
AESx(0x6C5A3636), AESx(0x7E413F3F), AESx(0xF502F7F7), AESx(0x834FCCCC), |
||||
AESx(0x685C3434), AESx(0x51F4A5A5), AESx(0xD134E5E5), AESx(0xF908F1F1), |
||||
AESx(0xE2937171), AESx(0xAB73D8D8), AESx(0x62533131), AESx(0x2A3F1515), |
||||
AESx(0x080C0404), AESx(0x9552C7C7), AESx(0x46652323), AESx(0x9D5EC3C3), |
||||
AESx(0x30281818), AESx(0x37A19696), AESx(0x0A0F0505), AESx(0x2FB59A9A), |
||||
AESx(0x0E090707), AESx(0x24361212), AESx(0x1B9B8080), AESx(0xDF3DE2E2), |
||||
AESx(0xCD26EBEB), AESx(0x4E692727), AESx(0x7FCDB2B2), AESx(0xEA9F7575), |
||||
AESx(0x121B0909), AESx(0x1D9E8383), AESx(0x58742C2C), AESx(0x342E1A1A), |
||||
AESx(0x362D1B1B), AESx(0xDCB26E6E), AESx(0xB4EE5A5A), AESx(0x5BFBA0A0), |
||||
AESx(0xA4F65252), AESx(0x764D3B3B), AESx(0xB761D6D6), AESx(0x7DCEB3B3), |
||||
AESx(0x527B2929), AESx(0xDD3EE3E3), AESx(0x5E712F2F), AESx(0x13978484), |
||||
AESx(0xA6F55353), AESx(0xB968D1D1), AESx(0x00000000), AESx(0xC12CEDED), |
||||
AESx(0x40602020), AESx(0xE31FFCFC), AESx(0x79C8B1B1), AESx(0xB6ED5B5B), |
||||
AESx(0xD4BE6A6A), AESx(0x8D46CBCB), AESx(0x67D9BEBE), AESx(0x724B3939), |
||||
AESx(0x94DE4A4A), AESx(0x98D44C4C), AESx(0xB0E85858), AESx(0x854ACFCF), |
||||
AESx(0xBB6BD0D0), AESx(0xC52AEFEF), AESx(0x4FE5AAAA), AESx(0xED16FBFB), |
||||
AESx(0x86C54343), AESx(0x9AD74D4D), AESx(0x66553333), AESx(0x11948585), |
||||
AESx(0x8ACF4545), AESx(0xE910F9F9), AESx(0x04060202), AESx(0xFE817F7F), |
||||
AESx(0xA0F05050), AESx(0x78443C3C), AESx(0x25BA9F9F), AESx(0x4BE3A8A8), |
||||
AESx(0xA2F35151), AESx(0x5DFEA3A3), AESx(0x80C04040), AESx(0x058A8F8F), |
||||
AESx(0x3FAD9292), AESx(0x21BC9D9D), AESx(0x70483838), AESx(0xF104F5F5), |
||||
AESx(0x63DFBCBC), AESx(0x77C1B6B6), AESx(0xAF75DADA), AESx(0x42632121), |
||||
AESx(0x20301010), AESx(0xE51AFFFF), AESx(0xFD0EF3F3), AESx(0xBF6DD2D2), |
||||
AESx(0x814CCDCD), AESx(0x18140C0C), AESx(0x26351313), AESx(0xC32FECEC), |
||||
AESx(0xBEE15F5F), AESx(0x35A29797), AESx(0x88CC4444), AESx(0x2E391717), |
||||
AESx(0x9357C4C4), AESx(0x55F2A7A7), AESx(0xFC827E7E), AESx(0x7A473D3D), |
||||
AESx(0xC8AC6464), AESx(0xBAE75D5D), AESx(0x322B1919), AESx(0xE6957373), |
||||
AESx(0xC0A06060), AESx(0x19988181), AESx(0x9ED14F4F), AESx(0xA37FDCDC), |
||||
AESx(0x44662222), AESx(0x547E2A2A), AESx(0x3BAB9090), AESx(0x0B838888), |
||||
AESx(0x8CCA4646), AESx(0xC729EEEE), AESx(0x6BD3B8B8), AESx(0x283C1414), |
||||
AESx(0xA779DEDE), AESx(0xBCE25E5E), AESx(0x161D0B0B), AESx(0xAD76DBDB), |
||||
AESx(0xDB3BE0E0), AESx(0x64563232), AESx(0x744E3A3A), AESx(0x141E0A0A), |
||||
AESx(0x92DB4949), AESx(0x0C0A0606), AESx(0x486C2424), AESx(0xB8E45C5C), |
||||
AESx(0x9F5DC2C2), AESx(0xBD6ED3D3), AESx(0x43EFACAC), AESx(0xC4A66262), |
||||
AESx(0x39A89191), AESx(0x31A49595), AESx(0xD337E4E4), AESx(0xF28B7979), |
||||
AESx(0xD532E7E7), AESx(0x8B43C8C8), AESx(0x6E593737), AESx(0xDAB76D6D), |
||||
AESx(0x018C8D8D), AESx(0xB164D5D5), AESx(0x9CD24E4E), AESx(0x49E0A9A9), |
||||
AESx(0xD8B46C6C), AESx(0xACFA5656), AESx(0xF307F4F4), AESx(0xCF25EAEA), |
||||
AESx(0xCAAF6565), AESx(0xF48E7A7A), AESx(0x47E9AEAE), AESx(0x10180808), |
||||
AESx(0x6FD5BABA), AESx(0xF0887878), AESx(0x4A6F2525), AESx(0x5C722E2E), |
||||
AESx(0x38241C1C), AESx(0x57F1A6A6), AESx(0x73C7B4B4), AESx(0x9751C6C6), |
||||
AESx(0xCB23E8E8), AESx(0xA17CDDDD), AESx(0xE89C7474), AESx(0x3E211F1F), |
||||
AESx(0x96DD4B4B), AESx(0x61DCBDBD), AESx(0x0D868B8B), AESx(0x0F858A8A), |
||||
AESx(0xE0907070), AESx(0x7C423E3E), AESx(0x71C4B5B5), AESx(0xCCAA6666), |
||||
AESx(0x90D84848), AESx(0x06050303), AESx(0xF701F6F6), AESx(0x1C120E0E), |
||||
AESx(0xC2A36161), AESx(0x6A5F3535), AESx(0xAEF95757), AESx(0x69D0B9B9), |
||||
AESx(0x17918686), AESx(0x9958C1C1), AESx(0x3A271D1D), AESx(0x27B99E9E), |
||||
AESx(0xD938E1E1), AESx(0xEB13F8F8), AESx(0x2BB39898), AESx(0x22331111), |
||||
AESx(0xD2BB6969), AESx(0xA970D9D9), AESx(0x07898E8E), AESx(0x33A79494), |
||||
AESx(0x2DB69B9B), AESx(0x3C221E1E), AESx(0x15928787), AESx(0xC920E9E9), |
||||
AESx(0x8749CECE), AESx(0xAAFF5555), AESx(0x50782828), AESx(0xA57ADFDF), |
||||
AESx(0x038F8C8C), AESx(0x59F8A1A1), AESx(0x09808989), AESx(0x1A170D0D), |
||||
AESx(0x65DABFBF), AESx(0xD731E6E6), AESx(0x84C64242), AESx(0xD0B86868), |
||||
AESx(0x82C34141), AESx(0x29B09999), AESx(0x5A772D2D), AESx(0x1E110F0F), |
||||
AESx(0x7BCBB0B0), AESx(0xA8FC5454), AESx(0x6DD6BBBB), AESx(0x2C3A1616) |
||||
}; |
||||
|
||||
#ifdef __cplusplus |
||||
} |
||||
#endif |
@ -0,0 +1,957 @@
@@ -0,0 +1,957 @@
|
||||
/* $Id: bmw.c 227 2010-06-16 17:28:38Z tp $ */ |
||||
/*
|
||||
* BMW implementation. |
||||
* |
||||
* ==========================(LICENSE BEGIN)============================ |
||||
* |
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR |
||||
* |
||||
* Permission is hereby granted, free of charge, to any person obtaining |
||||
* a copy of this software and associated documentation files (the |
||||
* "Software"), to deal in the Software without restriction, including |
||||
* without limitation the rights to use, copy, modify, merge, publish, |
||||
* distribute, sublicense, and/or sell copies of the Software, and to |
||||
* permit persons to whom the Software is furnished to do so, subject to |
||||
* the following conditions: |
||||
* |
||||
* The above copyright notice and this permission notice shall be |
||||
* included in all copies or substantial portions of the Software. |
||||
* |
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||
* |
||||
* ===========================(LICENSE END)============================= |
||||
* |
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com> |
||||
*/ |
||||
|
||||
#include <stddef.h> |
||||
#include <string.h> |
||||
#include <limits.h> |
||||
|
||||
#include "sph_bmw.h" |
||||
|
||||
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_BMW |
||||
#define SPH_SMALL_FOOTPRINT_BMW 1 |
||||
#endif |
||||
|
||||
#ifdef _MSC_VER |
||||
#pragma warning (disable: 4146) |
||||
#endif |
||||
|
||||
/*
 * Sixteen 32-bit words whose bytes, read in order, count up from 0x00 to
 * 0x3F.
 * NOTE(review): by its name this is presumably the initial chaining value
 * for the 224-bit BMW variant; the code that consumes it is outside this
 * chunk.
 */
static const sph_u32 IV224[] = { |
||||
SPH_C32(0x00010203), SPH_C32(0x04050607), |
||||
SPH_C32(0x08090A0B), SPH_C32(0x0C0D0E0F), |
||||
SPH_C32(0x10111213), SPH_C32(0x14151617), |
||||
SPH_C32(0x18191A1B), SPH_C32(0x1C1D1E1F), |
||||
SPH_C32(0x20212223), SPH_C32(0x24252627), |
||||
SPH_C32(0x28292A2B), SPH_C32(0x2C2D2E2F), |
||||
SPH_C32(0x30313233), SPH_C32(0x34353637), |
||||
SPH_C32(0x38393A3B), SPH_C32(0x3C3D3E3F) |
||||
}; |
||||
|
||||
/*
 * Sixteen 32-bit words whose bytes, read in order, count up from 0x40 to
 * 0x7F.
 * NOTE(review): by its name this is presumably the initial chaining value
 * for the 256-bit BMW variant; the code that consumes it is outside this
 * chunk.
 */
static const sph_u32 IV256[] = { |
||||
SPH_C32(0x40414243), SPH_C32(0x44454647), |
||||
SPH_C32(0x48494A4B), SPH_C32(0x4C4D4E4F), |
||||
SPH_C32(0x50515253), SPH_C32(0x54555657), |
||||
SPH_C32(0x58595A5B), SPH_C32(0x5C5D5E5F), |
||||
SPH_C32(0x60616263), SPH_C32(0x64656667), |
||||
SPH_C32(0x68696A6B), SPH_C32(0x6C6D6E6F), |
||||
SPH_C32(0x70717273), SPH_C32(0x74757677), |
||||
SPH_C32(0x78797A7B), SPH_C32(0x7C7D7E7F) |
||||
}; |
||||
|
||||
#if SPH_64 |
||||
|
||||
/*
 * Sixteen 64-bit words whose bytes, read in order, count up from 0x00 to
 * 0x7F. Only compiled when SPH_64 is non-zero.
 * NOTE(review): by its name this is presumably the initial chaining value
 * for the 384-bit BMW variant; the code that consumes it is outside this
 * chunk.
 */
static const sph_u64 IV384[] = { |
||||
SPH_C64(0x0001020304050607), SPH_C64(0x08090A0B0C0D0E0F), |
||||
SPH_C64(0x1011121314151617), SPH_C64(0x18191A1B1C1D1E1F), |
||||
SPH_C64(0x2021222324252627), SPH_C64(0x28292A2B2C2D2E2F), |
||||
SPH_C64(0x3031323334353637), SPH_C64(0x38393A3B3C3D3E3F), |
||||
SPH_C64(0x4041424344454647), SPH_C64(0x48494A4B4C4D4E4F), |
||||
SPH_C64(0x5051525354555657), SPH_C64(0x58595A5B5C5D5E5F), |
||||
SPH_C64(0x6061626364656667), SPH_C64(0x68696A6B6C6D6E6F), |
||||
SPH_C64(0x7071727374757677), SPH_C64(0x78797A7B7C7D7E7F) |
||||
}; |
||||
|
||||
/*
 * Sixteen 64-bit words whose bytes, read in order, count up from 0x80 to
 * 0xFF. Only compiled when SPH_64 is non-zero.
 * NOTE(review): by its name this is presumably the initial chaining value
 * for the 512-bit BMW variant; the code that consumes it is outside this
 * chunk.
 */
static const sph_u64 IV512[] = { |
||||
SPH_C64(0x8081828384858687), SPH_C64(0x88898A8B8C8D8E8F), |
||||
SPH_C64(0x9091929394959697), SPH_C64(0x98999A9B9C9D9E9F), |
||||
SPH_C64(0xA0A1A2A3A4A5A6A7), SPH_C64(0xA8A9AAABACADAEAF), |
||||
SPH_C64(0xB0B1B2B3B4B5B6B7), SPH_C64(0xB8B9BABBBCBDBEBF), |
||||
SPH_C64(0xC0C1C2C3C4C5C6C7), SPH_C64(0xC8C9CACBCCCDCECF), |
||||
SPH_C64(0xD0D1D2D3D4D5D6D7), SPH_C64(0xD8D9DADBDCDDDEDF), |
||||
SPH_C64(0xE0E1E2E3E4E5E6E7), SPH_C64(0xE8E9EAEBECEDEEEF), |
||||
SPH_C64(0xF0F1F2F3F4F5F6F7), SPH_C64(0xF8F9FAFBFCFDFEFF) |
||||
}; |
||||
|
||||
#endif |
||||
|
||||
/*
 * Preprocessor helpers.
 *
 * XCAT(x, y) pastes x and y into one token. It goes through the extra
 * XCAT_ level so that x and y are macro-expanded before `##` is applied.
 *
 * LPAR expands to a bare "(". The expand1s_/expand2s_/expand1b_/expand2b_
 * wrappers in this file write `name_inner LPAR args)`: because the token
 * after name_inner is LPAR rather than "(", name_inner is not invoked on
 * that scan, and by then the I16_n / M16_n list arguments have already been
 * expanded into separate comma-separated arguments.
 * NOTE(review): the actual name_inner invocation therefore needs one more
 * rescan, presumably supplied by an enclosing macro at the use sites
 * (outside this chunk) - confirm before reusing these macros elsewhere.
 */
#define XCAT(x, y) XCAT_(x, y) |
||||
#define XCAT_(x, y) x ## y |
||||
|
||||
#define LPAR ( |
||||
|
||||
/*
 * I16_n (n = 16..31): the sixteen consecutive integers n-16 .. n-1.
 * They are selected by token pasting (`I16_ ## i16`) in the expand macros
 * and bound to the parameters i0..i15 of the *_inner macros, where they are
 * passed to qf().
 */
#define I16_16 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 |
||||
#define I16_17 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 |
||||
#define I16_18 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 |
||||
#define I16_19 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 |
||||
#define I16_20 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 |
||||
#define I16_21 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20 |
||||
#define I16_22 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 |
||||
#define I16_23 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22 |
||||
#define I16_24 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 |
||||
#define I16_25 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 |
||||
#define I16_26 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 |
||||
#define I16_27 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26 |
||||
#define I16_28 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 |
||||
#define I16_29 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28 |
||||
#define I16_30 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29 |
||||
#define I16_31 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 |
||||
|
||||
/*
 * M16_n (n = 16..31): the seven arguments (i0m, i1m, i3m, i4m, i7m, i10m,
 * i11m) forwarded by the expand macros to add_elt_s / add_elt_b.
 * With j = n - 16 each list is
 *     j, j+1, (j+3)&15, ((j+3)&15)+1, (j+7)&15, (j+10)&15, ((j+10)&15)+1
 * In add_elt_* the 1st, 3rd and 6th values index mf(), the 5th indexes
 * hf(), and the 2nd, 4th and 7th are rotation counts - which is why 16
 * appears in some lists even though the indices stay within 0..15.
 */
#define M16_16 0, 1, 3, 4, 7, 10, 11 |
||||
#define M16_17 1, 2, 4, 5, 8, 11, 12 |
||||
#define M16_18 2, 3, 5, 6, 9, 12, 13 |
||||
#define M16_19 3, 4, 6, 7, 10, 13, 14 |
||||
#define M16_20 4, 5, 7, 8, 11, 14, 15 |
||||
#define M16_21 5, 6, 8, 9, 12, 15, 16 |
||||
#define M16_22 6, 7, 9, 10, 13, 0, 1 |
||||
#define M16_23 7, 8, 10, 11, 14, 1, 2 |
||||
#define M16_24 8, 9, 11, 12, 15, 2, 3 |
||||
#define M16_25 9, 10, 12, 13, 0, 3, 4 |
||||
#define M16_26 10, 11, 13, 14, 1, 4, 5 |
||||
#define M16_27 11, 12, 14, 15, 2, 5, 6 |
||||
#define M16_28 12, 13, 15, 16, 3, 6, 7 |
||||
#define M16_29 13, 14, 0, 1, 4, 7, 8 |
||||
#define M16_30 14, 15, 1, 2, 5, 8, 9 |
||||
#define M16_31 15, 16, 2, 3, 6, 9, 10 |
||||
|
||||
/*
 * 32-bit mixing primitives used by the MAKE_Qas and expand*s macros.
 *
 * ss0..ss3: XOR of a right shift of x, a left shift of x truncated with
 *           SPH_T32, and two SPH_ROTL32 rotations of x.
 * ss4, ss5: x XOR (x >> 1) and x XOR (x >> 2).
 * rs1..rs7: left rotations by 3, 7, 13, 16, 19, 23 and 27 bits.
 *
 * The ss* macros mention x several times, so pass only side-effect-free
 * expressions. SPH_T32 and SPH_ROTL32 are not defined in this chunk.
 */
#define ss0(x) (((x) >> 1) ^ SPH_T32((x) << 3) \ |
||||
^ SPH_ROTL32(x, 4) ^ SPH_ROTL32(x, 19)) |
||||
#define ss1(x) (((x) >> 1) ^ SPH_T32((x) << 2) \ |
||||
^ SPH_ROTL32(x, 8) ^ SPH_ROTL32(x, 23)) |
||||
#define ss2(x) (((x) >> 2) ^ SPH_T32((x) << 1) \ |
||||
^ SPH_ROTL32(x, 12) ^ SPH_ROTL32(x, 25)) |
||||
#define ss3(x) (((x) >> 2) ^ SPH_T32((x) << 2) \ |
||||
^ SPH_ROTL32(x, 15) ^ SPH_ROTL32(x, 29)) |
||||
#define ss4(x) (((x) >> 1) ^ (x)) |
||||
#define ss5(x) (((x) >> 2) ^ (x)) |
||||
#define rs1(x) SPH_ROTL32(x, 3) |
||||
#define rs2(x) SPH_ROTL32(x, 7) |
||||
#define rs3(x) SPH_ROTL32(x, 13) |
||||
#define rs4(x) SPH_ROTL32(x, 16) |
||||
#define rs5(x) SPH_ROTL32(x, 19) |
||||
#define rs6(x) SPH_ROTL32(x, 23) |
||||
#define rs7(x) SPH_ROTL32(x, 27) |
||||
|
||||
/*
 * Ks(j): the 32-bit constant j * 0x05555555, truncated with SPH_T32.
 */
#define Ks(j) SPH_T32((sph_u32)(j) * SPH_C32(0x05555555)) |
||||
|
||||
/*
 * add_elt_s: 32-bit message/chaining term added by the expand*s macros.
 *   mf, hf        accessor macros/functions taking an index
 *   j0m, j3m, j10m indices passed to mf()
 *   j1m, j4m, j11m rotation counts applied to those three values
 *   j7m           index passed to hf()
 *   j16           argument of Ks()
 * Computes (rotl(mf(j0m), j1m) + rotl(mf(j3m), j4m) - rotl(mf(j10m), j11m)
 * + Ks(j16)), truncates with SPH_T32, then XORs with hf(j7m).
 */
#define add_elt_s(mf, hf, j0m, j1m, j3m, j4m, j7m, j10m, j11m, j16) \ |
||||
(SPH_T32(SPH_ROTL32(mf(j0m), j1m) + SPH_ROTL32(mf(j3m), j4m) \ |
||||
- SPH_ROTL32(mf(j10m), j11m) + Ks(j16)) ^ hf(j7m)) |
||||
|
||||
/*
 * expand1s family (32-bit).
 *
 * expand1s_inner: sums ss1, ss2, ss3, ss0 (in that repeating order) applied
 * to qf(i0)..qf(i15), adds add_elt_s(...) for the same step, and truncates
 * the total with SPH_T32.
 *
 * expand1s(qf, mf, hf, i16): entry point. i16 must be a literal 16..31
 * because it is token-pasted onto I16_ and M16_ to pick the index lists.
 *
 * expand1s_: intermediate level at which the pasted list names are expanded
 * into individual arguments; it emits `expand1s_inner LPAR ...)` so the
 * inner macro is only invoked after that expansion.
 */
#define expand1s_inner(qf, mf, hf, i16, \ |
||||
i0, i1, i2, i3, i4, i5, i6, i7, i8, \ |
||||
i9, i10, i11, i12, i13, i14, i15, \ |
||||
i0m, i1m, i3m, i4m, i7m, i10m, i11m) \ |
||||
SPH_T32(ss1(qf(i0)) + ss2(qf(i1)) + ss3(qf(i2)) + ss0(qf(i3)) \ |
||||
+ ss1(qf(i4)) + ss2(qf(i5)) + ss3(qf(i6)) + ss0(qf(i7)) \ |
||||
+ ss1(qf(i8)) + ss2(qf(i9)) + ss3(qf(i10)) + ss0(qf(i11)) \ |
||||
+ ss1(qf(i12)) + ss2(qf(i13)) + ss3(qf(i14)) + ss0(qf(i15)) \ |
||||
+ add_elt_s(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16)) |
||||
|
||||
#define expand1s(qf, mf, hf, i16) \ |
||||
expand1s_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16) |
||||
#define expand1s_(qf, mf, hf, i16, ix, iy) \ |
||||
expand1s_inner LPAR qf, mf, hf, i16, ix, iy) |
||||
|
||||
/*
 * expand2s family (32-bit).
 *
 * expand2s_inner: sums qf(i0)..qf(i13) with the odd-position terms rotated
 * by rs1..rs7, adds ss4(qf(i14)) and ss5(qf(i15)), adds add_elt_s(...) for
 * the same step, and truncates the total with SPH_T32.
 *
 * expand2s(qf, mf, hf, i16): entry point. i16 must be a literal 16..31
 * because it is token-pasted onto I16_ and M16_ to pick the index lists.
 *
 * expand2s_: intermediate level at which the pasted list names are expanded
 * into individual arguments; it emits `expand2s_inner LPAR ...)` so the
 * inner macro is only invoked after that expansion.
 */
#define expand2s_inner(qf, mf, hf, i16, \ |
||||
i0, i1, i2, i3, i4, i5, i6, i7, i8, \ |
||||
i9, i10, i11, i12, i13, i14, i15, \ |
||||
i0m, i1m, i3m, i4m, i7m, i10m, i11m) \ |
||||
SPH_T32(qf(i0) + rs1(qf(i1)) + qf(i2) + rs2(qf(i3)) \ |
||||
+ qf(i4) + rs3(qf(i5)) + qf(i6) + rs4(qf(i7)) \ |
||||
+ qf(i8) + rs5(qf(i9)) + qf(i10) + rs6(qf(i11)) \ |
||||
+ qf(i12) + rs7(qf(i13)) + ss4(qf(i14)) + ss5(qf(i15)) \ |
||||
+ add_elt_s(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16)) |
||||
|
||||
#define expand2s(qf, mf, hf, i16) \ |
||||
expand2s_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16) |
||||
#define expand2s_(qf, mf, hf, i16, ix, iy) \ |
||||
expand2s_inner LPAR qf, mf, hf, i16, ix, iy) |
||||
|
||||
#if SPH_64 |
||||
|
||||
/*
 * 64-bit counterparts of the ss and rs macros (compiled only when SPH_64 is
 * non-zero).
 *
 * sb0..sb3: XOR of a right shift of x, a left shift of x truncated with
 *           SPH_T64, and two SPH_ROTL64 rotations of x.
 * sb4, sb5: x XOR (x >> 1) and x XOR (x >> 2).
 * rb1..rb7: left rotations by 5, 11, 27, 32, 37, 43 and 53 bits.
 *
 * The sb* macros mention x several times, so pass only side-effect-free
 * expressions. SPH_T64 and SPH_ROTL64 are not defined in this chunk.
 */
#define sb0(x) (((x) >> 1) ^ SPH_T64((x) << 3) \ |
||||
^ SPH_ROTL64(x, 4) ^ SPH_ROTL64(x, 37)) |
||||
#define sb1(x) (((x) >> 1) ^ SPH_T64((x) << 2) \ |
||||
^ SPH_ROTL64(x, 13) ^ SPH_ROTL64(x, 43)) |
||||
#define sb2(x) (((x) >> 2) ^ SPH_T64((x) << 1) \ |
||||
^ SPH_ROTL64(x, 19) ^ SPH_ROTL64(x, 53)) |
||||
#define sb3(x) (((x) >> 2) ^ SPH_T64((x) << 2) \ |
||||
^ SPH_ROTL64(x, 28) ^ SPH_ROTL64(x, 59)) |
||||
#define sb4(x) (((x) >> 1) ^ (x)) |
||||
#define sb5(x) (((x) >> 2) ^ (x)) |
||||
#define rb1(x) SPH_ROTL64(x, 5) |
||||
#define rb2(x) SPH_ROTL64(x, 11) |
||||
#define rb3(x) SPH_ROTL64(x, 27) |
||||
#define rb4(x) SPH_ROTL64(x, 32) |
||||
#define rb5(x) SPH_ROTL64(x, 37) |
||||
#define rb6(x) SPH_ROTL64(x, 43) |
||||
#define rb7(x) SPH_ROTL64(x, 53) |
||||
|
||||
/* Kb(j): the 64-bit constant j * 0x0555555555555555, truncated with SPH_T64. */
#define Kb(j) SPH_T64((sph_u64)(j) * SPH_C64(0x0555555555555555)) |
||||
|
||||
#if SPH_SMALL_FOOTPRINT_BMW |
||||
|
||||
/*
 * Small-footprint (SPH_SMALL_FOOTPRINT_BMW) variant of the 64-bit
 * message/chaining term, usable with a run-time index.
 *
 * Kb_tab[j] holds Kb(16 + j) for j = 0..15.
 */
static const sph_u64 Kb_tab[] = { |
||||
Kb(16), Kb(17), Kb(18), Kb(19), Kb(20), Kb(21), Kb(22), Kb(23), |
||||
Kb(24), Kb(25), Kb(26), Kb(27), Kb(28), Kb(29), Kb(30), Kb(31) |
||||
}; |
||||
|
||||
/*
 * rol_off(mf, j, off): with k = (j + off) & 15, rotates mf(k) left by
 * k + 1 bits (so the rotation count ranges over 1..16).
 */
#define rol_off(mf, j, off) \ |
||||
SPH_ROTL64(mf(((j) + (off)) & 15), (((j) + (off)) & 15) + 1) |
||||
|
||||
/*
 * add_elt_b(mf, hf, j): j is the zero-based step (0..15, used directly as
 * the Kb_tab index). Computes rol_off(j,0) + rol_off(j,3) - rol_off(j,10)
 * + Kb_tab[j], truncates with SPH_T64, then XORs with hf((j + 7) & 15).
 * j is evaluated several times.
 */
#define add_elt_b(mf, hf, j) \ |
||||
(SPH_T64(rol_off(mf, j, 0) + rol_off(mf, j, 3) \ |
||||
- rol_off(mf, j, 10) + Kb_tab[j]) ^ hf(((j) + 7) & 15)) |
||||
|
||||
/*
 * Small-footprint 64-bit expansion macros. Unlike the token-pasting
 * variants, i is an ordinary expression (16..31 given the Kb_tab size used
 * by add_elt_b), so these can be called from a loop. i is evaluated many
 * times.
 *
 * expand1b: sums sb1, sb2, sb3, sb0 (repeating) over qf(i-16)..qf(i-1),
 * adds add_elt_b(mf, hf, i - 16), and truncates with SPH_T64.
 */
#define expand1b(qf, mf, hf, i) \ |
||||
SPH_T64(sb1(qf((i) - 16)) + sb2(qf((i) - 15)) \ |
||||
+ sb3(qf((i) - 14)) + sb0(qf((i) - 13)) \ |
||||
+ sb1(qf((i) - 12)) + sb2(qf((i) - 11)) \ |
||||
+ sb3(qf((i) - 10)) + sb0(qf((i) - 9)) \ |
||||
+ sb1(qf((i) - 8)) + sb2(qf((i) - 7)) \ |
||||
+ sb3(qf((i) - 6)) + sb0(qf((i) - 5)) \ |
||||
+ sb1(qf((i) - 4)) + sb2(qf((i) - 3)) \ |
||||
+ sb3(qf((i) - 2)) + sb0(qf((i) - 1)) \ |
||||
+ add_elt_b(mf, hf, (i) - 16)) |
||||
|
||||
/*
 * expand2b: sums qf(i-16)..qf(i-3) with every second term rotated by
 * rb1..rb7, adds sb4(qf(i-2)) and sb5(qf(i-1)), adds
 * add_elt_b(mf, hf, i - 16), and truncates with SPH_T64.
 */
#define expand2b(qf, mf, hf, i) \ |
||||
SPH_T64(qf((i) - 16) + rb1(qf((i) - 15)) \ |
||||
+ qf((i) - 14) + rb2(qf((i) - 13)) \ |
||||
+ qf((i) - 12) + rb3(qf((i) - 11)) \ |
||||
+ qf((i) - 10) + rb4(qf((i) - 9)) \ |
||||
+ qf((i) - 8) + rb5(qf((i) - 7)) \ |
||||
+ qf((i) - 6) + rb6(qf((i) - 5)) \ |
||||
+ qf((i) - 4) + rb7(qf((i) - 3)) \ |
||||
+ sb4(qf((i) - 2)) + sb5(qf((i) - 1)) \ |
||||
+ add_elt_b(mf, hf, (i) - 16)) |
||||
|
||||
#else |
||||
|
||||
/*
 * add_elt_b (unrolled variant): 64-bit message/chaining term added by the
 * expand*b_inner macros.
 *   mf, hf        accessor macros/functions taking an index
 *   j0m, j3m, j10m indices passed to mf()
 *   j1m, j4m, j11m rotation counts applied to those three values
 *   j7m           index passed to hf()
 *   j16           argument of Kb()
 * Computes (rotl(mf(j0m), j1m) + rotl(mf(j3m), j4m) - rotl(mf(j10m), j11m)
 * + Kb(j16)), truncates with SPH_T64, then XORs with hf(j7m).
 */
#define add_elt_b(mf, hf, j0m, j1m, j3m, j4m, j7m, j10m, j11m, j16) \ |
||||
(SPH_T64(SPH_ROTL64(mf(j0m), j1m) + SPH_ROTL64(mf(j3m), j4m) \ |
||||
- SPH_ROTL64(mf(j10m), j11m) + Kb(j16)) ^ hf(j7m)) |
||||
|
||||
/*
 * expand1b family (64-bit, unrolled variant).
 *
 * expand1b_inner: sums sb1, sb2, sb3, sb0 (in that repeating order) applied
 * to qf(i0)..qf(i15), adds add_elt_b(...) for the same step, and truncates
 * the total with SPH_T64.
 *
 * expand1b(qf, mf, hf, i16): entry point. i16 must be a literal 16..31
 * because it is token-pasted onto I16_ and M16_ to pick the index lists.
 *
 * expand1b_: intermediate level at which the pasted list names are expanded
 * into individual arguments; it emits `expand1b_inner LPAR ...)` so the
 * inner macro is only invoked after that expansion.
 */
#define expand1b_inner(qf, mf, hf, i16, \ |
||||
i0, i1, i2, i3, i4, i5, i6, i7, i8, \ |
||||
i9, i10, i11, i12, i13, i14, i15, \ |
||||
i0m, i1m, i3m, i4m, i7m, i10m, i11m) \ |
||||
SPH_T64(sb1(qf(i0)) + sb2(qf(i1)) + sb3(qf(i2)) + sb0(qf(i3)) \ |
||||
+ sb1(qf(i4)) + sb2(qf(i5)) + sb3(qf(i6)) + sb0(qf(i7)) \ |
||||
+ sb1(qf(i8)) + sb2(qf(i9)) + sb3(qf(i10)) + sb0(qf(i11)) \ |
||||
+ sb1(qf(i12)) + sb2(qf(i13)) + sb3(qf(i14)) + sb0(qf(i15)) \ |
||||
+ add_elt_b(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16)) |
||||
|
||||
#define expand1b(qf, mf, hf, i16) \ |
||||
expand1b_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16) |
||||
#define expand1b_(qf, mf, hf, i16, ix, iy) \ |
||||
expand1b_inner LPAR qf, mf, hf, i16, ix, iy) |
||||
|
||||
/*
 * expand2b family (64-bit, unrolled variant).
 *
 * expand2b_inner: sums qf(i0)..qf(i13) with the odd-position terms rotated
 * by rb1..rb7, adds sb4(qf(i14)) and sb5(qf(i15)), adds add_elt_b(...) for
 * the same step, and truncates the total with SPH_T64.
 *
 * expand2b(qf, mf, hf, i16): entry point. i16 must be a literal 16..31
 * because it is token-pasted onto I16_ and M16_ to pick the index lists.
 *
 * expand2b_: intermediate level at which the pasted list names are expanded
 * into individual arguments; it emits `expand2b_inner LPAR ...)` so the
 * inner macro is only invoked after that expansion.
 */
#define expand2b_inner(qf, mf, hf, i16, \ |
||||
i0, i1, i2, i3, i4, i5, i6, i7, i8, \ |
||||
i9, i10, i11, i12, i13, i14, i15, \ |
||||
i0m, i1m, i3m, i4m, i7m, i10m, i11m) \ |
||||
SPH_T64(qf(i0) + rb1(qf(i1)) + qf(i2) + rb2(qf(i3)) \ |
||||
+ qf(i4) + rb3(qf(i5)) + qf(i6) + rb4(qf(i7)) \ |
||||
+ qf(i8) + rb5(qf(i9)) + qf(i10) + rb6(qf(i11)) \ |
||||
+ qf(i12) + rb7(qf(i13)) + sb4(qf(i14)) + sb5(qf(i15)) \ |
||||
+ add_elt_b(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16)) |
||||
|
||||
#define expand2b(qf, mf, hf, i16) \ |
||||
expand2b_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16) |
||||
#define expand2b_(qf, mf, hf, i16, ix, iy) \ |
||||
expand2b_inner LPAR qf, mf, hf, i16, ix, iy) |
||||
|
||||
#endif |
||||
|
||||
#endif |
||||
|
||||
/*
 * MAKE_W: combines five (M(i) ^ H(i)) terms with the binary operators
 * op01, op12, op23, op34 (each passed as a bare `+` or `-` token) and
 * wraps the result in tt(), which the Ws / Wb definitions set to SPH_T32 or
 * SPH_T64.
 * M and H are not defined in this chunk; they must be in scope as macros or
 * functions wherever a Ws / Wb macro is expanded.
 */
#define MAKE_W(tt, i0, op01, i1, op12, i2, op23, i3, op34, i4) \ |
||||
tt((M(i0) ^ H(i0)) op01 (M(i1) ^ H(i1)) op12 (M(i2) ^ H(i2)) \ |
||||
op23 (M(i3) ^ H(i3)) op34 (M(i4) ^ H(i4))) |
||||
|
||||
/*
 * Ws0..Ws15: the sixteen 32-bit W values, each a signed combination of five
 * (M(i) ^ H(i)) terms built by MAKE_W with SPH_T32 truncation. The index
 * and sign pattern of each line is identical to the matching Wb line in
 * the SPH_64 section; only the truncation macro differs.
 */
#define Ws0 MAKE_W(SPH_T32, 5, -, 7, +, 10, +, 13, +, 14) |
||||
#define Ws1 MAKE_W(SPH_T32, 6, -, 8, +, 11, +, 14, -, 15) |
||||
#define Ws2 MAKE_W(SPH_T32, 0, +, 7, +, 9, -, 12, +, 15) |
||||
#define Ws3 MAKE_W(SPH_T32, 0, -, 1, +, 8, -, 10, +, 13) |
||||
#define Ws4 MAKE_W(SPH_T32, 1, +, 2, +, 9, -, 11, -, 14) |
||||
#define Ws5 MAKE_W(SPH_T32, 3, -, 2, +, 10, -, 12, +, 15) |
||||
#define Ws6 MAKE_W(SPH_T32, 4, -, 0, -, 3, -, 11, +, 13) |
||||
#define Ws7 MAKE_W(SPH_T32, 1, -, 4, -, 5, -, 12, -, 14) |
||||
#define Ws8 MAKE_W(SPH_T32, 2, -, 5, -, 6, +, 13, -, 15) |
||||
#define Ws9 MAKE_W(SPH_T32, 0, -, 3, +, 6, -, 7, +, 14) |
||||
#define Ws10 MAKE_W(SPH_T32, 8, -, 1, -, 4, -, 7, +, 15) |
||||
#define Ws11 MAKE_W(SPH_T32, 8, -, 0, -, 2, -, 5, +, 9) |
||||
#define Ws12 MAKE_W(SPH_T32, 1, +, 3, -, 6, -, 9, +, 10) |
||||
#define Ws13 MAKE_W(SPH_T32, 2, +, 4, +, 7, +, 10, +, 11) |
||||
#define Ws14 MAKE_W(SPH_T32, 3, -, 5, +, 8, -, 11, -, 12) |
||||
#define Ws15 MAKE_W(SPH_T32, 12, -, 4, -, 6, -, 9, +, 13) |
||||
|
||||
#if SPH_SMALL_FOOTPRINT_BMW |
||||
|
||||
/*
 * MAKE_Qas (small-footprint variant): fills qt[0..15].
 * The sixteen Ws values are first stored in a local array; a loop then
 * handles indices 0..14 in three groups of five (u = 0, 5, 10), applying
 * ss0..ss4 in order and adding H(index + 1). qt[15] is handled separately
 * with ss0 and H(0), which continues the same five-function cycle and
 * wraps the H index.
 * Requires qt (an assignable array), M and H to be in scope at the
 * expansion site; none of them is defined in this chunk.
 */
#define MAKE_Qas do { \ |
||||
unsigned u; \ |
||||
sph_u32 Ws[16]; \ |
||||
Ws[ 0] = Ws0; \ |
||||
Ws[ 1] = Ws1; \ |
||||
Ws[ 2] = Ws2; \ |
||||
Ws[ 3] = Ws3; \ |
||||
Ws[ 4] = Ws4; \ |
||||
Ws[ 5] = Ws5; \ |
||||
Ws[ 6] = Ws6; \ |
||||
Ws[ 7] = Ws7; \ |
||||
Ws[ 8] = Ws8; \ |
||||
Ws[ 9] = Ws9; \ |
||||
Ws[10] = Ws10; \ |
||||
Ws[11] = Ws11; \ |
||||
Ws[12] = Ws12; \ |
||||
Ws[13] = Ws13; \ |
||||
Ws[14] = Ws14; \ |
||||
Ws[15] = Ws15; \ |
||||
for (u = 0; u < 15; u += 5) { \ |
||||
qt[u + 0] = SPH_T32(ss0(Ws[u + 0]) + H(u + 1)); \ |
||||
qt[u + 1] = SPH_T32(ss1(Ws[u + 1]) + H(u + 2)); \ |
||||
qt[u + 2] = SPH_T32(ss2(Ws[u + 2]) + H(u + 3)); \ |
||||
qt[u + 3] = SPH_T32(ss3(Ws[u + 3]) + H(u + 4)); \ |
||||
qt[u + 4] = SPH_T32(ss4(Ws[u + 4]) + H(u + 5)); \ |
||||
} \ |
||||
qt[15] = SPH_T32(ss0(Ws[15]) + H(0)); \ |
||||
} while (0) |
||||
|
||||
/*
 * MAKE_Qbs (small-footprint branch): fills qt[16..31].
 * qt[16] and qt[17] use expand1s; qt[18]..qt[31] use expand2s. Each step
 * reads earlier qt entries through Qs, so the assignments must stay in
 * ascending order. The body is the same as in the non-small-footprint
 * branch.
 * Requires qt, M and H to be in scope at the expansion site.
 */
#define MAKE_Qbs do { \ |
||||
qt[16] = expand1s(Qs, M, H, 16); \ |
||||
qt[17] = expand1s(Qs, M, H, 17); \ |
||||
qt[18] = expand2s(Qs, M, H, 18); \ |
||||
qt[19] = expand2s(Qs, M, H, 19); \ |
||||
qt[20] = expand2s(Qs, M, H, 20); \ |
||||
qt[21] = expand2s(Qs, M, H, 21); \ |
||||
qt[22] = expand2s(Qs, M, H, 22); \ |
||||
qt[23] = expand2s(Qs, M, H, 23); \ |
||||
qt[24] = expand2s(Qs, M, H, 24); \ |
||||
qt[25] = expand2s(Qs, M, H, 25); \ |
||||
qt[26] = expand2s(Qs, M, H, 26); \ |
||||
qt[27] = expand2s(Qs, M, H, 27); \ |
||||
qt[28] = expand2s(Qs, M, H, 28); \ |
||||
qt[29] = expand2s(Qs, M, H, 29); \ |
||||
qt[30] = expand2s(Qs, M, H, 30); \ |
||||
qt[31] = expand2s(Qs, M, H, 31); \ |
||||
} while (0) |
||||
|
||||
#else |
||||
|
||||
/*
 * MAKE_Qas (unrolled variant): fills qt[0..15].
 * qt[i] = SPH_T32(ssK(Ws_i) + H((i + 1) mod 16)), where the ss function
 * cycles ss0, ss1, ss2, ss3, ss4 with i (so qt[15] uses ss0 again and
 * H(0)).
 * Requires qt (an assignable array), M and H to be in scope at the
 * expansion site; none of them is defined in this chunk.
 */
#define MAKE_Qas do { \ |
||||
qt[ 0] = SPH_T32(ss0(Ws0 ) + H( 1)); \ |
||||
qt[ 1] = SPH_T32(ss1(Ws1 ) + H( 2)); \ |
||||
qt[ 2] = SPH_T32(ss2(Ws2 ) + H( 3)); \ |
||||
qt[ 3] = SPH_T32(ss3(Ws3 ) + H( 4)); \ |
||||
qt[ 4] = SPH_T32(ss4(Ws4 ) + H( 5)); \ |
||||
qt[ 5] = SPH_T32(ss0(Ws5 ) + H( 6)); \ |
||||
qt[ 6] = SPH_T32(ss1(Ws6 ) + H( 7)); \ |
||||
qt[ 7] = SPH_T32(ss2(Ws7 ) + H( 8)); \ |
||||
qt[ 8] = SPH_T32(ss3(Ws8 ) + H( 9)); \ |
||||
qt[ 9] = SPH_T32(ss4(Ws9 ) + H(10)); \ |
||||
qt[10] = SPH_T32(ss0(Ws10) + H(11)); \ |
||||
qt[11] = SPH_T32(ss1(Ws11) + H(12)); \ |
||||
qt[12] = SPH_T32(ss2(Ws12) + H(13)); \ |
||||
qt[13] = SPH_T32(ss3(Ws13) + H(14)); \ |
||||
qt[14] = SPH_T32(ss4(Ws14) + H(15)); \ |
||||
qt[15] = SPH_T32(ss0(Ws15) + H( 0)); \ |
||||
} while (0) |
||||
|
||||
/*
 * Second half of the 32-bit quadruple expansion (fully unrolled build).
 * Fills qt[16..31]: entries 16 and 17 come from expand1s, entries 18..31
 * from expand2s, each called with the accessor macros Qs, M, H and the
 * literal index of the entry being produced.
 * Expects `qt` to be declared by the caller and M/H to be defined at the
 * expansion site.
 */
#define MAKE_Qbs do { \
		qt[16] = expand1s(Qs, M, H, 16); \
		qt[17] = expand1s(Qs, M, H, 17); \
		qt[18] = expand2s(Qs, M, H, 18); \
		qt[19] = expand2s(Qs, M, H, 19); \
		qt[20] = expand2s(Qs, M, H, 20); \
		qt[21] = expand2s(Qs, M, H, 21); \
		qt[22] = expand2s(Qs, M, H, 22); \
		qt[23] = expand2s(Qs, M, H, 23); \
		qt[24] = expand2s(Qs, M, H, 24); \
		qt[25] = expand2s(Qs, M, H, 25); \
		qt[26] = expand2s(Qs, M, H, 26); \
		qt[27] = expand2s(Qs, M, H, 27); \
		qt[28] = expand2s(Qs, M, H, 28); \
		qt[29] = expand2s(Qs, M, H, 29); \
		qt[30] = expand2s(Qs, M, H, 30); \
		qt[31] = expand2s(Qs, M, H, 31); \
	} while (0)
||||
|
||||
#endif |
||||
|
||||
/*
 * Builds the complete 32-entry `qt` array for the 32-bit variant:
 * MAKE_Qas fills qt[0..15], then MAKE_Qbs fills qt[16..31]. The order
 * matters because the second half reads entries through Qs().
 */
#define MAKE_Qs do { \
		MAKE_Qas; \
		MAKE_Qbs; \
	} while (0)

/* Accessor for entry j of the local `qt` array (32-bit variant). */
#define Qs(j) (qt[j])
||||
|
||||
#if SPH_64 |
||||
|
||||
/*
 * The sixteen 64-bit W expressions used by MAKE_Qab. Each one invokes
 * MAKE_W with the 64-bit truncation macro, followed by five word indices
 * interleaved with four +/- operator tokens. How the indices are combined
 * is defined by MAKE_W itself (not shown here).
 */
#define Wb0 MAKE_W(SPH_T64, 5, -, 7, +, 10, +, 13, +, 14)
#define Wb1 MAKE_W(SPH_T64, 6, -, 8, +, 11, +, 14, -, 15)
#define Wb2 MAKE_W(SPH_T64, 0, +, 7, +, 9, -, 12, +, 15)
#define Wb3 MAKE_W(SPH_T64, 0, -, 1, +, 8, -, 10, +, 13)
#define Wb4 MAKE_W(SPH_T64, 1, +, 2, +, 9, -, 11, -, 14)
#define Wb5 MAKE_W(SPH_T64, 3, -, 2, +, 10, -, 12, +, 15)
#define Wb6 MAKE_W(SPH_T64, 4, -, 0, -, 3, -, 11, +, 13)
#define Wb7 MAKE_W(SPH_T64, 1, -, 4, -, 5, -, 12, -, 14)
#define Wb8 MAKE_W(SPH_T64, 2, -, 5, -, 6, +, 13, -, 15)
#define Wb9 MAKE_W(SPH_T64, 0, -, 3, +, 6, -, 7, +, 14)
#define Wb10 MAKE_W(SPH_T64, 8, -, 1, -, 4, -, 7, +, 15)
#define Wb11 MAKE_W(SPH_T64, 8, -, 0, -, 2, -, 5, +, 9)
#define Wb12 MAKE_W(SPH_T64, 1, +, 3, -, 6, -, 9, +, 10)
#define Wb13 MAKE_W(SPH_T64, 2, +, 4, +, 7, +, 10, +, 11)
#define Wb14 MAKE_W(SPH_T64, 3, -, 5, +, 8, -, 11, -, 12)
#define Wb15 MAKE_W(SPH_T64, 12, -, 4, -, 6, -, 9, +, 13)
||||
|
||||
#if SPH_SMALL_FOOTPRINT_BMW |
||||
|
||||
/*
 * First half of the 64-bit quadruple expansion (small-footprint build).
 * The sixteen Wb<i> expressions are first stored in a local array, then a
 * loop stepping by 5 applies sb0..sb4 to consecutive entries and adds
 * H(index + 1), covering qt[0..14]. qt[15] is handled separately because
 * its H index wraps around to 0.
 * Expects `qt` to be declared by the caller and H (plus whatever Wb<i>
 * references) to be defined at the expansion site. Declares its own locals
 * `u` and `Wb` inside the do-block.
 */
#define MAKE_Qab do { \
		unsigned u; \
		sph_u64 Wb[16]; \
		Wb[ 0] = Wb0; \
		Wb[ 1] = Wb1; \
		Wb[ 2] = Wb2; \
		Wb[ 3] = Wb3; \
		Wb[ 4] = Wb4; \
		Wb[ 5] = Wb5; \
		Wb[ 6] = Wb6; \
		Wb[ 7] = Wb7; \
		Wb[ 8] = Wb8; \
		Wb[ 9] = Wb9; \
		Wb[10] = Wb10; \
		Wb[11] = Wb11; \
		Wb[12] = Wb12; \
		Wb[13] = Wb13; \
		Wb[14] = Wb14; \
		Wb[15] = Wb15; \
		for (u = 0; u < 15; u += 5) { \
			qt[u + 0] = SPH_T64(sb0(Wb[u + 0]) + H(u + 1)); \
			qt[u + 1] = SPH_T64(sb1(Wb[u + 1]) + H(u + 2)); \
			qt[u + 2] = SPH_T64(sb2(Wb[u + 2]) + H(u + 3)); \
			qt[u + 3] = SPH_T64(sb3(Wb[u + 3]) + H(u + 4)); \
			qt[u + 4] = SPH_T64(sb4(Wb[u + 4]) + H(u + 5)); \
		} \
		qt[15] = SPH_T64(sb0(Wb[15]) + H(0)); \
	} while (0)
||||
|
||||
/*
 * Second half of the 64-bit quadruple expansion (small-footprint build).
 * Loops over the index instead of unrolling: qt[16] and qt[17] come from
 * expand1b, qt[18..31] from expand2b. Both helpers receive the accessor
 * macros Qb, M, H and the run-time index u.
 * Expects `qt` to be declared by the caller and M/H to be defined at the
 * expansion site.
 */
#define MAKE_Qbb do { \
		unsigned u; \
		for (u = 16; u < 18; u ++) \
			qt[u] = expand1b(Qb, M, H, u); \
		for (u = 18; u < 32; u ++) \
			qt[u] = expand2b(Qb, M, H, u); \
	} while (0)
||||
|
||||
#else |
||||
|
||||
/*
 * First half of the 64-bit quadruple expansion (fully unrolled build).
 * Fills qt[0..15]: entry i is SPH_T64(sbK(Wb<i>) + H((i + 1) mod 16)),
 * where K cycles 0,1,2,3,4 with i.
 * Expects `qt` to be declared by the caller and H (plus whatever Wb<i>
 * references) to be defined at the expansion site.
 */
#define MAKE_Qab do { \
		qt[ 0] = SPH_T64(sb0(Wb0 ) + H( 1)); \
		qt[ 1] = SPH_T64(sb1(Wb1 ) + H( 2)); \
		qt[ 2] = SPH_T64(sb2(Wb2 ) + H( 3)); \
		qt[ 3] = SPH_T64(sb3(Wb3 ) + H( 4)); \
		qt[ 4] = SPH_T64(sb4(Wb4 ) + H( 5)); \
		qt[ 5] = SPH_T64(sb0(Wb5 ) + H( 6)); \
		qt[ 6] = SPH_T64(sb1(Wb6 ) + H( 7)); \
		qt[ 7] = SPH_T64(sb2(Wb7 ) + H( 8)); \
		qt[ 8] = SPH_T64(sb3(Wb8 ) + H( 9)); \
		qt[ 9] = SPH_T64(sb4(Wb9 ) + H(10)); \
		qt[10] = SPH_T64(sb0(Wb10) + H(11)); \
		qt[11] = SPH_T64(sb1(Wb11) + H(12)); \
		qt[12] = SPH_T64(sb2(Wb12) + H(13)); \
		qt[13] = SPH_T64(sb3(Wb13) + H(14)); \
		qt[14] = SPH_T64(sb4(Wb14) + H(15)); \
		qt[15] = SPH_T64(sb0(Wb15) + H( 0)); \
	} while (0)
||||
|
||||
/*
 * Second half of the 64-bit quadruple expansion (fully unrolled build).
 * Fills qt[16..31]: entries 16 and 17 come from expand1b, entries 18..31
 * from expand2b, each called with the accessor macros Qb, M, H and the
 * literal index of the entry being produced.
 * Expects `qt` to be declared by the caller and M/H to be defined at the
 * expansion site.
 */
#define MAKE_Qbb do { \
		qt[16] = expand1b(Qb, M, H, 16); \
		qt[17] = expand1b(Qb, M, H, 17); \
		qt[18] = expand2b(Qb, M, H, 18); \
		qt[19] = expand2b(Qb, M, H, 19); \
		qt[20] = expand2b(Qb, M, H, 20); \
		qt[21] = expand2b(Qb, M, H, 21); \
		qt[22] = expand2b(Qb, M, H, 22); \
		qt[23] = expand2b(Qb, M, H, 23); \
		qt[24] = expand2b(Qb, M, H, 24); \
		qt[25] = expand2b(Qb, M, H, 25); \
		qt[26] = expand2b(Qb, M, H, 26); \
		qt[27] = expand2b(Qb, M, H, 27); \
		qt[28] = expand2b(Qb, M, H, 28); \
		qt[29] = expand2b(Qb, M, H, 29); \
		qt[30] = expand2b(Qb, M, H, 30); \
		qt[31] = expand2b(Qb, M, H, 31); \
	} while (0)
||||
|
||||
#endif |
||||
|
||||
/*
 * Builds the complete 32-entry `qt` array for the 64-bit variant:
 * MAKE_Qab fills qt[0..15], then MAKE_Qbb fills qt[16..31]. The order
 * matters because the second half reads entries through Qb().
 */
#define MAKE_Qb do { \
		MAKE_Qab; \
		MAKE_Qbb; \
	} while (0)

/* Accessor for entry j of the local `qt` array (64-bit variant). */
#define Qb(j) (qt[j])
||||
|
||||
#endif |
||||
|
||||
/*
 * Generic body of one compression call, shared by the 32-bit and 64-bit
 * variants.
 *
 *   type  word type used for the local qt[32], xl and xh
 *   mkQ   statement that fills qt[0..31] (MAKE_Qs or MAKE_Qb)
 *   tt    truncation macro applied to every stored result
 *   rol   rotate-left macro, called as rol(value, count)
 *   mf    accessor for message word j
 *   qf    accessor for qt entry j
 *   dhf   accessor (lvalue) for output chaining word j
 *
 * Steps, as written below:
 *   1. declare the locals and run mkQ;
 *   2. xl = XOR of qf(16)..qf(23); xh = xl XOR qf(24)..qf(31);
 *   3. dhf(0..7) are computed from shifted xh, a shifted qf(16 + j),
 *      mf(j) and (xl ^ qf(24 + j) ^ qf(j));
 *   4. dhf(8..15) additionally add a rotation of an already-computed
 *      output word (dhf(4..7) then dhf(0..3), rotation counts 9..16), so
 *      the two groups must be evaluated in this order.
 */
#define FOLD(type, mkQ, tt, rol, mf, qf, dhf) do { \
		type qt[32], xl, xh; \
		mkQ; \
		xl = qf(16) ^ qf(17) ^ qf(18) ^ qf(19) \
			^ qf(20) ^ qf(21) ^ qf(22) ^ qf(23); \
		xh = xl ^ qf(24) ^ qf(25) ^ qf(26) ^ qf(27) \
			^ qf(28) ^ qf(29) ^ qf(30) ^ qf(31); \
		dhf( 0) = tt(((xh << 5) ^ (qf(16) >> 5) ^ mf( 0)) \
			+ (xl ^ qf(24) ^ qf( 0))); \
		dhf( 1) = tt(((xh >> 7) ^ (qf(17) << 8) ^ mf( 1)) \
			+ (xl ^ qf(25) ^ qf( 1))); \
		dhf( 2) = tt(((xh >> 5) ^ (qf(18) << 5) ^ mf( 2)) \
			+ (xl ^ qf(26) ^ qf( 2))); \
		dhf( 3) = tt(((xh >> 1) ^ (qf(19) << 5) ^ mf( 3)) \
			+ (xl ^ qf(27) ^ qf( 3))); \
		dhf( 4) = tt(((xh >> 3) ^ (qf(20) << 0) ^ mf( 4)) \
			+ (xl ^ qf(28) ^ qf( 4))); \
		dhf( 5) = tt(((xh << 6) ^ (qf(21) >> 6) ^ mf( 5)) \
			+ (xl ^ qf(29) ^ qf( 5))); \
		dhf( 6) = tt(((xh >> 4) ^ (qf(22) << 6) ^ mf( 6)) \
			+ (xl ^ qf(30) ^ qf( 6))); \
		dhf( 7) = tt(((xh >> 11) ^ (qf(23) << 2) ^ mf( 7)) \
			+ (xl ^ qf(31) ^ qf( 7))); \
		dhf( 8) = tt(rol(dhf(4), 9) + (xh ^ qf(24) ^ mf( 8)) \
			+ ((xl << 8) ^ qf(23) ^ qf( 8))); \
		dhf( 9) = tt(rol(dhf(5), 10) + (xh ^ qf(25) ^ mf( 9)) \
			+ ((xl >> 6) ^ qf(16) ^ qf( 9))); \
		dhf(10) = tt(rol(dhf(6), 11) + (xh ^ qf(26) ^ mf(10)) \
			+ ((xl << 6) ^ qf(17) ^ qf(10))); \
		dhf(11) = tt(rol(dhf(7), 12) + (xh ^ qf(27) ^ mf(11)) \
			+ ((xl << 4) ^ qf(18) ^ qf(11))); \
		dhf(12) = tt(rol(dhf(0), 13) + (xh ^ qf(28) ^ mf(12)) \
			+ ((xl >> 3) ^ qf(19) ^ qf(12))); \
		dhf(13) = tt(rol(dhf(1), 14) + (xh ^ qf(29) ^ mf(13)) \
			+ ((xl >> 4) ^ qf(20) ^ qf(13))); \
		dhf(14) = tt(rol(dhf(2), 15) + (xh ^ qf(30) ^ mf(14)) \
			+ ((xl >> 7) ^ qf(21) ^ qf(14))); \
		dhf(15) = tt(rol(dhf(3), 16) + (xh ^ qf(31) ^ mf(15)) \
			+ ((xl >> 2) ^ qf(22) ^ qf(15))); \
	} while (0)
||||
|
||||
/*
 * 32-bit instantiation of FOLD: sph_u32 words, MAKE_Qs to build qt, and the
 * M / Qs / dH accessors. M and dH must be defined where FOLDs is expanded.
 */
#define FOLDs FOLD(sph_u32, MAKE_Qs, SPH_T32, SPH_ROTL32, M, Qs, dH)

#if SPH_64

/*
 * 64-bit instantiation of FOLD: sph_u64 words, MAKE_Qb to build qt, and the
 * M / Qb / dH accessors. Only available when SPH_64 is set.
 */
#define FOLDb FOLD(sph_u64, MAKE_Qb, SPH_T64, SPH_ROTL64, M, Qb, dH)

#endif
||||
|
||||
static void |
||||
compress_small(const unsigned char *data, const sph_u32 h[16], sph_u32 dh[16]) |
||||
{ |
||||
#if SPH_LITTLE_FAST |
||||
#define M(x) sph_dec32le_aligned(data + 4 * (x)) |
||||
#else |
||||
sph_u32 mv[16]; |
||||
|
||||
mv[ 0] = sph_dec32le_aligned(data + 0); |
||||
mv[ 1] = sph_dec32le_aligned(data + 4); |
||||
mv[ 2] = sph_dec32le_aligned(data + 8); |
||||
mv[ 3] = sph_dec32le_aligned(data + 12); |
||||
mv[ 4] = sph_dec32le_aligned(data + 16); |
||||
mv[ 5] = sph_dec32le_aligned(data + 20); |
||||
mv[ 6] = sph_dec32le_aligned(data + 24); |
||||
mv[ 7] = sph_dec32le_aligned(data + 28); |
||||
mv[ 8] = sph_dec32le_aligned(data + 32); |
||||
mv[ 9] = sph_dec32le_aligned(data + 36); |
||||
mv[10] = sph_dec32le_aligned(data + 40); |
||||
mv[11] = sph_dec32le_aligned(data + 44); |
||||
mv[12] = sph_dec32le_aligned(data + 48); |
||||
mv[13] = sph_dec32le_aligned(data + 52); |
||||
mv[14] = sph_dec32le_aligned(data + 56); |
||||
mv[15] = sph_dec32le_aligned(data + 60); |
||||
#define M(x) (mv[x]) |
||||
#endif |
||||
#define H(x) (h[x]) |
||||
#define dH(x) (dh[x]) |
||||
|
||||
FOLDs; |
||||
|
||||
#undef M |
||||
#undef H |
||||
#undef dH |
||||
} |
||||
|
||||
/*
 * Constant 16-word table (0xaaaaaaa0 .. 0xaaaaaaaf). bmw32_close() passes it
 * as the chaining-value argument of its last compress_small() call, where
 * the previously computed state is fed in as the message block.
 */
static const sph_u32 final_s[16] = {
	SPH_C32(0xaaaaaaa0), SPH_C32(0xaaaaaaa1), SPH_C32(0xaaaaaaa2),
	SPH_C32(0xaaaaaaa3), SPH_C32(0xaaaaaaa4), SPH_C32(0xaaaaaaa5),
	SPH_C32(0xaaaaaaa6), SPH_C32(0xaaaaaaa7), SPH_C32(0xaaaaaaa8),
	SPH_C32(0xaaaaaaa9), SPH_C32(0xaaaaaaaa), SPH_C32(0xaaaaaaab),
	SPH_C32(0xaaaaaaac), SPH_C32(0xaaaaaaad), SPH_C32(0xaaaaaaae),
	SPH_C32(0xaaaaaaaf)
};
||||
|
||||
static void |
||||
bmw32_init(sph_bmw_small_context *sc, const sph_u32 *iv) |
||||
{ |
||||
memcpy(sc->H, iv, sizeof sc->H); |
||||
sc->ptr = 0; |
||||
#if SPH_64 |
||||
sc->bit_count = 0; |
||||
#else |
||||
sc->bit_count_high = 0; |
||||
sc->bit_count_low = 0; |
||||
#endif |
||||
} |
||||
|
||||
static void |
||||
bmw32(sph_bmw_small_context *sc, const void *data, size_t len) |
||||
{ |
||||
unsigned char *buf; |
||||
size_t ptr; |
||||
sph_u32 htmp[16]; |
||||
sph_u32 *h1, *h2; |
||||
#if !SPH_64 |
||||
sph_u32 tmp; |
||||
#endif |
||||
|
||||
#if SPH_64 |
||||
sc->bit_count += (sph_u64)len << 3; |
||||
#else |
||||
tmp = sc->bit_count_low; |
||||
sc->bit_count_low = SPH_T32(tmp + ((sph_u32)len << 3)); |
||||
if (sc->bit_count_low < tmp) |
||||
sc->bit_count_high ++; |
||||
sc->bit_count_high += len >> 29; |
||||
#endif |
||||
buf = sc->buf; |
||||
ptr = sc->ptr; |
||||
h1 = sc->H; |
||||
h2 = htmp; |
||||
while (len > 0) { |
||||
size_t clen; |
||||
|
||||
clen = (sizeof sc->buf) - ptr; |
||||
if (clen > len) |
||||
clen = len; |
||||
memcpy(buf + ptr, data, clen); |
||||
data = (const unsigned char *)data + clen; |
||||
len -= clen; |
||||
ptr += clen; |
||||
if (ptr == sizeof sc->buf) { |
||||
sph_u32 *ht; |
||||
|
||||
compress_small(buf, h1, h2); |
||||
ht = h1; |
||||
h1 = h2; |
||||
h2 = ht; |
||||
ptr = 0; |
||||
} |
||||
} |
||||
sc->ptr = ptr; |
||||
if (h1 != sc->H) |
||||
memcpy(sc->H, h1, sizeof sc->H); |
||||
} |
||||
|
||||
static void |
||||
bmw32_close(sph_bmw_small_context *sc, unsigned ub, unsigned n, |
||||
void *dst, size_t out_size_w32) |
||||
{ |
||||
unsigned char *buf, *out; |
||||
size_t ptr, u, v; |
||||
unsigned z; |
||||
sph_u32 h1[16], h2[16], *h; |
||||
|
||||
buf = sc->buf; |
||||
ptr = sc->ptr; |
||||
z = 0x80 >> n; |
||||
buf[ptr ++] = ((ub & -z) | z) & 0xFF; |
||||
h = sc->H; |
||||
if (ptr > (sizeof sc->buf) - 8) { |
||||
memset(buf + ptr, 0, (sizeof sc->buf) - ptr); |
||||
compress_small(buf, h, h1); |
||||
ptr = 0; |
||||
h = h1; |
||||
} |
||||
memset(buf + ptr, 0, (sizeof sc->buf) - 8 - ptr); |
||||
#if SPH_64 |
||||
sph_enc64le_aligned(buf + (sizeof sc->buf) - 8, |
||||
SPH_T64(sc->bit_count + n)); |
||||
#else |
||||
sph_enc32le_aligned(buf + (sizeof sc->buf) - 8, |
||||
sc->bit_count_low + n); |
||||
sph_enc32le_aligned(buf + (sizeof sc->buf) - 4, |
||||
SPH_T32(sc->bit_count_high)); |
||||
#endif |
||||
compress_small(buf, h, h2); |
||||
for (u = 0; u < 16; u ++) |
||||
sph_enc32le_aligned(buf + 4 * u, h2[u]); |
||||
compress_small(buf, final_s, h1); |
||||
out = dst; |
||||
for (u = 0, v = 16 - out_size_w32; u < out_size_w32; u ++, v ++) |
||||
sph_enc32le(out + 4 * u, h1[v]); |
||||
} |
||||
|
||||
#if SPH_64 |
||||
|
||||
static void |
||||
compress_big(const unsigned char *data, const sph_u64 h[16], sph_u64 dh[16]) |
||||
{ |
||||
#if SPH_LITTLE_FAST |
||||
#define M(x) sph_dec64le_aligned(data + 8 * (x)) |
||||
#else |
||||
sph_u64 mv[16]; |
||||
|
||||
mv[ 0] = sph_dec64le_aligned(data + 0); |
||||
mv[ 1] = sph_dec64le_aligned(data + 8); |
||||
mv[ 2] = sph_dec64le_aligned(data + 16); |
||||
mv[ 3] = sph_dec64le_aligned(data + 24); |
||||
mv[ 4] = sph_dec64le_aligned(data + 32); |
||||
mv[ 5] = sph_dec64le_aligned(data + 40); |
||||
mv[ 6] = sph_dec64le_aligned(data + 48); |
||||
mv[ 7] = sph_dec64le_aligned(data + 56); |
||||
mv[ 8] = sph_dec64le_aligned(data + 64); |
||||
mv[ 9] = sph_dec64le_aligned(data + 72); |
||||
mv[10] = sph_dec64le_aligned(data + 80); |
||||
mv[11] = sph_dec64le_aligned(data + 88); |
||||
mv[12] = sph_dec64le_aligned(data + 96); |
||||
mv[13] = sph_dec64le_aligned(data + 104); |
||||
mv[14] = sph_dec64le_aligned(data + 112); |
||||
mv[15] = sph_dec64le_aligned(data + 120); |
||||
#define M(x) (mv[x]) |
||||
#endif |
||||
#define H(x) (h[x]) |
||||
#define dH(x) (dh[x]) |
||||
|
||||
FOLDb; |
||||
|
||||
#undef M |
||||
#undef H |
||||
#undef dH |
||||
} |
||||
|
||||
/*
 * Constant 16-word table (0xaaaaaaaaaaaaaaa0 .. 0xaaaaaaaaaaaaaaaf).
 * bmw64_close() passes it as the chaining-value argument of its last
 * compress_big() call, where the previously computed state is fed in as the
 * message block.
 */
static const sph_u64 final_b[16] = {
	SPH_C64(0xaaaaaaaaaaaaaaa0), SPH_C64(0xaaaaaaaaaaaaaaa1),
	SPH_C64(0xaaaaaaaaaaaaaaa2), SPH_C64(0xaaaaaaaaaaaaaaa3),
	SPH_C64(0xaaaaaaaaaaaaaaa4), SPH_C64(0xaaaaaaaaaaaaaaa5),
	SPH_C64(0xaaaaaaaaaaaaaaa6), SPH_C64(0xaaaaaaaaaaaaaaa7),
	SPH_C64(0xaaaaaaaaaaaaaaa8), SPH_C64(0xaaaaaaaaaaaaaaa9),
	SPH_C64(0xaaaaaaaaaaaaaaaa), SPH_C64(0xaaaaaaaaaaaaaaab),
	SPH_C64(0xaaaaaaaaaaaaaaac), SPH_C64(0xaaaaaaaaaaaaaaad),
	SPH_C64(0xaaaaaaaaaaaaaaae), SPH_C64(0xaaaaaaaaaaaaaaaf)
};
||||
|
||||
static void |
||||
bmw64_init(sph_bmw_big_context *sc, const sph_u64 *iv) |
||||
{ |
||||
memcpy(sc->H, iv, sizeof sc->H); |
||||
sc->ptr = 0; |
||||
sc->bit_count = 0; |
||||
} |
||||
|
||||
static void |
||||
bmw64(sph_bmw_big_context *sc, const void *data, size_t len) |
||||
{ |
||||
unsigned char *buf; |
||||
size_t ptr; |
||||
sph_u64 htmp[16]; |
||||
sph_u64 *h1, *h2; |
||||
|
||||
sc->bit_count += (sph_u64)len << 3; |
||||
buf = sc->buf; |
||||
ptr = sc->ptr; |
||||
h1 = sc->H; |
||||
h2 = htmp; |
||||
while (len > 0) { |
||||
size_t clen; |
||||
|
||||
clen = (sizeof sc->buf) - ptr; |
||||
if (clen > len) |
||||
clen = len; |
||||
memcpy(buf + ptr, data, clen); |
||||
data = (const unsigned char *)data + clen; |
||||
len -= clen; |
||||
ptr += clen; |
||||
if (ptr == sizeof sc->buf) { |
||||
sph_u64 *ht; |
||||
|
||||
compress_big(buf, h1, h2); |
||||
ht = h1; |
||||
h1 = h2; |
||||
h2 = ht; |
||||
ptr = 0; |
||||
} |
||||
} |
||||
sc->ptr = ptr; |
||||
if (h1 != sc->H) |
||||
memcpy(sc->H, h1, sizeof sc->H); |
||||
} |
||||
|
||||
static void |
||||
bmw64_close(sph_bmw_big_context *sc, unsigned ub, unsigned n, |
||||
void *dst, size_t out_size_w64) |
||||
{ |
||||
unsigned char *buf, *out; |
||||
size_t ptr, u, v; |
||||
unsigned z; |
||||
sph_u64 h1[16], h2[16], *h; |
||||
|
||||
buf = sc->buf; |
||||
ptr = sc->ptr; |
||||
z = 0x80 >> n; |
||||
buf[ptr ++] = ((ub & -z) | z) & 0xFF; |
||||
h = sc->H; |
||||
if (ptr > (sizeof sc->buf) - 8) { |
||||
memset(buf + ptr, 0, (sizeof sc->buf) - ptr); |
||||
compress_big(buf, h, h1); |
||||
ptr = 0; |
||||
h = h1; |
||||
} |
||||
memset(buf + ptr, 0, (sizeof sc->buf) - 8 - ptr); |
||||
sph_enc64le_aligned(buf + (sizeof sc->buf) - 8, |
||||
SPH_T64(sc->bit_count + n)); |
||||
compress_big(buf, h, h2); |
||||
for (u = 0; u < 16; u ++) |
||||
sph_enc64le_aligned(buf + 8 * u, h2[u]); |
||||
compress_big(buf, final_b, h1); |
||||
out = dst; |
||||
for (u = 0, v = 16 - out_size_w64; u < out_size_w64; u ++, v ++) |
||||
sph_enc64le(out + 8 * u, h1[v]); |
||||
} |
||||
|
||||
#endif |
||||
|
||||
/* see sph_bmw.h */ |
||||
void |
||||
sph_bmw224_init(void *cc) |
||||
{ |
||||
bmw32_init(cc, IV224); |
||||
} |
||||
|
||||
/* see sph_bmw.h */ |
||||
void |
||||
sph_bmw224(void *cc, const void *data, size_t len) |
||||
{ |
||||
bmw32(cc, data, len); |
||||
} |
||||
|
||||
/*
 * Public entry point (declared in sph_bmw.h): finish the computation with
 * no extra trailing bits and write the digest to `dst`.
 */
void
sph_bmw224_close(void *cc, void *dst)
{
	/* "No extra bits" is the ub = 0, n = 0 case of the general close. */
	const unsigned ub = 0;
	const unsigned n = 0;

	sph_bmw224_addbits_and_close(cc, ub, n, dst);
}
||||
|
||||
/* see sph_bmw.h */ |
||||
void |
||||
sph_bmw224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) |
||||
{ |
||||
bmw32_close(cc, ub, n, dst, 7); |
||||
sph_bmw224_init(cc); |
||||
} |
||||
|
||||
/* see sph_bmw.h */ |
||||
void |
||||
sph_bmw256_init(void *cc) |
||||
{ |
||||
bmw32_init(cc, IV256); |
||||
} |
||||
|
||||
/* see sph_bmw.h */ |
||||
void |
||||
sph_bmw256(void *cc, const void *data, size_t len) |
||||
{ |
||||
bmw32(cc, data, len); |
||||
} |
||||
|
||||
/*
 * Public entry point (declared in sph_bmw.h): finish the computation with
 * no extra trailing bits and write the digest to `dst`.
 */
void
sph_bmw256_close(void *cc, void *dst)
{
	/* "No extra bits" is the ub = 0, n = 0 case of the general close. */
	const unsigned ub = 0;
	const unsigned n = 0;

	sph_bmw256_addbits_and_close(cc, ub, n, dst);
}
||||
|
||||
/* see sph_bmw.h */ |
||||
void |
||||
sph_bmw256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) |
||||
{ |
||||
bmw32_close(cc, ub, n, dst, 8); |
||||
sph_bmw256_init(cc); |
||||
} |
||||
|
||||
#if SPH_64 |
||||
|
||||
/* see sph_bmw.h */ |
||||
void |
||||
sph_bmw384_init(void *cc) |
||||
{ |
||||
bmw64_init(cc, IV384); |
||||
} |
||||
|
||||
/* see sph_bmw.h */ |
||||
void |
||||
sph_bmw384(void *cc, const void *data, size_t len) |
||||
{ |
||||
bmw64(cc, data, len); |
||||
} |
||||
|
||||
/*
 * Public entry point (declared in sph_bmw.h): finish the computation with
 * no extra trailing bits and write the digest to `dst`.
 */
void
sph_bmw384_close(void *cc, void *dst)
{
	/* "No extra bits" is the ub = 0, n = 0 case of the general close. */
	const unsigned ub = 0;
	const unsigned n = 0;

	sph_bmw384_addbits_and_close(cc, ub, n, dst);
}
||||
|
||||
/* see sph_bmw.h */ |
||||
void |
||||
sph_bmw384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) |
||||
{ |
||||
bmw64_close(cc, ub, n, dst, 6); |
||||
sph_bmw384_init(cc); |
||||
} |
||||
|
||||
/* see sph_bmw.h */ |
||||
void |
||||
sph_bmw512_init(void *cc) |
||||
{ |
||||
bmw64_init(cc, IV512); |
||||
} |
||||
|
||||
/* see sph_bmw.h */ |
||||
void |
||||
sph_bmw512(void *cc, const void *data, size_t len) |
||||
{ |
||||
bmw64(cc, data, len); |
||||
} |
||||
|
||||
/*
 * Public entry point (declared in sph_bmw.h): finish the computation with
 * no extra trailing bits and write the digest to `dst`.
 */
void
sph_bmw512_close(void *cc, void *dst)
{
	/* "No extra bits" is the ub = 0, n = 0 case of the general close. */
	const unsigned ub = 0;
	const unsigned n = 0;

	sph_bmw512_addbits_and_close(cc, ub, n, dst);
}
||||
|
||||
/* see sph_bmw.h */ |
||||
void |
||||
sph_bmw512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) |
||||
{ |
||||
bmw64_close(cc, ub, n, dst, 8); |
||||
sph_bmw512_init(cc); |
||||
} |
||||
|
||||
#endif |
@ -0,0 +1,723 @@
@@ -0,0 +1,723 @@
|
||||
/* $Id: cubehash.c 227 2010-06-16 17:28:38Z tp $ */ |
||||
/*
|
||||
* CubeHash implementation. |
||||
* |
||||
* ==========================(LICENSE BEGIN)============================ |
||||
* |
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR |
||||
* |
||||
* Permission is hereby granted, free of charge, to any person obtaining |
||||
* a copy of this software and associated documentation files (the |
||||
* "Software"), to deal in the Software without restriction, including |
||||
* without limitation the rights to use, copy, modify, merge, publish, |
||||
* distribute, sublicense, and/or sell copies of the Software, and to |
||||
* permit persons to whom the Software is furnished to do so, subject to |
||||
* the following conditions: |
||||
* |
||||
* The above copyright notice and this permission notice shall be |
||||
* included in all copies or substantial portions of the Software. |
||||
* |
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||
* |
||||
* ===========================(LICENSE END)============================= |
||||
* |
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com> |
||||
*/ |
||||
|
||||
#include <stddef.h> |
||||
#include <string.h> |
||||
#include <limits.h> |
||||
|
||||
#include "sph_cubehash.h" |
||||
#ifdef __cplusplus |
||||
extern "C"{ |
||||
#endif |
||||
|
||||
/*
 * The global SPH_SMALL_FOOTPRINT switch turns on the CubeHash-specific
 * small-footprint mode, unless SPH_SMALL_FOOTPRINT_CUBEHASH was already
 * defined by the build.
 */
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_CUBEHASH
#define SPH_SMALL_FOOTPRINT_CUBEHASH 1
#endif
||||
|
||||
/*
|
||||
* Some tests were conducted on an Intel Core2 Q6600 (32-bit and 64-bit |
||||
* mode), a PowerPC G3, and a MIPS-compatible CPU (Broadcom BCM3302). |
||||
* It appears that the optimal settings are: |
||||
* -- full unroll, no state copy on the "big" systems (x86, PowerPC) |
||||
* -- unroll to 4 or 8, state copy on the "small" system (MIPS) |
||||
*/ |
||||
|
||||
/*
 * Default tuning knobs; either may be predefined by the build to override.
 *
 *   SPH_CUBEHASH_UNROLL  2, 4 or 8 selects a looped SIXTEEN_ROUNDS with that
 *                        many rounds per iteration; any other value (0 is
 *                        the default here) selects the fully unrolled form.
 *   SPH_CUBEHASH_NOCOPY  non-zero makes the x0..xv names refer directly to
 *                        sc->state[]; zero makes them local variables that
 *                        are loaded/stored with READ_STATE/WRITE_STATE.
 *
 * NOTE(review): the tuning comment preceding this block recommends "state
 * copy" for small systems and "no state copy" for big ones, yet the
 * defaults below set NOCOPY to 1 for the small-footprint build and 0
 * otherwise. Confirm which of the two is intended before changing either.
 */
#if SPH_SMALL_FOOTPRINT_CUBEHASH

#if !defined SPH_CUBEHASH_UNROLL
#define SPH_CUBEHASH_UNROLL 4
#endif
#if !defined SPH_CUBEHASH_NOCOPY
#define SPH_CUBEHASH_NOCOPY 1
#endif

#else

#if !defined SPH_CUBEHASH_UNROLL
#define SPH_CUBEHASH_UNROLL 0
#endif
#if !defined SPH_CUBEHASH_NOCOPY
#define SPH_CUBEHASH_NOCOPY 0
#endif

#endif
||||
|
||||
#ifdef _MSC_VER |
||||
#pragma warning (disable: 4146) |
||||
#endif |
||||
|
||||
/*
 * 32-word constant table, in the shape cubehash_init() expects for its
 * `iv` argument (it is copied verbatim into the context state).
 * NOTE(review): by its name this is the initial state for the 224-bit
 * output variant; the call site is outside this view -- confirm.
 */
static const sph_u32 IV224[] = {
	SPH_C32(0xB0FC8217), SPH_C32(0x1BEE1A90), SPH_C32(0x829E1A22),
	SPH_C32(0x6362C342), SPH_C32(0x24D91C30), SPH_C32(0x03A7AA24),
	SPH_C32(0xA63721C8), SPH_C32(0x85B0E2EF), SPH_C32(0xF35D13F3),
	SPH_C32(0x41DA807D), SPH_C32(0x21A70CA6), SPH_C32(0x1F4E9774),
	SPH_C32(0xB3E1C932), SPH_C32(0xEB0A79A8), SPH_C32(0xCDDAAA66),
	SPH_C32(0xE2F6ECAA), SPH_C32(0x0A713362), SPH_C32(0xAA3080E0),
	SPH_C32(0xD8F23A32), SPH_C32(0xCEF15E28), SPH_C32(0xDB086314),
	SPH_C32(0x7F709DF7), SPH_C32(0xACD228A4), SPH_C32(0x704D6ECE),
	SPH_C32(0xAA3EC95F), SPH_C32(0xE387C214), SPH_C32(0x3A6445FF),
	SPH_C32(0x9CAB81C3), SPH_C32(0xC73D4B98), SPH_C32(0xD277AEBE),
	SPH_C32(0xFD20151C), SPH_C32(0x00CB573E)
};
||||
|
||||
/*
 * 32-word constant table, in the shape cubehash_init() expects for its
 * `iv` argument (it is copied verbatim into the context state).
 * NOTE(review): by its name this is the initial state for the 256-bit
 * output variant; the call site is outside this view -- confirm.
 */
static const sph_u32 IV256[] = {
	SPH_C32(0xEA2BD4B4), SPH_C32(0xCCD6F29F), SPH_C32(0x63117E71),
	SPH_C32(0x35481EAE), SPH_C32(0x22512D5B), SPH_C32(0xE5D94E63),
	SPH_C32(0x7E624131), SPH_C32(0xF4CC12BE), SPH_C32(0xC2D0B696),
	SPH_C32(0x42AF2070), SPH_C32(0xD0720C35), SPH_C32(0x3361DA8C),
	SPH_C32(0x28CCECA4), SPH_C32(0x8EF8AD83), SPH_C32(0x4680AC00),
	SPH_C32(0x40E5FBAB), SPH_C32(0xD89041C3), SPH_C32(0x6107FBD5),
	SPH_C32(0x6C859D41), SPH_C32(0xF0B26679), SPH_C32(0x09392549),
	SPH_C32(0x5FA25603), SPH_C32(0x65C892FD), SPH_C32(0x93CB6285),
	SPH_C32(0x2AF2B5AE), SPH_C32(0x9E4B4E60), SPH_C32(0x774ABFDD),
	SPH_C32(0x85254725), SPH_C32(0x15815AEB), SPH_C32(0x4AB6AAD6),
	SPH_C32(0x9CDAF8AF), SPH_C32(0xD6032C0A)
};
||||
|
||||
/*
 * 32-word constant table, in the shape cubehash_init() expects for its
 * `iv` argument (it is copied verbatim into the context state).
 * NOTE(review): by its name this is the initial state for the 384-bit
 * output variant; the call site is outside this view -- confirm.
 */
static const sph_u32 IV384[] = {
	SPH_C32(0xE623087E), SPH_C32(0x04C00C87), SPH_C32(0x5EF46453),
	SPH_C32(0x69524B13), SPH_C32(0x1A05C7A9), SPH_C32(0x3528DF88),
	SPH_C32(0x6BDD01B5), SPH_C32(0x5057B792), SPH_C32(0x6AA7A922),
	SPH_C32(0x649C7EEE), SPH_C32(0xF426309F), SPH_C32(0xCB629052),
	SPH_C32(0xFC8E20ED), SPH_C32(0xB3482BAB), SPH_C32(0xF89E5E7E),
	SPH_C32(0xD83D4DE4), SPH_C32(0x44BFC10D), SPH_C32(0x5FC1E63D),
	SPH_C32(0x2104E6CB), SPH_C32(0x17958F7F), SPH_C32(0xDBEAEF70),
	SPH_C32(0xB4B97E1E), SPH_C32(0x32C195F6), SPH_C32(0x6184A8E4),
	SPH_C32(0x796C2543), SPH_C32(0x23DE176D), SPH_C32(0xD33BBAEC),
	SPH_C32(0x0C12E5D2), SPH_C32(0x4EB95A7B), SPH_C32(0x2D18BA01),
	SPH_C32(0x04EE475F), SPH_C32(0x1FC5F22E)
};
||||
|
||||
/*
 * 32-word constant table, in the shape cubehash_init() expects for its
 * `iv` argument (it is copied verbatim into the context state).
 * NOTE(review): by its name this is the initial state for the 512-bit
 * output variant; the call site is outside this view -- confirm.
 */
static const sph_u32 IV512[] = {
	SPH_C32(0x2AEA2A61), SPH_C32(0x50F494D4), SPH_C32(0x2D538B8B),
	SPH_C32(0x4167D83E), SPH_C32(0x3FEE2313), SPH_C32(0xC701CF8C),
	SPH_C32(0xCC39968E), SPH_C32(0x50AC5695), SPH_C32(0x4D42C787),
	SPH_C32(0xA647A8B3), SPH_C32(0x97CF0BEF), SPH_C32(0x825B4537),
	SPH_C32(0xEEF864D2), SPH_C32(0xF22090C4), SPH_C32(0xD0E5CD33),
	SPH_C32(0xA23911AE), SPH_C32(0xFCD398D9), SPH_C32(0x148FE485),
	SPH_C32(0x1B017BEF), SPH_C32(0xB6444532), SPH_C32(0x6A536159),
	SPH_C32(0x2FF5781C), SPH_C32(0x91FA7934), SPH_C32(0x0DBADEA9),
	SPH_C32(0xD65C8A2B), SPH_C32(0xA5A70E75), SPH_C32(0xB1C62456),
	SPH_C32(0xBC796576), SPH_C32(0x1921C8F7), SPH_C32(0xE7989AF1),
	SPH_C32(0x7795D246), SPH_C32(0xD43E3B44)
};
||||
|
||||
/* Short local aliases for the sphlib 32-bit truncation and rotate-left macros. */
#define T32 SPH_T32
#define ROTL32 SPH_ROTL32
||||
|
||||
#if SPH_CUBEHASH_NOCOPY |
||||
|
||||
/*
 * "No copy" mode: the 32 state words are accessed in place, so there is
 * nothing to declare, load or store -- the three state macros expand to
 * nothing.
 */
#define DECL_STATE
#define READ_STATE(cc)
#define WRITE_STATE(cc)

/*
 * x0..x9, xa..xv name the 32 state words. In this mode each name expands to
 * an element of sc->state[], so every function using them must have a
 * context pointer literally named `sc` in scope.
 */
#define x0 ((sc)->state[ 0])
#define x1 ((sc)->state[ 1])
#define x2 ((sc)->state[ 2])
#define x3 ((sc)->state[ 3])
#define x4 ((sc)->state[ 4])
#define x5 ((sc)->state[ 5])
#define x6 ((sc)->state[ 6])
#define x7 ((sc)->state[ 7])
#define x8 ((sc)->state[ 8])
#define x9 ((sc)->state[ 9])
#define xa ((sc)->state[10])
#define xb ((sc)->state[11])
#define xc ((sc)->state[12])
#define xd ((sc)->state[13])
#define xe ((sc)->state[14])
#define xf ((sc)->state[15])
#define xg ((sc)->state[16])
#define xh ((sc)->state[17])
#define xi ((sc)->state[18])
#define xj ((sc)->state[19])
#define xk ((sc)->state[20])
#define xl ((sc)->state[21])
#define xm ((sc)->state[22])
#define xn ((sc)->state[23])
#define xo ((sc)->state[24])
#define xp ((sc)->state[25])
#define xq ((sc)->state[26])
#define xr ((sc)->state[27])
#define xs ((sc)->state[28])
#define xt ((sc)->state[29])
#define xu ((sc)->state[30])
#define xv ((sc)->state[31])
||||
|
||||
#else |
||||
|
||||
/*
 * "Copy" mode: declares the 32 state words x0..x9, xa..xv as local
 * variables. Must appear among the declarations of the using function; the
 * expansion already ends with a semicolon.
 */
#define DECL_STATE \
	sph_u32 x0, x1, x2, x3, x4, x5, x6, x7; \
	sph_u32 x8, x9, xa, xb, xc, xd, xe, xf; \
	sph_u32 xg, xh, xi, xj, xk, xl, xm, xn; \
	sph_u32 xo, xp, xq, xr, xs, xt, xu, xv;
||||
|
||||
/*
 * "Copy" mode: load the 32 words of (cc)->state[] into the local variables
 * x0..xv declared by DECL_STATE (x0 = state[0] ... xv = state[31]).
 */
#define READ_STATE(cc) do { \
		x0 = (cc)->state[ 0]; \
		x1 = (cc)->state[ 1]; \
		x2 = (cc)->state[ 2]; \
		x3 = (cc)->state[ 3]; \
		x4 = (cc)->state[ 4]; \
		x5 = (cc)->state[ 5]; \
		x6 = (cc)->state[ 6]; \
		x7 = (cc)->state[ 7]; \
		x8 = (cc)->state[ 8]; \
		x9 = (cc)->state[ 9]; \
		xa = (cc)->state[10]; \
		xb = (cc)->state[11]; \
		xc = (cc)->state[12]; \
		xd = (cc)->state[13]; \
		xe = (cc)->state[14]; \
		xf = (cc)->state[15]; \
		xg = (cc)->state[16]; \
		xh = (cc)->state[17]; \
		xi = (cc)->state[18]; \
		xj = (cc)->state[19]; \
		xk = (cc)->state[20]; \
		xl = (cc)->state[21]; \
		xm = (cc)->state[22]; \
		xn = (cc)->state[23]; \
		xo = (cc)->state[24]; \
		xp = (cc)->state[25]; \
		xq = (cc)->state[26]; \
		xr = (cc)->state[27]; \
		xs = (cc)->state[28]; \
		xt = (cc)->state[29]; \
		xu = (cc)->state[30]; \
		xv = (cc)->state[31]; \
	} while (0)
||||
|
||||
/*
 * "Copy" mode: store the local variables x0..xv back into the 32 words of
 * (cc)->state[] (state[0] = x0 ... state[31] = xv). Counterpart of
 * READ_STATE.
 */
#define WRITE_STATE(cc) do { \
		(cc)->state[ 0] = x0; \
		(cc)->state[ 1] = x1; \
		(cc)->state[ 2] = x2; \
		(cc)->state[ 3] = x3; \
		(cc)->state[ 4] = x4; \
		(cc)->state[ 5] = x5; \
		(cc)->state[ 6] = x6; \
		(cc)->state[ 7] = x7; \
		(cc)->state[ 8] = x8; \
		(cc)->state[ 9] = x9; \
		(cc)->state[10] = xa; \
		(cc)->state[11] = xb; \
		(cc)->state[12] = xc; \
		(cc)->state[13] = xd; \
		(cc)->state[14] = xe; \
		(cc)->state[15] = xf; \
		(cc)->state[16] = xg; \
		(cc)->state[17] = xh; \
		(cc)->state[18] = xi; \
		(cc)->state[19] = xj; \
		(cc)->state[20] = xk; \
		(cc)->state[21] = xl; \
		(cc)->state[22] = xm; \
		(cc)->state[23] = xn; \
		(cc)->state[24] = xo; \
		(cc)->state[25] = xp; \
		(cc)->state[26] = xq; \
		(cc)->state[27] = xr; \
		(cc)->state[28] = xs; \
		(cc)->state[29] = xt; \
		(cc)->state[30] = xu; \
		(cc)->state[31] = xv; \
	} while (0)
||||
|
||||
#endif |
||||
|
||||
/*
 * XOR one 32-byte input block into the first eight state words: the words
 * decoded by sph_dec32le_aligned() at buf + 0, 4, ..., 28 are XORed into
 * x0..x7 respectively. Expects a byte pointer named `buf` in scope.
 */
#define INPUT_BLOCK do { \
		x0 ^= sph_dec32le_aligned(buf + 0); \
		x1 ^= sph_dec32le_aligned(buf + 4); \
		x2 ^= sph_dec32le_aligned(buf + 8); \
		x3 ^= sph_dec32le_aligned(buf + 12); \
		x4 ^= sph_dec32le_aligned(buf + 16); \
		x5 ^= sph_dec32le_aligned(buf + 20); \
		x6 ^= sph_dec32le_aligned(buf + 24); \
		x7 ^= sph_dec32le_aligned(buf + 28); \
	} while (0)
||||
|
||||
/*
 * One round, in the form used for even-numbered rounds. It consists of two
 * passes over the state with the same shape:
 *   - each of x0..xf is added into one of xg..xv, then rotated left
 *     (by 7 in the first pass, by 11 in the second);
 *   - each of x0..xf is then XORed with one of xg..xv.
 * Which word is paired with which differs between the two passes and
 * between ROUND_EVEN and ROUND_ODD.
 * NOTE(review): the pairing presumably folds the algorithm's word swaps
 * into the variable names instead of moving data, which would be why rounds
 * must be used in EVEN/ODD pairs -- confirm against the specification.
 * The statement order is significant; do not reorder.
 */
#define ROUND_EVEN do { \
		xg = T32(x0 + xg); \
		x0 = ROTL32(x0, 7); \
		xh = T32(x1 + xh); \
		x1 = ROTL32(x1, 7); \
		xi = T32(x2 + xi); \
		x2 = ROTL32(x2, 7); \
		xj = T32(x3 + xj); \
		x3 = ROTL32(x3, 7); \
		xk = T32(x4 + xk); \
		x4 = ROTL32(x4, 7); \
		xl = T32(x5 + xl); \
		x5 = ROTL32(x5, 7); \
		xm = T32(x6 + xm); \
		x6 = ROTL32(x6, 7); \
		xn = T32(x7 + xn); \
		x7 = ROTL32(x7, 7); \
		xo = T32(x8 + xo); \
		x8 = ROTL32(x8, 7); \
		xp = T32(x9 + xp); \
		x9 = ROTL32(x9, 7); \
		xq = T32(xa + xq); \
		xa = ROTL32(xa, 7); \
		xr = T32(xb + xr); \
		xb = ROTL32(xb, 7); \
		xs = T32(xc + xs); \
		xc = ROTL32(xc, 7); \
		xt = T32(xd + xt); \
		xd = ROTL32(xd, 7); \
		xu = T32(xe + xu); \
		xe = ROTL32(xe, 7); \
		xv = T32(xf + xv); \
		xf = ROTL32(xf, 7); \
		x8 ^= xg; \
		x9 ^= xh; \
		xa ^= xi; \
		xb ^= xj; \
		xc ^= xk; \
		xd ^= xl; \
		xe ^= xm; \
		xf ^= xn; \
		x0 ^= xo; \
		x1 ^= xp; \
		x2 ^= xq; \
		x3 ^= xr; \
		x4 ^= xs; \
		x5 ^= xt; \
		x6 ^= xu; \
		x7 ^= xv; \
		xi = T32(x8 + xi); \
		x8 = ROTL32(x8, 11); \
		xj = T32(x9 + xj); \
		x9 = ROTL32(x9, 11); \
		xg = T32(xa + xg); \
		xa = ROTL32(xa, 11); \
		xh = T32(xb + xh); \
		xb = ROTL32(xb, 11); \
		xm = T32(xc + xm); \
		xc = ROTL32(xc, 11); \
		xn = T32(xd + xn); \
		xd = ROTL32(xd, 11); \
		xk = T32(xe + xk); \
		xe = ROTL32(xe, 11); \
		xl = T32(xf + xl); \
		xf = ROTL32(xf, 11); \
		xq = T32(x0 + xq); \
		x0 = ROTL32(x0, 11); \
		xr = T32(x1 + xr); \
		x1 = ROTL32(x1, 11); \
		xo = T32(x2 + xo); \
		x2 = ROTL32(x2, 11); \
		xp = T32(x3 + xp); \
		x3 = ROTL32(x3, 11); \
		xu = T32(x4 + xu); \
		x4 = ROTL32(x4, 11); \
		xv = T32(x5 + xv); \
		x5 = ROTL32(x5, 11); \
		xs = T32(x6 + xs); \
		x6 = ROTL32(x6, 11); \
		xt = T32(x7 + xt); \
		x7 = ROTL32(x7, 11); \
		xc ^= xi; \
		xd ^= xj; \
		xe ^= xg; \
		xf ^= xh; \
		x8 ^= xm; \
		x9 ^= xn; \
		xa ^= xk; \
		xb ^= xl; \
		x4 ^= xq; \
		x5 ^= xr; \
		x6 ^= xo; \
		x7 ^= xp; \
		x0 ^= xu; \
		x1 ^= xv; \
		x2 ^= xs; \
		x3 ^= xt; \
	} while (0)
||||
|
||||
/*
 * One round, in the form used for odd-numbered rounds. Same shape as
 * ROUND_EVEN -- two add/rotate/XOR passes with rotation counts 7 and 11 --
 * but with a different pairing between x0..xf and xg..xv.
 * NOTE(review): presumably this pairing undoes the renaming introduced by
 * ROUND_EVEN so that a EVEN+ODD pair leaves every word under its original
 * name -- confirm against the specification.
 * The statement order is significant; do not reorder.
 */
#define ROUND_ODD do { \
		xj = T32(xc + xj); \
		xc = ROTL32(xc, 7); \
		xi = T32(xd + xi); \
		xd = ROTL32(xd, 7); \
		xh = T32(xe + xh); \
		xe = ROTL32(xe, 7); \
		xg = T32(xf + xg); \
		xf = ROTL32(xf, 7); \
		xn = T32(x8 + xn); \
		x8 = ROTL32(x8, 7); \
		xm = T32(x9 + xm); \
		x9 = ROTL32(x9, 7); \
		xl = T32(xa + xl); \
		xa = ROTL32(xa, 7); \
		xk = T32(xb + xk); \
		xb = ROTL32(xb, 7); \
		xr = T32(x4 + xr); \
		x4 = ROTL32(x4, 7); \
		xq = T32(x5 + xq); \
		x5 = ROTL32(x5, 7); \
		xp = T32(x6 + xp); \
		x6 = ROTL32(x6, 7); \
		xo = T32(x7 + xo); \
		x7 = ROTL32(x7, 7); \
		xv = T32(x0 + xv); \
		x0 = ROTL32(x0, 7); \
		xu = T32(x1 + xu); \
		x1 = ROTL32(x1, 7); \
		xt = T32(x2 + xt); \
		x2 = ROTL32(x2, 7); \
		xs = T32(x3 + xs); \
		x3 = ROTL32(x3, 7); \
		x4 ^= xj; \
		x5 ^= xi; \
		x6 ^= xh; \
		x7 ^= xg; \
		x0 ^= xn; \
		x1 ^= xm; \
		x2 ^= xl; \
		x3 ^= xk; \
		xc ^= xr; \
		xd ^= xq; \
		xe ^= xp; \
		xf ^= xo; \
		x8 ^= xv; \
		x9 ^= xu; \
		xa ^= xt; \
		xb ^= xs; \
		xh = T32(x4 + xh); \
		x4 = ROTL32(x4, 11); \
		xg = T32(x5 + xg); \
		x5 = ROTL32(x5, 11); \
		xj = T32(x6 + xj); \
		x6 = ROTL32(x6, 11); \
		xi = T32(x7 + xi); \
		x7 = ROTL32(x7, 11); \
		xl = T32(x0 + xl); \
		x0 = ROTL32(x0, 11); \
		xk = T32(x1 + xk); \
		x1 = ROTL32(x1, 11); \
		xn = T32(x2 + xn); \
		x2 = ROTL32(x2, 11); \
		xm = T32(x3 + xm); \
		x3 = ROTL32(x3, 11); \
		xp = T32(xc + xp); \
		xc = ROTL32(xc, 11); \
		xo = T32(xd + xo); \
		xd = ROTL32(xd, 11); \
		xr = T32(xe + xr); \
		xe = ROTL32(xe, 11); \
		xq = T32(xf + xq); \
		xf = ROTL32(xf, 11); \
		xt = T32(x8 + xt); \
		x8 = ROTL32(x8, 11); \
		xs = T32(x9 + xs); \
		x9 = ROTL32(x9, 11); \
		xv = T32(xa + xv); \
		xa = ROTL32(xa, 11); \
		xu = T32(xb + xu); \
		xb = ROTL32(xb, 11); \
		x0 ^= xh; \
		x1 ^= xg; \
		x2 ^= xj; \
		x3 ^= xi; \
		x4 ^= xl; \
		x5 ^= xk; \
		x6 ^= xn; \
		x7 ^= xm; \
		x8 ^= xp; \
		x9 ^= xo; \
		xa ^= xr; \
		xb ^= xq; \
		xc ^= xt; \
		xd ^= xs; \
		xe ^= xv; \
		xf ^= xu; \
	} while (0)
||||
|
||||
/*
|
||||
* There is no need to unroll all 16 rounds. The word-swapping permutation |
||||
* is an involution, so we need to unroll an even number of rounds. On |
||||
* "big" systems, unrolling 4 rounds yields about 97% of the speed |
||||
* achieved with full unrolling; and it keeps the code more compact |
||||
* for small architectures. |
||||
*/ |
||||
|
||||
/*
 * SIXTEEN_ROUNDS runs sixteen rounds on the working state, always as
 * ROUND_EVEN followed by ROUND_ODD. SPH_CUBEHASH_UNROLL picks how many
 * rounds are spelled out per loop iteration (2, 4 or 8); any other value
 * (an undefined macro evaluates to 0 in #if) selects the loop-free form.
 */

#if SPH_CUBEHASH_UNROLL == 2

/* 8 iterations x 1 even/odd pair. */
#define SIXTEEN_ROUNDS   do { \
		int j = 0; \
		while (j < 8) { \
			ROUND_EVEN; ROUND_ODD; \
			j ++; \
		} \
	} while (0)

#elif SPH_CUBEHASH_UNROLL == 4

/* 4 iterations x 2 even/odd pairs. */
#define SIXTEEN_ROUNDS   do { \
		int j = 0; \
		while (j < 4) { \
			ROUND_EVEN; ROUND_ODD; \
			ROUND_EVEN; ROUND_ODD; \
			j ++; \
		} \
	} while (0)

#elif SPH_CUBEHASH_UNROLL == 8

/* 2 iterations x 4 even/odd pairs. */
#define SIXTEEN_ROUNDS   do { \
		int j = 0; \
		while (j < 2) { \
			ROUND_EVEN; ROUND_ODD; \
			ROUND_EVEN; ROUND_ODD; \
			ROUND_EVEN; ROUND_ODD; \
			ROUND_EVEN; ROUND_ODD; \
			j ++; \
		} \
	} while (0)

#else

/* Fully unrolled: 8 even/odd pairs, no loop counter. */
#define SIXTEEN_ROUNDS   do { \
		ROUND_EVEN; ROUND_ODD; \
		ROUND_EVEN; ROUND_ODD; \
		ROUND_EVEN; ROUND_ODD; \
		ROUND_EVEN; ROUND_ODD; \
		ROUND_EVEN; ROUND_ODD; \
		ROUND_EVEN; ROUND_ODD; \
		ROUND_EVEN; ROUND_ODD; \
		ROUND_EVEN; ROUND_ODD; \
	} while (0)

#endif
||||
|
||||
static void |
||||
cubehash_init(sph_cubehash_context *sc, const sph_u32 *iv) |
||||
{ |
||||
memcpy(sc->state, iv, sizeof sc->state); |
||||
sc->ptr = 0; |
||||
} |
||||
|
||||
static void |
||||
cubehash_core(sph_cubehash_context *sc, const void *data, size_t len) |
||||
{ |
||||
unsigned char *buf; |
||||
size_t ptr; |
||||
DECL_STATE |
||||
|
||||
buf = sc->buf; |
||||
ptr = sc->ptr; |
||||
if (len < (sizeof sc->buf) - ptr) { |
||||
memcpy(buf + ptr, data, len); |
||||
ptr += len; |
||||
sc->ptr = ptr; |
||||
return; |
||||
} |
||||
|
||||
READ_STATE(sc); |
||||
while (len > 0) { |
||||
size_t clen; |
||||
|
||||
clen = (sizeof sc->buf) - ptr; |
||||
if (clen > len) |
||||
clen = len; |
||||
memcpy(buf + ptr, data, clen); |
||||
ptr += clen; |
||||
data = (const unsigned char *)data + clen; |
||||
len -= clen; |
||||
if (ptr == sizeof sc->buf) { |
||||
INPUT_BLOCK; |
||||
SIXTEEN_ROUNDS; |
||||
ptr = 0; |
||||
} |
||||
} |
||||
WRITE_STATE(sc); |
||||
sc->ptr = ptr; |
||||
} |
||||
|
||||
static void |
||||
cubehash_close(sph_cubehash_context *sc, unsigned ub, unsigned n, |
||||
void *dst, size_t out_size_w32) |
||||
{ |
||||
unsigned char *buf, *out; |
||||
size_t ptr; |
||||
unsigned z; |
||||
int i; |
||||
DECL_STATE |
||||
|
||||
buf = sc->buf; |
||||
ptr = sc->ptr; |
||||
z = 0x80 >> n; |
||||
buf[ptr ++] = ((ub & -z) | z) & 0xFF; |
||||
memset(buf + ptr, 0, (sizeof sc->buf) - ptr); |
||||
READ_STATE(sc); |
||||
INPUT_BLOCK; |
||||
for (i = 0; i < 11; i ++) { |
||||
SIXTEEN_ROUNDS; |
||||
if (i == 0) |
||||
xv ^= SPH_C32(1); |
||||
} |
||||
WRITE_STATE(sc); |
||||
out = dst; |
||||
for (z = 0; z < out_size_w32; z ++) |
||||
sph_enc32le(out + (z << 2), sc->state[z]); |
||||
} |
||||
|
||||
/* see sph_cubehash.h */ |
||||
void |
||||
sph_cubehash224_init(void *cc) |
||||
{ |
||||
cubehash_init(cc, IV224); |
||||
} |
||||
|
||||
/* see sph_cubehash.h */ |
||||
void |
||||
sph_cubehash224(void *cc, const void *data, size_t len) |
||||
{ |
||||
cubehash_core(cc, data, len); |
||||
} |
||||
|
||||
/*
 * CubeHash-224: finish with no extra bits; delegates to the
 * addbits-and-close variant with ub = 0 and n = 0.
 */
void sph_cubehash224_close(void *cc, void *dst)
{
	const unsigned no_bits = 0;

	sph_cubehash224_addbits_and_close(cc, no_bits, no_bits, dst);
}
||||
|
||||
/* see sph_cubehash.h */ |
||||
void |
||||
sph_cubehash224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) |
||||
{ |
||||
cubehash_close(cc, ub, n, dst, 7); |
||||
sph_cubehash224_init(cc); |
||||
} |
||||
|
||||
/* see sph_cubehash.h */ |
||||
void |
||||
sph_cubehash256_init(void *cc) |
||||
{ |
||||
cubehash_init(cc, IV256); |
||||
} |
||||
|
||||
/* see sph_cubehash.h */ |
||||
void |
||||
sph_cubehash256(void *cc, const void *data, size_t len) |
||||
{ |
||||
cubehash_core(cc, data, len); |
||||
} |
||||
|
||||
/*
 * CubeHash-256: finish with no extra bits; delegates to the
 * addbits-and-close variant with ub = 0 and n = 0.
 */
void sph_cubehash256_close(void *cc, void *dst)
{
	const unsigned no_bits = 0;

	sph_cubehash256_addbits_and_close(cc, no_bits, no_bits, dst);
}
||||
|
||||
/* see sph_cubehash.h */ |
||||
void |
||||
sph_cubehash256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) |
||||
{ |
||||
cubehash_close(cc, ub, n, dst, 8); |
||||
sph_cubehash256_init(cc); |
||||
} |
||||
|
||||
/* see sph_cubehash.h */ |
||||
void |
||||
sph_cubehash384_init(void *cc) |
||||
{ |
||||
cubehash_init(cc, IV384); |
||||
} |
||||
|
||||
/* see sph_cubehash.h */ |
||||
void |
||||
sph_cubehash384(void *cc, const void *data, size_t len) |
||||
{ |
||||
cubehash_core(cc, data, len); |
||||
} |
||||
|
||||
/*
 * CubeHash-384: finish with no extra bits; delegates to the
 * addbits-and-close variant with ub = 0 and n = 0.
 */
void sph_cubehash384_close(void *cc, void *dst)
{
	const unsigned no_bits = 0;

	sph_cubehash384_addbits_and_close(cc, no_bits, no_bits, dst);
}
||||
|
||||
/* see sph_cubehash.h */ |
||||
void |
||||
sph_cubehash384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) |
||||
{ |
||||
cubehash_close(cc, ub, n, dst, 12); |
||||
sph_cubehash384_init(cc); |
||||
} |
||||
|
||||
/* see sph_cubehash.h */ |
||||
void |
||||
sph_cubehash512_init(void *cc) |
||||
{ |
||||
cubehash_init(cc, IV512); |
||||
} |
||||
|
||||
/* see sph_cubehash.h */ |
||||
void |
||||
sph_cubehash512(void *cc, const void *data, size_t len) |
||||
{ |
||||
cubehash_core(cc, data, len); |
||||
} |
||||
|
||||
/*
 * CubeHash-512: finish with no extra bits; delegates to the
 * addbits-and-close variant with ub = 0 and n = 0.
 */
void sph_cubehash512_close(void *cc, void *dst)
{
	const unsigned no_bits = 0;

	sph_cubehash512_addbits_and_close(cc, no_bits, no_bits, dst);
}
||||
|
||||
/* see sph_cubehash.h */ |
||||
void |
||||
sph_cubehash512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) |
||||
{ |
||||
cubehash_close(cc, ub, n, dst, 16); |
||||
sph_cubehash512_init(cc); |
||||
} |
||||
#ifdef __cplusplus |
||||
} |
||||
#endif |
@ -0,0 +1,320 @@
@@ -0,0 +1,320 @@
|
||||
/* $Id: sph_bmw.h 216 2010-06-08 09:46:57Z tp $ */ |
||||
/**
|
||||
* BMW interface. BMW (aka "Blue Midnight Wish") is a family of |
||||
* functions which differ by their output size; this implementation |
||||
* defines BMW for output sizes 224, 256, 384 and 512 bits. |
||||
* |
||||
* ==========================(LICENSE BEGIN)============================ |
||||
* |
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR |
||||
* |
||||
* Permission is hereby granted, free of charge, to any person obtaining |
||||
* a copy of this software and associated documentation files (the |
||||
* "Software"), to deal in the Software without restriction, including |
||||
* without limitation the rights to use, copy, modify, merge, publish, |
||||
* distribute, sublicense, and/or sell copies of the Software, and to |
||||
* permit persons to whom the Software is furnished to do so, subject to |
||||
* the following conditions: |
||||
* |
||||
* The above copyright notice and this permission notice shall be |
||||
* included in all copies or substantial portions of the Software. |
||||
* |
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||
* |
||||
* ===========================(LICENSE END)============================= |
||||
* |
||||
* @file sph_bmw.h |
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com> |
||||
*/ |
||||
|
||||
#ifndef SPH_BMW_H__ |
||||
#define SPH_BMW_H__ |
||||
|
||||
#include <stddef.h> |
||||
#include "sph_types.h" |
||||
|
||||
/**
|
||||
* Output size (in bits) for BMW-224. |
||||
*/ |
||||
#define SPH_SIZE_bmw224 224 |
||||
|
||||
/**
|
||||
* Output size (in bits) for BMW-256. |
||||
*/ |
||||
#define SPH_SIZE_bmw256 256 |
||||
|
||||
#if SPH_64 |
||||
|
||||
/**
|
||||
* Output size (in bits) for BMW-384. |
||||
*/ |
||||
#define SPH_SIZE_bmw384 384 |
||||
|
||||
/**
|
||||
* Output size (in bits) for BMW-512. |
||||
*/ |
||||
#define SPH_SIZE_bmw512 512 |
||||
|
||||
#endif |
||||
|
||||
/**
|
||||
* This structure is a context for BMW-224 and BMW-256 computations: |
||||
* it contains the intermediate values and some data from the last |
||||
* entered block. Once a BMW computation has been performed, the |
||||
* context can be reused for another computation. |
||||
* |
||||
* The contents of this structure are private. A running BMW |
||||
* computation can be cloned by copying the context (e.g. with a simple |
||||
* <code>memcpy()</code>). |
||||
*/ |
||||
typedef struct { |
||||
#ifndef DOXYGEN_IGNORE |
||||
unsigned char buf[64]; /* first field, for alignment */ |
||||
size_t ptr; |
||||
sph_u32 H[16]; |
||||
#if SPH_64 |
||||
sph_u64 bit_count; |
||||
#else |
||||
sph_u32 bit_count_high, bit_count_low; |
||||
#endif |
||||
#endif |
||||
} sph_bmw_small_context; |
||||
|
||||
/**
|
||||
* This structure is a context for BMW-224 computations. It is |
||||
* identical to the common <code>sph_bmw_small_context</code>. |
||||
*/ |
||||
typedef sph_bmw_small_context sph_bmw224_context; |
||||
|
||||
/**
|
||||
* This structure is a context for BMW-256 computations. It is |
||||
* identical to the common <code>sph_bmw_small_context</code>. |
||||
*/ |
||||
typedef sph_bmw_small_context sph_bmw256_context; |
||||
|
||||
#if SPH_64 |
||||
|
||||
/**
|
||||
* This structure is a context for BMW-384 and BMW-512 computations: |
||||
* it contains the intermediate values and some data from the last |
||||
* entered block. Once a BMW computation has been performed, the |
||||
* context can be reused for another computation. |
||||
* |
||||
* The contents of this structure are private. A running BMW |
||||
* computation can be cloned by copying the context (e.g. with a simple |
||||
* <code>memcpy()</code>). |
||||
*/ |
||||
typedef struct { |
||||
#ifndef DOXYGEN_IGNORE |
||||
unsigned char buf[128]; /* first field, for alignment */ |
||||
size_t ptr; |
||||
sph_u64 H[16]; |
||||
sph_u64 bit_count; |
||||
#endif |
||||
} sph_bmw_big_context; |
||||
|
||||
/**
|
||||
* This structure is a context for BMW-384 computations. It is |
||||
* identical to the common <code>sph_bmw_big_context</code>. |
||||
*/ |
||||
typedef sph_bmw_big_context sph_bmw384_context; |
||||
|
||||
/**
|
||||
* This structure is a context for BMW-512 computations. It is |
||||
* identical to the common <code>sph_bmw_big_context</code>. |
||||
*/ |
||||
typedef sph_bmw_big_context sph_bmw512_context; |
||||
|
||||
#endif |
||||
|
||||
/**
|
||||
* Initialize a BMW-224 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the BMW-224 context (pointer to a |
||||
* <code>sph_bmw224_context</code>) |
||||
*/ |
||||
void sph_bmw224_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the BMW-224 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_bmw224(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current BMW-224 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (28 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the BMW-224 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_bmw224_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (28 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the BMW-224 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_bmw224_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
/**
|
||||
* Initialize a BMW-256 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the BMW-256 context (pointer to a |
||||
* <code>sph_bmw256_context</code>) |
||||
*/ |
||||
void sph_bmw256_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the BMW-256 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_bmw256(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current BMW-256 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (32 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the BMW-256 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_bmw256_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (32 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the BMW-256 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_bmw256_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
#if SPH_64 |
||||
|
||||
/**
|
||||
* Initialize a BMW-384 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the BMW-384 context (pointer to a |
||||
* <code>sph_bmw384_context</code>) |
||||
*/ |
||||
void sph_bmw384_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the BMW-384 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_bmw384(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current BMW-384 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (48 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the BMW-384 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_bmw384_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (48 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the BMW-384 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_bmw384_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
/**
|
||||
* Initialize a BMW-512 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the BMW-512 context (pointer to a |
||||
* <code>sph_bmw512_context</code>) |
||||
*/ |
||||
void sph_bmw512_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the BMW-512 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_bmw512(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current BMW-512 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (64 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the BMW-512 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_bmw512_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (64 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the BMW-512 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_bmw512_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
#endif |
||||
|
||||
#endif |
@ -0,0 +1,292 @@
@@ -0,0 +1,292 @@
|
||||
/* $Id: sph_cubehash.h 180 2010-05-08 02:29:25Z tp $ */ |
||||
/**
|
||||
* CubeHash interface. CubeHash is a family of functions which differ by |
||||
* their output size; this implementation defines CubeHash for output |
||||
* sizes 224, 256, 384 and 512 bits, with the "standard parameters" |
||||
* (CubeHash16/32 with the CubeHash specification notations). |
||||
* |
||||
* ==========================(LICENSE BEGIN)============================ |
||||
* |
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR |
||||
* |
||||
* Permission is hereby granted, free of charge, to any person obtaining |
||||
* a copy of this software and associated documentation files (the |
||||
* "Software"), to deal in the Software without restriction, including |
||||
* without limitation the rights to use, copy, modify, merge, publish, |
||||
* distribute, sublicense, and/or sell copies of the Software, and to |
||||
* permit persons to whom the Software is furnished to do so, subject to |
||||
* the following conditions: |
||||
* |
||||
* The above copyright notice and this permission notice shall be |
||||
* included in all copies or substantial portions of the Software. |
||||
* |
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||
* |
||||
* ===========================(LICENSE END)============================= |
||||
* |
||||
* @file sph_cubehash.h |
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com> |
||||
*/ |
||||
|
||||
#ifndef SPH_CUBEHASH_H__ |
||||
#define SPH_CUBEHASH_H__ |
||||
|
||||
#ifdef __cplusplus |
||||
extern "C"{ |
||||
#endif |
||||
|
||||
#include <stddef.h> |
||||
#include "sph_types.h" |
||||
|
||||
/**
|
||||
* Output size (in bits) for CubeHash-224. |
||||
*/ |
||||
#define SPH_SIZE_cubehash224 224 |
||||
|
||||
/**
|
||||
* Output size (in bits) for CubeHash-256. |
||||
*/ |
||||
#define SPH_SIZE_cubehash256 256 |
||||
|
||||
/**
|
||||
* Output size (in bits) for CubeHash-384. |
||||
*/ |
||||
#define SPH_SIZE_cubehash384 384 |
||||
|
||||
/**
|
||||
* Output size (in bits) for CubeHash-512. |
||||
*/ |
||||
#define SPH_SIZE_cubehash512 512 |
||||
|
||||
/**
|
||||
* This structure is a context for CubeHash computations: it contains the |
||||
* intermediate values and some data from the last entered block. Once |
||||
* a CubeHash computation has been performed, the context can be reused for |
||||
* another computation. |
||||
* |
||||
* The contents of this structure are private. A running CubeHash computation |
||||
* can be cloned by copying the context (e.g. with a simple |
||||
* <code>memcpy()</code>). |
||||
*/ |
||||
typedef struct { |
||||
#ifndef DOXYGEN_IGNORE |
||||
unsigned char buf[32]; /* first field, for alignment */ |
||||
size_t ptr; |
||||
sph_u32 state[32]; |
||||
#endif |
||||
} sph_cubehash_context; |
||||
|
||||
/**
|
||||
* Type for a CubeHash-224 context (identical to the common context). |
||||
*/ |
||||
typedef sph_cubehash_context sph_cubehash224_context; |
||||
|
||||
/**
|
||||
* Type for a CubeHash-256 context (identical to the common context). |
||||
*/ |
||||
typedef sph_cubehash_context sph_cubehash256_context; |
||||
|
||||
/**
|
||||
* Type for a CubeHash-384 context (identical to the common context). |
||||
*/ |
||||
typedef sph_cubehash_context sph_cubehash384_context; |
||||
|
||||
/**
|
||||
* Type for a CubeHash-512 context (identical to the common context). |
||||
*/ |
||||
typedef sph_cubehash_context sph_cubehash512_context; |
||||
|
||||
/**
|
||||
* Initialize a CubeHash-224 context. This process performs no memory |
||||
* allocation. |
||||
* |
||||
* @param cc the CubeHash-224 context (pointer to a |
||||
* <code>sph_cubehash224_context</code>) |
||||
*/ |
||||
void sph_cubehash224_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the CubeHash-224 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_cubehash224(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current CubeHash-224 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (28 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the CubeHash-224 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_cubehash224_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (28 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the CubeHash-224 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_cubehash224_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
/**
|
||||
* Initialize a CubeHash-256 context. This process performs no memory |
||||
* allocation. |
||||
* |
||||
* @param cc the CubeHash-256 context (pointer to a |
||||
* <code>sph_cubehash256_context</code>) |
||||
*/ |
||||
void sph_cubehash256_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the CubeHash-256 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_cubehash256(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current CubeHash-256 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (32 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the CubeHash-256 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_cubehash256_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (32 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the CubeHash-256 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_cubehash256_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
/**
|
||||
* Initialize a CubeHash-384 context. This process performs no memory |
||||
* allocation. |
||||
* |
||||
* @param cc the CubeHash-384 context (pointer to a |
||||
* <code>sph_cubehash384_context</code>) |
||||
*/ |
||||
void sph_cubehash384_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the CubeHash-384 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_cubehash384(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current CubeHash-384 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (48 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the CubeHash-384 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_cubehash384_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (48 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the CubeHash-384 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_cubehash384_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
/**
|
||||
* Initialize a CubeHash-512 context. This process performs no memory |
||||
* allocation. |
||||
* |
||||
* @param cc the CubeHash-512 context (pointer to a |
||||
* <code>sph_cubehash512_context</code>) |
||||
*/ |
||||
void sph_cubehash512_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the CubeHash-512 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_cubehash512(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current CubeHash-512 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (64 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the CubeHash-512 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_cubehash512_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (64 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the CubeHash-512 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_cubehash512_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
#ifdef __cplusplus |
||||
} |
||||
#endif |
||||
|
||||
#endif |
@ -0,0 +1,320 @@
@@ -0,0 +1,320 @@
|
||||
/* $Id: sph_echo.h 216 2010-06-08 09:46:57Z tp $ */ |
||||
/**
|
||||
* ECHO interface. ECHO is a family of functions which differ by |
||||
* their output size; this implementation defines ECHO for output |
||||
* sizes 224, 256, 384 and 512 bits. |
||||
* |
||||
* ==========================(LICENSE BEGIN)============================ |
||||
* |
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR |
||||
* |
||||
* Permission is hereby granted, free of charge, to any person obtaining |
||||
* a copy of this software and associated documentation files (the |
||||
* "Software"), to deal in the Software without restriction, including |
||||
* without limitation the rights to use, copy, modify, merge, publish, |
||||
* distribute, sublicense, and/or sell copies of the Software, and to |
||||
* permit persons to whom the Software is furnished to do so, subject to |
||||
* the following conditions: |
||||
* |
||||
* The above copyright notice and this permission notice shall be |
||||
* included in all copies or substantial portions of the Software. |
||||
* |
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||
* |
||||
* ===========================(LICENSE END)============================= |
||||
* |
||||
* @file sph_echo.h |
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com> |
||||
*/ |
||||
|
||||
#ifndef SPH_ECHO_H__ |
||||
#define SPH_ECHO_H__ |
||||
|
||||
#ifdef __cplusplus |
||||
extern "C"{ |
||||
#endif |
||||
|
||||
#include <stddef.h> |
||||
#include "sph_types.h" |
||||
|
||||
/**
|
||||
* Output size (in bits) for ECHO-224. |
||||
*/ |
||||
#define SPH_SIZE_echo224 224 |
||||
|
||||
/**
|
||||
* Output size (in bits) for ECHO-256. |
||||
*/ |
||||
#define SPH_SIZE_echo256 256 |
||||
|
||||
/**
|
||||
* Output size (in bits) for ECHO-384. |
||||
*/ |
||||
#define SPH_SIZE_echo384 384 |
||||
|
||||
/**
|
||||
* Output size (in bits) for ECHO-512. |
||||
*/ |
||||
#define SPH_SIZE_echo512 512 |
||||
|
||||
/**
|
||||
* This structure is a context for ECHO computations: it contains the |
||||
* intermediate values and some data from the last entered block. Once |
||||
* an ECHO computation has been performed, the context can be reused for |
||||
* another computation. This specific structure is used for ECHO-224 |
||||
* and ECHO-256. |
||||
* |
||||
* The contents of this structure are private. A running ECHO computation |
||||
* can be cloned by copying the context (e.g. with a simple |
||||
* <code>memcpy()</code>). |
||||
*/ |
||||
typedef struct { |
||||
#ifndef DOXYGEN_IGNORE /* private fields; compiled out when DOXYGEN_IGNORE is defined */ |
||||
unsigned char buf[192]; /* first field, for alignment; 192-byte buffer (presumably holds a not-yet-processed partial input block -- TODO confirm in echo.c) */ |
||||
size_t ptr; /* presumably the number of bytes currently stored in buf -- TODO confirm */ |
||||
union { /* two overlapping views of the same storage */ |
||||
sph_u32 Vs[4][4]; /* view as 4 rows of four sph_u32 words; always present */ |
||||
#if SPH_64 /* the sph_u64 view exists only when SPH_64 is nonzero */ |
||||
sph_u64 Vb[4][2]; /* view as 4 rows of two sph_u64 words */ |
||||
#endif |
||||
} u; /* presumably the running hash state -- TODO confirm */ |
||||
sph_u32 C0, C1, C2, C3; /* four sph_u32 words; presumably a multi-word processed-bits counter -- TODO confirm */ |
||||
#endif |
||||
} sph_echo_small_context; |
||||
|
||||
/**
|
||||
* This structure is a context for ECHO computations: it contains the |
||||
* intermediate values and some data from the last entered block. Once |
||||
* an ECHO computation has been performed, the context can be reused for |
||||
* another computation. This specific structure is used for ECHO-384 |
||||
* and ECHO-512. |
||||
* |
||||
* The contents of this structure are private. A running ECHO computation |
||||
* can be cloned by copying the context (e.g. with a simple |
||||
* <code>memcpy()</code>). |
||||
*/ |
||||
typedef struct { |
||||
#ifndef DOXYGEN_IGNORE /* private fields; compiled out when DOXYGEN_IGNORE is defined */ |
||||
unsigned char buf[128]; /* first field, for alignment; 128-byte buffer (presumably holds a not-yet-processed partial input block -- TODO confirm in echo.c) */ |
||||
size_t ptr; /* presumably the number of bytes currently stored in buf -- TODO confirm */ |
||||
union { /* two overlapping views of the same storage */ |
||||
sph_u32 Vs[8][4]; /* view as 8 rows of four sph_u32 words; always present */ |
||||
#if SPH_64 /* the sph_u64 view exists only when SPH_64 is nonzero */ |
||||
sph_u64 Vb[8][2]; /* view as 8 rows of two sph_u64 words */ |
||||
#endif |
||||
} u; /* presumably the running hash state -- TODO confirm */ |
||||
sph_u32 C0, C1, C2, C3; /* four sph_u32 words; presumably a multi-word processed-bits counter -- TODO confirm */ |
||||
#endif |
||||
} sph_echo_big_context; |
||||
|
||||
/**
|
||||
* Type for an ECHO-224 context (identical to the common "small" context). |
||||
*/ |
||||
typedef sph_echo_small_context sph_echo224_context; |
||||
|
||||
/**
|
||||
* Type for an ECHO-256 context (identical to the common "small" context). |
||||
*/ |
||||
typedef sph_echo_small_context sph_echo256_context; |
||||
|
||||
/**
|
||||
* Type for an ECHO-384 context (identical to the common "big" context). |
||||
*/ |
||||
typedef sph_echo_big_context sph_echo384_context; |
||||
|
||||
/**
|
||||
* Type for an ECHO-512 context (identical to the common "big" context). |
||||
*/ |
||||
typedef sph_echo_big_context sph_echo512_context; |
||||
|
||||
/**
|
||||
* Initialize an ECHO-224 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the ECHO-224 context (pointer to a |
||||
* <code>sph_echo224_context</code>) |
||||
*/ |
||||
void sph_echo224_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the ECHO-224 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_echo224(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current ECHO-224 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (28 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the ECHO-224 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_echo224_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (28 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the ECHO-224 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_echo224_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
/**
|
||||
* Initialize an ECHO-256 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the ECHO-256 context (pointer to a |
||||
* <code>sph_echo256_context</code>) |
||||
*/ |
||||
void sph_echo256_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the ECHO-256 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_echo256(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current ECHO-256 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (32 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the ECHO-256 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_echo256_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (32 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the ECHO-256 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_echo256_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
/**
|
||||
* Initialize an ECHO-384 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the ECHO-384 context (pointer to a |
||||
* <code>sph_echo384_context</code>) |
||||
*/ |
||||
void sph_echo384_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the ECHO-384 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_echo384(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current ECHO-384 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (48 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the ECHO-384 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_echo384_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (48 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the ECHO-384 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_echo384_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
/**
|
||||
* Initialize an ECHO-512 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the ECHO-512 context (pointer to a |
||||
* <code>sph_echo512_context</code>) |
||||
*/ |
||||
void sph_echo512_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the ECHO-512 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_echo512(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current ECHO-512 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (64 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the ECHO-512 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_echo512_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (64 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the ECHO-512 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_echo512_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
#ifdef __cplusplus |
||||
} |
||||
#endif |
||||
|
||||
#endif |
@ -0,0 +1,290 @@
@@ -0,0 +1,290 @@
|
||||
/* $Id: sph_jh.h 216 2010-06-08 09:46:57Z tp $ */ |
||||
/**
|
||||
* JH interface. JH is a family of functions which differ by |
||||
* their output size; this implementation defines JH for output |
||||
* sizes 224, 256, 384 and 512 bits. |
||||
* |
||||
* ==========================(LICENSE BEGIN)============================ |
||||
* |
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR |
||||
* |
||||
* Permission is hereby granted, free of charge, to any person obtaining |
||||
* a copy of this software and associated documentation files (the |
||||
* "Software"), to deal in the Software without restriction, including |
||||
* without limitation the rights to use, copy, modify, merge, publish, |
||||
* distribute, sublicense, and/or sell copies of the Software, and to |
||||
* permit persons to whom the Software is furnished to do so, subject to |
||||
* the following conditions: |
||||
* |
||||
* The above copyright notice and this permission notice shall be |
||||
* included in all copies or substantial portions of the Software. |
||||
* |
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||
* |
||||
* ===========================(LICENSE END)============================= |
||||
* |
||||
* @file sph_jh.h |
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com> |
||||
*/ |
||||
|
||||
#ifndef SPH_JH_H__ |
||||
#define SPH_JH_H__ |
||||
|
||||
#include <stddef.h> |
||||
#include "sph_types.h" |
||||
|
||||
/**
|
||||
* Output size (in bits) for JH-224. |
||||
*/ |
||||
#define SPH_SIZE_jh224 224 |
||||
|
||||
/**
|
||||
* Output size (in bits) for JH-256. |
||||
*/ |
||||
#define SPH_SIZE_jh256 256 |
||||
|
||||
/**
|
||||
* Output size (in bits) for JH-384. |
||||
*/ |
||||
#define SPH_SIZE_jh384 384 |
||||
|
||||
/**
|
||||
* Output size (in bits) for JH-512. |
||||
*/ |
||||
#define SPH_SIZE_jh512 512 |
||||
|
||||
/**
|
||||
* This structure is a context for JH computations: it contains the |
||||
* intermediate values and some data from the last entered block. Once |
||||
* a JH computation has been performed, the context can be reused for |
||||
* another computation. |
||||
* |
||||
* The contents of this structure are private. A running JH computation |
||||
* can be cloned by copying the context (e.g. with a simple |
||||
* <code>memcpy()</code>). |
||||
*/ |
||||
typedef struct { |
||||
#ifndef DOXYGEN_IGNORE /* private fields; compiled out when DOXYGEN_IGNORE is defined */ |
||||
unsigned char buf[64]; /* first field, for alignment; 64-byte buffer (presumably holds a not-yet-processed partial input block -- TODO confirm in jh.c) */ |
||||
size_t ptr; /* presumably the number of bytes currently stored in buf -- TODO confirm */ |
||||
union { /* overlapping views of the same storage */ |
||||
#if SPH_64 /* the sph_u64 view exists only when SPH_64 is nonzero */ |
||||
sph_u64 wide[16]; /* view as sixteen sph_u64 words */ |
||||
#endif |
||||
sph_u32 narrow[32]; /* view as thirty-two sph_u32 words; always present */ |
||||
} H; /* presumably the running hash state -- TODO confirm */ |
||||
#if SPH_64 /* counter representation depends on SPH_64 */ |
||||
sph_u64 block_count; /* single sph_u64 counter; presumably counts processed blocks -- TODO confirm */ |
||||
#else |
||||
sph_u32 block_count_high, block_count_low; /* presumably the same counter split into high and low sph_u32 halves -- TODO confirm */ |
||||
#endif |
||||
#endif |
||||
} sph_jh_context; |
||||
|
||||
/**
|
||||
* Type for a JH-224 context (identical to the common context). |
||||
*/ |
||||
typedef sph_jh_context sph_jh224_context; |
||||
|
||||
/**
|
||||
* Type for a JH-256 context (identical to the common context). |
||||
*/ |
||||
typedef sph_jh_context sph_jh256_context; |
||||
|
||||
/**
|
||||
* Type for a JH-384 context (identical to the common context). |
||||
*/ |
||||
typedef sph_jh_context sph_jh384_context; |
||||
|
||||
/**
|
||||
* Type for a JH-512 context (identical to the common context). |
||||
*/ |
||||
typedef sph_jh_context sph_jh512_context; |
||||
|
||||
/**
|
||||
* Initialize a JH-224 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the JH-224 context (pointer to a |
||||
* <code>sph_jh224_context</code>) |
||||
*/ |
||||
void sph_jh224_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the JH-224 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_jh224(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current JH-224 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (28 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the JH-224 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_jh224_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (28 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the JH-224 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_jh224_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
/**
|
||||
* Initialize a JH-256 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the JH-256 context (pointer to a |
||||
* <code>sph_jh256_context</code>) |
||||
*/ |
||||
void sph_jh256_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the JH-256 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_jh256(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current JH-256 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (32 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the JH-256 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_jh256_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (32 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the JH-256 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_jh256_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
/**
|
||||
* Initialize a JH-384 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the JH-384 context (pointer to a |
||||
* <code>sph_jh384_context</code>) |
||||
*/ |
||||
void sph_jh384_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the JH-384 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_jh384(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current JH-384 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (48 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the JH-384 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_jh384_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (48 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the JH-384 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_jh384_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
/**
|
||||
* Initialize a JH-512 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the JH-512 context (pointer to a |
||||
* <code>sph_jh512_context</code>) |
||||
*/ |
||||
void sph_jh512_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the JH-512 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_jh512(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current JH-512 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (64 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the JH-512 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_jh512_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (64 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the JH-512 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_jh512_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
#endif |
@ -0,0 +1,296 @@
@@ -0,0 +1,296 @@
|
||||
/* $Id: sph_luffa.h 154 2010-04-26 17:00:24Z tp $ */ |
||||
/**
|
||||
* Luffa interface. Luffa is a family of functions which differ by |
||||
* their output size; this implementation defines Luffa for output |
||||
* sizes 224, 256, 384 and 512 bits. |
||||
* |
||||
* ==========================(LICENSE BEGIN)============================ |
||||
* |
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR |
||||
* |
||||
* Permission is hereby granted, free of charge, to any person obtaining |
||||
* a copy of this software and associated documentation files (the |
||||
* "Software"), to deal in the Software without restriction, including |
||||
* without limitation the rights to use, copy, modify, merge, publish, |
||||
* distribute, sublicense, and/or sell copies of the Software, and to |
||||
* permit persons to whom the Software is furnished to do so, subject to |
||||
* the following conditions: |
||||
* |
||||
* The above copyright notice and this permission notice shall be |
||||
* included in all copies or substantial portions of the Software. |
||||
* |
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||
* |
||||
* ===========================(LICENSE END)============================= |
||||
* |
||||
* @file sph_luffa.h |
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com> |
||||
*/ |
||||
|
||||
#ifndef SPH_LUFFA_H__ |
||||
#define SPH_LUFFA_H__ |
||||
|
||||
#ifdef __cplusplus |
||||
extern "C"{ |
||||
#endif |
||||
|
||||
#include <stddef.h> |
||||
#include "sph_types.h" |
||||
|
||||
/**
|
||||
* Output size (in bits) for Luffa-224. |
||||
*/ |
||||
#define SPH_SIZE_luffa224 224 |
||||
|
||||
/**
|
||||
* Output size (in bits) for Luffa-256. |
||||
*/ |
||||
#define SPH_SIZE_luffa256 256 |
||||
|
||||
/**
|
||||
* Output size (in bits) for Luffa-384. |
||||
*/ |
||||
#define SPH_SIZE_luffa384 384 |
||||
|
||||
/**
|
||||
* Output size (in bits) for Luffa-512. |
||||
*/ |
||||
#define SPH_SIZE_luffa512 512 |
||||
|
||||
/**
|
||||
* This structure is a context for Luffa-224 computations: it contains |
||||
* the intermediate values and some data from the last entered block. |
||||
* Once a Luffa computation has been performed, the context can be |
||||
* reused for another computation. |
||||
* |
||||
* The contents of this structure are private. A running Luffa |
||||
* computation can be cloned by copying the context (e.g. with a simple |
||||
* <code>memcpy()</code>). |
||||
*/ |
||||
typedef struct { |
||||
#ifndef DOXYGEN_IGNORE /* private fields; compiled out when DOXYGEN_IGNORE is defined */ |
||||
unsigned char buf[32]; /* first field, for alignment; 32-byte buffer (presumably holds a not-yet-processed partial input block -- TODO confirm in luffa.c) */ |
||||
size_t ptr; /* presumably the number of bytes currently stored in buf -- TODO confirm */ |
||||
sph_u32 V[3][8]; /* 3 rows of eight sph_u32 words; presumably the running hash state -- TODO confirm */ |
||||
#endif |
||||
} sph_luffa224_context; |
||||
|
||||
/**
|
||||
* This structure is a context for Luffa-256 computations. It is |
||||
* identical to <code>sph_luffa224_context</code>. |
||||
*/ |
||||
typedef sph_luffa224_context sph_luffa256_context; |
||||
|
||||
/**
|
||||
* This structure is a context for Luffa-384 computations. |
||||
*/ |
||||
typedef struct { |
||||
#ifndef DOXYGEN_IGNORE /* private fields; compiled out when DOXYGEN_IGNORE is defined */ |
||||
unsigned char buf[32]; /* first field, for alignment; 32-byte buffer (presumably holds a not-yet-processed partial input block -- TODO confirm in luffa.c) */ |
||||
size_t ptr; /* presumably the number of bytes currently stored in buf -- TODO confirm */ |
||||
sph_u32 V[4][8]; /* 4 rows of eight sph_u32 words; presumably the running hash state -- TODO confirm */ |
||||
#endif |
||||
} sph_luffa384_context; |
||||
|
||||
/**
|
||||
* This structure is a context for Luffa-512 computations. |
||||
*/ |
||||
typedef struct { |
||||
#ifndef DOXYGEN_IGNORE /* private fields; compiled out when DOXYGEN_IGNORE is defined */ |
||||
unsigned char buf[32]; /* first field, for alignment; 32-byte buffer (presumably holds a not-yet-processed partial input block -- TODO confirm in luffa.c) */ |
||||
size_t ptr; /* presumably the number of bytes currently stored in buf -- TODO confirm */ |
||||
sph_u32 V[5][8]; /* 5 rows of eight sph_u32 words; presumably the running hash state -- TODO confirm */ |
||||
#endif |
||||
} sph_luffa512_context; |
||||
|
||||
/**
|
||||
* Initialize a Luffa-224 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the Luffa-224 context (pointer to a |
||||
* <code>sph_luffa224_context</code>) |
||||
*/ |
||||
void sph_luffa224_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the Luffa-224 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_luffa224(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current Luffa-224 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (28 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the Luffa-224 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_luffa224_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (28 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the Luffa-224 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_luffa224_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
/**
|
||||
* Initialize a Luffa-256 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the Luffa-256 context (pointer to a |
||||
* <code>sph_luffa256_context</code>) |
||||
*/ |
||||
void sph_luffa256_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the Luffa-256 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_luffa256(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current Luffa-256 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (32 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the Luffa-256 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_luffa256_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (32 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the Luffa-256 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_luffa256_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
/**
|
||||
* Initialize a Luffa-384 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the Luffa-384 context (pointer to a |
||||
* <code>sph_luffa384_context</code>) |
||||
*/ |
||||
void sph_luffa384_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the Luffa-384 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_luffa384(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current Luffa-384 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (48 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the Luffa-384 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_luffa384_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (48 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the Luffa-384 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_luffa384_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
/**
|
||||
* Initialize a Luffa-512 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the Luffa-512 context (pointer to a |
||||
* <code>sph_luffa512_context</code>) |
||||
*/ |
||||
void sph_luffa512_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the Luffa-512 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_luffa512(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current Luffa-512 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (64 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the Luffa-512 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_luffa512_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (64 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the Luffa-512 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_luffa512_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
#ifdef __cplusplus |
||||
} |
||||
#endif |
||||
|
||||
#endif |
@ -0,0 +1,314 @@
@@ -0,0 +1,314 @@
|
||||
/* $Id: sph_shavite.h 208 2010-06-02 20:33:00Z tp $ */ |
||||
/**
|
||||
* SHAvite-3 interface. This code implements SHAvite-3 with the |
||||
* recommended parameters for SHA-3, with outputs of 224, 256, 384 and |
||||
* 512 bits. In the following, we call the function "SHAvite" (without |
||||
* the "-3" suffix), thus "SHAvite-224" is "SHAvite-3 with a 224-bit |
||||
* output". |
||||
* |
||||
* ==========================(LICENSE BEGIN)============================ |
||||
* |
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR |
||||
* |
||||
* Permission is hereby granted, free of charge, to any person obtaining |
||||
* a copy of this software and associated documentation files (the |
||||
* "Software"), to deal in the Software without restriction, including |
||||
* without limitation the rights to use, copy, modify, merge, publish, |
||||
* distribute, sublicense, and/or sell copies of the Software, and to |
||||
* permit persons to whom the Software is furnished to do so, subject to |
||||
* the following conditions: |
||||
* |
||||
* The above copyright notice and this permission notice shall be |
||||
* included in all copies or substantial portions of the Software. |
||||
* |
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||
* |
||||
* ===========================(LICENSE END)============================= |
||||
* |
||||
* @file sph_shavite.h |
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com> |
||||
*/ |
||||
|
||||
#ifndef SPH_SHAVITE_H__ |
||||
#define SPH_SHAVITE_H__ |
||||
|
||||
#include <stddef.h> |
||||
#include "sph_types.h" |
||||
|
||||
#ifdef __cplusplus |
||||
extern "C"{ |
||||
#endif |
||||
|
||||
/**
|
||||
* Output size (in bits) for SHAvite-224. |
||||
*/ |
||||
#define SPH_SIZE_shavite224 224 |
||||
|
||||
/**
|
||||
* Output size (in bits) for SHAvite-256. |
||||
*/ |
||||
#define SPH_SIZE_shavite256 256 |
||||
|
||||
/**
|
||||
* Output size (in bits) for SHAvite-384. |
||||
*/ |
||||
#define SPH_SIZE_shavite384 384 |
||||
|
||||
/**
|
||||
* Output size (in bits) for SHAvite-512. |
||||
*/ |
||||
#define SPH_SIZE_shavite512 512 |
||||
|
||||
/**
|
||||
* This structure is a context for SHAvite-224 and SHAvite-256 computations: |
||||
* it contains the intermediate values and some data from the last |
||||
* entered block. Once a SHAvite computation has been performed, the |
||||
* context can be reused for another computation. |
||||
* |
||||
* The contents of this structure are private. A running SHAvite |
||||
* computation can be cloned by copying the context (e.g. with a simple |
||||
* <code>memcpy()</code>). |
||||
*/ |
||||
typedef struct { |
||||
/* Members are only declared when DOXYGEN_IGNORE is not defined. */
#ifndef DOXYGEN_IGNORE |
||||
/* 64-byte buffer. NOTE(review): presumably holds the not-yet-processed
 * tail of the input (one block) -- confirm against the implementation. */
unsigned char buf[64]; /* first field, for alignment */ |
||||
/* NOTE(review): presumably the current fill offset into buf -- confirm. */
size_t ptr; |
||||
/* Eight sph_u32 words. NOTE(review): presumably the chaining value
 * carried between blocks -- confirm. */
sph_u32 h[8]; |
||||
/* Two sph_u32 counters. NOTE(review): presumably the processed-length
 * counter split across two words -- confirm. */
sph_u32 count0, count1; |
||||
#endif |
||||
} sph_shavite_small_context; |
||||
|
||||
/**
|
||||
* This structure is a context for SHAvite-224 computations. It is |
||||
* identical to the common <code>sph_shavite_small_context</code>. |
||||
*/ |
||||
typedef sph_shavite_small_context sph_shavite224_context; |
||||
|
||||
/**
|
||||
* This structure is a context for SHAvite-256 computations. It is |
||||
* identical to the common <code>sph_shavite_small_context</code>. |
||||
*/ |
||||
typedef sph_shavite_small_context sph_shavite256_context; |
||||
|
||||
/**
|
||||
* This structure is a context for SHAvite-384 and SHAvite-512 computations: |
||||
* it contains the intermediate values and some data from the last |
||||
* entered block. Once a SHAvite computation has been performed, the |
||||
* context can be reused for another computation. |
||||
* |
||||
* The contents of this structure are private. A running SHAvite |
||||
* computation can be cloned by copying the context (e.g. with a simple |
||||
* <code>memcpy()</code>). |
||||
*/ |
||||
typedef struct { |
||||
/* Members are only declared when DOXYGEN_IGNORE is not defined. */
#ifndef DOXYGEN_IGNORE |
||||
/* 128-byte buffer. NOTE(review): presumably holds the not-yet-processed
 * tail of the input (one block) -- confirm against the implementation. */
unsigned char buf[128]; /* first field, for alignment */ |
||||
/* NOTE(review): presumably the current fill offset into buf -- confirm. */
size_t ptr; |
||||
/* Sixteen sph_u32 words. NOTE(review): presumably the chaining value
 * carried between blocks -- confirm. */
sph_u32 h[16]; |
||||
/* Four sph_u32 counters. NOTE(review): presumably the processed-length
 * counter split across four words -- confirm. */
sph_u32 count0, count1, count2, count3; |
||||
#endif |
||||
} sph_shavite_big_context; |
||||
|
||||
/**
|
||||
* This structure is a context for SHAvite-384 computations. It is |
||||
* identical to the common <code>sph_shavite_big_context</code>. |
||||
*/ |
||||
typedef sph_shavite_big_context sph_shavite384_context; |
||||
|
||||
/**
|
||||
* This structure is a context for SHAvite-512 computations. It is |
||||
* identical to the common <code>sph_shavite_big_context</code>. |
||||
*/ |
||||
typedef sph_shavite_big_context sph_shavite512_context; |
||||
|
||||
/**
|
||||
* Initialize a SHAvite-224 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the SHAvite-224 context (pointer to a |
||||
* <code>sph_shavite224_context</code>) |
||||
*/ |
||||
void sph_shavite224_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the SHAvite-224 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_shavite224(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current SHAvite-224 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (28 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the SHAvite-224 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_shavite224_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (28 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the SHAvite-224 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_shavite224_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
/**
|
||||
* Initialize a SHAvite-256 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the SHAvite-256 context (pointer to a |
||||
* <code>sph_shavite256_context</code>) |
||||
*/ |
||||
void sph_shavite256_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the SHAvite-256 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_shavite256(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current SHAvite-256 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (32 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the SHAvite-256 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_shavite256_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (32 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the SHAvite-256 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_shavite256_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
/**
|
||||
* Initialize a SHAvite-384 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the SHAvite-384 context (pointer to a |
||||
* <code>sph_shavite384_context</code>) |
||||
*/ |
||||
void sph_shavite384_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the SHAvite-384 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_shavite384(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current SHAvite-384 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (48 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the SHAvite-384 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_shavite384_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (48 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the SHAvite-384 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_shavite384_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
/**
|
||||
* Initialize a SHAvite-512 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the SHAvite-512 context (pointer to a |
||||
* <code>sph_shavite512_context</code>) |
||||
*/ |
||||
void sph_shavite512_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the SHAvite-512 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_shavite512(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current SHAvite-512 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (64 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the SHAvite-512 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_shavite512_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (64 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the SHAvite-512 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_shavite512_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
#ifdef __cplusplus |
||||
} |
||||
#endif |
||||
|
||||
#endif |
@ -0,0 +1,309 @@
@@ -0,0 +1,309 @@
|
||||
/* $Id: sph_simd.h 154 2010-04-26 17:00:24Z tp $ */ |
||||
/**
|
||||
* SIMD interface. SIMD is a family of functions which differ by |
||||
* their output size; this implementation defines SIMD for output |
||||
* sizes 224, 256, 384 and 512 bits. |
||||
* |
||||
* ==========================(LICENSE BEGIN)============================ |
||||
* |
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR |
||||
* |
||||
* Permission is hereby granted, free of charge, to any person obtaining |
||||
* a copy of this software and associated documentation files (the |
||||
* "Software"), to deal in the Software without restriction, including |
||||
* without limitation the rights to use, copy, modify, merge, publish, |
||||
* distribute, sublicense, and/or sell copies of the Software, and to |
||||
* permit persons to whom the Software is furnished to do so, subject to |
||||
* the following conditions: |
||||
* |
||||
* The above copyright notice and this permission notice shall be |
||||
* included in all copies or substantial portions of the Software. |
||||
* |
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||
* |
||||
* ===========================(LICENSE END)============================= |
||||
* |
||||
* @file sph_simd.h |
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com> |
||||
*/ |
||||
|
||||
#ifndef SPH_SIMD_H__ |
||||
#define SPH_SIMD_H__ |
||||
|
||||
#ifdef __cplusplus |
||||
extern "C"{ |
||||
#endif |
||||
|
||||
#include <stddef.h> |
||||
#include "sph_types.h" |
||||
|
||||
/**
|
||||
* Output size (in bits) for SIMD-224. |
||||
*/ |
||||
#define SPH_SIZE_simd224 224 |
||||
|
||||
/**
|
||||
* Output size (in bits) for SIMD-256. |
||||
*/ |
||||
#define SPH_SIZE_simd256 256 |
||||
|
||||
/**
|
||||
* Output size (in bits) for SIMD-384. |
||||
*/ |
||||
#define SPH_SIZE_simd384 384 |
||||
|
||||
/**
|
||||
* Output size (in bits) for SIMD-512. |
||||
*/ |
||||
#define SPH_SIZE_simd512 512 |
||||
|
||||
/**
|
||||
* This structure is a context for SIMD computations: it contains the |
||||
* intermediate values and some data from the last entered block. Once |
||||
* an SIMD computation has been performed, the context can be reused for |
||||
* another computation. This specific structure is used for SIMD-224 |
||||
* and SIMD-256. |
||||
* |
||||
* The contents of this structure are private. A running SIMD computation |
||||
* can be cloned by copying the context (e.g. with a simple |
||||
* <code>memcpy()</code>). |
||||
*/ |
||||
typedef struct { |
||||
/* Members are only declared when DOXYGEN_IGNORE is not defined. */
#ifndef DOXYGEN_IGNORE |
||||
/* 64-byte buffer. NOTE(review): presumably holds the not-yet-processed
 * tail of the input (one block) -- confirm against the implementation. */
unsigned char buf[64]; /* first field, for alignment */ |
||||
/* NOTE(review): presumably the current fill offset into buf -- confirm. */
size_t ptr; |
||||
/* Sixteen sph_u32 words. NOTE(review): presumably the internal hash
 * state carried between blocks -- confirm. */
sph_u32 state[16]; |
||||
/* Two sph_u32 counters. NOTE(review): presumably the low and high words
 * of a processed-block or processed-bit counter -- confirm. */
sph_u32 count_low, count_high; |
||||
#endif |
||||
} sph_simd_small_context; |
||||
|
||||
/**
|
||||
* This structure is a context for SIMD computations: it contains the |
||||
* intermediate values and some data from the last entered block. Once |
||||
* an SIMD computation has been performed, the context can be reused for |
||||
* another computation. This specific structure is used for SIMD-384 |
||||
* and SIMD-512. |
||||
* |
||||
* The contents of this structure are private. A running SIMD computation |
||||
* can be cloned by copying the context (e.g. with a simple |
||||
* <code>memcpy()</code>). |
||||
*/ |
||||
typedef struct { |
||||
/* Members are only declared when DOXYGEN_IGNORE is not defined. */
#ifndef DOXYGEN_IGNORE |
||||
/* 128-byte buffer. NOTE(review): presumably holds the not-yet-processed
 * tail of the input (one block) -- confirm against the implementation. */
unsigned char buf[128]; /* first field, for alignment */ |
||||
/* NOTE(review): presumably the current fill offset into buf -- confirm. */
size_t ptr; |
||||
/* Thirty-two sph_u32 words. NOTE(review): presumably the internal hash
 * state carried between blocks -- confirm. */
sph_u32 state[32]; |
||||
/* Two sph_u32 counters. NOTE(review): presumably the low and high words
 * of a processed-block or processed-bit counter -- confirm. */
sph_u32 count_low, count_high; |
||||
#endif |
||||
} sph_simd_big_context; |
||||
|
||||
/**
|
||||
* Type for a SIMD-224 context (identical to the common "small" context). |
||||
*/ |
||||
typedef sph_simd_small_context sph_simd224_context; |
||||
|
||||
/**
|
||||
* Type for a SIMD-256 context (identical to the common "small" context). |
||||
*/ |
||||
typedef sph_simd_small_context sph_simd256_context; |
||||
|
||||
/**
|
||||
* Type for a SIMD-384 context (identical to the common "big" context). |
||||
*/ |
||||
typedef sph_simd_big_context sph_simd384_context; |
||||
|
||||
/**
|
||||
* Type for a SIMD-512 context (identical to the common "big" context). |
||||
*/ |
||||
typedef sph_simd_big_context sph_simd512_context; |
||||
|
||||
/**
|
||||
* Initialize an SIMD-224 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the SIMD-224 context (pointer to a |
||||
* <code>sph_simd224_context</code>) |
||||
*/ |
||||
void sph_simd224_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the SIMD-224 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_simd224(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current SIMD-224 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (28 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the SIMD-224 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_simd224_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (28 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the SIMD-224 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_simd224_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
/**
|
||||
* Initialize an SIMD-256 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the SIMD-256 context (pointer to a |
||||
* <code>sph_simd256_context</code>) |
||||
*/ |
||||
void sph_simd256_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the SIMD-256 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_simd256(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current SIMD-256 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (32 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the SIMD-256 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_simd256_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (32 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the SIMD-256 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_simd256_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
/**
|
||||
* Initialize an SIMD-384 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the SIMD-384 context (pointer to a |
||||
* <code>sph_simd384_context</code>) |
||||
*/ |
||||
void sph_simd384_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the SIMD-384 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_simd384(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current SIMD-384 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (48 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the SIMD-384 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_simd384_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (48 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the SIMD-384 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_simd384_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
/**
|
||||
* Initialize an SIMD-512 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the SIMD-512 context (pointer to a |
||||
* <code>sph_simd512_context</code>) |
||||
*/ |
||||
void sph_simd512_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the SIMD-512 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_simd512(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current SIMD-512 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (64 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the SIMD-512 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_simd512_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (64 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the SIMD-512 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_simd512_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
#ifdef __cplusplus |
||||
} |
||||
#endif |
||||
|
||||
#endif |
@ -0,0 +1,290 @@
@@ -0,0 +1,290 @@
|
||||
/* $Id: sph_skein.h 253 2011-06-07 18:33:10Z tp $ */ |
||||
/**
|
||||
* Skein interface. The Skein specification defines three main |
||||
* functions, called Skein-256, Skein-512 and Skein-1024, which can be |
||||
* further parameterized with an output length. For the SHA-3 |
||||
* competition, Skein-512 is used for output sizes of 224, 256, 384 and |
||||
* 512 bits; this is what this code implements. Thus, we hereafter call |
||||
* Skein-224, Skein-256, Skein-384 and Skein-512 what the Skein |
||||
* specification defines as Skein-512-224, Skein-512-256, Skein-512-384 |
||||
* and Skein-512-512, respectively. |
||||
* |
||||
* ==========================(LICENSE BEGIN)============================ |
||||
* |
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR |
||||
* |
||||
* Permission is hereby granted, free of charge, to any person obtaining |
||||
* a copy of this software and associated documentation files (the |
||||
* "Software"), to deal in the Software without restriction, including |
||||
* without limitation the rights to use, copy, modify, merge, publish, |
||||
* distribute, sublicense, and/or sell copies of the Software, and to |
||||
* permit persons to whom the Software is furnished to do so, subject to |
||||
* the following conditions: |
||||
* |
||||
* The above copyright notice and this permission notice shall be |
||||
* included in all copies or substantial portions of the Software. |
||||
* |
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||
* |
||||
* ===========================(LICENSE END)============================= |
||||
* |
||||
* @file sph_skein.h |
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com> |
||||
*/ |
||||
|
||||
#ifndef SPH_SKEIN_H__ |
||||
#define SPH_SKEIN_H__ |
||||
|
||||
#include <stddef.h> |
||||
#include "sph_types.h" |
||||
|
||||
#if SPH_64 |
||||
|
||||
/**
|
||||
* Output size (in bits) for Skein-224. |
||||
*/ |
||||
#define SPH_SIZE_skein224 224 |
||||
|
||||
/**
|
||||
* Output size (in bits) for Skein-256. |
||||
*/ |
||||
#define SPH_SIZE_skein256 256 |
||||
|
||||
/**
|
||||
* Output size (in bits) for Skein-384. |
||||
*/ |
||||
#define SPH_SIZE_skein384 384 |
||||
|
||||
/**
|
||||
* Output size (in bits) for Skein-512. |
||||
*/ |
||||
#define SPH_SIZE_skein512 512 |
||||
|
||||
/**
|
||||
* This structure is a context for Skein computations (with a 224-, 256-, |
||||
* 384- or 512-bit output): it contains the intermediate values and some data |
||||
* from the last entered block. Once a Skein computation has been |
||||
* performed, the context can be reused for another computation. |
||||
* |
||||
* The contents of this structure are private. A running Skein computation |
||||
* can be cloned by copying the context (e.g. with a simple |
||||
* <code>memcpy()</code>). |
||||
*/ |
||||
typedef struct { |
||||
/* Members are only declared when DOXYGEN_IGNORE is not defined. */
#ifndef DOXYGEN_IGNORE |
||||
/* 64-byte buffer. NOTE(review): presumably holds the not-yet-processed
 * tail of the input (one block) -- confirm against the implementation. */
unsigned char buf[64]; /* first field, for alignment */ |
||||
/* NOTE(review): presumably the current fill offset into buf -- confirm. */
size_t ptr; |
||||
/* Eight sph_u64 words. NOTE(review): presumably the chaining value
 * carried between blocks -- confirm. */
sph_u64 h0, h1, h2, h3, h4, h5, h6, h7; |
||||
/* NOTE(review): presumably a count of blocks processed so far -- confirm. */
sph_u64 bcount; |
||||
#endif |
||||
} sph_skein_big_context; |
||||
|
||||
/**
|
||||
* Type for a Skein-224 context (identical to the common "big" context). |
||||
*/ |
||||
typedef sph_skein_big_context sph_skein224_context; |
||||
|
||||
/**
|
||||
* Type for a Skein-256 context (identical to the common "big" context). |
||||
*/ |
||||
typedef sph_skein_big_context sph_skein256_context; |
||||
|
||||
/**
|
||||
* Type for a Skein-384 context (identical to the common "big" context). |
||||
*/ |
||||
typedef sph_skein_big_context sph_skein384_context; |
||||
|
||||
/**
|
||||
* Type for a Skein-512 context (identical to the common "big" context). |
||||
*/ |
||||
typedef sph_skein_big_context sph_skein512_context; |
||||
|
||||
/**
|
||||
* Initialize a Skein-224 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the Skein-224 context (pointer to a |
||||
* <code>sph_skein224_context</code>) |
||||
*/ |
||||
void sph_skein224_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the Skein-224 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_skein224(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current Skein-224 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (28 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the Skein-224 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_skein224_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (28 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the Skein-224 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_skein224_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
/**
|
||||
* Initialize a Skein-256 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the Skein-256 context (pointer to a |
||||
* <code>sph_skein256_context</code>) |
||||
*/ |
||||
void sph_skein256_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the Skein-256 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_skein256(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current Skein-256 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (32 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the Skein-256 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_skein256_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (32 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the Skein-256 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_skein256_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
/**
|
||||
* Initialize a Skein-384 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the Skein-384 context (pointer to a |
||||
* <code>sph_skein384_context</code>) |
||||
*/ |
||||
void sph_skein384_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the Skein-384 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_skein384(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current Skein-384 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (48 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the Skein-384 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_skein384_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (48 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 down to 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the Skein-384 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_skein384_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
/**
|
||||
* Initialize a Skein-512 context. This process performs no memory allocation. |
||||
* |
||||
* @param cc the Skein-512 context (pointer to a |
||||
* <code>sph_skein512_context</code>) |
||||
*/ |
||||
void sph_skein512_init(void *cc); |
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||
* (in which case this function does nothing). |
||||
* |
||||
* @param cc the Skein-512 context |
||||
* @param data the input data |
||||
* @param len the input data length (in bytes) |
||||
*/ |
||||
void sph_skein512(void *cc, const void *data, size_t len); |
||||
|
||||
/**
|
||||
* Terminate the current Skein-512 computation and output the result into |
||||
* the provided buffer. The destination buffer must be wide enough to |
||||
* accommodate the result (64 bytes). The context is automatically |
||||
* reinitialized. |
||||
* |
||||
* @param cc the Skein-512 context |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_skein512_close(void *cc, void *dst); |
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then |
||||
* terminate it and output the result in the provided buffer, which must |
||||
* be wide enough to accommodate the result (64 bytes). If bit number i |
||||
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||
* numbered 7 down to 8-n (this is the big-endian convention at the byte |
||||
* level). The context is automatically reinitialized. |
||||
* |
||||
* @param cc the Skein-512 context |
||||
* @param ub the extra bits |
||||
* @param n the number of extra bits (0 to 7) |
||||
* @param dst the destination buffer |
||||
*/ |
||||
void sph_skein512_addbits_and_close( |
||||
void *cc, unsigned ub, unsigned n, void *dst); |
||||
|
||||
#endif |
||||
|
||||
#endif |
Loading…
Reference in new issue