Browse Source
Project was updated for VS2013 and CUDA SDK 6.5; also add a --cputest function to dump CPU hash results. TODO: x15 is not fully functional, but the first loop seems OK. Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> (branch: master)
Tanguy Pruvot
10 years ago
32 changed files with 8179 additions and 436 deletions
@ -0,0 +1,810 @@ |
|||||||
|
/* $Id: shabal.c 175 2010-05-07 16:03:20Z tp $ */ |
||||||
|
/*
|
||||||
|
* Shabal implementation. |
||||||
|
* |
||||||
|
* ==========================(LICENSE BEGIN)============================ |
||||||
|
* |
||||||
|
* Copyright (c) 2007-2010 Projet RNRT SAPHIR |
||||||
|
* |
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining |
||||||
|
* a copy of this software and associated documentation files (the |
||||||
|
* "Software"), to deal in the Software without restriction, including |
||||||
|
* without limitation the rights to use, copy, modify, merge, publish, |
||||||
|
* distribute, sublicense, and/or sell copies of the Software, and to |
||||||
|
* permit persons to whom the Software is furnished to do so, subject to |
||||||
|
* the following conditions: |
||||||
|
* |
||||||
|
* The above copyright notice and this permission notice shall be |
||||||
|
* included in all copies or substantial portions of the Software. |
||||||
|
* |
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||||
|
* |
||||||
|
* ===========================(LICENSE END)============================= |
||||||
|
* |
||||||
|
* @author Thomas Pornin <thomas.pornin@cryptolog.com> |
||||||
|
*/ |
||||||
|
|
||||||
|
#include <stddef.h> |
||||||
|
#include <string.h> |
||||||
|
|
||||||
|
#include "sph_shabal.h" |
||||||
|
#ifdef __cplusplus |
||||||
|
extern "C"{ |
||||||
|
#endif |
||||||
|
|
||||||
|
#ifdef _MSC_VER |
||||||
|
#pragma warning (disable: 4146) |
||||||
|
#endif |
||||||
|
|
||||||
|
/*
|
||||||
|
* Part of this code was automatically generated (the part between |
||||||
|
* the "BEGIN" and "END" markers). |
||||||
|
*/ |
||||||
|
|
||||||
|
/*
 * NOTE(review): 16 matches the number of message words (M0..MF) decoded
 * per block below; the macro itself is not referenced in the visible code.
 */
#define sM    16

/* Short local aliases for the sphlib constant / truncation macros. */
#define C32   SPH_C32
#define T32   SPH_T32

/*
 * NOTE(review): 13, 9 and 6 are the B-word index offsets (relative to
 * the updated B word, modulo 16) that the PERM_STEP_* macros pass to
 * PERM_ELT; the macros themselves are not referenced in the visible code.
 */
#define O1    13
#define O2     9
#define O3     6
||||||
|
|
||||||
|
/*
|
||||||
|
* We copy the state into local variables, so that the compiler knows |
||||||
|
* that it can optimize them at will. |
||||||
|
*/ |
||||||
|
|
||||||
|
/* BEGIN -- automatically generated code. */ |
||||||
|
|
||||||
|
/*
 * Declares the local working variables used by the other macros:
 * twelve A words, sixteen B words, sixteen C words, sixteen message
 * words M, and the two counter words Wlow/Whigh.
 * The expansion already ends with a semicolon.
 */
#define DECL_STATE \
	sph_u32 A00, A01, A02, A03, A04, A05, \
	        A06, A07, A08, A09, A0A, A0B; \
	sph_u32 B0, B1, B2, B3, B4, B5, B6, B7, \
	        B8, B9, BA, BB, BC, BD, BE, BF; \
	sph_u32 C0, C1, C2, C3, C4, C5, C6, C7, \
	        C8, C9, CA, CB, CC, CD, CE, CF; \
	sph_u32 M0, M1, M2, M3, M4, M5, M6, M7, \
	        M8, M9, MA, MB, MC, MD, ME, MF; \
	sph_u32 Wlow, Whigh;
||||||
|
|
||||||
|
/*
 * Loads the A[12], B[16], C[16] arrays and the Wlow/Whigh counter of
 * the context pointed to by `state` into the locals declared by
 * DECL_STATE.
 */
#define READ_STATE(state)   do { \
		A00 = (state)->A[0]; \
		A01 = (state)->A[1]; \
		A02 = (state)->A[2]; \
		A03 = (state)->A[3]; \
		A04 = (state)->A[4]; \
		A05 = (state)->A[5]; \
		A06 = (state)->A[6]; \
		A07 = (state)->A[7]; \
		A08 = (state)->A[8]; \
		A09 = (state)->A[9]; \
		A0A = (state)->A[10]; \
		A0B = (state)->A[11]; \
		B0 = (state)->B[0]; \
		B1 = (state)->B[1]; \
		B2 = (state)->B[2]; \
		B3 = (state)->B[3]; \
		B4 = (state)->B[4]; \
		B5 = (state)->B[5]; \
		B6 = (state)->B[6]; \
		B7 = (state)->B[7]; \
		B8 = (state)->B[8]; \
		B9 = (state)->B[9]; \
		BA = (state)->B[10]; \
		BB = (state)->B[11]; \
		BC = (state)->B[12]; \
		BD = (state)->B[13]; \
		BE = (state)->B[14]; \
		BF = (state)->B[15]; \
		C0 = (state)->C[0]; \
		C1 = (state)->C[1]; \
		C2 = (state)->C[2]; \
		C3 = (state)->C[3]; \
		C4 = (state)->C[4]; \
		C5 = (state)->C[5]; \
		C6 = (state)->C[6]; \
		C7 = (state)->C[7]; \
		C8 = (state)->C[8]; \
		C9 = (state)->C[9]; \
		CA = (state)->C[10]; \
		CB = (state)->C[11]; \
		CC = (state)->C[12]; \
		CD = (state)->C[13]; \
		CE = (state)->C[14]; \
		CF = (state)->C[15]; \
		Wlow = (state)->Wlow; \
		Whigh = (state)->Whigh; \
	} while (0)
||||||
|
|
||||||
|
/*
 * Stores the local A/B/C words and the Wlow/Whigh counter back into
 * the context pointed to by `state` (the inverse of READ_STATE).
 */
#define WRITE_STATE(state)   do { \
		(state)->A[0] = A00; \
		(state)->A[1] = A01; \
		(state)->A[2] = A02; \
		(state)->A[3] = A03; \
		(state)->A[4] = A04; \
		(state)->A[5] = A05; \
		(state)->A[6] = A06; \
		(state)->A[7] = A07; \
		(state)->A[8] = A08; \
		(state)->A[9] = A09; \
		(state)->A[10] = A0A; \
		(state)->A[11] = A0B; \
		(state)->B[0] = B0; \
		(state)->B[1] = B1; \
		(state)->B[2] = B2; \
		(state)->B[3] = B3; \
		(state)->B[4] = B4; \
		(state)->B[5] = B5; \
		(state)->B[6] = B6; \
		(state)->B[7] = B7; \
		(state)->B[8] = B8; \
		(state)->B[9] = B9; \
		(state)->B[10] = BA; \
		(state)->B[11] = BB; \
		(state)->B[12] = BC; \
		(state)->B[13] = BD; \
		(state)->B[14] = BE; \
		(state)->B[15] = BF; \
		(state)->C[0] = C0; \
		(state)->C[1] = C1; \
		(state)->C[2] = C2; \
		(state)->C[3] = C3; \
		(state)->C[4] = C4; \
		(state)->C[5] = C5; \
		(state)->C[6] = C6; \
		(state)->C[7] = C7; \
		(state)->C[8] = C8; \
		(state)->C[9] = C9; \
		(state)->C[10] = CA; \
		(state)->C[11] = CB; \
		(state)->C[12] = CC; \
		(state)->C[13] = CD; \
		(state)->C[14] = CE; \
		(state)->C[15] = CF; \
		(state)->Wlow = Wlow; \
		(state)->Whigh = Whigh; \
	} while (0)
||||||
|
|
||||||
|
/*
 * Fills M0..MF from the 64 bytes at `buf` (a local of the expanding
 * function), one sph_dec32le_aligned() call per 4-byte group.
 * NOTE(review): by its name the helper decodes a little-endian 32-bit
 * word from aligned storage -- confirm against sph_types.h.
 */
#define DECODE_BLOCK   do { \
		M0 = sph_dec32le_aligned(buf +  0); \
		M1 = sph_dec32le_aligned(buf +  4); \
		M2 = sph_dec32le_aligned(buf +  8); \
		M3 = sph_dec32le_aligned(buf + 12); \
		M4 = sph_dec32le_aligned(buf + 16); \
		M5 = sph_dec32le_aligned(buf + 20); \
		M6 = sph_dec32le_aligned(buf + 24); \
		M7 = sph_dec32le_aligned(buf + 28); \
		M8 = sph_dec32le_aligned(buf + 32); \
		M9 = sph_dec32le_aligned(buf + 36); \
		MA = sph_dec32le_aligned(buf + 40); \
		MB = sph_dec32le_aligned(buf + 44); \
		MC = sph_dec32le_aligned(buf + 48); \
		MD = sph_dec32le_aligned(buf + 52); \
		ME = sph_dec32le_aligned(buf + 56); \
		MF = sph_dec32le_aligned(buf + 60); \
	} while (0)
||||||
|
|
||||||
|
/*
 * Adds each message word to the B word of the same index
 * (B[i] = T32(B[i] + M[i]) for i = 0..15).
 */
#define INPUT_BLOCK_ADD   do { \
		B0 = T32(B0 + M0); \
		B1 = T32(B1 + M1); \
		B2 = T32(B2 + M2); \
		B3 = T32(B3 + M3); \
		B4 = T32(B4 + M4); \
		B5 = T32(B5 + M5); \
		B6 = T32(B6 + M6); \
		B7 = T32(B7 + M7); \
		B8 = T32(B8 + M8); \
		B9 = T32(B9 + M9); \
		BA = T32(BA + MA); \
		BB = T32(BB + MB); \
		BC = T32(BC + MC); \
		BD = T32(BD + MD); \
		BE = T32(BE + ME); \
		BF = T32(BF + MF); \
	} while (0)
||||||
|
|
||||||
|
/*
 * Subtracts each message word from the C word of the same index
 * (C[i] = T32(C[i] - M[i]) for i = 0..15).
 */
#define INPUT_BLOCK_SUB   do { \
		C0 = T32(C0 - M0); \
		C1 = T32(C1 - M1); \
		C2 = T32(C2 - M2); \
		C3 = T32(C3 - M3); \
		C4 = T32(C4 - M4); \
		C5 = T32(C5 - M5); \
		C6 = T32(C6 - M6); \
		C7 = T32(C7 - M7); \
		C8 = T32(C8 - M8); \
		C9 = T32(C9 - M9); \
		CA = T32(CA - MA); \
		CB = T32(CB - MB); \
		CC = T32(CC - MC); \
		CD = T32(CD - MD); \
		CE = T32(CE - ME); \
		CF = T32(CF - MF); \
	} while (0)
||||||
|
|
||||||
|
/* XORs the two counter words into the first two A words. */
#define XOR_W   do { \
		A00 ^= Wlow; \
		A01 ^= Whigh; \
	} while (0)
||||||
|
|
||||||
|
/* Exchanges the values of two sph_u32 lvalues through a scoped temporary. */
#define SWAP(v1, v2)   do { \
		sph_u32 saved = (v1); \
		(v1) = (v2); \
		(v2) = saved; \
	} while (0)
||||||
|
|
||||||
|
/* Exchanges every B word with the C word of the same index. */
#define SWAP_BC   do { \
		SWAP(B0, C0); \
		SWAP(B1, C1); \
		SWAP(B2, C2); \
		SWAP(B3, C3); \
		SWAP(B4, C4); \
		SWAP(B5, C5); \
		SWAP(B6, C6); \
		SWAP(B7, C7); \
		SWAP(B8, C8); \
		SWAP(B9, C9); \
		SWAP(BA, CA); \
		SWAP(BB, CB); \
		SWAP(BC, CC); \
		SWAP(BD, CD); \
		SWAP(BE, CE); \
		SWAP(BF, CF); \
	} while (0)
||||||
|
|
||||||
|
/*
 * One elementary permutation update.
 *   xa0 : A word being updated (read and written)
 *   xa1 : A word that is rotated left by 15 and multiplied by 5
 *   xb0 : B word being updated (rotated left by 1, XORed with the new
 *         xa0, then complemented)
 *   xb1, xb2, xb3 : B words mixed in as xb1 ^ (xb2 & ~xb3)
 *   xc  : C word, xm : message word
 * Arguments are expected to be plain variable names: they are used
 * unparenthesized and more than once.
 */
#define PERM_ELT(xa0, xa1, xb0, xb1, xb2, xb3, xc, xm)   do { \
		xa0 = T32((xa0 \
			^ (((xa1 << 15) | (xa1 >> 17)) * 5U) \
			^ xc) * 3U) \
			^ xb1 ^ (xb2 & ~xb3) ^ xm; \
		xb0 = T32(~(((xb0 << 1) | (xb0 >> 31)) ^ xa0)); \
	} while (0)
||||||
|
|
||||||
|
/*
 * First pass of sixteen PERM_ELT updates, one per B word (B0..BF).
 * The updated A word starts at A00 and advances cyclically through the
 * twelve A words, so this pass ends on A03.
 */
#define PERM_STEP_0   do { \
		PERM_ELT(A00, A0B, B0, BD, B9, B6, C8, M0); \
		PERM_ELT(A01, A00, B1, BE, BA, B7, C7, M1); \
		PERM_ELT(A02, A01, B2, BF, BB, B8, C6, M2); \
		PERM_ELT(A03, A02, B3, B0, BC, B9, C5, M3); \
		PERM_ELT(A04, A03, B4, B1, BD, BA, C4, M4); \
		PERM_ELT(A05, A04, B5, B2, BE, BB, C3, M5); \
		PERM_ELT(A06, A05, B6, B3, BF, BC, C2, M6); \
		PERM_ELT(A07, A06, B7, B4, B0, BD, C1, M7); \
		PERM_ELT(A08, A07, B8, B5, B1, BE, C0, M8); \
		PERM_ELT(A09, A08, B9, B6, B2, BF, CF, M9); \
		PERM_ELT(A0A, A09, BA, B7, B3, B0, CE, MA); \
		PERM_ELT(A0B, A0A, BB, B8, B4, B1, CD, MB); \
		PERM_ELT(A00, A0B, BC, B9, B5, B2, CC, MC); \
		PERM_ELT(A01, A00, BD, BA, B6, B3, CB, MD); \
		PERM_ELT(A02, A01, BE, BB, B7, B4, CA, ME); \
		PERM_ELT(A03, A02, BF, BC, B8, B5, C9, MF); \
	} while (0)
||||||
|
|
||||||
|
/*
 * Second pass of sixteen PERM_ELT updates over B0..BF. The B/C/M
 * arguments are the same as in PERM_STEP_0; the updated A word starts
 * at A04 and ends on A07.
 */
#define PERM_STEP_1   do { \
		PERM_ELT(A04, A03, B0, BD, B9, B6, C8, M0); \
		PERM_ELT(A05, A04, B1, BE, BA, B7, C7, M1); \
		PERM_ELT(A06, A05, B2, BF, BB, B8, C6, M2); \
		PERM_ELT(A07, A06, B3, B0, BC, B9, C5, M3); \
		PERM_ELT(A08, A07, B4, B1, BD, BA, C4, M4); \
		PERM_ELT(A09, A08, B5, B2, BE, BB, C3, M5); \
		PERM_ELT(A0A, A09, B6, B3, BF, BC, C2, M6); \
		PERM_ELT(A0B, A0A, B7, B4, B0, BD, C1, M7); \
		PERM_ELT(A00, A0B, B8, B5, B1, BE, C0, M8); \
		PERM_ELT(A01, A00, B9, B6, B2, BF, CF, M9); \
		PERM_ELT(A02, A01, BA, B7, B3, B0, CE, MA); \
		PERM_ELT(A03, A02, BB, B8, B4, B1, CD, MB); \
		PERM_ELT(A04, A03, BC, B9, B5, B2, CC, MC); \
		PERM_ELT(A05, A04, BD, BA, B6, B3, CB, MD); \
		PERM_ELT(A06, A05, BE, BB, B7, B4, CA, ME); \
		PERM_ELT(A07, A06, BF, BC, B8, B5, C9, MF); \
	} while (0)
||||||
|
|
||||||
|
/*
 * Third pass of sixteen PERM_ELT updates over B0..BF. The B/C/M
 * arguments are the same as in PERM_STEP_0; the updated A word starts
 * at A08 and ends on A0B.
 */
#define PERM_STEP_2   do { \
		PERM_ELT(A08, A07, B0, BD, B9, B6, C8, M0); \
		PERM_ELT(A09, A08, B1, BE, BA, B7, C7, M1); \
		PERM_ELT(A0A, A09, B2, BF, BB, B8, C6, M2); \
		PERM_ELT(A0B, A0A, B3, B0, BC, B9, C5, M3); \
		PERM_ELT(A00, A0B, B4, B1, BD, BA, C4, M4); \
		PERM_ELT(A01, A00, B5, B2, BE, BB, C3, M5); \
		PERM_ELT(A02, A01, B6, B3, BF, BC, C2, M6); \
		PERM_ELT(A03, A02, B7, B4, B0, BD, C1, M7); \
		PERM_ELT(A04, A03, B8, B5, B1, BE, C0, M8); \
		PERM_ELT(A05, A04, B9, B6, B2, BF, CF, M9); \
		PERM_ELT(A06, A05, BA, B7, B3, B0, CE, MA); \
		PERM_ELT(A07, A06, BB, B8, B4, B1, CD, MB); \
		PERM_ELT(A08, A07, BC, B9, B5, B2, CC, MC); \
		PERM_ELT(A09, A08, BD, BA, B6, B3, CB, MD); \
		PERM_ELT(A0A, A09, BE, BB, B7, B4, CA, ME); \
		PERM_ELT(A0B, A0A, BF, BC, B8, B5, C9, MF); \
	} while (0)
||||||
|
|
||||||
|
/*
 * The full permutation applied to the local state:
 *   1. every B word is rotated left by 17 bits;
 *   2. the three PERM_STEP_* passes are run in order;
 *   3. thirty-six C words are added into the A words (three additions
 *      per A word), in the fixed order listed below.
 */
#define APPLY_P   do { \
		B0 = T32(B0 << 17) | (B0 >> 15); \
		B1 = T32(B1 << 17) | (B1 >> 15); \
		B2 = T32(B2 << 17) | (B2 >> 15); \
		B3 = T32(B3 << 17) | (B3 >> 15); \
		B4 = T32(B4 << 17) | (B4 >> 15); \
		B5 = T32(B5 << 17) | (B5 >> 15); \
		B6 = T32(B6 << 17) | (B6 >> 15); \
		B7 = T32(B7 << 17) | (B7 >> 15); \
		B8 = T32(B8 << 17) | (B8 >> 15); \
		B9 = T32(B9 << 17) | (B9 >> 15); \
		BA = T32(BA << 17) | (BA >> 15); \
		BB = T32(BB << 17) | (BB >> 15); \
		BC = T32(BC << 17) | (BC >> 15); \
		BD = T32(BD << 17) | (BD >> 15); \
		BE = T32(BE << 17) | (BE >> 15); \
		BF = T32(BF << 17) | (BF >> 15); \
		PERM_STEP_0; \
		PERM_STEP_1; \
		PERM_STEP_2; \
		A0B = T32(A0B + C6); \
		A0A = T32(A0A + C5); \
		A09 = T32(A09 + C4); \
		A08 = T32(A08 + C3); \
		A07 = T32(A07 + C2); \
		A06 = T32(A06 + C1); \
		A05 = T32(A05 + C0); \
		A04 = T32(A04 + CF); \
		A03 = T32(A03 + CE); \
		A02 = T32(A02 + CD); \
		A01 = T32(A01 + CC); \
		A00 = T32(A00 + CB); \
		A0B = T32(A0B + CA); \
		A0A = T32(A0A + C9); \
		A09 = T32(A09 + C8); \
		A08 = T32(A08 + C7); \
		A07 = T32(A07 + C6); \
		A06 = T32(A06 + C5); \
		A05 = T32(A05 + C4); \
		A04 = T32(A04 + C3); \
		A03 = T32(A03 + C2); \
		A02 = T32(A02 + C1); \
		A01 = T32(A01 + C0); \
		A00 = T32(A00 + CF); \
		A0B = T32(A0B + CE); \
		A0A = T32(A0A + CD); \
		A09 = T32(A09 + CC); \
		A08 = T32(A08 + CB); \
		A07 = T32(A07 + CA); \
		A06 = T32(A06 + C9); \
		A05 = T32(A05 + C8); \
		A04 = T32(A04 + C7); \
		A03 = T32(A03 + C6); \
		A02 = T32(A02 + C5); \
		A01 = T32(A01 + C4); \
		A00 = T32(A00 + C3); \
	} while (0)
||||||
|
|
||||||
|
/*
 * Increments the two-word counter: Wlow is incremented, and when it
 * wraps to zero the carry is propagated into Whigh.
 */
#define INCR_W   do { \
		Wlow = T32(Wlow + 1); \
		if (Wlow == 0) \
			Whigh = T32(Whigh + 1); \
	} while (0)
||||||
|
#if 0
/*
 * Precomputed initial A/B/C words for the 192-, 224-, 256- and 384-bit
 * output sizes. Compiled out: only the 512-bit tables are active.
 */

static const sph_u32 A_init_192[12] = {
	C32(0xFD749ED4), C32(0xB798E530), C32(0x33904B6F), C32(0x46BDA85E),
	C32(0x076934B4), C32(0x454B4058), C32(0x77F74527), C32(0xFB4CF465),
	C32(0x62931DA9), C32(0xE778C8DB), C32(0x22B3998E), C32(0xAC15CFB9)
};

static const sph_u32 B_init_192[16] = {
	C32(0x58BCBAC4), C32(0xEC47A08E), C32(0xAEE933B2), C32(0xDFCBC824),
	C32(0xA7944804), C32(0xBF65BDB0), C32(0x5A9D4502), C32(0x59979AF7),
	C32(0xC5CEA54E), C32(0x4B6B8150), C32(0x16E71909), C32(0x7D632319),
	C32(0x930573A0), C32(0xF34C63D1), C32(0xCAF914B4), C32(0xFDD6612C)
};

static const sph_u32 C_init_192[16] = {
	C32(0x61550878), C32(0x89EF2B75), C32(0xA1660C46), C32(0x7EF3855B),
	C32(0x7297B58C), C32(0x1BC67793), C32(0x7FB1C723), C32(0xB66FC640),
	C32(0x1A48B71C), C32(0xF0976D17), C32(0x088CE80A), C32(0xA454EDF3),
	C32(0x1C096BF4), C32(0xAC76224B), C32(0x5215781C), C32(0xCD5D2669)
};

static const sph_u32 A_init_224[12] = {
	C32(0xA5201467), C32(0xA9B8D94A), C32(0xD4CED997), C32(0x68379D7B),
	C32(0xA7FC73BA), C32(0xF1A2546B), C32(0x606782BF), C32(0xE0BCFD0F),
	C32(0x2F25374E), C32(0x069A149F), C32(0x5E2DFF25), C32(0xFAECF061)
};

static const sph_u32 B_init_224[16] = {
	C32(0xEC9905D8), C32(0xF21850CF), C32(0xC0A746C8), C32(0x21DAD498),
	C32(0x35156EEB), C32(0x088C97F2), C32(0x26303E40), C32(0x8A2D4FB5),
	C32(0xFEEE44B6), C32(0x8A1E9573), C32(0x7B81111A), C32(0xCBC139F0),
	C32(0xA3513861), C32(0x1D2C362E), C32(0x918C580E), C32(0xB58E1B9C)
};

static const sph_u32 C_init_224[16] = {
	C32(0xE4B573A1), C32(0x4C1A0880), C32(0x1E907C51), C32(0x04807EFD),
	C32(0x3AD8CDE5), C32(0x16B21302), C32(0x02512C53), C32(0x2204CB18),
	C32(0x99405F2D), C32(0xE5B648A1), C32(0x70AB1D43), C32(0xA10C25C2),
	C32(0x16F1AC05), C32(0x38BBEB56), C32(0x9B01DC60), C32(0xB1096D83)
};

static const sph_u32 A_init_256[12] = {
	C32(0x52F84552), C32(0xE54B7999), C32(0x2D8EE3EC), C32(0xB9645191),
	C32(0xE0078B86), C32(0xBB7C44C9), C32(0xD2B5C1CA), C32(0xB0D2EB8C),
	C32(0x14CE5A45), C32(0x22AF50DC), C32(0xEFFDBC6B), C32(0xEB21B74A)
};

static const sph_u32 B_init_256[16] = {
	C32(0xB555C6EE), C32(0x3E710596), C32(0xA72A652F), C32(0x9301515F),
	C32(0xDA28C1FA), C32(0x696FD868), C32(0x9CB6BF72), C32(0x0AFE4002),
	C32(0xA6E03615), C32(0x5138C1D4), C32(0xBE216306), C32(0xB38B8890),
	C32(0x3EA8B96B), C32(0x3299ACE4), C32(0x30924DD4), C32(0x55CB34A5)
};

static const sph_u32 C_init_256[16] = {
	C32(0xB405F031), C32(0xC4233EBA), C32(0xB3733979), C32(0xC0DD9D55),
	C32(0xC51C28AE), C32(0xA327B8E1), C32(0x56C56167), C32(0xED614433),
	C32(0x88B59D60), C32(0x60E2CEBA), C32(0x758B4B8B), C32(0x83E82A7F),
	C32(0xBC968828), C32(0xE6E00BF7), C32(0xBA839E55), C32(0x9B491C60)
};

static const sph_u32 A_init_384[12] = {
	C32(0xC8FCA331), C32(0xE55C504E), C32(0x003EBF26), C32(0xBB6B8D83),
	C32(0x7B0448C1), C32(0x41B82789), C32(0x0A7C9601), C32(0x8D659CFF),
	C32(0xB6E2673E), C32(0xCA54C77B), C32(0x1460FD7E), C32(0x3FCB8F2D)
};

static const sph_u32 B_init_384[16] = {
	C32(0x527291FC), C32(0x2A16455F), C32(0x78E627E5), C32(0x944F169F),
	C32(0x1CA6F016), C32(0xA854EA25), C32(0x8DB98ABE), C32(0xF2C62641),
	C32(0x30117DCB), C32(0xCF5C4309), C32(0x93711A25), C32(0xF9F671B8),
	C32(0xB01D2116), C32(0x333F4B89), C32(0xB285D165), C32(0x86829B36)
};

static const sph_u32 C_init_384[16] = {
	C32(0xF764B11A), C32(0x76172146), C32(0xCEF6934D), C32(0xC6D28399),
	C32(0xFE095F61), C32(0x5E6018B4), C32(0x5048ECF5), C32(0x51353261),
	C32(0x6E6E36DC), C32(0x63130DAD), C32(0xA9C69BD6), C32(0x1E90EA0C),
	C32(0x7C35073B), C32(0x28D95E6D), C32(0xAA340E0D), C32(0xCB3DEE70)
};

#endif
||||||
|
static const sph_u32 A_init_512[] = { |
||||||
|
C32(0x20728DFD), C32(0x46C0BD53), C32(0xE782B699), C32(0x55304632), |
||||||
|
C32(0x71B4EF90), C32(0x0EA9E82C), C32(0xDBB930F1), C32(0xFAD06B8B), |
||||||
|
C32(0xBE0CAE40), C32(0x8BD14410), C32(0x76D2ADAC), C32(0x28ACAB7F) |
||||||
|
}; |
||||||
|
|
||||||
|
static const sph_u32 B_init_512[] = { |
||||||
|
C32(0xC1099CB7), C32(0x07B385F3), C32(0xE7442C26), C32(0xCC8AD640), |
||||||
|
C32(0xEB6F56C7), C32(0x1EA81AA9), C32(0x73B9D314), C32(0x1DE85D08), |
||||||
|
C32(0x48910A5A), C32(0x893B22DB), C32(0xC5A0DF44), C32(0xBBC4324E), |
||||||
|
C32(0x72D2F240), C32(0x75941D99), C32(0x6D8BDE82), C32(0xA1A7502B) |
||||||
|
}; |
||||||
|
|
||||||
|
static const sph_u32 C_init_512[] = { |
||||||
|
C32(0xD9BF68D1), C32(0x58BAD750), C32(0x56028CB2), C32(0x8134F359), |
||||||
|
C32(0xB5D469D8), C32(0x941A8CC2), C32(0x418B2A6E), C32(0x04052780), |
||||||
|
C32(0x7F07D787), C32(0x5194358F), C32(0x3C60D665), C32(0xBE97D79A), |
||||||
|
C32(0x950C3434), C32(0xAED9A06D), C32(0x2537DC8D), C32(0x7CDB5969) |
||||||
|
}; |
||||||
|
|
||||||
|
/* END -- automatically generated code. */ |
||||||
|
|
||||||
|
static void |
||||||
|
shabal_init(void *cc, unsigned size) |
||||||
|
{ |
||||||
|
/*
|
||||||
|
* We have precomputed initial states for all the supported |
||||||
|
* output bit lengths. |
||||||
|
*/ |
||||||
|
const sph_u32 *A_init, *B_init, *C_init; |
||||||
|
sph_shabal_context *sc; |
||||||
|
|
||||||
|
switch (size) { |
||||||
|
#if 0 |
||||||
|
case 192: |
||||||
|
A_init = A_init_192; |
||||||
|
B_init = B_init_192; |
||||||
|
C_init = C_init_192; |
||||||
|
break; |
||||||
|
case 224: |
||||||
|
A_init = A_init_224; |
||||||
|
B_init = B_init_224; |
||||||
|
C_init = C_init_224; |
||||||
|
break; |
||||||
|
case 256: |
||||||
|
A_init = A_init_256; |
||||||
|
B_init = B_init_256; |
||||||
|
C_init = C_init_256; |
||||||
|
break; |
||||||
|
case 384: |
||||||
|
A_init = A_init_384; |
||||||
|
B_init = B_init_384; |
||||||
|
C_init = C_init_384; |
||||||
|
break; |
||||||
|
#endif |
||||||
|
case 512: |
||||||
|
A_init = A_init_512; |
||||||
|
B_init = B_init_512; |
||||||
|
C_init = C_init_512; |
||||||
|
break; |
||||||
|
default: |
||||||
|
return; |
||||||
|
} |
||||||
|
sc = cc; |
||||||
|
memcpy(sc->A, A_init, sizeof sc->A); |
||||||
|
memcpy(sc->B, B_init, sizeof sc->B); |
||||||
|
memcpy(sc->C, C_init, sizeof sc->C); |
||||||
|
sc->Wlow = 1; |
||||||
|
sc->Whigh = 0; |
||||||
|
sc->ptr = 0; |
||||||
|
} |
||||||
|
|
||||||
|
static void |
||||||
|
shabal_core(void *cc, const unsigned char *data, size_t len) |
||||||
|
{ |
||||||
|
sph_shabal_context *sc; |
||||||
|
unsigned char *buf; |
||||||
|
size_t ptr; |
||||||
|
DECL_STATE |
||||||
|
|
||||||
|
sc = cc; |
||||||
|
buf = sc->buf; |
||||||
|
ptr = sc->ptr; |
||||||
|
|
||||||
|
/*
|
||||||
|
* We do not want to copy the state to local variables if the |
||||||
|
* amount of data is less than what is needed to complete the |
||||||
|
* current block. Note that it is anyway suboptimal to call |
||||||
|
* this method many times for small chunks of data. |
||||||
|
*/ |
||||||
|
if (len < (sizeof sc->buf) - ptr) { |
||||||
|
memcpy(buf + ptr, data, len); |
||||||
|
ptr += len; |
||||||
|
sc->ptr = ptr; |
||||||
|
return; |
||||||
|
} |
||||||
|
|
||||||
|
READ_STATE(sc); |
||||||
|
while (len > 0) { |
||||||
|
size_t clen; |
||||||
|
|
||||||
|
clen = (sizeof sc->buf) - ptr; |
||||||
|
if (clen > len) |
||||||
|
clen = len; |
||||||
|
memcpy(buf + ptr, data, clen); |
||||||
|
ptr += clen; |
||||||
|
data += clen; |
||||||
|
len -= clen; |
||||||
|
if (ptr == sizeof sc->buf) { |
||||||
|
DECODE_BLOCK; |
||||||
|
INPUT_BLOCK_ADD; |
||||||
|
XOR_W; |
||||||
|
APPLY_P; |
||||||
|
INPUT_BLOCK_SUB; |
||||||
|
SWAP_BC; |
||||||
|
INCR_W; |
||||||
|
ptr = 0; |
||||||
|
} |
||||||
|
} |
||||||
|
WRITE_STATE(sc); |
||||||
|
sc->ptr = ptr; |
||||||
|
} |
||||||
|
|
||||||
|
static void |
||||||
|
shabal_close(void *cc, unsigned ub, unsigned n, void *dst, unsigned size_words) |
||||||
|
{ |
||||||
|
sph_shabal_context *sc; |
||||||
|
unsigned char *buf; |
||||||
|
size_t ptr; |
||||||
|
int i; |
||||||
|
unsigned z; |
||||||
|
union { |
||||||
|
unsigned char tmp_out[64]; |
||||||
|
sph_u32 dummy; |
||||||
|
} u; |
||||||
|
size_t out_len; |
||||||
|
DECL_STATE |
||||||
|
|
||||||
|
sc = cc; |
||||||
|
buf = sc->buf; |
||||||
|
ptr = sc->ptr; |
||||||
|
z = 0x80 >> n; |
||||||
|
buf[ptr] = ((ub & -z) | z) & 0xFF; |
||||||
|
memset(buf + ptr + 1, 0, (sizeof sc->buf) - (ptr + 1)); |
||||||
|
READ_STATE(sc); |
||||||
|
DECODE_BLOCK; |
||||||
|
INPUT_BLOCK_ADD; |
||||||
|
XOR_W; |
||||||
|
APPLY_P; |
||||||
|
for (i = 0; i < 3; i ++) { |
||||||
|
SWAP_BC; |
||||||
|
XOR_W; |
||||||
|
APPLY_P; |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* We just use our local variables; no need to go through |
||||||
|
* the state structure. In order to share some code, we |
||||||
|
* emit the relevant words into a temporary buffer, which |
||||||
|
* we finally copy into the destination array. |
||||||
|
*/ |
||||||
|
switch (size_words) { |
||||||
|
case 16: |
||||||
|
sph_enc32le_aligned(u.tmp_out + 0, B0); |
||||||
|
sph_enc32le_aligned(u.tmp_out + 4, B1); |
||||||
|
sph_enc32le_aligned(u.tmp_out + 8, B2); |
||||||
|
sph_enc32le_aligned(u.tmp_out + 12, B3); |
||||||
|
/* fall through */ |
||||||
|
case 12: |
||||||
|
sph_enc32le_aligned(u.tmp_out + 16, B4); |
||||||
|
sph_enc32le_aligned(u.tmp_out + 20, B5); |
||||||
|
sph_enc32le_aligned(u.tmp_out + 24, B6); |
||||||
|
sph_enc32le_aligned(u.tmp_out + 28, B7); |
||||||
|
/* fall through */ |
||||||
|
case 8: |
||||||
|
sph_enc32le_aligned(u.tmp_out + 32, B8); |
||||||
|
/* fall through */ |
||||||
|
case 7: |
||||||
|
sph_enc32le_aligned(u.tmp_out + 36, B9); |
||||||
|
/* fall through */ |
||||||
|
case 6: |
||||||
|
sph_enc32le_aligned(u.tmp_out + 40, BA); |
||||||
|
sph_enc32le_aligned(u.tmp_out + 44, BB); |
||||||
|
sph_enc32le_aligned(u.tmp_out + 48, BC); |
||||||
|
sph_enc32le_aligned(u.tmp_out + 52, BD); |
||||||
|
sph_enc32le_aligned(u.tmp_out + 56, BE); |
||||||
|
sph_enc32le_aligned(u.tmp_out + 60, BF); |
||||||
|
break; |
||||||
|
default: |
||||||
|
return; |
||||||
|
} |
||||||
|
out_len = size_words << 2; |
||||||
|
memcpy(dst, u.tmp_out + (sizeof u.tmp_out) - out_len, out_len); |
||||||
|
shabal_init(sc, size_words << 5); |
||||||
|
} |
||||||
|
|
||||||
|
#if 0
/*
 * Public entry points for the 192-, 224-, 256- and 384-bit variants.
 * Compiled out together with their initial-value tables; see
 * sph_shabal.h for the interface description.
 */

/* Shabal-192 */
void
sph_shabal192_init(void *cc)
{
	shabal_init(cc, 192);
}

void
sph_shabal192(void *cc, const void *data, size_t len)
{
	shabal_core(cc, (const unsigned char *)data, len);
}

void
sph_shabal192_close(void *cc, void *dst)
{
	shabal_close(cc, 0, 0, dst, 6);
}

void
sph_shabal192_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	shabal_close(cc, ub, n, dst, 6);
}

/* Shabal-224 */
void
sph_shabal224_init(void *cc)
{
	shabal_init(cc, 224);
}

void
sph_shabal224(void *cc, const void *data, size_t len)
{
	shabal_core(cc, (const unsigned char *)data, len);
}

void
sph_shabal224_close(void *cc, void *dst)
{
	shabal_close(cc, 0, 0, dst, 7);
}

void
sph_shabal224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	shabal_close(cc, ub, n, dst, 7);
}

/* Shabal-256 */
void
sph_shabal256_init(void *cc)
{
	shabal_init(cc, 256);
}

void
sph_shabal256(void *cc, const void *data, size_t len)
{
	shabal_core(cc, (const unsigned char *)data, len);
}

void
sph_shabal256_close(void *cc, void *dst)
{
	shabal_close(cc, 0, 0, dst, 8);
}

void
sph_shabal256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	shabal_close(cc, ub, n, dst, 8);
}

/* Shabal-384 */
void
sph_shabal384_init(void *cc)
{
	shabal_init(cc, 384);
}

void
sph_shabal384(void *cc, const void *data, size_t len)
{
	shabal_core(cc, (const unsigned char *)data, len);
}

void
sph_shabal384_close(void *cc, void *dst)
{
	shabal_close(cc, 0, 0, dst, 12);
}

void
sph_shabal384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	shabal_close(cc, ub, n, dst, 12);
}

#endif
||||||
|
|
||||||
|
/*
 * Initializes `cc` (a sph_shabal_context) for a Shabal-512 computation.
 * See sph_shabal.h.
 */
void
sph_shabal512_init(void *cc)
{
	shabal_init(cc, 512);
}
||||||
|
|
||||||
|
/*
 * Feeds `len` bytes from `data` into the Shabal-512 context `cc`.
 * See sph_shabal.h.
 */
void
sph_shabal512(void *cc, const void *data, size_t len)
{
	shabal_core(cc, (const unsigned char *)data, len);
}
||||||
|
|
||||||
|
/*
 * Finishes the Shabal-512 computation in `cc` with no extra bits and
 * writes the 16-word (64-byte) digest to `dst`. See sph_shabal.h.
 */
void
sph_shabal512_close(void *cc, void *dst)
{
	shabal_close(cc, 0, 0, dst, 16);
}
||||||
|
|
||||||
|
/*
 * Appends `n` extra bits taken from `ub`, then finishes the Shabal-512
 * computation in `cc` and writes the 16-word (64-byte) digest to `dst`.
 * See sph_shabal.h.
 */
void
sph_shabal512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	shabal_close(cc, ub, n, dst, 16);
}
||||||
|
#ifdef __cplusplus |
||||||
|
} |
||||||
|
#endif |
@ -0,0 +1,344 @@ |
|||||||
|
/* $Id: sph_shabal.h 175 2010-05-07 16:03:20Z tp $ */ |
||||||
|
/**
|
||||||
|
 * Shabal interface. Shabal is a family of functions which differ by
 * their output size; this interface covers output sizes 192, 224, 256,
 * 384 and 512 bits. Note that the accompanying shabal.c in this tree
 * compiles only the 512-bit variant (the other sizes are under "#if 0").
||||||
|
* |
||||||
|
* ==========================(LICENSE BEGIN)============================ |
||||||
|
* |
||||||
|
* Copyright (c) 2007-2010 Projet RNRT SAPHIR |
||||||
|
* |
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining |
||||||
|
* a copy of this software and associated documentation files (the |
||||||
|
* "Software"), to deal in the Software without restriction, including |
||||||
|
* without limitation the rights to use, copy, modify, merge, publish, |
||||||
|
* distribute, sublicense, and/or sell copies of the Software, and to |
||||||
|
* permit persons to whom the Software is furnished to do so, subject to |
||||||
|
* the following conditions: |
||||||
|
* |
||||||
|
* The above copyright notice and this permission notice shall be |
||||||
|
* included in all copies or substantial portions of the Software. |
||||||
|
* |
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||||
|
* |
||||||
|
* ===========================(LICENSE END)============================= |
||||||
|
* |
||||||
|
* @file sph_shabal.h |
||||||
|
* @author Thomas Pornin <thomas.pornin@cryptolog.com> |
||||||
|
*/ |
||||||
|
|
||||||
|
#ifndef SPH_SHABAL_H__ |
||||||
|
#define SPH_SHABAL_H__ |
||||||
|
|
||||||
|
#ifdef __cplusplus |
||||||
|
extern "C"{ |
||||||
|
#endif |
||||||
|
|
||||||
|
#include <stddef.h> |
||||||
|
#include "sph_types.h" |
||||||
|
|
||||||
|
/**
 * Output size (in bits) for Shabal-192.
 */
#define SPH_SIZE_shabal192   192

/**
 * Output size (in bits) for Shabal-224.
 */
#define SPH_SIZE_shabal224   224

/**
 * Output size (in bits) for Shabal-256.
 */
#define SPH_SIZE_shabal256   256

/**
 * Output size (in bits) for Shabal-384.
 */
#define SPH_SIZE_shabal384   384

/**
 * Output size (in bits) for Shabal-512.
 */
#define SPH_SIZE_shabal512   512
||||||
|
|
||||||
|
/**
|
||||||
|
* This structure is a context for Shabal computations: it contains the |
||||||
|
* intermediate values and some data from the last entered block. Once |
||||||
|
* a Shabal computation has been performed, the context can be reused for |
||||||
|
* another computation. |
||||||
|
* |
||||||
|
* The contents of this structure are private. A running Shabal computation |
||||||
|
* can be cloned by copying the context (e.g. with a simple |
||||||
|
* <code>memcpy()</code>). |
||||||
|
*/ |
||||||
|
typedef struct { |
||||||
|
#ifndef DOXYGEN_IGNORE |
||||||
|
unsigned char buf[64]; /* first field, for alignment */ |
||||||
|
size_t ptr; |
||||||
|
sph_u32 A[12], B[16], C[16]; |
||||||
|
sph_u32 Whigh, Wlow; |
||||||
|
#endif |
||||||
|
} sph_shabal_context; |
||||||
|
|
||||||
|
/**
|
||||||
|
* Type for a Shabal-192 context (identical to the common context). |
||||||
|
*/ |
||||||
|
typedef sph_shabal_context sph_shabal192_context; |
||||||
|
|
||||||
|
/**
|
||||||
|
* Type for a Shabal-224 context (identical to the common context). |
||||||
|
*/ |
||||||
|
typedef sph_shabal_context sph_shabal224_context; |
||||||
|
|
||||||
|
/**
|
||||||
|
* Type for a Shabal-256 context (identical to the common context). |
||||||
|
*/ |
||||||
|
typedef sph_shabal_context sph_shabal256_context; |
||||||
|
|
||||||
|
/**
|
||||||
|
* Type for a Shabal-384 context (identical to the common context). |
||||||
|
*/ |
||||||
|
typedef sph_shabal_context sph_shabal384_context; |
||||||
|
|
||||||
|
/**
|
||||||
|
* Type for a Shabal-512 context (identical to the common context). |
||||||
|
*/ |
||||||
|
typedef sph_shabal_context sph_shabal512_context; |
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize a Shabal-192 context. This process performs no memory allocation. |
||||||
|
* |
||||||
|
* @param cc the Shabal-192 context (pointer to a |
||||||
|
* <code>sph_shabal192_context</code>) |
||||||
|
*/ |
||||||
|
void sph_shabal192_init(void *cc); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||||
|
* (in which case this function does nothing). |
||||||
|
* |
||||||
|
* @param cc the Shabal-192 context |
||||||
|
* @param data the input data |
||||||
|
* @param len the input data length (in bytes) |
||||||
|
*/ |
||||||
|
void sph_shabal192(void *cc, const void *data, size_t len); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Terminate the current Shabal-192 computation and output the result into |
||||||
|
* the provided buffer. The destination buffer must be wide enough to |
||||||
|
* accommodate the result (24 bytes). The context is automatically |
||||||
|
* reinitialized. |
||||||
|
* |
||||||
|
* @param cc the Shabal-192 context |
||||||
|
* @param dst the destination buffer |
||||||
|
*/ |
||||||
|
void sph_shabal192_close(void *cc, void *dst); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Add a few additional bits (0 to 7) to the current computation, then |
||||||
|
* terminate it and output the result in the provided buffer, which must |
||||||
|
* be wide enough to accommodate the result (24 bytes). If bit number i |
||||||
|
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||||
|
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||||
|
* level). The context is automatically reinitialized. |
||||||
|
* |
||||||
|
* @param cc the Shabal-192 context |
||||||
|
* @param ub the extra bits |
||||||
|
* @param n the number of extra bits (0 to 7) |
||||||
|
* @param dst the destination buffer |
||||||
|
*/ |
||||||
|
void sph_shabal192_addbits_and_close( |
||||||
|
void *cc, unsigned ub, unsigned n, void *dst); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize a Shabal-224 context. This process performs no memory allocation. |
||||||
|
* |
||||||
|
* @param cc the Shabal-224 context (pointer to a |
||||||
|
* <code>sph_shabal224_context</code>) |
||||||
|
*/ |
||||||
|
void sph_shabal224_init(void *cc); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||||
|
* (in which case this function does nothing). |
||||||
|
* |
||||||
|
* @param cc the Shabal-224 context |
||||||
|
* @param data the input data |
||||||
|
* @param len the input data length (in bytes) |
||||||
|
*/ |
||||||
|
void sph_shabal224(void *cc, const void *data, size_t len); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Terminate the current Shabal-224 computation and output the result into |
||||||
|
* the provided buffer. The destination buffer must be wide enough to |
||||||
|
* accommodate the result (28 bytes). The context is automatically |
||||||
|
* reinitialized. |
||||||
|
* |
||||||
|
* @param cc the Shabal-224 context |
||||||
|
* @param dst the destination buffer |
||||||
|
*/ |
||||||
|
void sph_shabal224_close(void *cc, void *dst); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Add a few additional bits (0 to 7) to the current computation, then |
||||||
|
* terminate it and output the result in the provided buffer, which must |
||||||
|
* be wide enough to accommodate the result (28 bytes). If bit number i |
||||||
|
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||||
|
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||||
|
* level). The context is automatically reinitialized. |
||||||
|
* |
||||||
|
* @param cc the Shabal-224 context |
||||||
|
* @param ub the extra bits |
||||||
|
* @param n the number of extra bits (0 to 7) |
||||||
|
* @param dst the destination buffer |
||||||
|
*/ |
||||||
|
void sph_shabal224_addbits_and_close( |
||||||
|
void *cc, unsigned ub, unsigned n, void *dst); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize a Shabal-256 context. This process performs no memory allocation. |
||||||
|
* |
||||||
|
* @param cc the Shabal-256 context (pointer to a |
||||||
|
* <code>sph_shabal256_context</code>) |
||||||
|
*/ |
||||||
|
void sph_shabal256_init(void *cc); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||||
|
* (in which case this function does nothing). |
||||||
|
* |
||||||
|
* @param cc the Shabal-256 context |
||||||
|
* @param data the input data |
||||||
|
* @param len the input data length (in bytes) |
||||||
|
*/ |
||||||
|
void sph_shabal256(void *cc, const void *data, size_t len); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Terminate the current Shabal-256 computation and output the result into |
||||||
|
* the provided buffer. The destination buffer must be wide enough to |
||||||
|
* accommodate the result (32 bytes). The context is automatically |
||||||
|
* reinitialized. |
||||||
|
* |
||||||
|
* @param cc the Shabal-256 context |
||||||
|
* @param dst the destination buffer |
||||||
|
*/ |
||||||
|
void sph_shabal256_close(void *cc, void *dst); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Add a few additional bits (0 to 7) to the current computation, then |
||||||
|
* terminate it and output the result in the provided buffer, which must |
||||||
|
* be wide enough to accommodate the result (32 bytes). If bit number i |
||||||
|
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||||
|
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||||
|
* level). The context is automatically reinitialized. |
||||||
|
* |
||||||
|
* @param cc the Shabal-256 context |
||||||
|
* @param ub the extra bits |
||||||
|
* @param n the number of extra bits (0 to 7) |
||||||
|
* @param dst the destination buffer |
||||||
|
*/ |
||||||
|
void sph_shabal256_addbits_and_close( |
||||||
|
void *cc, unsigned ub, unsigned n, void *dst); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize a Shabal-384 context. This process performs no memory allocation. |
||||||
|
* |
||||||
|
* @param cc the Shabal-384 context (pointer to a |
||||||
|
* <code>sph_shabal384_context</code>) |
||||||
|
*/ |
||||||
|
void sph_shabal384_init(void *cc); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||||
|
* (in which case this function does nothing). |
||||||
|
* |
||||||
|
* @param cc the Shabal-384 context |
||||||
|
* @param data the input data |
||||||
|
* @param len the input data length (in bytes) |
||||||
|
*/ |
||||||
|
void sph_shabal384(void *cc, const void *data, size_t len); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Terminate the current Shabal-384 computation and output the result into |
||||||
|
* the provided buffer. The destination buffer must be wide enough to |
||||||
|
* accommodate the result (48 bytes). The context is automatically |
||||||
|
* reinitialized. |
||||||
|
* |
||||||
|
* @param cc the Shabal-384 context |
||||||
|
* @param dst the destination buffer |
||||||
|
*/ |
||||||
|
void sph_shabal384_close(void *cc, void *dst); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Add a few additional bits (0 to 7) to the current computation, then |
||||||
|
* terminate it and output the result in the provided buffer, which must |
||||||
|
* be wide enough to accommodate the result (48 bytes). If bit number i |
||||||
|
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||||
|
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||||
|
* level). The context is automatically reinitialized. |
||||||
|
* |
||||||
|
* @param cc the Shabal-384 context |
||||||
|
* @param ub the extra bits |
||||||
|
* @param n the number of extra bits (0 to 7) |
||||||
|
* @param dst the destination buffer |
||||||
|
*/ |
||||||
|
void sph_shabal384_addbits_and_close( |
||||||
|
void *cc, unsigned ub, unsigned n, void *dst); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize a Shabal-512 context. This process performs no memory allocation. |
||||||
|
* |
||||||
|
* @param cc the Shabal-512 context (pointer to a |
||||||
|
* <code>sph_shabal512_context</code>) |
||||||
|
*/ |
||||||
|
void sph_shabal512_init(void *cc); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||||
|
* (in which case this function does nothing). |
||||||
|
* |
||||||
|
* @param cc the Shabal-512 context |
||||||
|
* @param data the input data |
||||||
|
* @param len the input data length (in bytes) |
||||||
|
*/ |
||||||
|
void sph_shabal512(void *cc, const void *data, size_t len); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Terminate the current Shabal-512 computation and output the result into |
||||||
|
* the provided buffer. The destination buffer must be wide enough to |
||||||
|
* accommodate the result (64 bytes). The context is automatically |
||||||
|
* reinitialized. |
||||||
|
* |
||||||
|
* @param cc the Shabal-512 context |
||||||
|
* @param dst the destination buffer |
||||||
|
*/ |
||||||
|
void sph_shabal512_close(void *cc, void *dst); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Add a few additional bits (0 to 7) to the current computation, then |
||||||
|
* terminate it and output the result in the provided buffer, which must |
||||||
|
* be wide enough to accommodate the result (64 bytes). If bit number i |
||||||
|
* in <code>ub</code> has value 2^i, then the extra bits are those |
||||||
|
* numbered 7 downto 8-n (this is the big-endian convention at the byte |
||||||
|
* level). The context is automatically reinitialized. |
||||||
|
* |
||||||
|
* @param cc the Shabal-512 context |
||||||
|
* @param ub the extra bits |
||||||
|
* @param n the number of extra bits (0 to 7) |
||||||
|
* @param dst the destination buffer |
||||||
|
*/ |
||||||
|
void sph_shabal512_addbits_and_close( |
||||||
|
void *cc, unsigned ub, unsigned n, void *dst); |
||||||
|
|
||||||
|
#ifdef __cplusplus |
||||||
|
} |
||||||
|
#endif |
||||||
|
|
||||||
|
#endif |
@ -0,0 +1,216 @@ |
|||||||
|
/* $Id: sph_whirlpool.h 216 2010-06-08 09:46:57Z tp $ */ |
||||||
|
/**
|
||||||
|
* WHIRLPOOL interface. |
||||||
|
* |
||||||
|
* WHIRLPOOL knows three variants, dubbed "WHIRLPOOL-0" (original |
||||||
|
* version, published in 2000, studied by NESSIE), "WHIRLPOOL-1" |
||||||
|
* (first revision, 2001, with a new S-box) and "WHIRLPOOL" (current |
||||||
|
* version, 2003, with a new diffusion matrix, also described as "plain |
||||||
|
* WHIRLPOOL"). All three variants are implemented here. |
||||||
|
* |
||||||
|
* The original WHIRLPOOL (i.e. WHIRLPOOL-0) was published in: P. S. L. |
||||||
|
* M. Barreto, V. Rijmen, "The Whirlpool Hashing Function", First open |
||||||
|
* NESSIE Workshop, Leuven, Belgium, November 13--14, 2000. |
||||||
|
* |
||||||
|
* The current WHIRLPOOL specification and a reference implementation |
||||||
|
* can be found on the WHIRLPOOL web page: |
||||||
|
* http://paginas.terra.com.br/informatica/paulobarreto/WhirlpoolPage.html
|
||||||
|
* |
||||||
|
* ==========================(LICENSE BEGIN)============================ |
||||||
|
* |
||||||
|
* Copyright (c) 2007-2010 Projet RNRT SAPHIR |
||||||
|
* |
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining |
||||||
|
* a copy of this software and associated documentation files (the |
||||||
|
* "Software"), to deal in the Software without restriction, including |
||||||
|
* without limitation the rights to use, copy, modify, merge, publish, |
||||||
|
* distribute, sublicense, and/or sell copies of the Software, and to |
||||||
|
* permit persons to whom the Software is furnished to do so, subject to |
||||||
|
* the following conditions: |
||||||
|
* |
||||||
|
* The above copyright notice and this permission notice shall be |
||||||
|
* included in all copies or substantial portions of the Software. |
||||||
|
* |
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||||
|
* |
||||||
|
* ===========================(LICENSE END)============================= |
||||||
|
* |
||||||
|
* @file sph_whirlpool.h |
||||||
|
* @author Thomas Pornin <thomas.pornin@cryptolog.com> |
||||||
|
*/ |
||||||
|
|
||||||
|
#ifndef SPH_WHIRLPOOL_H__ |
||||||
|
#define SPH_WHIRLPOOL_H__ |
||||||
|
|
||||||
|
#include <stddef.h> |
||||||
|
#include "sph_types.h" |
||||||
|
#ifdef __cplusplus |
||||||
|
extern "C"{ |
||||||
|
#endif |
||||||
|
|
||||||
|
#if SPH_64 |
||||||
|
|
||||||
|
/**
|
||||||
|
* Output size (in bits) for WHIRLPOOL. |
||||||
|
*/ |
||||||
|
#define SPH_SIZE_whirlpool 512 |
||||||
|
|
||||||
|
/**
|
||||||
|
* Output size (in bits) for WHIRLPOOL-0. |
||||||
|
*/ |
||||||
|
#define SPH_SIZE_whirlpool0 512 |
||||||
|
|
||||||
|
/**
|
||||||
|
* Output size (in bits) for WHIRLPOOL-1. |
||||||
|
*/ |
||||||
|
#define SPH_SIZE_whirlpool1 512 |
||||||
|
|
||||||
|
/**
|
||||||
|
* This structure is a context for WHIRLPOOL computations: it contains the |
||||||
|
* intermediate values and some data from the last entered block. Once |
||||||
|
* a WHIRLPOOL computation has been performed, the context can be reused for |
||||||
|
* another computation. |
||||||
|
* |
||||||
|
* The contents of this structure are private. A running WHIRLPOOL computation |
||||||
|
* can be cloned by copying the context (e.g. with a simple |
||||||
|
* <code>memcpy()</code>). |
||||||
|
*/ |
||||||
|
typedef struct { |
||||||
|
#ifndef DOXYGEN_IGNORE |
||||||
|
unsigned char buf[64]; /* first field, for alignment */ |
||||||
|
sph_u64 state[8]; |
||||||
|
#if SPH_64 |
||||||
|
sph_u64 count; |
||||||
|
#else |
||||||
|
sph_u32 count_high, count_low; |
||||||
|
#endif |
||||||
|
#endif |
||||||
|
} sph_whirlpool_context; |
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize a WHIRLPOOL context. This process performs no memory allocation. |
||||||
|
* |
||||||
|
* @param cc the WHIRLPOOL context (pointer to a |
||||||
|
* <code>sph_whirlpool_context</code>) |
||||||
|
*/ |
||||||
|
void sph_whirlpool_init(void *cc); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||||
|
* (in which case this function does nothing). This function applies the |
||||||
|
* plain WHIRLPOOL algorithm. |
||||||
|
* |
||||||
|
* @param cc the WHIRLPOOL context |
||||||
|
* @param data the input data |
||||||
|
* @param len the input data length (in bytes) |
||||||
|
*/ |
||||||
|
void sph_whirlpool(void *cc, const void *data, size_t len); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Terminate the current WHIRLPOOL computation and output the result into the |
||||||
|
* provided buffer. The destination buffer must be wide enough to |
||||||
|
* accommodate the result (64 bytes). The context is automatically |
||||||
|
* reinitialized. |
||||||
|
* |
||||||
|
* @param cc the WHIRLPOOL context |
||||||
|
* @param dst the destination buffer |
||||||
|
*/ |
||||||
|
void sph_whirlpool_close(void *cc, void *dst); |
||||||
|
|
||||||
|
/**
|
||||||
|
* WHIRLPOOL-0 uses the same structure as plain WHIRLPOOL. |
||||||
|
*/ |
||||||
|
typedef sph_whirlpool_context sph_whirlpool0_context; |
||||||
|
|
||||||
|
#ifdef DOXYGEN_IGNORE |
||||||
|
/**
|
||||||
|
* Initialize a WHIRLPOOL-0 context. This function is identical to |
||||||
|
* <code>sph_whirlpool_init()</code>. |
||||||
|
* |
||||||
|
* @param cc the WHIRLPOOL context (pointer to a |
||||||
|
* <code>sph_whirlpool0_context</code>) |
||||||
|
*/ |
||||||
|
void sph_whirlpool0_init(void *cc); |
||||||
|
#endif |
||||||
|
|
||||||
|
#ifndef DOXYGEN_IGNORE |
||||||
|
#define sph_whirlpool0_init sph_whirlpool_init |
||||||
|
#endif |
||||||
|
|
||||||
|
/**
|
||||||
|
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||||
|
* (in which case this function does nothing). This function applies the |
||||||
|
* WHIRLPOOL-0 algorithm. |
||||||
|
* |
||||||
|
* @param cc the WHIRLPOOL context |
||||||
|
* @param data the input data |
||||||
|
* @param len the input data length (in bytes) |
||||||
|
*/ |
||||||
|
void sph_whirlpool0(void *cc, const void *data, size_t len); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Terminate the current WHIRLPOOL-0 computation and output the result into the |
||||||
|
* provided buffer. The destination buffer must be wide enough to |
||||||
|
* accommodate the result (64 bytes). The context is automatically |
||||||
|
* reinitialized. |
||||||
|
* |
||||||
|
* @param cc the WHIRLPOOL-0 context |
||||||
|
* @param dst the destination buffer |
||||||
|
*/ |
||||||
|
void sph_whirlpool0_close(void *cc, void *dst); |
||||||
|
|
||||||
|
/**
|
||||||
|
* WHIRLPOOL-1 uses the same structure as plain WHIRLPOOL. |
||||||
|
*/ |
||||||
|
typedef sph_whirlpool_context sph_whirlpool1_context; |
||||||
|
|
||||||
|
#ifdef DOXYGEN_IGNORE |
||||||
|
/**
|
||||||
|
* Initialize a WHIRLPOOL-1 context. This function is identical to |
||||||
|
* <code>sph_whirlpool_init()</code>. |
||||||
|
* |
||||||
|
* @param cc the WHIRLPOOL context (pointer to a |
||||||
|
* <code>sph_whirlpool1_context</code>) |
||||||
|
*/ |
||||||
|
void sph_whirlpool1_init(void *cc); |
||||||
|
#endif |
||||||
|
|
||||||
|
#ifndef DOXYGEN_IGNORE |
||||||
|
#define sph_whirlpool1_init sph_whirlpool_init |
||||||
|
#endif |
||||||
|
|
||||||
|
/**
|
||||||
|
* Process some data bytes. It is acceptable that <code>len</code> is zero |
||||||
|
* (in which case this function does nothing). This function applies the |
||||||
|
* WHIRLPOOL-1 algorithm. |
||||||
|
* |
||||||
|
* @param cc the WHIRLPOOL context |
||||||
|
* @param data the input data |
||||||
|
* @param len the input data length (in bytes) |
||||||
|
*/ |
||||||
|
void sph_whirlpool1(void *cc, const void *data, size_t len); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Terminate the current WHIRLPOOL-1 computation and output the result into the |
||||||
|
* provided buffer. The destination buffer must be wide enough to |
||||||
|
* accommodate the result (64 bytes). The context is automatically |
||||||
|
* reinitialized. |
||||||
|
* |
||||||
|
* @param cc the WHIRLPOOL-1 context |
||||||
|
* @param dst the destination buffer |
||||||
|
*/ |
||||||
|
void sph_whirlpool1_close(void *cc, void *dst); |
||||||
|
|
||||||
|
#endif |
||||||
|
|
||||||
|
#ifdef __cplusplus |
||||||
|
} |
||||||
|
#endif |
||||||
|
|
||||||
|
#endif |
@ -0,0 +1,346 @@ |
|||||||
|
/* $Id: md_helper.c 216 2010-06-08 09:46:57Z tp $ */ |
||||||
|
/*
|
||||||
|
* This file contains some functions which implement the external data |
||||||
|
* handling and padding for Merkle-Damgard hash functions which follow |
||||||
|
* the conventions set out by MD4 (little-endian) or SHA-1 (big-endian). |
||||||
|
* |
||||||
|
* API: this file is meant to be included, not compiled as a stand-alone |
||||||
|
* file. Some macros must be defined: |
||||||
|
* RFUN name for the round function |
||||||
|
* HASH "short name" for the hash function |
||||||
|
* BE32 defined for big-endian, 32-bit based (e.g. SHA-1) |
||||||
|
* LE32 defined for little-endian, 32-bit based (e.g. MD5) |
||||||
|
* BE64 defined for big-endian, 64-bit based (e.g. SHA-512) |
||||||
|
* LE64 defined for little-endian, 64-bit based (no example yet) |
||||||
|
* PW01 if defined, append 0x01 instead of 0x80 (for Tiger) |
||||||
|
* BLEN if defined, length of a message block (in bytes) |
||||||
|
* PLW1 if defined, length is defined on one 64-bit word only (for Tiger) |
||||||
|
* PLW4 if defined, length is defined on four 64-bit words (for WHIRLPOOL) |
||||||
|
* SVAL if defined, reference to the context state information |
||||||
|
* |
||||||
|
* BLEN is used when a message block is not 16 (32-bit or 64-bit) words: |
||||||
|
* this is used for instance for Tiger, which works on 64-bit words but |
||||||
|
* uses 512-bit message blocks (eight 64-bit words). PLW1 and PLW4 are |
||||||
|
* ignored if 32-bit words are used; if 64-bit words are used and PLW1 is |
||||||
|
* set, then only one word (64 bits) will be used to encode the input |
||||||
|
* message length (in bits), otherwise two words will be used (as in |
||||||
|
* SHA-384 and SHA-512). If 64-bit words are used and PLW4 is defined (but |
||||||
|
* not PLW1), four 64-bit words will be used to encode the message length |
||||||
|
* (in bits). Note that regardless of those settings, only 64-bit message |
||||||
|
* lengths are supported (in bits): messages longer than 2 Exabytes will be |
||||||
|
* improperly hashed (this is unlikely to happen soon: 2 Exabytes is about |
||||||
|
* 2 million Terabytes, which is huge). |
||||||
|
* |
||||||
|
* If CLOSE_ONLY is defined, then this file defines only the sph_XXX_close() |
||||||
|
* function. This is used for Tiger2, which is identical to Tiger except |
||||||
|
* when it comes to the padding (Tiger2 uses the standard 0x80 byte instead |
||||||
|
* of the 0x01 from original Tiger). |
||||||
|
* |
||||||
|
* The RFUN function is invoked with two arguments, the first pointing to |
||||||
|
* aligned data (as a "const void *"), the second being state information |
||||||
|
* from the context structure. By default, this state information is the |
||||||
|
* "val" field from the context, and this field is assumed to be an array |
||||||
|
* of words ("sph_u32" or "sph_u64", depending on BE32/LE32/BE64/LE64). |
||||||
|
* The "val" field can have any type, except |
||||||
|
* for the output encoding which assumes that it is an array of "sph_u32" |
||||||
|
* values. By defining NO_OUTPUT, this last step is deactivated; the |
||||||
|
* includer code is then responsible for writing out the hash result. When |
||||||
|
* NO_OUTPUT is defined, the third parameter to the "close()" function is |
||||||
|
* ignored. |
||||||
|
* |
||||||
|
* ==========================(LICENSE BEGIN)============================ |
||||||
|
* |
||||||
|
* Copyright (c) 2007-2010 Projet RNRT SAPHIR |
||||||
|
* |
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining |
||||||
|
* a copy of this software and associated documentation files (the |
||||||
|
* "Software"), to deal in the Software without restriction, including |
||||||
|
* without limitation the rights to use, copy, modify, merge, publish, |
||||||
|
* distribute, sublicense, and/or sell copies of the Software, and to |
||||||
|
* permit persons to whom the Software is furnished to do so, subject to |
||||||
|
* the following conditions: |
||||||
|
* |
||||||
|
* The above copyright notice and this permission notice shall be |
||||||
|
* included in all copies or substantial portions of the Software. |
||||||
|
* |
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||||
|
* |
||||||
|
* ===========================(LICENSE END)============================= |
||||||
|
* |
||||||
|
* @author Thomas Pornin <thomas.pornin@cryptolog.com> |
||||||
|
*/ |
||||||
|
|
||||||
|
#ifdef _MSC_VER |
||||||
|
#pragma warning (disable: 4146) |
||||||
|
#endif |
||||||
|
|
||||||
|
/*
 * Two-level token pasting: the extra indirection makes sure that macro
 * arguments are expanded before they are concatenated.
 */
#undef SPH_XCAT
#define SPH_XCAT(a, b)     SPH_XCAT_(a, b)
#undef SPH_XCAT_
#define SPH_XCAT_(a, b)    a ## b

/*
 * Default block length (SPH_BLEN) and word length (SPH_WLEN), in bytes,
 * selected from the word size requested by the includer.
 */
#undef SPH_BLEN
#undef SPH_WLEN
#if defined(BE64) || defined(LE64)
#define SPH_BLEN    128U
#define SPH_WLEN      8U
#else
#define SPH_BLEN     64U
#define SPH_WLEN      4U
#endif

/* An explicit BLEN from the includer overrides the default block length. */
#if defined(BLEN)
#undef SPH_BLEN
#define SPH_BLEN    BLEN
#endif

/*
 * Offset within the block at which the encoded message length begins:
 * one, four or two length words are reserved at the end of the block,
 * depending on PLW1 / PLW4 / neither.
 */
#undef SPH_MAXPAD
#if defined(PLW1)
#define SPH_MAXPAD   (SPH_BLEN - SPH_WLEN)
#elif defined(PLW4)
#define SPH_MAXPAD   (SPH_BLEN - (SPH_WLEN << 2))
#else
#define SPH_MAXPAD   (SPH_BLEN - (SPH_WLEN << 1))
#endif

/*
 * State reference handed to RFUN: SVAL when the includer provides it
 * (SPH_NO_OUTPUT is then set as well), the "val" field of the context
 * otherwise.
 */
#undef SPH_VAL
#undef SPH_NO_OUTPUT
#if defined(SVAL)
#define SPH_VAL         SVAL
#define SPH_NO_OUTPUT   1
#else
#define SPH_VAL         sc->val
#endif
||||||
|
|
||||||
|
#ifndef CLOSE_ONLY |
||||||
|
|
||||||
|
#ifdef SPH_UPTR |
||||||
|
static void |
||||||
|
SPH_XCAT(HASH, _short)(void *cc, const void *data, size_t len) |
||||||
|
#else |
||||||
|
void |
||||||
|
SPH_XCAT(sph_, HASH)(void *cc, const void *data, size_t len) |
||||||
|
#endif |
||||||
|
{ |
||||||
|
SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc; |
||||||
|
unsigned current; |
||||||
|
|
||||||
|
sc = cc; |
||||||
|
#if SPH_64 |
||||||
|
current = (unsigned)sc->count & (SPH_BLEN - 1U); |
||||||
|
#else |
||||||
|
current = (unsigned)sc->count_low & (SPH_BLEN - 1U); |
||||||
|
#endif |
||||||
|
while (len > 0) { |
||||||
|
unsigned clen; |
||||||
|
#if !SPH_64 |
||||||
|
sph_u32 clow, clow2; |
||||||
|
#endif |
||||||
|
|
||||||
|
clen = SPH_BLEN - current; |
||||||
|
if (clen > len) |
||||||
|
clen = len; |
||||||
|
memcpy(sc->buf + current, data, clen); |
||||||
|
data = (const unsigned char *)data + clen; |
||||||
|
current += clen; |
||||||
|
len -= clen; |
||||||
|
if (current == SPH_BLEN) { |
||||||
|
RFUN(sc->buf, SPH_VAL); |
||||||
|
current = 0; |
||||||
|
} |
||||||
|
#if SPH_64 |
||||||
|
sc->count += clen; |
||||||
|
#else |
||||||
|
clow = sc->count_low; |
||||||
|
clow2 = SPH_T32(clow + clen); |
||||||
|
sc->count_low = clow2; |
||||||
|
if (clow2 < clow) |
||||||
|
sc->count_high ++; |
||||||
|
#endif |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
#ifdef SPH_UPTR
/*
 * Public data-input function used when SPH_UPTR is defined. Inputs
 * shorter than two blocks are delegated to HASH_short(). Otherwise the
 * pending partial block (if any) is completed through HASH_short(), and
 * the remaining full blocks are passed to RFUN directly from the
 * caller's memory, without going through the context buffer. Leftover
 * bytes are stored in the context buffer.
 *
 * cc     the hash context
 * data   the input data
 * len    the input data length (in bytes)
 */
void
SPH_XCAT(sph_, HASH)(void *cc, const void *data, size_t len)
{
	SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc;
	const unsigned char *src = data;
	unsigned used;
	size_t bulk_len;
#if !SPH_64
	sph_u32 before, after;
#endif

	if (len < (2 * SPH_BLEN)) {
		SPH_XCAT(HASH, _short)(cc, src, len);
		return;
	}
	sc = cc;
#if SPH_64
	used = (unsigned)sc->count & (SPH_BLEN - 1U);
#else
	used = (unsigned)sc->count_low & (SPH_BLEN - 1U);
#endif
	if (used != 0) {
		/* Fill up the partially filled buffer first. */
		unsigned gap = SPH_BLEN - used;

		SPH_XCAT(HASH, _short)(cc, src, gap);
		src += gap;
		len -= gap;
	}
#if !SPH_UNALIGNED
	/* Input not aligned on a word boundary: use the buffered routine. */
	if (((SPH_UPTR)src & (SPH_WLEN - 1U)) != 0) {
		SPH_XCAT(HASH, _short)(cc, src, len);
		return;
	}
#endif
	bulk_len = len;
	for (; len >= SPH_BLEN; len -= SPH_BLEN, src += SPH_BLEN)
		RFUN(src, SPH_VAL);
	if (len != 0)
		memcpy(sc->buf, src, len);
#if SPH_64
	sc->count += (sph_u64)bulk_len;
#else
	before = sc->count_low;
	after = SPH_T32(before + bulk_len);
	sc->count_low = after;
	if (after < before)
		sc->count_high ++;
	/*
	 * This code handles the improbable situation where "size_t" is
	 * greater than 32 bits, and yet we do not have a 64-bit type.
	 * The 32-bit shift is done in three steps (12 + 10 + 10 bits).
	 */
	bulk_len >>= 12;
	bulk_len >>= 10;
	bulk_len >>= 10;
	sc->count_high += bulk_len;
#endif
}
#endif
||||||
|
|
||||||
|
#endif |
||||||
|
|
||||||
|
/*
|
||||||
|
* Perform padding and produce result. The context is NOT reinitialized |
||||||
|
* by this function. |
||||||
|
*/ |
||||||
|
static void |
||||||
|
SPH_XCAT(HASH, _addbits_and_close)(void *cc, |
||||||
|
unsigned ub, unsigned n, void *dst, unsigned rnum) |
||||||
|
{ |
||||||
|
SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc; |
||||||
|
unsigned current, u; |
||||||
|
#if !SPH_64 |
||||||
|
sph_u32 low, high; |
||||||
|
#endif |
||||||
|
|
||||||
|
sc = cc; |
||||||
|
#if SPH_64 |
||||||
|
current = (unsigned)sc->count & (SPH_BLEN - 1U); |
||||||
|
#else |
||||||
|
current = (unsigned)sc->count_low & (SPH_BLEN - 1U); |
||||||
|
#endif |
||||||
|
#ifdef PW01 |
||||||
|
sc->buf[current ++] = (0x100 | (ub & 0xFF)) >> (8 - n); |
||||||
|
#else |
||||||
|
{ |
||||||
|
unsigned z; |
||||||
|
|
||||||
|
z = 0x80 >> n; |
||||||
|
sc->buf[current ++] = ((ub & -z) | z) & 0xFF; |
||||||
|
} |
||||||
|
#endif |
||||||
|
if (current > SPH_MAXPAD) { |
||||||
|
memset(sc->buf + current, 0, SPH_BLEN - current); |
||||||
|
RFUN(sc->buf, SPH_VAL); |
||||||
|
memset(sc->buf, 0, SPH_MAXPAD); |
||||||
|
} else { |
||||||
|
memset(sc->buf + current, 0, SPH_MAXPAD - current); |
||||||
|
} |
||||||
|
#if defined BE64 |
||||||
|
#if defined PLW1 |
||||||
|
sph_enc64be_aligned(sc->buf + SPH_MAXPAD, |
||||||
|
SPH_T64(sc->count << 3) + (sph_u64)n); |
||||||
|
#elif defined PLW4 |
||||||
|
memset(sc->buf + SPH_MAXPAD, 0, 2 * SPH_WLEN); |
||||||
|
sph_enc64be_aligned(sc->buf + SPH_MAXPAD + 2 * SPH_WLEN, |
||||||
|
sc->count >> 61); |
||||||
|
sph_enc64be_aligned(sc->buf + SPH_MAXPAD + 3 * SPH_WLEN, |
||||||
|
SPH_T64(sc->count << 3) + (sph_u64)n); |
||||||
|
#else |
||||||
|
sph_enc64be_aligned(sc->buf + SPH_MAXPAD, sc->count >> 61); |
||||||
|
sph_enc64be_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, |
||||||
|
SPH_T64(sc->count << 3) + (sph_u64)n); |
||||||
|
#endif |
||||||
|
#elif defined LE64 |
||||||
|
#if defined PLW1 |
||||||
|
sph_enc64le_aligned(sc->buf + SPH_MAXPAD, |
||||||
|
SPH_T64(sc->count << 3) + (sph_u64)n); |
||||||
|
#elif defined PLW1 |
||||||
|
sph_enc64le_aligned(sc->buf + SPH_MAXPAD, |
||||||
|
SPH_T64(sc->count << 3) + (sph_u64)n); |
||||||
|
sph_enc64le_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, sc->count >> 61); |
||||||
|
memset(sc->buf + SPH_MAXPAD + 2 * SPH_WLEN, 0, 2 * SPH_WLEN); |
||||||
|
#else |
||||||
|
sph_enc64le_aligned(sc->buf + SPH_MAXPAD, |
||||||
|
SPH_T64(sc->count << 3) + (sph_u64)n); |
||||||
|
sph_enc64le_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, sc->count >> 61); |
||||||
|
#endif |
||||||
|
#else |
||||||
|
#if SPH_64 |
||||||
|
#ifdef BE32 |
||||||
|
sph_enc64be_aligned(sc->buf + SPH_MAXPAD, |
||||||
|
SPH_T64(sc->count << 3) + (sph_u64)n); |
||||||
|
#else |
||||||
|
sph_enc64le_aligned(sc->buf + SPH_MAXPAD, |
||||||
|
SPH_T64(sc->count << 3) + (sph_u64)n); |
||||||
|
#endif |
||||||
|
#else |
||||||
|
low = sc->count_low; |
||||||
|
high = SPH_T32((sc->count_high << 3) | (low >> 29)); |
||||||
|
low = SPH_T32(low << 3) + (sph_u32)n; |
||||||
|
#ifdef BE32 |
||||||
|
sph_enc32be(sc->buf + SPH_MAXPAD, high); |
||||||
|
sph_enc32be(sc->buf + SPH_MAXPAD + SPH_WLEN, low); |
||||||
|
#else |
||||||
|
sph_enc32le(sc->buf + SPH_MAXPAD, low); |
||||||
|
sph_enc32le(sc->buf + SPH_MAXPAD + SPH_WLEN, high); |
||||||
|
#endif |
||||||
|
#endif |
||||||
|
#endif |
||||||
|
RFUN(sc->buf, SPH_VAL); |
||||||
|
#ifdef SPH_NO_OUTPUT |
||||||
|
(void)dst; |
||||||
|
(void)rnum; |
||||||
|
(void)u; |
||||||
|
#else |
||||||
|
for (u = 0; u < rnum; u ++) { |
||||||
|
#if defined BE64 |
||||||
|
sph_enc64be((unsigned char *)dst + 8 * u, sc->val[u]); |
||||||
|
#elif defined LE64 |
||||||
|
sph_enc64le((unsigned char *)dst + 8 * u, sc->val[u]); |
||||||
|
#elif defined BE32 |
||||||
|
sph_enc32be((unsigned char *)dst + 4 * u, sc->val[u]); |
||||||
|
#else |
||||||
|
sph_enc32le((unsigned char *)dst + 4 * u, sc->val[u]); |
||||||
|
#endif |
||||||
|
} |
||||||
|
#endif |
||||||
|
} |
||||||
|
|
||||||
|
static void |
||||||
|
SPH_XCAT(HASH, _close)(void *cc, void *dst, unsigned rnum) |
||||||
|
{ |
||||||
|
SPH_XCAT(HASH, _addbits_and_close)(cc, 0, 0, dst, rnum); |
||||||
|
} |
@ -0,0 +1,492 @@ |
|||||||
|
/* |
||||||
|
* Shabal-512 for X14/X15 (STUB) |
||||||
|
*/ |
||||||
|
#include <stdint.h> |
||||||
|
#include <cuda_runtime.h> |
||||||
|
|
||||||
|
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); |
||||||
|
|
||||||
|
|
||||||
|
#define SPH_C64(x) ((uint64_t)(x ## ULL)) |
||||||
|
#define SPH_C32(x) ((uint32_t)(x ## U)) |
||||||
|
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) |
||||||
|
|
||||||
|
#define SWAB32(x) ( __byte_perm(x, x, 0x0123) ) |
||||||
|
|
||||||
|
#if __CUDA_ARCH__ < 350 |
||||||
|
// Kepler (Compute 3.0) |
||||||
|
#define ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n)))) |
||||||
|
#else |
||||||
|
// Kepler (Compute 3.5) |
||||||
|
#define ROTL32(x, n) __funnelshift_l( (x), (x), (n) ) |
||||||
|
#endif |
||||||
|
|
||||||
|
/* $Id: shabal.c 175 2010-05-07 16:03:20Z tp $ */ |
||||||
|
/* |
||||||
|
* Shabal implementation. |
||||||
|
* |
||||||
|
* ==========================(LICENSE BEGIN)============================ |
||||||
|
* |
||||||
|
* Copyright (c) 2007-2010 Projet RNRT SAPHIR |
||||||
|
* |
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining |
||||||
|
* a copy of this software and associated documentation files (the |
||||||
|
* "Software"), to deal in the Software without restriction, including |
||||||
|
* without limitation the rights to use, copy, modify, merge, publish, |
||||||
|
* distribute, sublicense, and/or sell copies of the Software, and to |
||||||
|
* permit persons to whom the Software is furnished to do so, subject to |
||||||
|
* the following conditions: |
||||||
|
* |
||||||
|
* The above copyright notice and this permission notice shall be |
||||||
|
* included in all copies or substantial portions of the Software. |
||||||
|
* |
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||||
|
* |
||||||
|
* ===========================(LICENSE END)============================= |
||||||
|
* |
||||||
|
* @author Thomas Pornin <thomas.pornin@cryptolog.com> |
||||||
|
*/ |
||||||
|
|
||||||
|
/* |
||||||
|
* Part of this code was automatically generated (the part between |
||||||
|
* the "BEGIN" and "END" markers). |
||||||
|
*/ |
||||||
|
|
||||||
|
#define sM 16 |
||||||
|
|
||||||
|
#define C32 SPH_C32 |
||||||
|
#define T32 SPH_T32 |
||||||
|
|
||||||
|
#define O1 13 |
||||||
|
#define O2 9 |
||||||
|
#define O3 6 |
||||||
|
|
||||||
|
/* |
||||||
|
* We copy the state into local variables, so that the compiler knows |
||||||
|
* that it can optimize them at will. |
||||||
|
*/ |
||||||
|
|
||||||
|
/* BEGIN -- automatically generated code. */ |
||||||
|
|
||||||
|
#define INPUT_BLOCK_ADD do { \ |
||||||
|
B0 = T32(B0 + M0); \ |
||||||
|
B1 = T32(B1 + M1); \ |
||||||
|
B2 = T32(B2 + M2); \ |
||||||
|
B3 = T32(B3 + M3); \ |
||||||
|
B4 = T32(B4 + M4); \ |
||||||
|
B5 = T32(B5 + M5); \ |
||||||
|
B6 = T32(B6 + M6); \ |
||||||
|
B7 = T32(B7 + M7); \ |
||||||
|
B8 = T32(B8 + M8); \ |
||||||
|
B9 = T32(B9 + M9); \ |
||||||
|
BA = T32(BA + MA); \ |
||||||
|
BB = T32(BB + MB); \ |
||||||
|
BC = T32(BC + MC); \ |
||||||
|
BD = T32(BD + MD); \ |
||||||
|
BE = T32(BE + ME); \ |
||||||
|
BF = T32(BF + MF); \ |
||||||
|
} while (0) |
||||||
|
|
||||||
|
#define INPUT_BLOCK_SUB do { \ |
||||||
|
C0 = T32(C0 - M0); \ |
||||||
|
C1 = T32(C1 - M1); \ |
||||||
|
C2 = T32(C2 - M2); \ |
||||||
|
C3 = T32(C3 - M3); \ |
||||||
|
C4 = T32(C4 - M4); \ |
||||||
|
C5 = T32(C5 - M5); \ |
||||||
|
C6 = T32(C6 - M6); \ |
||||||
|
C7 = T32(C7 - M7); \ |
||||||
|
C8 = T32(C8 - M8); \ |
||||||
|
C9 = T32(C9 - M9); \ |
||||||
|
CA = T32(CA - MA); \ |
||||||
|
CB = T32(CB - MB); \ |
||||||
|
CC = T32(CC - MC); \ |
||||||
|
CD = T32(CD - MD); \ |
||||||
|
CE = T32(CE - ME); \ |
||||||
|
CF = T32(CF - MF); \ |
||||||
|
} while (0) |
||||||
|
|
||||||
|
#define XOR_W do { \ |
||||||
|
A00 ^= Wlow; \ |
||||||
|
A01 ^= Whigh; \ |
||||||
|
} while (0) |
||||||
|
|
||||||
|
#define SWAP(v1, v2) do { \ |
||||||
|
uint32_t tmp = (v1); \ |
||||||
|
(v1) = (v2); \ |
||||||
|
(v2) = tmp; \ |
||||||
|
} while (0) |
||||||
|
|
||||||
|
#define SWAP_BC do { \ |
||||||
|
SWAP(B0, C0); \ |
||||||
|
SWAP(B1, C1); \ |
||||||
|
SWAP(B2, C2); \ |
||||||
|
SWAP(B3, C3); \ |
||||||
|
SWAP(B4, C4); \ |
||||||
|
SWAP(B5, C5); \ |
||||||
|
SWAP(B6, C6); \ |
||||||
|
SWAP(B7, C7); \ |
||||||
|
SWAP(B8, C8); \ |
||||||
|
SWAP(B9, C9); \ |
||||||
|
SWAP(BA, CA); \ |
||||||
|
SWAP(BB, CB); \ |
||||||
|
SWAP(BC, CC); \ |
||||||
|
SWAP(BD, CD); \ |
||||||
|
SWAP(BE, CE); \ |
||||||
|
SWAP(BF, CF); \ |
||||||
|
} while (0) |
||||||
|
|
||||||
|
#define PERM_ELT(xa0, xa1, xb0, xb1, xb2, xb3, xc, xm) do { \ |
||||||
|
xa0 = T32((xa0 \ |
||||||
|
^ (((xa1 << 15) | (xa1 >> 17)) * 5U) \ |
||||||
|
^ xc) * 3U) \ |
||||||
|
^ xb1 ^ (xb2 & ~xb3) ^ xm; \ |
||||||
|
xb0 = T32(~(((xb0 << 1) | (xb0 >> 31)) ^ xa0)); \ |
||||||
|
} while (0) |
||||||
|
|
||||||
|
#define PERM_STEP_0 do { \ |
||||||
|
PERM_ELT(A00, A0B, B0, BD, B9, B6, C8, M0); \ |
||||||
|
PERM_ELT(A01, A00, B1, BE, BA, B7, C7, M1); \ |
||||||
|
PERM_ELT(A02, A01, B2, BF, BB, B8, C6, M2); \ |
||||||
|
PERM_ELT(A03, A02, B3, B0, BC, B9, C5, M3); \ |
||||||
|
PERM_ELT(A04, A03, B4, B1, BD, BA, C4, M4); \ |
||||||
|
PERM_ELT(A05, A04, B5, B2, BE, BB, C3, M5); \ |
||||||
|
PERM_ELT(A06, A05, B6, B3, BF, BC, C2, M6); \ |
||||||
|
PERM_ELT(A07, A06, B7, B4, B0, BD, C1, M7); \ |
||||||
|
PERM_ELT(A08, A07, B8, B5, B1, BE, C0, M8); \ |
||||||
|
PERM_ELT(A09, A08, B9, B6, B2, BF, CF, M9); \ |
||||||
|
PERM_ELT(A0A, A09, BA, B7, B3, B0, CE, MA); \ |
||||||
|
PERM_ELT(A0B, A0A, BB, B8, B4, B1, CD, MB); \ |
||||||
|
PERM_ELT(A00, A0B, BC, B9, B5, B2, CC, MC); \ |
||||||
|
PERM_ELT(A01, A00, BD, BA, B6, B3, CB, MD); \ |
||||||
|
PERM_ELT(A02, A01, BE, BB, B7, B4, CA, ME); \ |
||||||
|
PERM_ELT(A03, A02, BF, BC, B8, B5, C9, MF); \ |
||||||
|
} while (0) |
||||||
|
|
||||||
|
#define PERM_STEP_1 do { \ |
||||||
|
PERM_ELT(A04, A03, B0, BD, B9, B6, C8, M0); \ |
||||||
|
PERM_ELT(A05, A04, B1, BE, BA, B7, C7, M1); \ |
||||||
|
PERM_ELT(A06, A05, B2, BF, BB, B8, C6, M2); \ |
||||||
|
PERM_ELT(A07, A06, B3, B0, BC, B9, C5, M3); \ |
||||||
|
PERM_ELT(A08, A07, B4, B1, BD, BA, C4, M4); \ |
||||||
|
PERM_ELT(A09, A08, B5, B2, BE, BB, C3, M5); \ |
||||||
|
PERM_ELT(A0A, A09, B6, B3, BF, BC, C2, M6); \ |
||||||
|
PERM_ELT(A0B, A0A, B7, B4, B0, BD, C1, M7); \ |
||||||
|
PERM_ELT(A00, A0B, B8, B5, B1, BE, C0, M8); \ |
||||||
|
PERM_ELT(A01, A00, B9, B6, B2, BF, CF, M9); \ |
||||||
|
PERM_ELT(A02, A01, BA, B7, B3, B0, CE, MA); \ |
||||||
|
PERM_ELT(A03, A02, BB, B8, B4, B1, CD, MB); \ |
||||||
|
PERM_ELT(A04, A03, BC, B9, B5, B2, CC, MC); \ |
||||||
|
PERM_ELT(A05, A04, BD, BA, B6, B3, CB, MD); \ |
||||||
|
PERM_ELT(A06, A05, BE, BB, B7, B4, CA, ME); \ |
||||||
|
PERM_ELT(A07, A06, BF, BC, B8, B5, C9, MF); \ |
||||||
|
} while (0) |
||||||
|
|
||||||
|
#define PERM_STEP_2 do { \ |
||||||
|
PERM_ELT(A08, A07, B0, BD, B9, B6, C8, M0); \ |
||||||
|
PERM_ELT(A09, A08, B1, BE, BA, B7, C7, M1); \ |
||||||
|
PERM_ELT(A0A, A09, B2, BF, BB, B8, C6, M2); \ |
||||||
|
PERM_ELT(A0B, A0A, B3, B0, BC, B9, C5, M3); \ |
||||||
|
PERM_ELT(A00, A0B, B4, B1, BD, BA, C4, M4); \ |
||||||
|
PERM_ELT(A01, A00, B5, B2, BE, BB, C3, M5); \ |
||||||
|
PERM_ELT(A02, A01, B6, B3, BF, BC, C2, M6); \ |
||||||
|
PERM_ELT(A03, A02, B7, B4, B0, BD, C1, M7); \ |
||||||
|
PERM_ELT(A04, A03, B8, B5, B1, BE, C0, M8); \ |
||||||
|
PERM_ELT(A05, A04, B9, B6, B2, BF, CF, M9); \ |
||||||
|
PERM_ELT(A06, A05, BA, B7, B3, B0, CE, MA); \ |
||||||
|
PERM_ELT(A07, A06, BB, B8, B4, B1, CD, MB); \ |
||||||
|
PERM_ELT(A08, A07, BC, B9, B5, B2, CC, MC); \ |
||||||
|
PERM_ELT(A09, A08, BD, BA, B6, B3, CB, MD); \ |
||||||
|
PERM_ELT(A0A, A09, BE, BB, B7, B4, CA, ME); \ |
||||||
|
PERM_ELT(A0B, A0A, BF, BC, B8, B5, C9, MF); \ |
||||||
|
} while (0) |
||||||
|
|
||||||
|
#define APPLY_P do { \ |
||||||
|
B0 = T32(B0 << 17) | (B0 >> 15); \ |
||||||
|
B1 = T32(B1 << 17) | (B1 >> 15); \ |
||||||
|
B2 = T32(B2 << 17) | (B2 >> 15); \ |
||||||
|
B3 = T32(B3 << 17) | (B3 >> 15); \ |
||||||
|
B4 = T32(B4 << 17) | (B4 >> 15); \ |
||||||
|
B5 = T32(B5 << 17) | (B5 >> 15); \ |
||||||
|
B6 = T32(B6 << 17) | (B6 >> 15); \ |
||||||
|
B7 = T32(B7 << 17) | (B7 >> 15); \ |
||||||
|
B8 = T32(B8 << 17) | (B8 >> 15); \ |
||||||
|
B9 = T32(B9 << 17) | (B9 >> 15); \ |
||||||
|
BA = T32(BA << 17) | (BA >> 15); \ |
||||||
|
BB = T32(BB << 17) | (BB >> 15); \ |
||||||
|
BC = T32(BC << 17) | (BC >> 15); \ |
||||||
|
BD = T32(BD << 17) | (BD >> 15); \ |
||||||
|
BE = T32(BE << 17) | (BE >> 15); \ |
||||||
|
BF = T32(BF << 17) | (BF >> 15); \ |
||||||
|
PERM_STEP_0; \ |
||||||
|
PERM_STEP_1; \ |
||||||
|
PERM_STEP_2; \ |
||||||
|
A0B = T32(A0B + C6); \ |
||||||
|
A0A = T32(A0A + C5); \ |
||||||
|
A09 = T32(A09 + C4); \ |
||||||
|
A08 = T32(A08 + C3); \ |
||||||
|
A07 = T32(A07 + C2); \ |
||||||
|
A06 = T32(A06 + C1); \ |
||||||
|
A05 = T32(A05 + C0); \ |
||||||
|
A04 = T32(A04 + CF); \ |
||||||
|
A03 = T32(A03 + CE); \ |
||||||
|
A02 = T32(A02 + CD); \ |
||||||
|
A01 = T32(A01 + CC); \ |
||||||
|
A00 = T32(A00 + CB); \ |
||||||
|
A0B = T32(A0B + CA); \ |
||||||
|
A0A = T32(A0A + C9); \ |
||||||
|
A09 = T32(A09 + C8); \ |
||||||
|
A08 = T32(A08 + C7); \ |
||||||
|
A07 = T32(A07 + C6); \ |
||||||
|
A06 = T32(A06 + C5); \ |
||||||
|
A05 = T32(A05 + C4); \ |
||||||
|
A04 = T32(A04 + C3); \ |
||||||
|
A03 = T32(A03 + C2); \ |
||||||
|
A02 = T32(A02 + C1); \ |
||||||
|
A01 = T32(A01 + C0); \ |
||||||
|
A00 = T32(A00 + CF); \ |
||||||
|
A0B = T32(A0B + CE); \ |
||||||
|
A0A = T32(A0A + CD); \ |
||||||
|
A09 = T32(A09 + CC); \ |
||||||
|
A08 = T32(A08 + CB); \ |
||||||
|
A07 = T32(A07 + CA); \ |
||||||
|
A06 = T32(A06 + C9); \ |
||||||
|
A05 = T32(A05 + C8); \ |
||||||
|
A04 = T32(A04 + C7); \ |
||||||
|
A03 = T32(A03 + C6); \ |
||||||
|
A02 = T32(A02 + C5); \ |
||||||
|
A01 = T32(A01 + C4); \ |
||||||
|
A00 = T32(A00 + C3); \ |
||||||
|
} while (0) |
||||||
|
|
||||||
|
#define INCR_W do { \ |
||||||
|
if ((Wlow = T32(Wlow + 1)) == 0) \ |
||||||
|
Whigh = T32(Whigh + 1); \ |
||||||
|
} while (0) |
||||||
|
|
||||||
|
|
||||||
|
#if 0 /* other hash sizes init */ |
||||||
|
|
||||||
|
static const uint32_t A_init_192[] = { |
||||||
|
C32(0xFD749ED4), C32(0xB798E530), C32(0x33904B6F), C32(0x46BDA85E), |
||||||
|
C32(0x076934B4), C32(0x454B4058), C32(0x77F74527), C32(0xFB4CF465), |
||||||
|
C32(0x62931DA9), C32(0xE778C8DB), C32(0x22B3998E), C32(0xAC15CFB9) |
||||||
|
}; |
||||||
|
|
||||||
|
static const uint32_t B_init_192[] = { |
||||||
|
C32(0x58BCBAC4), C32(0xEC47A08E), C32(0xAEE933B2), C32(0xDFCBC824), |
||||||
|
C32(0xA7944804), C32(0xBF65BDB0), C32(0x5A9D4502), C32(0x59979AF7), |
||||||
|
C32(0xC5CEA54E), C32(0x4B6B8150), C32(0x16E71909), C32(0x7D632319), |
||||||
|
C32(0x930573A0), C32(0xF34C63D1), C32(0xCAF914B4), C32(0xFDD6612C) |
||||||
|
}; |
||||||
|
|
||||||
|
static const uint32_t C_init_192[] = { |
||||||
|
C32(0x61550878), C32(0x89EF2B75), C32(0xA1660C46), C32(0x7EF3855B), |
||||||
|
C32(0x7297B58C), C32(0x1BC67793), C32(0x7FB1C723), C32(0xB66FC640), |
||||||
|
C32(0x1A48B71C), C32(0xF0976D17), C32(0x088CE80A), C32(0xA454EDF3), |
||||||
|
C32(0x1C096BF4), C32(0xAC76224B), C32(0x5215781C), C32(0xCD5D2669) |
||||||
|
}; |
||||||
|
|
||||||
|
static const uint32_t A_init_224[] = { |
||||||
|
C32(0xA5201467), C32(0xA9B8D94A), C32(0xD4CED997), C32(0x68379D7B), |
||||||
|
C32(0xA7FC73BA), C32(0xF1A2546B), C32(0x606782BF), C32(0xE0BCFD0F), |
||||||
|
C32(0x2F25374E), C32(0x069A149F), C32(0x5E2DFF25), C32(0xFAECF061) |
||||||
|
}; |
||||||
|
|
||||||
|
static const uint32_t B_init_224[] = { |
||||||
|
C32(0xEC9905D8), C32(0xF21850CF), C32(0xC0A746C8), C32(0x21DAD498), |
||||||
|
C32(0x35156EEB), C32(0x088C97F2), C32(0x26303E40), C32(0x8A2D4FB5), |
||||||
|
C32(0xFEEE44B6), C32(0x8A1E9573), C32(0x7B81111A), C32(0xCBC139F0), |
||||||
|
C32(0xA3513861), C32(0x1D2C362E), C32(0x918C580E), C32(0xB58E1B9C) |
||||||
|
}; |
||||||
|
|
||||||
|
static const uint32_t C_init_224[] = { |
||||||
|
C32(0xE4B573A1), C32(0x4C1A0880), C32(0x1E907C51), C32(0x04807EFD), |
||||||
|
C32(0x3AD8CDE5), C32(0x16B21302), C32(0x02512C53), C32(0x2204CB18), |
||||||
|
C32(0x99405F2D), C32(0xE5B648A1), C32(0x70AB1D43), C32(0xA10C25C2), |
||||||
|
C32(0x16F1AC05), C32(0x38BBEB56), C32(0x9B01DC60), C32(0xB1096D83) |
||||||
|
}; |
||||||
|
|
||||||
|
static const uint32_t A_init_256[] = { |
||||||
|
C32(0x52F84552), C32(0xE54B7999), C32(0x2D8EE3EC), C32(0xB9645191), |
||||||
|
C32(0xE0078B86), C32(0xBB7C44C9), C32(0xD2B5C1CA), C32(0xB0D2EB8C), |
||||||
|
C32(0x14CE5A45), C32(0x22AF50DC), C32(0xEFFDBC6B), C32(0xEB21B74A) |
||||||
|
}; |
||||||
|
|
||||||
|
static const uint32_t B_init_256[] = { |
||||||
|
C32(0xB555C6EE), C32(0x3E710596), C32(0xA72A652F), C32(0x9301515F), |
||||||
|
C32(0xDA28C1FA), C32(0x696FD868), C32(0x9CB6BF72), C32(0x0AFE4002), |
||||||
|
C32(0xA6E03615), C32(0x5138C1D4), C32(0xBE216306), C32(0xB38B8890), |
||||||
|
C32(0x3EA8B96B), C32(0x3299ACE4), C32(0x30924DD4), C32(0x55CB34A5) |
||||||
|
}; |
||||||
|
|
||||||
|
static const uint32_t C_init_256[] = { |
||||||
|
C32(0xB405F031), C32(0xC4233EBA), C32(0xB3733979), C32(0xC0DD9D55), |
||||||
|
C32(0xC51C28AE), C32(0xA327B8E1), C32(0x56C56167), C32(0xED614433), |
||||||
|
C32(0x88B59D60), C32(0x60E2CEBA), C32(0x758B4B8B), C32(0x83E82A7F), |
||||||
|
C32(0xBC968828), C32(0xE6E00BF7), C32(0xBA839E55), C32(0x9B491C60) |
||||||
|
}; |
||||||
|
|
||||||
|
static const uint32_t A_init_384[] = { |
||||||
|
C32(0xC8FCA331), C32(0xE55C504E), C32(0x003EBF26), C32(0xBB6B8D83), |
||||||
|
C32(0x7B0448C1), C32(0x41B82789), C32(0x0A7C9601), C32(0x8D659CFF), |
||||||
|
C32(0xB6E2673E), C32(0xCA54C77B), C32(0x1460FD7E), C32(0x3FCB8F2D) |
||||||
|
}; |
||||||
|
|
||||||
|
static const uint32_t B_init_384[] = { |
||||||
|
C32(0x527291FC), C32(0x2A16455F), C32(0x78E627E5), C32(0x944F169F), |
||||||
|
C32(0x1CA6F016), C32(0xA854EA25), C32(0x8DB98ABE), C32(0xF2C62641), |
||||||
|
C32(0x30117DCB), C32(0xCF5C4309), C32(0x93711A25), C32(0xF9F671B8), |
||||||
|
C32(0xB01D2116), C32(0x333F4B89), C32(0xB285D165), C32(0x86829B36) |
||||||
|
}; |
||||||
|
|
||||||
|
static const uint32_t C_init_384[] = { |
||||||
|
C32(0xF764B11A), C32(0x76172146), C32(0xCEF6934D), C32(0xC6D28399), |
||||||
|
C32(0xFE095F61), C32(0x5E6018B4), C32(0x5048ECF5), C32(0x51353261), |
||||||
|
C32(0x6E6E36DC), C32(0x63130DAD), C32(0xA9C69BD6), C32(0x1E90EA0C), |
||||||
|
C32(0x7C35073B), C32(0x28D95E6D), C32(0xAA340E0D), C32(0xCB3DEE70) |
||||||
|
}; |
||||||
|
#endif |
||||||
|
|
||||||
|
__device__ |
||||||
|
static const uint32_t d_A512[] = { |
||||||
|
C32(0x20728DFD), C32(0x46C0BD53), C32(0xE782B699), C32(0x55304632), |
||||||
|
C32(0x71B4EF90), C32(0x0EA9E82C), C32(0xDBB930F1), C32(0xFAD06B8B), |
||||||
|
C32(0xBE0CAE40), C32(0x8BD14410), C32(0x76D2ADAC), C32(0x28ACAB7F) |
||||||
|
}; |
||||||
|
|
||||||
|
__device__ |
||||||
|
static const uint32_t d_B512[] = { |
||||||
|
C32(0xC1099CB7), C32(0x07B385F3), C32(0xE7442C26), C32(0xCC8AD640), |
||||||
|
C32(0xEB6F56C7), C32(0x1EA81AA9), C32(0x73B9D314), C32(0x1DE85D08), |
||||||
|
C32(0x48910A5A), C32(0x893B22DB), C32(0xC5A0DF44), C32(0xBBC4324E), |
||||||
|
C32(0x72D2F240), C32(0x75941D99), C32(0x6D8BDE82), C32(0xA1A7502B) |
||||||
|
}; |
||||||
|
|
||||||
|
__device__ |
||||||
|
static const uint32_t d_C512[] = { |
||||||
|
C32(0xD9BF68D1), C32(0x58BAD750), C32(0x56028CB2), C32(0x8134F359), |
||||||
|
C32(0xB5D469D8), C32(0x941A8CC2), C32(0x418B2A6E), C32(0x04052780), |
||||||
|
C32(0x7F07D787), C32(0x5194358F), C32(0x3C60D665), C32(0xBE97D79A), |
||||||
|
C32(0x950C3434), C32(0xAED9A06D), C32(0x2537DC8D), C32(0x7CDB5969) |
||||||
|
}; |
||||||
|
|
||||||
|
/***************************************************/ |
||||||
|
// GPU Hash Function |
||||||
|
__global__ void x14_shabal512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) |
||||||
|
{ |
||||||
|
__syncthreads(); |
||||||
|
|
||||||
|
int thread = (blockDim.x * blockIdx.x + threadIdx.x); |
||||||
|
|
||||||
|
if (thread < threads) |
||||||
|
{ |
||||||
|
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread); |
||||||
|
int hashPosition = nounce - startNounce; |
||||||
|
uint32_t *Hash = (uint32_t*)&g_hash[hashPosition<<3]; // [hashPosition * 8] |
||||||
|
|
||||||
|
uint32_t A00 = d_A512[0], A01 = d_A512[1], A02 = d_A512[2], A03 = d_A512[3], |
||||||
|
A04 = d_A512[4], A05 = d_A512[5], A06 = d_A512[6], A07 = d_A512[7], |
||||||
|
A08 = d_A512[8], A09 = d_A512[9], A0A = d_A512[10], A0B = d_A512[11]; |
||||||
|
uint32_t B0 = d_B512[0], B1 = d_B512[1], B2 = d_B512[2], B3 = d_B512[3], |
||||||
|
B4 = d_B512[4], B5 = d_B512[5], B6 = d_B512[6], B7 = d_B512[7], |
||||||
|
B8 = d_B512[8], B9 = d_B512[9], BA = d_B512[10], BB = d_B512[11], |
||||||
|
BC = d_B512[12], BD = d_B512[13], BE = d_B512[14], BF = d_B512[15]; |
||||||
|
uint32_t C0 = d_C512[0], C1 = d_C512[1], C2 = d_C512[2], C3 = d_C512[3], |
||||||
|
C4 = d_C512[4], C5 = d_C512[5], C6 = d_C512[6], C7 = d_C512[7], |
||||||
|
C8 = d_C512[8], C9 = d_C512[9], CA = d_C512[10], CB = d_C512[11], |
||||||
|
CC = d_C512[12], CD = d_C512[13], CE = d_C512[14], CF = d_C512[15]; |
||||||
|
uint32_t M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, MA, MB, MC, MD, ME, MF; |
||||||
|
uint32_t Wlow = 1, Whigh = 0; |
||||||
|
|
||||||
|
M0 = Hash[0]; |
||||||
|
M1 = Hash[1]; |
||||||
|
M2 = Hash[2]; |
||||||
|
M3 = Hash[3]; |
||||||
|
M4 = Hash[4]; |
||||||
|
M5 = Hash[5]; |
||||||
|
M6 = Hash[6]; |
||||||
|
M7 = Hash[7]; |
||||||
|
|
||||||
|
M8 = Hash[8]; |
||||||
|
M9 = Hash[9]; |
||||||
|
MA = Hash[10]; |
||||||
|
MB = Hash[11]; |
||||||
|
MC = Hash[12]; |
||||||
|
MD = Hash[13]; |
||||||
|
ME = Hash[14]; |
||||||
|
MF = Hash[15]; |
||||||
|
|
||||||
|
INPUT_BLOCK_ADD; |
||||||
|
XOR_W; |
||||||
|
APPLY_P; |
||||||
|
INPUT_BLOCK_SUB; |
||||||
|
SWAP_BC; |
||||||
|
INCR_W; |
||||||
|
|
||||||
|
M0 = 0x80; |
||||||
|
M1 = M2 = M3 = M4 = M5 = M6 = M7 = M8 = M9 = MA = MB = MC = MD = ME = MF = 0; |
||||||
|
|
||||||
|
INPUT_BLOCK_ADD; |
||||||
|
XOR_W; |
||||||
|
APPLY_P; |
||||||
|
|
||||||
|
for (uint8_t i = 0; i < 3; i ++) |
||||||
|
{ |
||||||
|
SWAP_BC; |
||||||
|
XOR_W; |
||||||
|
APPLY_P; |
||||||
|
} |
||||||
|
|
||||||
|
Hash[0] = B0; |
||||||
|
Hash[1] = B1; |
||||||
|
Hash[2] = B2; |
||||||
|
Hash[3] = B3; |
||||||
|
Hash[4] = B4; |
||||||
|
Hash[5] = B5; |
||||||
|
Hash[6] = B6; |
||||||
|
Hash[7] = B7; |
||||||
|
|
||||||
|
Hash[8] = B8; |
||||||
|
Hash[9] = B9; |
||||||
|
Hash[10] = BA; |
||||||
|
Hash[11] = BB; |
||||||
|
Hash[12] = BC; |
||||||
|
Hash[13] = BD; |
||||||
|
Hash[14] = BE; |
||||||
|
Hash[15] = BF; |
||||||
|
|
||||||
|
//result = (Hash[3] <= target); |
||||||
|
|
||||||
|
uint32_t *outpHash = (uint32_t*)&g_hash[hashPosition << 3]; // [8 * hashPosition]; |
||||||
|
|
||||||
|
for (int i = 0; i < 16; i++) |
||||||
|
outpHash[i] = Hash[i]; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
__host__ void x14_shabal512_cpu_init(int thr_id, int threads) |
||||||
|
{ |
||||||
|
} |
||||||
|
#include <stdio.h> |
||||||
|
__host__ void x14_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) |
||||||
|
{ |
||||||
|
const int threadsperblock = 192; |
||||||
|
|
||||||
|
// berechne wie viele Thread Blocks wir brauchen |
||||||
|
dim3 grid((threads + threadsperblock-1)/threadsperblock); |
||||||
|
dim3 block(threadsperblock); |
||||||
|
|
||||||
|
size_t shared_size = 0; |
||||||
|
|
||||||
|
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size); |
||||||
|
|
||||||
|
x14_shabal512_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector); |
||||||
|
MyStreamSynchronize(NULL, order, thr_id); |
||||||
|
} |
@ -0,0 +1,259 @@ |
|||||||
|
/* |
||||||
|
* X14 algorithm |
||||||
|
* Added in ccminer by Tanguy Pruvot - 2014 |
||||||
|
*/ |
||||||
|
|
||||||
|
extern "C" { |
||||||
|
#include "sph/sph_blake.h" |
||||||
|
#include "sph/sph_bmw.h" |
||||||
|
#include "sph/sph_groestl.h" |
||||||
|
#include "sph/sph_skein.h" |
||||||
|
#include "sph/sph_jh.h" |
||||||
|
#include "sph/sph_keccak.h" |
||||||
|
|
||||||
|
#include "sph/sph_luffa.h" |
||||||
|
#include "sph/sph_cubehash.h" |
||||||
|
#include "sph/sph_shavite.h" |
||||||
|
#include "sph/sph_simd.h" |
||||||
|
#include "sph/sph_echo.h" |
||||||
|
|
||||||
|
#include "sph/sph_hamsi.h" |
||||||
|
#include "sph/sph_fugue.h" |
||||||
|
#include "sph/sph_shabal.h" |
||||||
|
|
||||||
|
#include "miner.h" |
||||||
|
} |
||||||
|
|
||||||
|
#include <stdint.h> |
||||||
|
#include <cuda_runtime.h> |
||||||
|
|
||||||
|
// from cpu-miner.c |
||||||
|
extern int device_map[8]; |
||||||
|
|
||||||
|
// Memory for the hash functions |
||||||
|
static uint32_t *d_hash[8]; |
||||||
|
|
||||||
|
extern void quark_blake512_cpu_init(int thr_id, int threads); |
||||||
|
extern void quark_blake512_cpu_setBlock_80(void *pdata); |
||||||
|
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); |
||||||
|
extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); |
||||||
|
|
||||||
|
extern void quark_bmw512_cpu_init(int thr_id, int threads); |
||||||
|
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); |
||||||
|
|
||||||
|
extern void quark_groestl512_cpu_init(int thr_id, int threads); |
||||||
|
extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); |
||||||
|
|
||||||
|
extern void quark_skein512_cpu_init(int thr_id, int threads); |
||||||
|
extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); |
||||||
|
|
||||||
|
extern void quark_keccak512_cpu_init(int thr_id, int threads); |
||||||
|
extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); |
||||||
|
|
||||||
|
extern void quark_jh512_cpu_init(int thr_id, int threads); |
||||||
|
extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); |
||||||
|
|
||||||
|
extern void x11_luffa512_cpu_init(int thr_id, int threads); |
||||||
|
extern void x11_luffa512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); |
||||||
|
|
||||||
|
extern void x11_cubehash512_cpu_init(int thr_id, int threads); |
||||||
|
extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); |
||||||
|
|
||||||
|
extern void x11_shavite512_cpu_init(int thr_id, int threads); |
||||||
|
extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); |
||||||
|
|
||||||
|
extern void x11_simd512_cpu_init(int thr_id, int threads); |
||||||
|
extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); |
||||||
|
|
||||||
|
extern void x11_echo512_cpu_init(int thr_id, int threads); |
||||||
|
extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); |
||||||
|
|
||||||
|
extern void x13_hamsi512_cpu_init(int thr_id, int threads); |
||||||
|
extern void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); |
||||||
|
|
||||||
|
extern void x13_fugue512_cpu_init(int thr_id, int threads); |
||||||
|
extern void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); |
||||||
|
|
||||||
|
extern void x14_shabal512_cpu_init(int thr_id, int threads); |
||||||
|
extern void x14_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); |
||||||
|
|
||||||
|
extern void quark_check_cpu_init(int thr_id, int threads); |
||||||
|
extern void quark_check_cpu_setTarget(const void *ptarget); |
||||||
|
extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order); |
||||||
|
|
||||||
|
extern void quark_compactTest_cpu_init(int thr_id, int threads); |
||||||
|
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, |
||||||
|
uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order); |
||||||
|
|
||||||
|
// X14 CPU Hash function |
||||||
|
extern "C" void x14hash(void *output, const void *input) |
||||||
|
{ |
||||||
|
unsigned char hash[128]; // uint32_t hashA[16], hashB[16]; |
||||||
|
#define hashB hash+64 |
||||||
|
|
||||||
|
memset(hash, 0, sizeof hash); |
||||||
|
|
||||||
|
sph_blake512_context ctx_blake; |
||||||
|
sph_bmw512_context ctx_bmw; |
||||||
|
sph_groestl512_context ctx_groestl; |
||||||
|
sph_jh512_context ctx_jh; |
||||||
|
sph_keccak512_context ctx_keccak; |
||||||
|
sph_skein512_context ctx_skein; |
||||||
|
sph_luffa512_context ctx_luffa; |
||||||
|
sph_cubehash512_context ctx_cubehash; |
||||||
|
sph_shavite512_context ctx_shavite; |
||||||
|
sph_simd512_context ctx_simd; |
||||||
|
sph_echo512_context ctx_echo; |
||||||
|
sph_hamsi512_context ctx_hamsi; |
||||||
|
sph_fugue512_context ctx_fugue; |
||||||
|
sph_shabal512_context ctx_shabal; |
||||||
|
|
||||||
|
sph_blake512_init(&ctx_blake); |
||||||
|
sph_blake512(&ctx_blake, input, 80); |
||||||
|
sph_blake512_close(&ctx_blake, hash); |
||||||
|
|
||||||
|
sph_bmw512_init(&ctx_bmw); |
||||||
|
sph_bmw512(&ctx_bmw, hash, 64); |
||||||
|
sph_bmw512_close(&ctx_bmw, hashB); |
||||||
|
|
||||||
|
sph_groestl512_init(&ctx_groestl); |
||||||
|
sph_groestl512(&ctx_groestl, hashB, 64); |
||||||
|
sph_groestl512_close(&ctx_groestl, hash); |
||||||
|
|
||||||
|
sph_skein512_init(&ctx_skein); |
||||||
|
sph_skein512(&ctx_skein, hash, 64); |
||||||
|
sph_skein512_close(&ctx_skein, hashB); |
||||||
|
|
||||||
|
sph_jh512_init(&ctx_jh); |
||||||
|
sph_jh512(&ctx_jh, hashB, 64); |
||||||
|
sph_jh512_close(&ctx_jh, hash); |
||||||
|
|
||||||
|
sph_keccak512_init(&ctx_keccak); |
||||||
|
sph_keccak512(&ctx_keccak, hash, 64); |
||||||
|
sph_keccak512_close(&ctx_keccak, hashB); |
||||||
|
|
||||||
|
sph_luffa512_init(&ctx_luffa); |
||||||
|
sph_luffa512(&ctx_luffa, hashB, 64); |
||||||
|
sph_luffa512_close(&ctx_luffa, hash); |
||||||
|
|
||||||
|
sph_cubehash512_init(&ctx_cubehash); |
||||||
|
sph_cubehash512(&ctx_cubehash, hash, 64); |
||||||
|
sph_cubehash512_close(&ctx_cubehash, hashB); |
||||||
|
|
||||||
|
sph_shavite512_init(&ctx_shavite); |
||||||
|
sph_shavite512(&ctx_shavite, hashB, 64); |
||||||
|
sph_shavite512_close(&ctx_shavite, hash); |
||||||
|
|
||||||
|
sph_simd512_init(&ctx_simd); |
||||||
|
sph_simd512(&ctx_simd, hash, 64); |
||||||
|
sph_simd512_close(&ctx_simd, hashB); |
||||||
|
|
||||||
|
sph_echo512_init(&ctx_echo); |
||||||
|
sph_echo512(&ctx_echo, hashB, 64); |
||||||
|
sph_echo512_close(&ctx_echo, hash); |
||||||
|
|
||||||
|
sph_hamsi512_init(&ctx_hamsi); |
||||||
|
sph_hamsi512(&ctx_hamsi, hash, 64); |
||||||
|
sph_hamsi512_close(&ctx_hamsi, hashB); |
||||||
|
|
||||||
|
sph_fugue512_init(&ctx_fugue); |
||||||
|
sph_fugue512(&ctx_fugue, hashB, 64); |
||||||
|
sph_fugue512_close(&ctx_fugue, hash); |
||||||
|
|
||||||
|
sph_shabal512_init(&ctx_shabal); |
||||||
|
sph_shabal512(&ctx_shabal, hash, 64); |
||||||
|
sph_shabal512_close(&ctx_shabal, hash); |
||||||
|
|
||||||
|
memcpy(output, hash, 32); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
extern bool opt_benchmark;

/*
 * Scan a nonce range for the X14 algorithm on the GPU.
 *
 * thr_id      - miner thread index; selects the CUDA device through
 *               device_map[], the per-thread buffer d_hash[] and the
 *               one-time init flag.
 * pdata       - block header words; words 0..19 are byte-swapped into the
 *               80-byte buffer handed to the first (blake) stage. pdata[19]
 *               is the start nonce; it is advanced by `throughput` after each
 *               batch, or set to the winning nonce on success.
 * ptarget     - target words; word 7 is used as the quick pre-check before
 *               fulltest(). In benchmark mode word 7 is overwritten with 0xff.
 * max_nonce   - the scan stops once pdata[19] is no longer below this value.
 * hashes_done - set to (last nonce - first nonce + 1) before returning.
 *
 * Returns 1 when a GPU candidate was confirmed by the CPU hash, 0 when the
 * range was exhausted or a work restart was requested.
 */
extern "C" int scanhash_x14(int thr_id, uint32_t *pdata,
	const uint32_t *ptarget, uint32_t max_nonce,
	unsigned long *hashes_done)
{
	const uint32_t first_nonce = pdata[19];
	const int throughput = 256*256*8;
	static bool init[8] = {0,0,0,0,0,0,0,0};
	uint32_t endiandata[20];
	uint32_t Htarg = ptarget[7];

	/* Keep the cached high word in sync with the overridden target, so the
	 * CPU pre-check below compares against the same value the GPU check is
	 * given through quark_check_cpu_setTarget(). */
	if (opt_benchmark)
		((uint32_t*)ptarget)[7] = Htarg = 0xff;

	if (!init[thr_id])
	{
		cudaSetDevice(device_map[thr_id]);

		/* 16 32-bit words (64 bytes) of hash state per nonce in a batch.
		 * NOTE(review): the cudaMalloc result is not checked here. */
		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);

		quark_blake512_cpu_init(thr_id, throughput);
		quark_groestl512_cpu_init(thr_id, throughput);
		quark_skein512_cpu_init(thr_id, throughput);
		quark_bmw512_cpu_init(thr_id, throughput);
		quark_keccak512_cpu_init(thr_id, throughput);
		quark_jh512_cpu_init(thr_id, throughput);
		x11_luffa512_cpu_init(thr_id, throughput);
		x11_cubehash512_cpu_init(thr_id, throughput);
		x11_shavite512_cpu_init(thr_id, throughput);
		x11_simd512_cpu_init(thr_id, throughput);
		x11_echo512_cpu_init(thr_id, throughput);
		x13_hamsi512_cpu_init(thr_id, throughput);
		x13_fugue512_cpu_init(thr_id, throughput);
		x14_shabal512_cpu_init(thr_id, throughput);

		quark_check_cpu_init(thr_id, throughput);
		init[thr_id] = true;
	}

	for (int k = 0; k < 20; k++)
		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);

	quark_blake512_cpu_setBlock_80((void*)endiandata);
	quark_check_cpu_setTarget(ptarget);

	do {
		/* Running call index handed to every stage of this batch. */
		int order = 0;
		quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
		quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);

		/* 0xffffffff is treated as "no candidate in this batch". */
		uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		if (foundNonce != 0xffffffff)
		{
			/* check now with the CPU to confirm */
			uint32_t vhash64[8];
			be32enc(&endiandata[19], foundNonce);
			x14hash(vhash64, endiandata);
			if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
				pdata[19] = foundNonce;
				*hashes_done = foundNonce - first_nonce + 1;
				return 1;
			}
			else if (vhash64[7] > Htarg) {
				applog(LOG_INFO, "GPU #%d: result for %08x is not in range: %x > %x", thr_id, foundNonce, vhash64[7], Htarg);
			}
			else {
				applog(LOG_INFO, "GPU #%d: result for %08x does not validate on CPU!", thr_id, foundNonce);
			}
		}

		/* Advance to the next batch. The sum is formed in 64 bits and
		 * saturated: a 32-bit wrap-around would produce a small nonce that
		 * is again below max_nonce and the loop would never terminate. */
		uint64_t next_nonce = (uint64_t)pdata[19] + throughput;
		pdata[19] = (next_nonce > 0xffffffffULL) ? 0xffffffffU : (uint32_t)next_nonce;

	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);

	*hashes_done = pdata[19] - first_nonce + 1;
	return 0;
}
@ -0,0 +1,295 @@ |
|||||||
|
/* |
||||||
|
* X15 algorithm (CHC, BBC, X15C) |
||||||
|
* Added in ccminer by Tanguy Pruvot - 2014 |
||||||
|
*/ |
||||||
|
|
||||||
|
extern "C" { |
||||||
|
#include "sph/sph_blake.h" |
||||||
|
#include "sph/sph_bmw.h" |
||||||
|
#include "sph/sph_groestl.h" |
||||||
|
#include "sph/sph_skein.h" |
||||||
|
#include "sph/sph_jh.h" |
||||||
|
#include "sph/sph_keccak.h" |
||||||
|
|
||||||
|
#include "sph/sph_luffa.h" |
||||||
|
#include "sph/sph_cubehash.h" |
||||||
|
#include "sph/sph_shavite.h" |
||||||
|
#include "sph/sph_simd.h" |
||||||
|
#include "sph/sph_echo.h" |
||||||
|
|
||||||
|
#include "sph/sph_hamsi.h" |
||||||
|
#include "sph/sph_fugue.h" |
||||||
|
#include "sph/sph_shabal.h" |
||||||
|
#include "sph/sph_whirlpool.h" |
||||||
|
|
||||||
|
#include "miner.h" |
||||||
|
} |
||||||
|
|
||||||
|
#include <stdint.h> |
||||||
|
#include <cuda_runtime.h> |
||||||
|
|
||||||
|
/* Set to 1 to hash an all-zero input buffer on the GPU and dump the result. */
#define NULLTEST 0

/* Defined in cpu-miner.c. */
extern int device_map[8];

/* Memory for the hash functions, one slot per miner thread. */
static uint32_t *d_hash[8];

/* ---- blake512 ---------------------------------------------------------- */
extern void quark_blake512_cpu_init(int thr_id, int threads);
extern void quark_blake512_cpu_setBlock_80(void *pdata);
extern void quark_blake512_cpu_hash_80(int thr_id, int threads,
	uint32_t startNounce, uint32_t *d_hash, int order);
extern void quark_blake512_cpu_hash_64(int thr_id, int threads,
	uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

/* ---- bmw512 ------------------------------------------------------------ */
extern void quark_bmw512_cpu_init(int thr_id, int threads);
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads,
	uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

/* ---- groestl512 -------------------------------------------------------- */
extern void quark_groestl512_cpu_init(int thr_id, int threads);
extern void quark_groestl512_cpu_hash_64(int thr_id, int threads,
	uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

/* ---- skein512 ---------------------------------------------------------- */
extern void quark_skein512_cpu_init(int thr_id, int threads);
extern void quark_skein512_cpu_hash_64(int thr_id, int threads,
	uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

/* ---- keccak512 --------------------------------------------------------- */
extern void quark_keccak512_cpu_init(int thr_id, int threads);
extern void quark_keccak512_cpu_hash_64(int thr_id, int threads,
	uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

/* ---- jh512 ------------------------------------------------------------- */
extern void quark_jh512_cpu_init(int thr_id, int threads);
extern void quark_jh512_cpu_hash_64(int thr_id, int threads,
	uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

/* ---- luffa512 ---------------------------------------------------------- */
extern void x11_luffa512_cpu_init(int thr_id, int threads);
extern void x11_luffa512_cpu_hash_64(int thr_id, int threads,
	uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

/* ---- cubehash512 ------------------------------------------------------- */
extern void x11_cubehash512_cpu_init(int thr_id, int threads);
extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads,
	uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

/* ---- shavite512 -------------------------------------------------------- */
extern void x11_shavite512_cpu_init(int thr_id, int threads);
extern void x11_shavite512_cpu_hash_64(int thr_id, int threads,
	uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

/* ---- simd512 ----------------------------------------------------------- */
extern void x11_simd512_cpu_init(int thr_id, int threads);
extern void x11_simd512_cpu_hash_64(int thr_id, int threads,
	uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

/* ---- echo512 ----------------------------------------------------------- */
extern void x11_echo512_cpu_init(int thr_id, int threads);
extern void x11_echo512_cpu_hash_64(int thr_id, int threads,
	uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

/* ---- hamsi512 ---------------------------------------------------------- */
extern void x13_hamsi512_cpu_init(int thr_id, int threads);
extern void x13_hamsi512_cpu_hash_64(int thr_id, int threads,
	uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

/* ---- fugue512 ---------------------------------------------------------- */
extern void x13_fugue512_cpu_init(int thr_id, int threads);
extern void x13_fugue512_cpu_hash_64(int thr_id, int threads,
	uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

/* ---- shabal512 --------------------------------------------------------- */
extern void x14_shabal512_cpu_init(int thr_id, int threads);
extern void x14_shabal512_cpu_hash_64(int thr_id, int threads,
	uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

/* ---- whirlpool --------------------------------------------------------- */
extern void x15_whirlpool_cpu_init(int thr_id, int threads);
extern void x15_whirlpool_cpu_hash_64(int thr_id, int threads,
	uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

/* ---- target check ------------------------------------------------------ */
extern void quark_check_cpu_init(int thr_id, int threads);
extern void quark_check_cpu_setTarget(const void *ptarget);
extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads,
	uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);

/* ---- nonce compaction -------------------------------------------------- */
extern void quark_compactTest_cpu_init(int thr_id, int threads);
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads,
	uint32_t startNounce, uint32_t *inpHashes,
	uint32_t *d_noncesTrue, size_t *nrmTrue,
	uint32_t *d_noncesFalse, size_t *nrmFalse, int order);

/* ---- stream helper ----------------------------------------------------- */
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
||||||
|
|
||||||
|
/*
 * X15 CPU hash function.
 *
 * Hashes the 80-byte `input` with blake512 and then feeds each 64-byte digest
 * into the next stage, in this order: bmw, groestl, skein, jh, keccak, luffa,
 * cubehash, shavite, simd, echo, hamsi, fugue, shabal, whirlpool.
 * The first 32 bytes of the final (whirlpool) digest are copied to `output`.
 */
extern "C" void x15hash(void *output, const void *input)
{
	sph_blake512_context ctx_blake;
	sph_bmw512_context ctx_bmw;
	sph_groestl512_context ctx_groestl;
	sph_jh512_context ctx_jh;
	sph_keccak512_context ctx_keccak;
	sph_skein512_context ctx_skein;
	sph_luffa512_context ctx_luffa;
	sph_cubehash512_context ctx_cubehash;
	sph_shavite512_context ctx_shavite;
	sph_simd512_context ctx_simd;
	sph_echo512_context ctx_echo;
	sph_hamsi512_context ctx_hamsi;
	sph_fugue512_context ctx_fugue;
	sph_shabal512_context ctx_shabal;
	sph_whirlpool_context ctx_whirlpool;

	/* Two 64-byte digest slots used alternately: every stage reads one slot
	 * and writes the other. hashB is a real pointer rather than a
	 * "#define hashB hash+64": the macro was unparenthesised and stayed
	 * defined for the rest of the translation unit. */
	unsigned char hash[128];
	unsigned char * const hashB = hash + 64;

	memset(hash, 0, sizeof hash);

	sph_blake512_init(&ctx_blake);
	sph_blake512(&ctx_blake, input, 80);
	sph_blake512_close(&ctx_blake, hash);

	sph_bmw512_init(&ctx_bmw);
	sph_bmw512(&ctx_bmw, hash, 64);
	sph_bmw512_close(&ctx_bmw, hashB);

	sph_groestl512_init(&ctx_groestl);
	sph_groestl512(&ctx_groestl, hashB, 64);
	sph_groestl512_close(&ctx_groestl, hash);

	sph_skein512_init(&ctx_skein);
	sph_skein512(&ctx_skein, hash, 64);
	sph_skein512_close(&ctx_skein, hashB);

	sph_jh512_init(&ctx_jh);
	sph_jh512(&ctx_jh, hashB, 64);
	sph_jh512_close(&ctx_jh, hash);

	sph_keccak512_init(&ctx_keccak);
	sph_keccak512(&ctx_keccak, hash, 64);
	sph_keccak512_close(&ctx_keccak, hashB);

	sph_luffa512_init(&ctx_luffa);
	sph_luffa512(&ctx_luffa, hashB, 64);
	sph_luffa512_close(&ctx_luffa, hash);

	sph_cubehash512_init(&ctx_cubehash);
	sph_cubehash512(&ctx_cubehash, hash, 64);
	sph_cubehash512_close(&ctx_cubehash, hashB);

	sph_shavite512_init(&ctx_shavite);
	sph_shavite512(&ctx_shavite, hashB, 64);
	sph_shavite512_close(&ctx_shavite, hash);

	sph_simd512_init(&ctx_simd);
	sph_simd512(&ctx_simd, hash, 64);
	sph_simd512_close(&ctx_simd, hashB);

	sph_echo512_init(&ctx_echo);
	sph_echo512(&ctx_echo, hashB, 64);
	sph_echo512_close(&ctx_echo, hash);

	sph_hamsi512_init(&ctx_hamsi);
	sph_hamsi512(&ctx_hamsi, hash, 64);
	sph_hamsi512_close(&ctx_hamsi, hashB);

	sph_fugue512_init(&ctx_fugue);
	sph_fugue512(&ctx_fugue, hashB, 64);
	sph_fugue512_close(&ctx_fugue, hash);

	sph_shabal512_init(&ctx_shabal);
	sph_shabal512(&ctx_shabal, hash, 64);
	sph_shabal512_close(&ctx_shabal, hashB);

	sph_whirlpool_init(&ctx_whirlpool);
	sph_whirlpool(&ctx_whirlpool, hashB, 64);
	sph_whirlpool_close(&ctx_whirlpool, hash);

	/* The final digest sits in the first slot; only 32 bytes are returned. */
	memcpy(output, hash, 32);
}
||||||
|
|
||||||
|
extern bool opt_benchmark;

/*
 * Scan a nonce range for the X15 algorithm on the GPU.
 *
 * thr_id      - miner thread index; selects the CUDA device through
 *               device_map[], the per-thread buffer d_hash[] and the
 *               one-time init flag.
 * pdata       - block header words; words 0..19 are byte-swapped (or zeroed
 *               when NULLTEST is set) into the 80-byte buffer handed to the
 *               first (blake) stage. pdata[19] is the start nonce; it is
 *               advanced by `throughput` after each batch, or set to the
 *               winning nonce on success.
 * ptarget     - target words; word 7 is used as the quick pre-check before
 *               fulltest(). In benchmark mode word 7 is overwritten with 0xff.
 * max_nonce   - the scan stops once pdata[19] is no longer below this value.
 * hashes_done - set to (last nonce - first nonce + 1) before returning
 *               (not written on the NULLTEST debug path).
 *
 * Returns 1 when a GPU candidate was confirmed by the CPU hash, 0 otherwise.
 */
extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
	const uint32_t *ptarget, uint32_t max_nonce,
	unsigned long *hashes_done)
{
	const uint32_t first_nonce = pdata[19];
	const int throughput = 256*256*8;
	static bool init[8] = {0,0,0,0,0,0,0,0};
	uint32_t endiandata[20];
	uint32_t Htarg = ptarget[7];

	if (opt_benchmark)
		((uint32_t*)ptarget)[7] = Htarg = 0x0000ff;

	if (!init[thr_id])
	{
		cudaSetDevice(device_map[thr_id]);

		/* 16 32-bit words (64 bytes) of hash state per nonce in a batch.
		 * NOTE(review): the cudaMalloc result is not checked here. */
		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);

		quark_blake512_cpu_init(thr_id, throughput);
		quark_groestl512_cpu_init(thr_id, throughput);
		quark_skein512_cpu_init(thr_id, throughput);
		quark_bmw512_cpu_init(thr_id, throughput);
		quark_keccak512_cpu_init(thr_id, throughput);
		quark_jh512_cpu_init(thr_id, throughput);
		x11_luffa512_cpu_init(thr_id, throughput);
		x11_cubehash512_cpu_init(thr_id, throughput);
		x11_shavite512_cpu_init(thr_id, throughput);
		x11_simd512_cpu_init(thr_id, throughput);
		x11_echo512_cpu_init(thr_id, throughput);
		x13_hamsi512_cpu_init(thr_id, throughput);
		x13_fugue512_cpu_init(thr_id, throughput);
		x14_shabal512_cpu_init(thr_id, throughput);
		x15_whirlpool_cpu_init(thr_id, throughput);

		quark_check_cpu_init(thr_id, throughput);
		init[thr_id] = true;
	}

	for (int k=0; k < 20; k++)
#if NULLTEST
		endiandata[k] = 0;
#else
		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
#endif

	quark_blake512_cpu_setBlock_80((void*)endiandata);
	quark_check_cpu_setTarget(ptarget);

	do {
		/* Running call index handed to every stage of this batch. */
		int order = 0;
		quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
		quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x15_whirlpool_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);

		/* Scan with GPU; 0xffffffff is treated as "no candidate". */
		uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);

#if NULLTEST
		/* Debug path: dump the first 64-byte GPU hash and stop. */
		uint32_t buf[16]; memset(buf, 0, sizeof(buf));
		cudaMemcpy(buf, d_hash[thr_id], 16 * sizeof(uint32_t), cudaMemcpyDeviceToHost);
		MyStreamSynchronize(NULL, order, thr_id);
		applog(LOG_NOTICE, "Hash %08x %08x %08x %08x", buf[0], buf[1], buf[2], buf[3]);
		applog(LOG_NOTICE, "Hash %08x %08x %08x %08x", buf[4], buf[5], buf[6], buf[7]);
		applog(LOG_NOTICE, "Hash %08x %08x %08x %08x", buf[8], buf[9], buf[10], buf[11]);
		applog(LOG_NOTICE, "Hash %08x %08x %08x %08x", buf[12], buf[13], buf[14], buf[15]);
		return 0;
#endif
		if (foundNonce != 0xffffffff)
		{
			/* check now with the CPU to confirm */
			uint32_t vhash64[8];
			be32enc(&endiandata[19], foundNonce);
			x15hash(vhash64, endiandata);
			/* The full-target comparison is required here, as in
			 * scanhash_x14: without it a nonce is accepted on the high word
			 * alone and the "does not validate" branch can never run. */
			if ((vhash64[7] <= Htarg) && fulltest(vhash64, ptarget)) {
				pdata[19] = foundNonce;
				*hashes_done = foundNonce - first_nonce + 1;
				applog(LOG_INFO, "GPU #%d: result for nonce $%08X is in wanted range, %x <= %x", thr_id, foundNonce, vhash64[7], Htarg);
				return 1;
			}
			else if (vhash64[7] > Htarg) {
				applog(LOG_NOTICE, "Hash0 %08x %08x %08x %08x", vhash64[0], vhash64[1], vhash64[2], vhash64[3]);
				applog(LOG_NOTICE, "Hash1 %08x %08x %08x %08x", vhash64[4], vhash64[5], vhash64[6], vhash64[7]);
				applog(LOG_INFO, "GPU #%d: result for %08x is not in range: %x > %x",
					thr_id, foundNonce, vhash64[7], Htarg);
			}
			else {
				applog(LOG_INFO, "GPU #%d: result for %08x does not validate on CPU!", thr_id, foundNonce);
			}
		}

		/* Advance to the next batch. The sum is formed in 64 bits and
		 * saturated: a 32-bit wrap-around would produce a small nonce that
		 * is again below max_nonce and the loop would never terminate. */
		uint64_t next_nonce = (uint64_t)pdata[19] + throughput;
		pdata[19] = (next_nonce > 0xffffffffULL) ? 0xffffffffU : (uint32_t)next_nonce;

	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);

	*hashes_done = pdata[19] - first_nonce + 1;
	return 0;
}
Loading…
Reference in new issue