Tanguy Pruvot
9 years ago
15 changed files with 3451 additions and 29 deletions
@@ -0,0 +1,441 @@
/*
 * ripemd-160 kernel implementation.
 *
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2014, 2016  djm34, tpruvot
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * ===========================(LICENSE END)=============================
 *
 */
#include <stdio.h>
#include <stdint.h>
#include <memory.h>

#include <cuda_helper.h>

static __constant__ uint32_t c_IV[5] = {
	0x67452301u, 0xEFCDAB89u, 0x98BADCFEu, 0x10325476u, 0xC3D2E1F0u
};

//__host__
//uint64_t xornot64(uint64_t a, uint64_t b, uint64_t c) {
//	return c ^ (a | ~b);
//}

__forceinline__ __device__
uint64_t xornot64(uint64_t a, uint64_t b, uint64_t c)
{
	uint64_t result;
	asm("{ .reg .u64 m,n; // xornot64\n\t"
		"not.b64 m,%2; \n\t"
		"or.b64 n, %1,m;\n\t"
		"xor.b64 %0, n,%3;\n\t"
		"}\n\t"
		: "=l"(result) : "l"(a), "l"(b), "l"(c));
	return result;
}

//__host__
//uint64_t xornt64(uint64_t a, uint64_t b, uint64_t c) {
//	return a ^ (b | ~c);
//}

__device__ __forceinline__
uint64_t xornt64(uint64_t a, uint64_t b, uint64_t c)
{
	uint64_t result;
	asm("{ .reg .u64 m,n; // xornt64\n\t"
		"not.b64 m,%3; \n\t"
		"or.b64 n, %2,m;\n\t"
		"xor.b64 %0, %1,n;\n\t"
		"}\n\t"
		: "=l"(result) : "l"(a), "l"(b), "l"(c));
	return result;
}

/*
 * Round functions for RIPEMD-128 and RIPEMD-160.
 */
#if 1
#define F1(x, y, z)   ((x) ^ (y) ^ (z))
#define F2(x, y, z)   ((((y) ^ (z)) & (x)) ^ (z))
#define F3(x, y, z)   (((x) | ~(y)) ^ (z))
#define F4(x, y, z)   ((((x) ^ (y)) & (z)) ^ (y))
#define F5(x, y, z)   ((x) ^ ((y) | ~(z)))
#else
#define F1(x, y, z)   xor3(x,y,z)
#define F2(x, y, z)   xandx(x,y,z)
#define F3(x, y, z)   xornot64(x,y,z)
#define F4(x, y, z)   xandx(z,x,y)
#define F5(x, y, z)   xornt64(x,y,z)
#endif

/*
 * Round constants for RIPEMD-160.
 */
#define K11 0x00000000u
#define K12 0x5A827999u
#define K13 0x6ED9EBA1u
#define K14 0x8F1BBCDCu
#define K15 0xA953FD4Eu

#define K21 0x50A28BE6u
#define K22 0x5C4DD124u
#define K23 0x6D703EF3u
#define K24 0x7A6D76E9u
#define K25 0x00000000u

#define RR(a, b, c, d, e, f, s, r, k) { \
	a = SPH_T32(ROTL32(SPH_T32(a + f(b, c, d) + r + k), s) + e); \
	c = ROTL32(c, 10); \
}
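
/* One RIPEMD-160 step: a = ROTL32(a + f(b,c,d) + msg + K, s) + e, with the
 * c register pre-rotated by 10 for the next step. ROUND1 below expands this
 * for the "left" line (K1x constants), ROUND2 for the parallel "right" line
 * (K2x constants). */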

#define ROUND1(a, b, c, d, e, f, s, r, k) \
	RR(a ## 1, b ## 1, c ## 1, d ## 1, e ## 1, f, s, r, K1 ## k)

#define ROUND2(a, b, c, d, e, f, s, r, k) \
	RR(a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k)

#define RIPEMD160_ROUND_BODY(in, h) { \
	uint32_t A1, B1, C1, D1, E1; \
	uint32_t A2, B2, C2, D2, E2; \
	uint32_t tmp; \
\
	A1 = A2 = h[0]; \
	B1 = B2 = h[1]; \
	C1 = C2 = h[2]; \
	D1 = D2 = h[3]; \
	E1 = E2 = h[4]; \
\
	ROUND1(A, B, C, D, E, F1, 11, in[ 0], 1); \
	ROUND1(E, A, B, C, D, F1, 14, in[ 1], 1); \
	ROUND1(D, E, A, B, C, F1, 15, in[ 2], 1); \
	ROUND1(C, D, E, A, B, F1, 12, in[ 3], 1); \
	ROUND1(B, C, D, E, A, F1, 5, in[ 4], 1); \
	ROUND1(A, B, C, D, E, F1, 8, in[ 5], 1); \
	ROUND1(E, A, B, C, D, F1, 7, in[ 6], 1); \
	ROUND1(D, E, A, B, C, F1, 9, in[ 7], 1); \
	ROUND1(C, D, E, A, B, F1, 11, in[ 8], 1); \
	ROUND1(B, C, D, E, A, F1, 13, in[ 9], 1); \
	ROUND1(A, B, C, D, E, F1, 14, in[10], 1); \
	ROUND1(E, A, B, C, D, F1, 15, in[11], 1); \
	ROUND1(D, E, A, B, C, F1, 6, in[12], 1); \
	ROUND1(C, D, E, A, B, F1, 7, in[13], 1); \
	ROUND1(B, C, D, E, A, F1, 9, in[14], 1); \
	ROUND1(A, B, C, D, E, F1, 8, in[15], 1); \
\
	ROUND1(E, A, B, C, D, F2, 7, in[ 7], 2); \
	ROUND1(D, E, A, B, C, F2, 6, in[ 4], 2); \
	ROUND1(C, D, E, A, B, F2, 8, in[13], 2); \
	ROUND1(B, C, D, E, A, F2, 13, in[ 1], 2); \
	ROUND1(A, B, C, D, E, F2, 11, in[10], 2); \
	ROUND1(E, A, B, C, D, F2, 9, in[ 6], 2); \
	ROUND1(D, E, A, B, C, F2, 7, in[15], 2); \
	ROUND1(C, D, E, A, B, F2, 15, in[ 3], 2); \
	ROUND1(B, C, D, E, A, F2, 7, in[12], 2); \
	ROUND1(A, B, C, D, E, F2, 12, in[ 0], 2); \
	ROUND1(E, A, B, C, D, F2, 15, in[ 9], 2); \
	ROUND1(D, E, A, B, C, F2, 9, in[ 5], 2); \
	ROUND1(C, D, E, A, B, F2, 11, in[ 2], 2); \
	ROUND1(B, C, D, E, A, F2, 7, in[14], 2); \
	ROUND1(A, B, C, D, E, F2, 13, in[11], 2); \
	ROUND1(E, A, B, C, D, F2, 12, in[ 8], 2); \
\
	ROUND1(D, E, A, B, C, F3, 11, in[ 3], 3); \
	ROUND1(C, D, E, A, B, F3, 13, in[10], 3); \
	ROUND1(B, C, D, E, A, F3, 6, in[14], 3); \
	ROUND1(A, B, C, D, E, F3, 7, in[ 4], 3); \
	ROUND1(E, A, B, C, D, F3, 14, in[ 9], 3); \
	ROUND1(D, E, A, B, C, F3, 9, in[15], 3); \
	ROUND1(C, D, E, A, B, F3, 13, in[ 8], 3); \
	ROUND1(B, C, D, E, A, F3, 15, in[ 1], 3); \
	ROUND1(A, B, C, D, E, F3, 14, in[ 2], 3); \
	ROUND1(E, A, B, C, D, F3, 8, in[ 7], 3); \
	ROUND1(D, E, A, B, C, F3, 13, in[ 0], 3); \
	ROUND1(C, D, E, A, B, F3, 6, in[ 6], 3); \
	ROUND1(B, C, D, E, A, F3, 5, in[13], 3); \
	ROUND1(A, B, C, D, E, F3, 12, in[11], 3); \
	ROUND1(E, A, B, C, D, F3, 7, in[ 5], 3); \
	ROUND1(D, E, A, B, C, F3, 5, in[12], 3); \
\
	ROUND1(C, D, E, A, B, F4, 11, in[ 1], 4); \
	ROUND1(B, C, D, E, A, F4, 12, in[ 9], 4); \
	ROUND1(A, B, C, D, E, F4, 14, in[11], 4); \
	ROUND1(E, A, B, C, D, F4, 15, in[10], 4); \
	ROUND1(D, E, A, B, C, F4, 14, in[ 0], 4); \
	ROUND1(C, D, E, A, B, F4, 15, in[ 8], 4); \
	ROUND1(B, C, D, E, A, F4, 9, in[12], 4); \
	ROUND1(A, B, C, D, E, F4, 8, in[ 4], 4); \
	ROUND1(E, A, B, C, D, F4, 9, in[13], 4); \
	ROUND1(D, E, A, B, C, F4, 14, in[ 3], 4); \
	ROUND1(C, D, E, A, B, F4, 5, in[ 7], 4); \
	ROUND1(B, C, D, E, A, F4, 6, in[15], 4); \
	ROUND1(A, B, C, D, E, F4, 8, in[14], 4); \
	ROUND1(E, A, B, C, D, F4, 6, in[ 5], 4); \
	ROUND1(D, E, A, B, C, F4, 5, in[ 6], 4); \
	ROUND1(C, D, E, A, B, F4, 12, in[ 2], 4); \
\
	ROUND1(B, C, D, E, A, F5, 9, in[ 4], 5); \
	ROUND1(A, B, C, D, E, F5, 15, in[ 0], 5); \
	ROUND1(E, A, B, C, D, F5, 5, in[ 5], 5); \
	ROUND1(D, E, A, B, C, F5, 11, in[ 9], 5); \
	ROUND1(C, D, E, A, B, F5, 6, in[ 7], 5); \
	ROUND1(B, C, D, E, A, F5, 8, in[12], 5); \
	ROUND1(A, B, C, D, E, F5, 13, in[ 2], 5); \
	ROUND1(E, A, B, C, D, F5, 12, in[10], 5); \
	ROUND1(D, E, A, B, C, F5, 5, in[14], 5); \
	ROUND1(C, D, E, A, B, F5, 12, in[ 1], 5); \
	ROUND1(B, C, D, E, A, F5, 13, in[ 3], 5); \
	ROUND1(A, B, C, D, E, F5, 14, in[ 8], 5); \
	ROUND1(E, A, B, C, D, F5, 11, in[11], 5); \
	ROUND1(D, E, A, B, C, F5, 8, in[ 6], 5); \
	ROUND1(C, D, E, A, B, F5, 5, in[15], 5); \
	ROUND1(B, C, D, E, A, F5, 6, in[13], 5); \
\
	ROUND2(A, B, C, D, E, F5, 8, in[ 5], 1); \
	ROUND2(E, A, B, C, D, F5, 9, in[14], 1); \
	ROUND2(D, E, A, B, C, F5, 9, in[ 7], 1); \
	ROUND2(C, D, E, A, B, F5, 11, in[ 0], 1); \
	ROUND2(B, C, D, E, A, F5, 13, in[ 9], 1); \
	ROUND2(A, B, C, D, E, F5, 15, in[ 2], 1); \
	ROUND2(E, A, B, C, D, F5, 15, in[11], 1); \
	ROUND2(D, E, A, B, C, F5, 5, in[ 4], 1); \
	ROUND2(C, D, E, A, B, F5, 7, in[13], 1); \
	ROUND2(B, C, D, E, A, F5, 7, in[ 6], 1); \
	ROUND2(A, B, C, D, E, F5, 8, in[15], 1); \
	ROUND2(E, A, B, C, D, F5, 11, in[ 8], 1); \
	ROUND2(D, E, A, B, C, F5, 14, in[ 1], 1); \
	ROUND2(C, D, E, A, B, F5, 14, in[10], 1); \
	ROUND2(B, C, D, E, A, F5, 12, in[ 3], 1); \
	ROUND2(A, B, C, D, E, F5, 6, in[12], 1); \
\
	ROUND2(E, A, B, C, D, F4, 9, in[ 6], 2); \
	ROUND2(D, E, A, B, C, F4, 13, in[11], 2); \
	ROUND2(C, D, E, A, B, F4, 15, in[ 3], 2); \
	ROUND2(B, C, D, E, A, F4, 7, in[ 7], 2); \
	ROUND2(A, B, C, D, E, F4, 12, in[ 0], 2); \
	ROUND2(E, A, B, C, D, F4, 8, in[13], 2); \
	ROUND2(D, E, A, B, C, F4, 9, in[ 5], 2); \
	ROUND2(C, D, E, A, B, F4, 11, in[10], 2); \
	ROUND2(B, C, D, E, A, F4, 7, in[14], 2); \
	ROUND2(A, B, C, D, E, F4, 7, in[15], 2); \
	ROUND2(E, A, B, C, D, F4, 12, in[ 8], 2); \
	ROUND2(D, E, A, B, C, F4, 7, in[12], 2); \
	ROUND2(C, D, E, A, B, F4, 6, in[ 4], 2); \
	ROUND2(B, C, D, E, A, F4, 15, in[ 9], 2); \
	ROUND2(A, B, C, D, E, F4, 13, in[ 1], 2); \
	ROUND2(E, A, B, C, D, F4, 11, in[ 2], 2); \
\
	ROUND2(D, E, A, B, C, F3, 9, in[15], 3); \
	ROUND2(C, D, E, A, B, F3, 7, in[ 5], 3); \
	ROUND2(B, C, D, E, A, F3, 15, in[ 1], 3); \
	ROUND2(A, B, C, D, E, F3, 11, in[ 3], 3); \
	ROUND2(E, A, B, C, D, F3, 8, in[ 7], 3); \
	ROUND2(D, E, A, B, C, F3, 6, in[14], 3); \
	ROUND2(C, D, E, A, B, F3, 6, in[ 6], 3); \
	ROUND2(B, C, D, E, A, F3, 14, in[ 9], 3); \
	ROUND2(A, B, C, D, E, F3, 12, in[11], 3); \
	ROUND2(E, A, B, C, D, F3, 13, in[ 8], 3); \
	ROUND2(D, E, A, B, C, F3, 5, in[12], 3); \
	ROUND2(C, D, E, A, B, F3, 14, in[ 2], 3); \
	ROUND2(B, C, D, E, A, F3, 13, in[10], 3); \
	ROUND2(A, B, C, D, E, F3, 13, in[ 0], 3); \
	ROUND2(E, A, B, C, D, F3, 7, in[ 4], 3); \
	ROUND2(D, E, A, B, C, F3, 5, in[13], 3); \
\
	ROUND2(C, D, E, A, B, F2, 15, in[ 8], 4); \
	ROUND2(B, C, D, E, A, F2, 5, in[ 6], 4); \
	ROUND2(A, B, C, D, E, F2, 8, in[ 4], 4); \
	ROUND2(E, A, B, C, D, F2, 11, in[ 1], 4); \
	ROUND2(D, E, A, B, C, F2, 14, in[ 3], 4); \
	ROUND2(C, D, E, A, B, F2, 14, in[11], 4); \
	ROUND2(B, C, D, E, A, F2, 6, in[15], 4); \
	ROUND2(A, B, C, D, E, F2, 14, in[ 0], 4); \
	ROUND2(E, A, B, C, D, F2, 6, in[ 5], 4); \
	ROUND2(D, E, A, B, C, F2, 9, in[12], 4); \
	ROUND2(C, D, E, A, B, F2, 12, in[ 2], 4); \
	ROUND2(B, C, D, E, A, F2, 9, in[13], 4); \
	ROUND2(A, B, C, D, E, F2, 12, in[ 9], 4); \
	ROUND2(E, A, B, C, D, F2, 5, in[ 7], 4); \
	ROUND2(D, E, A, B, C, F2, 15, in[10], 4); \
	ROUND2(C, D, E, A, B, F2, 8, in[14], 4); \
\
	ROUND2(B, C, D, E, A, F1, 8, in[12], 5); \
	ROUND2(A, B, C, D, E, F1, 5, in[15], 5); \
	ROUND2(E, A, B, C, D, F1, 12, in[10], 5); \
	ROUND2(D, E, A, B, C, F1, 9, in[ 4], 5); \
	ROUND2(C, D, E, A, B, F1, 12, in[ 1], 5); \
	ROUND2(B, C, D, E, A, F1, 5, in[ 5], 5); \
	ROUND2(A, B, C, D, E, F1, 14, in[ 8], 5); \
	ROUND2(E, A, B, C, D, F1, 6, in[ 7], 5); \
	ROUND2(D, E, A, B, C, F1, 8, in[ 6], 5); \
	ROUND2(C, D, E, A, B, F1, 13, in[ 2], 5); \
	ROUND2(B, C, D, E, A, F1, 6, in[13], 5); \
	ROUND2(A, B, C, D, E, F1, 5, in[14], 5); \
	ROUND2(E, A, B, C, D, F1, 15, in[ 0], 5); \
	ROUND2(D, E, A, B, C, F1, 13, in[ 3], 5); \
	ROUND2(C, D, E, A, B, F1, 11, in[ 9], 5); \
	ROUND2(B, C, D, E, A, F1, 11, in[11], 5); \
\
	tmp  = (h[1] + C1 + D2); \
	h[1] = (h[2] + D1 + E2); \
	h[2] = (h[3] + E1 + A2); \
	h[3] = (h[4] + A1 + B2); \
	h[4] = (h[0] + B1 + C2); \
	h[0] = tmp; \
}
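
/*
 * Illustrative host-side sketch (not part of the original kernels): how the
 * round body above digests one padded block, mirroring the GPU code below.
 * It assumes host equivalents of ROTL32 and SPH_T32 are available; the
 * helper name ripemd160_hash_32_host is hypothetical. A 32-byte input fits
 * a single 64-byte block: words 0..7 carry the data, word 8 the 0x80
 * padding byte, and word 14 the message length in bits (0x100).
 */
#if 0
static void ripemd160_hash_32_host(const uint32_t msg[8], uint32_t h[5])
{
	uint32_t in[16];
	for (int i = 0; i < 8; i++) in[i] = msg[i]; // 32-byte message
	in[8] = 0x80;                               // padding marker (little-endian word)
	for (int i = 9; i < 16; i++) in[i] = 0;
	in[14] = 0x100;                             // 256-bit message length
	h[0] = 0x67452301u; h[1] = 0xEFCDAB89u; h[2] = 0x98BADCFEu;
	h[3] = 0x10325476u; h[4] = 0xC3D2E1F0u;     // standard RIPEMD-160 IV
	RIPEMD160_ROUND_BODY(in, h);                // h[0..4] now hold the digest
}
#endif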

#if 0
__global__
void lbry_ripemd160_gpu_hash_32(const uint32_t threads, uint64_t *g_hash, const uint32_t byteOffset)
{
	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		uint32_t *hash = (uint32_t*) (&g_hash[thread * 8U + byteOffset/8]);

		uint32_t in[16];
		for (int i=0; i<8; i++)
			in[i] = (hash[i]);
		in[8] = 0x80;

		#pragma unroll
		for (int i=9; i<16; i++) in[i] = 0;

		in[14] = 0x100; // size in bits

		uint32_t h[5];
		#pragma unroll
		for (int i=0; i<5; i++)
			h[i] = c_IV[i];

		RIPEMD160_ROUND_BODY(in, h);

		#pragma unroll
		for (int i=0; i<5; i++)
			hash[i] = h[i];

#ifdef PAD_ZEROS
		// 20-byte hash, zero-padded to the 32- or 64-byte output slot
		hash[5] = 0;
		hash[6] = 0;
		hash[7] = 0;
#endif
	}
}

__host__
void lbry_ripemd160_hash_32(int thr_id, uint32_t threads, uint32_t *g_Hash, uint32_t byteOffset, cudaStream_t stream)
{
	const uint32_t threadsperblock = 128;

	dim3 grid(threads/threadsperblock);
	dim3 block(threadsperblock);

	lbry_ripemd160_gpu_hash_32 <<<grid, block, 0, stream>>> (threads, (uint64_t*) g_Hash, byteOffset);
}
#endif

__global__
//__launch_bounds__(256,6)
void lbry_ripemd160_gpu_hash_32x2(const uint32_t threads, uint64_t *g_hash)
{
	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		uint32_t *hash = (uint32_t*) (&g_hash[thread * 8U]);

		uint32_t in[16];
		for (int i=0; i<8; i++)
			in[i] = (hash[i]);
		in[8] = 0x80;

		#pragma unroll
		for (int i=9; i<16; i++) in[i] = 0;

		in[14] = 0x100; // size in bits

		uint32_t h[5];
		#pragma unroll
		for (int i=0; i<5; i++)
			h[i] = c_IV[i];

		RIPEMD160_ROUND_BODY(in, h);

		#pragma unroll
		for (int i=0; i<5; i++)
			hash[i] = h[i];

#ifdef PAD_ZEROS
		// 20-byte hash, zero-padded to the 32-byte output slot
		hash[5] = 0;
		hash[6] = 0;
		hash[7] = 0;
#endif
		// hash the second 32-byte block
		hash += 8;

		#pragma unroll
		for (int i=0; i<8; i++)
			in[i] = (hash[i]);
		in[8] = 0x80;

		#pragma unroll
		for (int i=9; i<16; i++) in[i] = 0;

		in[14] = 0x100; // size in bits

		#pragma unroll
		for (int i=0; i<5; i++)
			h[i] = c_IV[i];

		RIPEMD160_ROUND_BODY(in, h);

		#pragma unroll
		for (int i=0; i<5; i++)
			hash[i] = h[i];

#ifdef PAD_ZEROS
		// 20-byte hash, zero-padded to the 32-byte output slot
		hash[5] = 0;
		hash[6] = 0;
		hash[7] = 0;
#endif
	}
}

__host__
void lbry_ripemd160_hash_32x2(int thr_id, uint32_t threads, uint32_t *g_Hash, cudaStream_t stream)
{
	const uint32_t threadsperblock = 128;

	dim3 grid(threads/threadsperblock);
	dim3 block(threadsperblock);

	lbry_ripemd160_gpu_hash_32x2 <<<grid, block, 0, stream>>> (threads, (uint64_t*) g_Hash);
}

void lbry_ripemd160_init(int thr_id)
{
	//cudaMemcpyToSymbol(c_IV, IV, sizeof(IV), 0, cudaMemcpyHostToDevice);
}
@@ -0,0 +1,712 @@
/*
 * sha256 CUDA implementation.
 */
#include <stdio.h>
#include <stdint.h>
#include <memory.h>

#include <cuda_helper.h>
#include <miner.h>

__constant__ static uint32_t __align__(8) c_midstate112[8];
__constant__ static uint32_t __align__(8) c_dataEnd112[12];

const __constant__ uint32_t __align__(8) c_H256[8] = {
	0x6A09E667U, 0xBB67AE85U, 0x3C6EF372U, 0xA54FF53AU,
	0x510E527FU, 0x9B05688CU, 0x1F83D9ABU, 0x5BE0CD19U
};
__constant__ static uint32_t __align__(8) c_K[64];

static __thread uint32_t* d_resNonces;
__constant__ static uint32_t __align__(8) c_target[2];
__device__ uint64_t d_target[1];

// ------------------------------------------------------------------------------------------------

static const uint32_t cpu_H256[8] = {
	0x6A09E667U, 0xBB67AE85U, 0x3C6EF372U, 0xA54FF53AU,
	0x510E527FU, 0x9B05688CU, 0x1F83D9ABU, 0x5BE0CD19U
};

static const uint32_t cpu_K[64] = {
	0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
	0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3, 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
	0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
	0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
	0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
	0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
	0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
	0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2
};

#define ROTR ROTR32

__host__
static void sha256_step1_host(uint32_t a, uint32_t b, uint32_t c, uint32_t &d,
	uint32_t e, uint32_t f, uint32_t g, uint32_t &h,
	uint32_t in, const uint32_t Kshared)
{
	uint32_t t1,t2;
	uint32_t vxandx = (((f) ^ (g)) & (e)) ^ (g); // xandx(e, f, g);
	uint32_t bsg21 = ROTR(e, 6) ^ ROTR(e, 11) ^ ROTR(e, 25); // bsg2_1(e);
	uint32_t bsg20 = ROTR(a, 2) ^ ROTR(a, 13) ^ ROTR(a, 22); // bsg2_0(a);
	uint32_t andorv = ((b) & (c)) | (((b) | (c)) & (a)); // andor32(a,b,c);

	t1 = h + bsg21 + vxandx + Kshared + in;
	t2 = bsg20 + andorv;
	d = d + t1;
	h = t1 + t2;
}

__host__
static void sha256_step2_host(uint32_t a, uint32_t b, uint32_t c, uint32_t &d,
	uint32_t e, uint32_t f, uint32_t g, uint32_t &h,
	uint32_t* in, uint32_t pc, const uint32_t Kshared)
{
	uint32_t t1,t2;

	int pcidx1 = (pc-2) & 0xF;
	int pcidx2 = (pc-7) & 0xF;
	int pcidx3 = (pc-15) & 0xF;

	uint32_t inx0 = in[pc];
	uint32_t inx1 = in[pcidx1];
	uint32_t inx2 = in[pcidx2];
	uint32_t inx3 = in[pcidx3];

	uint32_t ssg21 = ROTR(inx1, 17) ^ ROTR(inx1, 19) ^ SPH_T32((inx1) >> 10); // ssg2_1(inx1);
	uint32_t ssg20 = ROTR(inx3, 7) ^ ROTR(inx3, 18) ^ SPH_T32((inx3) >> 3); // ssg2_0(inx3);
	uint32_t vxandx = (((f) ^ (g)) & (e)) ^ (g); // xandx(e, f, g);
	uint32_t bsg21 = ROTR(e, 6) ^ ROTR(e, 11) ^ ROTR(e, 25); // bsg2_1(e);
	uint32_t bsg20 = ROTR(a, 2) ^ ROTR(a, 13) ^ ROTR(a, 22); // bsg2_0(a);
	uint32_t andorv = ((b) & (c)) | (((b) | (c)) & (a)); // andor32(a,b,c);

	in[pc] = ssg21 + inx2 + ssg20 + inx0;

	t1 = h + bsg21 + vxandx + Kshared + in[pc];
	t2 = bsg20 + andorv;
	d = d + t1;
	h = t1 + t2;
}

__host__
static void sha256_round_body_host(uint32_t* in, uint32_t* state, const uint32_t* Kshared)
{
	uint32_t a = state[0];
	uint32_t b = state[1];
	uint32_t c = state[2];
	uint32_t d = state[3];
	uint32_t e = state[4];
	uint32_t f = state[5];
	uint32_t g = state[6];
	uint32_t h = state[7];

	sha256_step1_host(a,b,c,d,e,f,g,h,in[0], Kshared[0]);
	sha256_step1_host(h,a,b,c,d,e,f,g,in[1], Kshared[1]);
	sha256_step1_host(g,h,a,b,c,d,e,f,in[2], Kshared[2]);
	sha256_step1_host(f,g,h,a,b,c,d,e,in[3], Kshared[3]);
	sha256_step1_host(e,f,g,h,a,b,c,d,in[4], Kshared[4]);
	sha256_step1_host(d,e,f,g,h,a,b,c,in[5], Kshared[5]);
	sha256_step1_host(c,d,e,f,g,h,a,b,in[6], Kshared[6]);
	sha256_step1_host(b,c,d,e,f,g,h,a,in[7], Kshared[7]);
	sha256_step1_host(a,b,c,d,e,f,g,h,in[8], Kshared[8]);
	sha256_step1_host(h,a,b,c,d,e,f,g,in[9], Kshared[9]);
	sha256_step1_host(g,h,a,b,c,d,e,f,in[10],Kshared[10]);
	sha256_step1_host(f,g,h,a,b,c,d,e,in[11],Kshared[11]);
	sha256_step1_host(e,f,g,h,a,b,c,d,in[12],Kshared[12]);
	sha256_step1_host(d,e,f,g,h,a,b,c,in[13],Kshared[13]);
	sha256_step1_host(c,d,e,f,g,h,a,b,in[14],Kshared[14]);
	sha256_step1_host(b,c,d,e,f,g,h,a,in[15],Kshared[15]);

	for (int i=0; i<3; i++)
	{
		sha256_step2_host(a,b,c,d,e,f,g,h,in,0, Kshared[16+16*i]);
		sha256_step2_host(h,a,b,c,d,e,f,g,in,1, Kshared[17+16*i]);
		sha256_step2_host(g,h,a,b,c,d,e,f,in,2, Kshared[18+16*i]);
		sha256_step2_host(f,g,h,a,b,c,d,e,in,3, Kshared[19+16*i]);
		sha256_step2_host(e,f,g,h,a,b,c,d,in,4, Kshared[20+16*i]);
		sha256_step2_host(d,e,f,g,h,a,b,c,in,5, Kshared[21+16*i]);
		sha256_step2_host(c,d,e,f,g,h,a,b,in,6, Kshared[22+16*i]);
		sha256_step2_host(b,c,d,e,f,g,h,a,in,7, Kshared[23+16*i]);
		sha256_step2_host(a,b,c,d,e,f,g,h,in,8, Kshared[24+16*i]);
		sha256_step2_host(h,a,b,c,d,e,f,g,in,9, Kshared[25+16*i]);
		sha256_step2_host(g,h,a,b,c,d,e,f,in,10,Kshared[26+16*i]);
		sha256_step2_host(f,g,h,a,b,c,d,e,in,11,Kshared[27+16*i]);
		sha256_step2_host(e,f,g,h,a,b,c,d,in,12,Kshared[28+16*i]);
		sha256_step2_host(d,e,f,g,h,a,b,c,in,13,Kshared[29+16*i]);
		sha256_step2_host(c,d,e,f,g,h,a,b,in,14,Kshared[30+16*i]);
		sha256_step2_host(b,c,d,e,f,g,h,a,in,15,Kshared[31+16*i]);
	}

	state[0] += a;
	state[1] += b;
	state[2] += c;
	state[3] += d;
	state[4] += e;
	state[5] += f;
	state[6] += g;
	state[7] += h;
}
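
/*
 * Note: this host-side transform is used by lbry_sha256_setBlock_112() below
 * to precompute the midstate of the first 64-byte block of the 112-byte
 * header, so the per-nonce GPU kernels only have to process the final block.
 */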

__device__ __forceinline__
uint32_t xor3b(const uint32_t a, const uint32_t b, const uint32_t c) {
	uint32_t result;
#if __CUDA_ARCH__ >= 500 && CUDA_VERSION >= 7050
	asm ("lop3.b32 %0, %1, %2, %3, 0x96; // xor3b" // 0x96 = 0xF0 ^ 0xCC ^ 0xAA
		: "=r"(result) : "r"(a), "r"(b), "r"(c));
#else
	result = a^b^c;
#endif
	return result;
}

/*
__device__ __forceinline__
uint32_t xor3b(const uint32_t a, const uint32_t b, const uint32_t c) {
	uint32_t result;
	asm("{ .reg .u32 t1; // xor3b \n\t"
		"xor.b32 t1, %2, %3;\n\t"
		"xor.b32 %0, %1, t1;"
		"}"
		: "=r"(result) : "r"(a), "r"(b), "r"(c));
	return result;
}
#define xor3b(a,b,c) (a ^ b ^ c)
*/

__device__ __forceinline__ uint32_t bsg2_0(const uint32_t x)
{
	uint32_t r1 = ROTR32(x,2);
	uint32_t r2 = ROTR32(x,13);
	uint32_t r3 = ROTR32(x,22);
	return xor3b(r1,r2,r3);
}

__device__ __forceinline__ uint32_t bsg2_1(const uint32_t x)
{
	uint32_t r1 = ROTR32(x,6);
	uint32_t r2 = ROTR32(x,11);
	uint32_t r3 = ROTR32(x,25);
	return xor3b(r1,r2,r3);
}

__device__ __forceinline__ uint32_t ssg2_0(const uint32_t x)
{
	uint32_t r1 = ROTR32(x,7);
	uint32_t r2 = ROTR32(x,18);
	uint32_t r3 = shr_t32(x,3);
	return xor3b(r1,r2,r3);
}

__device__ __forceinline__ uint32_t ssg2_1(const uint32_t x)
{
	uint32_t r1 = ROTR32(x,17);
	uint32_t r2 = ROTR32(x,19);
	uint32_t r3 = shr_t32(x,10);
	return xor3b(r1,r2,r3);
}

__device__ __forceinline__ uint32_t andor32(const uint32_t a, const uint32_t b, const uint32_t c)
{
	uint32_t result;
	asm("{\n\t"
		".reg .u32 m,n,o;\n\t"
		"and.b32 m, %1, %2;\n\t"
		" or.b32 n, %1, %2;\n\t"
		"and.b32 o, n, %3;\n\t"
		" or.b32 %0, m, o ;\n\t"
		"}\n\t" : "=r"(result) : "r"(a), "r"(b), "r"(c)
	);
	return result;
}
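
/* The asm above computes the SHA-256 "Maj" function:
 * Maj(a,b,c) = (a & b) | ((a | b) & c). */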

__device__
static void sha2_step1(uint32_t a, uint32_t b, uint32_t c, uint32_t &d, uint32_t e, uint32_t f, uint32_t g, uint32_t &h,
	uint32_t in, const uint32_t Kshared)
{
	uint32_t t1,t2;
	uint32_t vxandx = xandx(e, f, g);
	uint32_t bsg21 = bsg2_1(e);
	uint32_t bsg20 = bsg2_0(a);
	uint32_t andorv = andor32(a,b,c);

	t1 = h + bsg21 + vxandx + Kshared + in;
	t2 = bsg20 + andorv;
	d = d + t1;
	h = t1 + t2;
}

__device__
static void sha2_step2(uint32_t a, uint32_t b, uint32_t c, uint32_t &d, uint32_t e, uint32_t f, uint32_t g, uint32_t &h,
	uint32_t* in, uint32_t pc, const uint32_t Kshared)
{
	uint32_t t1,t2;

	int pcidx1 = (pc-2) & 0xF;
	int pcidx2 = (pc-7) & 0xF;
	int pcidx3 = (pc-15) & 0xF;

	uint32_t inx0 = in[pc];
	uint32_t inx1 = in[pcidx1];
	uint32_t inx2 = in[pcidx2];
	uint32_t inx3 = in[pcidx3];

	uint32_t ssg21 = ssg2_1(inx1);
	uint32_t ssg20 = ssg2_0(inx3);
	uint32_t vxandx = xandx(e, f, g);
	uint32_t bsg21 = bsg2_1(e);
	uint32_t bsg20 = bsg2_0(a);
	uint32_t andorv = andor32(a,b,c);

	in[pc] = ssg21 + inx2 + ssg20 + inx0;

	t1 = h + bsg21 + vxandx + Kshared + in[pc];
	t2 = bsg20 + andorv;
	d = d + t1;
	h = t1 + t2;
}

__device__
static void sha256_round_body(uint32_t* in, uint32_t* state, uint32_t* const Kshared)
{
	uint32_t a = state[0];
	uint32_t b = state[1];
	uint32_t c = state[2];
	uint32_t d = state[3];
	uint32_t e = state[4];
	uint32_t f = state[5];
	uint32_t g = state[6];
	uint32_t h = state[7];

	sha2_step1(a,b,c,d,e,f,g,h,in[0], Kshared[0]);
	sha2_step1(h,a,b,c,d,e,f,g,in[1], Kshared[1]);
	sha2_step1(g,h,a,b,c,d,e,f,in[2], Kshared[2]);
	sha2_step1(f,g,h,a,b,c,d,e,in[3], Kshared[3]);
	sha2_step1(e,f,g,h,a,b,c,d,in[4], Kshared[4]);
	sha2_step1(d,e,f,g,h,a,b,c,in[5], Kshared[5]);
	sha2_step1(c,d,e,f,g,h,a,b,in[6], Kshared[6]);
	sha2_step1(b,c,d,e,f,g,h,a,in[7], Kshared[7]);
	sha2_step1(a,b,c,d,e,f,g,h,in[8], Kshared[8]);
	sha2_step1(h,a,b,c,d,e,f,g,in[9], Kshared[9]);
	sha2_step1(g,h,a,b,c,d,e,f,in[10],Kshared[10]);
	sha2_step1(f,g,h,a,b,c,d,e,in[11],Kshared[11]);
	sha2_step1(e,f,g,h,a,b,c,d,in[12],Kshared[12]);
	sha2_step1(d,e,f,g,h,a,b,c,in[13],Kshared[13]);
	sha2_step1(c,d,e,f,g,h,a,b,in[14],Kshared[14]);
	sha2_step1(b,c,d,e,f,g,h,a,in[15],Kshared[15]);

	#pragma unroll
	for (int i=0; i<3; i++)
	{
		sha2_step2(a,b,c,d,e,f,g,h,in,0, Kshared[16+16*i]);
		sha2_step2(h,a,b,c,d,e,f,g,in,1, Kshared[17+16*i]);
		sha2_step2(g,h,a,b,c,d,e,f,in,2, Kshared[18+16*i]);
		sha2_step2(f,g,h,a,b,c,d,e,in,3, Kshared[19+16*i]);
		sha2_step2(e,f,g,h,a,b,c,d,in,4, Kshared[20+16*i]);
		sha2_step2(d,e,f,g,h,a,b,c,in,5, Kshared[21+16*i]);
		sha2_step2(c,d,e,f,g,h,a,b,in,6, Kshared[22+16*i]);
		sha2_step2(b,c,d,e,f,g,h,a,in,7, Kshared[23+16*i]);
		sha2_step2(a,b,c,d,e,f,g,h,in,8, Kshared[24+16*i]);
		sha2_step2(h,a,b,c,d,e,f,g,in,9, Kshared[25+16*i]);
		sha2_step2(g,h,a,b,c,d,e,f,in,10,Kshared[26+16*i]);
		sha2_step2(f,g,h,a,b,c,d,e,in,11,Kshared[27+16*i]);
		sha2_step2(e,f,g,h,a,b,c,d,in,12,Kshared[28+16*i]);
		sha2_step2(d,e,f,g,h,a,b,c,in,13,Kshared[29+16*i]);
		sha2_step2(c,d,e,f,g,h,a,b,in,14,Kshared[30+16*i]);
		sha2_step2(b,c,d,e,f,g,h,a,in,15,Kshared[31+16*i]);
	}

	state[0] += a;
	state[1] += b;
	state[2] += c;
	state[3] += d;
	state[4] += e;
	state[5] += f;
	state[6] += g;
	state[7] += h;
}

__device__
uint64_t cuda_swab32ll(uint64_t x) {
	return MAKE_ULONGLONG(cuda_swab32(_LODWORD(x)), cuda_swab32(_HIDWORD(x)));
}

__global__
/*__launch_bounds__(256,3)*/
void lbry_sha256_gpu_hash_112(const uint32_t threads, const uint32_t startNonce, const bool swabNonce, uint64_t *outputHash)
{
	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		const uint32_t nonce = startNonce + thread;

		uint32_t dat[16];
		#pragma unroll
		for (int i=0; i<11; i++) dat[i] = c_dataEnd112[i]; // already byte-swapped on the host
		dat[11] = swabNonce ? cuda_swab32(nonce) : nonce;
		dat[12] = 0x80000000;
		dat[13] = 0;
		dat[14] = 0;
		dat[15] = 0x380;

		uint32_t __align__(8) buf[8];
		#pragma unroll
		for (int i=0; i<8; i++) buf[i] = c_midstate112[i];

		sha256_round_body(dat, buf, c_K);

		// output
		uint2* output = (uint2*) (&outputHash[thread * 8U]);
		#pragma unroll
		for (int i=0; i<4; i++) {
			//output[i] = vectorize(cuda_swab32ll(((uint64_t*)buf)[i]));
			output[i] = vectorize(((uint64_t*)buf)[i]); // out without swap, new sha256 after
		}
	}
}

__global__
/*__launch_bounds__(256,3)*/
void lbry_sha256_gpu_hash_32(uint32_t threads, uint64_t *Hash512)
{
	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		uint32_t __align__(8) buf[8]; // align for vectorize
		#pragma unroll
		for (int i=0; i<8; i++) buf[i] = c_H256[i];

		uint32_t* input = (uint32_t*) (&Hash512[thread * 8U]);

		uint32_t dat[16];
		//for (int i=0; i<8; i++) dat[i] = cuda_swab32(input[i]);
		#pragma unroll
		for (int i=0; i<8; i++) dat[i] = input[i];
		dat[8] = 0x80000000;
		#pragma unroll
		for (int i=9; i<15; i++) dat[i] = 0;
		dat[15] = 0x100;

		sha256_round_body(dat, buf, c_K);

		// output
		uint2* output = (uint2*) input;
		#pragma unroll
		for (int i=0; i<4; i++) {
			output[i] = vectorize(cuda_swab32ll(((uint64_t*)buf)[i]));
		}
#ifdef PAD_ZEROS
		#pragma unroll
		for (int i=4; i<8; i++) output[i] = vectorize(0);
#endif
	}
}

__global__
/*__launch_bounds__(256,3)*/
void lbry_sha256d_gpu_hash_112(const uint32_t threads, const uint32_t startNonce, const bool swabNonce, uint64_t *outputHash)
{
	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
	extern __shared__ uint32_t s_K[];
	//s_K[thread & 63] = c_K[thread & 63];
	if (threadIdx.x < 64U) s_K[threadIdx.x] = c_K[threadIdx.x];
	if (thread < threads)
	{
		const uint32_t nonce = startNonce + thread;

		uint32_t dat[16];
		#pragma unroll
		for (int i=0; i<11; i++) dat[i] = c_dataEnd112[i];
		dat[11] = swabNonce ? cuda_swab32(nonce) : nonce;
		dat[12] = 0x80000000;
		dat[13] = 0;
		dat[14] = 0;
		dat[15] = 0x380;

		uint32_t __align__(8) buf[8];
		#pragma unroll
		for (int i=0; i<8; i++) buf[i] = c_midstate112[i];

		sha256_round_body(dat, buf, s_K);

		// second sha256

		#pragma unroll
		for (int i=0; i<8; i++) dat[i] = buf[i];
		dat[8] = 0x80000000;
		#pragma unroll
		for (int i=9; i<15; i++) dat[i] = 0;
		dat[15] = 0x100;

		#pragma unroll
		for (int i=0; i<8; i++) buf[i] = c_H256[i];

		sha256_round_body(dat, buf, s_K);

		// output
		uint2* output = (uint2*) (&outputHash[thread * 8U]);
		#pragma unroll
		for (int i=0; i<4; i++) {
			output[i] = vectorize(cuda_swab32ll(((uint64_t*)buf)[i]));
			//output[i] = vectorize(((uint64_t*)buf)[i]);
		}
	}
}

__global__
/*__launch_bounds__(256,3)*/
void lbry_sha256_gpu_hash_20x2(uint32_t threads, uint64_t *Hash512)
{
	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		uint32_t __align__(8) buf[8]; // align for vectorize
		#pragma unroll
		for (int i=0; i<8; i++) buf[i] = c_H256[i];

		uint32_t* input = (uint32_t*) (&Hash512[thread * 8U]);

		uint32_t dat[16];
		#pragma unroll
		for (int i=0; i<5; i++) dat[i] = cuda_swab32(input[i]);
		#pragma unroll
		for (int i=0; i<5; i++) dat[i+5] = cuda_swab32(input[i+8]);
		dat[10] = 0x80000000;
		#pragma unroll
		for (int i=11; i<15; i++) dat[i] = 0;
		dat[15] = 0x140;

		sha256_round_body(dat, buf, c_K);

		// output
		uint2* output = (uint2*) input;
		#pragma unroll
		for (int i=0; i<4; i++) {
			//output[i] = vectorize(cuda_swab32ll(((uint64_t*)buf)[i]));
			output[i] = vectorize(((uint64_t*)buf)[i]);
		}
#ifdef PAD_ZEROS
		#pragma unroll
		for (int i=4; i<8; i++) output[i] = vectorize(0);
#endif
	}
}

__global__
/*__launch_bounds__(256,3)*/
void lbry_sha256d_gpu_hash_20x2(uint32_t threads, uint64_t *Hash512)
{
	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
	extern __shared__ uint32_t s_K[];
	if (threadIdx.x < 64U) s_K[threadIdx.x] = c_K[threadIdx.x];
	if (thread < threads)
	{
		uint32_t __align__(8) buf[8]; // align for vectorize
		#pragma unroll
		for (int i=0; i<8; i++) buf[i] = c_H256[i];

		uint32_t* input = (uint32_t*) (&Hash512[thread * 8U]);

		uint32_t dat[16];
		#pragma unroll
		for (int i=0; i<5; i++) dat[i] = cuda_swab32(input[i]);
		#pragma unroll
		for (int i=0; i<5; i++) dat[i+5] = cuda_swab32(input[i+8]);
		dat[10] = 0x80000000;
		#pragma unroll
		for (int i=11; i<15; i++) dat[i] = 0;
		dat[15] = 0x140;

		sha256_round_body(dat, buf, s_K);

		// second sha256

		#pragma unroll
		for (int i=0; i<8; i++) dat[i] = buf[i];
		dat[8] = 0x80000000;
		#pragma unroll
		for (int i=9; i<15; i++) dat[i] = 0;
		dat[15] = 0x100;

		#pragma unroll
		for (int i=0; i<8; i++) buf[i] = c_H256[i];

		sha256_round_body(dat, buf, s_K);

		// output
		uint2* output = (uint2*) input;

#ifdef FULL_HASH
		#pragma unroll
		for (int i=0; i<4; i++) {
			output[i] = vectorize(cuda_swab32ll(((uint64_t*)buf)[i]));
			//output[i] = vectorize(((uint64_t*)buf)[i]);
		}
# ifdef PAD_ZEROS
		#pragma unroll
		for (int i=4; i<8; i++) output[i] = vectorize(0);
# endif

#else
		//input[6] = cuda_swab32(buf[6]);
		//input[7] = cuda_swab32(buf[7]);
		output[3] = vectorize(cuda_swab32ll(((uint64_t*)buf)[3]));
#endif
	}
}

__host__
void lbry_sha256_init(int thr_id)
{
	//cudaMemcpyToSymbol(c_H256, cpu_H256, sizeof(cpu_H256), 0, cudaMemcpyHostToDevice);
	cudaMemcpyToSymbol(c_K, cpu_K, sizeof(cpu_K), 0, cudaMemcpyHostToDevice);
	CUDA_SAFE_CALL(cudaMalloc(&d_resNonces, 4*sizeof(uint32_t)));
}

__host__
void lbry_sha256_free(int thr_id)
{
	cudaFree(d_resNonces);
}

__host__
void lbry_sha256_setBlock_112(uint32_t *pdata, uint32_t *ptarget)
{
	uint32_t in[16], buf[8], end[11];
	for (int i=0; i<16; i++) in[i] = cuda_swab32(pdata[i]);
	for (int i=0; i< 8; i++) buf[i] = cpu_H256[i];
	for (int i=0; i<11; i++) end[i] = cuda_swab32(pdata[16+i]);
	sha256_round_body_host(in, buf, cpu_K);

	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_midstate112, buf, 32, 0, cudaMemcpyHostToDevice));
	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_dataEnd112, end, sizeof(end), 0, cudaMemcpyHostToDevice));
	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_target, &ptarget[6], sizeof(uint64_t), 0, cudaMemcpyHostToDevice));
	CUDA_SAFE_CALL(cudaMemcpyToSymbol(d_target, &ptarget[6], sizeof(uint64_t), 0, cudaMemcpyHostToDevice));
}
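
/* The 64-bit target (high words of ptarget) is uploaded to both the constant
 * c_target and the mutable d_target; lbry_sha256d_gpu_hash_final() below
 * lowers d_target whenever it sees a better hash, tightening the filter
 * within a launch. */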

__host__
void lbry_sha256_hash_112(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_outputHash, bool swabNonce, cudaStream_t stream)
{
	const int threadsperblock = 256;

	dim3 grid(threads/threadsperblock);
	dim3 block(threadsperblock);

	lbry_sha256_gpu_hash_112 <<<grid, block, 0, stream>>> (threads, startNonce, swabNonce, (uint64_t*) d_outputHash);
	cudaGetLastError();
}

__host__
void lbry_sha256_hash_32(int thr_id, uint32_t threads, uint32_t *d_Hash, cudaStream_t stream)
{
	const int threadsperblock = 256;

	dim3 grid(threads/threadsperblock);
	dim3 block(threadsperblock);

	lbry_sha256_gpu_hash_32 <<<grid, block, 0, stream>>> (threads, (uint64_t*) d_Hash);
}

__host__
void lbry_sha256d_hash_112(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_outputHash, bool swabNonce, cudaStream_t stream)
{
	const int threadsperblock = 256;

	dim3 grid(threads/threadsperblock);
	dim3 block(threadsperblock);

	lbry_sha256d_gpu_hash_112 <<<grid, block, 64*4, stream>>> (threads, startNonce, swabNonce, (uint64_t*) d_outputHash);
}

__host__
void lbry_sha256_hash_20x2(int thr_id, uint32_t threads, uint32_t *d_Hash, cudaStream_t stream)
{
	const int threadsperblock = 256;

	dim3 grid(threads/threadsperblock);
	dim3 block(threadsperblock);

	lbry_sha256_gpu_hash_20x2 <<<grid, block, 0, stream>>> (threads, (uint64_t*) d_Hash);
}

__host__
void lbry_sha256d_hash_20x2(int thr_id, uint32_t threads, uint32_t *d_Hash, cudaStream_t stream)
{
	const int threadsperblock = 256;

	dim3 grid(threads/threadsperblock);
	dim3 block(threadsperblock);

	lbry_sha256d_gpu_hash_20x2 <<<grid, block, 64*4, stream>>> (threads, (uint64_t*) d_Hash);
}

__global__
__launch_bounds__(256,3)
void lbry_sha256d_gpu_hash_final(const uint32_t threads, const uint32_t startNonce, uint64_t *Hash512, uint32_t *resNonces)
{
	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		uint32_t __align__(8) buf[8]; // align for vectorize
		#pragma unroll
		for (int i=0; i<8; i++) buf[i] = c_H256[i];

		uint32_t* input = (uint32_t*) (&Hash512[thread * 8U]);

		uint32_t __align__(8) dat[16];
		#pragma unroll
		for (int i=0; i<5; i++) dat[i] = cuda_swab32(input[i]);
		#pragma unroll
		for (int i=0; i<5; i++) dat[i+5] = cuda_swab32(input[i+8]);
		dat[10] = 0x80000000;
		#pragma unroll
		for (int i=11; i<15; i++) dat[i] = 0;
		dat[15] = 0x140;

		sha256_round_body(dat, buf, c_K);

		// second sha256

		#pragma unroll
		for (int i=0; i<8; i++) dat[i] = buf[i];
		dat[8] = 0x80000000;
		#pragma unroll
		for (int i=9; i<15; i++) dat[i] = 0;
		dat[15] = 0x100;

		#pragma unroll
		for (int i=0; i<8; i++) buf[i] = c_H256[i];

		sha256_round_body(dat, buf, c_K);

		// valid nonces
		uint64_t high = cuda_swab32ll(((uint64_t*)buf)[3]);
		if (high <= d_target[0]) {
			//printf("%08x %08x - %016llx %016llx - %08x %08x\n", buf[7], buf[6], high, d_target[0], c_target[1], c_target[0]);
			uint32_t nonce = startNonce + thread;
			resNonces[1] = atomicExch(resNonces, nonce);
			d_target[0] = high;
		}
	}
}

__host__
void lbry_sha256d_hash_final(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_inputHash, uint32_t *resNonces, cudaStream_t stream)
{
	const int threadsperblock = 256;

	dim3 grid(threads/threadsperblock);
	dim3 block(threadsperblock);

	CUDA_SAFE_CALL(cudaMemset(d_resNonces, 0xFF, 2 * sizeof(uint32_t)));
	cudaThreadSynchronize();

	lbry_sha256d_gpu_hash_final <<<grid, block, 0, stream>>> (threads, startNonce, (uint64_t*) d_inputHash, d_resNonces);

	cudaThreadSynchronize();

	CUDA_SAFE_CALL(cudaMemcpy(resNonces, d_resNonces, 2 * sizeof(uint32_t), cudaMemcpyDeviceToHost));
	if (resNonces[0] == resNonces[1]) {
		resNonces[1] = UINT32_MAX;
	}
}
@@ -0,0 +1,181 @@
/**
 * sha-512 CUDA implementation.
 */

#include <stdio.h>
#include <stdint.h>
#include <memory.h>

#include <cuda_helper.h>

static __constant__ uint64_t K_512[80];

static const uint64_t K512[80] = {
	0x428A2F98D728AE22, 0x7137449123EF65CD, 0xB5C0FBCFEC4D3B2F, 0xE9B5DBA58189DBBC,
	0x3956C25BF348B538, 0x59F111F1B605D019, 0x923F82A4AF194F9B, 0xAB1C5ED5DA6D8118,
	0xD807AA98A3030242, 0x12835B0145706FBE, 0x243185BE4EE4B28C, 0x550C7DC3D5FFB4E2,
	0x72BE5D74F27B896F, 0x80DEB1FE3B1696B1, 0x9BDC06A725C71235, 0xC19BF174CF692694,
	0xE49B69C19EF14AD2, 0xEFBE4786384F25E3, 0x0FC19DC68B8CD5B5, 0x240CA1CC77AC9C65,
	0x2DE92C6F592B0275, 0x4A7484AA6EA6E483, 0x5CB0A9DCBD41FBD4, 0x76F988DA831153B5,
	0x983E5152EE66DFAB, 0xA831C66D2DB43210, 0xB00327C898FB213F, 0xBF597FC7BEEF0EE4,
	0xC6E00BF33DA88FC2, 0xD5A79147930AA725, 0x06CA6351E003826F, 0x142929670A0E6E70,
	0x27B70A8546D22FFC, 0x2E1B21385C26C926, 0x4D2C6DFC5AC42AED, 0x53380D139D95B3DF,
	0x650A73548BAF63DE, 0x766A0ABB3C77B2A8, 0x81C2C92E47EDAEE6, 0x92722C851482353B,
	0xA2BFE8A14CF10364, 0xA81A664BBC423001, 0xC24B8B70D0F89791, 0xC76C51A30654BE30,
	0xD192E819D6EF5218, 0xD69906245565A910, 0xF40E35855771202A, 0x106AA07032BBD1B8,
	0x19A4C116B8D2D0C8, 0x1E376C085141AB53, 0x2748774CDF8EEB99, 0x34B0BCB5E19B48A8,
	0x391C0CB3C5C95A63, 0x4ED8AA4AE3418ACB, 0x5B9CCA4F7763E373, 0x682E6FF3D6B2B8A3,
	0x748F82EE5DEFB2FC, 0x78A5636F43172F60, 0x84C87814A1F0AB72, 0x8CC702081A6439EC,
	0x90BEFFFA23631E28, 0xA4506CEBDE82BDE9, 0xBEF9A3F7B2C67915, 0xC67178F2E372532B,
	0xCA273ECEEA26619C, 0xD186B8C721C0C207, 0xEADA7DD6CDE0EB1E, 0xF57D4F7FEE6ED178,
	0x06F067AA72176FBA, 0x0A637DC5A2C898A6, 0x113F9804BEF90DAE, 0x1B710B35131C471B,
	0x28DB77F523047D84, 0x32CAAB7B40C72493, 0x3C9EBE0A15C9BEBC, 0x431D67C49C100D4C,
	0x4CC5D4BECB3E42B6, 0x597F299CFC657E2A, 0x5FCB6FAB3AD6FAEC, 0x6C44198C4A475817
};

//#undef xor3
//#define xor3(a,b,c) (a^b^c)

static __device__ __forceinline__
uint64_t bsg5_0(const uint64_t x)
{
	uint64_t r1 = ROTR64(x,28);
	uint64_t r2 = ROTR64(x,34);
	uint64_t r3 = ROTR64(x,39);
	return xor3(r1,r2,r3);
}

static __device__ __forceinline__
uint64_t bsg5_1(const uint64_t x)
{
	uint64_t r1 = ROTR64(x,14);
	uint64_t r2 = ROTR64(x,18);
	uint64_t r3 = ROTR64(x,41);
	return xor3(r1,r2,r3);
}

static __device__ __forceinline__
uint64_t ssg5_0(const uint64_t x)
{
	uint64_t r1 = ROTR64(x,1);
	uint64_t r2 = ROTR64(x,8);
	uint64_t r3 = shr_t64(x,7);
	return xor3(r1,r2,r3);
}

static __device__ __forceinline__
uint64_t ssg5_1(const uint64_t x)
{
	uint64_t r1 = ROTR64(x,19);
	uint64_t r2 = ROTR64(x,61);
	uint64_t r3 = shr_t64(x,6);
	return xor3(r1,r2,r3);
}

static __device__ __forceinline__
uint64_t xandx64(const uint64_t a, const uint64_t b, const uint64_t c)
{
	uint64_t result;
	asm("{ .reg .u64 m,n; // xandx64\n\t"
		"xor.b64 m, %2,%3;\n\t"
		"and.b64 n, m,%1;\n\t"
		"xor.b64 %0, n,%3;\n\t"
		"}" : "=l"(result) : "l"(a), "l"(b), "l"(c));
	return result;
}

static __device__ __forceinline__
void sha512_step2(uint64_t* r, uint64_t* W, uint64_t* K, const int ord, int i)
{
	int u = 8-ord;
	uint64_t a = r[(0+u) & 7];
	uint64_t b = r[(1+u) & 7];
	uint64_t c = r[(2+u) & 7];
	uint64_t d = r[(3+u) & 7];
	uint64_t e = r[(4+u) & 7];
	uint64_t f = r[(5+u) & 7];
	uint64_t g = r[(6+u) & 7];
	uint64_t h = r[(7+u) & 7];

	uint64_t T1 = h + bsg5_1(e) + xandx64(e,f,g) + W[i] + K[i];
	uint64_t T2 = bsg5_0(a) + andor(a,b,c);
	r[(3+u) & 7] = d + T1;
	r[(7+u) & 7] = T1 + T2;
}
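
/* sha512_step2() never shuffles the eight working variables: `ord` rotates
 * which slot of r[] is treated as a..h (via u = 8 - ord), so each step only
 * writes the two updated values, d + T1 and T1 + T2, back in place. */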

/**************************************************************************************************/

__global__
void lbry_sha512_gpu_hash_32(const uint32_t threads, uint64_t *g_hash)
{
	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		uint64_t *pHash = &g_hash[thread * 8U];

		uint64_t W[80];
		uint64_t r[8];

		uint64_t IV512[8] = {
			0x6A09E667F3BCC908, 0xBB67AE8584CAA73B, 0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
			0x510E527FADE682D1, 0x9B05688C2B3E6C1F, 0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179
		};

		#pragma unroll
		for (int i = 0; i < 8; i++)
			r[i] = IV512[i];

		#pragma unroll
		for (int i = 0; i < 4; i++) {
			// 32 bytes input
			W[i] = cuda_swab64(pHash[i]);
		}

		W[4] = 0x8000000000000000; // end tag

		#pragma unroll
		for (int i = 5; i < 15; i++) W[i] = 0;

		W[15] = 0x100; // 256 bits

		#pragma unroll
		for (int i = 16; i < 80; i++) W[i] = 0;

		#pragma unroll 64
		for (int i = 16; i < 80; i++)
			W[i] = ssg5_1(W[i - 2]) + W[i - 7] + ssg5_0(W[i - 15]) + W[i - 16];

		#pragma unroll 10
		for (int i = 0; i < 10; i++) {
			#pragma unroll 8
			for (int ord=0; ord<8; ord++)
				sha512_step2(r, W, K_512, ord, 8*i + ord);
		}

		#pragma unroll 8
		for (int i = 0; i < 8; i++)
			pHash[i] = cuda_swab64(r[i] + IV512[i]);
	}
}

__host__
void lbry_sha512_hash_32(int thr_id, uint32_t threads, uint32_t *d_hash, cudaStream_t stream)
{
	const int threadsperblock = 256;

	dim3 grid((threads + threadsperblock-1)/threadsperblock);
	dim3 block(threadsperblock);

	size_t shared_size = 80*8;
	lbry_sha512_gpu_hash_32 <<<grid, block, shared_size, stream>>> (threads, (uint64_t*)d_hash);
}

/**************************************************************************************************/

__host__
void lbry_sha512_init(int thr_id)
{
	cudaMemcpyToSymbol(K_512, K512, 80*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
}
@@ -0,0 +1,225 @@
/**
 * Lbry CUDA Implementation
 *
 * by tpruvot@github - July 2016
 *
 */

#include <string.h>
#include <stdint.h>

extern "C" {
#include <sph/sph_sha2.h>
#include <sph/sph_ripemd.h>
}

#include <cuda_helper.h>
#include <miner.h>

#define A 64
#define debug_cpu 0

extern "C" void lbry_hash(void* output, const void* input)
{
	uint32_t _ALIGN(A) hashA[16];
	uint32_t _ALIGN(A) hashB[8];
	uint32_t _ALIGN(A) hashC[8];

	sph_sha256_context ctx_sha256;
	sph_sha512_context ctx_sha512;
	sph_ripemd160_context ctx_ripemd;

	sph_sha256_init(&ctx_sha256);
	sph_sha256(&ctx_sha256, input, 112);
	sph_sha256_close(&ctx_sha256, hashA);

	sph_sha256(&ctx_sha256, hashA, 32);
	sph_sha256_close(&ctx_sha256, hashA);

	sph_sha512_init(&ctx_sha512);
	sph_sha512(&ctx_sha512, hashA, 32);
	sph_sha512_close(&ctx_sha512, hashA);

	sph_ripemd160_init(&ctx_ripemd);
	sph_ripemd160(&ctx_ripemd, hashA, 32); // sha512 low
	sph_ripemd160_close(&ctx_ripemd, hashB);
	if (debug_cpu) applog_hex(hashB, 20);

	sph_ripemd160(&ctx_ripemd, &hashA[8], 32); // sha512 high
	sph_ripemd160_close(&ctx_ripemd, hashC);
	if (debug_cpu) applog_hex(hashC, 20);

	sph_sha256(&ctx_sha256, hashB, 20);
	sph_sha256(&ctx_sha256, hashC, 20);
	sph_sha256_close(&ctx_sha256, hashA);
	if (debug_cpu) applog_hex(hashA, 32);

	sph_sha256(&ctx_sha256, hashA, 32);
	sph_sha256_close(&ctx_sha256, hashA);

	memcpy(output, hashA, 32);
}
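
/*
 * lbry_hash() is the CPU reference for the LBRY proof-of-work chain:
 * SHA-256d over the 112-byte header, SHA-512 of that digest, RIPEMD-160
 * over each 32-byte half of the SHA-512 output, then SHA-256d of the two
 * concatenated 20-byte digests. scanhash_lbry() below uses it to validate
 * GPU candidates before submission.
 */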

/* ############################################################################################################################### */

extern void lbry_ripemd160_init(int thr_id);
extern void lbry_sha256_init(int thr_id);
extern void lbry_sha256_free(int thr_id);
extern void lbry_sha256_setBlock_112(uint32_t *pdata, uint32_t *ptarget);
extern void lbry_sha256_hash_112(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_outputHash, bool swabNonce, cudaStream_t stream);
extern void lbry_sha256d_hash_112(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_outputHash, bool swabNonce, cudaStream_t stream);
extern void lbry_sha256_hash_32(int thr_id, uint32_t threads, uint32_t *d_hash, cudaStream_t stream);
extern void lbry_sha512_init(int thr_id);
extern void lbry_sha512_hash_32(int thr_id, uint32_t threads, uint32_t *d_hash, cudaStream_t stream);
extern void lbry_ripemd160_hash_32x2(int thr_id, uint32_t threads, uint32_t *g_Hash, cudaStream_t stream);
extern void lbry_sha256_hash_20x2(int thr_id, uint32_t threads, uint32_t *g_Hash, cudaStream_t stream);
extern void lbry_sha256d_hash_20x2(int thr_id, uint32_t threads, uint32_t *g_Hash, cudaStream_t stream);
extern void lbry_sha256d_hash_final(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_inputHash, uint32_t *resNonces, cudaStream_t stream);

static __inline uint32_t swab32_if(uint32_t val, bool iftrue) {
	return iftrue ? swab32(val) : val;
}

static bool init[MAX_GPUS] = { 0 };

static uint32_t *d_hash[MAX_GPUS];

// lbry's nonce sits at a different offset in its 112-byte header
#define LBC_NONCE_OFT32 27

extern "C" int scanhash_lbry(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done)
{
	uint32_t _ALIGN(A) vhash[8];
	uint32_t _ALIGN(A) endiandata[28];
	uint32_t *pdata = work->data;
	uint32_t *ptarget = work->target;

	const uint32_t first_nonce = pdata[LBC_NONCE_OFT32];
	const int swap = 0; // to toggle nonce endian

	const int dev_id = device_map[thr_id];
	int intensity = (device_sm[dev_id] > 500 && !is_windows()) ? 22 : 20;
	if (device_sm[dev_id] >= 600) intensity = 23;
	if (device_sm[dev_id] < 350) intensity = 18;

	uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity);
	if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);

	if (opt_benchmark) {
		ptarget[7] = 0xff;
	}

	if (!init[thr_id]) {
		cudaSetDevice(dev_id);
		if (opt_cudaschedule == -1 && gpu_threads == 1) {
			cudaDeviceReset();
			// reduce cpu usage (linux)
			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
			CUDA_LOG_ERROR();
		}

		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput));

		lbry_sha256_init(thr_id);
		lbry_sha512_init(thr_id);
		lbry_ripemd160_init(thr_id);
		cuda_check_cpu_init(thr_id, throughput);
		CUDA_LOG_ERROR();

		init[thr_id] = true;
	}

	for (int i=0; i < LBC_NONCE_OFT32; i++) {
		be32enc(&endiandata[i], pdata[i]);
	}

	lbry_sha256_setBlock_112(endiandata, ptarget);
	cuda_check_cpu_setTarget(ptarget);

	do {

		// Hash with CUDA
#if 0
		lbry_sha256_hash_112(thr_id, throughput, pdata[LBC_NONCE_OFT32], d_hash[thr_id], swap, 0);
		lbry_sha256_hash_32(thr_id, throughput, d_hash[thr_id], 0);
#else
		lbry_sha256d_hash_112(thr_id, throughput, pdata[LBC_NONCE_OFT32], d_hash[thr_id], swap, 0);
#endif
		CUDA_LOG_ERROR();

		lbry_sha512_hash_32(thr_id, throughput, d_hash[thr_id], 0);

		lbry_ripemd160_hash_32x2(thr_id, throughput, d_hash[thr_id], 0);

#if 0
		lbry_sha256d_hash_20x2(thr_id, throughput, d_hash[thr_id], 0);
		uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[LBC_NONCE_OFT32], d_hash[thr_id]);
#else
		uint32_t resNonces[2] = { UINT32_MAX, UINT32_MAX };
		lbry_sha256d_hash_final(thr_id, throughput, pdata[LBC_NONCE_OFT32], d_hash[thr_id], resNonces, 0);
		uint32_t foundNonce = resNonces[0];
#endif

		*hashes_done = pdata[LBC_NONCE_OFT32] - first_nonce + throughput;

		if (foundNonce != UINT32_MAX)
		{
			endiandata[LBC_NONCE_OFT32] = swab32_if(foundNonce, !swap);
			lbry_hash(vhash, endiandata);

			if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
				int res = 1;
				uint32_t secNonce = resNonces[1];
				//uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[LBC_NONCE_OFT32], d_hash[thr_id], 1);
				work->nonces[0] = swab32_if(foundNonce, swap);
				work_set_target_ratio(work, vhash);
				if (secNonce != UINT32_MAX) {
					//if (secNonce) {
					if (opt_debug)
						gpulog(LOG_BLUE, thr_id, "found second nonce %08x", swab32(secNonce));
					endiandata[LBC_NONCE_OFT32] = swab32_if(secNonce, !swap);
					lbry_hash(vhash, endiandata);
					if (bn_hash_target_ratio(vhash, ptarget) > work->shareratio) {
						work_set_target_ratio(work, vhash);
						xchg(work->nonces[0], work->nonces[1]);
					}
					work->nonces[1] = swab32_if(secNonce, swap);
					res++;
				}
				pdata[LBC_NONCE_OFT32] = work->nonces[0];
				return res;
			} else {
				gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU, %08x > %08x!", foundNonce, vhash[7], ptarget[7]);
			}
		}

		if ((uint64_t) throughput + pdata[LBC_NONCE_OFT32] >= max_nonce) {
			pdata[LBC_NONCE_OFT32] = max_nonce;
			break;
		}

		pdata[LBC_NONCE_OFT32] += throughput;

	} while (!work_restart[thr_id].restart);

	//*hashes_done = pdata[LBC_NONCE_OFT32] - first_nonce;

	return 0;
}

// cleanup
void free_lbry(int thr_id)
{
	if (!init[thr_id])
		return;

	cudaThreadSynchronize();

	cudaFree(d_hash[thr_id]);
	lbry_sha256_free(thr_id);

	cuda_check_cpu_free(thr_id);
	init[thr_id] = false;

	cudaDeviceSynchronize();
}
@@ -0,0 +1,833 @@
/* $Id: ripemd.c 216 2010-06-08 09:46:57Z tp $ */
/*
 * RIPEMD-160 implementation.
 *
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * ===========================(LICENSE END)=============================
 *
 * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
 */

#include <stddef.h>
#include <string.h>

#include "sph_ripemd.h"

/*
 * Round functions for RIPEMD (original).
 */
#define F(x, y, z)    ((((y) ^ (z)) & (x)) ^ (z))
#define G(x, y, z)    (((x) & (y)) | (((x) | (y)) & (z)))
#define H(x, y, z)    ((x) ^ (y) ^ (z))

static const sph_u32 oIV[4] = {
	SPH_C32(0x67452301), SPH_C32(0xEFCDAB89),
	SPH_C32(0x98BADCFE), SPH_C32(0x10325476)
};

/*
 * Round functions for RIPEMD-128 and RIPEMD-160.
 */
#define F1(x, y, z)   ((x) ^ (y) ^ (z))
#define F2(x, y, z)   ((((y) ^ (z)) & (x)) ^ (z))
#define F3(x, y, z)   (((x) | ~(y)) ^ (z))
#define F4(x, y, z)   ((((x) ^ (y)) & (z)) ^ (y))
#define F5(x, y, z)   ((x) ^ ((y) | ~(z)))

static const sph_u32 IV[5] = {
	SPH_C32(0x67452301), SPH_C32(0xEFCDAB89), SPH_C32(0x98BADCFE),
	SPH_C32(0x10325476), SPH_C32(0xC3D2E1F0)
};

#define ROTL    SPH_ROTL32

/* ===================================================================== */
/*
 * RIPEMD (original hash, deprecated).
 */

#define FF1(A, B, C, D, X, s)   do { \
		sph_u32 tmp = SPH_T32((A) + F(B, C, D) + (X)); \
		(A) = ROTL(tmp, (s)); \
	} while (0)

#define GG1(A, B, C, D, X, s)   do { \
		sph_u32 tmp = SPH_T32((A) + G(B, C, D) \
			+ (X) + SPH_C32(0x5A827999)); \
		(A) = ROTL(tmp, (s)); \
	} while (0)

#define HH1(A, B, C, D, X, s)   do { \
		sph_u32 tmp = SPH_T32((A) + H(B, C, D) \
			+ (X) + SPH_C32(0x6ED9EBA1)); \
		(A) = ROTL(tmp, (s)); \
	} while (0)

#define FF2(A, B, C, D, X, s)   do { \
		sph_u32 tmp = SPH_T32((A) + F(B, C, D) \
			+ (X) + SPH_C32(0x50A28BE6)); \
		(A) = ROTL(tmp, (s)); \
	} while (0)

#define GG2(A, B, C, D, X, s)   do { \
		sph_u32 tmp = SPH_T32((A) + G(B, C, D) + (X)); \
		(A) = ROTL(tmp, (s)); \
	} while (0)

#define HH2(A, B, C, D, X, s)   do { \
		sph_u32 tmp = SPH_T32((A) + H(B, C, D) \
			+ (X) + SPH_C32(0x5C4DD124)); \
		(A) = ROTL(tmp, (s)); \
	} while (0)

#define RIPEMD_ROUND_BODY(in, h)   do { \
		sph_u32 A1, B1, C1, D1; \
		sph_u32 A2, B2, C2, D2; \
		sph_u32 tmp; \
\
		A1 = A2 = (h)[0]; \
		B1 = B2 = (h)[1]; \
		C1 = C2 = (h)[2]; \
		D1 = D2 = (h)[3]; \
\
		FF1(A1, B1, C1, D1, in( 0), 11); \
		FF1(D1, A1, B1, C1, in( 1), 14); \
		FF1(C1, D1, A1, B1, in( 2), 15); \
		FF1(B1, C1, D1, A1, in( 3), 12); \
		FF1(A1, B1, C1, D1, in( 4),  5); \
		FF1(D1, A1, B1, C1, in( 5),  8); \
		FF1(C1, D1, A1, B1, in( 6),  7); \
		FF1(B1, C1, D1, A1, in( 7),  9); \
		FF1(A1, B1, C1, D1, in( 8), 11); \
		FF1(D1, A1, B1, C1, in( 9), 13); \
		FF1(C1, D1, A1, B1, in(10), 14); \
		FF1(B1, C1, D1, A1, in(11), 15); \
		FF1(A1, B1, C1, D1, in(12),  6); \
		FF1(D1, A1, B1, C1, in(13),  7); \
		FF1(C1, D1, A1, B1, in(14),  9); \
		FF1(B1, C1, D1, A1, in(15),  8); \
\
		GG1(A1, B1, C1, D1, in( 7),  7); \
		GG1(D1, A1, B1, C1, in( 4),  6); \
		GG1(C1, D1, A1, B1, in(13),  8); \
		GG1(B1, C1, D1, A1, in( 1), 13); \
		GG1(A1, B1, C1, D1, in(10), 11); \
		GG1(D1, A1, B1, C1, in( 6),  9); \
		GG1(C1, D1, A1, B1, in(15),  7); \
		GG1(B1, C1, D1, A1, in( 3), 15); \
		GG1(A1, B1, C1, D1, in(12),  7); \
		GG1(D1, A1, B1, C1, in( 0), 12); \
		GG1(C1, D1, A1, B1, in( 9), 15); \
		GG1(B1, C1, D1, A1, in( 5),  9); \
		GG1(A1, B1, C1, D1, in(14),  7); \
		GG1(D1, A1, B1, C1, in( 2), 11); \
		GG1(C1, D1, A1, B1, in(11), 13); \
		GG1(B1, C1, D1, A1, in( 8), 12); \
\
		HH1(A1, B1, C1, D1, in( 3), 11); \
		HH1(D1, A1, B1, C1, in(10), 13); \
		HH1(C1, D1, A1, B1, in( 2), 14); \
		HH1(B1, C1, D1, A1, in( 4),  7); \
		HH1(A1, B1, C1, D1, in( 9), 14); \
		HH1(D1, A1, B1, C1, in(15),  9); \
		HH1(C1, D1, A1, B1, in( 8), 13); \
		HH1(B1, C1, D1, A1, in( 1), 15); \
		HH1(A1, B1, C1, D1, in(14),  6); \
		HH1(D1, A1, B1, C1, in( 7),  8); \
		HH1(C1, D1, A1, B1, in( 0), 13); \
		HH1(B1, C1, D1, A1, in( 6),  6); \
		HH1(A1, B1, C1, D1, in(11), 12); \
		HH1(D1, A1, B1, C1, in(13),  5); \
		HH1(C1, D1, A1, B1, in( 5),  7); \
		HH1(B1, C1, D1, A1, in(12),  5); \
\
		FF2(A2, B2, C2, D2, in( 0), 11); \
		FF2(D2, A2, B2, C2, in( 1), 14); \
		FF2(C2, D2, A2, B2, in( 2), 15); \
		FF2(B2, C2, D2, A2, in( 3), 12); \
		FF2(A2, B2, C2, D2, in( 4),  5); \
		FF2(D2, A2, B2, C2, in( 5),  8); \
		FF2(C2, D2, A2, B2, in( 6),  7); \
		FF2(B2, C2, D2, A2, in( 7),  9); \
		FF2(A2, B2, C2, D2, in( 8), 11); \
		FF2(D2, A2, B2, C2, in( 9), 13); \
		FF2(C2, D2, A2, B2, in(10), 14); \
		FF2(B2, C2, D2, A2, in(11), 15); \
		FF2(A2, B2, C2, D2, in(12),  6); \
		FF2(D2, A2, B2, C2, in(13),  7); \
		FF2(C2, D2, A2, B2, in(14),  9); \
		FF2(B2, C2, D2, A2, in(15),  8); \
\
		GG2(A2, B2, C2, D2, in( 7),  7); \
		GG2(D2, A2, B2, C2, in( 4),  6); \
		GG2(C2, D2, A2, B2, in(13),  8); \
		GG2(B2, C2, D2, A2, in( 1), 13); \
		GG2(A2, B2, C2, D2, in(10), 11); \
		GG2(D2, A2, B2, C2, in( 6),  9); \
		GG2(C2, D2, A2, B2, in(15),  7); \
		GG2(B2, C2, D2, A2, in( 3), 15); \
		GG2(A2, B2, C2, D2, in(12),  7); \
		GG2(D2, A2, B2, C2, in( 0), 12); \
		GG2(C2, D2, A2, B2, in( 9), 15); \
		GG2(B2, C2, D2, A2, in( 5),  9); \
		GG2(A2, B2, C2, D2, in(14),  7); \
		GG2(D2, A2, B2, C2, in( 2), 11); \
		GG2(C2, D2, A2, B2, in(11), 13); \
		GG2(B2, C2, D2, A2, in( 8), 12); \
\
		HH2(A2, B2, C2, D2, in( 3), 11); \
		HH2(D2, A2, B2, C2, in(10), 13); \
		HH2(C2, D2, A2, B2, in( 2), 14); \
		HH2(B2, C2, D2, A2, in( 4),  7); \
		HH2(A2, B2, C2, D2, in( 9), 14); \
		HH2(D2, A2, B2, C2, in(15),  9); \
		HH2(C2, D2, A2, B2, in( 8), 13); \
		HH2(B2, C2, D2, A2, in( 1), 15); \
		HH2(A2, B2, C2, D2, in(14),  6); \
		HH2(D2, A2, B2, C2, in( 7),  8); \
		HH2(C2, D2, A2, B2, in( 0), 13); \
		HH2(B2, C2, D2, A2, in( 6),  6); \
		HH2(A2, B2, C2, D2, in(11), 12); \
		HH2(D2, A2, B2, C2, in(13),  5); \
		HH2(C2, D2, A2, B2, in( 5),  7); \
		HH2(B2, C2, D2, A2, in(12),  5); \
\
		tmp = SPH_T32((h)[1] + C1 + D2); \
		(h)[1] = SPH_T32((h)[2] + D1 + A2); \
		(h)[2] = SPH_T32((h)[3] + A1 + B2); \
		(h)[3] = SPH_T32((h)[0] + B1 + C2); \
		(h)[0] = tmp; \
	} while (0)

/*
 * One round of RIPEMD. The data must be aligned for 32-bit access.
 */
static void
ripemd_round(const unsigned char *data, sph_u32 r[5])
{
#if SPH_LITTLE_FAST

#define RIPEMD_IN(x)   sph_dec32le_aligned(data + (4 * (x)))

#else

	sph_u32 X_var[16];
	int i;

	for (i = 0; i < 16; i ++)
		X_var[i] = sph_dec32le_aligned(data + 4 * i);
#define RIPEMD_IN(x)   X_var[x]

#endif
	RIPEMD_ROUND_BODY(RIPEMD_IN, r);
#undef RIPEMD_IN
}

/* see sph_ripemd.h */
void
sph_ripemd_init(void *cc)
{
	sph_ripemd_context *sc;

	sc = cc;
	memcpy(sc->val, oIV, sizeof sc->val);
#if SPH_64
	sc->count = 0;
#else
	sc->count_high = sc->count_low = 0;
#endif
}

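/*
 * md_helper.c is the generic sphlib MD template: given RFUN (the
 * per-block compression round), HASH (the function name prefix) and
 * LE32 (little-endian length encoding), its inclusion expands into the
 * streaming update and padding code used below (sph_ripemd() and the
 * internal ripemd_close() helper).
 */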
#define RFUN   ripemd_round
#define HASH   ripemd
#define LE32   1
#include "md_helper.c"
#undef RFUN
#undef HASH
#undef LE32

/* see sph_ripemd.h */
void
sph_ripemd_close(void *cc, void *dst)
{
	ripemd_close(cc, dst, 4);
	sph_ripemd_init(cc);
}

/* see sph_ripemd.h */
void
sph_ripemd_comp(const sph_u32 msg[16], sph_u32 val[4])
{
#define RIPEMD_IN(x)   msg[x]
	RIPEMD_ROUND_BODY(RIPEMD_IN, val);
#undef RIPEMD_IN
}

/* ===================================================================== */
/*
 * RIPEMD-128.
 */

/*
 * Round constants for RIPEMD-128.
 */
#define sK11   SPH_C32(0x00000000)
#define sK12   SPH_C32(0x5A827999)
#define sK13   SPH_C32(0x6ED9EBA1)
#define sK14   SPH_C32(0x8F1BBCDC)

#define sK21   SPH_C32(0x50A28BE6)
#define sK22   SPH_C32(0x5C4DD124)
#define sK23   SPH_C32(0x6D703EF3)
#define sK24   SPH_C32(0x00000000)

#define sRR(a, b, c, d, f, s, r, k)   do { \
		a = ROTL(SPH_T32(a + f(b, c, d) + r + k), s); \
	} while (0)

#define sROUND1(a, b, c, d, f, s, r, k)  \
	sRR(a ## 1, b ## 1, c ## 1, d ## 1, f, s, r, sK1 ## k)

#define sROUND2(a, b, c, d, f, s, r, k)  \
	sRR(a ## 2, b ## 2, c ## 2, d ## 2, f, s, r, sK2 ## k)

/*
 * This macro defines the body for a RIPEMD-128 compression function
 * implementation. The "in" parameter should evaluate, when applied to a
 * numerical input parameter from 0 to 15, to an expression which yields
 * the corresponding input block. The "h" parameter should evaluate to
 * an array or pointer expression designating the array of 4 words which
 * contains the input and output of the compression function.
 */
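/*
 * Instantiation sketch: the macro is used exactly the way
 * ripemd128_round() and sph_ripemd128_comp() below use it, e.g.
 *
 *   #define MY_IN(x)   msg[x]
 *   RIPEMD128_ROUND_BODY(MY_IN, state);
 *   #undef MY_IN
 *
 * where "msg" and "state" are caller-provided sph_u32 arrays of 16 and
 * 4 words respectively (MY_IN is only an illustrative name).
 */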

#define RIPEMD128_ROUND_BODY(in, h)   do { \
		sph_u32 A1, B1, C1, D1; \
		sph_u32 A2, B2, C2, D2; \
		sph_u32 tmp; \
\
		A1 = A2 = (h)[0]; \
		B1 = B2 = (h)[1]; \
		C1 = C2 = (h)[2]; \
		D1 = D2 = (h)[3]; \
\
		sROUND1(A, B, C, D, F1, 11, in( 0), 1); \
		sROUND1(D, A, B, C, F1, 14, in( 1), 1); \
		sROUND1(C, D, A, B, F1, 15, in( 2), 1); \
		sROUND1(B, C, D, A, F1, 12, in( 3), 1); \
		sROUND1(A, B, C, D, F1,  5, in( 4), 1); \
		sROUND1(D, A, B, C, F1,  8, in( 5), 1); \
		sROUND1(C, D, A, B, F1,  7, in( 6), 1); \
		sROUND1(B, C, D, A, F1,  9, in( 7), 1); \
		sROUND1(A, B, C, D, F1, 11, in( 8), 1); \
		sROUND1(D, A, B, C, F1, 13, in( 9), 1); \
		sROUND1(C, D, A, B, F1, 14, in(10), 1); \
		sROUND1(B, C, D, A, F1, 15, in(11), 1); \
		sROUND1(A, B, C, D, F1,  6, in(12), 1); \
		sROUND1(D, A, B, C, F1,  7, in(13), 1); \
		sROUND1(C, D, A, B, F1,  9, in(14), 1); \
		sROUND1(B, C, D, A, F1,  8, in(15), 1); \
\
		sROUND1(A, B, C, D, F2,  7, in( 7), 2); \
		sROUND1(D, A, B, C, F2,  6, in( 4), 2); \
		sROUND1(C, D, A, B, F2,  8, in(13), 2); \
		sROUND1(B, C, D, A, F2, 13, in( 1), 2); \
		sROUND1(A, B, C, D, F2, 11, in(10), 2); \
		sROUND1(D, A, B, C, F2,  9, in( 6), 2); \
		sROUND1(C, D, A, B, F2,  7, in(15), 2); \
		sROUND1(B, C, D, A, F2, 15, in( 3), 2); \
		sROUND1(A, B, C, D, F2,  7, in(12), 2); \
		sROUND1(D, A, B, C, F2, 12, in( 0), 2); \
		sROUND1(C, D, A, B, F2, 15, in( 9), 2); \
		sROUND1(B, C, D, A, F2,  9, in( 5), 2); \
		sROUND1(A, B, C, D, F2, 11, in( 2), 2); \
		sROUND1(D, A, B, C, F2,  7, in(14), 2); \
		sROUND1(C, D, A, B, F2, 13, in(11), 2); \
		sROUND1(B, C, D, A, F2, 12, in( 8), 2); \
\
		sROUND1(A, B, C, D, F3, 11, in( 3), 3); \
		sROUND1(D, A, B, C, F3, 13, in(10), 3); \
		sROUND1(C, D, A, B, F3,  6, in(14), 3); \
		sROUND1(B, C, D, A, F3,  7, in( 4), 3); \
		sROUND1(A, B, C, D, F3, 14, in( 9), 3); \
		sROUND1(D, A, B, C, F3,  9, in(15), 3); \
		sROUND1(C, D, A, B, F3, 13, in( 8), 3); \
		sROUND1(B, C, D, A, F3, 15, in( 1), 3); \
		sROUND1(A, B, C, D, F3, 14, in( 2), 3); \
		sROUND1(D, A, B, C, F3,  8, in( 7), 3); \
		sROUND1(C, D, A, B, F3, 13, in( 0), 3); \
		sROUND1(B, C, D, A, F3,  6, in( 6), 3); \
		sROUND1(A, B, C, D, F3,  5, in(13), 3); \
		sROUND1(D, A, B, C, F3, 12, in(11), 3); \
		sROUND1(C, D, A, B, F3,  7, in( 5), 3); \
		sROUND1(B, C, D, A, F3,  5, in(12), 3); \
\
		sROUND1(A, B, C, D, F4, 11, in( 1), 4); \
		sROUND1(D, A, B, C, F4, 12, in( 9), 4); \
		sROUND1(C, D, A, B, F4, 14, in(11), 4); \
		sROUND1(B, C, D, A, F4, 15, in(10), 4); \
		sROUND1(A, B, C, D, F4, 14, in( 0), 4); \
		sROUND1(D, A, B, C, F4, 15, in( 8), 4); \
		sROUND1(C, D, A, B, F4,  9, in(12), 4); \
		sROUND1(B, C, D, A, F4,  8, in( 4), 4); \
		sROUND1(A, B, C, D, F4,  9, in(13), 4); \
		sROUND1(D, A, B, C, F4, 14, in( 3), 4); \
		sROUND1(C, D, A, B, F4,  5, in( 7), 4); \
		sROUND1(B, C, D, A, F4,  6, in(15), 4); \
		sROUND1(A, B, C, D, F4,  8, in(14), 4); \
		sROUND1(D, A, B, C, F4,  6, in( 5), 4); \
		sROUND1(C, D, A, B, F4,  5, in( 6), 4); \
		sROUND1(B, C, D, A, F4, 12, in( 2), 4); \
\
		sROUND2(A, B, C, D, F4,  8, in( 5), 1); \
		sROUND2(D, A, B, C, F4,  9, in(14), 1); \
		sROUND2(C, D, A, B, F4,  9, in( 7), 1); \
		sROUND2(B, C, D, A, F4, 11, in( 0), 1); \
		sROUND2(A, B, C, D, F4, 13, in( 9), 1); \
		sROUND2(D, A, B, C, F4, 15, in( 2), 1); \
		sROUND2(C, D, A, B, F4, 15, in(11), 1); \
		sROUND2(B, C, D, A, F4,  5, in( 4), 1); \
		sROUND2(A, B, C, D, F4,  7, in(13), 1); \
		sROUND2(D, A, B, C, F4,  7, in( 6), 1); \
		sROUND2(C, D, A, B, F4,  8, in(15), 1); \
		sROUND2(B, C, D, A, F4, 11, in( 8), 1); \
		sROUND2(A, B, C, D, F4, 14, in( 1), 1); \
		sROUND2(D, A, B, C, F4, 14, in(10), 1); \
		sROUND2(C, D, A, B, F4, 12, in( 3), 1); \
		sROUND2(B, C, D, A, F4,  6, in(12), 1); \
\
		sROUND2(A, B, C, D, F3,  9, in( 6), 2); \
		sROUND2(D, A, B, C, F3, 13, in(11), 2); \
		sROUND2(C, D, A, B, F3, 15, in( 3), 2); \
		sROUND2(B, C, D, A, F3,  7, in( 7), 2); \
		sROUND2(A, B, C, D, F3, 12, in( 0), 2); \
		sROUND2(D, A, B, C, F3,  8, in(13), 2); \
		sROUND2(C, D, A, B, F3,  9, in( 5), 2); \
		sROUND2(B, C, D, A, F3, 11, in(10), 2); \
		sROUND2(A, B, C, D, F3,  7, in(14), 2); \
		sROUND2(D, A, B, C, F3,  7, in(15), 2); \
		sROUND2(C, D, A, B, F3, 12, in( 8), 2); \
		sROUND2(B, C, D, A, F3,  7, in(12), 2); \
		sROUND2(A, B, C, D, F3,  6, in( 4), 2); \
		sROUND2(D, A, B, C, F3, 15, in( 9), 2); \
		sROUND2(C, D, A, B, F3, 13, in( 1), 2); \
		sROUND2(B, C, D, A, F3, 11, in( 2), 2); \
\
		sROUND2(A, B, C, D, F2,  9, in(15), 3); \
		sROUND2(D, A, B, C, F2,  7, in( 5), 3); \
		sROUND2(C, D, A, B, F2, 15, in( 1), 3); \
		sROUND2(B, C, D, A, F2, 11, in( 3), 3); \
		sROUND2(A, B, C, D, F2,  8, in( 7), 3); \
		sROUND2(D, A, B, C, F2,  6, in(14), 3); \
		sROUND2(C, D, A, B, F2,  6, in( 6), 3); \
		sROUND2(B, C, D, A, F2, 14, in( 9), 3); \
		sROUND2(A, B, C, D, F2, 12, in(11), 3); \
		sROUND2(D, A, B, C, F2, 13, in( 8), 3); \
		sROUND2(C, D, A, B, F2,  5, in(12), 3); \
		sROUND2(B, C, D, A, F2, 14, in( 2), 3); \
		sROUND2(A, B, C, D, F2, 13, in(10), 3); \
		sROUND2(D, A, B, C, F2, 13, in( 0), 3); \
		sROUND2(C, D, A, B, F2,  7, in( 4), 3); \
		sROUND2(B, C, D, A, F2,  5, in(13), 3); \
\
		sROUND2(A, B, C, D, F1, 15, in( 8), 4); \
		sROUND2(D, A, B, C, F1,  5, in( 6), 4); \
		sROUND2(C, D, A, B, F1,  8, in( 4), 4); \
		sROUND2(B, C, D, A, F1, 11, in( 1), 4); \
		sROUND2(A, B, C, D, F1, 14, in( 3), 4); \
		sROUND2(D, A, B, C, F1, 14, in(11), 4); \
		sROUND2(C, D, A, B, F1,  6, in(15), 4); \
		sROUND2(B, C, D, A, F1, 14, in( 0), 4); \
		sROUND2(A, B, C, D, F1,  6, in( 5), 4); \
		sROUND2(D, A, B, C, F1,  9, in(12), 4); \
		sROUND2(C, D, A, B, F1, 12, in( 2), 4); \
		sROUND2(B, C, D, A, F1,  9, in(13), 4); \
		sROUND2(A, B, C, D, F1, 12, in( 9), 4); \
		sROUND2(D, A, B, C, F1,  5, in( 7), 4); \
		sROUND2(C, D, A, B, F1, 15, in(10), 4); \
		sROUND2(B, C, D, A, F1,  8, in(14), 4); \
\
		tmp = SPH_T32((h)[1] + C1 + D2); \
		(h)[1] = SPH_T32((h)[2] + D1 + A2); \
		(h)[2] = SPH_T32((h)[3] + A1 + B2); \
		(h)[3] = SPH_T32((h)[0] + B1 + C2); \
		(h)[0] = tmp; \
	} while (0)

/*
 * One round of RIPEMD-128. The data must be aligned for 32-bit access.
 */
static void
ripemd128_round(const unsigned char *data, sph_u32 r[5])
{
#if SPH_LITTLE_FAST

#define RIPEMD128_IN(x)   sph_dec32le_aligned(data + (4 * (x)))

#else

	sph_u32 X_var[16];
	int i;

	for (i = 0; i < 16; i ++)
		X_var[i] = sph_dec32le_aligned(data + 4 * i);
#define RIPEMD128_IN(x)   X_var[x]

#endif
	RIPEMD128_ROUND_BODY(RIPEMD128_IN, r);
#undef RIPEMD128_IN
}

/* see sph_ripemd.h */
void
sph_ripemd128_init(void *cc)
{
	sph_ripemd128_context *sc;

	sc = cc;
	memcpy(sc->val, IV, sizeof sc->val);
#if SPH_64
	sc->count = 0;
#else
	sc->count_high = sc->count_low = 0;
#endif
}

#define RFUN   ripemd128_round
#define HASH   ripemd128
#define LE32   1
#include "md_helper.c"
#undef RFUN
#undef HASH
#undef LE32

/* see sph_ripemd.h */
void
sph_ripemd128_close(void *cc, void *dst)
{
	ripemd128_close(cc, dst, 4);
	sph_ripemd128_init(cc);
}

/* see sph_ripemd.h */
void
sph_ripemd128_comp(const sph_u32 msg[16], sph_u32 val[4])
{
#define RIPEMD128_IN(x)   msg[x]
	RIPEMD128_ROUND_BODY(RIPEMD128_IN, val);
#undef RIPEMD128_IN
}

/* ===================================================================== */
/*
 * RIPEMD-160.
 */

/*
 * Round constants for RIPEMD-160.
 */
#define K11    SPH_C32(0x00000000)
#define K12    SPH_C32(0x5A827999)
#define K13    SPH_C32(0x6ED9EBA1)
#define K14    SPH_C32(0x8F1BBCDC)
#define K15    SPH_C32(0xA953FD4E)

#define K21    SPH_C32(0x50A28BE6)
#define K22    SPH_C32(0x5C4DD124)
#define K23    SPH_C32(0x6D703EF3)
#define K24    SPH_C32(0x7A6D76E9)
#define K25    SPH_C32(0x00000000)

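/*
 * One RIPEMD-160 step: "a" takes the rotated mix of the state words,
 * message word "r" and round constant "k", plus "e"; "c" is then
 * additionally rotated left by 10 bits. That extra rotation and the
 * fifth state word are what distinguish this step from the 128-bit
 * variant's sRR above.
 */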
#define RR(a, b, c, d, e, f, s, r, k)   do { \
		a = SPH_T32(ROTL(SPH_T32(a + f(b, c, d) + r + k), s) + e); \
		c = ROTL(c, 10); \
	} while (0)

#define ROUND1(a, b, c, d, e, f, s, r, k)  \
	RR(a ## 1, b ## 1, c ## 1, d ## 1, e ## 1, f, s, r, K1 ## k)

#define ROUND2(a, b, c, d, e, f, s, r, k)  \
	RR(a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k)

/*
 * This macro defines the body for a RIPEMD-160 compression function
 * implementation. The "in" parameter should evaluate, when applied to a
 * numerical input parameter from 0 to 15, to an expression which yields
 * the corresponding input block. The "h" parameter should evaluate to
 * an array or pointer expression designating the array of 5 words which
 * contains the input and output of the compression function.
 */
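/*
 * As with the 128-bit variant, instantiation goes through a one-off
 * input macro; sph_ripemd160_comp() below is the canonical example:
 *
 *   #define MY_IN(x)   msg[x]
 *   RIPEMD160_ROUND_BODY(MY_IN, state);
 *   #undef MY_IN
 *
 * with "msg" a 16-word message block and "state" the 5-word chaining
 * value (MY_IN is an illustrative name, not part of the API).
 */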

#define RIPEMD160_ROUND_BODY(in, h)   do { \
		sph_u32 A1, B1, C1, D1, E1; \
		sph_u32 A2, B2, C2, D2, E2; \
		sph_u32 tmp; \
\
		A1 = A2 = (h)[0]; \
		B1 = B2 = (h)[1]; \
		C1 = C2 = (h)[2]; \
		D1 = D2 = (h)[3]; \
		E1 = E2 = (h)[4]; \
\
		ROUND1(A, B, C, D, E, F1, 11, in( 0), 1); \
		ROUND1(E, A, B, C, D, F1, 14, in( 1), 1); \
		ROUND1(D, E, A, B, C, F1, 15, in( 2), 1); \
		ROUND1(C, D, E, A, B, F1, 12, in( 3), 1); \
		ROUND1(B, C, D, E, A, F1,  5, in( 4), 1); \
		ROUND1(A, B, C, D, E, F1,  8, in( 5), 1); \
		ROUND1(E, A, B, C, D, F1,  7, in( 6), 1); \
		ROUND1(D, E, A, B, C, F1,  9, in( 7), 1); \
		ROUND1(C, D, E, A, B, F1, 11, in( 8), 1); \
		ROUND1(B, C, D, E, A, F1, 13, in( 9), 1); \
		ROUND1(A, B, C, D, E, F1, 14, in(10), 1); \
		ROUND1(E, A, B, C, D, F1, 15, in(11), 1); \
		ROUND1(D, E, A, B, C, F1,  6, in(12), 1); \
		ROUND1(C, D, E, A, B, F1,  7, in(13), 1); \
		ROUND1(B, C, D, E, A, F1,  9, in(14), 1); \
		ROUND1(A, B, C, D, E, F1,  8, in(15), 1); \
\
		ROUND1(E, A, B, C, D, F2,  7, in( 7), 2); \
		ROUND1(D, E, A, B, C, F2,  6, in( 4), 2); \
		ROUND1(C, D, E, A, B, F2,  8, in(13), 2); \
		ROUND1(B, C, D, E, A, F2, 13, in( 1), 2); \
		ROUND1(A, B, C, D, E, F2, 11, in(10), 2); \
		ROUND1(E, A, B, C, D, F2,  9, in( 6), 2); \
		ROUND1(D, E, A, B, C, F2,  7, in(15), 2); \
		ROUND1(C, D, E, A, B, F2, 15, in( 3), 2); \
		ROUND1(B, C, D, E, A, F2,  7, in(12), 2); \
		ROUND1(A, B, C, D, E, F2, 12, in( 0), 2); \
		ROUND1(E, A, B, C, D, F2, 15, in( 9), 2); \
		ROUND1(D, E, A, B, C, F2,  9, in( 5), 2); \
		ROUND1(C, D, E, A, B, F2, 11, in( 2), 2); \
		ROUND1(B, C, D, E, A, F2,  7, in(14), 2); \
		ROUND1(A, B, C, D, E, F2, 13, in(11), 2); \
		ROUND1(E, A, B, C, D, F2, 12, in( 8), 2); \
\
		ROUND1(D, E, A, B, C, F3, 11, in( 3), 3); \
		ROUND1(C, D, E, A, B, F3, 13, in(10), 3); \
		ROUND1(B, C, D, E, A, F3,  6, in(14), 3); \
		ROUND1(A, B, C, D, E, F3,  7, in( 4), 3); \
		ROUND1(E, A, B, C, D, F3, 14, in( 9), 3); \
		ROUND1(D, E, A, B, C, F3,  9, in(15), 3); \
		ROUND1(C, D, E, A, B, F3, 13, in( 8), 3); \
		ROUND1(B, C, D, E, A, F3, 15, in( 1), 3); \
		ROUND1(A, B, C, D, E, F3, 14, in( 2), 3); \
		ROUND1(E, A, B, C, D, F3,  8, in( 7), 3); \
		ROUND1(D, E, A, B, C, F3, 13, in( 0), 3); \
		ROUND1(C, D, E, A, B, F3,  6, in( 6), 3); \
		ROUND1(B, C, D, E, A, F3,  5, in(13), 3); \
		ROUND1(A, B, C, D, E, F3, 12, in(11), 3); \
		ROUND1(E, A, B, C, D, F3,  7, in( 5), 3); \
		ROUND1(D, E, A, B, C, F3,  5, in(12), 3); \
\
		ROUND1(C, D, E, A, B, F4, 11, in( 1), 4); \
		ROUND1(B, C, D, E, A, F4, 12, in( 9), 4); \
		ROUND1(A, B, C, D, E, F4, 14, in(11), 4); \
		ROUND1(E, A, B, C, D, F4, 15, in(10), 4); \
		ROUND1(D, E, A, B, C, F4, 14, in( 0), 4); \
		ROUND1(C, D, E, A, B, F4, 15, in( 8), 4); \
		ROUND1(B, C, D, E, A, F4,  9, in(12), 4); \
		ROUND1(A, B, C, D, E, F4,  8, in( 4), 4); \
		ROUND1(E, A, B, C, D, F4,  9, in(13), 4); \
		ROUND1(D, E, A, B, C, F4, 14, in( 3), 4); \
		ROUND1(C, D, E, A, B, F4,  5, in( 7), 4); \
		ROUND1(B, C, D, E, A, F4,  6, in(15), 4); \
		ROUND1(A, B, C, D, E, F4,  8, in(14), 4); \
		ROUND1(E, A, B, C, D, F4,  6, in( 5), 4); \
		ROUND1(D, E, A, B, C, F4,  5, in( 6), 4); \
		ROUND1(C, D, E, A, B, F4, 12, in( 2), 4); \
\
		ROUND1(B, C, D, E, A, F5,  9, in( 4), 5); \
		ROUND1(A, B, C, D, E, F5, 15, in( 0), 5); \
		ROUND1(E, A, B, C, D, F5,  5, in( 5), 5); \
		ROUND1(D, E, A, B, C, F5, 11, in( 9), 5); \
		ROUND1(C, D, E, A, B, F5,  6, in( 7), 5); \
		ROUND1(B, C, D, E, A, F5,  8, in(12), 5); \
		ROUND1(A, B, C, D, E, F5, 13, in( 2), 5); \
		ROUND1(E, A, B, C, D, F5, 12, in(10), 5); \
		ROUND1(D, E, A, B, C, F5,  5, in(14), 5); \
		ROUND1(C, D, E, A, B, F5, 12, in( 1), 5); \
		ROUND1(B, C, D, E, A, F5, 13, in( 3), 5); \
		ROUND1(A, B, C, D, E, F5, 14, in( 8), 5); \
		ROUND1(E, A, B, C, D, F5, 11, in(11), 5); \
		ROUND1(D, E, A, B, C, F5,  8, in( 6), 5); \
		ROUND1(C, D, E, A, B, F5,  5, in(15), 5); \
		ROUND1(B, C, D, E, A, F5,  6, in(13), 5); \
\
		ROUND2(A, B, C, D, E, F5,  8, in( 5), 1); \
		ROUND2(E, A, B, C, D, F5,  9, in(14), 1); \
		ROUND2(D, E, A, B, C, F5,  9, in( 7), 1); \
		ROUND2(C, D, E, A, B, F5, 11, in( 0), 1); \
		ROUND2(B, C, D, E, A, F5, 13, in( 9), 1); \
		ROUND2(A, B, C, D, E, F5, 15, in( 2), 1); \
		ROUND2(E, A, B, C, D, F5, 15, in(11), 1); \
		ROUND2(D, E, A, B, C, F5,  5, in( 4), 1); \
		ROUND2(C, D, E, A, B, F5,  7, in(13), 1); \
		ROUND2(B, C, D, E, A, F5,  7, in( 6), 1); \
		ROUND2(A, B, C, D, E, F5,  8, in(15), 1); \
		ROUND2(E, A, B, C, D, F5, 11, in( 8), 1); \
		ROUND2(D, E, A, B, C, F5, 14, in( 1), 1); \
		ROUND2(C, D, E, A, B, F5, 14, in(10), 1); \
		ROUND2(B, C, D, E, A, F5, 12, in( 3), 1); \
		ROUND2(A, B, C, D, E, F5,  6, in(12), 1); \
\
		ROUND2(E, A, B, C, D, F4,  9, in( 6), 2); \
		ROUND2(D, E, A, B, C, F4, 13, in(11), 2); \
		ROUND2(C, D, E, A, B, F4, 15, in( 3), 2); \
		ROUND2(B, C, D, E, A, F4,  7, in( 7), 2); \
		ROUND2(A, B, C, D, E, F4, 12, in( 0), 2); \
		ROUND2(E, A, B, C, D, F4,  8, in(13), 2); \
		ROUND2(D, E, A, B, C, F4,  9, in( 5), 2); \
		ROUND2(C, D, E, A, B, F4, 11, in(10), 2); \
		ROUND2(B, C, D, E, A, F4,  7, in(14), 2); \
		ROUND2(A, B, C, D, E, F4,  7, in(15), 2); \
		ROUND2(E, A, B, C, D, F4, 12, in( 8), 2); \
		ROUND2(D, E, A, B, C, F4,  7, in(12), 2); \
		ROUND2(C, D, E, A, B, F4,  6, in( 4), 2); \
		ROUND2(B, C, D, E, A, F4, 15, in( 9), 2); \
		ROUND2(A, B, C, D, E, F4, 13, in( 1), 2); \
		ROUND2(E, A, B, C, D, F4, 11, in( 2), 2); \
\
		ROUND2(D, E, A, B, C, F3,  9, in(15), 3); \
		ROUND2(C, D, E, A, B, F3,  7, in( 5), 3); \
		ROUND2(B, C, D, E, A, F3, 15, in( 1), 3); \
		ROUND2(A, B, C, D, E, F3, 11, in( 3), 3); \
		ROUND2(E, A, B, C, D, F3,  8, in( 7), 3); \
		ROUND2(D, E, A, B, C, F3,  6, in(14), 3); \
		ROUND2(C, D, E, A, B, F3,  6, in( 6), 3); \
		ROUND2(B, C, D, E, A, F3, 14, in( 9), 3); \
		ROUND2(A, B, C, D, E, F3, 12, in(11), 3); \
		ROUND2(E, A, B, C, D, F3, 13, in( 8), 3); \
		ROUND2(D, E, A, B, C, F3,  5, in(12), 3); \
		ROUND2(C, D, E, A, B, F3, 14, in( 2), 3); \
		ROUND2(B, C, D, E, A, F3, 13, in(10), 3); \
		ROUND2(A, B, C, D, E, F3, 13, in( 0), 3); \
		ROUND2(E, A, B, C, D, F3,  7, in( 4), 3); \
		ROUND2(D, E, A, B, C, F3,  5, in(13), 3); \
\
		ROUND2(C, D, E, A, B, F2, 15, in( 8), 4); \
		ROUND2(B, C, D, E, A, F2,  5, in( 6), 4); \
		ROUND2(A, B, C, D, E, F2,  8, in( 4), 4); \
		ROUND2(E, A, B, C, D, F2, 11, in( 1), 4); \
		ROUND2(D, E, A, B, C, F2, 14, in( 3), 4); \
		ROUND2(C, D, E, A, B, F2, 14, in(11), 4); \
		ROUND2(B, C, D, E, A, F2,  6, in(15), 4); \
		ROUND2(A, B, C, D, E, F2, 14, in( 0), 4); \
		ROUND2(E, A, B, C, D, F2,  6, in( 5), 4); \
		ROUND2(D, E, A, B, C, F2,  9, in(12), 4); \
		ROUND2(C, D, E, A, B, F2, 12, in( 2), 4); \
		ROUND2(B, C, D, E, A, F2,  9, in(13), 4); \
		ROUND2(A, B, C, D, E, F2, 12, in( 9), 4); \
		ROUND2(E, A, B, C, D, F2,  5, in( 7), 4); \
		ROUND2(D, E, A, B, C, F2, 15, in(10), 4); \
		ROUND2(C, D, E, A, B, F2,  8, in(14), 4); \
\
		ROUND2(B, C, D, E, A, F1,  8, in(12), 5); \
		ROUND2(A, B, C, D, E, F1,  5, in(15), 5); \
		ROUND2(E, A, B, C, D, F1, 12, in(10), 5); \
		ROUND2(D, E, A, B, C, F1,  9, in( 4), 5); \
		ROUND2(C, D, E, A, B, F1, 12, in( 1), 5); \
		ROUND2(B, C, D, E, A, F1,  5, in( 5), 5); \
		ROUND2(A, B, C, D, E, F1, 14, in( 8), 5); \
		ROUND2(E, A, B, C, D, F1,  6, in( 7), 5); \
		ROUND2(D, E, A, B, C, F1,  8, in( 6), 5); \
		ROUND2(C, D, E, A, B, F1, 13, in( 2), 5); \
		ROUND2(B, C, D, E, A, F1,  6, in(13), 5); \
		ROUND2(A, B, C, D, E, F1,  5, in(14), 5); \
		ROUND2(E, A, B, C, D, F1, 15, in( 0), 5); \
		ROUND2(D, E, A, B, C, F1, 13, in( 3), 5); \
		ROUND2(C, D, E, A, B, F1, 11, in( 9), 5); \
		ROUND2(B, C, D, E, A, F1, 11, in(11), 5); \
\
		tmp = SPH_T32((h)[1] + C1 + D2); \
		(h)[1] = SPH_T32((h)[2] + D1 + E2); \
		(h)[2] = SPH_T32((h)[3] + E1 + A2); \
		(h)[3] = SPH_T32((h)[4] + A1 + B2); \
		(h)[4] = SPH_T32((h)[0] + B1 + C2); \
		(h)[0] = tmp; \
	} while (0)

/*
 * One round of RIPEMD-160. The data must be aligned for 32-bit access.
 */
static void
ripemd160_round(const unsigned char *data, sph_u32 r[5])
{
#if SPH_LITTLE_FAST

#define RIPEMD160_IN(x)   sph_dec32le_aligned(data + (4 * (x)))

#else

	sph_u32 X_var[16];
	int i;

	for (i = 0; i < 16; i ++)
		X_var[i] = sph_dec32le_aligned(data + 4 * i);
#define RIPEMD160_IN(x)   X_var[x]

#endif
	RIPEMD160_ROUND_BODY(RIPEMD160_IN, r);
#undef RIPEMD160_IN
}

/* see sph_ripemd.h */
void
sph_ripemd160_init(void *cc)
{
	sph_ripemd160_context *sc;

	sc = cc;
	memcpy(sc->val, IV, sizeof sc->val);
#if SPH_64
	sc->count = 0;
#else
	sc->count_high = sc->count_low = 0;
#endif
}

#define RFUN   ripemd160_round
#define HASH   ripemd160
#define LE32   1
#include "md_helper.c"
#undef RFUN
#undef HASH
#undef LE32

/* see sph_ripemd.h */
void
sph_ripemd160_close(void *cc, void *dst)
{
	ripemd160_close(cc, dst, 5);
	sph_ripemd160_init(cc);
}

/* see sph_ripemd.h */
void
sph_ripemd160_comp(const sph_u32 msg[16], sph_u32 val[5])
{
#define RIPEMD160_IN(x)   msg[x]
	RIPEMD160_ROUND_BODY(RIPEMD160_IN, val);
#undef RIPEMD160_IN
}
@@ -0,0 +1,273 @@
/* $Id: sph_ripemd.h 216 2010-06-08 09:46:57Z tp $ */
/**
 * RIPEMD, RIPEMD-128 and RIPEMD-160 interface.
 *
 * RIPEMD was first described in: Research and Development in Advanced
 * Communication Technologies in Europe, "RIPE Integrity Primitives:
 * Final Report of RACE Integrity Primitives Evaluation (R1040)", RACE,
 * June 1992.
 *
 * A new, strengthened version, dubbed RIPEMD-160, was published in: H.
 * Dobbertin, A. Bosselaers, and B. Preneel, "RIPEMD-160, a strengthened
 * version of RIPEMD", Fast Software Encryption - FSE'96, LNCS 1039,
 * Springer (1996), pp. 71--82.
 *
 * This article describes both RIPEMD-160, with a 160-bit output, and a
 * reduced version called RIPEMD-128, which has a 128-bit output. RIPEMD-128
 * was meant as a "drop-in" replacement for any hash function with 128-bit
 * output, especially the original RIPEMD.
 *
 * @warning   Collisions, and an efficient method to build other collisions,
 * have been published for the original RIPEMD, which is thus considered as
 * cryptographically broken. It is also very rarely encountered, and there
 * seems to exist no free description or implementation of RIPEMD (except
 * the sphlib code, of course). As of January 2007, RIPEMD-128 and RIPEMD-160
 * seem as secure as their output length allows.
 *
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * ===========================(LICENSE END)=============================
 *
 * @file     sph_ripemd.h
 * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
 */

#ifndef SPH_RIPEMD_H__
#define SPH_RIPEMD_H__

#include <stddef.h>
#include "sph_types.h"

/**
 * Output size (in bits) for RIPEMD.
 */
#define SPH_SIZE_ripemd   128

/**
 * Output size (in bits) for RIPEMD-128.
 */
#define SPH_SIZE_ripemd128   128

/**
 * Output size (in bits) for RIPEMD-160.
 */
#define SPH_SIZE_ripemd160   160

/**
 * This structure is a context for RIPEMD computations: it contains the
 * intermediate values and some data from the last entered block. Once
 * a RIPEMD computation has been performed, the context can be reused for
 * another computation.
 *
 * The contents of this structure are private. A running RIPEMD computation
 * can be cloned by copying the context (e.g. with a simple
 * <code>memcpy()</code>).
 */
typedef struct {
#ifndef DOXYGEN_IGNORE
	unsigned char buf[64];    /* first field, for alignment */
	sph_u32 val[4];
#if SPH_64
	sph_u64 count;
#else
	sph_u32 count_high, count_low;
#endif
#endif
} sph_ripemd_context;

/**
 * Initialize a RIPEMD context. This process performs no memory allocation.
 *
 * @param cc   the RIPEMD context (pointer to
 *             a <code>sph_ripemd_context</code>)
 */
void sph_ripemd_init(void *cc);

/**
 * Process some data bytes. It is acceptable that <code>len</code> is zero
 * (in which case this function does nothing).
 *
 * @param cc     the RIPEMD context
 * @param data   the input data
 * @param len    the input data length (in bytes)
 */
void sph_ripemd(void *cc, const void *data, size_t len);

/**
 * Terminate the current RIPEMD computation and output the result into the
 * provided buffer. The destination buffer must be wide enough to
 * accommodate the result (16 bytes). The context is automatically
 * reinitialized.
 *
 * @param cc    the RIPEMD context
 * @param dst   the destination buffer
 */
void sph_ripemd_close(void *cc, void *dst);

/**
 * Apply the RIPEMD compression function on the provided data. The
 * <code>msg</code> parameter contains the 16 32-bit input blocks,
 * as numerical values (hence after the little-endian decoding). The
 * <code>val</code> parameter contains the 4 32-bit input blocks for
 * the compression function; the output is written in place in this
 * array.
 *
 * @param msg   the message block (16 values)
 * @param val   the function 128-bit input and output
 */
void sph_ripemd_comp(const sph_u32 msg[16], sph_u32 val[4]);

/* ===================================================================== */

/**
 * This structure is a context for RIPEMD-128 computations: it contains the
 * intermediate values and some data from the last entered block. Once
 * a RIPEMD-128 computation has been performed, the context can be reused for
 * another computation.
 *
 * The contents of this structure are private. A running RIPEMD-128 computation
 * can be cloned by copying the context (e.g. with a simple
 * <code>memcpy()</code>).
 */
typedef struct {
#ifndef DOXYGEN_IGNORE
	unsigned char buf[64];    /* first field, for alignment */
	sph_u32 val[4];
#if SPH_64
	sph_u64 count;
#else
	sph_u32 count_high, count_low;
#endif
#endif
} sph_ripemd128_context;

/**
 * Initialize a RIPEMD-128 context. This process performs no memory allocation.
 *
 * @param cc   the RIPEMD-128 context (pointer to
 *             a <code>sph_ripemd128_context</code>)
 */
void sph_ripemd128_init(void *cc);

/**
 * Process some data bytes. It is acceptable that <code>len</code> is zero
 * (in which case this function does nothing).
 *
 * @param cc     the RIPEMD-128 context
 * @param data   the input data
 * @param len    the input data length (in bytes)
 */
void sph_ripemd128(void *cc, const void *data, size_t len);

/**
 * Terminate the current RIPEMD-128 computation and output the result into the
 * provided buffer. The destination buffer must be wide enough to
 * accommodate the result (16 bytes). The context is automatically
 * reinitialized.
 *
 * @param cc    the RIPEMD-128 context
 * @param dst   the destination buffer
 */
void sph_ripemd128_close(void *cc, void *dst);

/**
 * Apply the RIPEMD-128 compression function on the provided data. The
 * <code>msg</code> parameter contains the 16 32-bit input blocks,
 * as numerical values (hence after the little-endian decoding). The
 * <code>val</code> parameter contains the 4 32-bit input blocks for
 * the compression function; the output is written in place in this
 * array.
 *
 * @param msg   the message block (16 values)
 * @param val   the function 128-bit input and output
 */
void sph_ripemd128_comp(const sph_u32 msg[16], sph_u32 val[4]);

/* ===================================================================== */

/**
 * This structure is a context for RIPEMD-160 computations: it contains the
 * intermediate values and some data from the last entered block. Once
 * a RIPEMD-160 computation has been performed, the context can be reused for
 * another computation.
 *
 * The contents of this structure are private. A running RIPEMD-160 computation
 * can be cloned by copying the context (e.g. with a simple
 * <code>memcpy()</code>).
 */
typedef struct {
#ifndef DOXYGEN_IGNORE
	unsigned char buf[64];    /* first field, for alignment */
	sph_u32 val[5];
#if SPH_64
	sph_u64 count;
#else
	sph_u32 count_high, count_low;
#endif
#endif
} sph_ripemd160_context;

/**
 * Initialize a RIPEMD-160 context. This process performs no memory allocation.
 *
 * @param cc   the RIPEMD-160 context (pointer to
 *             a <code>sph_ripemd160_context</code>)
 */
void sph_ripemd160_init(void *cc);

/**
 * Process some data bytes. It is acceptable that <code>len</code> is zero
 * (in which case this function does nothing).
 *
 * @param cc     the RIPEMD-160 context
 * @param data   the input data
 * @param len    the input data length (in bytes)
 */
void sph_ripemd160(void *cc, const void *data, size_t len);

/**
 * Terminate the current RIPEMD-160 computation and output the result into the
 * provided buffer. The destination buffer must be wide enough to
 * accommodate the result (20 bytes). The context is automatically
 * reinitialized.
 *
 * @param cc    the RIPEMD-160 context
 * @param dst   the destination buffer
 */
void sph_ripemd160_close(void *cc, void *dst);
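/*
 * Typical usage sketch for the streaming API above (buffer names are
 * illustrative):
 *
 *   sph_ripemd160_context cc;
 *   unsigned char digest[20];
 *
 *   sph_ripemd160_init(&cc);
 *   sph_ripemd160(&cc, data, len);
 *   sph_ripemd160_close(&cc, digest);  // digest holds the 160-bit hash
 */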

/**
 * Apply the RIPEMD-160 compression function on the provided data. The
 * <code>msg</code> parameter contains the 16 32-bit input blocks,
 * as numerical values (hence after the little-endian decoding). The
 * <code>val</code> parameter contains the 5 32-bit input blocks for
 * the compression function; the output is written in place in this
 * array.
 *
 * @param msg   the message block (16 values)
 * @param val   the function 160-bit input and output
 */
void sph_ripemd160_comp(const sph_u32 msg[16], sph_u32 val[5]);

#endif
@@ -0,0 +1,691 @@
/* $Id: sha2.c 227 2010-06-16 17:28:38Z tp $ */
/*
 * SHA-224 / SHA-256 implementation.
 *
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * ===========================(LICENSE END)=============================
 *
 * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
 */

#include <stddef.h>
#include <string.h>

#include "sph_sha2.h"

#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_SHA2
#define SPH_SMALL_FOOTPRINT_SHA2   1
#endif

#define CH(X, Y, Z)    ((((Y) ^ (Z)) & (X)) ^ (Z))
#define MAJ(X, Y, Z)   (((Y) & (Z)) | (((Y) | (Z)) & (X)))

#define ROTR    SPH_ROTR32

#define BSG2_0(x)   (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
#define BSG2_1(x)   (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
#define SSG2_0(x)   (ROTR(x, 7) ^ ROTR(x, 18) ^ SPH_T32((x) >> 3))
#define SSG2_1(x)   (ROTR(x, 17) ^ ROTR(x, 19) ^ SPH_T32((x) >> 10))

static const sph_u32 H224[8] = {
	SPH_C32(0xC1059ED8), SPH_C32(0x367CD507), SPH_C32(0x3070DD17),
	SPH_C32(0xF70E5939), SPH_C32(0xFFC00B31), SPH_C32(0x68581511),
	SPH_C32(0x64F98FA7), SPH_C32(0xBEFA4FA4)
};

static const sph_u32 H256[8] = {
	SPH_C32(0x6A09E667), SPH_C32(0xBB67AE85), SPH_C32(0x3C6EF372),
	SPH_C32(0xA54FF53A), SPH_C32(0x510E527F), SPH_C32(0x9B05688C),
	SPH_C32(0x1F83D9AB), SPH_C32(0x5BE0CD19)
};

/*
 * The SHA2_ROUND_BODY defines the body for a SHA-224 / SHA-256
 * compression function implementation. The "in" parameter should
 * evaluate, when applied to a numerical input parameter from 0 to 15,
 * to an expression which yields the corresponding input block. The "r"
 * parameter should evaluate to an array or pointer expression
 * designating the array of 8 words which contains the input and output
 * of the compression function.
 */
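/*
 * Illustrative instantiation (names are hypothetical, not part of this
 * file): with the 512-bit block already decoded into 16 words,
 *
 *   #define MY_IN(x)   W_in[x]
 *   SHA2_ROUND_BODY(MY_IN, state);
 *   #undef MY_IN
 *
 * where "W_in" holds the 16 message words and "state" is the 8-word
 * chaining value, updated in place.
 */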

#if SPH_SMALL_FOOTPRINT_SHA2

static const sph_u32 K[64] = {
	SPH_C32(0x428A2F98), SPH_C32(0x71374491),
	SPH_C32(0xB5C0FBCF), SPH_C32(0xE9B5DBA5),
	SPH_C32(0x3956C25B), SPH_C32(0x59F111F1),
	SPH_C32(0x923F82A4), SPH_C32(0xAB1C5ED5),
	SPH_C32(0xD807AA98), SPH_C32(0x12835B01),
	SPH_C32(0x243185BE), SPH_C32(0x550C7DC3),
	SPH_C32(0x72BE5D74), SPH_C32(0x80DEB1FE),
	SPH_C32(0x9BDC06A7), SPH_C32(0xC19BF174),
	SPH_C32(0xE49B69C1), SPH_C32(0xEFBE4786),
	SPH_C32(0x0FC19DC6), SPH_C32(0x240CA1CC),
	SPH_C32(0x2DE92C6F), SPH_C32(0x4A7484AA),
	SPH_C32(0x5CB0A9DC), SPH_C32(0x76F988DA),
	SPH_C32(0x983E5152), SPH_C32(0xA831C66D),
	SPH_C32(0xB00327C8), SPH_C32(0xBF597FC7),
	SPH_C32(0xC6E00BF3), SPH_C32(0xD5A79147),
	SPH_C32(0x06CA6351), SPH_C32(0x14292967),
	SPH_C32(0x27B70A85), SPH_C32(0x2E1B2138),
	SPH_C32(0x4D2C6DFC), SPH_C32(0x53380D13),
	SPH_C32(0x650A7354), SPH_C32(0x766A0ABB),
	SPH_C32(0x81C2C92E), SPH_C32(0x92722C85),
	SPH_C32(0xA2BFE8A1), SPH_C32(0xA81A664B),
	SPH_C32(0xC24B8B70), SPH_C32(0xC76C51A3),
	SPH_C32(0xD192E819), SPH_C32(0xD6990624),
	SPH_C32(0xF40E3585), SPH_C32(0x106AA070),
	SPH_C32(0x19A4C116), SPH_C32(0x1E376C08),
	SPH_C32(0x2748774C), SPH_C32(0x34B0BCB5),
	SPH_C32(0x391C0CB3), SPH_C32(0x4ED8AA4A),
	SPH_C32(0x5B9CCA4F), SPH_C32(0x682E6FF3),
	SPH_C32(0x748F82EE), SPH_C32(0x78A5636F),
	SPH_C32(0x84C87814), SPH_C32(0x8CC70208),
	SPH_C32(0x90BEFFFA), SPH_C32(0xA4506CEB),
	SPH_C32(0xBEF9A3F7), SPH_C32(0xC67178F2)
};

#define SHA2_MEXP1(in, pc)   do { \
		W[pc] = in(pc); \
	} while (0)

#define SHA2_MEXP2(in, pc)   do { \
		W[(pc) & 0x0F] = SPH_T32(SSG2_1(W[((pc) - 2) & 0x0F]) \
			+ W[((pc) - 7) & 0x0F] \
			+ SSG2_0(W[((pc) - 15) & 0x0F]) + W[(pc) & 0x0F]); \
	} while (0)
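/*
 * The message schedule W[] is kept as a 16-word circular buffer:
 * SHA2_MEXP2 computes W[t] = SSG2_1(W[t-2]) + W[t-7] + SSG2_0(W[t-15])
 * + W[t-16] with every index reduced mod 16, so the full 64-word
 * expansion never has to be materialized. This is what makes the small
 * footprint variant small.
 */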

#define SHA2_STEPn(n, a, b, c, d, e, f, g, h, in, pc)   do { \
		sph_u32 t1, t2; \
		SHA2_MEXP ## n(in, pc); \
		t1 = SPH_T32(h + BSG2_1(e) + CH(e, f, g) \
			+ K[pcount + (pc)] + W[(pc) & 0x0F]); \
		t2 = SPH_T32(BSG2_0(a) + MAJ(a, b, c)); \
		d = SPH_T32(d + t1); \
		h = SPH_T32(t1 + t2); \
	} while (0)

#define SHA2_STEP1(a, b, c, d, e, f, g, h, in, pc) \
	SHA2_STEPn(1, a, b, c, d, e, f, g, h, in, pc)
#define SHA2_STEP2(a, b, c, d, e, f, g, h, in, pc) \
	SHA2_STEPn(2, a, b, c, d, e, f, g, h, in, pc)

#define SHA2_ROUND_BODY(in, r)   do { \
		sph_u32 A, B, C, D, E, F, G, H; \
		sph_u32 W[16]; \
		unsigned pcount; \
\
		A = (r)[0]; \
		B = (r)[1]; \
		C = (r)[2]; \
		D = (r)[3]; \
		E = (r)[4]; \
		F = (r)[5]; \
		G = (r)[6]; \
		H = (r)[7]; \
		pcount = 0; \
		SHA2_STEP1(A, B, C, D, E, F, G, H, in,  0); \
		SHA2_STEP1(H, A, B, C, D, E, F, G, in,  1); \
		SHA2_STEP1(G, H, A, B, C, D, E, F, in,  2); \
		SHA2_STEP1(F, G, H, A, B, C, D, E, in,  3); \
		SHA2_STEP1(E, F, G, H, A, B, C, D, in,  4); \
		SHA2_STEP1(D, E, F, G, H, A, B, C, in,  5); \
		SHA2_STEP1(C, D, E, F, G, H, A, B, in,  6); \
		SHA2_STEP1(B, C, D, E, F, G, H, A, in,  7); \
		SHA2_STEP1(A, B, C, D, E, F, G, H, in,  8); \
		SHA2_STEP1(H, A, B, C, D, E, F, G, in,  9); \
		SHA2_STEP1(G, H, A, B, C, D, E, F, in, 10); \
		SHA2_STEP1(F, G, H, A, B, C, D, E, in, 11); \
		SHA2_STEP1(E, F, G, H, A, B, C, D, in, 12); \
		SHA2_STEP1(D, E, F, G, H, A, B, C, in, 13); \
		SHA2_STEP1(C, D, E, F, G, H, A, B, in, 14); \
		SHA2_STEP1(B, C, D, E, F, G, H, A, in, 15); \
		for (pcount = 16; pcount < 64; pcount += 16) { \
			SHA2_STEP2(A, B, C, D, E, F, G, H, in,  0); \
			SHA2_STEP2(H, A, B, C, D, E, F, G, in,  1); \
			SHA2_STEP2(G, H, A, B, C, D, E, F, in,  2); \
			SHA2_STEP2(F, G, H, A, B, C, D, E, in,  3); \
			SHA2_STEP2(E, F, G, H, A, B, C, D, in,  4); \
			SHA2_STEP2(D, E, F, G, H, A, B, C, in,  5); \
			SHA2_STEP2(C, D, E, F, G, H, A, B, in,  6); \
			SHA2_STEP2(B, C, D, E, F, G, H, A, in,  7); \
			SHA2_STEP2(A, B, C, D, E, F, G, H, in,  8); \
			SHA2_STEP2(H, A, B, C, D, E, F, G, in,  9); \
			SHA2_STEP2(G, H, A, B, C, D, E, F, in, 10); \
			SHA2_STEP2(F, G, H, A, B, C, D, E, in, 11); \
			SHA2_STEP2(E, F, G, H, A, B, C, D, in, 12); \
			SHA2_STEP2(D, E, F, G, H, A, B, C, in, 13); \
			SHA2_STEP2(C, D, E, F, G, H, A, B, in, 14); \
			SHA2_STEP2(B, C, D, E, F, G, H, A, in, 15); \
		} \
		(r)[0] = SPH_T32((r)[0] + A); \
		(r)[1] = SPH_T32((r)[1] + B); \
		(r)[2] = SPH_T32((r)[2] + C); \
		(r)[3] = SPH_T32((r)[3] + D); \
		(r)[4] = SPH_T32((r)[4] + E); \
		(r)[5] = SPH_T32((r)[5] + F); \
		(r)[6] = SPH_T32((r)[6] + G); \
		(r)[7] = SPH_T32((r)[7] + H); \
	} while (0)

#else

#define SHA2_ROUND_BODY(in, r)   do { \
		sph_u32 A, B, C, D, E, F, G, H, T1, T2; \
		sph_u32 W00, W01, W02, W03, W04, W05, W06, W07; \
		sph_u32 W08, W09, W10, W11, W12, W13, W14, W15; \
		int i; \
\
		A = (r)[0]; \
		B = (r)[1]; \
		C = (r)[2]; \
		D = (r)[3]; \
		E = (r)[4]; \
		F = (r)[5]; \
		G = (r)[6]; \
		H = (r)[7]; \
		W00 = in(0); \
		T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
			+ SPH_C32(0x428A2F98) + W00); \
		T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
		D = SPH_T32(D + T1); \
		H = SPH_T32(T1 + T2); \
		W01 = in(1); \
		T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
			+ SPH_C32(0x71374491) + W01); \
		T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
		C = SPH_T32(C + T1); \
		G = SPH_T32(T1 + T2); \
		W02 = in(2); \
		T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
			+ SPH_C32(0xB5C0FBCF) + W02); \
		T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
		B = SPH_T32(B + T1); \
		F = SPH_T32(T1 + T2); \
		W03 = in(3); \
		T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
			+ SPH_C32(0xE9B5DBA5) + W03); \
		T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
		A = SPH_T32(A + T1); \
		E = SPH_T32(T1 + T2); \
		W04 = in(4); \
		T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
			+ SPH_C32(0x3956C25B) + W04); \
		T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
		H = SPH_T32(H + T1); \
		D = SPH_T32(T1 + T2); \
		W05 = in(5); \
		T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
			+ SPH_C32(0x59F111F1) + W05); \
		T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
		G = SPH_T32(G + T1); \
		C = SPH_T32(T1 + T2); \
		W06 = in(6); \
		T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
			+ SPH_C32(0x923F82A4) + W06); \
		T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
		F = SPH_T32(F + T1); \
		B = SPH_T32(T1 + T2); \
		W07 = in(7); \
		T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
			+ SPH_C32(0xAB1C5ED5) + W07); \
		T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
		E = SPH_T32(E + T1); \
		A = SPH_T32(T1 + T2); \
		W08 = in(8); \
		T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
			+ SPH_C32(0xD807AA98) + W08); \
		T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
		D = SPH_T32(D + T1); \
		H = SPH_T32(T1 + T2); \
		W09 = in(9); \
		T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
			+ SPH_C32(0x12835B01) + W09); \
		T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
		C = SPH_T32(C + T1); \
		G = SPH_T32(T1 + T2); \
		W10 = in(10); \
		T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
			+ SPH_C32(0x243185BE) + W10); \
		T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
		B = SPH_T32(B + T1); \
		F = SPH_T32(T1 + T2); \
		W11 = in(11); \
		T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
			+ SPH_C32(0x550C7DC3) + W11); \
		T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
		A = SPH_T32(A + T1); \
		E = SPH_T32(T1 + T2); \
		W12 = in(12); \
		T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
			+ SPH_C32(0x72BE5D74) + W12); \
		T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
		H = SPH_T32(H + T1); \
		D = SPH_T32(T1 + T2); \
		W13 = in(13); \
		T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
			+ SPH_C32(0x80DEB1FE) + W13); \
		T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
		G = SPH_T32(G + T1); \
		C = SPH_T32(T1 + T2); \
		W14 = in(14); \
		T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
			+ SPH_C32(0x9BDC06A7) + W14); \
		T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
		F = SPH_T32(F + T1); \
		B = SPH_T32(T1 + T2); \
		W15 = in(15); \
		T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
			+ SPH_C32(0xC19BF174) + W15); \
		T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
		E = SPH_T32(E + T1); \
		A = SPH_T32(T1 + T2); \
		W00 = SPH_T32(SSG2_1(W14) + W09 + SSG2_0(W01) + W00); \
		T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
			+ SPH_C32(0xE49B69C1) + W00); \
		T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
		D = SPH_T32(D + T1); \
		H = SPH_T32(T1 + T2); \
		W01 = SPH_T32(SSG2_1(W15) + W10 + SSG2_0(W02) + W01); \
		T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
			+ SPH_C32(0xEFBE4786) + W01); \
		T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
		C = SPH_T32(C + T1); \
		G = SPH_T32(T1 + T2); \
		W02 = SPH_T32(SSG2_1(W00) + W11 + SSG2_0(W03) + W02); \
		T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
			+ SPH_C32(0x0FC19DC6) + W02); \
		T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
		B = SPH_T32(B + T1); \
		F = SPH_T32(T1 + T2); \
		W03 = SPH_T32(SSG2_1(W01) + W12 + SSG2_0(W04) + W03); \
		T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
			+ SPH_C32(0x240CA1CC) + W03); \
		T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
		A = SPH_T32(A + T1); \
		E = SPH_T32(T1 + T2); \
		W04 = SPH_T32(SSG2_1(W02) + W13 + SSG2_0(W05) + W04); \
		T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
			+ SPH_C32(0x2DE92C6F) + W04); \
		T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
		H = SPH_T32(H + T1); \
		D = SPH_T32(T1 + T2); \
		W05 = SPH_T32(SSG2_1(W03) + W14 + SSG2_0(W06) + W05); \
		T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
			+ SPH_C32(0x4A7484AA) + W05); \
		T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
		G = SPH_T32(G + T1); \
		C = SPH_T32(T1 + T2); \
		W06 = SPH_T32(SSG2_1(W04) + W15 + SSG2_0(W07) + W06); \
		T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
			+ SPH_C32(0x5CB0A9DC) + W06); \
		T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
		F = SPH_T32(F + T1); \
		B = SPH_T32(T1 + T2); \
		W07 = SPH_T32(SSG2_1(W05) + W00 + SSG2_0(W08) + W07); \
		T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
			+ SPH_C32(0x76F988DA) + W07); \
		T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
		E = SPH_T32(E + T1); \
		A = SPH_T32(T1 + T2); \
		W08 = SPH_T32(SSG2_1(W06) + W01 + SSG2_0(W09) + W08); \
		T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
			+ SPH_C32(0x983E5152) + W08); \
		T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
		D = SPH_T32(D + T1); \
		H = SPH_T32(T1 + T2); \
		W09 = SPH_T32(SSG2_1(W07) + W02 + SSG2_0(W10) + W09); \
		T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
			+ SPH_C32(0xA831C66D) + W09); \
		T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
		C = SPH_T32(C + T1); \
		G = SPH_T32(T1 + T2); \
		W10 = SPH_T32(SSG2_1(W08) + W03 + SSG2_0(W11) + W10); \
		T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
			+ SPH_C32(0xB00327C8) + W10); \
		T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
		B = SPH_T32(B + T1); \
		F = SPH_T32(T1 + T2); \
		W11 = SPH_T32(SSG2_1(W09) + W04 + SSG2_0(W12) + W11); \
		T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
			+ SPH_C32(0xBF597FC7) + W11); \
		T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
		A = SPH_T32(A + T1); \
		E = SPH_T32(T1 + T2); \
		W12 = SPH_T32(SSG2_1(W10) + W05 + SSG2_0(W13) + W12); \
		T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
			+ SPH_C32(0xC6E00BF3) + W12); \
		T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
		H = SPH_T32(H + T1); \
		D = SPH_T32(T1 + T2); \
		W13 = SPH_T32(SSG2_1(W11) + W06 + SSG2_0(W14) + W13); \
		T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
			+ SPH_C32(0xD5A79147) + W13); \
		T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
		G = SPH_T32(G + T1); \
		C = SPH_T32(T1 + T2); \
		W14 = SPH_T32(SSG2_1(W12) + W07 + SSG2_0(W15) + W14); \
		T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
			+ SPH_C32(0x06CA6351) + W14); \
		T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
		F = SPH_T32(F + T1); \
		B = SPH_T32(T1 + T2); \
		W15 = SPH_T32(SSG2_1(W13) + W08 + SSG2_0(W00) + W15); \
		T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
			+ SPH_C32(0x14292967) + W15); \
		T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
		E = SPH_T32(E + T1); \
		A = SPH_T32(T1 + T2); \
		W00 = SPH_T32(SSG2_1(W14) + W09 + SSG2_0(W01) + W00); \
		T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
			+ SPH_C32(0x27B70A85) + W00); \
		T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
		D = SPH_T32(D + T1); \
		H = SPH_T32(T1 + T2); \
		W01 = SPH_T32(SSG2_1(W15) + W10 + SSG2_0(W02) + W01); \
		T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
			+ SPH_C32(0x2E1B2138) + W01); \
		T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
		C = SPH_T32(C + T1); \
		G = SPH_T32(T1 + T2); \
		W02 = SPH_T32(SSG2_1(W00) + W11 + SSG2_0(W03) + W02); \
		T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
			+ SPH_C32(0x4D2C6DFC) + W02); \
		T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
		B = SPH_T32(B + T1); \
		F = SPH_T32(T1 + T2); \
		W03 = SPH_T32(SSG2_1(W01) + W12 + SSG2_0(W04) + W03); \
		T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
			+ SPH_C32(0x53380D13) + W03); \
		T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
		A = SPH_T32(A + T1); \
		E = SPH_T32(T1 + T2); \
		W04 = SPH_T32(SSG2_1(W02) + W13 + SSG2_0(W05) + W04); \
		T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
			+ SPH_C32(0x650A7354) + W04); \
		T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
		H = SPH_T32(H + T1); \
		D = SPH_T32(T1 + T2); \
		W05 = SPH_T32(SSG2_1(W03) + W14 + SSG2_0(W06) + W05); \
		T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
			+ SPH_C32(0x766A0ABB) + W05); \
		T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
		G = SPH_T32(G + T1); \
		C = SPH_T32(T1 + T2); \
		W06 = SPH_T32(SSG2_1(W04) + W15 + SSG2_0(W07) + W06); \
		T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
			+ SPH_C32(0x81C2C92E) + W06); \
		T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
		F = SPH_T32(F + T1); \
		B = SPH_T32(T1 + T2); \
		W07 = SPH_T32(SSG2_1(W05) + W00 + SSG2_0(W08) + W07); \
		T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
			+ SPH_C32(0x92722C85) + W07); \
		T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
		E = SPH_T32(E + T1); \
		A = SPH_T32(T1 + T2); \
		W08 = SPH_T32(SSG2_1(W06) + W01 + SSG2_0(W09) + W08); \
		T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
||||
+ SPH_C32(0xA2BFE8A1) + W08); \ |
||||
T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \ |
||||
D = SPH_T32(D + T1); \ |
||||
H = SPH_T32(T1 + T2); \ |
||||
W09 = SPH_T32(SSG2_1(W07) + W02 + SSG2_0(W10) + W09); \ |
||||
T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \ |
||||
+ SPH_C32(0xA81A664B) + W09); \ |
||||
T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \ |
||||
C = SPH_T32(C + T1); \ |
||||
G = SPH_T32(T1 + T2); \ |
||||
W10 = SPH_T32(SSG2_1(W08) + W03 + SSG2_0(W11) + W10); \ |
||||
T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \ |
||||
+ SPH_C32(0xC24B8B70) + W10); \ |
||||
T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \ |
||||
B = SPH_T32(B + T1); \ |
||||
F = SPH_T32(T1 + T2); \ |
||||
W11 = SPH_T32(SSG2_1(W09) + W04 + SSG2_0(W12) + W11); \ |
||||
T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \ |
||||
+ SPH_C32(0xC76C51A3) + W11); \ |
||||
T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \ |
||||
A = SPH_T32(A + T1); \ |
||||
E = SPH_T32(T1 + T2); \ |
||||
W12 = SPH_T32(SSG2_1(W10) + W05 + SSG2_0(W13) + W12); \ |
||||
T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \ |
||||
+ SPH_C32(0xD192E819) + W12); \ |
||||
T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \ |
||||
H = SPH_T32(H + T1); \ |
||||
D = SPH_T32(T1 + T2); \ |
||||
W13 = SPH_T32(SSG2_1(W11) + W06 + SSG2_0(W14) + W13); \ |
||||
T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \ |
||||
+ SPH_C32(0xD6990624) + W13); \ |
||||
T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \ |
||||
G = SPH_T32(G + T1); \ |
||||
C = SPH_T32(T1 + T2); \ |
||||
W14 = SPH_T32(SSG2_1(W12) + W07 + SSG2_0(W15) + W14); \ |
||||
T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \ |
||||
+ SPH_C32(0xF40E3585) + W14); \ |
||||
T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \ |
||||
F = SPH_T32(F + T1); \ |
||||
B = SPH_T32(T1 + T2); \ |
||||
W15 = SPH_T32(SSG2_1(W13) + W08 + SSG2_0(W00) + W15); \ |
||||
T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \ |
||||
+ SPH_C32(0x106AA070) + W15); \ |
||||
T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \ |
||||
E = SPH_T32(E + T1); \ |
||||
A = SPH_T32(T1 + T2); \ |
||||
W00 = SPH_T32(SSG2_1(W14) + W09 + SSG2_0(W01) + W00); \ |
||||
T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \ |
||||
+ SPH_C32(0x19A4C116) + W00); \ |
||||
T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \ |
||||
D = SPH_T32(D + T1); \ |
||||
H = SPH_T32(T1 + T2); \ |
||||
W01 = SPH_T32(SSG2_1(W15) + W10 + SSG2_0(W02) + W01); \ |
||||
T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \ |
||||
+ SPH_C32(0x1E376C08) + W01); \ |
||||
T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \ |
||||
C = SPH_T32(C + T1); \ |
||||
G = SPH_T32(T1 + T2); \ |
||||
W02 = SPH_T32(SSG2_1(W00) + W11 + SSG2_0(W03) + W02); \ |
||||
T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \ |
||||
+ SPH_C32(0x2748774C) + W02); \ |
||||
T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \ |
||||
B = SPH_T32(B + T1); \ |
||||
F = SPH_T32(T1 + T2); \ |
||||
W03 = SPH_T32(SSG2_1(W01) + W12 + SSG2_0(W04) + W03); \ |
||||
T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \ |
||||
+ SPH_C32(0x34B0BCB5) + W03); \ |
||||
T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \ |
||||
A = SPH_T32(A + T1); \ |
||||
E = SPH_T32(T1 + T2); \ |
||||
W04 = SPH_T32(SSG2_1(W02) + W13 + SSG2_0(W05) + W04); \ |
||||
T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \ |
||||
+ SPH_C32(0x391C0CB3) + W04); \ |
||||
T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \ |
||||
H = SPH_T32(H + T1); \ |
||||
D = SPH_T32(T1 + T2); \ |
||||
W05 = SPH_T32(SSG2_1(W03) + W14 + SSG2_0(W06) + W05); \ |
||||
T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \ |
||||
+ SPH_C32(0x4ED8AA4A) + W05); \ |
||||
T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \ |
||||
G = SPH_T32(G + T1); \ |
||||
C = SPH_T32(T1 + T2); \ |
||||
W06 = SPH_T32(SSG2_1(W04) + W15 + SSG2_0(W07) + W06); \ |
||||
T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \ |
||||
+ SPH_C32(0x5B9CCA4F) + W06); \ |
||||
T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \ |
||||
F = SPH_T32(F + T1); \ |
||||
B = SPH_T32(T1 + T2); \ |
||||
W07 = SPH_T32(SSG2_1(W05) + W00 + SSG2_0(W08) + W07); \ |
||||
T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \ |
||||
+ SPH_C32(0x682E6FF3) + W07); \ |
||||
T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \ |
||||
E = SPH_T32(E + T1); \ |
||||
A = SPH_T32(T1 + T2); \ |
||||
W08 = SPH_T32(SSG2_1(W06) + W01 + SSG2_0(W09) + W08); \ |
||||
T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \ |
||||
+ SPH_C32(0x748F82EE) + W08); \ |
||||
T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \ |
||||
D = SPH_T32(D + T1); \ |
||||
H = SPH_T32(T1 + T2); \ |
||||
W09 = SPH_T32(SSG2_1(W07) + W02 + SSG2_0(W10) + W09); \ |
||||
T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \ |
||||
+ SPH_C32(0x78A5636F) + W09); \ |
||||
T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \ |
||||
C = SPH_T32(C + T1); \ |
||||
G = SPH_T32(T1 + T2); \ |
||||
W10 = SPH_T32(SSG2_1(W08) + W03 + SSG2_0(W11) + W10); \ |
||||
T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \ |
||||
+ SPH_C32(0x84C87814) + W10); \ |
||||
T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \ |
||||
B = SPH_T32(B + T1); \ |
||||
F = SPH_T32(T1 + T2); \ |
||||
W11 = SPH_T32(SSG2_1(W09) + W04 + SSG2_0(W12) + W11); \ |
||||
T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \ |
||||
+ SPH_C32(0x8CC70208) + W11); \ |
||||
T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \ |
||||
A = SPH_T32(A + T1); \ |
||||
E = SPH_T32(T1 + T2); \ |
||||
W12 = SPH_T32(SSG2_1(W10) + W05 + SSG2_0(W13) + W12); \ |
||||
T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \ |
||||
+ SPH_C32(0x90BEFFFA) + W12); \ |
||||
T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \ |
||||
H = SPH_T32(H + T1); \ |
||||
D = SPH_T32(T1 + T2); \ |
||||
W13 = SPH_T32(SSG2_1(W11) + W06 + SSG2_0(W14) + W13); \ |
||||
T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \ |
||||
+ SPH_C32(0xA4506CEB) + W13); \ |
||||
T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \ |
||||
G = SPH_T32(G + T1); \ |
||||
C = SPH_T32(T1 + T2); \ |
||||
W14 = SPH_T32(SSG2_1(W12) + W07 + SSG2_0(W15) + W14); \ |
||||
T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \ |
||||
+ SPH_C32(0xBEF9A3F7) + W14); \ |
||||
T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \ |
||||
F = SPH_T32(F + T1); \ |
||||
B = SPH_T32(T1 + T2); \ |
||||
W15 = SPH_T32(SSG2_1(W13) + W08 + SSG2_0(W00) + W15); \ |
||||
T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \ |
||||
+ SPH_C32(0xC67178F2) + W15); \ |
||||
T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \ |
||||
E = SPH_T32(E + T1); \ |
||||
A = SPH_T32(T1 + T2); \ |
||||
(r)[0] = SPH_T32((r)[0] + A); \ |
||||
(r)[1] = SPH_T32((r)[1] + B); \ |
||||
(r)[2] = SPH_T32((r)[2] + C); \ |
||||
(r)[3] = SPH_T32((r)[3] + D); \ |
||||
(r)[4] = SPH_T32((r)[4] + E); \ |
||||
(r)[5] = SPH_T32((r)[5] + F); \ |
||||
(r)[6] = SPH_T32((r)[6] + G); \ |
||||
(r)[7] = SPH_T32((r)[7] + H); \ |
||||
} while (0) |

#endif

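/*
 * The unrolled T1/T2 blocks above all instantiate the same FIPS 180-4 round
 * step; only the role of the eight state words rotates from round to round,
 * which the macro expresses by renaming A..H instead of shifting them. The
 * sketch below (disabled from compilation; rotr32/sha256_step and friends
 * are illustrative names, not part of this file) shows the equivalent
 * loop-form step and the in-place 16-word message-schedule update performed
 * by the W00..W15 assignments.
 */
#if 0
#include <stdint.h>

static uint32_t rotr32(uint32_t x, unsigned n) { return (x >> n) | (x << (32 - n)); }
static uint32_t ch(uint32_t x, uint32_t y, uint32_t z)  { return (x & y) ^ (~x & z); }
static uint32_t maj(uint32_t x, uint32_t y, uint32_t z) { return (x & y) ^ (x & z) ^ (y & z); }
static uint32_t bsg0(uint32_t x) { return rotr32(x, 2) ^ rotr32(x, 13) ^ rotr32(x, 22); }
static uint32_t bsg1(uint32_t x) { return rotr32(x, 6) ^ rotr32(x, 11) ^ rotr32(x, 25); }
static uint32_t ssg0(uint32_t x) { return rotr32(x, 7) ^ rotr32(x, 18) ^ (x >> 3); }
static uint32_t ssg1(uint32_t x) { return rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10); }

/* One round: fold K[t] + W[t] into the state, then rotate the eight words. */
static void sha256_step(uint32_t s[8], uint32_t kt, uint32_t wt)
{
	uint32_t t1 = s[7] + bsg1(s[4]) + ch(s[4], s[5], s[6]) + kt + wt;
	uint32_t t2 = bsg0(s[0]) + maj(s[0], s[1], s[2]);
	s[7] = s[6]; s[6] = s[5]; s[5] = s[4];
	s[4] = s[3] + t1;
	s[3] = s[2]; s[2] = s[1]; s[1] = s[0];
	s[0] = t1 + t2;
}

/* Rounds 16..63 expand the schedule in place over a 16-word ring, exactly
 * as the W00..W15 lines above:
 *   W[t % 16] += ssg1(W[(t - 2) % 16]) + W[(t - 7) % 16] + ssg0(W[(t - 15) % 16])
 */
#endif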
/*
 * One round of SHA-224 / SHA-256. The data must be aligned for 32-bit access.
 */
static void
sha2_round(const unsigned char *data, sph_u32 r[8])
{
#define SHA2_IN(x) sph_dec32be_aligned(data + (4 * (x)))
	SHA2_ROUND_BODY(SHA2_IN, r);
#undef SHA2_IN
}

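/*
 * SHA2_IN(x) reads word x of the 64-byte block as a big-endian 32-bit value.
 * A portable behavioral equivalent of sph_dec32be_aligned (sphlib may take a
 * direct-load-plus-byteswap fast path on platforms that allow it) is:
 */
#if 0
#include <stdint.h>

static uint32_t dec32be(const unsigned char *p)
{
	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16)
	     | ((uint32_t)p[2] << 8)  | (uint32_t)p[3];
}
#endif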
/* see sph_sha2.h */
void
sph_sha224_init(void *cc)
{
	sph_sha224_context *sc;

	sc = cc;
	memcpy(sc->val, H224, sizeof H224);
#if SPH_64
	sc->count = 0;
#else
	sc->count_high = sc->count_low = 0;
#endif
}

/* see sph_sha2.h */
void
sph_sha256_init(void *cc)
{
	sph_sha256_context *sc;

	sc = cc;
	memcpy(sc->val, H256, sizeof H256);
#if SPH_64
	sc->count = 0;
#else
	sc->count_high = sc->count_low = 0;
#endif
}

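/*
 * H224 and H256 (defined earlier in this file) are the FIPS 180-4 initial
 * state values: for SHA-256, the first 32 bits of the fractional parts of
 * the square roots of the first eight primes; for SHA-224, the second 32
 * bits of the same quantities for the ninth through sixteenth primes, so
 * the two functions never share a chaining state. For reference:
 *   H256 = { 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
 *            0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 }
 *   H224 = { 0xC1059ED8, 0x367CD507, 0x3070DD17, 0xF70E5939,
 *            0xFFC00B31, 0x68581511, 0x64F98FA7, 0xBEFA4FA4 }
 */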
#define RFUN sha2_round
#define HASH sha224
#define BE32 1
#include "md_helper.c"

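/*
 * md_helper.c is sphlib's shared Merkle-Damgård template: RFUN selects the
 * compression function, HASH the name stem, and BE32 the big-endian length
 * encoding. Including it token-pastes the buffering/padding entry points
 * used below (sha224_close(), sha224_addbits_and_close(), the streaming
 * update, ...). A much-reduced sketch of the mechanism (disabled; purely
 * illustrative, not the actual template contents):
 */
#if 0
#include <stddef.h>

#define XCAT_(a, b) a ## b
#define XCAT(a, b)  XCAT_(a, b)

/* "Template": each expansion defines a function named from HASH. */
#define DEFINE_UPDATE \
	static void XCAT(HASH, _update)(void *cc, const void *data, size_t len) \
	{ /* buffer input; call RFUN once per full 64-byte block */ }

#define HASH sha224
#define RFUN sha2_round
DEFINE_UPDATE /* expands to: static void sha224_update(...) */
#undef HASH
#undef RFUN
#endif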
/* see sph_sha2.h */
void
sph_sha224_close(void *cc, void *dst)
{
	sha224_close(cc, dst, 7); /* SHA-224 emits 7 of the 8 state words */
	sph_sha224_init(cc);
}

/* see sph_sha2.h */
void
sph_sha224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	sha224_addbits_and_close(cc, ub, n, dst, 7);
	sph_sha224_init(cc);
}

/* see sph_sha2.h */
void
sph_sha256_close(void *cc, void *dst)
{
	/* same md_helper-generated core; SHA-256 emits all 8 state words */
	sha224_close(cc, dst, 8);
	sph_sha256_init(cc);
}

/* see sph_sha2.h */
void
sph_sha256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	sha224_addbits_and_close(cc, ub, n, dst, 8);
	sph_sha256_init(cc);
}

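/*
 * Usage sketch (disabled): one-shot SHA-256 through the streaming API
 * declared in sph_sha2.h. sph_sha256() may be called repeatedly to stream
 * data; sph_sha256_close() writes the 32-byte digest and re-initializes the
 * context, as implemented above. SHA-256("abc") is the standard FIPS test
 * vector ba7816bf...f20015ad.
 */
#if 0
#include <stdio.h>
#include "sph_sha2.h"

int main(void)
{
	sph_sha256_context cc;
	unsigned char dig[32];

	sph_sha256_init(&cc);
	sph_sha256(&cc, "abc", 3);
	sph_sha256_close(&cc, dig);
	for (int i = 0; i < 32; i++)
		printf("%02x", dig[i]);
	printf("\n");
	return 0;
}
#endif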
/* see sph_sha2.h */
void
sph_sha224_comp(const sph_u32 msg[16], sph_u32 val[8])
{
#define SHA2_IN(x) msg[x]
	SHA2_ROUND_BODY(SHA2_IN, val);
#undef SHA2_IN
}
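
/*
 * Since SHA-224 and SHA-256 share SHA2_ROUND_BODY, this helper is also the
 * SHA-256 compression function on already-decoded big-endian words; stock
 * sphlib typically aliases sph_sha256_comp to it in sph_sha2.h.
 */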