From 02e0fc4db5cdd2c80ea08d45b1be6ae4b81e0512 Mon Sep 17 00:00:00 2001 From: elbandi Date: Sun, 17 Jul 2016 22:36:27 +0000 Subject: [PATCH] Add lbry algo support --- Makefile.am | 1 + algorithm.c | 31 +- algorithm.h | 3 +- algorithm/lbry.c | 60 +++ algorithm/lbry.h | 8 + kernel/lbry.cl | 179 +++++++++ kernel/ripemd160.cl | 423 +++++++++++++++++++++ kernel/sha256.cl | 149 ++++++++ kernel/wolf-sha512.cl | 108 ++++++ miner.h | 10 + ocl.c | 1 + sgminer.c | 4 + sph/Makefile.am | 2 +- sph/ripemd.c | 833 ++++++++++++++++++++++++++++++++++++++++++ sph/sph_ripemd.h | 273 ++++++++++++++ util.c | 37 +- 16 files changed, 2105 insertions(+), 17 deletions(-) create mode 100644 algorithm/lbry.c create mode 100644 algorithm/lbry.h create mode 100644 kernel/lbry.cl create mode 100644 kernel/ripemd160.cl create mode 100644 kernel/sha256.cl create mode 100644 kernel/wolf-sha512.cl create mode 100644 sph/ripemd.c create mode 100644 sph/sph_ripemd.h diff --git a/Makefile.am b/Makefile.am index e8a3f3d4..137a5723 100644 --- a/Makefile.am +++ b/Makefile.am @@ -80,6 +80,7 @@ sgminer_SOURCES += algorithm/yescrypt.h algorithm/yescrypt.c algorithm/yescrypt_ sgminer_SOURCES += algorithm/blake256.c algorithm/blake256.h sgminer_SOURCES += algorithm/blakecoin.c algorithm/blakecoin.h sgminer_SOURCES += algorithm/decred.c algorithm/decred.h +sgminer_SOURCES += algorithm/lbry.c algorithm/lbry.h bin_SCRIPTS = $(top_srcdir)/kernel/*.cl diff --git a/algorithm.c b/algorithm.c index ad41a494..a4637ba0 100644 --- a/algorithm.c +++ b/algorithm.c @@ -40,6 +40,7 @@ #include "algorithm/blake256.h" #include "algorithm/blakecoin.h" #include "algorithm/decred.h" +#include "algorithm/lbry.h" #include "compat.h" @@ -72,7 +73,8 @@ const char *algorithm_type_str[] = { "Blakecoin", "Blake", "Decred", - "Vanilla" + "Vanilla", + "Lbry" }; void sha256(const unsigned char *message, unsigned int len, unsigned char *digest) @@ -1007,6 +1009,31 @@ static cl_int queue_decred_kernel(_clState *clState, dev_blk_ctx *blk, 
__maybe_u return status; } +static cl_int queue_lbry_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) +{ + cl_kernel *kernel = &clState->kernel; + unsigned int num = 0; + cl_ulong le_target; + cl_int status = 0; + + le_target = *(cl_ulong *)(blk->work->target + 24); + flip112(clState->cldata, blk->work->data); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 112, clState->cldata, 0, NULL, NULL); + + CL_SET_ARG(clState->CLbuffer0); + CL_SET_ARG(clState->padbuffer8); + num = 0; + kernel = clState->extra_kernels; + CL_SET_ARG_0(clState->padbuffer8); + num = 0; + + CL_NEXTKERNEL_SET_ARG(clState->padbuffer8); + CL_SET_ARG(clState->outputBuffer); + CL_SET_ARG(le_target); + + return status; +} + static algorithm_settings_t algos[] = { // kernels starting from this will have difficulty calculated by using litecoin algorithm #define A_SCRYPT(a) \ @@ -1104,6 +1131,8 @@ static algorithm_settings_t algos[] = { { "blake256r14", ALGO_BLAKE, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x00000000UL, 0, 128, 0, blake256_regenhash, blake256_midstate, blake256_prepare_work, queue_blake_kernel, gen_hash, NULL }, { "vanilla", ALGO_VANILLA, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x000000ffUL, 0, 128, 0, blakecoin_regenhash, blakecoin_midstate, blakecoin_prepare_work, queue_blake_kernel, gen_hash, NULL }, + { "lbry", ALGO_LBRY, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 2, 4 * 8 * 4194304, 0, lbry_regenhash, NULL, NULL, queue_lbry_kernel, gen_hash, NULL }, + // Terminator (do not remove) { NULL, ALGO_UNK, "", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL } }; diff --git a/algorithm.h b/algorithm.h index 5628d006..c02da8a6 100644 --- a/algorithm.h +++ b/algorithm.h @@ -37,7 +37,8 @@ typedef enum { ALGO_BLAKECOIN, ALGO_BLAKE, ALGO_DECRED, - ALGO_VANILLA + ALGO_VANILLA, + ALGO_LBRY } algorithm_type_t; extern const char *algorithm_type_str[]; diff --git a/algorithm/lbry.c b/algorithm/lbry.c new file 
mode 100644 index 00000000..163c2e73 --- /dev/null +++ b/algorithm/lbry.c @@ -0,0 +1,60 @@ +#include "config.h" +#include "miner.h" + +#include +#include +#include + +#include "sph/sph_sha2.h" +#include "sph/sph_ripemd.h" + +typedef struct { + sph_sha256_context sha256; + sph_sha512_context sha512; + sph_ripemd160_context ripemd; +} lbryhash_context_holder; + +void lbryhash(void* output, const void* input) +{ + uint32_t hashA[16], hashB[16], hashC[16]; + lbryhash_context_holder ctx; + + sph_sha256_init(&ctx.sha256); + sph_sha512_init(&ctx.sha512); + sph_ripemd160_init(&ctx.ripemd); + + sph_sha256 (&ctx.sha256, input, 112); + sph_sha256_close(&ctx.sha256, hashA); + + sph_sha256 (&ctx.sha256, hashA, 32); + sph_sha256_close(&ctx.sha256, hashA); + + sph_sha512 (&ctx.sha512, hashA, 32); + sph_sha512_close(&ctx.sha512, hashA); + + sph_ripemd160 (&ctx.ripemd, hashA, 32); + sph_ripemd160_close(&ctx.ripemd, hashB); + + sph_ripemd160 (&ctx.ripemd, hashA+8, 32); + sph_ripemd160_close(&ctx.ripemd, hashC); + + sph_sha256 (&ctx.sha256, hashB, 20); + sph_sha256 (&ctx.sha256, hashC, 20); + sph_sha256_close(&ctx.sha256, hashA); + + sph_sha256 (&ctx.sha256, hashA, 32); + sph_sha256_close(&ctx.sha256, hashA); + + memcpy(output, hashA, 32); +} + +void lbry_regenhash(struct work *work) +{ + uint32_t data[28]; + uint32_t *nonce = (uint32_t *)(work->data + 108); + uint32_t *ohash = (uint32_t *)(work->hash); + + be32enc_vect(data, (const uint32_t *)work->data, 27); + data[27] = htobe32(*nonce); + lbryhash(ohash, data); +} diff --git a/algorithm/lbry.h b/algorithm/lbry.h new file mode 100644 index 00000000..c9bbdbdf --- /dev/null +++ b/algorithm/lbry.h @@ -0,0 +1,8 @@ +#ifndef LBRY_H +#define LBRY_H + +#include "miner.h" + +extern void lbry_regenhash(struct work *work); + +#endif diff --git a/kernel/lbry.cl b/kernel/lbry.cl new file mode 100644 index 00000000..fef4f90e --- /dev/null +++ b/kernel/lbry.cl @@ -0,0 +1,179 @@ +#include "sha256.cl" +#include "wolf-sha512.cl" +#include 
"ripemd160.cl" + +#define SWAP32(x) as_uint(as_uchar4(x).s3210) +#define SWAP64(x) as_ulong(as_uchar8(x).s76543210) + + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search(__global const uint *input, __global uint8 *ctx) +{ + // SHA256 takes 16 uints of input per block - we have 112 bytes to process + // 8 * 16 == 64, meaning two block transforms. + + uint SHA256Buf[16]; + uint gid = get_global_id(0); + + // Remember the last four is the nonce - so 108 bytes / 4 bytes per dword + #pragma unroll + for(int i = 0; i < 16; ++i) SHA256Buf[i] = SWAP32(input[i]); + + + + // SHA256 initialization constants + uint8 outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19); + + #pragma unroll + for(int i = 0; i < 3; ++i) + { + if(i == 1) + { + #pragma unroll + for(int i = 0; i < 11; ++i) SHA256Buf[i] = SWAP32(input[i + 16]); + SHA256Buf[11] = SWAP32(gid); + SHA256Buf[12] = 0x80000000; + SHA256Buf[13] = 0x00000000; + SHA256Buf[14] = 0x00000000; + SHA256Buf[15] = 0x00000380; + } + if(i == 2) + { + ((uint8 *)SHA256Buf)[0] = outbuf; + SHA256Buf[8] = 0x80000000; + #pragma unroll + for(int i = 9; i < 15; ++i) SHA256Buf[i] = 0x00000000; + SHA256Buf[15] = 0x00000100; + outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19); + } + outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf); + } + + /* + outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf); + #pragma unroll + for(int i = 0; i < 11; ++i) SHA256Buf[i] = SWAP32(input[i + 16]); + SHA256Buf[11] = SWAP32(gid); + SHA256Buf[12] = 0x80000000; + SHA256Buf[13] = 0x00000000; + SHA256Buf[14] = 0x00000000; + SHA256Buf[15] = 0x00000380; + + outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf); + ((uint8 *)SHA256Buf)[0] = outbuf; + SHA256Buf[8] = 0x80000000; + for(int i = 9; i < 15; ++i) SHA256Buf[i] = 0x00000000; + SHA256Buf[15] = 0x00000100; + outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 
0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19); + outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf); + */ + + + /* + + //outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf); + //outbuf = sha256_round(((uint16 *)SHA256Buf)[1], outbuf); + + // outbuf would normall be SWAP32'd here, but it'll need it again + // once we use it as input to the next SHA256, so it negates. + + ((uint8 *)SHA256Buf)[0] = outbuf; + SHA256Buf[8] = 0x80000000; + for(int i = 9; i < 15; ++i) SHA256Buf[i] = 0x00000000; + SHA256Buf[15] = 0x00000100; + + outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19); + outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf); + */ + + + + outbuf.s0 = SWAP32(outbuf.s0); + outbuf.s1 = SWAP32(outbuf.s1); + outbuf.s2 = SWAP32(outbuf.s2); + outbuf.s3 = SWAP32(outbuf.s3); + outbuf.s4 = SWAP32(outbuf.s4); + outbuf.s5 = SWAP32(outbuf.s5); + outbuf.s6 = SWAP32(outbuf.s6); + outbuf.s7 = SWAP32(outbuf.s7); + + ctx[get_global_id(0) - get_global_offset(0)] = outbuf; +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search1(__global uint8 *ctx) +{ + ulong W[16] = { 0UL }, SHA512Out[8]; + uint SHA256Buf[16]; + uint8 outbuf = ctx[get_global_id(0) - get_global_offset(0)]; + + ((uint8 *)W)[0] = outbuf; + + for(int i = 0; i < 4; ++i) W[i] = SWAP64(W[i]); + + W[4] = 0x8000000000000000UL; + W[15] = 0x0000000000000100UL; + + for(int i = 0; i < 8; ++i) SHA512Out[i] = SHA512_INIT[i]; + + SHA512Block(W, SHA512Out); + + for(int i = 0; i < 8; ++i) SHA512Out[i] = SWAP64(SHA512Out[i]); + + uint RMD160_0[16] = { 0U }; + uint RMD160_1[16] = { 0U }; + uint RMD160_0_Out[5], RMD160_1_Out[5]; + + for(int i = 0; i < 4; ++i) + { + ((ulong *)RMD160_0)[i] = SHA512Out[i]; + ((ulong *)RMD160_1)[i] = SHA512Out[i + 4]; + } + + RMD160_0[8] = RMD160_1[8] = 0x00000080; + RMD160_0[14] = RMD160_1[14] = 0x00000100; + + for(int i = 0; i < 5; ++i) + { + RMD160_0_Out[i] = RMD160_IV[i]; + 
RMD160_1_Out[i] = RMD160_IV[i]; + } + + RIPEMD160_ROUND_BODY(RMD160_0, RMD160_0_Out); + RIPEMD160_ROUND_BODY(RMD160_1, RMD160_1_Out); + + for(int i = 0; i < 5; ++i) SHA256Buf[i] = SWAP32(RMD160_0_Out[i]); + for(int i = 5; i < 10; ++i) SHA256Buf[i] = SWAP32(RMD160_1_Out[i - 5]); + SHA256Buf[10] = 0x80000000; + + for(int i = 11; i < 15; ++i) SHA256Buf[i] = 0x00000000U; + + SHA256Buf[15] = 0x00000140; + + outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19); + outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf); + + ctx[get_global_id(0) - get_global_offset(0)] = outbuf; +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search2(__global uint8 *ctx, __global uint *output, ulong target) +{ + uint SHA256Buf[16] = { 0U }; + uint gid = get_global_id(0); + uint8 outbuf = ctx[get_global_id(0) - get_global_offset(0)]; + + ((uint8 *)SHA256Buf)[0] = outbuf; + SHA256Buf[8] = 0x80000000; + for(int i = 9; i < 15; ++i) SHA256Buf[i] = 0x00000000; + SHA256Buf[15] = 0x00000100; + + outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19); + outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf); + + outbuf.s6 = SWAP32(outbuf.s6); + outbuf.s7 = SWAP32(outbuf.s7); + + if(as_ulong(outbuf.s67) <= target) + output[atomic_inc(output+0xFF)] = SWAP32(gid); +} diff --git a/kernel/ripemd160.cl b/kernel/ripemd160.cl new file mode 100644 index 00000000..b481b423 --- /dev/null +++ b/kernel/ripemd160.cl @@ -0,0 +1,423 @@ +#define RIPEMD160_IN(x) W[x] + +// Round functions for RIPEMD-128 and RIPEMD-160. 
+ +#define F1(x, y, z) ((x) ^ (y) ^ (z)) +#define F2(x, y, z) ((((y) ^ (z)) & (x)) ^ (z)) +#define F3(x, y, z) (((x) | ~(y)) ^ (z)) +#define F4(x, y, z) ((((x) ^ (y)) & (z)) ^ (y)) +#define F5(x, y, z) ((x) ^ ((y) | ~(z))) + +#define K11 0x00000000 +#define K12 0x5A827999 +#define K13 0x6ED9EBA1 +#define K14 0x8F1BBCDC +#define K15 0xA953FD4E + +#define K21 0x50A28BE6 +#define K22 0x5C4DD124 +#define K23 0x6D703EF3 +#define K24 0x7A6D76E9 +#define K25 0x00000000 + +const __constant uint RMD160_IV[5] = { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0 }; + +#define RR(a, b, c, d, e, f, s, r, k) do { \ + const uint rrtmp = a + f(b, c, d) + r + k; \ + a = amd_bitalign(rrtmp, rrtmp, 32U - (uint)s) + e; \ + c = amd_bitalign(c, c, 32U - 10U); \ + } while (0) + +#define ROUND1(a, b, c, d, e, f, s, r, k) \ + RR(a ## 1, b ## 1, c ## 1, d ## 1, e ## 1, f, s, r, K1 ## k) + +#define ROUND2(a, b, c, d, e, f, s, r, k) \ + RR(a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k) + +/* + * This macro defines the body for a RIPEMD-160 compression function + * implementation. The "in" parameter should evaluate, when applied to a + * numerical input parameter from 0 to 15, to an expression which yields + * the corresponding input block. The "h" parameter should evaluate to + * an array or pointer expression designating the array of 5 words which + * contains the input and output of the compression function. 
+ */ + +//#define RIPEMD160_ROUND_BODY(in, h) do { \ + uint A1, B1, C1, D1, E1; \ + uint A2, B2, C2, D2, E2; \ + uint tmp; \ + \ + A1 = A2 = (h)[0]; \ + B1 = B2 = (h)[1]; \ + C1 = C2 = (h)[2]; \ + D1 = D2 = (h)[3]; \ + E1 = E2 = (h)[4]; \ + \ + ROUND1(A, B, C, D, E, F1, 11, (in)[ 0], 1); \ + ROUND1(E, A, B, C, D, F1, 14, (in)[ 1], 1); \ + ROUND1(D, E, A, B, C, F1, 15, (in)[ 2], 1); \ + ROUND1(C, D, E, A, B, F1, 12, (in)[ 3], 1); \ + ROUND1(B, C, D, E, A, F1, 5, (in)[ 4], 1); \ + ROUND1(A, B, C, D, E, F1, 8, (in)[ 5], 1); \ + ROUND1(E, A, B, C, D, F1, 7, (in)[ 6], 1); \ + ROUND1(D, E, A, B, C, F1, 9, (in)[ 7], 1); \ + ROUND1(C, D, E, A, B, F1, 11, (in)[ 8], 1); \ + ROUND1(B, C, D, E, A, F1, 13, (in)[ 9], 1); \ + ROUND1(A, B, C, D, E, F1, 14, (in)[10], 1); \ + ROUND1(E, A, B, C, D, F1, 15, (in)[11], 1); \ + ROUND1(D, E, A, B, C, F1, 6, (in)[12], 1); \ + ROUND1(C, D, E, A, B, F1, 7, (in)[13], 1); \ + ROUND1(B, C, D, E, A, F1, 9, (in)[14], 1); \ + ROUND1(A, B, C, D, E, F1, 8, (in)[15], 1); \ + \ + ROUND1(E, A, B, C, D, F2, 7, (in)[ 7], 2); \ + ROUND1(D, E, A, B, C, F2, 6, (in)[ 4], 2); \ + ROUND1(C, D, E, A, B, F2, 8, (in)[13], 2); \ + ROUND1(B, C, D, E, A, F2, 13, (in)[ 1], 2); \ + ROUND1(A, B, C, D, E, F2, 11, (in)[10], 2); \ + ROUND1(E, A, B, C, D, F2, 9, (in)[ 6], 2); \ + ROUND1(D, E, A, B, C, F2, 7, (in)[15], 2); \ + ROUND1(C, D, E, A, B, F2, 15, (in)[ 3], 2); \ + ROUND1(B, C, D, E, A, F2, 7, (in)[12], 2); \ + ROUND1(A, B, C, D, E, F2, 12, (in)[ 0], 2); \ + ROUND1(E, A, B, C, D, F2, 15, (in)[ 9], 2); \ + ROUND1(D, E, A, B, C, F2, 9, (in)[ 5], 2); \ + ROUND1(C, D, E, A, B, F2, 11, (in)[ 2], 2); \ + ROUND1(B, C, D, E, A, F2, 7, (in)[14], 2); \ + ROUND1(A, B, C, D, E, F2, 13, (in)[11], 2); \ + ROUND1(E, A, B, C, D, F2, 12, (in)[ 8], 2); \ + \ + ROUND1(D, E, A, B, C, F3, 11, (in)[ 3], 3); \ + ROUND1(C, D, E, A, B, F3, 13, (in)[10], 3); \ + ROUND1(B, C, D, E, A, F3, 6, (in)[14], 3); \ + ROUND1(A, B, C, D, E, F3, 7, (in)[ 4], 3); \ + ROUND1(E, A, B, C, D, F3, 14, (in)[ 
9], 3); \ + ROUND1(D, E, A, B, C, F3, 9, (in)[15], 3); \ + ROUND1(C, D, E, A, B, F3, 13, (in)[ 8], 3); \ + ROUND1(B, C, D, E, A, F3, 15, (in)[ 1], 3); \ + ROUND1(A, B, C, D, E, F3, 14, (in)[ 2], 3); \ + ROUND1(E, A, B, C, D, F3, 8, (in)[ 7], 3); \ + ROUND1(D, E, A, B, C, F3, 13, (in)[ 0], 3); \ + ROUND1(C, D, E, A, B, F3, 6, (in)[ 6], 3); \ + ROUND1(B, C, D, E, A, F3, 5, (in)[13], 3); \ + ROUND1(A, B, C, D, E, F3, 12, (in)[11], 3); \ + ROUND1(E, A, B, C, D, F3, 7, (in)[ 5], 3); \ + ROUND1(D, E, A, B, C, F3, 5, (in)[12], 3); \ + \ + ROUND1(C, D, E, A, B, F4, 11, (in)[ 1], 4); \ + ROUND1(B, C, D, E, A, F4, 12, (in)[ 9], 4); \ + ROUND1(A, B, C, D, E, F4, 14, (in)[11], 4); \ + ROUND1(E, A, B, C, D, F4, 15, (in)[10], 4); \ + ROUND1(D, E, A, B, C, F4, 14, (in)[ 0], 4); \ + ROUND1(C, D, E, A, B, F4, 15, (in)[ 8], 4); \ + ROUND1(B, C, D, E, A, F4, 9, (in)[12], 4); \ + ROUND1(A, B, C, D, E, F4, 8, (in)[ 4], 4); \ + ROUND1(E, A, B, C, D, F4, 9, (in)[13], 4); \ + ROUND1(D, E, A, B, C, F4, 14, (in)[ 3], 4); \ + ROUND1(C, D, E, A, B, F4, 5, (in)[ 7], 4); \ + ROUND1(B, C, D, E, A, F4, 6, (in)[15], 4); \ + ROUND1(A, B, C, D, E, F4, 8, (in)[14], 4); \ + ROUND1(E, A, B, C, D, F4, 6, (in)[ 5], 4); \ + ROUND1(D, E, A, B, C, F4, 5, (in)[ 6], 4); \ + ROUND1(C, D, E, A, B, F4, 12, (in)[ 2], 4); \ + \ + ROUND1(B, C, D, E, A, F5, 9, (in)[ 4], 5); \ + ROUND1(A, B, C, D, E, F5, 15, (in)[ 0], 5); \ + ROUND1(E, A, B, C, D, F5, 5, (in)[ 5], 5); \ + ROUND1(D, E, A, B, C, F5, 11, (in)[ 9], 5); \ + ROUND1(C, D, E, A, B, F5, 6, (in)[ 7], 5); \ + ROUND1(B, C, D, E, A, F5, 8, (in)[12], 5); \ + ROUND1(A, B, C, D, E, F5, 13, (in)[ 2], 5); \ + ROUND1(E, A, B, C, D, F5, 12, (in)[10], 5); \ + ROUND1(D, E, A, B, C, F5, 5, (in)[14], 5); \ + ROUND1(C, D, E, A, B, F5, 12, (in)[ 1], 5); \ + ROUND1(B, C, D, E, A, F5, 13, (in)[ 3], 5); \ + ROUND1(A, B, C, D, E, F5, 14, (in)[ 8], 5); \ + ROUND1(E, A, B, C, D, F5, 11, (in)[11], 5); \ + ROUND1(D, E, A, B, C, F5, 8, (in)[ 6], 5); \ + ROUND1(C, D, E, A, B, F5, 5, 
(in)[15], 5); \ + ROUND1(B, C, D, E, A, F5, 6, (in)[13], 5); \ + \ + ROUND2(A, B, C, D, E, F5, 8, (in)[ 5], 1); \ + ROUND2(E, A, B, C, D, F5, 9, (in)[14], 1); \ + ROUND2(D, E, A, B, C, F5, 9, (in)[ 7], 1); \ + ROUND2(C, D, E, A, B, F5, 11, (in)[ 0], 1); \ + ROUND2(B, C, D, E, A, F5, 13, (in)[ 9], 1); \ + ROUND2(A, B, C, D, E, F5, 15, (in)[ 2], 1); \ + ROUND2(E, A, B, C, D, F5, 15, (in)[11], 1); \ + ROUND2(D, E, A, B, C, F5, 5, (in)[ 4], 1); \ + ROUND2(C, D, E, A, B, F5, 7, (in)[13], 1); \ + ROUND2(B, C, D, E, A, F5, 7, (in)[ 6], 1); \ + ROUND2(A, B, C, D, E, F5, 8, (in)[15], 1); \ + ROUND2(E, A, B, C, D, F5, 11, (in)[ 8], 1); \ + ROUND2(D, E, A, B, C, F5, 14, (in)[ 1], 1); \ + ROUND2(C, D, E, A, B, F5, 14, (in)[10], 1); \ + ROUND2(B, C, D, E, A, F5, 12, (in)[ 3], 1); \ + ROUND2(A, B, C, D, E, F5, 6, (in)[12], 1); \ + \ + ROUND2(E, A, B, C, D, F4, 9, (in)[ 6], 2); \ + ROUND2(D, E, A, B, C, F4, 13, (in)[11], 2); \ + ROUND2(C, D, E, A, B, F4, 15, (in)[ 3], 2); \ + ROUND2(B, C, D, E, A, F4, 7, (in)[ 7], 2); \ + ROUND2(A, B, C, D, E, F4, 12, (in)[ 0], 2); \ + ROUND2(E, A, B, C, D, F4, 8, (in)[13], 2); \ + ROUND2(D, E, A, B, C, F4, 9, (in)[ 5], 2); \ + ROUND2(C, D, E, A, B, F4, 11, (in)[10], 2); \ + ROUND2(B, C, D, E, A, F4, 7, (in)[14], 2); \ + ROUND2(A, B, C, D, E, F4, 7, (in)[15], 2); \ + ROUND2(E, A, B, C, D, F4, 12, (in)[ 8], 2); \ + ROUND2(D, E, A, B, C, F4, 7, (in)[12], 2); \ + ROUND2(C, D, E, A, B, F4, 6, (in)[ 4], 2); \ + ROUND2(B, C, D, E, A, F4, 15, (in)[ 9], 2); \ + ROUND2(A, B, C, D, E, F4, 13, (in)[ 1], 2); \ + ROUND2(E, A, B, C, D, F4, 11, (in)[ 2], 2); \ + \ + ROUND2(D, E, A, B, C, F3, 9, (in)[15], 3); \ + ROUND2(C, D, E, A, B, F3, 7, (in)[ 5], 3); \ + ROUND2(B, C, D, E, A, F3, 15, (in)[ 1], 3); \ + ROUND2(A, B, C, D, E, F3, 11, (in)[ 3], 3); \ + ROUND2(E, A, B, C, D, F3, 8, (in)[ 7], 3); \ + ROUND2(D, E, A, B, C, F3, 6, (in)[14], 3); \ + ROUND2(C, D, E, A, B, F3, 6, (in)[ 6], 3); \ + ROUND2(B, C, D, E, A, F3, 14, (in)[ 9], 3); \ + ROUND2(A, B, C, D, E, 
F3, 12, (in)[11], 3); \ + ROUND2(E, A, B, C, D, F3, 13, (in)[ 8], 3); \ + ROUND2(D, E, A, B, C, F3, 5, (in)[12], 3); \ + ROUND2(C, D, E, A, B, F3, 14, (in)[ 2], 3); \ + ROUND2(B, C, D, E, A, F3, 13, (in)[10], 3); \ + ROUND2(A, B, C, D, E, F3, 13, (in)[ 0], 3); \ + ROUND2(E, A, B, C, D, F3, 7, (in)[ 4], 3); \ + ROUND2(D, E, A, B, C, F3, 5, (in)[13], 3); \ + \ + ROUND2(C, D, E, A, B, F2, 15, (in)[ 8], 4); \ + ROUND2(B, C, D, E, A, F2, 5, (in)[ 6], 4); \ + ROUND2(A, B, C, D, E, F2, 8, (in)[ 4], 4); \ + ROUND2(E, A, B, C, D, F2, 11, (in)[ 1], 4); \ + ROUND2(D, E, A, B, C, F2, 14, (in)[ 3], 4); \ + ROUND2(C, D, E, A, B, F2, 14, (in)[11], 4); \ + ROUND2(B, C, D, E, A, F2, 6, (in)[15], 4); \ + ROUND2(A, B, C, D, E, F2, 14, (in)[ 0], 4); \ + ROUND2(E, A, B, C, D, F2, 6, (in)[ 5], 4); \ + ROUND2(D, E, A, B, C, F2, 9, (in)[12], 4); \ + ROUND2(C, D, E, A, B, F2, 12, (in)[ 2], 4); \ + ROUND2(B, C, D, E, A, F2, 9, (in)[13], 4); \ + ROUND2(A, B, C, D, E, F2, 12, (in)[ 9], 4); \ + ROUND2(E, A, B, C, D, F2, 5, (in)[ 7], 4); \ + ROUND2(D, E, A, B, C, F2, 15, (in)[10], 4); \ + ROUND2(C, D, E, A, B, F2, 8, (in)[14], 4); \ + \ + ROUND2(B, C, D, E, A, F1, 8, (in)[12], 5); \ + ROUND2(A, B, C, D, E, F1, 5, (in)[15], 5); \ + ROUND2(E, A, B, C, D, F1, 12, (in)[10], 5); \ + ROUND2(D, E, A, B, C, F1, 9, (in)[ 4], 5); \ + ROUND2(C, D, E, A, B, F1, 12, (in)[ 1], 5); \ + ROUND2(B, C, D, E, A, F1, 5, (in)[ 5], 5); \ + ROUND2(A, B, C, D, E, F1, 14, (in)[ 8], 5); \ + ROUND2(E, A, B, C, D, F1, 6, (in)[ 7], 5); \ + ROUND2(D, E, A, B, C, F1, 8, (in)[ 6], 5); \ + ROUND2(C, D, E, A, B, F1, 13, (in)[ 2], 5); \ + ROUND2(B, C, D, E, A, F1, 6, (in)[13], 5); \ + ROUND2(A, B, C, D, E, F1, 5, (in)[14], 5); \ + ROUND2(E, A, B, C, D, F1, 15, (in)[ 0], 5); \ + ROUND2(D, E, A, B, C, F1, 13, (in)[ 3], 5); \ + ROUND2(C, D, E, A, B, F1, 11, (in)[ 9], 5); \ + ROUND2(B, C, D, E, A, F1, 11, (in)[11], 5); \ + \ + tmp = (h)[1] + C1 + D2; \ + (h)[1] = (h)[2] + D1 + E2; \ + (h)[2] = (h)[3] + E1 + A2; \ + (h)[3] = (h)[4] + 
A1 + B2; \ + (h)[4] = (h)[0] + B1 + C2; \ + (h)[0] = tmp; \ + } while (0) + +void RIPEMD160_ROUND_BODY(uint *in, uint *h) +{ + uint A1, B1, C1, D1, E1; + uint A2, B2, C2, D2, E2; + uint tmp; + + A1 = A2 = (h)[0]; + B1 = B2 = (h)[1]; + C1 = C2 = (h)[2]; + D1 = D2 = (h)[3]; + E1 = E2 = (h)[4]; + + ROUND1(A, B, C, D, E, F1, 11, (in)[ 0], 1); + ROUND1(E, A, B, C, D, F1, 14, (in)[ 1], 1); + ROUND1(D, E, A, B, C, F1, 15, (in)[ 2], 1); + ROUND1(C, D, E, A, B, F1, 12, (in)[ 3], 1); + ROUND1(B, C, D, E, A, F1, 5, (in)[ 4], 1); + ROUND1(A, B, C, D, E, F1, 8, (in)[ 5], 1); + ROUND1(E, A, B, C, D, F1, 7, (in)[ 6], 1); + ROUND1(D, E, A, B, C, F1, 9, (in)[ 7], 1); + ROUND1(C, D, E, A, B, F1, 11, (in)[ 8], 1); + ROUND1(B, C, D, E, A, F1, 13, (in)[ 9], 1); + ROUND1(A, B, C, D, E, F1, 14, (in)[10], 1); + ROUND1(E, A, B, C, D, F1, 15, (in)[11], 1); + ROUND1(D, E, A, B, C, F1, 6, (in)[12], 1); + ROUND1(C, D, E, A, B, F1, 7, (in)[13], 1); + ROUND1(B, C, D, E, A, F1, 9, (in)[14], 1); + ROUND1(A, B, C, D, E, F1, 8, (in)[15], 1); + + ROUND1(E, A, B, C, D, F2, 7, (in)[ 7], 2); + ROUND1(D, E, A, B, C, F2, 6, (in)[ 4], 2); + ROUND1(C, D, E, A, B, F2, 8, (in)[13], 2); + ROUND1(B, C, D, E, A, F2, 13, (in)[ 1], 2); + ROUND1(A, B, C, D, E, F2, 11, (in)[10], 2); + ROUND1(E, A, B, C, D, F2, 9, (in)[ 6], 2); + ROUND1(D, E, A, B, C, F2, 7, (in)[15], 2); + ROUND1(C, D, E, A, B, F2, 15, (in)[ 3], 2); + ROUND1(B, C, D, E, A, F2, 7, (in)[12], 2); + ROUND1(A, B, C, D, E, F2, 12, (in)[ 0], 2); + ROUND1(E, A, B, C, D, F2, 15, (in)[ 9], 2); + ROUND1(D, E, A, B, C, F2, 9, (in)[ 5], 2); + ROUND1(C, D, E, A, B, F2, 11, (in)[ 2], 2); + ROUND1(B, C, D, E, A, F2, 7, (in)[14], 2); + ROUND1(A, B, C, D, E, F2, 13, (in)[11], 2); + ROUND1(E, A, B, C, D, F2, 12, (in)[ 8], 2); + + ROUND1(D, E, A, B, C, F3, 11, (in)[ 3], 3); + ROUND1(C, D, E, A, B, F3, 13, (in)[10], 3); + ROUND1(B, C, D, E, A, F3, 6, (in)[14], 3); + ROUND1(A, B, C, D, E, F3, 7, (in)[ 4], 3); + ROUND1(E, A, B, C, D, F3, 14, (in)[ 9], 3); + ROUND1(D, E, 
A, B, C, F3, 9, (in)[15], 3); + ROUND1(C, D, E, A, B, F3, 13, (in)[ 8], 3); + ROUND1(B, C, D, E, A, F3, 15, (in)[ 1], 3); + ROUND1(A, B, C, D, E, F3, 14, (in)[ 2], 3); + ROUND1(E, A, B, C, D, F3, 8, (in)[ 7], 3); + ROUND1(D, E, A, B, C, F3, 13, (in)[ 0], 3); + ROUND1(C, D, E, A, B, F3, 6, (in)[ 6], 3); + ROUND1(B, C, D, E, A, F3, 5, (in)[13], 3); + ROUND1(A, B, C, D, E, F3, 12, (in)[11], 3); + ROUND1(E, A, B, C, D, F3, 7, (in)[ 5], 3); + ROUND1(D, E, A, B, C, F3, 5, (in)[12], 3); + + ROUND1(C, D, E, A, B, F4, 11, (in)[ 1], 4); + ROUND1(B, C, D, E, A, F4, 12, (in)[ 9], 4); + ROUND1(A, B, C, D, E, F4, 14, (in)[11], 4); + ROUND1(E, A, B, C, D, F4, 15, (in)[10], 4); + ROUND1(D, E, A, B, C, F4, 14, (in)[ 0], 4); + ROUND1(C, D, E, A, B, F4, 15, (in)[ 8], 4); + ROUND1(B, C, D, E, A, F4, 9, (in)[12], 4); + ROUND1(A, B, C, D, E, F4, 8, (in)[ 4], 4); + ROUND1(E, A, B, C, D, F4, 9, (in)[13], 4); + ROUND1(D, E, A, B, C, F4, 14, (in)[ 3], 4); + ROUND1(C, D, E, A, B, F4, 5, (in)[ 7], 4); + ROUND1(B, C, D, E, A, F4, 6, (in)[15], 4); + ROUND1(A, B, C, D, E, F4, 8, (in)[14], 4); + ROUND1(E, A, B, C, D, F4, 6, (in)[ 5], 4); + ROUND1(D, E, A, B, C, F4, 5, (in)[ 6], 4); + ROUND1(C, D, E, A, B, F4, 12, (in)[ 2], 4); + + ROUND1(B, C, D, E, A, F5, 9, (in)[ 4], 5); + ROUND1(A, B, C, D, E, F5, 15, (in)[ 0], 5); + ROUND1(E, A, B, C, D, F5, 5, (in)[ 5], 5); + ROUND1(D, E, A, B, C, F5, 11, (in)[ 9], 5); + ROUND1(C, D, E, A, B, F5, 6, (in)[ 7], 5); + ROUND1(B, C, D, E, A, F5, 8, (in)[12], 5); + ROUND1(A, B, C, D, E, F5, 13, (in)[ 2], 5); + ROUND1(E, A, B, C, D, F5, 12, (in)[10], 5); + ROUND1(D, E, A, B, C, F5, 5, (in)[14], 5); + ROUND1(C, D, E, A, B, F5, 12, (in)[ 1], 5); + ROUND1(B, C, D, E, A, F5, 13, (in)[ 3], 5); + ROUND1(A, B, C, D, E, F5, 14, (in)[ 8], 5); + ROUND1(E, A, B, C, D, F5, 11, (in)[11], 5); + ROUND1(D, E, A, B, C, F5, 8, (in)[ 6], 5); + ROUND1(C, D, E, A, B, F5, 5, (in)[15], 5); + ROUND1(B, C, D, E, A, F5, 6, (in)[13], 5); + + ROUND2(A, B, C, D, E, F5, 8, (in)[ 5], 1); + 
ROUND2(E, A, B, C, D, F5, 9, (in)[14], 1); + ROUND2(D, E, A, B, C, F5, 9, (in)[ 7], 1); + ROUND2(C, D, E, A, B, F5, 11, (in)[ 0], 1); + ROUND2(B, C, D, E, A, F5, 13, (in)[ 9], 1); + ROUND2(A, B, C, D, E, F5, 15, (in)[ 2], 1); + ROUND2(E, A, B, C, D, F5, 15, (in)[11], 1); + ROUND2(D, E, A, B, C, F5, 5, (in)[ 4], 1); + ROUND2(C, D, E, A, B, F5, 7, (in)[13], 1); + ROUND2(B, C, D, E, A, F5, 7, (in)[ 6], 1); + ROUND2(A, B, C, D, E, F5, 8, (in)[15], 1); + ROUND2(E, A, B, C, D, F5, 11, (in)[ 8], 1); + ROUND2(D, E, A, B, C, F5, 14, (in)[ 1], 1); + ROUND2(C, D, E, A, B, F5, 14, (in)[10], 1); + ROUND2(B, C, D, E, A, F5, 12, (in)[ 3], 1); + ROUND2(A, B, C, D, E, F5, 6, (in)[12], 1); + + ROUND2(E, A, B, C, D, F4, 9, (in)[ 6], 2); + ROUND2(D, E, A, B, C, F4, 13, (in)[11], 2); + ROUND2(C, D, E, A, B, F4, 15, (in)[ 3], 2); + ROUND2(B, C, D, E, A, F4, 7, (in)[ 7], 2); + ROUND2(A, B, C, D, E, F4, 12, (in)[ 0], 2); + ROUND2(E, A, B, C, D, F4, 8, (in)[13], 2); + ROUND2(D, E, A, B, C, F4, 9, (in)[ 5], 2); + ROUND2(C, D, E, A, B, F4, 11, (in)[10], 2); + ROUND2(B, C, D, E, A, F4, 7, (in)[14], 2); + ROUND2(A, B, C, D, E, F4, 7, (in)[15], 2); + ROUND2(E, A, B, C, D, F4, 12, (in)[ 8], 2); + ROUND2(D, E, A, B, C, F4, 7, (in)[12], 2); + ROUND2(C, D, E, A, B, F4, 6, (in)[ 4], 2); + ROUND2(B, C, D, E, A, F4, 15, (in)[ 9], 2); + ROUND2(A, B, C, D, E, F4, 13, (in)[ 1], 2); + ROUND2(E, A, B, C, D, F4, 11, (in)[ 2], 2); + + ROUND2(D, E, A, B, C, F3, 9, (in)[15], 3); + ROUND2(C, D, E, A, B, F3, 7, (in)[ 5], 3); + ROUND2(B, C, D, E, A, F3, 15, (in)[ 1], 3); + ROUND2(A, B, C, D, E, F3, 11, (in)[ 3], 3); + ROUND2(E, A, B, C, D, F3, 8, (in)[ 7], 3); + ROUND2(D, E, A, B, C, F3, 6, (in)[14], 3); + ROUND2(C, D, E, A, B, F3, 6, (in)[ 6], 3); + ROUND2(B, C, D, E, A, F3, 14, (in)[ 9], 3); + ROUND2(A, B, C, D, E, F3, 12, (in)[11], 3); + ROUND2(E, A, B, C, D, F3, 13, (in)[ 8], 3); + ROUND2(D, E, A, B, C, F3, 5, (in)[12], 3); + ROUND2(C, D, E, A, B, F3, 14, (in)[ 2], 3); + ROUND2(B, C, D, E, A, F3, 13, 
(in)[10], 3); + ROUND2(A, B, C, D, E, F3, 13, (in)[ 0], 3); + ROUND2(E, A, B, C, D, F3, 7, (in)[ 4], 3); + ROUND2(D, E, A, B, C, F3, 5, (in)[13], 3); + + ROUND2(C, D, E, A, B, F2, 15, (in)[ 8], 4); + ROUND2(B, C, D, E, A, F2, 5, (in)[ 6], 4); + ROUND2(A, B, C, D, E, F2, 8, (in)[ 4], 4); + ROUND2(E, A, B, C, D, F2, 11, (in)[ 1], 4); + ROUND2(D, E, A, B, C, F2, 14, (in)[ 3], 4); + ROUND2(C, D, E, A, B, F2, 14, (in)[11], 4); + ROUND2(B, C, D, E, A, F2, 6, (in)[15], 4); + ROUND2(A, B, C, D, E, F2, 14, (in)[ 0], 4); + ROUND2(E, A, B, C, D, F2, 6, (in)[ 5], 4); + ROUND2(D, E, A, B, C, F2, 9, (in)[12], 4); + ROUND2(C, D, E, A, B, F2, 12, (in)[ 2], 4); + ROUND2(B, C, D, E, A, F2, 9, (in)[13], 4); + ROUND2(A, B, C, D, E, F2, 12, (in)[ 9], 4); + ROUND2(E, A, B, C, D, F2, 5, (in)[ 7], 4); + ROUND2(D, E, A, B, C, F2, 15, (in)[10], 4); + ROUND2(C, D, E, A, B, F2, 8, (in)[14], 4); + + ROUND2(B, C, D, E, A, F1, 8, (in)[12], 5); + ROUND2(A, B, C, D, E, F1, 5, (in)[15], 5); + ROUND2(E, A, B, C, D, F1, 12, (in)[10], 5); + ROUND2(D, E, A, B, C, F1, 9, (in)[ 4], 5); + ROUND2(C, D, E, A, B, F1, 12, (in)[ 1], 5); + ROUND2(B, C, D, E, A, F1, 5, (in)[ 5], 5); + ROUND2(A, B, C, D, E, F1, 14, (in)[ 8], 5); + ROUND2(E, A, B, C, D, F1, 6, (in)[ 7], 5); + ROUND2(D, E, A, B, C, F1, 8, (in)[ 6], 5); + ROUND2(C, D, E, A, B, F1, 13, (in)[ 2], 5); + ROUND2(B, C, D, E, A, F1, 6, (in)[13], 5); + ROUND2(A, B, C, D, E, F1, 5, (in)[14], 5); + ROUND2(E, A, B, C, D, F1, 15, (in)[ 0], 5); + ROUND2(D, E, A, B, C, F1, 13, (in)[ 3], 5); + ROUND2(C, D, E, A, B, F1, 11, (in)[ 9], 5); + ROUND2(B, C, D, E, A, F1, 11, (in)[11], 5); + + tmp = (h)[1] + C1 + D2; + (h)[1] = (h)[2] + D1 + E2; + (h)[2] = (h)[3] + E1 + A2; + (h)[3] = (h)[4] + A1 + B2; + (h)[4] = (h)[0] + B1 + C2; + (h)[0] = tmp; +} diff --git a/kernel/sha256.cl b/kernel/sha256.cl new file mode 100644 index 00000000..354695ae --- /dev/null +++ b/kernel/sha256.cl @@ -0,0 +1,149 @@ +#define ROL32(x, y) rotate(x, y ## U) +#define SHR(x, y) (x >> y) +#define 
SWAP32(a) (as_uint(as_uchar4(a).wzyx)) + +#define S0(x) (ROL32(x, 25) ^ ROL32(x, 14) ^ SHR(x, 3)) +#define S1(x) (ROL32(x, 15) ^ ROL32(x, 13) ^ SHR(x, 10)) + +#define S2(x) (ROL32(x, 30) ^ ROL32(x, 19) ^ ROL32(x, 10)) +#define S3(x) (ROL32(x, 26) ^ ROL32(x, 21) ^ ROL32(x, 7)) + +#define P(a,b,c,d,e,f,g,h,x,K) \ +{ \ + temp1 = h + S3(e) + F1(e,f,g) + (K + x); \ + d += temp1; h = temp1 + S2(a) + F0(a,b,c); \ +} + +#define F0(y, x, z) bitselect(z, y, z ^ x) +#define F1(x, y, z) bitselect(z, y, x) + +#define R0 (W0 = S1(W14) + W9 + S0(W1) + W0) +#define R1 (W1 = S1(W15) + W10 + S0(W2) + W1) +#define R2 (W2 = S1(W0) + W11 + S0(W3) + W2) +#define R3 (W3 = S1(W1) + W12 + S0(W4) + W3) +#define R4 (W4 = S1(W2) + W13 + S0(W5) + W4) +#define R5 (W5 = S1(W3) + W14 + S0(W6) + W5) +#define R6 (W6 = S1(W4) + W15 + S0(W7) + W6) +#define R7 (W7 = S1(W5) + W0 + S0(W8) + W7) +#define R8 (W8 = S1(W6) + W1 + S0(W9) + W8) +#define R9 (W9 = S1(W7) + W2 + S0(W10) + W9) +#define R10 (W10 = S1(W8) + W3 + S0(W11) + W10) +#define R11 (W11 = S1(W9) + W4 + S0(W12) + W11) +#define R12 (W12 = S1(W10) + W5 + S0(W13) + W12) +#define R13 (W13 = S1(W11) + W6 + S0(W14) + W13) +#define R14 (W14 = S1(W12) + W7 + S0(W15) + W14) +#define R15 (W15 = S1(W13) + W8 + S0(W0) + W15) + +#define RD14 (S1(W12) + W7 + S0(W15) + W14) +#define RD15 (S1(W13) + W8 + S0(W0) + W15) + + +inline uint8 sha256_round(uint16 data, uint8 buf) +{ + uint temp1; + uint8 res; + uint W0 = (data.s0); + uint W1 = (data.s1); + uint W2 = (data.s2); + uint W3 = (data.s3); + uint W4 = (data.s4); + uint W5 = (data.s5); + uint W6 = (data.s6); + uint W7 = (data.s7); + uint W8 = (data.s8); + uint W9 = (data.s9); + uint W10 = (data.sA); + uint W11 = (data.sB); + uint W12 = (data.sC); + uint W13 = (data.sD); + uint W14 = (data.sE); + uint W15 = (data.sF); + + uint v0 = buf.s0; + uint v1 = buf.s1; + uint v2 = buf.s2; + uint v3 = buf.s3; + uint v4 = buf.s4; + uint v5 = buf.s5; + uint v6 = buf.s6; + uint v7 = buf.s7; + + P(v0, v1, v2, v3, v4, v5, 
v6, v7, W0, 0x428A2F98); + P(v7, v0, v1, v2, v3, v4, v5, v6, W1, 0x71374491); + P(v6, v7, v0, v1, v2, v3, v4, v5, W2, 0xB5C0FBCF); + P(v5, v6, v7, v0, v1, v2, v3, v4, W3, 0xE9B5DBA5); + P(v4, v5, v6, v7, v0, v1, v2, v3, W4, 0x3956C25B); + P(v3, v4, v5, v6, v7, v0, v1, v2, W5, 0x59F111F1); + P(v2, v3, v4, v5, v6, v7, v0, v1, W6, 0x923F82A4); + P(v1, v2, v3, v4, v5, v6, v7, v0, W7, 0xAB1C5ED5); + P(v0, v1, v2, v3, v4, v5, v6, v7, W8, 0xD807AA98); + P(v7, v0, v1, v2, v3, v4, v5, v6, W9, 0x12835B01); + P(v6, v7, v0, v1, v2, v3, v4, v5, W10, 0x243185BE); + P(v5, v6, v7, v0, v1, v2, v3, v4, W11, 0x550C7DC3); + P(v4, v5, v6, v7, v0, v1, v2, v3, W12, 0x72BE5D74); + P(v3, v4, v5, v6, v7, v0, v1, v2, W13, 0x80DEB1FE); + P(v2, v3, v4, v5, v6, v7, v0, v1, W14, 0x9BDC06A7); + P(v1, v2, v3, v4, v5, v6, v7, v0, W15, 0xC19BF174); + + P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0xE49B69C1); + P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0xEFBE4786); + P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x0FC19DC6); + P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x240CA1CC); + P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x2DE92C6F); + P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4A7484AA); + P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5CB0A9DC); + P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x76F988DA); + P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x983E5152); + P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA831C66D); + P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xB00327C8); + P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xBF597FC7); + P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xC6E00BF3); + P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD5A79147); + P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0x06CA6351); + P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x14292967); + + P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x27B70A85); + P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x2E1B2138); + P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x4D2C6DFC); + P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x53380D13); + P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x650A7354); + P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 
0x766A0ABB); + P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x81C2C92E); + P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x92722C85); + P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0xA2BFE8A1); + P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA81A664B); + P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xC24B8B70); + P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xC76C51A3); + P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xD192E819); + P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD6990624); + P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0xF40E3585); + P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x106AA070); + + P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x19A4C116); + P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x1E376C08); + P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x2748774C); + P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x34B0BCB5); + P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x391C0CB3); + P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4ED8AA4A); + P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5B9CCA4F); + P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x682E6FF3); + P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x748F82EE); + P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0x78A5636F); + P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0x84C87814); + P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0x8CC70208); + P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0x90BEFFFA); + P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xA4506CEB); + P(v2, v3, v4, v5, v6, v7, v0, v1, RD14, 0xBEF9A3F7); + P(v1, v2, v3, v4, v5, v6, v7, v0, RD15, 0xC67178F2); + + res.s0 = (v0 + buf.s0); + res.s1 = (v1 + buf.s1); + res.s2 = (v2 + buf.s2); + res.s3 = (v3 + buf.s3); + res.s4 = (v4 + buf.s4); + res.s5 = (v5 + buf.s5); + res.s6 = (v6 + buf.s6); + res.s7 = (v7 + buf.s7); + return (res); +} + diff --git a/kernel/wolf-sha512.cl b/kernel/wolf-sha512.cl new file mode 100644 index 00000000..62953b05 --- /dev/null +++ b/kernel/wolf-sha512.cl @@ -0,0 +1,108 @@ +static const __constant ulong K512[80] = +{ + 0x428A2F98D728AE22UL, 0x7137449123EF65CDUL, + 0xB5C0FBCFEC4D3B2FUL, 0xE9B5DBA58189DBBCUL, + 0x3956C25BF348B538UL, 0x59F111F1B605D019UL, + 
0x923F82A4AF194F9BUL, 0xAB1C5ED5DA6D8118UL, + 0xD807AA98A3030242UL, 0x12835B0145706FBEUL, + 0x243185BE4EE4B28CUL, 0x550C7DC3D5FFB4E2UL, + 0x72BE5D74F27B896FUL, 0x80DEB1FE3B1696B1UL, + 0x9BDC06A725C71235UL, 0xC19BF174CF692694UL, + 0xE49B69C19EF14AD2UL, 0xEFBE4786384F25E3UL, + 0x0FC19DC68B8CD5B5UL, 0x240CA1CC77AC9C65UL, + 0x2DE92C6F592B0275UL, 0x4A7484AA6EA6E483UL, + 0x5CB0A9DCBD41FBD4UL, 0x76F988DA831153B5UL, + 0x983E5152EE66DFABUL, 0xA831C66D2DB43210UL, + 0xB00327C898FB213FUL, 0xBF597FC7BEEF0EE4UL, + 0xC6E00BF33DA88FC2UL, 0xD5A79147930AA725UL, + 0x06CA6351E003826FUL, 0x142929670A0E6E70UL, + 0x27B70A8546D22FFCUL, 0x2E1B21385C26C926UL, + 0x4D2C6DFC5AC42AEDUL, 0x53380D139D95B3DFUL, + 0x650A73548BAF63DEUL, 0x766A0ABB3C77B2A8UL, + 0x81C2C92E47EDAEE6UL, 0x92722C851482353BUL, + 0xA2BFE8A14CF10364UL, 0xA81A664BBC423001UL, + 0xC24B8B70D0F89791UL, 0xC76C51A30654BE30UL, + 0xD192E819D6EF5218UL, 0xD69906245565A910UL, + 0xF40E35855771202AUL, 0x106AA07032BBD1B8UL, + 0x19A4C116B8D2D0C8UL, 0x1E376C085141AB53UL, + 0x2748774CDF8EEB99UL, 0x34B0BCB5E19B48A8UL, + 0x391C0CB3C5C95A63UL, 0x4ED8AA4AE3418ACBUL, + 0x5B9CCA4F7763E373UL, 0x682E6FF3D6B2B8A3UL, + 0x748F82EE5DEFB2FCUL, 0x78A5636F43172F60UL, + 0x84C87814A1F0AB72UL, 0x8CC702081A6439ECUL, + 0x90BEFFFA23631E28UL, 0xA4506CEBDE82BDE9UL, + 0xBEF9A3F7B2C67915UL, 0xC67178F2E372532BUL, + 0xCA273ECEEA26619CUL, 0xD186B8C721C0C207UL, + 0xEADA7DD6CDE0EB1EUL, 0xF57D4F7FEE6ED178UL, + 0x06F067AA72176FBAUL, 0x0A637DC5A2C898A6UL, + 0x113F9804BEF90DAEUL, 0x1B710B35131C471BUL, + 0x28DB77F523047D84UL, 0x32CAAB7B40C72493UL, + 0x3C9EBE0A15C9BEBCUL, 0x431D67C49C100D4CUL, + 0x4CC5D4BECB3E42B6UL, 0x597F299CFC657E2AUL, + 0x5FCB6FAB3AD6FAECUL, 0x6C44198C4A475817UL +}; + +static const __constant ulong SHA512_INIT[8] = +{ + 0x6A09E667F3BCC908UL, 0xBB67AE8584CAA73BUL, + 0x3C6EF372FE94F82BUL, 0xA54FF53A5F1D36F1UL, + 0x510E527FADE682D1UL, 0x9B05688C2B3E6C1FUL, + 0x1F83D9ABFB41BD6BUL, 0x5BE0CD19137E2179UL +}; + +#define ROTR64(x, y) rotate((x), 64UL - (y)) + +ulong 
FAST_ROTR64_LO(const uint2 x, const uint y) { return(as_ulong(amd_bitalign(x.s10, x, y))); } +ulong FAST_ROTR64_HI(const uint2 x, const uint y) { return(as_ulong(amd_bitalign(x, x.s10, (y - 32)))); } + +/* +#define BSG5_0(x) (FAST_ROTR64_LO(x, 28) ^ FAST_ROTR64_HI(x, 34) ^ FAST_ROTR64_HI(x, 39)) +#define BSG5_1(x) (FAST_ROTR64_LO(x, 14) ^ FAST_ROTR64_LO(x, 18) ^ ROTR64(x, 41)) +#define SSG5_0(x) (FAST_ROTR64_LO(x, 1) ^ FAST_ROTR64_LO(x, 8) ^ ((x) >> 7)) +#define SSG5_1(x) (FAST_ROTR64_LO(x, 19) ^ FAST_ROTR64_HI(x, 61) ^ ((x) >> 6)) +*/ + +#define BSG5_0(x) (FAST_ROTR64_LO(as_uint2(x), 28) ^ FAST_ROTR64_HI(as_uint2(x), 34) ^ FAST_ROTR64_HI(as_uint2(x), 39)) +#define BSG5_1(x) (FAST_ROTR64_LO(as_uint2(x), 14) ^ FAST_ROTR64_LO(as_uint2(x), 18) ^ FAST_ROTR64_HI(as_uint2(x), 41)) +#define SSG5_0(x) (FAST_ROTR64_LO(as_uint2(x), 1) ^ FAST_ROTR64_LO(as_uint2(x), 8) ^ ((x) >> 7)) +#define SSG5_1(x) (FAST_ROTR64_LO(as_uint2(x), 19) ^ FAST_ROTR64_HI(as_uint2(x), 61) ^ ((x) >> 6)) + +#define CH(X, Y, Z) bitselect(Z, Y, X) +#define MAJ(X, Y, Z) CH((X ^ Z), Y, Z) + +void SHA2_512_STEP2(const ulong *W, uint ord, ulong *r, int i) +{ + ulong T1; + int x = 8 - ord; + + ulong a = r[x & 7], b = r[(x + 1) & 7], c = r[(x + 2) & 7], d = r[(x + 3) & 7]; + ulong e = r[(x + 4) & 7], f = r[(x + 5) & 7], g = r[(x + 6) & 7], h = r[(x + 7) & 7]; + + T1 = h + BSG5_1(e) + CH(e, f, g) + W[i] + K512[i]; + r[(3 + x) & 7] = d + T1; + r[(7 + x) & 7] = T1 + BSG5_0(a) + MAJ(a, b, c); +} + +void SHA512Block(ulong *data, ulong *buf) +{ + ulong W[80], r[8]; + + for(int i = 0; i < 8; ++i) r[i] = buf[i]; + + for(int i = 0; i < 16; ++i) W[i] = data[i]; + + #pragma unroll 4 + for(int i = 16; i < 80; ++i) W[i] = SSG5_1(W[i - 2]) + W[i - 7] + SSG5_0(W[i - 15]) + W[i - 16]; + + #pragma unroll 1 + for(int i = 0; i < 80; i += 8) + { + #pragma unroll + for(int j = 0; j < 8; ++j) + { + SHA2_512_STEP2(W, j, r, i + j); + } + } + + for(int i = 0; i < 8; ++i) buf[i] += r[i]; +} diff --git a/miner.h b/miner.h index 
cb28f7c2..af9cdf90 100644 --- a/miner.h +++ b/miner.h @@ -716,6 +716,16 @@ static inline void flip80(void *dest_p, const void *src_p) dest[i] = swab32(src[i]); } +static inline void flip112(void *dest_p, const void *src_p) +{ + uint32_t *dest = (uint32_t *)dest_p; + const uint32_t *src = (uint32_t *)src_p; + int i; + + for (i = 0; i < 28; i++) + dest[i] = swab32(src[i]); +} + static inline void flip128(void *dest_p, const void *src_p) { uint32_t *dest = (uint32_t *)dest_p; diff --git a/ocl.c b/ocl.c index c34f191b..1dd74ee4 100644 --- a/ocl.c +++ b/ocl.c @@ -761,6 +761,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg size_t readbufsize = 128; if (algorithm->type == ALGO_CRE) readbufsize = 168; else if (algorithm->type == ALGO_DECRED) readbufsize = 192; + else if (algorithm->type == ALGO_LBRY) readbufsize = 112; if (algorithm->rw_buffer_size < 0) { // calc buffer size for neoscrypt diff --git a/sgminer.c b/sgminer.c index 6cb7944b..896f7abc 100644 --- a/sgminer.c +++ b/sgminer.c @@ -5627,6 +5627,9 @@ static void *stratum_sthread(void *userdata) else if (pool->algorithm.type == ALGO_DECRED) { nonce = *((uint32_t *)(work->data + 140)); } + else if (pool->algorithm.type == ALGO_LBRY) { + nonce = *((uint32_t *)(work->data + 108)); + } else { nonce = *((uint32_t *)(work->data + 76)); } @@ -7149,6 +7152,7 @@ static void rebuild_nonce(struct work *work, uint32_t nonce) uint32_t nonce_pos = 76; if (work->pool->algorithm.type == ALGO_CRE) nonce_pos = 140; else if (work->pool->algorithm.type == ALGO_DECRED) nonce_pos = 140; + else if (work->pool->algorithm.type == ALGO_LBRY) nonce_pos = 108; uint32_t *work_nonce = (uint32_t *)(work->data + nonce_pos); diff --git a/sph/Makefile.am b/sph/Makefile.am index bc2f4b23..2c2a6950 100644 --- a/sph/Makefile.am +++ b/sph/Makefile.am @@ -1,3 +1,3 @@ noinst_LIBRARIES = libsph.a -libsph_a_SOURCES = bmw.c echo.c jh.c luffa.c simd.c blake.c cubehash.c groestl.c keccak.c shavite.c skein.c sha2.c sha2big.c 
fugue.c hamsi.c panama.c shabal.c whirlpool.c sha256_Y.c +libsph_a_SOURCES = bmw.c echo.c jh.c luffa.c simd.c blake.c cubehash.c groestl.c keccak.c shavite.c skein.c sha2.c sha2big.c fugue.c hamsi.c panama.c shabal.c whirlpool.c sha256_Y.c ripemd.c diff --git a/sph/ripemd.c b/sph/ripemd.c new file mode 100644 index 00000000..e242ac25 --- /dev/null +++ b/sph/ripemd.c @@ -0,0 +1,833 @@ +/* $Id: ripemd.c 216 2010-06-08 09:46:57Z tp $ */ +/* + * RIPEMD-160 implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +#include +#include + +#include "sph_ripemd.h" + +/* + * Round functions for RIPEMD (original). 
+ */ +#define F(x, y, z) ((((y) ^ (z)) & (x)) ^ (z)) +#define G(x, y, z) (((x) & (y)) | (((x) | (y)) & (z))) +#define H(x, y, z) ((x) ^ (y) ^ (z)) + +static const sph_u32 oIV[5] = { + SPH_C32(0x67452301), SPH_C32(0xEFCDAB89), + SPH_C32(0x98BADCFE), SPH_C32(0x10325476) +}; + +/* + * Round functions for RIPEMD-128 and RIPEMD-160. + */ +#define F1(x, y, z) ((x) ^ (y) ^ (z)) +#define F2(x, y, z) ((((y) ^ (z)) & (x)) ^ (z)) +#define F3(x, y, z) (((x) | ~(y)) ^ (z)) +#define F4(x, y, z) ((((x) ^ (y)) & (z)) ^ (y)) +#define F5(x, y, z) ((x) ^ ((y) | ~(z))) + +static const sph_u32 IV[5] = { + SPH_C32(0x67452301), SPH_C32(0xEFCDAB89), SPH_C32(0x98BADCFE), + SPH_C32(0x10325476), SPH_C32(0xC3D2E1F0) +}; + +#define ROTL SPH_ROTL32 + +/* ===================================================================== */ +/* + * RIPEMD (original hash, deprecated). + */ + +#define FF1(A, B, C, D, X, s) do { \ + sph_u32 tmp = SPH_T32((A) + F(B, C, D) + (X)); \ + (A) = ROTL(tmp, (s)); \ + } while (0) + +#define GG1(A, B, C, D, X, s) do { \ + sph_u32 tmp = SPH_T32((A) + G(B, C, D) \ + + (X) + SPH_C32(0x5A827999)); \ + (A) = ROTL(tmp, (s)); \ + } while (0) + +#define HH1(A, B, C, D, X, s) do { \ + sph_u32 tmp = SPH_T32((A) + H(B, C, D) \ + + (X) + SPH_C32(0x6ED9EBA1)); \ + (A) = ROTL(tmp, (s)); \ + } while (0) + +#define FF2(A, B, C, D, X, s) do { \ + sph_u32 tmp = SPH_T32((A) + F(B, C, D) \ + + (X) + SPH_C32(0x50A28BE6)); \ + (A) = ROTL(tmp, (s)); \ + } while (0) + +#define GG2(A, B, C, D, X, s) do { \ + sph_u32 tmp = SPH_T32((A) + G(B, C, D) + (X)); \ + (A) = ROTL(tmp, (s)); \ + } while (0) + +#define HH2(A, B, C, D, X, s) do { \ + sph_u32 tmp = SPH_T32((A) + H(B, C, D) \ + + (X) + SPH_C32(0x5C4DD124)); \ + (A) = ROTL(tmp, (s)); \ + } while (0) + +#define RIPEMD_ROUND_BODY(in, h) do { \ + sph_u32 A1, B1, C1, D1; \ + sph_u32 A2, B2, C2, D2; \ + sph_u32 tmp; \ + \ + A1 = A2 = (h)[0]; \ + B1 = B2 = (h)[1]; \ + C1 = C2 = (h)[2]; \ + D1 = D2 = (h)[3]; \ + \ + FF1(A1, B1, C1, D1, in( 0), 11); \ + 
FF1(D1, A1, B1, C1, in( 1), 14); \ + FF1(C1, D1, A1, B1, in( 2), 15); \ + FF1(B1, C1, D1, A1, in( 3), 12); \ + FF1(A1, B1, C1, D1, in( 4), 5); \ + FF1(D1, A1, B1, C1, in( 5), 8); \ + FF1(C1, D1, A1, B1, in( 6), 7); \ + FF1(B1, C1, D1, A1, in( 7), 9); \ + FF1(A1, B1, C1, D1, in( 8), 11); \ + FF1(D1, A1, B1, C1, in( 9), 13); \ + FF1(C1, D1, A1, B1, in(10), 14); \ + FF1(B1, C1, D1, A1, in(11), 15); \ + FF1(A1, B1, C1, D1, in(12), 6); \ + FF1(D1, A1, B1, C1, in(13), 7); \ + FF1(C1, D1, A1, B1, in(14), 9); \ + FF1(B1, C1, D1, A1, in(15), 8); \ + \ + GG1(A1, B1, C1, D1, in( 7), 7); \ + GG1(D1, A1, B1, C1, in( 4), 6); \ + GG1(C1, D1, A1, B1, in(13), 8); \ + GG1(B1, C1, D1, A1, in( 1), 13); \ + GG1(A1, B1, C1, D1, in(10), 11); \ + GG1(D1, A1, B1, C1, in( 6), 9); \ + GG1(C1, D1, A1, B1, in(15), 7); \ + GG1(B1, C1, D1, A1, in( 3), 15); \ + GG1(A1, B1, C1, D1, in(12), 7); \ + GG1(D1, A1, B1, C1, in( 0), 12); \ + GG1(C1, D1, A1, B1, in( 9), 15); \ + GG1(B1, C1, D1, A1, in( 5), 9); \ + GG1(A1, B1, C1, D1, in(14), 7); \ + GG1(D1, A1, B1, C1, in( 2), 11); \ + GG1(C1, D1, A1, B1, in(11), 13); \ + GG1(B1, C1, D1, A1, in( 8), 12); \ + \ + HH1(A1, B1, C1, D1, in( 3), 11); \ + HH1(D1, A1, B1, C1, in(10), 13); \ + HH1(C1, D1, A1, B1, in( 2), 14); \ + HH1(B1, C1, D1, A1, in( 4), 7); \ + HH1(A1, B1, C1, D1, in( 9), 14); \ + HH1(D1, A1, B1, C1, in(15), 9); \ + HH1(C1, D1, A1, B1, in( 8), 13); \ + HH1(B1, C1, D1, A1, in( 1), 15); \ + HH1(A1, B1, C1, D1, in(14), 6); \ + HH1(D1, A1, B1, C1, in( 7), 8); \ + HH1(C1, D1, A1, B1, in( 0), 13); \ + HH1(B1, C1, D1, A1, in( 6), 6); \ + HH1(A1, B1, C1, D1, in(11), 12); \ + HH1(D1, A1, B1, C1, in(13), 5); \ + HH1(C1, D1, A1, B1, in( 5), 7); \ + HH1(B1, C1, D1, A1, in(12), 5); \ + \ + FF2(A2, B2, C2, D2, in( 0), 11); \ + FF2(D2, A2, B2, C2, in( 1), 14); \ + FF2(C2, D2, A2, B2, in( 2), 15); \ + FF2(B2, C2, D2, A2, in( 3), 12); \ + FF2(A2, B2, C2, D2, in( 4), 5); \ + FF2(D2, A2, B2, C2, in( 5), 8); \ + FF2(C2, D2, A2, B2, in( 6), 7); \ + FF2(B2, C2, D2, 
A2, in( 7), 9); \ + FF2(A2, B2, C2, D2, in( 8), 11); \ + FF2(D2, A2, B2, C2, in( 9), 13); \ + FF2(C2, D2, A2, B2, in(10), 14); \ + FF2(B2, C2, D2, A2, in(11), 15); \ + FF2(A2, B2, C2, D2, in(12), 6); \ + FF2(D2, A2, B2, C2, in(13), 7); \ + FF2(C2, D2, A2, B2, in(14), 9); \ + FF2(B2, C2, D2, A2, in(15), 8); \ + \ + GG2(A2, B2, C2, D2, in( 7), 7); \ + GG2(D2, A2, B2, C2, in( 4), 6); \ + GG2(C2, D2, A2, B2, in(13), 8); \ + GG2(B2, C2, D2, A2, in( 1), 13); \ + GG2(A2, B2, C2, D2, in(10), 11); \ + GG2(D2, A2, B2, C2, in( 6), 9); \ + GG2(C2, D2, A2, B2, in(15), 7); \ + GG2(B2, C2, D2, A2, in( 3), 15); \ + GG2(A2, B2, C2, D2, in(12), 7); \ + GG2(D2, A2, B2, C2, in( 0), 12); \ + GG2(C2, D2, A2, B2, in( 9), 15); \ + GG2(B2, C2, D2, A2, in( 5), 9); \ + GG2(A2, B2, C2, D2, in(14), 7); \ + GG2(D2, A2, B2, C2, in( 2), 11); \ + GG2(C2, D2, A2, B2, in(11), 13); \ + GG2(B2, C2, D2, A2, in( 8), 12); \ + \ + HH2(A2, B2, C2, D2, in( 3), 11); \ + HH2(D2, A2, B2, C2, in(10), 13); \ + HH2(C2, D2, A2, B2, in( 2), 14); \ + HH2(B2, C2, D2, A2, in( 4), 7); \ + HH2(A2, B2, C2, D2, in( 9), 14); \ + HH2(D2, A2, B2, C2, in(15), 9); \ + HH2(C2, D2, A2, B2, in( 8), 13); \ + HH2(B2, C2, D2, A2, in( 1), 15); \ + HH2(A2, B2, C2, D2, in(14), 6); \ + HH2(D2, A2, B2, C2, in( 7), 8); \ + HH2(C2, D2, A2, B2, in( 0), 13); \ + HH2(B2, C2, D2, A2, in( 6), 6); \ + HH2(A2, B2, C2, D2, in(11), 12); \ + HH2(D2, A2, B2, C2, in(13), 5); \ + HH2(C2, D2, A2, B2, in( 5), 7); \ + HH2(B2, C2, D2, A2, in(12), 5); \ + \ + tmp = SPH_T32((h)[1] + C1 + D2); \ + (h)[1] = SPH_T32((h)[2] + D1 + A2); \ + (h)[2] = SPH_T32((h)[3] + A1 + B2); \ + (h)[3] = SPH_T32((h)[0] + B1 + C2); \ + (h)[0] = tmp; \ + } while (0) + +/* + * One round of RIPEMD. The data must be aligned for 32-bit access. 
+ */ +static void +ripemd_round(const unsigned char *data, sph_u32 r[5]) +{ +#if SPH_LITTLE_FAST + +#define RIPEMD_IN(x) sph_dec32le_aligned(data + (4 * (x))) + +#else + + sph_u32 X_var[16]; + int i; + + for (i = 0; i < 16; i ++) + X_var[i] = sph_dec32le_aligned(data + 4 * i); +#define RIPEMD_IN(x) X_var[x] + +#endif + RIPEMD_ROUND_BODY(RIPEMD_IN, r); +#undef RIPEMD_IN +} + +/* see sph_ripemd.h */ +void +sph_ripemd_init(void *cc) +{ + sph_ripemd_context *sc; + + sc = cc; + memcpy(sc->val, oIV, sizeof sc->val); +#if SPH_64 + sc->count = 0; +#else + sc->count_high = sc->count_low = 0; +#endif +} + +#define RFUN ripemd_round +#define HASH ripemd +#define LE32 1 +#include "md_helper.c" +#undef RFUN +#undef HASH +#undef LE32 + +/* see sph_ripemd.h */ +void +sph_ripemd_close(void *cc, void *dst) +{ + ripemd_close(cc, dst, 4); + sph_ripemd_init(cc); +} + +/* see sph_ripemd.h */ +void +sph_ripemd_comp(const sph_u32 msg[16], sph_u32 val[4]) +{ +#define RIPEMD_IN(x) msg[x] + RIPEMD_ROUND_BODY(RIPEMD_IN, val); +#undef RIPEMD_IN +} + +/* ===================================================================== */ +/* + * RIPEMD-128. + */ + +/* + * Round constants for RIPEMD-128. + */ +#define sK11 SPH_C32(0x00000000) +#define sK12 SPH_C32(0x5A827999) +#define sK13 SPH_C32(0x6ED9EBA1) +#define sK14 SPH_C32(0x8F1BBCDC) + +#define sK21 SPH_C32(0x50A28BE6) +#define sK22 SPH_C32(0x5C4DD124) +#define sK23 SPH_C32(0x6D703EF3) +#define sK24 SPH_C32(0x00000000) + +#define sRR(a, b, c, d, f, s, r, k) do { \ + a = ROTL(SPH_T32(a + f(b, c, d) + r + k), s); \ + } while (0) + +#define sROUND1(a, b, c, d, f, s, r, k) \ + sRR(a ## 1, b ## 1, c ## 1, d ## 1, f, s, r, sK1 ## k) + +#define sROUND2(a, b, c, d, f, s, r, k) \ + sRR(a ## 2, b ## 2, c ## 2, d ## 2, f, s, r, sK2 ## k) + +/* + * This macro defines the body for a RIPEMD-128 compression function + * implementation. 
The "in" parameter should evaluate, when applied to a + * numerical input parameter from 0 to 15, to an expression which yields + * the corresponding input block. The "h" parameter should evaluate to + * an array or pointer expression designating the array of 4 words which + * contains the input and output of the compression function. + */ + +#define RIPEMD128_ROUND_BODY(in, h) do { \ + sph_u32 A1, B1, C1, D1; \ + sph_u32 A2, B2, C2, D2; \ + sph_u32 tmp; \ + \ + A1 = A2 = (h)[0]; \ + B1 = B2 = (h)[1]; \ + C1 = C2 = (h)[2]; \ + D1 = D2 = (h)[3]; \ + \ + sROUND1(A, B, C, D, F1, 11, in( 0), 1); \ + sROUND1(D, A, B, C, F1, 14, in( 1), 1); \ + sROUND1(C, D, A, B, F1, 15, in( 2), 1); \ + sROUND1(B, C, D, A, F1, 12, in( 3), 1); \ + sROUND1(A, B, C, D, F1, 5, in( 4), 1); \ + sROUND1(D, A, B, C, F1, 8, in( 5), 1); \ + sROUND1(C, D, A, B, F1, 7, in( 6), 1); \ + sROUND1(B, C, D, A, F1, 9, in( 7), 1); \ + sROUND1(A, B, C, D, F1, 11, in( 8), 1); \ + sROUND1(D, A, B, C, F1, 13, in( 9), 1); \ + sROUND1(C, D, A, B, F1, 14, in(10), 1); \ + sROUND1(B, C, D, A, F1, 15, in(11), 1); \ + sROUND1(A, B, C, D, F1, 6, in(12), 1); \ + sROUND1(D, A, B, C, F1, 7, in(13), 1); \ + sROUND1(C, D, A, B, F1, 9, in(14), 1); \ + sROUND1(B, C, D, A, F1, 8, in(15), 1); \ + \ + sROUND1(A, B, C, D, F2, 7, in( 7), 2); \ + sROUND1(D, A, B, C, F2, 6, in( 4), 2); \ + sROUND1(C, D, A, B, F2, 8, in(13), 2); \ + sROUND1(B, C, D, A, F2, 13, in( 1), 2); \ + sROUND1(A, B, C, D, F2, 11, in(10), 2); \ + sROUND1(D, A, B, C, F2, 9, in( 6), 2); \ + sROUND1(C, D, A, B, F2, 7, in(15), 2); \ + sROUND1(B, C, D, A, F2, 15, in( 3), 2); \ + sROUND1(A, B, C, D, F2, 7, in(12), 2); \ + sROUND1(D, A, B, C, F2, 12, in( 0), 2); \ + sROUND1(C, D, A, B, F2, 15, in( 9), 2); \ + sROUND1(B, C, D, A, F2, 9, in( 5), 2); \ + sROUND1(A, B, C, D, F2, 11, in( 2), 2); \ + sROUND1(D, A, B, C, F2, 7, in(14), 2); \ + sROUND1(C, D, A, B, F2, 13, in(11), 2); \ + sROUND1(B, C, D, A, F2, 12, in( 8), 2); \ + \ + sROUND1(A, B, C, D, F3, 11, in( 3), 3); 
\ + sROUND1(D, A, B, C, F3, 13, in(10), 3); \ + sROUND1(C, D, A, B, F3, 6, in(14), 3); \ + sROUND1(B, C, D, A, F3, 7, in( 4), 3); \ + sROUND1(A, B, C, D, F3, 14, in( 9), 3); \ + sROUND1(D, A, B, C, F3, 9, in(15), 3); \ + sROUND1(C, D, A, B, F3, 13, in( 8), 3); \ + sROUND1(B, C, D, A, F3, 15, in( 1), 3); \ + sROUND1(A, B, C, D, F3, 14, in( 2), 3); \ + sROUND1(D, A, B, C, F3, 8, in( 7), 3); \ + sROUND1(C, D, A, B, F3, 13, in( 0), 3); \ + sROUND1(B, C, D, A, F3, 6, in( 6), 3); \ + sROUND1(A, B, C, D, F3, 5, in(13), 3); \ + sROUND1(D, A, B, C, F3, 12, in(11), 3); \ + sROUND1(C, D, A, B, F3, 7, in( 5), 3); \ + sROUND1(B, C, D, A, F3, 5, in(12), 3); \ + \ + sROUND1(A, B, C, D, F4, 11, in( 1), 4); \ + sROUND1(D, A, B, C, F4, 12, in( 9), 4); \ + sROUND1(C, D, A, B, F4, 14, in(11), 4); \ + sROUND1(B, C, D, A, F4, 15, in(10), 4); \ + sROUND1(A, B, C, D, F4, 14, in( 0), 4); \ + sROUND1(D, A, B, C, F4, 15, in( 8), 4); \ + sROUND1(C, D, A, B, F4, 9, in(12), 4); \ + sROUND1(B, C, D, A, F4, 8, in( 4), 4); \ + sROUND1(A, B, C, D, F4, 9, in(13), 4); \ + sROUND1(D, A, B, C, F4, 14, in( 3), 4); \ + sROUND1(C, D, A, B, F4, 5, in( 7), 4); \ + sROUND1(B, C, D, A, F4, 6, in(15), 4); \ + sROUND1(A, B, C, D, F4, 8, in(14), 4); \ + sROUND1(D, A, B, C, F4, 6, in( 5), 4); \ + sROUND1(C, D, A, B, F4, 5, in( 6), 4); \ + sROUND1(B, C, D, A, F4, 12, in( 2), 4); \ + \ + sROUND2(A, B, C, D, F4, 8, in( 5), 1); \ + sROUND2(D, A, B, C, F4, 9, in(14), 1); \ + sROUND2(C, D, A, B, F4, 9, in( 7), 1); \ + sROUND2(B, C, D, A, F4, 11, in( 0), 1); \ + sROUND2(A, B, C, D, F4, 13, in( 9), 1); \ + sROUND2(D, A, B, C, F4, 15, in( 2), 1); \ + sROUND2(C, D, A, B, F4, 15, in(11), 1); \ + sROUND2(B, C, D, A, F4, 5, in( 4), 1); \ + sROUND2(A, B, C, D, F4, 7, in(13), 1); \ + sROUND2(D, A, B, C, F4, 7, in( 6), 1); \ + sROUND2(C, D, A, B, F4, 8, in(15), 1); \ + sROUND2(B, C, D, A, F4, 11, in( 8), 1); \ + sROUND2(A, B, C, D, F4, 14, in( 1), 1); \ + sROUND2(D, A, B, C, F4, 14, in(10), 1); \ + sROUND2(C, D, A, B, F4, 12, 
in( 3), 1); \ + sROUND2(B, C, D, A, F4, 6, in(12), 1); \ + \ + sROUND2(A, B, C, D, F3, 9, in( 6), 2); \ + sROUND2(D, A, B, C, F3, 13, in(11), 2); \ + sROUND2(C, D, A, B, F3, 15, in( 3), 2); \ + sROUND2(B, C, D, A, F3, 7, in( 7), 2); \ + sROUND2(A, B, C, D, F3, 12, in( 0), 2); \ + sROUND2(D, A, B, C, F3, 8, in(13), 2); \ + sROUND2(C, D, A, B, F3, 9, in( 5), 2); \ + sROUND2(B, C, D, A, F3, 11, in(10), 2); \ + sROUND2(A, B, C, D, F3, 7, in(14), 2); \ + sROUND2(D, A, B, C, F3, 7, in(15), 2); \ + sROUND2(C, D, A, B, F3, 12, in( 8), 2); \ + sROUND2(B, C, D, A, F3, 7, in(12), 2); \ + sROUND2(A, B, C, D, F3, 6, in( 4), 2); \ + sROUND2(D, A, B, C, F3, 15, in( 9), 2); \ + sROUND2(C, D, A, B, F3, 13, in( 1), 2); \ + sROUND2(B, C, D, A, F3, 11, in( 2), 2); \ + \ + sROUND2(A, B, C, D, F2, 9, in(15), 3); \ + sROUND2(D, A, B, C, F2, 7, in( 5), 3); \ + sROUND2(C, D, A, B, F2, 15, in( 1), 3); \ + sROUND2(B, C, D, A, F2, 11, in( 3), 3); \ + sROUND2(A, B, C, D, F2, 8, in( 7), 3); \ + sROUND2(D, A, B, C, F2, 6, in(14), 3); \ + sROUND2(C, D, A, B, F2, 6, in( 6), 3); \ + sROUND2(B, C, D, A, F2, 14, in( 9), 3); \ + sROUND2(A, B, C, D, F2, 12, in(11), 3); \ + sROUND2(D, A, B, C, F2, 13, in( 8), 3); \ + sROUND2(C, D, A, B, F2, 5, in(12), 3); \ + sROUND2(B, C, D, A, F2, 14, in( 2), 3); \ + sROUND2(A, B, C, D, F2, 13, in(10), 3); \ + sROUND2(D, A, B, C, F2, 13, in( 0), 3); \ + sROUND2(C, D, A, B, F2, 7, in( 4), 3); \ + sROUND2(B, C, D, A, F2, 5, in(13), 3); \ + \ + sROUND2(A, B, C, D, F1, 15, in( 8), 4); \ + sROUND2(D, A, B, C, F1, 5, in( 6), 4); \ + sROUND2(C, D, A, B, F1, 8, in( 4), 4); \ + sROUND2(B, C, D, A, F1, 11, in( 1), 4); \ + sROUND2(A, B, C, D, F1, 14, in( 3), 4); \ + sROUND2(D, A, B, C, F1, 14, in(11), 4); \ + sROUND2(C, D, A, B, F1, 6, in(15), 4); \ + sROUND2(B, C, D, A, F1, 14, in( 0), 4); \ + sROUND2(A, B, C, D, F1, 6, in( 5), 4); \ + sROUND2(D, A, B, C, F1, 9, in(12), 4); \ + sROUND2(C, D, A, B, F1, 12, in( 2), 4); \ + sROUND2(B, C, D, A, F1, 9, in(13), 4); \ + sROUND2(A, B, 
C, D, F1, 12, in( 9), 4); \ + sROUND2(D, A, B, C, F1, 5, in( 7), 4); \ + sROUND2(C, D, A, B, F1, 15, in(10), 4); \ + sROUND2(B, C, D, A, F1, 8, in(14), 4); \ + \ + tmp = SPH_T32((h)[1] + C1 + D2); \ + (h)[1] = SPH_T32((h)[2] + D1 + A2); \ + (h)[2] = SPH_T32((h)[3] + A1 + B2); \ + (h)[3] = SPH_T32((h)[0] + B1 + C2); \ + (h)[0] = tmp; \ + } while (0) + +/* + * One round of RIPEMD-128. The data must be aligned for 32-bit access. + */ +static void +ripemd128_round(const unsigned char *data, sph_u32 r[5]) +{ +#if SPH_LITTLE_FAST + +#define RIPEMD128_IN(x) sph_dec32le_aligned(data + (4 * (x))) + +#else + + sph_u32 X_var[16]; + int i; + + for (i = 0; i < 16; i ++) + X_var[i] = sph_dec32le_aligned(data + 4 * i); +#define RIPEMD128_IN(x) X_var[x] + +#endif + RIPEMD128_ROUND_BODY(RIPEMD128_IN, r); +#undef RIPEMD128_IN +} + +/* see sph_ripemd.h */ +void +sph_ripemd128_init(void *cc) +{ + sph_ripemd128_context *sc; + + sc = cc; + memcpy(sc->val, IV, sizeof sc->val); +#if SPH_64 + sc->count = 0; +#else + sc->count_high = sc->count_low = 0; +#endif +} + +#define RFUN ripemd128_round +#define HASH ripemd128 +#define LE32 1 +#include "md_helper.c" +#undef RFUN +#undef HASH +#undef LE32 + +/* see sph_ripemd.h */ +void +sph_ripemd128_close(void *cc, void *dst) +{ + ripemd128_close(cc, dst, 4); + sph_ripemd128_init(cc); +} + +/* see sph_ripemd.h */ +void +sph_ripemd128_comp(const sph_u32 msg[16], sph_u32 val[4]) +{ +#define RIPEMD128_IN(x) msg[x] + RIPEMD128_ROUND_BODY(RIPEMD128_IN, val); +#undef RIPEMD128_IN +} + +/* ===================================================================== */ +/* + * RIPEMD-160. + */ + +/* + * Round constants for RIPEMD-160. 
+ */ +#define K11 SPH_C32(0x00000000) +#define K12 SPH_C32(0x5A827999) +#define K13 SPH_C32(0x6ED9EBA1) +#define K14 SPH_C32(0x8F1BBCDC) +#define K15 SPH_C32(0xA953FD4E) + +#define K21 SPH_C32(0x50A28BE6) +#define K22 SPH_C32(0x5C4DD124) +#define K23 SPH_C32(0x6D703EF3) +#define K24 SPH_C32(0x7A6D76E9) +#define K25 SPH_C32(0x00000000) + +#define RR(a, b, c, d, e, f, s, r, k) do { \ + a = SPH_T32(ROTL(SPH_T32(a + f(b, c, d) + r + k), s) + e); \ + c = ROTL(c, 10); \ + } while (0) + +#define ROUND1(a, b, c, d, e, f, s, r, k) \ + RR(a ## 1, b ## 1, c ## 1, d ## 1, e ## 1, f, s, r, K1 ## k) + +#define ROUND2(a, b, c, d, e, f, s, r, k) \ + RR(a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k) + +/* + * This macro defines the body for a RIPEMD-160 compression function + * implementation. The "in" parameter should evaluate, when applied to a + * numerical input parameter from 0 to 15, to an expression which yields + * the corresponding input block. The "h" parameter should evaluate to + * an array or pointer expression designating the array of 5 words which + * contains the input and output of the compression function. 
+ */ + +#define RIPEMD160_ROUND_BODY(in, h) do { \ + sph_u32 A1, B1, C1, D1, E1; \ + sph_u32 A2, B2, C2, D2, E2; \ + sph_u32 tmp; \ + \ + A1 = A2 = (h)[0]; \ + B1 = B2 = (h)[1]; \ + C1 = C2 = (h)[2]; \ + D1 = D2 = (h)[3]; \ + E1 = E2 = (h)[4]; \ + \ + ROUND1(A, B, C, D, E, F1, 11, in( 0), 1); \ + ROUND1(E, A, B, C, D, F1, 14, in( 1), 1); \ + ROUND1(D, E, A, B, C, F1, 15, in( 2), 1); \ + ROUND1(C, D, E, A, B, F1, 12, in( 3), 1); \ + ROUND1(B, C, D, E, A, F1, 5, in( 4), 1); \ + ROUND1(A, B, C, D, E, F1, 8, in( 5), 1); \ + ROUND1(E, A, B, C, D, F1, 7, in( 6), 1); \ + ROUND1(D, E, A, B, C, F1, 9, in( 7), 1); \ + ROUND1(C, D, E, A, B, F1, 11, in( 8), 1); \ + ROUND1(B, C, D, E, A, F1, 13, in( 9), 1); \ + ROUND1(A, B, C, D, E, F1, 14, in(10), 1); \ + ROUND1(E, A, B, C, D, F1, 15, in(11), 1); \ + ROUND1(D, E, A, B, C, F1, 6, in(12), 1); \ + ROUND1(C, D, E, A, B, F1, 7, in(13), 1); \ + ROUND1(B, C, D, E, A, F1, 9, in(14), 1); \ + ROUND1(A, B, C, D, E, F1, 8, in(15), 1); \ + \ + ROUND1(E, A, B, C, D, F2, 7, in( 7), 2); \ + ROUND1(D, E, A, B, C, F2, 6, in( 4), 2); \ + ROUND1(C, D, E, A, B, F2, 8, in(13), 2); \ + ROUND1(B, C, D, E, A, F2, 13, in( 1), 2); \ + ROUND1(A, B, C, D, E, F2, 11, in(10), 2); \ + ROUND1(E, A, B, C, D, F2, 9, in( 6), 2); \ + ROUND1(D, E, A, B, C, F2, 7, in(15), 2); \ + ROUND1(C, D, E, A, B, F2, 15, in( 3), 2); \ + ROUND1(B, C, D, E, A, F2, 7, in(12), 2); \ + ROUND1(A, B, C, D, E, F2, 12, in( 0), 2); \ + ROUND1(E, A, B, C, D, F2, 15, in( 9), 2); \ + ROUND1(D, E, A, B, C, F2, 9, in( 5), 2); \ + ROUND1(C, D, E, A, B, F2, 11, in( 2), 2); \ + ROUND1(B, C, D, E, A, F2, 7, in(14), 2); \ + ROUND1(A, B, C, D, E, F2, 13, in(11), 2); \ + ROUND1(E, A, B, C, D, F2, 12, in( 8), 2); \ + \ + ROUND1(D, E, A, B, C, F3, 11, in( 3), 3); \ + ROUND1(C, D, E, A, B, F3, 13, in(10), 3); \ + ROUND1(B, C, D, E, A, F3, 6, in(14), 3); \ + ROUND1(A, B, C, D, E, F3, 7, in( 4), 3); \ + ROUND1(E, A, B, C, D, F3, 14, in( 9), 3); \ + ROUND1(D, E, A, B, C, F3, 9, in(15), 3); \ + ROUND1(C, 
D, E, A, B, F3, 13, in( 8), 3); \ + ROUND1(B, C, D, E, A, F3, 15, in( 1), 3); \ + ROUND1(A, B, C, D, E, F3, 14, in( 2), 3); \ + ROUND1(E, A, B, C, D, F3, 8, in( 7), 3); \ + ROUND1(D, E, A, B, C, F3, 13, in( 0), 3); \ + ROUND1(C, D, E, A, B, F3, 6, in( 6), 3); \ + ROUND1(B, C, D, E, A, F3, 5, in(13), 3); \ + ROUND1(A, B, C, D, E, F3, 12, in(11), 3); \ + ROUND1(E, A, B, C, D, F3, 7, in( 5), 3); \ + ROUND1(D, E, A, B, C, F3, 5, in(12), 3); \ + \ + ROUND1(C, D, E, A, B, F4, 11, in( 1), 4); \ + ROUND1(B, C, D, E, A, F4, 12, in( 9), 4); \ + ROUND1(A, B, C, D, E, F4, 14, in(11), 4); \ + ROUND1(E, A, B, C, D, F4, 15, in(10), 4); \ + ROUND1(D, E, A, B, C, F4, 14, in( 0), 4); \ + ROUND1(C, D, E, A, B, F4, 15, in( 8), 4); \ + ROUND1(B, C, D, E, A, F4, 9, in(12), 4); \ + ROUND1(A, B, C, D, E, F4, 8, in( 4), 4); \ + ROUND1(E, A, B, C, D, F4, 9, in(13), 4); \ + ROUND1(D, E, A, B, C, F4, 14, in( 3), 4); \ + ROUND1(C, D, E, A, B, F4, 5, in( 7), 4); \ + ROUND1(B, C, D, E, A, F4, 6, in(15), 4); \ + ROUND1(A, B, C, D, E, F4, 8, in(14), 4); \ + ROUND1(E, A, B, C, D, F4, 6, in( 5), 4); \ + ROUND1(D, E, A, B, C, F4, 5, in( 6), 4); \ + ROUND1(C, D, E, A, B, F4, 12, in( 2), 4); \ + \ + ROUND1(B, C, D, E, A, F5, 9, in( 4), 5); \ + ROUND1(A, B, C, D, E, F5, 15, in( 0), 5); \ + ROUND1(E, A, B, C, D, F5, 5, in( 5), 5); \ + ROUND1(D, E, A, B, C, F5, 11, in( 9), 5); \ + ROUND1(C, D, E, A, B, F5, 6, in( 7), 5); \ + ROUND1(B, C, D, E, A, F5, 8, in(12), 5); \ + ROUND1(A, B, C, D, E, F5, 13, in( 2), 5); \ + ROUND1(E, A, B, C, D, F5, 12, in(10), 5); \ + ROUND1(D, E, A, B, C, F5, 5, in(14), 5); \ + ROUND1(C, D, E, A, B, F5, 12, in( 1), 5); \ + ROUND1(B, C, D, E, A, F5, 13, in( 3), 5); \ + ROUND1(A, B, C, D, E, F5, 14, in( 8), 5); \ + ROUND1(E, A, B, C, D, F5, 11, in(11), 5); \ + ROUND1(D, E, A, B, C, F5, 8, in( 6), 5); \ + ROUND1(C, D, E, A, B, F5, 5, in(15), 5); \ + ROUND1(B, C, D, E, A, F5, 6, in(13), 5); \ + \ + ROUND2(A, B, C, D, E, F5, 8, in( 5), 1); \ + ROUND2(E, A, B, C, D, F5, 9, in(14), 1); 
\ + ROUND2(D, E, A, B, C, F5, 9, in( 7), 1); \ + ROUND2(C, D, E, A, B, F5, 11, in( 0), 1); \ + ROUND2(B, C, D, E, A, F5, 13, in( 9), 1); \ + ROUND2(A, B, C, D, E, F5, 15, in( 2), 1); \ + ROUND2(E, A, B, C, D, F5, 15, in(11), 1); \ + ROUND2(D, E, A, B, C, F5, 5, in( 4), 1); \ + ROUND2(C, D, E, A, B, F5, 7, in(13), 1); \ + ROUND2(B, C, D, E, A, F5, 7, in( 6), 1); \ + ROUND2(A, B, C, D, E, F5, 8, in(15), 1); \ + ROUND2(E, A, B, C, D, F5, 11, in( 8), 1); \ + ROUND2(D, E, A, B, C, F5, 14, in( 1), 1); \ + ROUND2(C, D, E, A, B, F5, 14, in(10), 1); \ + ROUND2(B, C, D, E, A, F5, 12, in( 3), 1); \ + ROUND2(A, B, C, D, E, F5, 6, in(12), 1); \ + \ + ROUND2(E, A, B, C, D, F4, 9, in( 6), 2); \ + ROUND2(D, E, A, B, C, F4, 13, in(11), 2); \ + ROUND2(C, D, E, A, B, F4, 15, in( 3), 2); \ + ROUND2(B, C, D, E, A, F4, 7, in( 7), 2); \ + ROUND2(A, B, C, D, E, F4, 12, in( 0), 2); \ + ROUND2(E, A, B, C, D, F4, 8, in(13), 2); \ + ROUND2(D, E, A, B, C, F4, 9, in( 5), 2); \ + ROUND2(C, D, E, A, B, F4, 11, in(10), 2); \ + ROUND2(B, C, D, E, A, F4, 7, in(14), 2); \ + ROUND2(A, B, C, D, E, F4, 7, in(15), 2); \ + ROUND2(E, A, B, C, D, F4, 12, in( 8), 2); \ + ROUND2(D, E, A, B, C, F4, 7, in(12), 2); \ + ROUND2(C, D, E, A, B, F4, 6, in( 4), 2); \ + ROUND2(B, C, D, E, A, F4, 15, in( 9), 2); \ + ROUND2(A, B, C, D, E, F4, 13, in( 1), 2); \ + ROUND2(E, A, B, C, D, F4, 11, in( 2), 2); \ + \ + ROUND2(D, E, A, B, C, F3, 9, in(15), 3); \ + ROUND2(C, D, E, A, B, F3, 7, in( 5), 3); \ + ROUND2(B, C, D, E, A, F3, 15, in( 1), 3); \ + ROUND2(A, B, C, D, E, F3, 11, in( 3), 3); \ + ROUND2(E, A, B, C, D, F3, 8, in( 7), 3); \ + ROUND2(D, E, A, B, C, F3, 6, in(14), 3); \ + ROUND2(C, D, E, A, B, F3, 6, in( 6), 3); \ + ROUND2(B, C, D, E, A, F3, 14, in( 9), 3); \ + ROUND2(A, B, C, D, E, F3, 12, in(11), 3); \ + ROUND2(E, A, B, C, D, F3, 13, in( 8), 3); \ + ROUND2(D, E, A, B, C, F3, 5, in(12), 3); \ + ROUND2(C, D, E, A, B, F3, 14, in( 2), 3); \ + ROUND2(B, C, D, E, A, F3, 13, in(10), 3); \ + ROUND2(A, B, C, D, E, F3, 13, 
in( 0), 3); \ + ROUND2(E, A, B, C, D, F3, 7, in( 4), 3); \ + ROUND2(D, E, A, B, C, F3, 5, in(13), 3); \ + \ + ROUND2(C, D, E, A, B, F2, 15, in( 8), 4); \ + ROUND2(B, C, D, E, A, F2, 5, in( 6), 4); \ + ROUND2(A, B, C, D, E, F2, 8, in( 4), 4); \ + ROUND2(E, A, B, C, D, F2, 11, in( 1), 4); \ + ROUND2(D, E, A, B, C, F2, 14, in( 3), 4); \ + ROUND2(C, D, E, A, B, F2, 14, in(11), 4); \ + ROUND2(B, C, D, E, A, F2, 6, in(15), 4); \ + ROUND2(A, B, C, D, E, F2, 14, in( 0), 4); \ + ROUND2(E, A, B, C, D, F2, 6, in( 5), 4); \ + ROUND2(D, E, A, B, C, F2, 9, in(12), 4); \ + ROUND2(C, D, E, A, B, F2, 12, in( 2), 4); \ + ROUND2(B, C, D, E, A, F2, 9, in(13), 4); \ + ROUND2(A, B, C, D, E, F2, 12, in( 9), 4); \ + ROUND2(E, A, B, C, D, F2, 5, in( 7), 4); \ + ROUND2(D, E, A, B, C, F2, 15, in(10), 4); \ + ROUND2(C, D, E, A, B, F2, 8, in(14), 4); \ + \ + ROUND2(B, C, D, E, A, F1, 8, in(12), 5); \ + ROUND2(A, B, C, D, E, F1, 5, in(15), 5); \ + ROUND2(E, A, B, C, D, F1, 12, in(10), 5); \ + ROUND2(D, E, A, B, C, F1, 9, in( 4), 5); \ + ROUND2(C, D, E, A, B, F1, 12, in( 1), 5); \ + ROUND2(B, C, D, E, A, F1, 5, in( 5), 5); \ + ROUND2(A, B, C, D, E, F1, 14, in( 8), 5); \ + ROUND2(E, A, B, C, D, F1, 6, in( 7), 5); \ + ROUND2(D, E, A, B, C, F1, 8, in( 6), 5); \ + ROUND2(C, D, E, A, B, F1, 13, in( 2), 5); \ + ROUND2(B, C, D, E, A, F1, 6, in(13), 5); \ + ROUND2(A, B, C, D, E, F1, 5, in(14), 5); \ + ROUND2(E, A, B, C, D, F1, 15, in( 0), 5); \ + ROUND2(D, E, A, B, C, F1, 13, in( 3), 5); \ + ROUND2(C, D, E, A, B, F1, 11, in( 9), 5); \ + ROUND2(B, C, D, E, A, F1, 11, in(11), 5); \ + \ + tmp = SPH_T32((h)[1] + C1 + D2); \ + (h)[1] = SPH_T32((h)[2] + D1 + E2); \ + (h)[2] = SPH_T32((h)[3] + E1 + A2); \ + (h)[3] = SPH_T32((h)[4] + A1 + B2); \ + (h)[4] = SPH_T32((h)[0] + B1 + C2); \ + (h)[0] = tmp; \ + } while (0) + +/* + * One round of RIPEMD-160. The data must be aligned for 32-bit access. 
+ */ +static void +ripemd160_round(const unsigned char *data, sph_u32 r[5]) +{ +#if SPH_LITTLE_FAST + +#define RIPEMD160_IN(x) sph_dec32le_aligned(data + (4 * (x))) + +#else + + sph_u32 X_var[16]; + int i; + + for (i = 0; i < 16; i ++) + X_var[i] = sph_dec32le_aligned(data + 4 * i); +#define RIPEMD160_IN(x) X_var[x] + +#endif + RIPEMD160_ROUND_BODY(RIPEMD160_IN, r); +#undef RIPEMD160_IN +} + +/* see sph_ripemd.h */ +void +sph_ripemd160_init(void *cc) +{ + sph_ripemd160_context *sc; + + sc = cc; + memcpy(sc->val, IV, sizeof sc->val); +#if SPH_64 + sc->count = 0; +#else + sc->count_high = sc->count_low = 0; +#endif +} + +#define RFUN ripemd160_round +#define HASH ripemd160 +#define LE32 1 +#include "md_helper.c" +#undef RFUN +#undef HASH +#undef LE32 + +/* see sph_ripemd.h */ +void +sph_ripemd160_close(void *cc, void *dst) +{ + ripemd160_close(cc, dst, 5); + sph_ripemd160_init(cc); +} + +/* see sph_ripemd.h */ +void +sph_ripemd160_comp(const sph_u32 msg[16], sph_u32 val[5]) +{ +#define RIPEMD160_IN(x) msg[x] + RIPEMD160_ROUND_BODY(RIPEMD160_IN, val); +#undef RIPEMD160_IN +} diff --git a/sph/sph_ripemd.h b/sph/sph_ripemd.h new file mode 100644 index 00000000..25677683 --- /dev/null +++ b/sph/sph_ripemd.h @@ -0,0 +1,273 @@ +/* $Id: sph_ripemd.h 216 2010-06-08 09:46:57Z tp $ */ +/** + * RIPEMD, RIPEMD-128 and RIPEMD-160 interface. + * + * RIPEMD was first described in: Research and Development in Advanced + * Communication Technologies in Europe, "RIPE Integrity Primitives: + * Final Report of RACE Integrity Primitives Evaluation (R1040)", RACE, + * June 1992. + * + * A new, strengthened version, dubbed RIPEMD-160, was published in: H. + * Dobbertin, A. Bosselaers, and B. Preneel, "RIPEMD-160, a strengthened + * version of RIPEMD", Fast Software Encryption - FSE'96, LNCS 1039, + * Springer (1996), pp. 71--82. + * + * This article describes both RIPEMD-160, with a 160-bit output, and a + * reduced version called RIPEMD-128, which has a 128-bit output. 
RIPEMD-128 + * was meant as a "drop-in" replacement for any hash function with 128-bit + * output, especially the original RIPEMD. + * + * @warning Collisions, and an efficient method to build other collisions, + * have been published for the original RIPEMD, which is thus considered as + * cryptographically broken. It is also very rarely encountered, and there + * seems to exist no free description or implementation of RIPEMD (except + * the sphlib code, of course). As of january 2007, RIPEMD-128 and RIPEMD-160 + * seem as secure as their output length allows. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file sph_ripemd.h
+ * @author Thomas Pornin
+ */
+
+#ifndef SPH_RIPEMD_H__
+#define SPH_RIPEMD_H__
+
+#include <stddef.h>
+#include "sph_types.h"
+
+/**
+ * Output size (in bits) for RIPEMD.
+ */
+#define SPH_SIZE_ripemd 128
+
+/**
+ * Output size (in bits) for RIPEMD-128.
+ */
+#define SPH_SIZE_ripemd128 128
+
+/**
+ * Output size (in bits) for RIPEMD-160.
+ */
+#define SPH_SIZE_ripemd160 160
+
+/**
+ * This structure is a context for RIPEMD computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * a RIPEMD computation has been performed, the context can be reused for
+ * another computation.
+ *
+ * The contents of this structure are private. A running RIPEMD computation
+ * can be cloned by copying the context (e.g. with a simple
+ * memcpy()).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char buf[64]; /* first field, for alignment */
+	sph_u32 val[4];
+#if SPH_64
+	sph_u64 count;
+#else
+	sph_u32 count_high, count_low;
+#endif
+#endif
+} sph_ripemd_context;
+
+/**
+ * Initialize a RIPEMD context. This process performs no memory allocation.
+ *
+ * @param cc the RIPEMD context (pointer to
+ * a sph_ripemd_context)
+ */
+void sph_ripemd_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that len is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc the RIPEMD context
+ * @param data the input data
+ * @param len the input data length (in bytes)
+ */
+void sph_ripemd(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current RIPEMD computation and output the result into the
+ * provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (16 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc the RIPEMD context
+ * @param dst the destination buffer
+ */
+void sph_ripemd_close(void *cc, void *dst);
+
+/**
+ * Apply the RIPEMD compression function on the provided data. The
+ * msg parameter contains the 16 32-bit input blocks,
+ * as numerical values (hence after the little-endian decoding). The
+ * val parameter contains the 4 32-bit input blocks for
+ * the compression function; the output is written in place in this
+ * array.
+ *
+ * @param msg the message block (16 values)
+ * @param val the function 128-bit input and output
+ */
+void sph_ripemd_comp(const sph_u32 msg[16], sph_u32 val[4]);
+
+/* ===================================================================== */
+
+/**
+ * This structure is a context for RIPEMD-128 computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * a RIPEMD-128 computation has been performed, the context can be reused for
+ * another computation.
+ *
+ * The contents of this structure are private. A running RIPEMD-128 computation
+ * can be cloned by copying the context (e.g. with a simple
+ * memcpy()).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char buf[64]; /* first field, for alignment */
+	sph_u32 val[4];
+#if SPH_64
+	sph_u64 count;
+#else
+	sph_u32 count_high, count_low;
+#endif
+#endif
+} sph_ripemd128_context;
+
+/**
+ * Initialize a RIPEMD-128 context. This process performs no memory allocation.
+ *
+ * @param cc the RIPEMD-128 context (pointer to
+ * a sph_ripemd128_context)
+ */
+void sph_ripemd128_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that len is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc the RIPEMD-128 context
+ * @param data the input data
+ * @param len the input data length (in bytes)
+ */
+void sph_ripemd128(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current RIPEMD-128 computation and output the result into the
+ * provided buffer.
The destination buffer must be wide enough to
+ * accommodate the result (16 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc the RIPEMD-128 context
+ * @param dst the destination buffer
+ */
+void sph_ripemd128_close(void *cc, void *dst);
+
+/**
+ * Apply the RIPEMD-128 compression function on the provided data. The
+ * msg parameter contains the 16 32-bit input blocks,
+ * as numerical values (hence after the little-endian decoding). The
+ * val parameter contains the 4 32-bit input blocks for
+ * the compression function; the output is written in place in this
+ * array.
+ *
+ * @param msg the message block (16 values)
+ * @param val the function 128-bit input and output
+ */
+void sph_ripemd128_comp(const sph_u32 msg[16], sph_u32 val[4]);
+
+/* ===================================================================== */
+
+/**
+ * This structure is a context for RIPEMD-160 computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * a RIPEMD-160 computation has been performed, the context can be reused for
+ * another computation.
+ *
+ * The contents of this structure are private. A running RIPEMD-160 computation
+ * can be cloned by copying the context (e.g. with a simple
+ * memcpy()).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char buf[64]; /* first field, for alignment */
+	sph_u32 val[5];
+#if SPH_64
+	sph_u64 count;
+#else
+	sph_u32 count_high, count_low;
+#endif
+#endif
+} sph_ripemd160_context;
+
+/**
+ * Initialize a RIPEMD-160 context. This process performs no memory allocation.
+ *
+ * @param cc the RIPEMD-160 context (pointer to
+ * a sph_ripemd160_context)
+ */
+void sph_ripemd160_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that len is zero
+ * (in which case this function does nothing).
+ * + * @param cc the RIPEMD-160 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_ripemd160(void *cc, const void *data, size_t len); + +/** + * Terminate the current RIPEMD-160 computation and output the result into the + * provided buffer. The destination buffer must be wide enough to + * accomodate the result (20 bytes). The context is automatically + * reinitialized. + * + * @param cc the RIPEMD-160 context + * @param dst the destination buffer + */ +void sph_ripemd160_close(void *cc, void *dst); + +/** + * Apply the RIPEMD-160 compression function on the provided data. The + * msg parameter contains the 16 32-bit input blocks, + * as numerical values (hence after the little-endian decoding). The + * val parameter contains the 5 32-bit input blocks for + * the compression function; the output is written in place in this + * array. + * + * @param msg the message block (16 values) + * @param val the function 160-bit input and output + */ +void sph_ripemd160_comp(const sph_u32 msg[16], sph_u32 val[5]); + +#endif diff --git a/util.c b/util.c index 8bf6e22e..b965b54c 100644 --- a/util.c +++ b/util.c @@ -1505,34 +1505,42 @@ static char *blank_merkel = "000000000000000000000000000000000000000000000000000 static bool parse_notify(struct pool *pool, json_t *val) { char *job_id, *prev_hash, *coinbase1, *coinbase2, *bbversion, *nbit, - *ntime, *header; + *ntime, *header, *trie; size_t cb1_len, cb2_len, alloc_len, header_len; unsigned char *cb1, *cb2; - bool clean, ret = false; - int merkles, i; + bool clean, ret = false, has_trie = false; + int merkles, i = 0; json_t *arr; - arr = json_array_get(val, 4); + has_trie = json_array_size(val) == 10; + + job_id = json_array_string(val, i++); + prev_hash = json_array_string(val, i++); + if (has_trie) { + trie = json_array_string(val, i++); + } + coinbase1 = json_array_string(val, i++); + coinbase2 = json_array_string(val, i++); + + arr = json_array_get(val, i++); if (!arr || 
!json_is_array(arr))
 		goto out;
 
 	merkles = json_array_size(arr);
 
-	job_id = json_array_string(val, 0);
-	prev_hash = json_array_string(val, 1);
-	coinbase1 = json_array_string(val, 2);
-	coinbase2 = json_array_string(val, 3);
-	bbversion = json_array_string(val, 5);
-	nbit = json_array_string(val, 6);
-	ntime = json_array_string(val, 7);
-	clean = json_is_true(json_array_get(val, 8));
+	bbversion = json_array_string(val, i++);
+	nbit = json_array_string(val, i++);
+	ntime = json_array_string(val, i++);
+	clean = json_is_true(json_array_get(val, i));
 
-	if (!job_id || !prev_hash || !coinbase1 || !coinbase2 || !bbversion || !nbit || !ntime) {
+	if (!job_id || !prev_hash || !coinbase1 || !coinbase2 || !bbversion || !nbit || !ntime || (has_trie && !trie)) {
 		/* Annoying but we must not leak memory */
 		if (job_id)
 			free(job_id);
 		if (prev_hash)
 			free(prev_hash);
+		if (has_trie && trie)
+			free(trie);
 		if (coinbase1)
 			free(coinbase1);
 		if (coinbase2)
@@ -1589,10 +1597,11 @@ static bool parse_notify(struct pool *pool, json_t *val)
 	pool->merkle_offset /= 2;
 	header = (char *)alloca(257);
 	snprintf(header, 257,
-		"%s%s%s%s%s%s",
+		"%s%s%s%s%s%s%s",
 		pool->swork.bbversion,
 		pool->swork.prev_hash,
 		blank_merkel,
+		has_trie ? trie : "",
 		pool->swork.ntime,
 		pool->swork.nbit,
 		"00000000" /* nonce */