lbry algo (stratum only)
Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>
This commit is contained in:
parent
dbb9507d2b
commit
5703a74e22
@ -50,6 +50,8 @@ ccminer_SOURCES = elist.h miner.h compat.h \
|
||||
sph/cubehash.c sph/echo.c sph/luffa.c sph/sha2.c sph/shavite.c sph/simd.c \
|
||||
sph/hamsi.c sph/hamsi_helper.c sph/streebog.c \
|
||||
sph/shabal.c sph/whirlpool.c sph/sha2big.c sph/haval.c \
|
||||
sph/ripemd.c sph/sph_sha2.c \
|
||||
lbry/lbry.cu lbry/cuda_ripemd160.cu lbry/cuda_sha256_lbry.cu lbry/cuda_sha512_lbry.cu \
|
||||
qubit/qubit.cu qubit/qubit_luffa512.cu qubit/deep.cu qubit/luffa.cu \
|
||||
x11/x11.cu x11/fresh.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
|
||||
x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \
|
||||
|
@ -1,5 +1,5 @@
|
||||
|
||||
ccMiner preview 1.8-dev (May 2016) "Pascal and x11evo algo"
|
||||
ccMiner 1.8 Preview (July 2016) "CUDA 8, lbry and x11evo algos"
|
||||
---------------------------------------------------------------
|
||||
|
||||
***************************************************************
|
||||
@ -28,6 +28,7 @@ Decred (Blake256 14-rounds - 180 bytes)
|
||||
HeavyCoin & MjollnirCoin
|
||||
FugueCoin
|
||||
GroestlCoin & Myriad-Groestl
|
||||
Lbry Credits
|
||||
JackpotCoin
|
||||
QuarkCoin family & AnimeCoin
|
||||
TalkCoin
|
||||
@ -35,13 +36,13 @@ DarkCoin and other X11 coins
|
||||
Chaincoin and Flaxscript (C11)
|
||||
Saffroncoin blake (256 14-rounds)
|
||||
BlakeCoin (256 8-rounds)
|
||||
Midnight (BMW 256)
|
||||
Qubit (Digibyte, ...)
|
||||
Luffa (Joincoin)
|
||||
Keccak (Maxcoin)
|
||||
Pentablake (Blake 512 x5)
|
||||
1Coin Triple S
|
||||
Neoscrypt (FeatherCoin)
|
||||
Revolver (X11evo)
|
||||
Scrypt and Scrypt:N
|
||||
Scrypt-Jane (Chacha)
|
||||
Sibcoin (sib)
|
||||
@ -82,6 +83,7 @@ its command line interface and options.
|
||||
heavy use to mine Heavycoin
|
||||
jackpot use to mine Jackpotcoin
|
||||
keccak use to mine Maxcoin
|
||||
lbry use to mine LBRY Credits
|
||||
luffa use to mine Joincoin
|
||||
lyra2 use to mine Vertcoin
|
||||
mjollnir use to mine Mjollnircoin
|
||||
@ -151,6 +153,7 @@ its command line interface and options.
|
||||
--max-diff=N Only mine if net difficulty is less than specified value
|
||||
--pstate=0 will force the Geforce 9xx to run in P0 P-State
|
||||
--plimit=150W set the gpu power limit, allow multiple values for N cards
|
||||
--tlimit=85 Set the gpu thermal limit (windows only)
|
||||
--keep-clocks prevent reset clocks and/or power limit on exit
|
||||
--show-diff display submitted block and net difficulty
|
||||
-B, --background run the miner in the background
|
||||
@ -242,6 +245,7 @@ features.
|
||||
|
||||
July 2016 v1.8.0
|
||||
Pascal support with cuda 8
|
||||
lbry new multi sha / ripemd algo (LBC)
|
||||
x11evo algo (XRE)
|
||||
Lyra2v2, Neoscrypt and Decred improvements
|
||||
Enhance windows NVAPI clock and power limits
|
||||
|
2
algos.h
2
algos.h
@ -19,6 +19,7 @@ enum sha_algos {
|
||||
ALGO_HEAVY, /* Heavycoin hash */
|
||||
ALGO_KECCAK,
|
||||
ALGO_JACKPOT,
|
||||
ALGO_LBRY,
|
||||
ALGO_LUFFA,
|
||||
ALGO_LYRA2,
|
||||
ALGO_LYRA2v2,
|
||||
@ -67,6 +68,7 @@ static const char *algo_names[] = {
|
||||
"heavy",
|
||||
"keccak",
|
||||
"jackpot",
|
||||
"lbry",
|
||||
"luffa",
|
||||
"lyra2",
|
||||
"lyra2v2",
|
||||
|
25
ccminer.cpp
25
ccminer.cpp
@ -228,6 +228,7 @@ Options:\n\
|
||||
heavy Heavycoin\n\
|
||||
jackpot Jackpot\n\
|
||||
keccak Keccak-256 (Maxcoin)\n\
|
||||
lbry LBRY Credits (Sha/Ripemd)\n\
|
||||
luffa Joincoin\n\
|
||||
lyra2 LyraBar\n\
|
||||
lyra2v2 VertCoin\n\
|
||||
@ -567,6 +568,7 @@ static void calc_network_diff(struct work *work)
|
||||
// sample for diff 43.281 : 1c05ea29
|
||||
// todo: endian reversed on longpoll could be zr5 specific...
|
||||
uint32_t nbits = have_longpoll ? work->data[18] : swab32(work->data[18]);
|
||||
if (opt_algo == ALGO_LBRY) nbits = swab32(work->data[26]);
|
||||
if (opt_algo == ALGO_DECRED) nbits = work->data[29];
|
||||
uint32_t bits = (nbits & 0xffffff);
|
||||
int16_t shift = (swab32(nbits) & 0xff); // 0x1c = 28
|
||||
@ -837,6 +839,11 @@ static bool submit_upstream_work(CURL *curl, struct work *work)
|
||||
le32enc(&nonce, work->data[19]);
|
||||
be16enc(&nvote, *((uint16_t*)&work->data[20]));
|
||||
break;
|
||||
case ALGO_LBRY:
|
||||
check_dups = true;
|
||||
le32enc(&ntime, work->data[25]);
|
||||
le32enc(&nonce, work->data[27]);
|
||||
break;
|
||||
case ALGO_ZR5:
|
||||
check_dups = true;
|
||||
be32enc(&ntime, work->data[17]);
|
||||
@ -1296,6 +1303,8 @@ bool get_work(struct thr_info *thr, struct work *work)
|
||||
memset(work->data + 19, 0x00, 52);
|
||||
if (opt_algo == ALGO_DECRED) {
|
||||
memset(&work->data[35], 0x00, 52);
|
||||
} else if (opt_algo == ALGO_LBRY) {
|
||||
work->data[28] = 0x80000000;
|
||||
} else {
|
||||
work->data[20] = 0x80000000;
|
||||
work->data[31] = 0x00000280;
|
||||
@ -1441,6 +1450,14 @@ static bool stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
|
||||
work->data[37] = (rand()*4) << 8; // random work data
|
||||
sctx->job.height = work->data[32];
|
||||
//applog_hex(work->data, 180);
|
||||
} else if (opt_algo == ALGO_LBRY) {
|
||||
for (i = 0; i < 8; i++)
|
||||
work->data[9 + i] = be32dec((uint32_t *)merkle_root + i);
|
||||
for (i = 0; i < 8; i++)
|
||||
work->data[17 + i] = ((uint32_t*)sctx->job.claim)[i];
|
||||
work->data[25] = le32dec(sctx->job.ntime);
|
||||
work->data[26] = le32dec(sctx->job.nbits);
|
||||
work->data[28] = 0x80000000;
|
||||
} else {
|
||||
for (i = 0; i < 8; i++)
|
||||
work->data[9 + i] = be32dec((uint32_t *)merkle_root + i);
|
||||
@ -1498,6 +1515,7 @@ static bool stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
|
||||
case ALGO_FRESH:
|
||||
case ALGO_FUGUE256:
|
||||
case ALGO_GROESTL:
|
||||
case ALGO_LBRY:
|
||||
case ALGO_LYRA2v2:
|
||||
work_set_target(work, sctx->job.diff / (256.0 * opt_difficulty));
|
||||
break;
|
||||
@ -1658,6 +1676,7 @@ static void *miner_thread(void *userdata)
|
||||
|
||||
// &work.data[19]
|
||||
int wcmplen = (opt_algo == ALGO_DECRED) ? 140 : 76;
|
||||
if (opt_algo == ALGO_LBRY) wcmplen = 108;
|
||||
int wcmpoft = 0;
|
||||
uint32_t *nonceptr = (uint32_t*) (((char*)work.data) + wcmplen);
|
||||
|
||||
@ -1910,6 +1929,7 @@ static void *miner_thread(void *userdata)
|
||||
minmax = 0x40000000U;
|
||||
break;
|
||||
case ALGO_KECCAK:
|
||||
case ALGO_LBRY:
|
||||
case ALGO_LUFFA:
|
||||
case ALGO_SKEIN:
|
||||
case ALGO_SKEIN2:
|
||||
@ -2035,6 +2055,9 @@ static void *miner_thread(void *userdata)
|
||||
case ALGO_JACKPOT:
|
||||
rc = scanhash_jackpot(thr_id, &work, max_nonce, &hashes_done);
|
||||
break;
|
||||
case ALGO_LBRY:
|
||||
rc = scanhash_lbry(thr_id, &work, max_nonce, &hashes_done);
|
||||
break;
|
||||
case ALGO_LUFFA:
|
||||
rc = scanhash_luffa(thr_id, &work, max_nonce, &hashes_done);
|
||||
break;
|
||||
@ -2130,7 +2153,7 @@ static void *miner_thread(void *userdata)
|
||||
|
||||
// todo: update all algos to use work->nonces
|
||||
work.nonces[0] = nonceptr[0];
|
||||
if (opt_algo != ALGO_DECRED && opt_algo != ALGO_BLAKE2S) {
|
||||
if (opt_algo != ALGO_DECRED && opt_algo != ALGO_BLAKE2S && opt_algo != ALGO_LBRY) {
|
||||
work.nonces[1] = nonceptr[2];
|
||||
}
|
||||
|
||||
|
@ -115,7 +115,7 @@
|
||||
<MaxRegCount>80</MaxRegCount>
|
||||
<PtxAsOptionV>true</PtxAsOptionV>
|
||||
<Keep>true</Keep>
|
||||
<CodeGeneration>compute_50,sm_50</CodeGeneration>
|
||||
<CodeGeneration>compute_61,sm_61;compute_52,sm_52</CodeGeneration>
|
||||
<Include>$(NVTOOLSEXT_PATH)\include;..\..\..\Common\C99</Include>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
@ -273,15 +273,11 @@
|
||||
<ClCompile Include="sph\fugue.c" />
|
||||
<ClCompile Include="sph\groestl.c" />
|
||||
<ClCompile Include="sph\haval.c" />
|
||||
<ClCompile Include="sph\haval_helper.c">
|
||||
<ExcludedFromBuild>true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="sph\jh.c" />
|
||||
<ClCompile Include="sph\keccak.c" />
|
||||
<ClCompile Include="sph\luffa.c" />
|
||||
<ClCompile Include="sph\md_helper.c">
|
||||
<ExcludedFromBuild>true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="sph\ripemd.c" />
|
||||
<ClCompile Include="sph\sph_sha2.c" />
|
||||
<ClCompile Include="sph\sha2.c" />
|
||||
<ClCompile Include="sph\sha2big.c" />
|
||||
<ClCompile Include="sph\shabal.c" />
|
||||
@ -428,6 +424,10 @@
|
||||
<MaxRegCount>92</MaxRegCount>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="Algo256\cuda_skein256.cu" />
|
||||
<CudaCompile Include="lbry\cuda_ripemd160.cu" />
|
||||
<CudaCompile Include="lbry\cuda_sha256_lbry.cu" />
|
||||
<CudaCompile Include="lbry\cuda_sha512_lbry.cu" />
|
||||
<CudaCompile Include="lbry\lbry.cu" />
|
||||
<CudaCompile Include="pentablake.cu">
|
||||
<MaxRegCount>80</MaxRegCount>
|
||||
<AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options="-dlcm=cg" %(AdditionalOptions)</AdditionalOptions>
|
||||
|
@ -82,6 +82,9 @@
|
||||
<Filter Include="Source Files\CUDA\lyra2">
|
||||
<UniqueIdentifier>{1613763f-895c-4321-b58b-6f5849868956}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="Source Files\CUDA\lbry">
|
||||
<UniqueIdentifier>{3079ea1f-f768-455a-acd6-f517fac535b4}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="compat\jansson\dump.c">
|
||||
@ -150,9 +153,6 @@
|
||||
<ClCompile Include="sph\luffa.c">
|
||||
<Filter>Source Files\sph</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="sph\md_helper.c">
|
||||
<Filter>Source Files\sph</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="sph\sha2.c">
|
||||
<Filter>Source Files\sph</Filter>
|
||||
</ClCompile>
|
||||
@ -180,7 +180,10 @@
|
||||
<ClCompile Include="sph\haval.c">
|
||||
<Filter>Source Files\sph</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="sph\haval_helper.c">
|
||||
<ClCompile Include="sph\ripemd.c">
|
||||
<Filter>Source Files\sph</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="sph\sph_sha2.c">
|
||||
<Filter>Source Files\sph</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="sph\sha2big.c">
|
||||
@ -721,6 +724,18 @@
|
||||
<CudaCompile Include="Algo256\blake2s.cu">
|
||||
<Filter>Source Files\CUDA\Algo256</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="lbry\cuda_sha256_lbry.cu">
|
||||
<Filter>Source Files\CUDA\lbry</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="lbry\cuda_sha512_lbry.cu">
|
||||
<Filter>Source Files\CUDA\lbry</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="lbry\cuda_ripemd160.cu">
|
||||
<Filter>Source Files\CUDA\lbry</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="lbry\lbry.cu">
|
||||
<Filter>Source Files\CUDA\lbry</Filter>
|
||||
</CudaCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Image Include="res\ccminer.ico">
|
||||
|
441
lbry/cuda_ripemd160.cu
Normal file
441
lbry/cuda_ripemd160.cu
Normal file
@ -0,0 +1,441 @@
|
||||
/*
|
||||
* ripemd-160 kernel implementation.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2014, 2016 djm34, tpruvot
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <memory.h>
|
||||
|
||||
#include <cuda_helper.h>
|
||||
|
||||
static __constant__ uint32_t c_IV[5] = {
|
||||
0x67452301u, 0xEFCDAB89u, 0x98BADCFEu, 0x10325476u, 0xC3D2E1F0u
|
||||
};
|
||||
|
||||
//__host__
|
||||
//uint64_t xornot64(uint64_t a, uint64_t b, uint64_t c) {
|
||||
// return c ^ (a | !b);
|
||||
//}
|
||||
|
||||
__forceinline__ __device__
|
||||
uint64_t xornot64(uint64_t a, uint64_t b, uint64_t c)
|
||||
{
|
||||
uint64_t result;
|
||||
asm("{ .reg .u64 m,n; // xornot64\n\t"
|
||||
"not.b64 m,%2; \n\t"
|
||||
"or.b64 n, %1,m;\n\t"
|
||||
"xor.b64 %0, n,%3;\n\t"
|
||||
"}\n\t"
|
||||
: "=l"(result) : "l"(a), "l"(b), "l"(c));
|
||||
return result;
|
||||
}
|
||||
|
||||
//__host__
|
||||
//uint64_t xornt64(uint64_t a, uint64_t b, uint64_t c) {
|
||||
// return a ^ (b | !c);
|
||||
//}
|
||||
|
||||
__device__ __forceinline__
|
||||
uint64_t xornt64(uint64_t a, uint64_t b, uint64_t c)
|
||||
{
|
||||
uint64_t result;
|
||||
asm("{ .reg .u64 m,n; // xornt64\n\t"
|
||||
"not.b64 m,%3; \n\t"
|
||||
"or.b64 n, %2,m;\n\t"
|
||||
"xor.b64 %0, %1,n;\n\t"
|
||||
"}\n\t"
|
||||
: "=l"(result) : "l"(a), "l"(b), "l"(c));
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Round functions for RIPEMD-128 and RIPEMD-160.
|
||||
*/
|
||||
#if 1
|
||||
#define F1(x, y, z) ((x) ^ (y) ^ (z))
|
||||
#define F2(x, y, z) ((((y) ^ (z)) & (x)) ^ (z))
|
||||
#define F3(x, y, z) (((x) | ~(y)) ^ (z))
|
||||
#define F4(x, y, z) ((((x) ^ (y)) & (z)) ^ (y))
|
||||
#define F5(x, y, z) ((x) ^ ((y) | ~(z)))
|
||||
#else
|
||||
#define F1(x, y, z) xor3(x,y,z)
|
||||
#define F2(x, y, z) xandx(x,y,z)
|
||||
#define F3(x, y, z) xornot64(x,y,z)
|
||||
#define F4(x, y, z) xandx(z,x,y)
|
||||
#define F5(x, y, z) xornt64(x,y,z)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Round constants for RIPEMD-160.
|
||||
*/
|
||||
#define K11 0x00000000u
|
||||
#define K12 0x5A827999u
|
||||
#define K13 0x6ED9EBA1u
|
||||
#define K14 0x8F1BBCDCu
|
||||
#define K15 0xA953FD4Eu
|
||||
|
||||
#define K21 0x50A28BE6u
|
||||
#define K22 0x5C4DD124u
|
||||
#define K23 0x6D703EF3u
|
||||
#define K24 0x7A6D76E9u
|
||||
#define K25 0x00000000u
|
||||
|
||||
#define RR(a, b, c, d, e, f, s, r, k) { \
|
||||
a = SPH_T32(ROTL32(SPH_T32(a + f(b, c, d) + r + k), s) + e); \
|
||||
c = ROTL32(c, 10); \
|
||||
}
|
||||
|
||||
#define ROUND1(a, b, c, d, e, f, s, r, k) \
|
||||
RR(a ## 1, b ## 1, c ## 1, d ## 1, e ## 1, f, s, r, K1 ## k)
|
||||
|
||||
#define ROUND2(a, b, c, d, e, f, s, r, k) \
|
||||
RR(a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k)
|
||||
|
||||
#define RIPEMD160_ROUND_BODY(in, h) { \
|
||||
uint32_t A1, B1, C1, D1, E1; \
|
||||
uint32_t A2, B2, C2, D2, E2; \
|
||||
uint32_t tmp; \
|
||||
\
|
||||
A1 = A2 = h[0]; \
|
||||
B1 = B2 = h[1]; \
|
||||
C1 = C2 = h[2]; \
|
||||
D1 = D2 = h[3]; \
|
||||
E1 = E2 = h[4]; \
|
||||
\
|
||||
ROUND1(A, B, C, D, E, F1, 11, in[ 0], 1); \
|
||||
ROUND1(E, A, B, C, D, F1, 14, in[ 1], 1); \
|
||||
ROUND1(D, E, A, B, C, F1, 15, in[ 2], 1); \
|
||||
ROUND1(C, D, E, A, B, F1, 12, in[ 3], 1); \
|
||||
ROUND1(B, C, D, E, A, F1, 5, in[ 4], 1); \
|
||||
ROUND1(A, B, C, D, E, F1, 8, in[ 5], 1); \
|
||||
ROUND1(E, A, B, C, D, F1, 7, in[ 6], 1); \
|
||||
ROUND1(D, E, A, B, C, F1, 9, in[ 7], 1); \
|
||||
ROUND1(C, D, E, A, B, F1, 11, in[ 8], 1); \
|
||||
ROUND1(B, C, D, E, A, F1, 13, in[ 9], 1); \
|
||||
ROUND1(A, B, C, D, E, F1, 14, in[10], 1); \
|
||||
ROUND1(E, A, B, C, D, F1, 15, in[11], 1); \
|
||||
ROUND1(D, E, A, B, C, F1, 6, in[12], 1); \
|
||||
ROUND1(C, D, E, A, B, F1, 7, in[13], 1); \
|
||||
ROUND1(B, C, D, E, A, F1, 9, in[14], 1); \
|
||||
ROUND1(A, B, C, D, E, F1, 8, in[15], 1); \
|
||||
\
|
||||
ROUND1(E, A, B, C, D, F2, 7, in[ 7], 2); \
|
||||
ROUND1(D, E, A, B, C, F2, 6, in[ 4], 2); \
|
||||
ROUND1(C, D, E, A, B, F2, 8, in[13], 2); \
|
||||
ROUND1(B, C, D, E, A, F2, 13, in[ 1], 2); \
|
||||
ROUND1(A, B, C, D, E, F2, 11, in[10], 2); \
|
||||
ROUND1(E, A, B, C, D, F2, 9, in[ 6], 2); \
|
||||
ROUND1(D, E, A, B, C, F2, 7, in[15], 2); \
|
||||
ROUND1(C, D, E, A, B, F2, 15, in[ 3], 2); \
|
||||
ROUND1(B, C, D, E, A, F2, 7, in[12], 2); \
|
||||
ROUND1(A, B, C, D, E, F2, 12, in[ 0], 2); \
|
||||
ROUND1(E, A, B, C, D, F2, 15, in[ 9], 2); \
|
||||
ROUND1(D, E, A, B, C, F2, 9, in[ 5], 2); \
|
||||
ROUND1(C, D, E, A, B, F2, 11, in[ 2], 2); \
|
||||
ROUND1(B, C, D, E, A, F2, 7, in[14], 2); \
|
||||
ROUND1(A, B, C, D, E, F2, 13, in[11], 2); \
|
||||
ROUND1(E, A, B, C, D, F2, 12, in[ 8], 2); \
|
||||
\
|
||||
ROUND1(D, E, A, B, C, F3, 11, in[ 3], 3); \
|
||||
ROUND1(C, D, E, A, B, F3, 13, in[10], 3); \
|
||||
ROUND1(B, C, D, E, A, F3, 6, in[14], 3); \
|
||||
ROUND1(A, B, C, D, E, F3, 7, in[ 4], 3); \
|
||||
ROUND1(E, A, B, C, D, F3, 14, in[ 9], 3); \
|
||||
ROUND1(D, E, A, B, C, F3, 9, in[15], 3); \
|
||||
ROUND1(C, D, E, A, B, F3, 13, in[ 8], 3); \
|
||||
ROUND1(B, C, D, E, A, F3, 15, in[ 1], 3); \
|
||||
ROUND1(A, B, C, D, E, F3, 14, in[ 2], 3); \
|
||||
ROUND1(E, A, B, C, D, F3, 8, in[ 7], 3); \
|
||||
ROUND1(D, E, A, B, C, F3, 13, in[ 0], 3); \
|
||||
ROUND1(C, D, E, A, B, F3, 6, in[ 6], 3); \
|
||||
ROUND1(B, C, D, E, A, F3, 5, in[13], 3); \
|
||||
ROUND1(A, B, C, D, E, F3, 12, in[11], 3); \
|
||||
ROUND1(E, A, B, C, D, F3, 7, in[ 5], 3); \
|
||||
ROUND1(D, E, A, B, C, F3, 5, in[12], 3); \
|
||||
\
|
||||
ROUND1(C, D, E, A, B, F4, 11, in[ 1], 4); \
|
||||
ROUND1(B, C, D, E, A, F4, 12, in[ 9], 4); \
|
||||
ROUND1(A, B, C, D, E, F4, 14, in[11], 4); \
|
||||
ROUND1(E, A, B, C, D, F4, 15, in[10], 4); \
|
||||
ROUND1(D, E, A, B, C, F4, 14, in[ 0], 4); \
|
||||
ROUND1(C, D, E, A, B, F4, 15, in[ 8], 4); \
|
||||
ROUND1(B, C, D, E, A, F4, 9, in[12], 4); \
|
||||
ROUND1(A, B, C, D, E, F4, 8, in[ 4], 4); \
|
||||
ROUND1(E, A, B, C, D, F4, 9, in[13], 4); \
|
||||
ROUND1(D, E, A, B, C, F4, 14, in[ 3], 4); \
|
||||
ROUND1(C, D, E, A, B, F4, 5, in[ 7], 4); \
|
||||
ROUND1(B, C, D, E, A, F4, 6, in[15], 4); \
|
||||
ROUND1(A, B, C, D, E, F4, 8, in[14], 4); \
|
||||
ROUND1(E, A, B, C, D, F4, 6, in[ 5], 4); \
|
||||
ROUND1(D, E, A, B, C, F4, 5, in[ 6], 4); \
|
||||
ROUND1(C, D, E, A, B, F4, 12, in[ 2], 4); \
|
||||
\
|
||||
ROUND1(B, C, D, E, A, F5, 9, in[ 4], 5); \
|
||||
ROUND1(A, B, C, D, E, F5, 15, in[ 0], 5); \
|
||||
ROUND1(E, A, B, C, D, F5, 5, in[ 5], 5); \
|
||||
ROUND1(D, E, A, B, C, F5, 11, in[ 9], 5); \
|
||||
ROUND1(C, D, E, A, B, F5, 6, in[ 7], 5); \
|
||||
ROUND1(B, C, D, E, A, F5, 8, in[12], 5); \
|
||||
ROUND1(A, B, C, D, E, F5, 13, in[ 2], 5); \
|
||||
ROUND1(E, A, B, C, D, F5, 12, in[10], 5); \
|
||||
ROUND1(D, E, A, B, C, F5, 5, in[14], 5); \
|
||||
ROUND1(C, D, E, A, B, F5, 12, in[ 1], 5); \
|
||||
ROUND1(B, C, D, E, A, F5, 13, in[ 3], 5); \
|
||||
ROUND1(A, B, C, D, E, F5, 14, in[ 8], 5); \
|
||||
ROUND1(E, A, B, C, D, F5, 11, in[11], 5); \
|
||||
ROUND1(D, E, A, B, C, F5, 8, in[ 6], 5); \
|
||||
ROUND1(C, D, E, A, B, F5, 5, in[15], 5); \
|
||||
ROUND1(B, C, D, E, A, F5, 6, in[13], 5); \
|
||||
\
|
||||
ROUND2(A, B, C, D, E, F5, 8, in[ 5], 1); \
|
||||
ROUND2(E, A, B, C, D, F5, 9, in[14], 1); \
|
||||
ROUND2(D, E, A, B, C, F5, 9, in[ 7], 1); \
|
||||
ROUND2(C, D, E, A, B, F5, 11, in[ 0], 1); \
|
||||
ROUND2(B, C, D, E, A, F5, 13, in[ 9], 1); \
|
||||
ROUND2(A, B, C, D, E, F5, 15, in[ 2], 1); \
|
||||
ROUND2(E, A, B, C, D, F5, 15, in[11], 1); \
|
||||
ROUND2(D, E, A, B, C, F5, 5, in[ 4], 1); \
|
||||
ROUND2(C, D, E, A, B, F5, 7, in[13], 1); \
|
||||
ROUND2(B, C, D, E, A, F5, 7, in[ 6], 1); \
|
||||
ROUND2(A, B, C, D, E, F5, 8, in[15], 1); \
|
||||
ROUND2(E, A, B, C, D, F5, 11, in[ 8], 1); \
|
||||
ROUND2(D, E, A, B, C, F5, 14, in[ 1], 1); \
|
||||
ROUND2(C, D, E, A, B, F5, 14, in[10], 1); \
|
||||
ROUND2(B, C, D, E, A, F5, 12, in[ 3], 1); \
|
||||
ROUND2(A, B, C, D, E, F5, 6, in[12], 1); \
|
||||
\
|
||||
ROUND2(E, A, B, C, D, F4, 9, in[ 6], 2); \
|
||||
ROUND2(D, E, A, B, C, F4, 13, in[11], 2); \
|
||||
ROUND2(C, D, E, A, B, F4, 15, in[ 3], 2); \
|
||||
ROUND2(B, C, D, E, A, F4, 7, in[ 7], 2); \
|
||||
ROUND2(A, B, C, D, E, F4, 12, in[ 0], 2); \
|
||||
ROUND2(E, A, B, C, D, F4, 8, in[13], 2); \
|
||||
ROUND2(D, E, A, B, C, F4, 9, in[ 5], 2); \
|
||||
ROUND2(C, D, E, A, B, F4, 11, in[10], 2); \
|
||||
ROUND2(B, C, D, E, A, F4, 7, in[14], 2); \
|
||||
ROUND2(A, B, C, D, E, F4, 7, in[15], 2); \
|
||||
ROUND2(E, A, B, C, D, F4, 12, in[ 8], 2); \
|
||||
ROUND2(D, E, A, B, C, F4, 7, in[12], 2); \
|
||||
ROUND2(C, D, E, A, B, F4, 6, in[ 4], 2); \
|
||||
ROUND2(B, C, D, E, A, F4, 15, in[ 9], 2); \
|
||||
ROUND2(A, B, C, D, E, F4, 13, in[ 1], 2); \
|
||||
ROUND2(E, A, B, C, D, F4, 11, in[ 2], 2); \
|
||||
\
|
||||
ROUND2(D, E, A, B, C, F3, 9, in[15], 3); \
|
||||
ROUND2(C, D, E, A, B, F3, 7, in[ 5], 3); \
|
||||
ROUND2(B, C, D, E, A, F3, 15, in[ 1], 3); \
|
||||
ROUND2(A, B, C, D, E, F3, 11, in[ 3], 3); \
|
||||
ROUND2(E, A, B, C, D, F3, 8, in[ 7], 3); \
|
||||
ROUND2(D, E, A, B, C, F3, 6, in[14], 3); \
|
||||
ROUND2(C, D, E, A, B, F3, 6, in[ 6], 3); \
|
||||
ROUND2(B, C, D, E, A, F3, 14, in[ 9], 3); \
|
||||
ROUND2(A, B, C, D, E, F3, 12, in[11], 3); \
|
||||
ROUND2(E, A, B, C, D, F3, 13, in[ 8], 3); \
|
||||
ROUND2(D, E, A, B, C, F3, 5, in[12], 3); \
|
||||
ROUND2(C, D, E, A, B, F3, 14, in[ 2], 3); \
|
||||
ROUND2(B, C, D, E, A, F3, 13, in[10], 3); \
|
||||
ROUND2(A, B, C, D, E, F3, 13, in[ 0], 3); \
|
||||
ROUND2(E, A, B, C, D, F3, 7, in[ 4], 3); \
|
||||
ROUND2(D, E, A, B, C, F3, 5, in[13], 3); \
|
||||
\
|
||||
ROUND2(C, D, E, A, B, F2, 15, in[ 8], 4); \
|
||||
ROUND2(B, C, D, E, A, F2, 5, in[ 6], 4); \
|
||||
ROUND2(A, B, C, D, E, F2, 8, in[ 4], 4); \
|
||||
ROUND2(E, A, B, C, D, F2, 11, in[ 1], 4); \
|
||||
ROUND2(D, E, A, B, C, F2, 14, in[ 3], 4); \
|
||||
ROUND2(C, D, E, A, B, F2, 14, in[11], 4); \
|
||||
ROUND2(B, C, D, E, A, F2, 6, in[15], 4); \
|
||||
ROUND2(A, B, C, D, E, F2, 14, in[ 0], 4); \
|
||||
ROUND2(E, A, B, C, D, F2, 6, in[ 5], 4); \
|
||||
ROUND2(D, E, A, B, C, F2, 9, in[12], 4); \
|
||||
ROUND2(C, D, E, A, B, F2, 12, in[ 2], 4); \
|
||||
ROUND2(B, C, D, E, A, F2, 9, in[13], 4); \
|
||||
ROUND2(A, B, C, D, E, F2, 12, in[ 9], 4); \
|
||||
ROUND2(E, A, B, C, D, F2, 5, in[ 7], 4); \
|
||||
ROUND2(D, E, A, B, C, F2, 15, in[10], 4); \
|
||||
ROUND2(C, D, E, A, B, F2, 8, in[14], 4); \
|
||||
\
|
||||
ROUND2(B, C, D, E, A, F1, 8, in[12], 5); \
|
||||
ROUND2(A, B, C, D, E, F1, 5, in[15], 5); \
|
||||
ROUND2(E, A, B, C, D, F1, 12, in[10], 5); \
|
||||
ROUND2(D, E, A, B, C, F1, 9, in[ 4], 5); \
|
||||
ROUND2(C, D, E, A, B, F1, 12, in[ 1], 5); \
|
||||
ROUND2(B, C, D, E, A, F1, 5, in[ 5], 5); \
|
||||
ROUND2(A, B, C, D, E, F1, 14, in[ 8], 5); \
|
||||
ROUND2(E, A, B, C, D, F1, 6, in[ 7], 5); \
|
||||
ROUND2(D, E, A, B, C, F1, 8, in[ 6], 5); \
|
||||
ROUND2(C, D, E, A, B, F1, 13, in[ 2], 5); \
|
||||
ROUND2(B, C, D, E, A, F1, 6, in[13], 5); \
|
||||
ROUND2(A, B, C, D, E, F1, 5, in[14], 5); \
|
||||
ROUND2(E, A, B, C, D, F1, 15, in[ 0], 5); \
|
||||
ROUND2(D, E, A, B, C, F1, 13, in[ 3], 5); \
|
||||
ROUND2(C, D, E, A, B, F1, 11, in[ 9], 5); \
|
||||
ROUND2(B, C, D, E, A, F1, 11, in[11], 5); \
|
||||
\
|
||||
tmp = (h[1] + C1 + D2); \
|
||||
h[1] = (h[2] + D1 + E2); \
|
||||
h[2] = (h[3] + E1 + A2); \
|
||||
h[3] = (h[4] + A1 + B2); \
|
||||
h[4] = (h[0] + B1 + C2); \
|
||||
h[0] = tmp; \
|
||||
}
|
||||
|
||||
#if 0
|
||||
__global__
|
||||
void lbry_ripemd160_gpu_hash_32(const uint32_t threads, uint64_t *g_hash, const uint32_t byteOffset)
|
||||
{
|
||||
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||
if (thread < threads)
|
||||
{
|
||||
uint32_t *hash = (uint32_t*) (&g_hash[thread * 8U + byteOffset/8]);
|
||||
|
||||
uint32_t in[16];
|
||||
for (int i=0; i<8; i++)
|
||||
in[i] = (hash[i]);
|
||||
in[8] = 0x80;
|
||||
|
||||
#pragma unroll
|
||||
for (int i=9;i<16;i++) in[i] = 0;
|
||||
|
||||
in[14] = 0x100; // size in bits
|
||||
|
||||
uint32_t h[5];
|
||||
#pragma unroll
|
||||
for (int i=0; i<5; i++)
|
||||
h[i] = c_IV[i];
|
||||
|
||||
RIPEMD160_ROUND_BODY(in, h);
|
||||
|
||||
#pragma unroll
|
||||
for (int i=0; i<5; i++)
|
||||
hash[i] = h[i];
|
||||
|
||||
#ifdef PAD_ZEROS
|
||||
// 20 bytes hash on 32 or 64 bytes output space
|
||||
hash[5] = 0;
|
||||
hash[6] = 0;
|
||||
hash[7] = 0;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
__host__
|
||||
void lbry_ripemd160_hash_32(int thr_id, uint32_t threads, uint32_t *g_Hash, uint32_t byteOffset, cudaStream_t stream)
|
||||
{
|
||||
const uint32_t threadsperblock = 128;
|
||||
|
||||
dim3 grid(threads/threadsperblock);
|
||||
dim3 block(threadsperblock);
|
||||
|
||||
lbry_ripemd160_gpu_hash_32 <<<grid, block, 0, stream>>> (threads, (uint64_t*) g_Hash, byteOffset);
|
||||
}
|
||||
#endif
|
||||
|
||||
__global__
|
||||
//__launch_bounds__(256,6)
|
||||
void lbry_ripemd160_gpu_hash_32x2(const uint32_t threads, uint64_t *g_hash)
|
||||
{
|
||||
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||
if (thread < threads)
|
||||
{
|
||||
uint32_t *hash = (uint32_t*) (&g_hash[thread * 8U]);
|
||||
|
||||
uint32_t in[16];
|
||||
for (int i=0; i<8; i++)
|
||||
in[i] = (hash[i]);
|
||||
in[8] = 0x80;
|
||||
|
||||
#pragma unroll
|
||||
for (int i=9;i<16;i++) in[i] = 0;
|
||||
|
||||
in[14] = 0x100; // size in bits
|
||||
|
||||
uint32_t h[5];
|
||||
#pragma unroll
|
||||
for (int i=0; i<5; i++)
|
||||
h[i] = c_IV[i];
|
||||
|
||||
RIPEMD160_ROUND_BODY(in, h);
|
||||
|
||||
#pragma unroll
|
||||
for (int i=0; i<5; i++)
|
||||
hash[i] = h[i];
|
||||
|
||||
#ifdef PAD_ZEROS
|
||||
// 20 bytes hash on 32 output space
|
||||
hash[5] = 0;
|
||||
hash[6] = 0;
|
||||
hash[7] = 0;
|
||||
#endif
|
||||
// second 32 bytes block hash
|
||||
hash += 8;
|
||||
|
||||
#pragma unroll
|
||||
for (int i=0; i<8; i++)
|
||||
in[i] = (hash[i]);
|
||||
in[8] = 0x80;
|
||||
|
||||
#pragma unroll
|
||||
for (int i=9;i<16;i++) in[i] = 0;
|
||||
|
||||
in[14] = 0x100; // size in bits
|
||||
|
||||
#pragma unroll
|
||||
for (int i=0; i<5; i++)
|
||||
h[i] = c_IV[i];
|
||||
|
||||
RIPEMD160_ROUND_BODY(in, h);
|
||||
|
||||
#pragma unroll
|
||||
for (int i=0; i<5; i++)
|
||||
hash[i] = h[i];
|
||||
|
||||
#ifdef PAD_ZEROS
|
||||
// 20 bytes hash on 32 output space
|
||||
hash[5] = 0;
|
||||
hash[6] = 0;
|
||||
hash[7] = 0;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
__host__
|
||||
void lbry_ripemd160_hash_32x2(int thr_id, uint32_t threads, uint32_t *g_Hash, cudaStream_t stream)
|
||||
{
|
||||
const uint32_t threadsperblock = 128;
|
||||
|
||||
dim3 grid(threads/threadsperblock);
|
||||
dim3 block(threadsperblock);
|
||||
|
||||
lbry_ripemd160_gpu_hash_32x2 <<<grid, block, 0, stream>>> (threads, (uint64_t*) g_Hash);
|
||||
}
|
||||
|
||||
void lbry_ripemd160_init(int thr_id)
|
||||
{
|
||||
//cudaMemcpyToSymbol(c_IV, IV, sizeof(IV), 0, cudaMemcpyHostToDevice);
|
||||
}
|
712
lbry/cuda_sha256_lbry.cu
Normal file
712
lbry/cuda_sha256_lbry.cu
Normal file
@ -0,0 +1,712 @@
|
||||
/*
|
||||
* sha256 CUDA implementation.
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <memory.h>
|
||||
|
||||
#include <cuda_helper.h>
|
||||
#include <miner.h>
|
||||
|
||||
__constant__ static uint32_t __align__(8) c_midstate112[8];
|
||||
__constant__ static uint32_t __align__(8) c_dataEnd112[12];
|
||||
|
||||
const __constant__ uint32_t __align__(8) c_H256[8] = {
|
||||
0x6A09E667U, 0xBB67AE85U, 0x3C6EF372U, 0xA54FF53AU,
|
||||
0x510E527FU, 0x9B05688CU, 0x1F83D9ABU, 0x5BE0CD19U
|
||||
};
|
||||
__constant__ static uint32_t __align__(8) c_K[64];
|
||||
|
||||
static __thread uint32_t* d_resNonces;
|
||||
__constant__ static uint32_t __align__(8) c_target[2];
|
||||
__device__ uint64_t d_target[1];
|
||||
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
|
||||
static const uint32_t cpu_H256[8] = {
|
||||
0x6A09E667U, 0xBB67AE85U, 0x3C6EF372U, 0xA54FF53AU,
|
||||
0x510E527FU, 0x9B05688CU, 0x1F83D9ABU, 0x5BE0CD19U
|
||||
};
|
||||
|
||||
static const uint32_t cpu_K[64] = {
|
||||
0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
|
||||
0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3, 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
|
||||
0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
|
||||
0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
|
||||
0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
|
||||
0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
|
||||
0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
|
||||
0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2
|
||||
};
|
||||
|
||||
#define ROTR ROTR32
|
||||
|
||||
__host__
|
||||
static void sha256_step1_host(uint32_t a, uint32_t b, uint32_t c, uint32_t &d,
|
||||
uint32_t e, uint32_t f, uint32_t g, uint32_t &h,
|
||||
uint32_t in, const uint32_t Kshared)
|
||||
{
|
||||
uint32_t t1,t2;
|
||||
uint32_t vxandx = (((f) ^ (g)) & (e)) ^ (g); // xandx(e, f, g);
|
||||
uint32_t bsg21 = ROTR(e, 6) ^ ROTR(e, 11) ^ ROTR(e, 25); // bsg2_1(e);
|
||||
uint32_t bsg20 = ROTR(a, 2) ^ ROTR(a, 13) ^ ROTR(a, 22); //bsg2_0(a);
|
||||
uint32_t andorv = ((b) & (c)) | (((b) | (c)) & (a)); //andor32(a,b,c);
|
||||
|
||||
t1 = h + bsg21 + vxandx + Kshared + in;
|
||||
t2 = bsg20 + andorv;
|
||||
d = d + t1;
|
||||
h = t1 + t2;
|
||||
}
|
||||
|
||||
__host__
|
||||
static void sha256_step2_host(uint32_t a, uint32_t b, uint32_t c, uint32_t &d,
|
||||
uint32_t e, uint32_t f, uint32_t g, uint32_t &h,
|
||||
uint32_t* in, uint32_t pc, const uint32_t Kshared)
|
||||
{
|
||||
uint32_t t1,t2;
|
||||
|
||||
int pcidx1 = (pc-2) & 0xF;
|
||||
int pcidx2 = (pc-7) & 0xF;
|
||||
int pcidx3 = (pc-15) & 0xF;
|
||||
|
||||
uint32_t inx0 = in[pc];
|
||||
uint32_t inx1 = in[pcidx1];
|
||||
uint32_t inx2 = in[pcidx2];
|
||||
uint32_t inx3 = in[pcidx3];
|
||||
|
||||
uint32_t ssg21 = ROTR(inx1, 17) ^ ROTR(inx1, 19) ^ SPH_T32((inx1) >> 10); //ssg2_1(inx1);
|
||||
uint32_t ssg20 = ROTR(inx3, 7) ^ ROTR(inx3, 18) ^ SPH_T32((inx3) >> 3); //ssg2_0(inx3);
|
||||
uint32_t vxandx = (((f) ^ (g)) & (e)) ^ (g); // xandx(e, f, g);
|
||||
uint32_t bsg21 = ROTR(e, 6) ^ ROTR(e, 11) ^ ROTR(e, 25); // bsg2_1(e);
|
||||
uint32_t bsg20 = ROTR(a, 2) ^ ROTR(a, 13) ^ ROTR(a, 22); //bsg2_0(a);
|
||||
uint32_t andorv = ((b) & (c)) | (((b) | (c)) & (a)); //andor32(a,b,c);
|
||||
|
||||
in[pc] = ssg21 + inx2 + ssg20 + inx0;
|
||||
|
||||
t1 = h + bsg21 + vxandx + Kshared + in[pc];
|
||||
t2 = bsg20 + andorv;
|
||||
d = d + t1;
|
||||
h = t1 + t2;
|
||||
}
|
||||
|
||||
__host__
// Full 64-round SHA-256 compression of one 64-byte block on the host.
// `in` is the 16-word message block (modified in place by the schedule),
// `state` is the 8-word chaining value (updated in place), `Kshared` the
// 64 round constants. Working variables are rotated by argument position
// instead of being shuffled, hence the 16 staggered step calls.
static void sha256_round_body_host(uint32_t* in, uint32_t* state, const uint32_t* Kshared)
{
	uint32_t a = state[0];
	uint32_t b = state[1];
	uint32_t c = state[2];
	uint32_t d = state[3];
	uint32_t e = state[4];
	uint32_t f = state[5];
	uint32_t g = state[6];
	uint32_t h = state[7];

	sha256_step1_host(a,b,c,d,e,f,g,h,in[0], Kshared[0]);
	sha256_step1_host(h,a,b,c,d,e,f,g,in[1], Kshared[1]);
	sha256_step1_host(g,h,a,b,c,d,e,f,in[2], Kshared[2]);
	sha256_step1_host(f,g,h,a,b,c,d,e,in[3], Kshared[3]);
	sha256_step1_host(e,f,g,h,a,b,c,d,in[4], Kshared[4]);
	sha256_step1_host(d,e,f,g,h,a,b,c,in[5], Kshared[5]);
	sha256_step1_host(c,d,e,f,g,h,a,b,in[6], Kshared[6]);
	sha256_step1_host(b,c,d,e,f,g,h,a,in[7], Kshared[7]);
	sha256_step1_host(a,b,c,d,e,f,g,h,in[8], Kshared[8]);
	sha256_step1_host(h,a,b,c,d,e,f,g,in[9], Kshared[9]);
	sha256_step1_host(g,h,a,b,c,d,e,f,in[10],Kshared[10]);
	sha256_step1_host(f,g,h,a,b,c,d,e,in[11],Kshared[11]);
	sha256_step1_host(e,f,g,h,a,b,c,d,in[12],Kshared[12]);
	sha256_step1_host(d,e,f,g,h,a,b,c,in[13],Kshared[13]);
	sha256_step1_host(c,d,e,f,g,h,a,b,in[14],Kshared[14]);
	sha256_step1_host(b,c,d,e,f,g,h,a,in[15],Kshared[15]);

	// rounds 16..63, 16 rounds per iteration
	for (int i=0; i<3; i++)
	{
		sha256_step2_host(a,b,c,d,e,f,g,h,in,0, Kshared[16+16*i]);
		sha256_step2_host(h,a,b,c,d,e,f,g,in,1, Kshared[17+16*i]);
		sha256_step2_host(g,h,a,b,c,d,e,f,in,2, Kshared[18+16*i]);
		sha256_step2_host(f,g,h,a,b,c,d,e,in,3, Kshared[19+16*i]);
		sha256_step2_host(e,f,g,h,a,b,c,d,in,4, Kshared[20+16*i]);
		sha256_step2_host(d,e,f,g,h,a,b,c,in,5, Kshared[21+16*i]);
		sha256_step2_host(c,d,e,f,g,h,a,b,in,6, Kshared[22+16*i]);
		sha256_step2_host(b,c,d,e,f,g,h,a,in,7, Kshared[23+16*i]);
		sha256_step2_host(a,b,c,d,e,f,g,h,in,8, Kshared[24+16*i]);
		sha256_step2_host(h,a,b,c,d,e,f,g,in,9, Kshared[25+16*i]);
		sha256_step2_host(g,h,a,b,c,d,e,f,in,10,Kshared[26+16*i]);
		sha256_step2_host(f,g,h,a,b,c,d,e,in,11,Kshared[27+16*i]);
		sha256_step2_host(e,f,g,h,a,b,c,d,in,12,Kshared[28+16*i]);
		sha256_step2_host(d,e,f,g,h,a,b,c,in,13,Kshared[29+16*i]);
		sha256_step2_host(c,d,e,f,g,h,a,b,in,14,Kshared[30+16*i]);
		sha256_step2_host(b,c,d,e,f,g,h,a,in,15,Kshared[31+16*i]);
	}

	// Davies-Meyer feed-forward
	state[0] += a;
	state[1] += b;
	state[2] += c;
	state[3] += d;
	state[4] += e;
	state[5] += f;
	state[6] += g;
	state[7] += h;
}
|
||||
|
||||
__device__ __forceinline__
// Three-way XOR. On Maxwell+ with CUDA 7.5+, uses a single LOP3.B32
// instruction (immLut 0x96 encodes a^b^c); otherwise plain C fallback.
uint32_t xor3b(const uint32_t a, const uint32_t b, const uint32_t c) {
	uint32_t result;
#if __CUDA_ARCH__ >= 500 && CUDA_VERSION >= 7050
	asm ("lop3.b32 %0, %1, %2, %3, 0x96; // xor3b" //0x96 = 0xF0 ^ 0xCC ^ 0xAA
		: "=r"(result) : "r"(a), "r"(b),"r"(c));
#else
	result = a^b^c;
#endif
	return result;
}
|
||||
|
||||
/*
|
||||
__device__ __forceinline__
|
||||
uint32_t xor3b(const uint32_t a, const uint32_t b, const uint32_t c) {
|
||||
uint32_t result;
|
||||
asm("{ .reg .u32 t1; // xor3b \n\t"
|
||||
"xor.b32 t1, %2, %3;\n\t"
|
||||
"xor.b32 %0, %1, t1;"
|
||||
"}"
|
||||
: "=r"(result) : "r"(a) ,"r"(b),"r"(c));
|
||||
return result;
|
||||
}
|
||||
#define xor3b(a,b,c) (a ^ b ^ c)
|
||||
*/
|
||||
|
||||
// SHA-256 Sigma0(x) = (x>>>2) ^ (x>>>13) ^ (x>>>22)
__device__ __forceinline__ uint32_t bsg2_0(const uint32_t x)
{
	return xor3b(ROTR32(x, 2), ROTR32(x, 13), ROTR32(x, 22));
}
|
||||
|
||||
// SHA-256 Sigma1(x) = (x>>>6) ^ (x>>>11) ^ (x>>>25)
__device__ __forceinline__ uint32_t bsg2_1(const uint32_t x)
{
	return xor3b(ROTR32(x, 6), ROTR32(x, 11), ROTR32(x, 25));
}
|
||||
|
||||
// SHA-256 sigma0(x) = (x>>>7) ^ (x>>>18) ^ (x>>3)
// Fix: the temporaries were uint64_t, wasting a register pair each and
// relying on implicit narrowing when passed to xor3b(uint32_t,...).
__device__ __forceinline__ uint32_t ssg2_0(const uint32_t x)
{
	uint32_t r1 = ROTR32(x,7);
	uint32_t r2 = ROTR32(x,18);
	uint32_t r3 = shr_t32(x,3);
	return xor3b(r1,r2,r3);
}
|
||||
|
||||
// SHA-256 sigma1(x) = (x>>>17) ^ (x>>>19) ^ (x>>10)
// Fix: the temporaries were uint64_t, wasting a register pair each and
// relying on implicit narrowing when passed to xor3b(uint32_t,...).
__device__ __forceinline__ uint32_t ssg2_1(const uint32_t x)
{
	uint32_t r1 = ROTR32(x,17);
	uint32_t r2 = ROTR32(x,19);
	uint32_t r3 = shr_t32(x,10);
	return xor3b(r1,r2,r3);
}
|
||||
|
||||
// SHA-256 Maj(a,b,c) computed as (a & b) | ((a | b) & c), spelled out in
// PTX to pin the 4-instruction sequence.
__device__ __forceinline__ uint32_t andor32(const uint32_t a, const uint32_t b, const uint32_t c)
{
	uint32_t result;
	asm("{\n\t"
		".reg .u32 m,n,o;\n\t"
		"and.b32 m, %1, %2;\n\t"   // m = a & b
		" or.b32 n, %1, %2;\n\t"   // n = a | b
		"and.b32 o, n, %3;\n\t"    // o = n & c
		" or.b32 %0, m, o ;\n\t"   // result = m | o
		"}\n\t" : "=r"(result) : "r"(a), "r"(b), "r"(c)
	);
	return result;
}
|
||||
|
||||
__device__
// One SHA-256 compression round (rounds 0..15) on the device; W[t] is
// passed directly as `in`. Updates d and h in place.
static void sha2_step1(uint32_t a, uint32_t b, uint32_t c, uint32_t &d, uint32_t e, uint32_t f, uint32_t g, uint32_t &h,
	uint32_t in, const uint32_t Kshared)
{
	uint32_t t1,t2;
	uint32_t vxandx = xandx(e, f, g); // Ch(e,f,g)
	uint32_t bsg21 = bsg2_1(e);
	uint32_t bsg20 = bsg2_0(a);
	uint32_t andorv = andor32(a,b,c); // Maj(a,b,c)

	t1 = h + bsg21 + vxandx + Kshared + in;
	t2 = bsg20 + andorv;
	d = d + t1;
	h = t1 + t2;
}
|
||||
|
||||
__device__
// SHA-256 compression round for rounds 16..63 on the device. `in` is the
// 16-word schedule used as a circular buffer; the word at `pc` is extended
// in place before being consumed.
static void sha2_step2(uint32_t a, uint32_t b, uint32_t c, uint32_t &d, uint32_t e, uint32_t f, uint32_t g, uint32_t &h,
	uint32_t* in, uint32_t pc, const uint32_t Kshared)
{
	uint32_t t1,t2;

	int pcidx1 = (pc-2) & 0xF;  // W[t-2]  (mod 16)
	int pcidx2 = (pc-7) & 0xF;  // W[t-7]  (mod 16)
	int pcidx3 = (pc-15) & 0xF; // W[t-15] (mod 16)

	uint32_t inx0 = in[pc];
	uint32_t inx1 = in[pcidx1];
	uint32_t inx2 = in[pcidx2];
	uint32_t inx3 = in[pcidx3];

	uint32_t ssg21 = ssg2_1(inx1);
	uint32_t ssg20 = ssg2_0(inx3);
	uint32_t vxandx = xandx(e, f, g); // Ch(e,f,g)
	uint32_t bsg21 = bsg2_1(e);
	uint32_t bsg20 = bsg2_0(a);
	uint32_t andorv = andor32(a,b,c); // Maj(a,b,c)

	// W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16]
	in[pc] = ssg21 + inx2 + ssg20 + inx0;

	t1 = h + bsg21 + vxandx + Kshared + in[pc];
	t2 = bsg20 + andorv;
	d = d + t1;
	h = t1 + t2;
}
|
||||
|
||||
__device__
// Full 64-round SHA-256 compression of one 64-byte block on the device.
// Mirrors sha256_round_body_host: `in` is the 16-word message block
// (clobbered by the schedule), `state` the 8-word chaining value (updated
// in place), `Kshared` the round constants (constant or shared memory).
static void sha256_round_body(uint32_t* in, uint32_t* state, uint32_t* const Kshared)
{
	uint32_t a = state[0];
	uint32_t b = state[1];
	uint32_t c = state[2];
	uint32_t d = state[3];
	uint32_t e = state[4];
	uint32_t f = state[5];
	uint32_t g = state[6];
	uint32_t h = state[7];

	sha2_step1(a,b,c,d,e,f,g,h,in[0], Kshared[0]);
	sha2_step1(h,a,b,c,d,e,f,g,in[1], Kshared[1]);
	sha2_step1(g,h,a,b,c,d,e,f,in[2], Kshared[2]);
	sha2_step1(f,g,h,a,b,c,d,e,in[3], Kshared[3]);
	sha2_step1(e,f,g,h,a,b,c,d,in[4], Kshared[4]);
	sha2_step1(d,e,f,g,h,a,b,c,in[5], Kshared[5]);
	sha2_step1(c,d,e,f,g,h,a,b,in[6], Kshared[6]);
	sha2_step1(b,c,d,e,f,g,h,a,in[7], Kshared[7]);
	sha2_step1(a,b,c,d,e,f,g,h,in[8], Kshared[8]);
	sha2_step1(h,a,b,c,d,e,f,g,in[9], Kshared[9]);
	sha2_step1(g,h,a,b,c,d,e,f,in[10],Kshared[10]);
	sha2_step1(f,g,h,a,b,c,d,e,in[11],Kshared[11]);
	sha2_step1(e,f,g,h,a,b,c,d,in[12],Kshared[12]);
	sha2_step1(d,e,f,g,h,a,b,c,in[13],Kshared[13]);
	sha2_step1(c,d,e,f,g,h,a,b,in[14],Kshared[14]);
	sha2_step1(b,c,d,e,f,g,h,a,in[15],Kshared[15]);

	// rounds 16..63, 16 rounds per iteration
#pragma unroll
	for (int i=0; i<3; i++)
	{
		sha2_step2(a,b,c,d,e,f,g,h,in,0, Kshared[16+16*i]);
		sha2_step2(h,a,b,c,d,e,f,g,in,1, Kshared[17+16*i]);
		sha2_step2(g,h,a,b,c,d,e,f,in,2, Kshared[18+16*i]);
		sha2_step2(f,g,h,a,b,c,d,e,in,3, Kshared[19+16*i]);
		sha2_step2(e,f,g,h,a,b,c,d,in,4, Kshared[20+16*i]);
		sha2_step2(d,e,f,g,h,a,b,c,in,5, Kshared[21+16*i]);
		sha2_step2(c,d,e,f,g,h,a,b,in,6, Kshared[22+16*i]);
		sha2_step2(b,c,d,e,f,g,h,a,in,7, Kshared[23+16*i]);
		sha2_step2(a,b,c,d,e,f,g,h,in,8, Kshared[24+16*i]);
		sha2_step2(h,a,b,c,d,e,f,g,in,9, Kshared[25+16*i]);
		sha2_step2(g,h,a,b,c,d,e,f,in,10,Kshared[26+16*i]);
		sha2_step2(f,g,h,a,b,c,d,e,in,11,Kshared[27+16*i]);
		sha2_step2(e,f,g,h,a,b,c,d,in,12,Kshared[28+16*i]);
		sha2_step2(d,e,f,g,h,a,b,c,in,13,Kshared[29+16*i]);
		sha2_step2(c,d,e,f,g,h,a,b,in,14,Kshared[30+16*i]);
		sha2_step2(b,c,d,e,f,g,h,a,in,15,Kshared[31+16*i]);
	}

	// Davies-Meyer feed-forward
	state[0] += a;
	state[1] += b;
	state[2] += c;
	state[3] += d;
	state[4] += e;
	state[5] += f;
	state[6] += g;
	state[7] += h;
}
|
||||
|
||||
__device__
// Byte-swap each 32-bit half of a 64-bit word independently (this is NOT
// a full 64-bit bswap); used to endian-correct pairs of hash words.
uint64_t cuda_swab32ll(uint64_t x) {
	return MAKE_ULONGLONG(cuda_swab32(_LODWORD(x)), cuda_swab32(_HIDWORD(x)));
}
|
||||
|
||||
__global__
/*__launch_bounds__(256,3)*/
// First SHA-256 of the 112-byte lbry header, one nonce per thread.
// The first 64-byte block was compressed on the host into c_midstate112;
// only the 44-byte tail (c_dataEnd112 + nonce) plus padding is hashed here.
void lbry_sha256_gpu_hash_112(const uint32_t threads, const uint32_t startNonce, const bool swabNonce, uint64_t *outputHash)
{
	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		const uint32_t nonce = startNonce + thread;

		uint32_t dat[16];
		#pragma unroll
		for (int i=0;i<11;i++) dat[i] = c_dataEnd112[i]; // pre "swabed"
		dat[11] = swabNonce ? cuda_swab32(nonce) : nonce;
		dat[12] = 0x80000000; // SHA-256 padding bit
		dat[13] = 0;
		dat[14] = 0;
		dat[15] = 0x380; // total message length: 112 bytes = 0x380 bits

		uint32_t __align__(8) buf[8];
		#pragma unroll
		for (int i=0;i<8;i++) buf[i] = c_midstate112[i];

		sha256_round_body(dat, buf, c_K);

		// output
		uint2* output = (uint2*) (&outputHash[thread * 8U]);
		#pragma unroll
		for (int i=0;i<4;i++) {
			//output[i] = vectorize(cuda_swab32ll(((uint64_t*)buf)[i]));
			output[i] = vectorize(((uint64_t*)buf)[i]); // out without swap, new sha256 after
		}
	}
}
|
||||
|
||||
__global__
/*__launch_bounds__(256,3)*/
// Single SHA-256 of a 32-byte input, read from and written back to the
// per-thread 64-byte slot of Hash512 (in place).
void lbry_sha256_gpu_hash_32(uint32_t threads, uint64_t *Hash512)
{
	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		uint32_t __align__(8) buf[8]; // align for vectorize
		#pragma unroll
		for (int i=0; i<8; i++) buf[i] = c_H256[i]; // fresh SHA-256 IV

		uint32_t* input = (uint32_t*) (&Hash512[thread * 8U]);

		uint32_t dat[16];
		#pragma unroll
		//for (int i=0;i<8;i++) dat[i] = cuda_swab32(input[i]);
		for (int i=0; i<8; i++) dat[i] = input[i];
		dat[8] = 0x80000000; // padding bit
		#pragma unroll
		for (int i=9; i<15; i++) dat[i] = 0;
		dat[15] = 0x100; // message length: 32 bytes = 256 bits

		sha256_round_body(dat, buf, c_K);

		// output (in place, endian-corrected)
		uint2* output = (uint2*) input;
		#pragma unroll
		for (int i=0;i<4;i++) {
			output[i] = vectorize(cuda_swab32ll(((uint64_t*)buf)[i]));
		}
#ifdef PAD_ZEROS
		// clear the upper half of the 64-byte slot
		#pragma unroll
		for (int i=4; i<8; i++) output[i] = vectorize(0);
#endif
	}
}
|
||||
|
||||
__global__
/*__launch_bounds__(256,3)*/
// Double SHA-256 of the 112-byte lbry header (midstate variant), one nonce
// per thread. Round constants are staged in dynamic shared memory (the
// launcher passes 64*4 bytes).
void lbry_sha256d_gpu_hash_112(const uint32_t threads, const uint32_t startNonce, const bool swabNonce, uint64_t *outputHash)
{
	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
	extern __shared__ uint32_t s_K[];
	//s_K[thread & 63] = c_K[thread & 63];
	if (threadIdx.x < 64U) s_K[threadIdx.x] = c_K[threadIdx.x];
	// FIX: every warp of the block reads s_K below, but only the first two
	// warps write it; without a barrier the other warps can race ahead and
	// read uninitialized shared memory.
	__syncthreads();
	if (thread < threads)
	{
		const uint32_t nonce = startNonce + thread;

		uint32_t dat[16];
		#pragma unroll
		for (int i=0; i<11; i++) dat[i] = c_dataEnd112[i];
		dat[11] = swabNonce ? cuda_swab32(nonce) : nonce;
		dat[12] = 0x80000000; // padding bit
		dat[13] = 0;
		dat[14] = 0;
		dat[15] = 0x380; // 112 bytes = 0x380 bits

		uint32_t __align__(8) buf[8];
		#pragma unroll
		for (int i=0;i<8;i++) buf[i] = c_midstate112[i];

		sha256_round_body(dat, buf, s_K);

		// second sha256 over the 32-byte first digest

		#pragma unroll
		for (int i=0; i<8; i++) dat[i] = buf[i];
		dat[8] = 0x80000000;
		#pragma unroll
		for (int i=9; i<15; i++) dat[i] = 0;
		dat[15] = 0x100; // 32 bytes = 256 bits

		#pragma unroll
		for (int i=0; i<8; i++) buf[i] = c_H256[i];

		sha256_round_body(dat, buf, s_K);

		// output
		uint2* output = (uint2*) (&outputHash[thread * 8U]);
		#pragma unroll
		for (int i=0;i<4;i++) {
			output[i] = vectorize(cuda_swab32ll(((uint64_t*)buf)[i]));
			//output[i] = vectorize(((uint64_t*)buf)[i]);
		}
	}
}
|
||||
|
||||
__global__
/*__launch_bounds__(256,3)*/
// Single SHA-256 over the two concatenated 20-byte ripemd160 digests
// stored at offsets 0 and 32 of the per-thread 64-byte slot; result is
// written back in place.
void lbry_sha256_gpu_hash_20x2(uint32_t threads, uint64_t *Hash512)
{
	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		uint32_t __align__(8) buf[8]; // align for vectorize
		#pragma unroll
		for (int i=0;i<8;i++) buf[i] = c_H256[i]; // fresh SHA-256 IV

		uint32_t* input = (uint32_t*) (&Hash512[thread * 8U]);

		uint32_t dat[16];
		#pragma unroll
		for (int i=0;i<5;i++) dat[i] = cuda_swab32(input[i]);       // first 20-byte digest
		#pragma unroll
		for (int i=0;i<5;i++) dat[i+5] = cuda_swab32(input[i+8]);   // second, at byte offset 32
		dat[10] = 0x80000000; // padding bit
		#pragma unroll
		for (int i=11;i<15;i++) dat[i] = 0;
		dat[15] = 0x140; // message length: 40 bytes = 320 bits

		sha256_round_body(dat, buf, c_K);

		// output (in place)
		uint2* output = (uint2*) input;
		#pragma unroll
		for (int i=0;i<4;i++) {
			//output[i] = vectorize(cuda_swab32ll(((uint64_t*)buf)[i]));
			output[i] = vectorize(((uint64_t*)buf)[i]);
		}
#ifdef PAD_ZEROS
		#pragma unroll
		for (int i=4; i<8; i++) output[i] = vectorize(0);
#endif
	}
}
|
||||
|
||||
__global__
/*__launch_bounds__(256,3)*/
// Double SHA-256 over the two concatenated 20-byte ripemd160 digests
// (offsets 0 and 32 of the per-thread slot). Round constants are staged in
// dynamic shared memory (launcher passes 64*4 bytes). Result written back
// in place; unless FULL_HASH is defined, only the top 8 bytes are stored.
void lbry_sha256d_gpu_hash_20x2(uint32_t threads, uint64_t *Hash512)
{
	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
	extern __shared__ uint32_t s_K[];
	if (threadIdx.x < 64U) s_K[threadIdx.x] = c_K[threadIdx.x];
	// FIX: every warp of the block reads s_K below, but only the first two
	// warps write it; without a barrier the other warps can race ahead and
	// read uninitialized shared memory.
	__syncthreads();
	if (thread < threads)
	{
		uint32_t __align__(8) buf[8]; // align for vectorize
		#pragma unroll
		for (int i=0;i<8;i++) buf[i] = c_H256[i];

		uint32_t* input = (uint32_t*) (&Hash512[thread * 8U]);

		uint32_t dat[16];
		#pragma unroll
		for (int i=0; i<5; i++) dat[i] = cuda_swab32(input[i]);
		#pragma unroll
		for (int i=0; i<5; i++) dat[i+5] = cuda_swab32(input[i+8]);
		dat[10] = 0x80000000; // padding bit
		#pragma unroll
		for (int i=11;i<15;i++) dat[i] = 0;
		dat[15] = 0x140; // 40 bytes = 320 bits

		sha256_round_body(dat, buf, s_K);

		// second sha256 over the 32-byte first digest

		#pragma unroll
		for (int i=0; i<8; i++) dat[i] = buf[i];
		dat[8] = 0x80000000;
		#pragma unroll
		for (int i=9; i<15; i++) dat[i] = 0;
		dat[15] = 0x100; // 32 bytes = 256 bits

		#pragma unroll
		for (int i=0;i<8;i++) buf[i] = c_H256[i];

		sha256_round_body(dat, buf, s_K);

		// output
		uint2* output = (uint2*) input;

#ifdef FULL_HASH
		#pragma unroll
		for (int i=0;i<4;i++) {
			output[i] = vectorize(cuda_swab32ll(((uint64_t*)buf)[i]));
			//output[i] = vectorize(((uint64_t*)buf)[i]);
		}
# ifdef PAD_ZEROS
		#pragma unroll
		for (int i=4; i<8; i++) output[i] = vectorize(0);
# endif

#else
		//input[6] = cuda_swab32(buf[6]);
		//input[7] = cuda_swab32(buf[7]);
		// only the high 64 bits are needed for the target check
		output[3] = vectorize(cuda_swab32ll(((uint64_t*)buf)[3]));
#endif
	}
}
|
||||
|
||||
__host__
// Per-GPU init: upload the 64 SHA-256 round constants to constant memory
// and allocate the device-side result-nonce buffer.
void lbry_sha256_init(int thr_id)
{
	//cudaMemcpyToSymbol(c_H256, cpu_H256, sizeof(cpu_H256), 0, cudaMemcpyHostToDevice);
	cudaMemcpyToSymbol(c_K, cpu_K, sizeof(cpu_K), 0, cudaMemcpyHostToDevice);
	CUDA_SAFE_CALL(cudaMalloc(&d_resNonces, 4*sizeof(uint32_t)));
}
|
||||
|
||||
__host__
// Release the device-side result-nonce buffer allocated in lbry_sha256_init.
void lbry_sha256_free(int thr_id)
{
	cudaFree(d_resNonces);
}
|
||||
|
||||
__host__
// Precompute the SHA-256 midstate of the first 64 header bytes on the CPU
// and upload it, the remaining 44 header bytes (nonce excluded) and the
// 64-bit target to device constant memory.
void lbry_sha256_setBlock_112(uint32_t *pdata, uint32_t *ptarget)
{
	uint32_t in[16], buf[8], end[11];
	for (int i=0;i<16;i++) in[i] = cuda_swab32(pdata[i]);   // first 64-byte block, big-endian
	for (int i=0; i<8;i++) buf[i] = cpu_H256[i];            // SHA-256 IV
	for (int i=0;i<11;i++) end[i] = cuda_swab32(pdata[16+i]); // header tail before the nonce
	sha256_round_body_host(in, buf, cpu_K);

	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_midstate112, buf, 32, 0, cudaMemcpyHostToDevice));
	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_dataEnd112, end, sizeof(end), 0, cudaMemcpyHostToDevice));
	// ptarget[6..7] hold the 64 most significant target bits
	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_target, &ptarget[6], sizeof(uint64_t), 0, cudaMemcpyHostToDevice));
	CUDA_SAFE_CALL(cudaMemcpyToSymbol(d_target, &ptarget[6], sizeof(uint64_t), 0, cudaMemcpyHostToDevice));
}
|
||||
|
||||
__host__
// Launcher for lbry_sha256_gpu_hash_112.
void lbry_sha256_hash_112(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_outputHash, bool swabNonce, cudaStream_t stream)
{
	const int threadsperblock = 256;

	// Round the grid up so a thread count that is not a multiple of 256
	// still covers every nonce (the kernel guards with thread < threads);
	// matches the rounding already used by lbry_sha512_hash_32.
	dim3 grid((threads + threadsperblock - 1)/threadsperblock);
	dim3 block(threadsperblock);

	lbry_sha256_gpu_hash_112 <<<grid, block, 0, stream>>> (threads, startNonce, swabNonce, (uint64_t*) d_outputHash);
	cudaGetLastError(); // clear any sticky launch error
}
|
||||
|
||||
__host__
// Launcher for lbry_sha256_gpu_hash_32.
void lbry_sha256_hash_32(int thr_id, uint32_t threads, uint32_t *d_Hash, cudaStream_t stream)
{
	const int threadsperblock = 256;

	// Round up so a non-multiple-of-256 thread count is fully covered
	// (kernel guards with thread < threads).
	dim3 grid((threads + threadsperblock - 1)/threadsperblock);
	dim3 block(threadsperblock);

	lbry_sha256_gpu_hash_32 <<<grid, block, 0, stream>>> (threads, (uint64_t*) d_Hash);
}
|
||||
|
||||
__host__
// Launcher for lbry_sha256d_gpu_hash_112; 64*4 bytes of dynamic shared
// memory hold the round constants copied by the kernel.
void lbry_sha256d_hash_112(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_outputHash, bool swabNonce, cudaStream_t stream)
{
	const int threadsperblock = 256;

	// Round up so a non-multiple-of-256 thread count is fully covered
	// (kernel guards with thread < threads).
	dim3 grid((threads + threadsperblock - 1)/threadsperblock);
	dim3 block(threadsperblock);

	lbry_sha256d_gpu_hash_112 <<<grid, block, 64*4, stream>>> (threads, startNonce, swabNonce, (uint64_t*) d_outputHash);
}
|
||||
|
||||
__host__
// Launcher for lbry_sha256_gpu_hash_20x2.
void lbry_sha256_hash_20x2(int thr_id, uint32_t threads, uint32_t *d_Hash, cudaStream_t stream)
{
	const int threadsperblock = 256;

	// Round up so a non-multiple-of-256 thread count is fully covered
	// (kernel guards with thread < threads).
	dim3 grid((threads + threadsperblock - 1)/threadsperblock);
	dim3 block(threadsperblock);

	lbry_sha256_gpu_hash_20x2 <<<grid, block, 0, stream>>> (threads, (uint64_t*) d_Hash);
}
|
||||
|
||||
__host__
// Launcher for lbry_sha256d_gpu_hash_20x2; 64*4 bytes of dynamic shared
// memory hold the round constants copied by the kernel.
void lbry_sha256d_hash_20x2(int thr_id, uint32_t threads, uint32_t *d_Hash, cudaStream_t stream)
{
	const int threadsperblock = 256;

	// Round up so a non-multiple-of-256 thread count is fully covered
	// (kernel guards with thread < threads).
	dim3 grid((threads + threadsperblock - 1)/threadsperblock);
	dim3 block(threadsperblock);

	lbry_sha256d_gpu_hash_20x2 <<<grid, block, 64*4, stream>>> (threads, (uint64_t*) d_Hash);
}
|
||||
|
||||
__global__
__launch_bounds__(256,3)
// Final double SHA-256 over the two ripemd160 digests, with an inline
// 64-bit target check; winning nonces are pushed into resNonces and the
// device-side target is tightened so only better solutions are reported.
void lbry_sha256d_gpu_hash_final(const uint32_t threads, const uint32_t startNonce, uint64_t *Hash512, uint32_t *resNonces)
{
	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		uint32_t __align__(8) buf[8]; // align for vectorize
		#pragma unroll
		for (int i=0;i<8;i++) buf[i] = c_H256[i];

		uint32_t* input = (uint32_t*) (&Hash512[thread * 8U]);

		uint32_t __align__(8) dat[16];
		#pragma unroll
		for (int i=0;i<5;i++) dat[i] = cuda_swab32(input[i]);     // first 20-byte digest
		#pragma unroll
		for (int i=0;i<5;i++) dat[i+5] = cuda_swab32(input[i+8]); // second, at byte offset 32
		dat[10] = 0x80000000; // padding bit
		#pragma unroll
		for (int i=11;i<15;i++) dat[i] = 0;
		dat[15] = 0x140; // 40 bytes = 320 bits

		sha256_round_body(dat, buf, c_K);

		// second sha256 over the 32-byte first digest

		#pragma unroll
		for (int i=0;i<8;i++) dat[i] = buf[i];
		dat[8] = 0x80000000;
		#pragma unroll
		for (int i=9;i<15;i++) dat[i] = 0;
		dat[15] = 0x100; // 32 bytes = 256 bits

		#pragma unroll
		for (int i=0;i<8;i++) buf[i] = c_H256[i];

		sha256_round_body(dat, buf, c_K);

		// valid nonces: compare the top 64 bits against the running target
		uint64_t high = cuda_swab32ll(((uint64_t*)buf)[3]);
		if (high <= d_target[0]) {
			// printf("%08x %08x - %016llx %016llx - %08x %08x\n", buf[7], buf[6], high, d_target[0], c_target[1], c_target[0]);
			uint32_t nonce = startNonce + thread;
			// keep the new nonce in [0], demote the previous one to [1]
			resNonces[1] = atomicExch(resNonces, nonce);
			d_target[0] = high; // tighten: only better results reported after this
		}
	}
}
|
||||
|
||||
__host__
// Launch the final target-check kernel and copy back up to two candidate
// nonces; resNonces[1] is set to UINT32_MAX when only one was found.
void lbry_sha256d_hash_final(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_inputHash, uint32_t *resNonces, cudaStream_t stream)
{
	const int threadsperblock = 256;

	// Round up so a non-multiple-of-256 thread count is fully covered
	// (kernel guards with thread < threads).
	dim3 grid((threads + threadsperblock - 1)/threadsperblock);
	dim3 block(threadsperblock);

	CUDA_SAFE_CALL(cudaMemset(d_resNonces, 0xFF, 2 * sizeof(uint32_t)));
	// cudaThreadSynchronize() is deprecated since CUDA 4.0; use the
	// cudaDeviceSynchronize() replacement.
	cudaDeviceSynchronize();

	lbry_sha256d_gpu_hash_final <<<grid, block, 0, stream>>> (threads, startNonce, (uint64_t*) d_inputHash, d_resNonces);

	cudaDeviceSynchronize();

	CUDA_SAFE_CALL(cudaMemcpy(resNonces, d_resNonces, 2 * sizeof(uint32_t), cudaMemcpyDeviceToHost));
	if (resNonces[0] == resNonces[1]) {
		// atomicExch stored the same nonce twice => only one candidate
		resNonces[1] = UINT32_MAX;
	}
}
|
181
lbry/cuda_sha512_lbry.cu
Normal file
181
lbry/cuda_sha512_lbry.cu
Normal file
@ -0,0 +1,181 @@
|
||||
/**
|
||||
* sha-512 CUDA implementation.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <memory.h>
|
||||
|
||||
#include <cuda_helper.h>
|
||||
|
||||
// Device copy of the SHA-512 round constants (filled by lbry_sha512_init).
static __constant__ uint64_t K_512[80];

// SHA-512 round constants K[0..79] (FIPS 180-4), host master copy.
static const uint64_t K512[80] = {
	0x428A2F98D728AE22, 0x7137449123EF65CD, 0xB5C0FBCFEC4D3B2F, 0xE9B5DBA58189DBBC,
	0x3956C25BF348B538, 0x59F111F1B605D019, 0x923F82A4AF194F9B, 0xAB1C5ED5DA6D8118,
	0xD807AA98A3030242, 0x12835B0145706FBE, 0x243185BE4EE4B28C, 0x550C7DC3D5FFB4E2,
	0x72BE5D74F27B896F, 0x80DEB1FE3B1696B1, 0x9BDC06A725C71235, 0xC19BF174CF692694,
	0xE49B69C19EF14AD2, 0xEFBE4786384F25E3, 0x0FC19DC68B8CD5B5, 0x240CA1CC77AC9C65,
	0x2DE92C6F592B0275, 0x4A7484AA6EA6E483, 0x5CB0A9DCBD41FBD4, 0x76F988DA831153B5,
	0x983E5152EE66DFAB, 0xA831C66D2DB43210, 0xB00327C898FB213F, 0xBF597FC7BEEF0EE4,
	0xC6E00BF33DA88FC2, 0xD5A79147930AA725, 0x06CA6351E003826F, 0x142929670A0E6E70,
	0x27B70A8546D22FFC, 0x2E1B21385C26C926, 0x4D2C6DFC5AC42AED, 0x53380D139D95B3DF,
	0x650A73548BAF63DE, 0x766A0ABB3C77B2A8, 0x81C2C92E47EDAEE6, 0x92722C851482353B,
	0xA2BFE8A14CF10364, 0xA81A664BBC423001, 0xC24B8B70D0F89791, 0xC76C51A30654BE30,
	0xD192E819D6EF5218, 0xD69906245565A910, 0xF40E35855771202A, 0x106AA07032BBD1B8,
	0x19A4C116B8D2D0C8, 0x1E376C085141AB53, 0x2748774CDF8EEB99, 0x34B0BCB5E19B48A8,
	0x391C0CB3C5C95A63, 0x4ED8AA4AE3418ACB, 0x5B9CCA4F7763E373, 0x682E6FF3D6B2B8A3,
	0x748F82EE5DEFB2FC, 0x78A5636F43172F60, 0x84C87814A1F0AB72, 0x8CC702081A6439EC,
	0x90BEFFFA23631E28, 0xA4506CEBDE82BDE9, 0xBEF9A3F7B2C67915, 0xC67178F2E372532B,
	0xCA273ECEEA26619C, 0xD186B8C721C0C207, 0xEADA7DD6CDE0EB1E, 0xF57D4F7FEE6ED178,
	0x06F067AA72176FBA, 0x0A637DC5A2C898A6, 0x113F9804BEF90DAE, 0x1B710B35131C471B,
	0x28DB77F523047D84, 0x32CAAB7B40C72493, 0x3C9EBE0A15C9BEBC, 0x431D67C49C100D4C,
	0x4CC5D4BECB3E42B6, 0x597F299CFC657E2A, 0x5FCB6FAB3AD6FAEC, 0x6C44198C4A475817
};
|
||||
|
||||
//#undef xor3
|
||||
//#define xor3(a,b,c) (a^b^c)
|
||||
|
||||
//#undef
|
||||
|
||||
// SHA-512 Sigma0(x) = (x>>>28) ^ (x>>>34) ^ (x>>>39)
static __device__ __forceinline__
uint64_t bsg5_0(const uint64_t x)
{
	return xor3(ROTR64(x, 28), ROTR64(x, 34), ROTR64(x, 39));
}
|
||||
|
||||
// SHA-512 Sigma1(x) = (x>>>14) ^ (x>>>18) ^ (x>>>41)
static __device__ __forceinline__
uint64_t bsg5_1(const uint64_t x)
{
	return xor3(ROTR64(x, 14), ROTR64(x, 18), ROTR64(x, 41));
}
|
||||
|
||||
// SHA-512 sigma0(x) = (x>>>1) ^ (x>>>8) ^ (x>>7)
static __device__ __forceinline__
uint64_t ssg5_0(const uint64_t x)
{
	return xor3(ROTR64(x, 1), ROTR64(x, 8), shr_t64(x, 7));
}
|
||||
|
||||
// SHA-512 sigma1(x) = (x>>>19) ^ (x>>>61) ^ (x>>6)
static __device__ __forceinline__
uint64_t ssg5_1(const uint64_t x)
{
	return xor3(ROTR64(x, 19), ROTR64(x, 61), shr_t64(x, 6));
}
|
||||
|
||||
// SHA-512 Ch(a,b,c) computed as ((b ^ c) & a) ^ c, pinned to a
// 3-instruction PTX sequence.
static __device__ __forceinline__
uint64_t xandx64(const uint64_t a, const uint64_t b, const uint64_t c)
{
	uint64_t result;
	asm("{ .reg .u64 m,n; // xandx64\n\t"
		"xor.b64 m, %2,%3;\n\t"   // m = b ^ c
		"and.b64 n, m,%1;\n\t"    // n = m & a
		"xor.b64 %0, n,%3;\n\t"   // result = n ^ c
		"}" : "=l"(result) : "l"(a), "l"(b), "l"(c));
	return result;
}
|
||||
|
||||
// One SHA-512 compression round. Instead of shuffling the eight working
// variables, r[] is treated as a rotating register file: `ord` (round mod 8)
// selects which slot currently plays the role of a..h, and only the two
// slots holding d and h are written back.
static __device__ __forceinline__
void sha512_step2(uint64_t* r, uint64_t* W, uint64_t* K, const int ord, int i)
{
	int u = 8-ord; // rotation offset so (0+u)&7 lands on the current 'a'
	uint64_t a = r[(0+u) & 7];
	uint64_t b = r[(1+u) & 7];
	uint64_t c = r[(2+u) & 7];
	uint64_t d = r[(3+u) & 7];
	uint64_t e = r[(4+u) & 7];
	uint64_t f = r[(5+u) & 7];
	uint64_t g = r[(6+u) & 7];
	uint64_t h = r[(7+u) & 7];

	uint64_t T1 = h + bsg5_1(e) + xandx64(e,f,g) + W[i] + K[i];
	uint64_t T2 = bsg5_0(a) + andor(a,b,c);
	r[(3+u)& 7] = d + T1; // new e
	r[(7+u)& 7] = T1 + T2; // new a
}
|
||||
|
||||
/**************************************************************************************************/
|
||||
|
||||
__global__
// Single SHA-512 of a 32-byte input per thread, in place: reads the first
// 32 bytes of the thread's 64-byte slot in g_hash (big-endian) and writes
// the full 64-byte digest back (big-endian).
void lbry_sha512_gpu_hash_32(const uint32_t threads, uint64_t *g_hash)
{
	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		uint64_t *pHash = &g_hash[thread * 8U];

		uint64_t W[80]; // full message schedule
		uint64_t r[8];  // rotating working variables

		uint64_t IV512[8] = {
			0x6A09E667F3BCC908, 0xBB67AE8584CAA73B, 0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
			0x510E527FADE682D1, 0x9B05688C2B3E6C1F, 0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179
		};

		#pragma unroll
		for (int i = 0; i < 8; i++)
			r[i] = IV512[i];

		#pragma unroll
		for (int i = 0; i < 4; i++) {
			// 32 bytes input
			W[i] = cuda_swab64(pHash[i]);
		}

		W[4] = 0x8000000000000000; // end tag

		#pragma unroll
		for (int i = 5; i < 15; i++) W[i] = 0;

		W[15] = 0x100; // 256 bits

		#pragma unroll
		for (int i = 16; i < 80; i++) W[i] = 0;

		// message schedule expansion
		#pragma unroll 64
		for (int i = 16; i < 80; i++)
			W[i] = ssg5_1(W[i - 2]) + W[i - 7] + ssg5_0(W[i - 15]) + W[i - 16];

		// 80 rounds, 8 at a time so the register rotation pattern repeats
		#pragma unroll 10
		for (int i = 0; i < 10; i++) {
			#pragma unroll 8
			for (int ord=0; ord<8; ord++)
				sha512_step2(r, W, K_512, ord, 8*i + ord);
		}

		// feed-forward and store big-endian
		#pragma unroll 8
		for (int i = 0; i < 8; i++)
			pHash[i] = cuda_swab64(r[i] + IV512[i]);
	}
}
|
||||
|
||||
__host__
// Launcher for lbry_sha512_gpu_hash_32.
void lbry_sha512_hash_32(int thr_id, uint32_t threads, uint32_t *d_hash, cudaStream_t stream)
{
	const int threadsperblock = 256;

	dim3 grid((threads + threadsperblock-1)/threadsperblock);
	dim3 block(threadsperblock);

	// Fix: the kernel reads K_512 from __constant__ memory and declares no
	// extern __shared__ array, so the former 80*8-byte dynamic shared
	// allocation was never used and only reduced occupancy.
	lbry_sha512_gpu_hash_32 <<<grid, block, 0, stream>>> (threads, (uint64_t*)d_hash);
}
|
||||
|
||||
/**************************************************************************************************/
|
||||
|
||||
__host__
// Per-GPU init: upload the 80 SHA-512 round constants to constant memory.
void lbry_sha512_init(int thr_id)
{
	cudaMemcpyToSymbol(K_512, K512, 80*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
}
|
225
lbry/lbry.cu
Normal file
225
lbry/lbry.cu
Normal file
@ -0,0 +1,225 @@
|
||||
/**
|
||||
* Lbry CUDA Implementation
|
||||
*
|
||||
* by tpruvot@github - July 2016
|
||||
*
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
extern "C" {
|
||||
#include <sph/sph_sha2.h>
|
||||
#include <sph/sph_ripemd.h>
|
||||
}
|
||||
|
||||
#include <cuda_helper.h>
|
||||
#include <miner.h>
|
||||
|
||||
#define A 64
|
||||
#define debug_cpu 0
|
||||
|
||||
// CPU reference implementation of the lbry proof-of-work hash:
// sha256d(112-byte header) -> sha512 -> ripemd160(low half) ||
// ripemd160(high half) -> sha256d, 32-byte result in `output`.
// NOTE(review): the same sha256 context is reused across hashes without an
// explicit re-init; presumably sph_sha256_close() re-initializes the
// context — verify against sph_sha2.c.
extern "C" void lbry_hash(void* output, const void* input)
{
	uint32_t _ALIGN(A) hashA[16];
	uint32_t _ALIGN(A) hashB[8];
	uint32_t _ALIGN(A) hashC[8];

	sph_sha256_context ctx_sha256;
	sph_sha512_context ctx_sha512;
	sph_ripemd160_context ctx_ripemd;

	sph_sha256_init(&ctx_sha256);
	sph_sha256(&ctx_sha256, input, 112);
	sph_sha256_close(&ctx_sha256, hashA);

	sph_sha256(&ctx_sha256, hashA, 32);
	sph_sha256_close(&ctx_sha256, hashA);

	sph_sha512_init(&ctx_sha512);
	sph_sha512(&ctx_sha512, hashA, 32);
	sph_sha512_close(&ctx_sha512, hashA); // 64-byte digest fills hashA[0..15]

	sph_ripemd160_init(&ctx_ripemd);
	sph_ripemd160(&ctx_ripemd, hashA, 32); // sha512 low
	sph_ripemd160_close(&ctx_ripemd, hashB);
	if (debug_cpu) applog_hex(hashB, 20);

	sph_ripemd160(&ctx_ripemd, &hashA[8], 32); // sha512 high
	sph_ripemd160_close(&ctx_ripemd, hashC);
	if (debug_cpu) applog_hex(hashC, 20);

	// sha256d over the concatenated 20-byte ripemd digests
	sph_sha256(&ctx_sha256, hashB, 20);
	sph_sha256(&ctx_sha256, hashC, 20);
	sph_sha256_close(&ctx_sha256, hashA);
	if (debug_cpu) applog_hex(hashA,32);

	sph_sha256(&ctx_sha256, hashA, 32);
	sph_sha256_close(&ctx_sha256, hashA);

	memcpy(output, hashA, 32);
}
|
||||
|
||||
/* ############################################################################################################################### */
|
||||
|
||||
extern void lbry_ripemd160_init(int thr_id);
|
||||
extern void lbry_sha256_init(int thr_id);
|
||||
extern void lbry_sha256_free(int thr_id);
|
||||
extern void lbry_sha256_setBlock_112(uint32_t *pdata, uint32_t *ptarget);
|
||||
extern void lbry_sha256_hash_112(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_outputHash, bool swabNonce, cudaStream_t stream);
|
||||
extern void lbry_sha256d_hash_112(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_outputHash, bool swabNonce, cudaStream_t stream);
|
||||
extern void lbry_sha256_hash_32(int thr_id, uint32_t threads, uint32_t *d_hash, cudaStream_t stream);
|
||||
extern void lbry_sha512_init(int thr_id);
|
||||
extern void lbry_sha512_hash_32(int thr_id, uint32_t threads, uint32_t *d_hash, cudaStream_t stream);
|
||||
extern void lbry_ripemd160_hash_32x2(int thr_id, uint32_t threads, uint32_t *g_Hash, cudaStream_t stream);
|
||||
extern void lbry_sha256_hash_20x2(int thr_id, uint32_t threads, uint32_t *g_Hash, cudaStream_t stream);
|
||||
extern void lbry_sha256d_hash_20x2(int thr_id, uint32_t threads, uint32_t *g_Hash, cudaStream_t stream);
|
||||
extern void lbry_sha256d_hash_final(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_inputHash, uint32_t *resNonces, cudaStream_t stream);
|
||||
|
||||
// Conditionally byte-swap a 32-bit word: swapped when iftrue, else unchanged.
static __inline uint32_t swab32_if(uint32_t val, bool iftrue) {
	if (iftrue)
		return swab32(val);
	return val;
}
|
||||
|
||||
static bool init[MAX_GPUS] = { 0 };
|
||||
|
||||
static uint32_t *d_hash[MAX_GPUS];
|
||||
|
||||
// nonce position is different
|
||||
#define LBC_NONCE_OFT32 27
|
||||
|
||||
/**
 * LBRY Credits scanhash: sha256d(112 bytes) -> sha512 -> double ripemd160 -> sha256d.
 * The nonce sits at word 27 of the 112-byte header (LBC_NONCE_OFT32).
 * Returns the number of valid nonces found (0, 1 or 2); found nonces are
 * stored in work->nonces[], best-ratio one first.
 */
extern "C" int scanhash_lbry(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done)
{
	uint32_t _ALIGN(A) vhash[8];
	uint32_t _ALIGN(A) endiandata[28];
	uint32_t *pdata = work->data;
	uint32_t *ptarget = work->target;

	const uint32_t first_nonce = pdata[LBC_NONCE_OFT32];
	const int swap = 0; // to toggle nonce endian

	const int dev_id = device_map[thr_id];
	// default intensity per GPU generation (lower on windows and older cards)
	int intensity = (device_sm[dev_id] > 500 && !is_windows()) ? 22 : 20;
	if (device_sm[dev_id] >= 600) intensity = 23;
	if (device_sm[dev_id] < 350) intensity = 18;

	uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity);
	if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);

	if (opt_benchmark) {
		ptarget[7] = 0xff;
	}

	if (!init[thr_id]) {
		cudaSetDevice(dev_id);
		if (opt_cudaschedule == -1 && gpu_threads == 1) {
			cudaDeviceReset();
			// reduce cpu usage (linux)
			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
			CUDA_LOG_ERROR();
		}

		// 64 bytes of intermediate hash state per thread
		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput));

		lbry_sha256_init(thr_id);
		lbry_sha512_init(thr_id);
		lbry_ripemd160_init(thr_id);
		cuda_check_cpu_init(thr_id, throughput);
		CUDA_LOG_ERROR();

		init[thr_id] = true;
	}

	// big-endian encode the 108 header bytes preceding the nonce
	for (int i=0; i < LBC_NONCE_OFT32; i++) {
		be32enc(&endiandata[i], pdata[i]);
	}

	lbry_sha256_setBlock_112(endiandata, ptarget);
	cuda_check_cpu_setTarget(ptarget);

	do {
		// Hash with CUDA
#if 0
		lbry_sha256_hash_112(thr_id, throughput, pdata[LBC_NONCE_OFT32], d_hash[thr_id], swap, 0);
		lbry_sha256_hash_32(thr_id, throughput, d_hash[thr_id], 0);
#else
		lbry_sha256d_hash_112(thr_id, throughput, pdata[LBC_NONCE_OFT32], d_hash[thr_id], swap, 0);
#endif
		CUDA_LOG_ERROR();

		lbry_sha512_hash_32(thr_id, throughput, d_hash[thr_id], 0);

		lbry_ripemd160_hash_32x2(thr_id, throughput, d_hash[thr_id], 0);

#if 0
		lbry_sha256d_hash_20x2(thr_id, throughput, d_hash[thr_id], 0);
		uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[LBC_NONCE_OFT32], d_hash[thr_id]);
#else
		uint32_t resNonces[2] = { UINT32_MAX, UINT32_MAX };
		lbry_sha256d_hash_final(thr_id, throughput, pdata[LBC_NONCE_OFT32], d_hash[thr_id], resNonces, 0);
		uint32_t foundNonce = resNonces[0];
#endif

		*hashes_done = pdata[LBC_NONCE_OFT32] - first_nonce + throughput;

		if (foundNonce != UINT32_MAX)
		{
			endiandata[LBC_NONCE_OFT32] = swab32_if(foundNonce, !swap);
			lbry_hash(vhash, endiandata);

			if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
				int res = 1;
				uint32_t secNonce = resNonces[1];
				work->nonces[0] = swab32_if(foundNonce, swap);
				work_set_target_ratio(work, vhash);
				if (secNonce != UINT32_MAX) {
					if (opt_debug)
						gpulog(LOG_BLUE, thr_id, "found second nonce %08x", swab32(secNonce));
					endiandata[LBC_NONCE_OFT32] = swab32_if(secNonce, !swap);
					lbry_hash(vhash, endiandata);
					// fix: nonces[1] must be set BEFORE the xchg below, else a
					// stale value gets swapped into nonces[0] and submitted
					work->nonces[1] = swab32_if(secNonce, swap);
					if (bn_hash_target_ratio(vhash, ptarget) > work->shareratio) {
						// the second nonce has a better share ratio, report it first
						work_set_target_ratio(work, vhash);
						xchg(work->nonces[0], work->nonces[1]);
					}
					res++;
				}
				pdata[LBC_NONCE_OFT32] = work->nonces[0];
				return res;
			} else {
				gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU %08x > %08x!", foundNonce, vhash[7], ptarget[7]);
			}
		}

		if ((uint64_t) throughput + pdata[LBC_NONCE_OFT32] >= max_nonce) {
			pdata[LBC_NONCE_OFT32] = max_nonce;
			break;
		}

		pdata[LBC_NONCE_OFT32] += throughput;

	} while (!work_restart[thr_id].restart);

	return 0;
}
|
||||
|
||||
// cleanup
|
||||
// Release per-thread GPU resources allocated by scanhash_lbry.
void free_lbry(int thr_id)
{
	if (init[thr_id]) {
		cudaThreadSynchronize();

		cudaFree(d_hash[thr_id]);
		lbry_sha256_free(thr_id);

		cuda_check_cpu_free(thr_id);
		init[thr_id] = false;

		cudaDeviceSynchronize();
	}
}
|
3
miner.h
3
miner.h
@ -275,6 +275,7 @@ extern int scanhash_fugue256(int thr_id, struct work* work, uint32_t max_nonce,
|
||||
extern int scanhash_groestlcoin(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
|
||||
extern int scanhash_heavy(int thr_id,struct work *work, uint32_t max_nonce, unsigned long *hashes_done, uint32_t maxvote, int blocklen);
|
||||
extern int scanhash_jackpot(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
|
||||
extern int scanhash_lbry(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done);
|
||||
extern int scanhash_luffa(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
|
||||
extern int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
|
||||
extern int scanhash_lyra2v2(int thr_id,struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
|
||||
@ -584,6 +585,7 @@ struct stratum_job {
|
||||
unsigned char version[4];
|
||||
unsigned char nbits[4];
|
||||
unsigned char ntime[4];
|
||||
unsigned char claim[32]; // lbry
|
||||
bool clean;
|
||||
unsigned char nreward[2];
|
||||
uint32_t height;
|
||||
@ -797,6 +799,7 @@ void heavycoin_hash(unsigned char* output, const unsigned char* input, int len);
|
||||
void keccak256_hash(void *state, const void *input);
|
||||
unsigned int jackpothash(void *state, const void *input);
|
||||
void groestlhash(void *state, const void *input);
|
||||
void lbry_hash(void *output, const void *input);
|
||||
void lyra2re_hash(void *state, const void *input);
|
||||
void lyra2v2_hash(void *state, const void *input);
|
||||
void myriadhash(void *state, const void *input);
|
||||
|
833
sph/ripemd.c
Normal file
833
sph/ripemd.c
Normal file
@ -0,0 +1,833 @@
|
||||
/* $Id: ripemd.c 216 2010-06-08 09:46:57Z tp $ */
|
||||
/*
|
||||
* RIPEMD-160 implementation.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sph_ripemd.h"
|
||||
|
||||
/*
|
||||
* Round functions for RIPEMD (original).
|
||||
*/
|
||||
#define F(x, y, z) ((((y) ^ (z)) & (x)) ^ (z))
|
||||
#define G(x, y, z) (((x) & (y)) | (((x) | (y)) & (z)))
|
||||
#define H(x, y, z) ((x) ^ (y) ^ (z))
|
||||
|
||||
static const sph_u32 oIV[5] = {
|
||||
SPH_C32(0x67452301), SPH_C32(0xEFCDAB89),
|
||||
SPH_C32(0x98BADCFE), SPH_C32(0x10325476)
|
||||
};
|
||||
|
||||
/*
|
||||
* Round functions for RIPEMD-128 and RIPEMD-160.
|
||||
*/
|
||||
#define F1(x, y, z) ((x) ^ (y) ^ (z))
|
||||
#define F2(x, y, z) ((((y) ^ (z)) & (x)) ^ (z))
|
||||
#define F3(x, y, z) (((x) | ~(y)) ^ (z))
|
||||
#define F4(x, y, z) ((((x) ^ (y)) & (z)) ^ (y))
|
||||
#define F5(x, y, z) ((x) ^ ((y) | ~(z)))
|
||||
|
||||
static const sph_u32 IV[5] = {
|
||||
SPH_C32(0x67452301), SPH_C32(0xEFCDAB89), SPH_C32(0x98BADCFE),
|
||||
SPH_C32(0x10325476), SPH_C32(0xC3D2E1F0)
|
||||
};
|
||||
|
||||
#define ROTL SPH_ROTL32
|
||||
|
||||
/* ===================================================================== */
|
||||
/*
|
||||
* RIPEMD (original hash, deprecated).
|
||||
*/
|
||||
|
||||
#define FF1(A, B, C, D, X, s) do { \
|
||||
sph_u32 tmp = SPH_T32((A) + F(B, C, D) + (X)); \
|
||||
(A) = ROTL(tmp, (s)); \
|
||||
} while (0)
|
||||
|
||||
#define GG1(A, B, C, D, X, s) do { \
|
||||
sph_u32 tmp = SPH_T32((A) + G(B, C, D) \
|
||||
+ (X) + SPH_C32(0x5A827999)); \
|
||||
(A) = ROTL(tmp, (s)); \
|
||||
} while (0)
|
||||
|
||||
#define HH1(A, B, C, D, X, s) do { \
|
||||
sph_u32 tmp = SPH_T32((A) + H(B, C, D) \
|
||||
+ (X) + SPH_C32(0x6ED9EBA1)); \
|
||||
(A) = ROTL(tmp, (s)); \
|
||||
} while (0)
|
||||
|
||||
#define FF2(A, B, C, D, X, s) do { \
|
||||
sph_u32 tmp = SPH_T32((A) + F(B, C, D) \
|
||||
+ (X) + SPH_C32(0x50A28BE6)); \
|
||||
(A) = ROTL(tmp, (s)); \
|
||||
} while (0)
|
||||
|
||||
#define GG2(A, B, C, D, X, s) do { \
|
||||
sph_u32 tmp = SPH_T32((A) + G(B, C, D) + (X)); \
|
||||
(A) = ROTL(tmp, (s)); \
|
||||
} while (0)
|
||||
|
||||
#define HH2(A, B, C, D, X, s) do { \
|
||||
sph_u32 tmp = SPH_T32((A) + H(B, C, D) \
|
||||
+ (X) + SPH_C32(0x5C4DD124)); \
|
||||
(A) = ROTL(tmp, (s)); \
|
||||
} while (0)
|
||||
|
||||
#define RIPEMD_ROUND_BODY(in, h) do { \
|
||||
sph_u32 A1, B1, C1, D1; \
|
||||
sph_u32 A2, B2, C2, D2; \
|
||||
sph_u32 tmp; \
|
||||
\
|
||||
A1 = A2 = (h)[0]; \
|
||||
B1 = B2 = (h)[1]; \
|
||||
C1 = C2 = (h)[2]; \
|
||||
D1 = D2 = (h)[3]; \
|
||||
\
|
||||
FF1(A1, B1, C1, D1, in( 0), 11); \
|
||||
FF1(D1, A1, B1, C1, in( 1), 14); \
|
||||
FF1(C1, D1, A1, B1, in( 2), 15); \
|
||||
FF1(B1, C1, D1, A1, in( 3), 12); \
|
||||
FF1(A1, B1, C1, D1, in( 4), 5); \
|
||||
FF1(D1, A1, B1, C1, in( 5), 8); \
|
||||
FF1(C1, D1, A1, B1, in( 6), 7); \
|
||||
FF1(B1, C1, D1, A1, in( 7), 9); \
|
||||
FF1(A1, B1, C1, D1, in( 8), 11); \
|
||||
FF1(D1, A1, B1, C1, in( 9), 13); \
|
||||
FF1(C1, D1, A1, B1, in(10), 14); \
|
||||
FF1(B1, C1, D1, A1, in(11), 15); \
|
||||
FF1(A1, B1, C1, D1, in(12), 6); \
|
||||
FF1(D1, A1, B1, C1, in(13), 7); \
|
||||
FF1(C1, D1, A1, B1, in(14), 9); \
|
||||
FF1(B1, C1, D1, A1, in(15), 8); \
|
||||
\
|
||||
GG1(A1, B1, C1, D1, in( 7), 7); \
|
||||
GG1(D1, A1, B1, C1, in( 4), 6); \
|
||||
GG1(C1, D1, A1, B1, in(13), 8); \
|
||||
GG1(B1, C1, D1, A1, in( 1), 13); \
|
||||
GG1(A1, B1, C1, D1, in(10), 11); \
|
||||
GG1(D1, A1, B1, C1, in( 6), 9); \
|
||||
GG1(C1, D1, A1, B1, in(15), 7); \
|
||||
GG1(B1, C1, D1, A1, in( 3), 15); \
|
||||
GG1(A1, B1, C1, D1, in(12), 7); \
|
||||
GG1(D1, A1, B1, C1, in( 0), 12); \
|
||||
GG1(C1, D1, A1, B1, in( 9), 15); \
|
||||
GG1(B1, C1, D1, A1, in( 5), 9); \
|
||||
GG1(A1, B1, C1, D1, in(14), 7); \
|
||||
GG1(D1, A1, B1, C1, in( 2), 11); \
|
||||
GG1(C1, D1, A1, B1, in(11), 13); \
|
||||
GG1(B1, C1, D1, A1, in( 8), 12); \
|
||||
\
|
||||
HH1(A1, B1, C1, D1, in( 3), 11); \
|
||||
HH1(D1, A1, B1, C1, in(10), 13); \
|
||||
HH1(C1, D1, A1, B1, in( 2), 14); \
|
||||
HH1(B1, C1, D1, A1, in( 4), 7); \
|
||||
HH1(A1, B1, C1, D1, in( 9), 14); \
|
||||
HH1(D1, A1, B1, C1, in(15), 9); \
|
||||
HH1(C1, D1, A1, B1, in( 8), 13); \
|
||||
HH1(B1, C1, D1, A1, in( 1), 15); \
|
||||
HH1(A1, B1, C1, D1, in(14), 6); \
|
||||
HH1(D1, A1, B1, C1, in( 7), 8); \
|
||||
HH1(C1, D1, A1, B1, in( 0), 13); \
|
||||
HH1(B1, C1, D1, A1, in( 6), 6); \
|
||||
HH1(A1, B1, C1, D1, in(11), 12); \
|
||||
HH1(D1, A1, B1, C1, in(13), 5); \
|
||||
HH1(C1, D1, A1, B1, in( 5), 7); \
|
||||
HH1(B1, C1, D1, A1, in(12), 5); \
|
||||
\
|
||||
FF2(A2, B2, C2, D2, in( 0), 11); \
|
||||
FF2(D2, A2, B2, C2, in( 1), 14); \
|
||||
FF2(C2, D2, A2, B2, in( 2), 15); \
|
||||
FF2(B2, C2, D2, A2, in( 3), 12); \
|
||||
FF2(A2, B2, C2, D2, in( 4), 5); \
|
||||
FF2(D2, A2, B2, C2, in( 5), 8); \
|
||||
FF2(C2, D2, A2, B2, in( 6), 7); \
|
||||
FF2(B2, C2, D2, A2, in( 7), 9); \
|
||||
FF2(A2, B2, C2, D2, in( 8), 11); \
|
||||
FF2(D2, A2, B2, C2, in( 9), 13); \
|
||||
FF2(C2, D2, A2, B2, in(10), 14); \
|
||||
FF2(B2, C2, D2, A2, in(11), 15); \
|
||||
FF2(A2, B2, C2, D2, in(12), 6); \
|
||||
FF2(D2, A2, B2, C2, in(13), 7); \
|
||||
FF2(C2, D2, A2, B2, in(14), 9); \
|
||||
FF2(B2, C2, D2, A2, in(15), 8); \
|
||||
\
|
||||
GG2(A2, B2, C2, D2, in( 7), 7); \
|
||||
GG2(D2, A2, B2, C2, in( 4), 6); \
|
||||
GG2(C2, D2, A2, B2, in(13), 8); \
|
||||
GG2(B2, C2, D2, A2, in( 1), 13); \
|
||||
GG2(A2, B2, C2, D2, in(10), 11); \
|
||||
GG2(D2, A2, B2, C2, in( 6), 9); \
|
||||
GG2(C2, D2, A2, B2, in(15), 7); \
|
||||
GG2(B2, C2, D2, A2, in( 3), 15); \
|
||||
GG2(A2, B2, C2, D2, in(12), 7); \
|
||||
GG2(D2, A2, B2, C2, in( 0), 12); \
|
||||
GG2(C2, D2, A2, B2, in( 9), 15); \
|
||||
GG2(B2, C2, D2, A2, in( 5), 9); \
|
||||
GG2(A2, B2, C2, D2, in(14), 7); \
|
||||
GG2(D2, A2, B2, C2, in( 2), 11); \
|
||||
GG2(C2, D2, A2, B2, in(11), 13); \
|
||||
GG2(B2, C2, D2, A2, in( 8), 12); \
|
||||
\
|
||||
HH2(A2, B2, C2, D2, in( 3), 11); \
|
||||
HH2(D2, A2, B2, C2, in(10), 13); \
|
||||
HH2(C2, D2, A2, B2, in( 2), 14); \
|
||||
HH2(B2, C2, D2, A2, in( 4), 7); \
|
||||
HH2(A2, B2, C2, D2, in( 9), 14); \
|
||||
HH2(D2, A2, B2, C2, in(15), 9); \
|
||||
HH2(C2, D2, A2, B2, in( 8), 13); \
|
||||
HH2(B2, C2, D2, A2, in( 1), 15); \
|
||||
HH2(A2, B2, C2, D2, in(14), 6); \
|
||||
HH2(D2, A2, B2, C2, in( 7), 8); \
|
||||
HH2(C2, D2, A2, B2, in( 0), 13); \
|
||||
HH2(B2, C2, D2, A2, in( 6), 6); \
|
||||
HH2(A2, B2, C2, D2, in(11), 12); \
|
||||
HH2(D2, A2, B2, C2, in(13), 5); \
|
||||
HH2(C2, D2, A2, B2, in( 5), 7); \
|
||||
HH2(B2, C2, D2, A2, in(12), 5); \
|
||||
\
|
||||
tmp = SPH_T32((h)[1] + C1 + D2); \
|
||||
(h)[1] = SPH_T32((h)[2] + D1 + A2); \
|
||||
(h)[2] = SPH_T32((h)[3] + A1 + B2); \
|
||||
(h)[3] = SPH_T32((h)[0] + B1 + C2); \
|
||||
(h)[0] = tmp; \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* One round of RIPEMD. The data must be aligned for 32-bit access.
|
||||
*/
|
||||
static void
|
||||
ripemd_round(const unsigned char *data, sph_u32 r[5])
|
||||
{
|
||||
#if SPH_LITTLE_FAST
|
||||
|
||||
#define RIPEMD_IN(x) sph_dec32le_aligned(data + (4 * (x)))
|
||||
|
||||
#else
|
||||
|
||||
sph_u32 X_var[16];
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 16; i ++)
|
||||
X_var[i] = sph_dec32le_aligned(data + 4 * i);
|
||||
#define RIPEMD_IN(x) X_var[x]
|
||||
|
||||
#endif
|
||||
RIPEMD_ROUND_BODY(RIPEMD_IN, r);
|
||||
#undef RIPEMD_IN
|
||||
}
|
||||
|
||||
/* see sph_ripemd.h */
|
||||
void
|
||||
sph_ripemd_init(void *cc)
|
||||
{
|
||||
sph_ripemd_context *sc;
|
||||
|
||||
sc = cc;
|
||||
memcpy(sc->val, oIV, sizeof sc->val);
|
||||
#if SPH_64
|
||||
sc->count = 0;
|
||||
#else
|
||||
sc->count_high = sc->count_low = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
#define RFUN ripemd_round
|
||||
#define HASH ripemd
|
||||
#define LE32 1
|
||||
#include "md_helper.c"
|
||||
#undef RFUN
|
||||
#undef HASH
|
||||
#undef LE32
|
||||
|
||||
/* see sph_ripemd.h */
|
||||
void
sph_ripemd_close(void *cc, void *dst)
{
	/* pad, emit the 4-word digest, then reset for reuse */
	ripemd_close(cc, dst, 4);
	sph_ripemd_init(cc);
}
|
||||
|
||||
/* see sph_ripemd.h */
|
||||
void
|
||||
sph_ripemd_comp(const sph_u32 msg[16], sph_u32 val[4])
|
||||
{
|
||||
#define RIPEMD_IN(x) msg[x]
|
||||
RIPEMD_ROUND_BODY(RIPEMD_IN, val);
|
||||
#undef RIPEMD_IN
|
||||
}
|
||||
|
||||
/* ===================================================================== */
|
||||
/*
|
||||
* RIPEMD-128.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Round constants for RIPEMD-128.
|
||||
*/
|
||||
#define sK11 SPH_C32(0x00000000)
|
||||
#define sK12 SPH_C32(0x5A827999)
|
||||
#define sK13 SPH_C32(0x6ED9EBA1)
|
||||
#define sK14 SPH_C32(0x8F1BBCDC)
|
||||
|
||||
#define sK21 SPH_C32(0x50A28BE6)
|
||||
#define sK22 SPH_C32(0x5C4DD124)
|
||||
#define sK23 SPH_C32(0x6D703EF3)
|
||||
#define sK24 SPH_C32(0x00000000)
|
||||
|
||||
#define sRR(a, b, c, d, f, s, r, k) do { \
|
||||
a = ROTL(SPH_T32(a + f(b, c, d) + r + k), s); \
|
||||
} while (0)
|
||||
|
||||
#define sROUND1(a, b, c, d, f, s, r, k) \
|
||||
sRR(a ## 1, b ## 1, c ## 1, d ## 1, f, s, r, sK1 ## k)
|
||||
|
||||
#define sROUND2(a, b, c, d, f, s, r, k) \
|
||||
sRR(a ## 2, b ## 2, c ## 2, d ## 2, f, s, r, sK2 ## k)
|
||||
|
||||
/*
|
||||
* This macro defines the body for a RIPEMD-128 compression function
|
||||
* implementation. The "in" parameter should evaluate, when applied to a
|
||||
* numerical input parameter from 0 to 15, to an expression which yields
|
||||
* the corresponding input block. The "h" parameter should evaluate to
|
||||
* an array or pointer expression designating the array of 4 words which
|
||||
* contains the input and output of the compression function.
|
||||
*/
|
||||
|
||||
#define RIPEMD128_ROUND_BODY(in, h) do { \
|
||||
sph_u32 A1, B1, C1, D1; \
|
||||
sph_u32 A2, B2, C2, D2; \
|
||||
sph_u32 tmp; \
|
||||
\
|
||||
A1 = A2 = (h)[0]; \
|
||||
B1 = B2 = (h)[1]; \
|
||||
C1 = C2 = (h)[2]; \
|
||||
D1 = D2 = (h)[3]; \
|
||||
\
|
||||
sROUND1(A, B, C, D, F1, 11, in( 0), 1); \
|
||||
sROUND1(D, A, B, C, F1, 14, in( 1), 1); \
|
||||
sROUND1(C, D, A, B, F1, 15, in( 2), 1); \
|
||||
sROUND1(B, C, D, A, F1, 12, in( 3), 1); \
|
||||
sROUND1(A, B, C, D, F1, 5, in( 4), 1); \
|
||||
sROUND1(D, A, B, C, F1, 8, in( 5), 1); \
|
||||
sROUND1(C, D, A, B, F1, 7, in( 6), 1); \
|
||||
sROUND1(B, C, D, A, F1, 9, in( 7), 1); \
|
||||
sROUND1(A, B, C, D, F1, 11, in( 8), 1); \
|
||||
sROUND1(D, A, B, C, F1, 13, in( 9), 1); \
|
||||
sROUND1(C, D, A, B, F1, 14, in(10), 1); \
|
||||
sROUND1(B, C, D, A, F1, 15, in(11), 1); \
|
||||
sROUND1(A, B, C, D, F1, 6, in(12), 1); \
|
||||
sROUND1(D, A, B, C, F1, 7, in(13), 1); \
|
||||
sROUND1(C, D, A, B, F1, 9, in(14), 1); \
|
||||
sROUND1(B, C, D, A, F1, 8, in(15), 1); \
|
||||
\
|
||||
sROUND1(A, B, C, D, F2, 7, in( 7), 2); \
|
||||
sROUND1(D, A, B, C, F2, 6, in( 4), 2); \
|
||||
sROUND1(C, D, A, B, F2, 8, in(13), 2); \
|
||||
sROUND1(B, C, D, A, F2, 13, in( 1), 2); \
|
||||
sROUND1(A, B, C, D, F2, 11, in(10), 2); \
|
||||
sROUND1(D, A, B, C, F2, 9, in( 6), 2); \
|
||||
sROUND1(C, D, A, B, F2, 7, in(15), 2); \
|
||||
sROUND1(B, C, D, A, F2, 15, in( 3), 2); \
|
||||
sROUND1(A, B, C, D, F2, 7, in(12), 2); \
|
||||
sROUND1(D, A, B, C, F2, 12, in( 0), 2); \
|
||||
sROUND1(C, D, A, B, F2, 15, in( 9), 2); \
|
||||
sROUND1(B, C, D, A, F2, 9, in( 5), 2); \
|
||||
sROUND1(A, B, C, D, F2, 11, in( 2), 2); \
|
||||
sROUND1(D, A, B, C, F2, 7, in(14), 2); \
|
||||
sROUND1(C, D, A, B, F2, 13, in(11), 2); \
|
||||
sROUND1(B, C, D, A, F2, 12, in( 8), 2); \
|
||||
\
|
||||
sROUND1(A, B, C, D, F3, 11, in( 3), 3); \
|
||||
sROUND1(D, A, B, C, F3, 13, in(10), 3); \
|
||||
sROUND1(C, D, A, B, F3, 6, in(14), 3); \
|
||||
sROUND1(B, C, D, A, F3, 7, in( 4), 3); \
|
||||
sROUND1(A, B, C, D, F3, 14, in( 9), 3); \
|
||||
sROUND1(D, A, B, C, F3, 9, in(15), 3); \
|
||||
sROUND1(C, D, A, B, F3, 13, in( 8), 3); \
|
||||
sROUND1(B, C, D, A, F3, 15, in( 1), 3); \
|
||||
sROUND1(A, B, C, D, F3, 14, in( 2), 3); \
|
||||
sROUND1(D, A, B, C, F3, 8, in( 7), 3); \
|
||||
sROUND1(C, D, A, B, F3, 13, in( 0), 3); \
|
||||
sROUND1(B, C, D, A, F3, 6, in( 6), 3); \
|
||||
sROUND1(A, B, C, D, F3, 5, in(13), 3); \
|
||||
sROUND1(D, A, B, C, F3, 12, in(11), 3); \
|
||||
sROUND1(C, D, A, B, F3, 7, in( 5), 3); \
|
||||
sROUND1(B, C, D, A, F3, 5, in(12), 3); \
|
||||
\
|
||||
sROUND1(A, B, C, D, F4, 11, in( 1), 4); \
|
||||
sROUND1(D, A, B, C, F4, 12, in( 9), 4); \
|
||||
sROUND1(C, D, A, B, F4, 14, in(11), 4); \
|
||||
sROUND1(B, C, D, A, F4, 15, in(10), 4); \
|
||||
sROUND1(A, B, C, D, F4, 14, in( 0), 4); \
|
||||
sROUND1(D, A, B, C, F4, 15, in( 8), 4); \
|
||||
sROUND1(C, D, A, B, F4, 9, in(12), 4); \
|
||||
sROUND1(B, C, D, A, F4, 8, in( 4), 4); \
|
||||
sROUND1(A, B, C, D, F4, 9, in(13), 4); \
|
||||
sROUND1(D, A, B, C, F4, 14, in( 3), 4); \
|
||||
sROUND1(C, D, A, B, F4, 5, in( 7), 4); \
|
||||
sROUND1(B, C, D, A, F4, 6, in(15), 4); \
|
||||
sROUND1(A, B, C, D, F4, 8, in(14), 4); \
|
||||
sROUND1(D, A, B, C, F4, 6, in( 5), 4); \
|
||||
sROUND1(C, D, A, B, F4, 5, in( 6), 4); \
|
||||
sROUND1(B, C, D, A, F4, 12, in( 2), 4); \
|
||||
\
|
||||
sROUND2(A, B, C, D, F4, 8, in( 5), 1); \
|
||||
sROUND2(D, A, B, C, F4, 9, in(14), 1); \
|
||||
sROUND2(C, D, A, B, F4, 9, in( 7), 1); \
|
||||
sROUND2(B, C, D, A, F4, 11, in( 0), 1); \
|
||||
sROUND2(A, B, C, D, F4, 13, in( 9), 1); \
|
||||
sROUND2(D, A, B, C, F4, 15, in( 2), 1); \
|
||||
sROUND2(C, D, A, B, F4, 15, in(11), 1); \
|
||||
sROUND2(B, C, D, A, F4, 5, in( 4), 1); \
|
||||
sROUND2(A, B, C, D, F4, 7, in(13), 1); \
|
||||
sROUND2(D, A, B, C, F4, 7, in( 6), 1); \
|
||||
sROUND2(C, D, A, B, F4, 8, in(15), 1); \
|
||||
sROUND2(B, C, D, A, F4, 11, in( 8), 1); \
|
||||
sROUND2(A, B, C, D, F4, 14, in( 1), 1); \
|
||||
sROUND2(D, A, B, C, F4, 14, in(10), 1); \
|
||||
sROUND2(C, D, A, B, F4, 12, in( 3), 1); \
|
||||
sROUND2(B, C, D, A, F4, 6, in(12), 1); \
|
||||
\
|
||||
sROUND2(A, B, C, D, F3, 9, in( 6), 2); \
|
||||
sROUND2(D, A, B, C, F3, 13, in(11), 2); \
|
||||
sROUND2(C, D, A, B, F3, 15, in( 3), 2); \
|
||||
sROUND2(B, C, D, A, F3, 7, in( 7), 2); \
|
||||
sROUND2(A, B, C, D, F3, 12, in( 0), 2); \
|
||||
sROUND2(D, A, B, C, F3, 8, in(13), 2); \
|
||||
sROUND2(C, D, A, B, F3, 9, in( 5), 2); \
|
||||
sROUND2(B, C, D, A, F3, 11, in(10), 2); \
|
||||
sROUND2(A, B, C, D, F3, 7, in(14), 2); \
|
||||
sROUND2(D, A, B, C, F3, 7, in(15), 2); \
|
||||
sROUND2(C, D, A, B, F3, 12, in( 8), 2); \
|
||||
sROUND2(B, C, D, A, F3, 7, in(12), 2); \
|
||||
sROUND2(A, B, C, D, F3, 6, in( 4), 2); \
|
||||
sROUND2(D, A, B, C, F3, 15, in( 9), 2); \
|
||||
sROUND2(C, D, A, B, F3, 13, in( 1), 2); \
|
||||
sROUND2(B, C, D, A, F3, 11, in( 2), 2); \
|
||||
\
|
||||
sROUND2(A, B, C, D, F2, 9, in(15), 3); \
|
||||
sROUND2(D, A, B, C, F2, 7, in( 5), 3); \
|
||||
sROUND2(C, D, A, B, F2, 15, in( 1), 3); \
|
||||
sROUND2(B, C, D, A, F2, 11, in( 3), 3); \
|
||||
sROUND2(A, B, C, D, F2, 8, in( 7), 3); \
|
||||
sROUND2(D, A, B, C, F2, 6, in(14), 3); \
|
||||
sROUND2(C, D, A, B, F2, 6, in( 6), 3); \
|
||||
sROUND2(B, C, D, A, F2, 14, in( 9), 3); \
|
||||
sROUND2(A, B, C, D, F2, 12, in(11), 3); \
|
||||
sROUND2(D, A, B, C, F2, 13, in( 8), 3); \
|
||||
sROUND2(C, D, A, B, F2, 5, in(12), 3); \
|
||||
sROUND2(B, C, D, A, F2, 14, in( 2), 3); \
|
||||
sROUND2(A, B, C, D, F2, 13, in(10), 3); \
|
||||
sROUND2(D, A, B, C, F2, 13, in( 0), 3); \
|
||||
sROUND2(C, D, A, B, F2, 7, in( 4), 3); \
|
||||
sROUND2(B, C, D, A, F2, 5, in(13), 3); \
|
||||
\
|
||||
sROUND2(A, B, C, D, F1, 15, in( 8), 4); \
|
||||
sROUND2(D, A, B, C, F1, 5, in( 6), 4); \
|
||||
sROUND2(C, D, A, B, F1, 8, in( 4), 4); \
|
||||
sROUND2(B, C, D, A, F1, 11, in( 1), 4); \
|
||||
sROUND2(A, B, C, D, F1, 14, in( 3), 4); \
|
||||
sROUND2(D, A, B, C, F1, 14, in(11), 4); \
|
||||
sROUND2(C, D, A, B, F1, 6, in(15), 4); \
|
||||
sROUND2(B, C, D, A, F1, 14, in( 0), 4); \
|
||||
sROUND2(A, B, C, D, F1, 6, in( 5), 4); \
|
||||
sROUND2(D, A, B, C, F1, 9, in(12), 4); \
|
||||
sROUND2(C, D, A, B, F1, 12, in( 2), 4); \
|
||||
sROUND2(B, C, D, A, F1, 9, in(13), 4); \
|
||||
sROUND2(A, B, C, D, F1, 12, in( 9), 4); \
|
||||
sROUND2(D, A, B, C, F1, 5, in( 7), 4); \
|
||||
sROUND2(C, D, A, B, F1, 15, in(10), 4); \
|
||||
sROUND2(B, C, D, A, F1, 8, in(14), 4); \
|
||||
\
|
||||
tmp = SPH_T32((h)[1] + C1 + D2); \
|
||||
(h)[1] = SPH_T32((h)[2] + D1 + A2); \
|
||||
(h)[2] = SPH_T32((h)[3] + A1 + B2); \
|
||||
(h)[3] = SPH_T32((h)[0] + B1 + C2); \
|
||||
(h)[0] = tmp; \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* One round of RIPEMD-128. The data must be aligned for 32-bit access.
|
||||
*/
|
||||
static void
|
||||
ripemd128_round(const unsigned char *data, sph_u32 r[5])
|
||||
{
|
||||
#if SPH_LITTLE_FAST
|
||||
|
||||
#define RIPEMD128_IN(x) sph_dec32le_aligned(data + (4 * (x)))
|
||||
|
||||
#else
|
||||
|
||||
sph_u32 X_var[16];
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 16; i ++)
|
||||
X_var[i] = sph_dec32le_aligned(data + 4 * i);
|
||||
#define RIPEMD128_IN(x) X_var[x]
|
||||
|
||||
#endif
|
||||
RIPEMD128_ROUND_BODY(RIPEMD128_IN, r);
|
||||
#undef RIPEMD128_IN
|
||||
}
|
||||
|
||||
/* see sph_ripemd.h */
|
||||
void
|
||||
sph_ripemd128_init(void *cc)
|
||||
{
|
||||
sph_ripemd128_context *sc;
|
||||
|
||||
sc = cc;
|
||||
memcpy(sc->val, IV, sizeof sc->val);
|
||||
#if SPH_64
|
||||
sc->count = 0;
|
||||
#else
|
||||
sc->count_high = sc->count_low = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
#define RFUN ripemd128_round
|
||||
#define HASH ripemd128
|
||||
#define LE32 1
|
||||
#include "md_helper.c"
|
||||
#undef RFUN
|
||||
#undef HASH
|
||||
#undef LE32
|
||||
|
||||
/* see sph_ripemd.h */
|
||||
void
sph_ripemd128_close(void *cc, void *dst)
{
	/* pad, emit the 4-word digest, then reset for reuse */
	ripemd128_close(cc, dst, 4);
	sph_ripemd128_init(cc);
}
|
||||
|
||||
/* see sph_ripemd.h */
|
||||
void
|
||||
sph_ripemd128_comp(const sph_u32 msg[16], sph_u32 val[4])
|
||||
{
|
||||
#define RIPEMD128_IN(x) msg[x]
|
||||
RIPEMD128_ROUND_BODY(RIPEMD128_IN, val);
|
||||
#undef RIPEMD128_IN
|
||||
}
|
||||
|
||||
/* ===================================================================== */
|
||||
/*
|
||||
* RIPEMD-160.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Round constants for RIPEMD-160.
|
||||
*/
|
||||
#define K11 SPH_C32(0x00000000)
|
||||
#define K12 SPH_C32(0x5A827999)
|
||||
#define K13 SPH_C32(0x6ED9EBA1)
|
||||
#define K14 SPH_C32(0x8F1BBCDC)
|
||||
#define K15 SPH_C32(0xA953FD4E)
|
||||
|
||||
#define K21 SPH_C32(0x50A28BE6)
|
||||
#define K22 SPH_C32(0x5C4DD124)
|
||||
#define K23 SPH_C32(0x6D703EF3)
|
||||
#define K24 SPH_C32(0x7A6D76E9)
|
||||
#define K25 SPH_C32(0x00000000)
|
||||
|
||||
#define RR(a, b, c, d, e, f, s, r, k) do { \
|
||||
a = SPH_T32(ROTL(SPH_T32(a + f(b, c, d) + r + k), s) + e); \
|
||||
c = ROTL(c, 10); \
|
||||
} while (0)
|
||||
|
||||
#define ROUND1(a, b, c, d, e, f, s, r, k) \
|
||||
RR(a ## 1, b ## 1, c ## 1, d ## 1, e ## 1, f, s, r, K1 ## k)
|
||||
|
||||
#define ROUND2(a, b, c, d, e, f, s, r, k) \
|
||||
RR(a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k)
|
||||
|
||||
/*
|
||||
* This macro defines the body for a RIPEMD-160 compression function
|
||||
* implementation. The "in" parameter should evaluate, when applied to a
|
||||
* numerical input parameter from 0 to 15, to an expression which yields
|
||||
* the corresponding input block. The "h" parameter should evaluate to
|
||||
* an array or pointer expression designating the array of 5 words which
|
||||
* contains the input and output of the compression function.
|
||||
*/
|
||||
|
||||
#define RIPEMD160_ROUND_BODY(in, h) do { \
|
||||
sph_u32 A1, B1, C1, D1, E1; \
|
||||
sph_u32 A2, B2, C2, D2, E2; \
|
||||
sph_u32 tmp; \
|
||||
\
|
||||
A1 = A2 = (h)[0]; \
|
||||
B1 = B2 = (h)[1]; \
|
||||
C1 = C2 = (h)[2]; \
|
||||
D1 = D2 = (h)[3]; \
|
||||
E1 = E2 = (h)[4]; \
|
||||
\
|
||||
ROUND1(A, B, C, D, E, F1, 11, in( 0), 1); \
|
||||
ROUND1(E, A, B, C, D, F1, 14, in( 1), 1); \
|
||||
ROUND1(D, E, A, B, C, F1, 15, in( 2), 1); \
|
||||
ROUND1(C, D, E, A, B, F1, 12, in( 3), 1); \
|
||||
ROUND1(B, C, D, E, A, F1, 5, in( 4), 1); \
|
||||
ROUND1(A, B, C, D, E, F1, 8, in( 5), 1); \
|
||||
ROUND1(E, A, B, C, D, F1, 7, in( 6), 1); \
|
||||
ROUND1(D, E, A, B, C, F1, 9, in( 7), 1); \
|
||||
ROUND1(C, D, E, A, B, F1, 11, in( 8), 1); \
|
||||
ROUND1(B, C, D, E, A, F1, 13, in( 9), 1); \
|
||||
ROUND1(A, B, C, D, E, F1, 14, in(10), 1); \
|
||||
ROUND1(E, A, B, C, D, F1, 15, in(11), 1); \
|
||||
ROUND1(D, E, A, B, C, F1, 6, in(12), 1); \
|
||||
ROUND1(C, D, E, A, B, F1, 7, in(13), 1); \
|
||||
ROUND1(B, C, D, E, A, F1, 9, in(14), 1); \
|
||||
ROUND1(A, B, C, D, E, F1, 8, in(15), 1); \
|
||||
\
|
||||
ROUND1(E, A, B, C, D, F2, 7, in( 7), 2); \
|
||||
ROUND1(D, E, A, B, C, F2, 6, in( 4), 2); \
|
||||
ROUND1(C, D, E, A, B, F2, 8, in(13), 2); \
|
||||
ROUND1(B, C, D, E, A, F2, 13, in( 1), 2); \
|
||||
ROUND1(A, B, C, D, E, F2, 11, in(10), 2); \
|
||||
ROUND1(E, A, B, C, D, F2, 9, in( 6), 2); \
|
||||
ROUND1(D, E, A, B, C, F2, 7, in(15), 2); \
|
||||
ROUND1(C, D, E, A, B, F2, 15, in( 3), 2); \
|
||||
ROUND1(B, C, D, E, A, F2, 7, in(12), 2); \
|
||||
ROUND1(A, B, C, D, E, F2, 12, in( 0), 2); \
|
||||
ROUND1(E, A, B, C, D, F2, 15, in( 9), 2); \
|
||||
ROUND1(D, E, A, B, C, F2, 9, in( 5), 2); \
|
||||
ROUND1(C, D, E, A, B, F2, 11, in( 2), 2); \
|
||||
ROUND1(B, C, D, E, A, F2, 7, in(14), 2); \
|
||||
ROUND1(A, B, C, D, E, F2, 13, in(11), 2); \
|
||||
ROUND1(E, A, B, C, D, F2, 12, in( 8), 2); \
|
||||
\
|
||||
ROUND1(D, E, A, B, C, F3, 11, in( 3), 3); \
|
||||
ROUND1(C, D, E, A, B, F3, 13, in(10), 3); \
|
||||
ROUND1(B, C, D, E, A, F3, 6, in(14), 3); \
|
||||
ROUND1(A, B, C, D, E, F3, 7, in( 4), 3); \
|
||||
ROUND1(E, A, B, C, D, F3, 14, in( 9), 3); \
|
||||
ROUND1(D, E, A, B, C, F3, 9, in(15), 3); \
|
||||
ROUND1(C, D, E, A, B, F3, 13, in( 8), 3); \
|
||||
ROUND1(B, C, D, E, A, F3, 15, in( 1), 3); \
|
||||
ROUND1(A, B, C, D, E, F3, 14, in( 2), 3); \
|
||||
ROUND1(E, A, B, C, D, F3, 8, in( 7), 3); \
|
||||
ROUND1(D, E, A, B, C, F3, 13, in( 0), 3); \
|
||||
ROUND1(C, D, E, A, B, F3, 6, in( 6), 3); \
|
||||
ROUND1(B, C, D, E, A, F3, 5, in(13), 3); \
|
||||
ROUND1(A, B, C, D, E, F3, 12, in(11), 3); \
|
||||
ROUND1(E, A, B, C, D, F3, 7, in( 5), 3); \
|
||||
ROUND1(D, E, A, B, C, F3, 5, in(12), 3); \
|
||||
\
|
||||
ROUND1(C, D, E, A, B, F4, 11, in( 1), 4); \
|
||||
ROUND1(B, C, D, E, A, F4, 12, in( 9), 4); \
|
||||
ROUND1(A, B, C, D, E, F4, 14, in(11), 4); \
|
||||
ROUND1(E, A, B, C, D, F4, 15, in(10), 4); \
|
||||
ROUND1(D, E, A, B, C, F4, 14, in( 0), 4); \
|
||||
ROUND1(C, D, E, A, B, F4, 15, in( 8), 4); \
|
||||
ROUND1(B, C, D, E, A, F4, 9, in(12), 4); \
|
||||
ROUND1(A, B, C, D, E, F4, 8, in( 4), 4); \
|
||||
ROUND1(E, A, B, C, D, F4, 9, in(13), 4); \
|
||||
ROUND1(D, E, A, B, C, F4, 14, in( 3), 4); \
|
||||
ROUND1(C, D, E, A, B, F4, 5, in( 7), 4); \
|
||||
ROUND1(B, C, D, E, A, F4, 6, in(15), 4); \
|
||||
ROUND1(A, B, C, D, E, F4, 8, in(14), 4); \
|
||||
ROUND1(E, A, B, C, D, F4, 6, in( 5), 4); \
|
||||
ROUND1(D, E, A, B, C, F4, 5, in( 6), 4); \
|
||||
ROUND1(C, D, E, A, B, F4, 12, in( 2), 4); \
|
||||
\
|
||||
ROUND1(B, C, D, E, A, F5, 9, in( 4), 5); \
|
||||
ROUND1(A, B, C, D, E, F5, 15, in( 0), 5); \
|
||||
ROUND1(E, A, B, C, D, F5, 5, in( 5), 5); \
|
||||
ROUND1(D, E, A, B, C, F5, 11, in( 9), 5); \
|
||||
ROUND1(C, D, E, A, B, F5, 6, in( 7), 5); \
|
||||
ROUND1(B, C, D, E, A, F5, 8, in(12), 5); \
|
||||
ROUND1(A, B, C, D, E, F5, 13, in( 2), 5); \
|
||||
ROUND1(E, A, B, C, D, F5, 12, in(10), 5); \
|
||||
ROUND1(D, E, A, B, C, F5, 5, in(14), 5); \
|
||||
ROUND1(C, D, E, A, B, F5, 12, in( 1), 5); \
|
||||
ROUND1(B, C, D, E, A, F5, 13, in( 3), 5); \
|
||||
ROUND1(A, B, C, D, E, F5, 14, in( 8), 5); \
|
||||
ROUND1(E, A, B, C, D, F5, 11, in(11), 5); \
|
||||
ROUND1(D, E, A, B, C, F5, 8, in( 6), 5); \
|
||||
ROUND1(C, D, E, A, B, F5, 5, in(15), 5); \
|
||||
ROUND1(B, C, D, E, A, F5, 6, in(13), 5); \
|
||||
\
|
||||
ROUND2(A, B, C, D, E, F5, 8, in( 5), 1); \
|
||||
ROUND2(E, A, B, C, D, F5, 9, in(14), 1); \
|
||||
ROUND2(D, E, A, B, C, F5, 9, in( 7), 1); \
|
||||
ROUND2(C, D, E, A, B, F5, 11, in( 0), 1); \
|
||||
ROUND2(B, C, D, E, A, F5, 13, in( 9), 1); \
|
||||
ROUND2(A, B, C, D, E, F5, 15, in( 2), 1); \
|
||||
ROUND2(E, A, B, C, D, F5, 15, in(11), 1); \
|
||||
ROUND2(D, E, A, B, C, F5, 5, in( 4), 1); \
|
||||
ROUND2(C, D, E, A, B, F5, 7, in(13), 1); \
|
||||
ROUND2(B, C, D, E, A, F5, 7, in( 6), 1); \
|
||||
ROUND2(A, B, C, D, E, F5, 8, in(15), 1); \
|
||||
ROUND2(E, A, B, C, D, F5, 11, in( 8), 1); \
|
||||
ROUND2(D, E, A, B, C, F5, 14, in( 1), 1); \
|
||||
ROUND2(C, D, E, A, B, F5, 14, in(10), 1); \
|
||||
ROUND2(B, C, D, E, A, F5, 12, in( 3), 1); \
|
||||
ROUND2(A, B, C, D, E, F5, 6, in(12), 1); \
|
||||
\
|
||||
ROUND2(E, A, B, C, D, F4, 9, in( 6), 2); \
|
||||
ROUND2(D, E, A, B, C, F4, 13, in(11), 2); \
|
||||
ROUND2(C, D, E, A, B, F4, 15, in( 3), 2); \
|
||||
ROUND2(B, C, D, E, A, F4, 7, in( 7), 2); \
|
||||
ROUND2(A, B, C, D, E, F4, 12, in( 0), 2); \
|
||||
ROUND2(E, A, B, C, D, F4, 8, in(13), 2); \
|
||||
ROUND2(D, E, A, B, C, F4, 9, in( 5), 2); \
|
||||
ROUND2(C, D, E, A, B, F4, 11, in(10), 2); \
|
||||
ROUND2(B, C, D, E, A, F4, 7, in(14), 2); \
|
||||
ROUND2(A, B, C, D, E, F4, 7, in(15), 2); \
|
||||
ROUND2(E, A, B, C, D, F4, 12, in( 8), 2); \
|
||||
ROUND2(D, E, A, B, C, F4, 7, in(12), 2); \
|
||||
ROUND2(C, D, E, A, B, F4, 6, in( 4), 2); \
|
||||
ROUND2(B, C, D, E, A, F4, 15, in( 9), 2); \
|
||||
ROUND2(A, B, C, D, E, F4, 13, in( 1), 2); \
|
||||
ROUND2(E, A, B, C, D, F4, 11, in( 2), 2); \
|
||||
\
|
||||
ROUND2(D, E, A, B, C, F3, 9, in(15), 3); \
|
||||
ROUND2(C, D, E, A, B, F3, 7, in( 5), 3); \
|
||||
ROUND2(B, C, D, E, A, F3, 15, in( 1), 3); \
|
||||
ROUND2(A, B, C, D, E, F3, 11, in( 3), 3); \
|
||||
ROUND2(E, A, B, C, D, F3, 8, in( 7), 3); \
|
||||
ROUND2(D, E, A, B, C, F3, 6, in(14), 3); \
|
||||
ROUND2(C, D, E, A, B, F3, 6, in( 6), 3); \
|
||||
ROUND2(B, C, D, E, A, F3, 14, in( 9), 3); \
|
||||
ROUND2(A, B, C, D, E, F3, 12, in(11), 3); \
|
||||
ROUND2(E, A, B, C, D, F3, 13, in( 8), 3); \
|
||||
ROUND2(D, E, A, B, C, F3, 5, in(12), 3); \
|
||||
ROUND2(C, D, E, A, B, F3, 14, in( 2), 3); \
|
||||
ROUND2(B, C, D, E, A, F3, 13, in(10), 3); \
|
||||
ROUND2(A, B, C, D, E, F3, 13, in( 0), 3); \
|
||||
ROUND2(E, A, B, C, D, F3, 7, in( 4), 3); \
|
||||
ROUND2(D, E, A, B, C, F3, 5, in(13), 3); \
|
||||
\
|
||||
ROUND2(C, D, E, A, B, F2, 15, in( 8), 4); \
|
||||
ROUND2(B, C, D, E, A, F2, 5, in( 6), 4); \
|
||||
ROUND2(A, B, C, D, E, F2, 8, in( 4), 4); \
|
||||
ROUND2(E, A, B, C, D, F2, 11, in( 1), 4); \
|
||||
ROUND2(D, E, A, B, C, F2, 14, in( 3), 4); \
|
||||
ROUND2(C, D, E, A, B, F2, 14, in(11), 4); \
|
||||
ROUND2(B, C, D, E, A, F2, 6, in(15), 4); \
|
||||
ROUND2(A, B, C, D, E, F2, 14, in( 0), 4); \
|
||||
ROUND2(E, A, B, C, D, F2, 6, in( 5), 4); \
|
||||
ROUND2(D, E, A, B, C, F2, 9, in(12), 4); \
|
||||
ROUND2(C, D, E, A, B, F2, 12, in( 2), 4); \
|
||||
ROUND2(B, C, D, E, A, F2, 9, in(13), 4); \
|
||||
ROUND2(A, B, C, D, E, F2, 12, in( 9), 4); \
|
||||
ROUND2(E, A, B, C, D, F2, 5, in( 7), 4); \
|
||||
ROUND2(D, E, A, B, C, F2, 15, in(10), 4); \
|
||||
ROUND2(C, D, E, A, B, F2, 8, in(14), 4); \
|
||||
\
|
||||
ROUND2(B, C, D, E, A, F1, 8, in(12), 5); \
|
||||
ROUND2(A, B, C, D, E, F1, 5, in(15), 5); \
|
||||
ROUND2(E, A, B, C, D, F1, 12, in(10), 5); \
|
||||
ROUND2(D, E, A, B, C, F1, 9, in( 4), 5); \
|
||||
ROUND2(C, D, E, A, B, F1, 12, in( 1), 5); \
|
||||
ROUND2(B, C, D, E, A, F1, 5, in( 5), 5); \
|
||||
ROUND2(A, B, C, D, E, F1, 14, in( 8), 5); \
|
||||
ROUND2(E, A, B, C, D, F1, 6, in( 7), 5); \
|
||||
ROUND2(D, E, A, B, C, F1, 8, in( 6), 5); \
|
||||
ROUND2(C, D, E, A, B, F1, 13, in( 2), 5); \
|
||||
ROUND2(B, C, D, E, A, F1, 6, in(13), 5); \
|
||||
ROUND2(A, B, C, D, E, F1, 5, in(14), 5); \
|
||||
ROUND2(E, A, B, C, D, F1, 15, in( 0), 5); \
|
||||
ROUND2(D, E, A, B, C, F1, 13, in( 3), 5); \
|
||||
ROUND2(C, D, E, A, B, F1, 11, in( 9), 5); \
|
||||
ROUND2(B, C, D, E, A, F1, 11, in(11), 5); \
|
||||
\
|
||||
tmp = SPH_T32((h)[1] + C1 + D2); \
|
||||
(h)[1] = SPH_T32((h)[2] + D1 + E2); \
|
||||
(h)[2] = SPH_T32((h)[3] + E1 + A2); \
|
||||
(h)[3] = SPH_T32((h)[4] + A1 + B2); \
|
||||
(h)[4] = SPH_T32((h)[0] + B1 + C2); \
|
||||
(h)[0] = tmp; \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* One round of RIPEMD-160. The data must be aligned for 32-bit access.
|
||||
*/
|
||||
static void
|
||||
ripemd160_round(const unsigned char *data, sph_u32 r[5])
|
||||
{
|
||||
#if SPH_LITTLE_FAST
|
||||
|
||||
#define RIPEMD160_IN(x) sph_dec32le_aligned(data + (4 * (x)))
|
||||
|
||||
#else
|
||||
|
||||
sph_u32 X_var[16];
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 16; i ++)
|
||||
X_var[i] = sph_dec32le_aligned(data + 4 * i);
|
||||
#define RIPEMD160_IN(x) X_var[x]
|
||||
|
||||
#endif
|
||||
RIPEMD160_ROUND_BODY(RIPEMD160_IN, r);
|
||||
#undef RIPEMD160_IN
|
||||
}
|
||||
|
||||
/* see sph_ripemd.h */
|
||||
void
|
||||
sph_ripemd160_init(void *cc)
|
||||
{
|
||||
sph_ripemd160_context *sc;
|
||||
|
||||
sc = cc;
|
||||
memcpy(sc->val, IV, sizeof sc->val);
|
||||
#if SPH_64
|
||||
sc->count = 0;
|
||||
#else
|
||||
sc->count_high = sc->count_low = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
#define RFUN ripemd160_round
|
||||
#define HASH ripemd160
|
||||
#define LE32 1
|
||||
#include "md_helper.c"
|
||||
#undef RFUN
|
||||
#undef HASH
|
||||
#undef LE32
|
||||
|
||||
/* see sph_ripemd.h */
|
||||
void
|
||||
sph_ripemd160_close(void *cc, void *dst)
|
||||
{
|
||||
ripemd160_close(cc, dst, 5);
|
||||
sph_ripemd160_init(cc);
|
||||
}
|
||||
|
||||
/* see sph_ripemd.h */
|
||||
void
|
||||
sph_ripemd160_comp(const sph_u32 msg[16], sph_u32 val[5])
|
||||
{
|
||||
#define RIPEMD160_IN(x) msg[x]
|
||||
RIPEMD160_ROUND_BODY(RIPEMD160_IN, val);
|
||||
#undef RIPEMD160_IN
|
||||
}
|
273
sph/sph_ripemd.h
Normal file
273
sph/sph_ripemd.h
Normal file
@ -0,0 +1,273 @@
|
||||
/* $Id: sph_ripemd.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* RIPEMD, RIPEMD-128 and RIPEMD-160 interface.
|
||||
*
|
||||
* RIPEMD was first described in: Research and Development in Advanced
|
||||
* Communication Technologies in Europe, "RIPE Integrity Primitives:
|
||||
* Final Report of RACE Integrity Primitives Evaluation (R1040)", RACE,
|
||||
* June 1992.
|
||||
*
|
||||
* A new, strengthened version, dubbed RIPEMD-160, was published in: H.
|
||||
* Dobbertin, A. Bosselaers, and B. Preneel, "RIPEMD-160, a strengthened
|
||||
* version of RIPEMD", Fast Software Encryption - FSE'96, LNCS 1039,
|
||||
* Springer (1996), pp. 71--82.
|
||||
*
|
||||
* This article describes both RIPEMD-160, with a 160-bit output, and a
|
||||
* reduced version called RIPEMD-128, which has a 128-bit output. RIPEMD-128
|
||||
* was meant as a "drop-in" replacement for any hash function with 128-bit
|
||||
* output, especially the original RIPEMD.
|
||||
*
|
||||
* @warning Collisions, and an efficient method to build other collisions,
|
||||
* have been published for the original RIPEMD, which is thus considered as
|
||||
* cryptographically broken. It is also very rarely encountered, and there
|
||||
* seems to exist no free description or implementation of RIPEMD (except
|
||||
* the sphlib code, of course). As of january 2007, RIPEMD-128 and RIPEMD-160
|
||||
* seem as secure as their output length allows.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_ripemd.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_RIPEMD_H__
|
||||
#define SPH_RIPEMD_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include "sph_types.h"
|
||||
|
||||
/**
|
||||
* Output size (in bits) for RIPEMD.
|
||||
*/
|
||||
#define SPH_SIZE_ripemd 128
|
||||
|
||||
/**
|
||||
* Output size (in bits) for RIPEMD-128.
|
||||
*/
|
||||
#define SPH_SIZE_ripemd128 128
|
||||
|
||||
/**
|
||||
* Output size (in bits) for RIPEMD-160.
|
||||
*/
|
||||
#define SPH_SIZE_ripemd160 160
|
||||
|
||||
/**
|
||||
* This structure is a context for RIPEMD computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* a RIPEMD computation has been performed, the context can be reused for
|
||||
* another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running RIPEMD computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
sph_u32 val[4];
|
||||
#if SPH_64
|
||||
sph_u64 count;
|
||||
#else
|
||||
sph_u32 count_high, count_low;
|
||||
#endif
|
||||
#endif
|
||||
} sph_ripemd_context;
|
||||
|
||||
/**
|
||||
* Initialize a RIPEMD context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the RIPEMD context (pointer to
|
||||
* a <code>sph_ripemd_context</code>)
|
||||
*/
|
||||
void sph_ripemd_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the RIPEMD context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_ripemd(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current RIPEMD computation and output the result into the
|
||||
* provided buffer. The destination buffer must be wide enough to
|
||||
* accomodate the result (16 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the RIPEMD context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_ripemd_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Apply the RIPEMD compression function on the provided data. The
|
||||
* <code>msg</code> parameter contains the 16 32-bit input blocks,
|
||||
* as numerical values (hence after the little-endian decoding). The
|
||||
* <code>val</code> parameter contains the 5 32-bit input blocks for
|
||||
* the compression function; the output is written in place in this
|
||||
* array.
|
||||
*
|
||||
* @param msg the message block (16 values)
|
||||
* @param val the function 128-bit input and output
|
||||
*/
|
||||
void sph_ripemd_comp(const sph_u32 msg[16], sph_u32 val[4]);
|
||||
|
||||
/* ===================================================================== */
|
||||
|
||||
/**
|
||||
* This structure is a context for RIPEMD-128 computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* a RIPEMD-128 computation has been performed, the context can be reused for
|
||||
* another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running RIPEMD-128 computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
sph_u32 val[4];
|
||||
#if SPH_64
|
||||
sph_u64 count;
|
||||
#else
|
||||
sph_u32 count_high, count_low;
|
||||
#endif
|
||||
#endif
|
||||
} sph_ripemd128_context;
|
||||
|
||||
/**
|
||||
* Initialize a RIPEMD-128 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the RIPEMD-128 context (pointer to
|
||||
* a <code>sph_ripemd128_context</code>)
|
||||
*/
|
||||
void sph_ripemd128_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the RIPEMD-128 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_ripemd128(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current RIPEMD-128 computation and output the result into the
|
||||
* provided buffer. The destination buffer must be wide enough to
|
||||
* accomodate the result (16 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the RIPEMD-128 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_ripemd128_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Apply the RIPEMD-128 compression function on the provided data. The
|
||||
* <code>msg</code> parameter contains the 16 32-bit input blocks,
|
||||
* as numerical values (hence after the little-endian decoding). The
|
||||
* <code>val</code> parameter contains the 5 32-bit input blocks for
|
||||
* the compression function; the output is written in place in this
|
||||
* array.
|
||||
*
|
||||
* @param msg the message block (16 values)
|
||||
* @param val the function 128-bit input and output
|
||||
*/
|
||||
void sph_ripemd128_comp(const sph_u32 msg[16], sph_u32 val[4]);
|
||||
|
||||
/* ===================================================================== */
|
||||
|
||||
/**
|
||||
* This structure is a context for RIPEMD-160 computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* a RIPEMD-160 computation has been performed, the context can be reused for
|
||||
* another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running RIPEMD-160 computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
sph_u32 val[5];
|
||||
#if SPH_64
|
||||
sph_u64 count;
|
||||
#else
|
||||
sph_u32 count_high, count_low;
|
||||
#endif
|
||||
#endif
|
||||
} sph_ripemd160_context;
|
||||
|
||||
/**
|
||||
* Initialize a RIPEMD-160 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the RIPEMD-160 context (pointer to
|
||||
* a <code>sph_ripemd160_context</code>)
|
||||
*/
|
||||
void sph_ripemd160_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the RIPEMD-160 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_ripemd160(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current RIPEMD-160 computation and output the result into the
|
||||
* provided buffer. The destination buffer must be wide enough to
|
||||
* accomodate the result (20 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the RIPEMD-160 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_ripemd160_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Apply the RIPEMD-160 compression function on the provided data. The
|
||||
* <code>msg</code> parameter contains the 16 32-bit input blocks,
|
||||
* as numerical values (hence after the little-endian decoding). The
|
||||
* <code>val</code> parameter contains the 5 32-bit input blocks for
|
||||
* the compression function; the output is written in place in this
|
||||
* array.
|
||||
*
|
||||
* @param msg the message block (16 values)
|
||||
* @param val the function 160-bit input and output
|
||||
*/
|
||||
void sph_ripemd160_comp(const sph_u32 msg[16], sph_u32 val[5]);
|
||||
|
||||
#endif
|
691
sph/sph_sha2.c
Normal file
691
sph/sph_sha2.c
Normal file
@ -0,0 +1,691 @@
|
||||
/* $Id: sha2.c 227 2010-06-16 17:28:38Z tp $ */
|
||||
/*
|
||||
* SHA-224 / SHA-256 implementation.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sph_sha2.h"
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_SHA2
|
||||
#define SPH_SMALL_FOOTPRINT_SHA2 1
|
||||
#endif
|
||||
|
||||
#define CH(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z))
|
||||
#define MAJ(X, Y, Z) (((Y) & (Z)) | (((Y) | (Z)) & (X)))
|
||||
|
||||
#define ROTR SPH_ROTR32
|
||||
|
||||
#define BSG2_0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
|
||||
#define BSG2_1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
|
||||
#define SSG2_0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SPH_T32((x) >> 3))
|
||||
#define SSG2_1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SPH_T32((x) >> 10))
|
||||
|
||||
static const sph_u32 H224[8] = {
|
||||
SPH_C32(0xC1059ED8), SPH_C32(0x367CD507), SPH_C32(0x3070DD17),
|
||||
SPH_C32(0xF70E5939), SPH_C32(0xFFC00B31), SPH_C32(0x68581511),
|
||||
SPH_C32(0x64F98FA7), SPH_C32(0xBEFA4FA4)
|
||||
};
|
||||
|
||||
static const sph_u32 H256[8] = {
|
||||
SPH_C32(0x6A09E667), SPH_C32(0xBB67AE85), SPH_C32(0x3C6EF372),
|
||||
SPH_C32(0xA54FF53A), SPH_C32(0x510E527F), SPH_C32(0x9B05688C),
|
||||
SPH_C32(0x1F83D9AB), SPH_C32(0x5BE0CD19)
|
||||
};
|
||||
|
||||
/*
|
||||
* The SHA2_ROUND_BODY defines the body for a SHA-224 / SHA-256
|
||||
* compression function implementation. The "in" parameter should
|
||||
* evaluate, when applied to a numerical input parameter from 0 to 15,
|
||||
* to an expression which yields the corresponding input block. The "r"
|
||||
* parameter should evaluate to an array or pointer expression
|
||||
* designating the array of 8 words which contains the input and output
|
||||
* of the compression function.
|
||||
*/
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT_SHA2
|
||||
|
||||
static const sph_u32 K[64] = {
|
||||
SPH_C32(0x428A2F98), SPH_C32(0x71374491),
|
||||
SPH_C32(0xB5C0FBCF), SPH_C32(0xE9B5DBA5),
|
||||
SPH_C32(0x3956C25B), SPH_C32(0x59F111F1),
|
||||
SPH_C32(0x923F82A4), SPH_C32(0xAB1C5ED5),
|
||||
SPH_C32(0xD807AA98), SPH_C32(0x12835B01),
|
||||
SPH_C32(0x243185BE), SPH_C32(0x550C7DC3),
|
||||
SPH_C32(0x72BE5D74), SPH_C32(0x80DEB1FE),
|
||||
SPH_C32(0x9BDC06A7), SPH_C32(0xC19BF174),
|
||||
SPH_C32(0xE49B69C1), SPH_C32(0xEFBE4786),
|
||||
SPH_C32(0x0FC19DC6), SPH_C32(0x240CA1CC),
|
||||
SPH_C32(0x2DE92C6F), SPH_C32(0x4A7484AA),
|
||||
SPH_C32(0x5CB0A9DC), SPH_C32(0x76F988DA),
|
||||
SPH_C32(0x983E5152), SPH_C32(0xA831C66D),
|
||||
SPH_C32(0xB00327C8), SPH_C32(0xBF597FC7),
|
||||
SPH_C32(0xC6E00BF3), SPH_C32(0xD5A79147),
|
||||
SPH_C32(0x06CA6351), SPH_C32(0x14292967),
|
||||
SPH_C32(0x27B70A85), SPH_C32(0x2E1B2138),
|
||||
SPH_C32(0x4D2C6DFC), SPH_C32(0x53380D13),
|
||||
SPH_C32(0x650A7354), SPH_C32(0x766A0ABB),
|
||||
SPH_C32(0x81C2C92E), SPH_C32(0x92722C85),
|
||||
SPH_C32(0xA2BFE8A1), SPH_C32(0xA81A664B),
|
||||
SPH_C32(0xC24B8B70), SPH_C32(0xC76C51A3),
|
||||
SPH_C32(0xD192E819), SPH_C32(0xD6990624),
|
||||
SPH_C32(0xF40E3585), SPH_C32(0x106AA070),
|
||||
SPH_C32(0x19A4C116), SPH_C32(0x1E376C08),
|
||||
SPH_C32(0x2748774C), SPH_C32(0x34B0BCB5),
|
||||
SPH_C32(0x391C0CB3), SPH_C32(0x4ED8AA4A),
|
||||
SPH_C32(0x5B9CCA4F), SPH_C32(0x682E6FF3),
|
||||
SPH_C32(0x748F82EE), SPH_C32(0x78A5636F),
|
||||
SPH_C32(0x84C87814), SPH_C32(0x8CC70208),
|
||||
SPH_C32(0x90BEFFFA), SPH_C32(0xA4506CEB),
|
||||
SPH_C32(0xBEF9A3F7), SPH_C32(0xC67178F2)
|
||||
};
|
||||
|
||||
#define SHA2_MEXP1(in, pc) do { \
|
||||
W[pc] = in(pc); \
|
||||
} while (0)
|
||||
|
||||
#define SHA2_MEXP2(in, pc) do { \
|
||||
W[(pc) & 0x0F] = SPH_T32(SSG2_1(W[((pc) - 2) & 0x0F]) \
|
||||
+ W[((pc) - 7) & 0x0F] \
|
||||
+ SSG2_0(W[((pc) - 15) & 0x0F]) + W[(pc) & 0x0F]); \
|
||||
} while (0)
|
||||
|
||||
#define SHA2_STEPn(n, a, b, c, d, e, f, g, h, in, pc) do { \
|
||||
sph_u32 t1, t2; \
|
||||
SHA2_MEXP ## n(in, pc); \
|
||||
t1 = SPH_T32(h + BSG2_1(e) + CH(e, f, g) \
|
||||
+ K[pcount + (pc)] + W[(pc) & 0x0F]); \
|
||||
t2 = SPH_T32(BSG2_0(a) + MAJ(a, b, c)); \
|
||||
d = SPH_T32(d + t1); \
|
||||
h = SPH_T32(t1 + t2); \
|
||||
} while (0)
|
||||
|
||||
#define SHA2_STEP1(a, b, c, d, e, f, g, h, in, pc) \
|
||||
SHA2_STEPn(1, a, b, c, d, e, f, g, h, in, pc)
|
||||
#define SHA2_STEP2(a, b, c, d, e, f, g, h, in, pc) \
|
||||
SHA2_STEPn(2, a, b, c, d, e, f, g, h, in, pc)
|
||||
|
||||
#define SHA2_ROUND_BODY(in, r) do { \
|
||||
sph_u32 A, B, C, D, E, F, G, H; \
|
||||
sph_u32 W[16]; \
|
||||
unsigned pcount; \
|
||||
\
|
||||
A = (r)[0]; \
|
||||
B = (r)[1]; \
|
||||
C = (r)[2]; \
|
||||
D = (r)[3]; \
|
||||
E = (r)[4]; \
|
||||
F = (r)[5]; \
|
||||
G = (r)[6]; \
|
||||
H = (r)[7]; \
|
||||
pcount = 0; \
|
||||
SHA2_STEP1(A, B, C, D, E, F, G, H, in, 0); \
|
||||
SHA2_STEP1(H, A, B, C, D, E, F, G, in, 1); \
|
||||
SHA2_STEP1(G, H, A, B, C, D, E, F, in, 2); \
|
||||
SHA2_STEP1(F, G, H, A, B, C, D, E, in, 3); \
|
||||
SHA2_STEP1(E, F, G, H, A, B, C, D, in, 4); \
|
||||
SHA2_STEP1(D, E, F, G, H, A, B, C, in, 5); \
|
||||
SHA2_STEP1(C, D, E, F, G, H, A, B, in, 6); \
|
||||
SHA2_STEP1(B, C, D, E, F, G, H, A, in, 7); \
|
||||
SHA2_STEP1(A, B, C, D, E, F, G, H, in, 8); \
|
||||
SHA2_STEP1(H, A, B, C, D, E, F, G, in, 9); \
|
||||
SHA2_STEP1(G, H, A, B, C, D, E, F, in, 10); \
|
||||
SHA2_STEP1(F, G, H, A, B, C, D, E, in, 11); \
|
||||
SHA2_STEP1(E, F, G, H, A, B, C, D, in, 12); \
|
||||
SHA2_STEP1(D, E, F, G, H, A, B, C, in, 13); \
|
||||
SHA2_STEP1(C, D, E, F, G, H, A, B, in, 14); \
|
||||
SHA2_STEP1(B, C, D, E, F, G, H, A, in, 15); \
|
||||
for (pcount = 16; pcount < 64; pcount += 16) { \
|
||||
SHA2_STEP2(A, B, C, D, E, F, G, H, in, 0); \
|
||||
SHA2_STEP2(H, A, B, C, D, E, F, G, in, 1); \
|
||||
SHA2_STEP2(G, H, A, B, C, D, E, F, in, 2); \
|
||||
SHA2_STEP2(F, G, H, A, B, C, D, E, in, 3); \
|
||||
SHA2_STEP2(E, F, G, H, A, B, C, D, in, 4); \
|
||||
SHA2_STEP2(D, E, F, G, H, A, B, C, in, 5); \
|
||||
SHA2_STEP2(C, D, E, F, G, H, A, B, in, 6); \
|
||||
SHA2_STEP2(B, C, D, E, F, G, H, A, in, 7); \
|
||||
SHA2_STEP2(A, B, C, D, E, F, G, H, in, 8); \
|
||||
SHA2_STEP2(H, A, B, C, D, E, F, G, in, 9); \
|
||||
SHA2_STEP2(G, H, A, B, C, D, E, F, in, 10); \
|
||||
SHA2_STEP2(F, G, H, A, B, C, D, E, in, 11); \
|
||||
SHA2_STEP2(E, F, G, H, A, B, C, D, in, 12); \
|
||||
SHA2_STEP2(D, E, F, G, H, A, B, C, in, 13); \
|
||||
SHA2_STEP2(C, D, E, F, G, H, A, B, in, 14); \
|
||||
SHA2_STEP2(B, C, D, E, F, G, H, A, in, 15); \
|
||||
} \
|
||||
(r)[0] = SPH_T32((r)[0] + A); \
|
||||
(r)[1] = SPH_T32((r)[1] + B); \
|
||||
(r)[2] = SPH_T32((r)[2] + C); \
|
||||
(r)[3] = SPH_T32((r)[3] + D); \
|
||||
(r)[4] = SPH_T32((r)[4] + E); \
|
||||
(r)[5] = SPH_T32((r)[5] + F); \
|
||||
(r)[6] = SPH_T32((r)[6] + G); \
|
||||
(r)[7] = SPH_T32((r)[7] + H); \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
|
||||
#define SHA2_ROUND_BODY(in, r) do { \
|
||||
sph_u32 A, B, C, D, E, F, G, H, T1, T2; \
|
||||
sph_u32 W00, W01, W02, W03, W04, W05, W06, W07; \
|
||||
sph_u32 W08, W09, W10, W11, W12, W13, W14, W15; \
|
||||
int i; \
|
||||
\
|
||||
A = (r)[0]; \
|
||||
B = (r)[1]; \
|
||||
C = (r)[2]; \
|
||||
D = (r)[3]; \
|
||||
E = (r)[4]; \
|
||||
F = (r)[5]; \
|
||||
G = (r)[6]; \
|
||||
H = (r)[7]; \
|
||||
W00 = in(0); \
|
||||
T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
|
||||
+ SPH_C32(0x428A2F98) + W00); \
|
||||
T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
|
||||
D = SPH_T32(D + T1); \
|
||||
H = SPH_T32(T1 + T2); \
|
||||
W01 = in(1); \
|
||||
T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
|
||||
+ SPH_C32(0x71374491) + W01); \
|
||||
T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
|
||||
C = SPH_T32(C + T1); \
|
||||
G = SPH_T32(T1 + T2); \
|
||||
W02 = in(2); \
|
||||
T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
|
||||
+ SPH_C32(0xB5C0FBCF) + W02); \
|
||||
T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
|
||||
B = SPH_T32(B + T1); \
|
||||
F = SPH_T32(T1 + T2); \
|
||||
W03 = in(3); \
|
||||
T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
|
||||
+ SPH_C32(0xE9B5DBA5) + W03); \
|
||||
T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
|
||||
A = SPH_T32(A + T1); \
|
||||
E = SPH_T32(T1 + T2); \
|
||||
W04 = in(4); \
|
||||
T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
|
||||
+ SPH_C32(0x3956C25B) + W04); \
|
||||
T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
|
||||
H = SPH_T32(H + T1); \
|
||||
D = SPH_T32(T1 + T2); \
|
||||
W05 = in(5); \
|
||||
T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
|
||||
+ SPH_C32(0x59F111F1) + W05); \
|
||||
T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
|
||||
G = SPH_T32(G + T1); \
|
||||
C = SPH_T32(T1 + T2); \
|
||||
W06 = in(6); \
|
||||
T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
|
||||
+ SPH_C32(0x923F82A4) + W06); \
|
||||
T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
|
||||
F = SPH_T32(F + T1); \
|
||||
B = SPH_T32(T1 + T2); \
|
||||
W07 = in(7); \
|
||||
T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
|
||||
+ SPH_C32(0xAB1C5ED5) + W07); \
|
||||
T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
|
||||
E = SPH_T32(E + T1); \
|
||||
A = SPH_T32(T1 + T2); \
|
||||
W08 = in(8); \
|
||||
T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
|
||||
+ SPH_C32(0xD807AA98) + W08); \
|
||||
T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
|
||||
D = SPH_T32(D + T1); \
|
||||
H = SPH_T32(T1 + T2); \
|
||||
W09 = in(9); \
|
||||
T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
|
||||
+ SPH_C32(0x12835B01) + W09); \
|
||||
T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
|
||||
C = SPH_T32(C + T1); \
|
||||
G = SPH_T32(T1 + T2); \
|
||||
W10 = in(10); \
|
||||
T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
|
||||
+ SPH_C32(0x243185BE) + W10); \
|
||||
T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
|
||||
B = SPH_T32(B + T1); \
|
||||
F = SPH_T32(T1 + T2); \
|
||||
W11 = in(11); \
|
||||
T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
|
||||
+ SPH_C32(0x550C7DC3) + W11); \
|
||||
T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
|
||||
A = SPH_T32(A + T1); \
|
||||
E = SPH_T32(T1 + T2); \
|
||||
W12 = in(12); \
|
||||
T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
|
||||
+ SPH_C32(0x72BE5D74) + W12); \
|
||||
T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
|
||||
H = SPH_T32(H + T1); \
|
||||
D = SPH_T32(T1 + T2); \
|
||||
W13 = in(13); \
|
||||
T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
|
||||
+ SPH_C32(0x80DEB1FE) + W13); \
|
||||
T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
|
||||
G = SPH_T32(G + T1); \
|
||||
C = SPH_T32(T1 + T2); \
|
||||
W14 = in(14); \
|
||||
T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
|
||||
+ SPH_C32(0x9BDC06A7) + W14); \
|
||||
T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
|
||||
F = SPH_T32(F + T1); \
|
||||
B = SPH_T32(T1 + T2); \
|
||||
W15 = in(15); \
|
||||
T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
|
||||
+ SPH_C32(0xC19BF174) + W15); \
|
||||
T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
|
||||
E = SPH_T32(E + T1); \
|
||||
A = SPH_T32(T1 + T2); \
|
||||
W00 = SPH_T32(SSG2_1(W14) + W09 + SSG2_0(W01) + W00); \
|
||||
T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
|
||||
+ SPH_C32(0xE49B69C1) + W00); \
|
||||
T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
|
||||
D = SPH_T32(D + T1); \
|
||||
H = SPH_T32(T1 + T2); \
|
||||
W01 = SPH_T32(SSG2_1(W15) + W10 + SSG2_0(W02) + W01); \
|
||||
T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
|
||||
+ SPH_C32(0xEFBE4786) + W01); \
|
||||
T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
|
||||
C = SPH_T32(C + T1); \
|
||||
G = SPH_T32(T1 + T2); \
|
||||
W02 = SPH_T32(SSG2_1(W00) + W11 + SSG2_0(W03) + W02); \
|
||||
T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
|
||||
+ SPH_C32(0x0FC19DC6) + W02); \
|
||||
T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
|
||||
B = SPH_T32(B + T1); \
|
||||
F = SPH_T32(T1 + T2); \
|
||||
W03 = SPH_T32(SSG2_1(W01) + W12 + SSG2_0(W04) + W03); \
|
||||
T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
|
||||
+ SPH_C32(0x240CA1CC) + W03); \
|
||||
T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
|
||||
A = SPH_T32(A + T1); \
|
||||
E = SPH_T32(T1 + T2); \
|
||||
W04 = SPH_T32(SSG2_1(W02) + W13 + SSG2_0(W05) + W04); \
|
||||
T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
|
||||
+ SPH_C32(0x2DE92C6F) + W04); \
|
||||
T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
|
||||
H = SPH_T32(H + T1); \
|
||||
D = SPH_T32(T1 + T2); \
|
||||
W05 = SPH_T32(SSG2_1(W03) + W14 + SSG2_0(W06) + W05); \
|
||||
T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
|
||||
+ SPH_C32(0x4A7484AA) + W05); \
|
||||
T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
|
||||
G = SPH_T32(G + T1); \
|
||||
C = SPH_T32(T1 + T2); \
|
||||
W06 = SPH_T32(SSG2_1(W04) + W15 + SSG2_0(W07) + W06); \
|
||||
T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
|
||||
+ SPH_C32(0x5CB0A9DC) + W06); \
|
||||
T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
|
||||
F = SPH_T32(F + T1); \
|
||||
B = SPH_T32(T1 + T2); \
|
||||
W07 = SPH_T32(SSG2_1(W05) + W00 + SSG2_0(W08) + W07); \
|
||||
T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
|
||||
+ SPH_C32(0x76F988DA) + W07); \
|
||||
T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
|
||||
E = SPH_T32(E + T1); \
|
||||
A = SPH_T32(T1 + T2); \
|
||||
W08 = SPH_T32(SSG2_1(W06) + W01 + SSG2_0(W09) + W08); \
|
||||
T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
|
||||
+ SPH_C32(0x983E5152) + W08); \
|
||||
T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
|
||||
D = SPH_T32(D + T1); \
|
||||
H = SPH_T32(T1 + T2); \
|
||||
W09 = SPH_T32(SSG2_1(W07) + W02 + SSG2_0(W10) + W09); \
|
||||
T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
|
||||
+ SPH_C32(0xA831C66D) + W09); \
|
||||
T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
|
||||
C = SPH_T32(C + T1); \
|
||||
G = SPH_T32(T1 + T2); \
|
||||
W10 = SPH_T32(SSG2_1(W08) + W03 + SSG2_0(W11) + W10); \
|
||||
T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
|
||||
+ SPH_C32(0xB00327C8) + W10); \
|
||||
T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
|
||||
B = SPH_T32(B + T1); \
|
||||
F = SPH_T32(T1 + T2); \
|
||||
W11 = SPH_T32(SSG2_1(W09) + W04 + SSG2_0(W12) + W11); \
|
||||
T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
|
||||
+ SPH_C32(0xBF597FC7) + W11); \
|
||||
T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
|
||||
A = SPH_T32(A + T1); \
|
||||
E = SPH_T32(T1 + T2); \
|
||||
W12 = SPH_T32(SSG2_1(W10) + W05 + SSG2_0(W13) + W12); \
|
||||
T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
|
||||
+ SPH_C32(0xC6E00BF3) + W12); \
|
||||
T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
|
||||
H = SPH_T32(H + T1); \
|
||||
D = SPH_T32(T1 + T2); \
|
||||
W13 = SPH_T32(SSG2_1(W11) + W06 + SSG2_0(W14) + W13); \
|
||||
T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
|
||||
+ SPH_C32(0xD5A79147) + W13); \
|
||||
T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
|
||||
G = SPH_T32(G + T1); \
|
||||
C = SPH_T32(T1 + T2); \
|
||||
W14 = SPH_T32(SSG2_1(W12) + W07 + SSG2_0(W15) + W14); \
|
||||
T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
|
||||
+ SPH_C32(0x06CA6351) + W14); \
|
||||
T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
|
||||
F = SPH_T32(F + T1); \
|
||||
B = SPH_T32(T1 + T2); \
|
||||
W15 = SPH_T32(SSG2_1(W13) + W08 + SSG2_0(W00) + W15); \
|
||||
T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
|
||||
+ SPH_C32(0x14292967) + W15); \
|
||||
T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
|
||||
E = SPH_T32(E + T1); \
|
||||
A = SPH_T32(T1 + T2); \
|
||||
W00 = SPH_T32(SSG2_1(W14) + W09 + SSG2_0(W01) + W00); \
|
||||
T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
|
||||
+ SPH_C32(0x27B70A85) + W00); \
|
||||
T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
|
||||
D = SPH_T32(D + T1); \
|
||||
H = SPH_T32(T1 + T2); \
|
||||
W01 = SPH_T32(SSG2_1(W15) + W10 + SSG2_0(W02) + W01); \
|
||||
T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
|
||||
+ SPH_C32(0x2E1B2138) + W01); \
|
||||
T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
|
||||
C = SPH_T32(C + T1); \
|
||||
G = SPH_T32(T1 + T2); \
|
||||
W02 = SPH_T32(SSG2_1(W00) + W11 + SSG2_0(W03) + W02); \
|
||||
T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
|
||||
+ SPH_C32(0x4D2C6DFC) + W02); \
|
||||
T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
|
||||
B = SPH_T32(B + T1); \
|
||||
F = SPH_T32(T1 + T2); \
|
||||
W03 = SPH_T32(SSG2_1(W01) + W12 + SSG2_0(W04) + W03); \
|
||||
T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
|
||||
+ SPH_C32(0x53380D13) + W03); \
|
||||
T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
|
||||
A = SPH_T32(A + T1); \
|
||||
E = SPH_T32(T1 + T2); \
|
||||
W04 = SPH_T32(SSG2_1(W02) + W13 + SSG2_0(W05) + W04); \
|
||||
T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
|
||||
+ SPH_C32(0x650A7354) + W04); \
|
||||
T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
|
||||
H = SPH_T32(H + T1); \
|
||||
D = SPH_T32(T1 + T2); \
|
||||
W05 = SPH_T32(SSG2_1(W03) + W14 + SSG2_0(W06) + W05); \
|
||||
T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
|
||||
+ SPH_C32(0x766A0ABB) + W05); \
|
||||
T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
|
||||
G = SPH_T32(G + T1); \
|
||||
C = SPH_T32(T1 + T2); \
|
||||
W06 = SPH_T32(SSG2_1(W04) + W15 + SSG2_0(W07) + W06); \
|
||||
T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
|
||||
+ SPH_C32(0x81C2C92E) + W06); \
|
||||
T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
|
||||
F = SPH_T32(F + T1); \
|
||||
B = SPH_T32(T1 + T2); \
|
||||
W07 = SPH_T32(SSG2_1(W05) + W00 + SSG2_0(W08) + W07); \
|
||||
T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
|
||||
+ SPH_C32(0x92722C85) + W07); \
|
||||
T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
|
||||
E = SPH_T32(E + T1); \
|
||||
A = SPH_T32(T1 + T2); \
|
||||
W08 = SPH_T32(SSG2_1(W06) + W01 + SSG2_0(W09) + W08); \
|
||||
T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
|
||||
+ SPH_C32(0xA2BFE8A1) + W08); \
|
||||
T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
|
||||
D = SPH_T32(D + T1); \
|
||||
H = SPH_T32(T1 + T2); \
|
||||
W09 = SPH_T32(SSG2_1(W07) + W02 + SSG2_0(W10) + W09); \
|
||||
T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
|
||||
+ SPH_C32(0xA81A664B) + W09); \
|
||||
T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
|
||||
C = SPH_T32(C + T1); \
|
||||
G = SPH_T32(T1 + T2); \
|
||||
W10 = SPH_T32(SSG2_1(W08) + W03 + SSG2_0(W11) + W10); \
|
||||
T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
|
||||
+ SPH_C32(0xC24B8B70) + W10); \
|
||||
T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
|
||||
B = SPH_T32(B + T1); \
|
||||
F = SPH_T32(T1 + T2); \
|
||||
W11 = SPH_T32(SSG2_1(W09) + W04 + SSG2_0(W12) + W11); \
|
||||
T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
|
||||
+ SPH_C32(0xC76C51A3) + W11); \
|
||||
T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
|
||||
A = SPH_T32(A + T1); \
|
||||
E = SPH_T32(T1 + T2); \
|
||||
W12 = SPH_T32(SSG2_1(W10) + W05 + SSG2_0(W13) + W12); \
|
||||
T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
|
||||
+ SPH_C32(0xD192E819) + W12); \
|
||||
T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
|
||||
H = SPH_T32(H + T1); \
|
||||
D = SPH_T32(T1 + T2); \
|
||||
W13 = SPH_T32(SSG2_1(W11) + W06 + SSG2_0(W14) + W13); \
|
||||
T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
|
||||
+ SPH_C32(0xD6990624) + W13); \
|
||||
T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
|
||||
G = SPH_T32(G + T1); \
|
||||
C = SPH_T32(T1 + T2); \
|
||||
W14 = SPH_T32(SSG2_1(W12) + W07 + SSG2_0(W15) + W14); \
|
||||
T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
|
||||
+ SPH_C32(0xF40E3585) + W14); \
|
||||
T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
|
||||
F = SPH_T32(F + T1); \
|
||||
B = SPH_T32(T1 + T2); \
|
||||
W15 = SPH_T32(SSG2_1(W13) + W08 + SSG2_0(W00) + W15); \
|
||||
T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
|
||||
+ SPH_C32(0x106AA070) + W15); \
|
||||
T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
|
||||
E = SPH_T32(E + T1); \
|
||||
A = SPH_T32(T1 + T2); \
|
||||
W00 = SPH_T32(SSG2_1(W14) + W09 + SSG2_0(W01) + W00); \
|
||||
T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
|
||||
+ SPH_C32(0x19A4C116) + W00); \
|
||||
T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
|
||||
D = SPH_T32(D + T1); \
|
||||
H = SPH_T32(T1 + T2); \
|
||||
W01 = SPH_T32(SSG2_1(W15) + W10 + SSG2_0(W02) + W01); \
|
||||
T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
|
||||
+ SPH_C32(0x1E376C08) + W01); \
|
||||
T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
|
||||
C = SPH_T32(C + T1); \
|
||||
G = SPH_T32(T1 + T2); \
|
||||
W02 = SPH_T32(SSG2_1(W00) + W11 + SSG2_0(W03) + W02); \
|
||||
T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
|
||||
+ SPH_C32(0x2748774C) + W02); \
|
||||
T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
|
||||
B = SPH_T32(B + T1); \
|
||||
F = SPH_T32(T1 + T2); \
|
||||
W03 = SPH_T32(SSG2_1(W01) + W12 + SSG2_0(W04) + W03); \
|
||||
T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
|
||||
+ SPH_C32(0x34B0BCB5) + W03); \
|
||||
T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
|
||||
A = SPH_T32(A + T1); \
|
||||
E = SPH_T32(T1 + T2); \
|
||||
W04 = SPH_T32(SSG2_1(W02) + W13 + SSG2_0(W05) + W04); \
|
||||
T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
|
||||
+ SPH_C32(0x391C0CB3) + W04); \
|
||||
T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
|
||||
H = SPH_T32(H + T1); \
|
||||
D = SPH_T32(T1 + T2); \
|
||||
W05 = SPH_T32(SSG2_1(W03) + W14 + SSG2_0(W06) + W05); \
|
||||
T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
|
||||
+ SPH_C32(0x4ED8AA4A) + W05); \
|
||||
T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
|
||||
G = SPH_T32(G + T1); \
|
||||
C = SPH_T32(T1 + T2); \
|
||||
W06 = SPH_T32(SSG2_1(W04) + W15 + SSG2_0(W07) + W06); \
|
||||
T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
|
||||
+ SPH_C32(0x5B9CCA4F) + W06); \
|
||||
T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
|
||||
F = SPH_T32(F + T1); \
|
||||
B = SPH_T32(T1 + T2); \
|
||||
W07 = SPH_T32(SSG2_1(W05) + W00 + SSG2_0(W08) + W07); \
|
||||
T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
|
||||
+ SPH_C32(0x682E6FF3) + W07); \
|
||||
T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
|
||||
E = SPH_T32(E + T1); \
|
||||
A = SPH_T32(T1 + T2); \
|
||||
W08 = SPH_T32(SSG2_1(W06) + W01 + SSG2_0(W09) + W08); \
|
||||
T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
|
||||
+ SPH_C32(0x748F82EE) + W08); \
|
||||
T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
|
||||
D = SPH_T32(D + T1); \
|
||||
H = SPH_T32(T1 + T2); \
|
||||
W09 = SPH_T32(SSG2_1(W07) + W02 + SSG2_0(W10) + W09); \
|
||||
T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
|
||||
+ SPH_C32(0x78A5636F) + W09); \
|
||||
T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
|
||||
C = SPH_T32(C + T1); \
|
||||
G = SPH_T32(T1 + T2); \
|
||||
W10 = SPH_T32(SSG2_1(W08) + W03 + SSG2_0(W11) + W10); \
|
||||
T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
|
||||
+ SPH_C32(0x84C87814) + W10); \
|
||||
T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
|
||||
B = SPH_T32(B + T1); \
|
||||
F = SPH_T32(T1 + T2); \
|
||||
W11 = SPH_T32(SSG2_1(W09) + W04 + SSG2_0(W12) + W11); \
|
||||
T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
|
||||
+ SPH_C32(0x8CC70208) + W11); \
|
||||
T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
|
||||
A = SPH_T32(A + T1); \
|
||||
E = SPH_T32(T1 + T2); \
|
||||
W12 = SPH_T32(SSG2_1(W10) + W05 + SSG2_0(W13) + W12); \
|
||||
T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
|
||||
+ SPH_C32(0x90BEFFFA) + W12); \
|
||||
T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
|
||||
H = SPH_T32(H + T1); \
|
||||
D = SPH_T32(T1 + T2); \
|
||||
W13 = SPH_T32(SSG2_1(W11) + W06 + SSG2_0(W14) + W13); \
|
||||
T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
|
||||
+ SPH_C32(0xA4506CEB) + W13); \
|
||||
T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
|
||||
G = SPH_T32(G + T1); \
|
||||
C = SPH_T32(T1 + T2); \
|
||||
W14 = SPH_T32(SSG2_1(W12) + W07 + SSG2_0(W15) + W14); \
|
||||
T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
|
||||
+ SPH_C32(0xBEF9A3F7) + W14); \
|
||||
T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
|
||||
F = SPH_T32(F + T1); \
|
||||
B = SPH_T32(T1 + T2); \
|
||||
W15 = SPH_T32(SSG2_1(W13) + W08 + SSG2_0(W00) + W15); \
|
||||
T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
|
||||
+ SPH_C32(0xC67178F2) + W15); \
|
||||
T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
|
||||
E = SPH_T32(E + T1); \
|
||||
A = SPH_T32(T1 + T2); \
|
||||
(r)[0] = SPH_T32((r)[0] + A); \
|
||||
(r)[1] = SPH_T32((r)[1] + B); \
|
||||
(r)[2] = SPH_T32((r)[2] + C); \
|
||||
(r)[3] = SPH_T32((r)[3] + D); \
|
||||
(r)[4] = SPH_T32((r)[4] + E); \
|
||||
(r)[5] = SPH_T32((r)[5] + F); \
|
||||
(r)[6] = SPH_T32((r)[6] + G); \
|
||||
(r)[7] = SPH_T32((r)[7] + H); \
|
||||
} while (0)
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* One round of SHA-224 / SHA-256. The data must be aligned for 32-bit access.
|
||||
*/
|
||||
static void
|
||||
sha2_round(const unsigned char *data, sph_u32 r[8])
|
||||
{
|
||||
#define SHA2_IN(x) sph_dec32be_aligned(data + (4 * (x)))
|
||||
SHA2_ROUND_BODY(SHA2_IN, r);
|
||||
#undef SHA2_IN
|
||||
}
|
||||
|
||||
/* see sph_sha2.h */
|
||||
void
|
||||
sph_sha224_init(void *cc)
|
||||
{
|
||||
sph_sha224_context *sc;
|
||||
|
||||
sc = cc;
|
||||
memcpy(sc->val, H224, sizeof H224);
|
||||
#if SPH_64
|
||||
sc->count = 0;
|
||||
#else
|
||||
sc->count_high = sc->count_low = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* see sph_sha2.h */
|
||||
void
|
||||
sph_sha256_init(void *cc)
|
||||
{
|
||||
sph_sha256_context *sc;
|
||||
|
||||
sc = cc;
|
||||
memcpy(sc->val, H256, sizeof H256);
|
||||
#if SPH_64
|
||||
sc->count = 0;
|
||||
#else
|
||||
sc->count_high = sc->count_low = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
#define RFUN sha2_round
|
||||
#define HASH sha224
|
||||
#define BE32 1
|
||||
#include "md_helper.c"
|
||||
|
||||
/* see sph_sha2.h */
|
||||
void
|
||||
sph_sha224_close(void *cc, void *dst)
|
||||
{
|
||||
sha224_close(cc, dst, 7);
|
||||
sph_sha224_init(cc);
|
||||
}
|
||||
|
||||
/* see sph_sha2.h */
|
||||
void
|
||||
sph_sha224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
sha224_addbits_and_close(cc, ub, n, dst, 7);
|
||||
sph_sha224_init(cc);
|
||||
}
|
||||
|
||||
/* see sph_sha2.h */
|
||||
void
|
||||
sph_sha256_close(void *cc, void *dst)
|
||||
{
|
||||
sha224_close(cc, dst, 8);
|
||||
sph_sha256_init(cc);
|
||||
}
|
||||
|
||||
/* see sph_sha2.h */
|
||||
void
|
||||
sph_sha256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
sha224_addbits_and_close(cc, ub, n, dst, 8);
|
||||
sph_sha256_init(cc);
|
||||
}
|
||||
|
||||
/* see sph_sha2.h */
|
||||
void
|
||||
sph_sha224_comp(const sph_u32 msg[16], sph_u32 val[8])
|
||||
{
|
||||
#define SHA2_IN(x) msg[x]
|
||||
SHA2_ROUND_BODY(SHA2_IN, val);
|
||||
#undef SHA2_IN
|
||||
}
|
43
util.cpp
43
util.cpp
@ -1421,28 +1421,39 @@ static uint32_t getblocheight(struct stratum_ctx *sctx)
|
||||
|
||||
static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)
|
||||
{
|
||||
const char *job_id, *prevhash, *coinb1, *coinb2, *version, *nbits, *stime, *nreward;
|
||||
const char *job_id, *prevhash, *coinb1, *coinb2, *version, *nbits, *stime;
|
||||
const char *claim = NULL, *nreward = NULL;
|
||||
size_t coinb1_size, coinb2_size;
|
||||
bool clean, ret = false;
|
||||
int merkle_count, i;
|
||||
int merkle_count, i, p=0;
|
||||
json_t *merkle_arr;
|
||||
uchar **merkle = NULL;
|
||||
// uchar(*merkle_tree)[32] = { 0 };
|
||||
int ntime;
|
||||
char algo[64] = { 0 };
|
||||
get_currentalgo(algo, sizeof(algo));
|
||||
bool has_claim = !strcasecmp(algo, "lbry");
|
||||
|
||||
job_id = json_string_value(json_array_get(params, 0));
|
||||
prevhash = json_string_value(json_array_get(params, 1));
|
||||
coinb1 = json_string_value(json_array_get(params, 2));
|
||||
coinb2 = json_string_value(json_array_get(params, 3));
|
||||
merkle_arr = json_array_get(params, 4);
|
||||
job_id = json_string_value(json_array_get(params, p++));
|
||||
prevhash = json_string_value(json_array_get(params, p++));
|
||||
if (has_claim) {
|
||||
claim = json_string_value(json_array_get(params, p++));
|
||||
if (!claim || strlen(claim) != 64) {
|
||||
applog(LOG_ERR, "Stratum notify: invalid claim parameter");
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
coinb1 = json_string_value(json_array_get(params, p++));
|
||||
coinb2 = json_string_value(json_array_get(params, p++));
|
||||
merkle_arr = json_array_get(params, p++);
|
||||
if (!merkle_arr || !json_is_array(merkle_arr))
|
||||
goto out;
|
||||
merkle_count = (int) json_array_size(merkle_arr);
|
||||
version = json_string_value(json_array_get(params, 5));
|
||||
nbits = json_string_value(json_array_get(params, 6));
|
||||
stime = json_string_value(json_array_get(params, 7));
|
||||
clean = json_is_true(json_array_get(params, 8));
|
||||
nreward = json_string_value(json_array_get(params, 9));
|
||||
version = json_string_value(json_array_get(params, p++));
|
||||
nbits = json_string_value(json_array_get(params, p++));
|
||||
stime = json_string_value(json_array_get(params, p++));
|
||||
clean = json_is_true(json_array_get(params, p)); p++;
|
||||
nreward = json_string_value(json_array_get(params, p++));
|
||||
|
||||
if (!job_id || !prevhash || !coinb1 || !coinb2 || !version || !nbits || !stime ||
|
||||
strlen(prevhash) != 64 || strlen(version) != 8 ||
|
||||
@ -1494,6 +1505,7 @@ static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)
|
||||
free(sctx->job.job_id);
|
||||
sctx->job.job_id = strdup(job_id);
|
||||
hex2bin(sctx->job.prevhash, prevhash, 32);
|
||||
if (has_claim) hex2bin(sctx->job.claim, claim, 32);
|
||||
|
||||
sctx->job.height = getblocheight(sctx);
|
||||
|
||||
@ -2079,7 +2091,8 @@ void do_gpu_tests(void)
|
||||
//scanhash_scrypt_jane(0, &work, NULL, 1, &done, &tv, &tv);
|
||||
|
||||
memset(work.data, 0, sizeof(work.data));
|
||||
scanhash_decred(0, &work, 1, &done);
|
||||
work.data[0] = 0;
|
||||
scanhash_lbry(0, &work, 1, &done);
|
||||
|
||||
free(work_restart);
|
||||
work_restart = NULL;
|
||||
@ -2142,6 +2155,10 @@ void print_hash_tests(void)
|
||||
keccak256_hash(&hash[0], &buf[0]);
|
||||
printpfx("keccak", hash);
|
||||
|
||||
memset(buf, 0, 128);
|
||||
lbry_hash(&hash[0], &buf[0]);
|
||||
printpfx("lbry", hash);
|
||||
|
||||
luffa_hash(&hash[0], &buf[0]);
|
||||
printpfx("luffa", hash);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user