merged sha256d/ripemd
drop ripemd.cu and remove unused kernels
This commit is contained in:
parent
940c1b3a2f
commit
223077d11a
@ -51,7 +51,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \
|
|||||||
sph/hamsi.c sph/hamsi_helper.c sph/streebog.c \
|
sph/hamsi.c sph/hamsi_helper.c sph/streebog.c \
|
||||||
sph/shabal.c sph/whirlpool.c sph/sha2big.c sph/haval.c \
|
sph/shabal.c sph/whirlpool.c sph/sha2big.c sph/haval.c \
|
||||||
sph/ripemd.c sph/sph_sha2.c \
|
sph/ripemd.c sph/sph_sha2.c \
|
||||||
lbry/lbry.cu lbry/cuda_ripemd160.cu lbry/cuda_sha256_lbry.cu lbry/cuda_sha512_lbry.cu \
|
lbry/lbry.cu lbry/cuda_sha256_lbry.cu lbry/cuda_sha512_lbry.cu \
|
||||||
qubit/qubit.cu qubit/qubit_luffa512.cu qubit/deep.cu qubit/luffa.cu \
|
qubit/qubit.cu qubit/qubit_luffa512.cu qubit/deep.cu qubit/luffa.cu \
|
||||||
x11/x11.cu x11/fresh.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
|
x11/x11.cu x11/fresh.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
|
||||||
x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \
|
x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \
|
||||||
|
@ -39,7 +39,7 @@
|
|||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings" Condition="'$(Platform)'=='Win32'">
|
<ImportGroup Label="ExtensionSettings" Condition="'$(Platform)'=='Win32'">
|
||||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.props" />
|
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 7.5.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="ExtensionSettings" Condition="'$(Platform)'=='x64'">
|
<ImportGroup Label="ExtensionSettings" Condition="'$(Platform)'=='x64'">
|
||||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 7.5.props" />
|
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 7.5.props" />
|
||||||
@ -424,7 +424,6 @@
|
|||||||
<MaxRegCount>92</MaxRegCount>
|
<MaxRegCount>92</MaxRegCount>
|
||||||
</CudaCompile>
|
</CudaCompile>
|
||||||
<CudaCompile Include="Algo256\cuda_skein256.cu" />
|
<CudaCompile Include="Algo256\cuda_skein256.cu" />
|
||||||
<CudaCompile Include="lbry\cuda_ripemd160.cu" />
|
|
||||||
<CudaCompile Include="lbry\cuda_sha256_lbry.cu" />
|
<CudaCompile Include="lbry\cuda_sha256_lbry.cu" />
|
||||||
<CudaCompile Include="lbry\cuda_sha512_lbry.cu" />
|
<CudaCompile Include="lbry\cuda_sha512_lbry.cu" />
|
||||||
<CudaCompile Include="lbry\lbry.cu" />
|
<CudaCompile Include="lbry\lbry.cu" />
|
||||||
@ -530,7 +529,7 @@
|
|||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets" Condition="'$(Platform)'=='Win32'">
|
<ImportGroup Label="ExtensionTargets" Condition="'$(Platform)'=='Win32'">
|
||||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.targets" />
|
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 7.5.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="ExtensionTargets" Condition="'$(Platform)'=='x64'">
|
<ImportGroup Label="ExtensionTargets" Condition="'$(Platform)'=='x64'">
|
||||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 7.5.targets" />
|
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 7.5.targets" />
|
||||||
|
@ -730,9 +730,6 @@
|
|||||||
<CudaCompile Include="lbry\cuda_sha512_lbry.cu">
|
<CudaCompile Include="lbry\cuda_sha512_lbry.cu">
|
||||||
<Filter>Source Files\CUDA\lbry</Filter>
|
<Filter>Source Files\CUDA\lbry</Filter>
|
||||||
</CudaCompile>
|
</CudaCompile>
|
||||||
<CudaCompile Include="lbry\cuda_ripemd160.cu">
|
|
||||||
<Filter>Source Files\CUDA\lbry</Filter>
|
|
||||||
</CudaCompile>
|
|
||||||
<CudaCompile Include="lbry\lbry.cu">
|
<CudaCompile Include="lbry\lbry.cu">
|
||||||
<Filter>Source Files\CUDA\lbry</Filter>
|
<Filter>Source Files\CUDA\lbry</Filter>
|
||||||
</CudaCompile>
|
</CudaCompile>
|
||||||
|
@ -1,402 +0,0 @@
|
|||||||
/*
|
|
||||||
* ripemd-160 kernel implementation.
|
|
||||||
*
|
|
||||||
* ==========================(LICENSE BEGIN)============================
|
|
||||||
*
|
|
||||||
* Copyright (c) 2014, 2016 djm34, tpruvot
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining
|
|
||||||
* a copy of this software and associated documentation files (the
|
|
||||||
* "Software"), to deal in the Software without restriction, including
|
|
||||||
* without limitation the rights to use, copy, modify, merge, publish,
|
|
||||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
|
||||||
* permit persons to whom the Software is furnished to do so, subject to
|
|
||||||
* the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
||||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
||||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
||||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
||||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
||||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*
|
|
||||||
* ===========================(LICENSE END)=============================
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <memory.h>
|
|
||||||
|
|
||||||
#include <cuda_helper.h>
|
|
||||||
|
|
||||||
static __constant__ uint32_t c_IV[5] = {
|
|
||||||
0x67452301u, 0xEFCDAB89u, 0x98BADCFEu, 0x10325476u, 0xC3D2E1F0u
|
|
||||||
};
|
|
||||||
|
|
||||||
__device__ __forceinline__
|
|
||||||
uint32_t xor3b(const uint32_t a, const uint32_t b, const uint32_t c) {
|
|
||||||
uint32_t result;
|
|
||||||
#if __CUDA_ARCH__ >= 500 && CUDA_VERSION >= 7050
|
|
||||||
asm ("lop3.b32 %0, %1, %2, %3, 0x96; // xor3b" //0x96 = 0xF0 ^ 0xCC ^ 0xAA
|
|
||||||
: "=r"(result) : "r"(a), "r"(b),"r"(c));
|
|
||||||
#else
|
|
||||||
result = a^b^c;
|
|
||||||
#endif
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
//__host__
|
|
||||||
//uint64_t xornot64(uint64_t a, uint64_t b, uint64_t c) {
|
|
||||||
// return c ^ (a | !b);
|
|
||||||
//}
|
|
||||||
|
|
||||||
__forceinline__ __device__
|
|
||||||
uint64_t xornot64(uint64_t a, uint64_t b, uint64_t c)
|
|
||||||
{
|
|
||||||
uint64_t result;
|
|
||||||
asm("{ .reg .u64 m,n; // xornot64\n\t"
|
|
||||||
"not.b64 m,%2; \n\t"
|
|
||||||
"or.b64 n, %1,m;\n\t"
|
|
||||||
"xor.b64 %0, n,%3;\n\t"
|
|
||||||
"}\n\t"
|
|
||||||
: "=l"(result) : "l"(a), "l"(b), "l"(c));
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
//__host__
|
|
||||||
//uint64_t xornt64(uint64_t a, uint64_t b, uint64_t c) {
|
|
||||||
// return a ^ (b | !c);
|
|
||||||
//}
|
|
||||||
|
|
||||||
__device__ __forceinline__
|
|
||||||
uint64_t xornt64(uint64_t a, uint64_t b, uint64_t c)
|
|
||||||
{
|
|
||||||
uint64_t result;
|
|
||||||
asm("{ .reg .u64 m,n; // xornt64\n\t"
|
|
||||||
"not.b64 m,%3; \n\t"
|
|
||||||
"or.b64 n, %2,m;\n\t"
|
|
||||||
"xor.b64 %0, %1,n;\n\t"
|
|
||||||
"}\n\t"
|
|
||||||
: "=l"(result) : "l"(a), "l"(b), "l"(c));
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Round functions for RIPEMD-128 and RIPEMD-160.
|
|
||||||
*/
|
|
||||||
#if 1
|
|
||||||
#define F1(x, y, z) ((x) ^ (y) ^ (z))
|
|
||||||
#define F2(x, y, z) ((((y) ^ (z)) & (x)) ^ (z))
|
|
||||||
#define F3(x, y, z) (((x) | ~(y)) ^ (z))
|
|
||||||
#define F4(x, y, z) ((((x) ^ (y)) & (z)) ^ (y))
|
|
||||||
#define F5(x, y, z) ((x) ^ ((y) | ~(z)))
|
|
||||||
#else
|
|
||||||
#define F1(x, y, z) xor3b(x,y,z)
|
|
||||||
#define F2(x, y, z) xandx(x,y,z)
|
|
||||||
#define F3(x, y, z) xornot64(x,y,z)
|
|
||||||
#define F4(x, y, z) xandx(z,x,y)
|
|
||||||
#define F5(x, y, z) xornt64(x,y,z)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Round constants for RIPEMD-160.
|
|
||||||
*/
|
|
||||||
#define K11 0x00000000u
|
|
||||||
#define K12 0x5A827999u
|
|
||||||
#define K13 0x6ED9EBA1u
|
|
||||||
#define K14 0x8F1BBCDCu
|
|
||||||
#define K15 0xA953FD4Eu
|
|
||||||
|
|
||||||
#define K21 0x50A28BE6u
|
|
||||||
#define K22 0x5C4DD124u
|
|
||||||
#define K23 0x6D703EF3u
|
|
||||||
#define K24 0x7A6D76E9u
|
|
||||||
#define K25 0x00000000u
|
|
||||||
|
|
||||||
#define RR(a, b, c, d, e, f, s, r, k) { \
|
|
||||||
a = SPH_T32(ROTL32(SPH_T32(a + f(b, c, d) + r + k), s) + e); \
|
|
||||||
c = ROTL32(c, 10); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define ROUND1(a, b, c, d, e, f, s, r, k) \
|
|
||||||
RR(a ## 1, b ## 1, c ## 1, d ## 1, e ## 1, f, s, r, K1 ## k)
|
|
||||||
|
|
||||||
#define ROUND2(a, b, c, d, e, f, s, r, k) \
|
|
||||||
RR(a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k)
|
|
||||||
|
|
||||||
#define RIPEMD160_ROUND_BODY(in, h) { \
|
|
||||||
uint32_t A1, B1, C1, D1, E1; \
|
|
||||||
uint32_t A2, B2, C2, D2, E2; \
|
|
||||||
uint32_t tmp; \
|
|
||||||
\
|
|
||||||
A1 = A2 = h[0]; \
|
|
||||||
B1 = B2 = h[1]; \
|
|
||||||
C1 = C2 = h[2]; \
|
|
||||||
D1 = D2 = h[3]; \
|
|
||||||
E1 = E2 = h[4]; \
|
|
||||||
\
|
|
||||||
ROUND1(A, B, C, D, E, F1, 11, in[ 0], 1); \
|
|
||||||
ROUND1(E, A, B, C, D, F1, 14, in[ 1], 1); \
|
|
||||||
ROUND1(D, E, A, B, C, F1, 15, in[ 2], 1); \
|
|
||||||
ROUND1(C, D, E, A, B, F1, 12, in[ 3], 1); \
|
|
||||||
ROUND1(B, C, D, E, A, F1, 5, in[ 4], 1); \
|
|
||||||
ROUND1(A, B, C, D, E, F1, 8, in[ 5], 1); \
|
|
||||||
ROUND1(E, A, B, C, D, F1, 7, in[ 6], 1); \
|
|
||||||
ROUND1(D, E, A, B, C, F1, 9, in[ 7], 1); \
|
|
||||||
ROUND1(C, D, E, A, B, F1, 11, in[ 8], 1); \
|
|
||||||
ROUND1(B, C, D, E, A, F1, 13, in[ 9], 1); \
|
|
||||||
ROUND1(A, B, C, D, E, F1, 14, in[10], 1); \
|
|
||||||
ROUND1(E, A, B, C, D, F1, 15, in[11], 1); \
|
|
||||||
ROUND1(D, E, A, B, C, F1, 6, in[12], 1); \
|
|
||||||
ROUND1(C, D, E, A, B, F1, 7, in[13], 1); \
|
|
||||||
ROUND1(B, C, D, E, A, F1, 9, in[14], 1); \
|
|
||||||
ROUND1(A, B, C, D, E, F1, 8, in[15], 1); \
|
|
||||||
\
|
|
||||||
ROUND1(E, A, B, C, D, F2, 7, in[ 7], 2); \
|
|
||||||
ROUND1(D, E, A, B, C, F2, 6, in[ 4], 2); \
|
|
||||||
ROUND1(C, D, E, A, B, F2, 8, in[13], 2); \
|
|
||||||
ROUND1(B, C, D, E, A, F2, 13, in[ 1], 2); \
|
|
||||||
ROUND1(A, B, C, D, E, F2, 11, in[10], 2); \
|
|
||||||
ROUND1(E, A, B, C, D, F2, 9, in[ 6], 2); \
|
|
||||||
ROUND1(D, E, A, B, C, F2, 7, in[15], 2); \
|
|
||||||
ROUND1(C, D, E, A, B, F2, 15, in[ 3], 2); \
|
|
||||||
ROUND1(B, C, D, E, A, F2, 7, in[12], 2); \
|
|
||||||
ROUND1(A, B, C, D, E, F2, 12, in[ 0], 2); \
|
|
||||||
ROUND1(E, A, B, C, D, F2, 15, in[ 9], 2); \
|
|
||||||
ROUND1(D, E, A, B, C, F2, 9, in[ 5], 2); \
|
|
||||||
ROUND1(C, D, E, A, B, F2, 11, in[ 2], 2); \
|
|
||||||
ROUND1(B, C, D, E, A, F2, 7, in[14], 2); \
|
|
||||||
ROUND1(A, B, C, D, E, F2, 13, in[11], 2); \
|
|
||||||
ROUND1(E, A, B, C, D, F2, 12, in[ 8], 2); \
|
|
||||||
\
|
|
||||||
ROUND1(D, E, A, B, C, F3, 11, in[ 3], 3); \
|
|
||||||
ROUND1(C, D, E, A, B, F3, 13, in[10], 3); \
|
|
||||||
ROUND1(B, C, D, E, A, F3, 6, in[14], 3); \
|
|
||||||
ROUND1(A, B, C, D, E, F3, 7, in[ 4], 3); \
|
|
||||||
ROUND1(E, A, B, C, D, F3, 14, in[ 9], 3); \
|
|
||||||
ROUND1(D, E, A, B, C, F3, 9, in[15], 3); \
|
|
||||||
ROUND1(C, D, E, A, B, F3, 13, in[ 8], 3); \
|
|
||||||
ROUND1(B, C, D, E, A, F3, 15, in[ 1], 3); \
|
|
||||||
ROUND1(A, B, C, D, E, F3, 14, in[ 2], 3); \
|
|
||||||
ROUND1(E, A, B, C, D, F3, 8, in[ 7], 3); \
|
|
||||||
ROUND1(D, E, A, B, C, F3, 13, in[ 0], 3); \
|
|
||||||
ROUND1(C, D, E, A, B, F3, 6, in[ 6], 3); \
|
|
||||||
ROUND1(B, C, D, E, A, F3, 5, in[13], 3); \
|
|
||||||
ROUND1(A, B, C, D, E, F3, 12, in[11], 3); \
|
|
||||||
ROUND1(E, A, B, C, D, F3, 7, in[ 5], 3); \
|
|
||||||
ROUND1(D, E, A, B, C, F3, 5, in[12], 3); \
|
|
||||||
\
|
|
||||||
ROUND1(C, D, E, A, B, F4, 11, in[ 1], 4); \
|
|
||||||
ROUND1(B, C, D, E, A, F4, 12, in[ 9], 4); \
|
|
||||||
ROUND1(A, B, C, D, E, F4, 14, in[11], 4); \
|
|
||||||
ROUND1(E, A, B, C, D, F4, 15, in[10], 4); \
|
|
||||||
ROUND1(D, E, A, B, C, F4, 14, in[ 0], 4); \
|
|
||||||
ROUND1(C, D, E, A, B, F4, 15, in[ 8], 4); \
|
|
||||||
ROUND1(B, C, D, E, A, F4, 9, in[12], 4); \
|
|
||||||
ROUND1(A, B, C, D, E, F4, 8, in[ 4], 4); \
|
|
||||||
ROUND1(E, A, B, C, D, F4, 9, in[13], 4); \
|
|
||||||
ROUND1(D, E, A, B, C, F4, 14, in[ 3], 4); \
|
|
||||||
ROUND1(C, D, E, A, B, F4, 5, in[ 7], 4); \
|
|
||||||
ROUND1(B, C, D, E, A, F4, 6, in[15], 4); \
|
|
||||||
ROUND1(A, B, C, D, E, F4, 8, in[14], 4); \
|
|
||||||
ROUND1(E, A, B, C, D, F4, 6, in[ 5], 4); \
|
|
||||||
ROUND1(D, E, A, B, C, F4, 5, in[ 6], 4); \
|
|
||||||
ROUND1(C, D, E, A, B, F4, 12, in[ 2], 4); \
|
|
||||||
\
|
|
||||||
ROUND1(B, C, D, E, A, F5, 9, in[ 4], 5); \
|
|
||||||
ROUND1(A, B, C, D, E, F5, 15, in[ 0], 5); \
|
|
||||||
ROUND1(E, A, B, C, D, F5, 5, in[ 5], 5); \
|
|
||||||
ROUND1(D, E, A, B, C, F5, 11, in[ 9], 5); \
|
|
||||||
ROUND1(C, D, E, A, B, F5, 6, in[ 7], 5); \
|
|
||||||
ROUND1(B, C, D, E, A, F5, 8, in[12], 5); \
|
|
||||||
ROUND1(A, B, C, D, E, F5, 13, in[ 2], 5); \
|
|
||||||
ROUND1(E, A, B, C, D, F5, 12, in[10], 5); \
|
|
||||||
ROUND1(D, E, A, B, C, F5, 5, in[14], 5); \
|
|
||||||
ROUND1(C, D, E, A, B, F5, 12, in[ 1], 5); \
|
|
||||||
ROUND1(B, C, D, E, A, F5, 13, in[ 3], 5); \
|
|
||||||
ROUND1(A, B, C, D, E, F5, 14, in[ 8], 5); \
|
|
||||||
ROUND1(E, A, B, C, D, F5, 11, in[11], 5); \
|
|
||||||
ROUND1(D, E, A, B, C, F5, 8, in[ 6], 5); \
|
|
||||||
ROUND1(C, D, E, A, B, F5, 5, in[15], 5); \
|
|
||||||
ROUND1(B, C, D, E, A, F5, 6, in[13], 5); \
|
|
||||||
\
|
|
||||||
ROUND2(A, B, C, D, E, F5, 8, in[ 5], 1); \
|
|
||||||
ROUND2(E, A, B, C, D, F5, 9, in[14], 1); \
|
|
||||||
ROUND2(D, E, A, B, C, F5, 9, in[ 7], 1); \
|
|
||||||
ROUND2(C, D, E, A, B, F5, 11, in[ 0], 1); \
|
|
||||||
ROUND2(B, C, D, E, A, F5, 13, in[ 9], 1); \
|
|
||||||
ROUND2(A, B, C, D, E, F5, 15, in[ 2], 1); \
|
|
||||||
ROUND2(E, A, B, C, D, F5, 15, in[11], 1); \
|
|
||||||
ROUND2(D, E, A, B, C, F5, 5, in[ 4], 1); \
|
|
||||||
ROUND2(C, D, E, A, B, F5, 7, in[13], 1); \
|
|
||||||
ROUND2(B, C, D, E, A, F5, 7, in[ 6], 1); \
|
|
||||||
ROUND2(A, B, C, D, E, F5, 8, in[15], 1); \
|
|
||||||
ROUND2(E, A, B, C, D, F5, 11, in[ 8], 1); \
|
|
||||||
ROUND2(D, E, A, B, C, F5, 14, in[ 1], 1); \
|
|
||||||
ROUND2(C, D, E, A, B, F5, 14, in[10], 1); \
|
|
||||||
ROUND2(B, C, D, E, A, F5, 12, in[ 3], 1); \
|
|
||||||
ROUND2(A, B, C, D, E, F5, 6, in[12], 1); \
|
|
||||||
\
|
|
||||||
ROUND2(E, A, B, C, D, F4, 9, in[ 6], 2); \
|
|
||||||
ROUND2(D, E, A, B, C, F4, 13, in[11], 2); \
|
|
||||||
ROUND2(C, D, E, A, B, F4, 15, in[ 3], 2); \
|
|
||||||
ROUND2(B, C, D, E, A, F4, 7, in[ 7], 2); \
|
|
||||||
ROUND2(A, B, C, D, E, F4, 12, in[ 0], 2); \
|
|
||||||
ROUND2(E, A, B, C, D, F4, 8, in[13], 2); \
|
|
||||||
ROUND2(D, E, A, B, C, F4, 9, in[ 5], 2); \
|
|
||||||
ROUND2(C, D, E, A, B, F4, 11, in[10], 2); \
|
|
||||||
ROUND2(B, C, D, E, A, F4, 7, in[14], 2); \
|
|
||||||
ROUND2(A, B, C, D, E, F4, 7, in[15], 2); \
|
|
||||||
ROUND2(E, A, B, C, D, F4, 12, in[ 8], 2); \
|
|
||||||
ROUND2(D, E, A, B, C, F4, 7, in[12], 2); \
|
|
||||||
ROUND2(C, D, E, A, B, F4, 6, in[ 4], 2); \
|
|
||||||
ROUND2(B, C, D, E, A, F4, 15, in[ 9], 2); \
|
|
||||||
ROUND2(A, B, C, D, E, F4, 13, in[ 1], 2); \
|
|
||||||
ROUND2(E, A, B, C, D, F4, 11, in[ 2], 2); \
|
|
||||||
\
|
|
||||||
ROUND2(D, E, A, B, C, F3, 9, in[15], 3); \
|
|
||||||
ROUND2(C, D, E, A, B, F3, 7, in[ 5], 3); \
|
|
||||||
ROUND2(B, C, D, E, A, F3, 15, in[ 1], 3); \
|
|
||||||
ROUND2(A, B, C, D, E, F3, 11, in[ 3], 3); \
|
|
||||||
ROUND2(E, A, B, C, D, F3, 8, in[ 7], 3); \
|
|
||||||
ROUND2(D, E, A, B, C, F3, 6, in[14], 3); \
|
|
||||||
ROUND2(C, D, E, A, B, F3, 6, in[ 6], 3); \
|
|
||||||
ROUND2(B, C, D, E, A, F3, 14, in[ 9], 3); \
|
|
||||||
ROUND2(A, B, C, D, E, F3, 12, in[11], 3); \
|
|
||||||
ROUND2(E, A, B, C, D, F3, 13, in[ 8], 3); \
|
|
||||||
ROUND2(D, E, A, B, C, F3, 5, in[12], 3); \
|
|
||||||
ROUND2(C, D, E, A, B, F3, 14, in[ 2], 3); \
|
|
||||||
ROUND2(B, C, D, E, A, F3, 13, in[10], 3); \
|
|
||||||
ROUND2(A, B, C, D, E, F3, 13, in[ 0], 3); \
|
|
||||||
ROUND2(E, A, B, C, D, F3, 7, in[ 4], 3); \
|
|
||||||
ROUND2(D, E, A, B, C, F3, 5, in[13], 3); \
|
|
||||||
\
|
|
||||||
ROUND2(C, D, E, A, B, F2, 15, in[ 8], 4); \
|
|
||||||
ROUND2(B, C, D, E, A, F2, 5, in[ 6], 4); \
|
|
||||||
ROUND2(A, B, C, D, E, F2, 8, in[ 4], 4); \
|
|
||||||
ROUND2(E, A, B, C, D, F2, 11, in[ 1], 4); \
|
|
||||||
ROUND2(D, E, A, B, C, F2, 14, in[ 3], 4); \
|
|
||||||
ROUND2(C, D, E, A, B, F2, 14, in[11], 4); \
|
|
||||||
ROUND2(B, C, D, E, A, F2, 6, in[15], 4); \
|
|
||||||
ROUND2(A, B, C, D, E, F2, 14, in[ 0], 4); \
|
|
||||||
ROUND2(E, A, B, C, D, F2, 6, in[ 5], 4); \
|
|
||||||
ROUND2(D, E, A, B, C, F2, 9, in[12], 4); \
|
|
||||||
ROUND2(C, D, E, A, B, F2, 12, in[ 2], 4); \
|
|
||||||
ROUND2(B, C, D, E, A, F2, 9, in[13], 4); \
|
|
||||||
ROUND2(A, B, C, D, E, F2, 12, in[ 9], 4); \
|
|
||||||
ROUND2(E, A, B, C, D, F2, 5, in[ 7], 4); \
|
|
||||||
ROUND2(D, E, A, B, C, F2, 15, in[10], 4); \
|
|
||||||
ROUND2(C, D, E, A, B, F2, 8, in[14], 4); \
|
|
||||||
\
|
|
||||||
ROUND2(B, C, D, E, A, F1, 8, in[12], 5); \
|
|
||||||
ROUND2(A, B, C, D, E, F1, 5, in[15], 5); \
|
|
||||||
ROUND2(E, A, B, C, D, F1, 12, in[10], 5); \
|
|
||||||
ROUND2(D, E, A, B, C, F1, 9, in[ 4], 5); \
|
|
||||||
ROUND2(C, D, E, A, B, F1, 12, in[ 1], 5); \
|
|
||||||
ROUND2(B, C, D, E, A, F1, 5, in[ 5], 5); \
|
|
||||||
ROUND2(A, B, C, D, E, F1, 14, in[ 8], 5); \
|
|
||||||
ROUND2(E, A, B, C, D, F1, 6, in[ 7], 5); \
|
|
||||||
ROUND2(D, E, A, B, C, F1, 8, in[ 6], 5); \
|
|
||||||
ROUND2(C, D, E, A, B, F1, 13, in[ 2], 5); \
|
|
||||||
ROUND2(B, C, D, E, A, F1, 6, in[13], 5); \
|
|
||||||
ROUND2(A, B, C, D, E, F1, 5, in[14], 5); \
|
|
||||||
ROUND2(E, A, B, C, D, F1, 15, in[ 0], 5); \
|
|
||||||
ROUND2(D, E, A, B, C, F1, 13, in[ 3], 5); \
|
|
||||||
ROUND2(C, D, E, A, B, F1, 11, in[ 9], 5); \
|
|
||||||
ROUND2(B, C, D, E, A, F1, 11, in[11], 5); \
|
|
||||||
\
|
|
||||||
tmp = (h[1] + C1 + D2); \
|
|
||||||
h[1] = (h[2] + D1 + E2); \
|
|
||||||
h[2] = (h[3] + E1 + A2); \
|
|
||||||
h[3] = (h[4] + A1 + B2); \
|
|
||||||
h[4] = (h[0] + B1 + C2); \
|
|
||||||
h[0] = tmp; \
|
|
||||||
}
|
|
||||||
|
|
||||||
__global__
|
|
||||||
void lbry_ripemd160_gpu_hash_32x2(const uint32_t threads, uint64_t *g_hash)
|
|
||||||
{
|
|
||||||
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
|
||||||
if (thread < threads)
|
|
||||||
{
|
|
||||||
uint32_t *hash = (uint32_t*) (&g_hash[thread * 8U]);
|
|
||||||
|
|
||||||
uint32_t in[16];
|
|
||||||
#pragma unroll
|
|
||||||
for (int i=0; i<8; i++)
|
|
||||||
in[i] = (hash[i]);
|
|
||||||
in[8] = 0x80;
|
|
||||||
|
|
||||||
#pragma unroll
|
|
||||||
for (int i=9;i<16;i++) in[i] = 0;
|
|
||||||
|
|
||||||
in[14] = 0x100; // size in bits
|
|
||||||
|
|
||||||
uint32_t h[5];
|
|
||||||
#pragma unroll
|
|
||||||
for (int i=0; i<5; i++)
|
|
||||||
h[i] = c_IV[i];
|
|
||||||
|
|
||||||
RIPEMD160_ROUND_BODY(in, h);
|
|
||||||
|
|
||||||
#pragma unroll
|
|
||||||
for (int i=0; i<5; i++)
|
|
||||||
hash[i] = h[i];
|
|
||||||
|
|
||||||
#ifdef PAD_ZEROS
|
|
||||||
// 20 bytes hash on 32 output space
|
|
||||||
hash[5] = 0;
|
|
||||||
hash[6] = 0;
|
|
||||||
hash[7] = 0;
|
|
||||||
#endif
|
|
||||||
// second 32 bytes block hash
|
|
||||||
hash += 8;
|
|
||||||
|
|
||||||
#pragma unroll
|
|
||||||
for (int i=0; i<8; i++)
|
|
||||||
in[i] = (hash[i]);
|
|
||||||
in[8] = 0x80;
|
|
||||||
|
|
||||||
#pragma unroll
|
|
||||||
for (int i=9;i<16;i++) in[i] = 0;
|
|
||||||
|
|
||||||
in[14] = 0x100; // size in bits
|
|
||||||
|
|
||||||
#pragma unroll
|
|
||||||
for (int i=0; i<5; i++)
|
|
||||||
h[i] = c_IV[i];
|
|
||||||
|
|
||||||
RIPEMD160_ROUND_BODY(in, h);
|
|
||||||
|
|
||||||
#pragma unroll
|
|
||||||
for (int i=0; i<5; i++)
|
|
||||||
hash[i] = h[i];
|
|
||||||
|
|
||||||
#ifdef PAD_ZEROS
|
|
||||||
// 20 bytes hash on 32 output space
|
|
||||||
hash[5] = 0;
|
|
||||||
hash[6] = 0;
|
|
||||||
hash[7] = 0;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
__host__
|
|
||||||
void lbry_ripemd160_hash_32x2(int thr_id, uint32_t threads, uint32_t *g_Hash, cudaStream_t stream)
|
|
||||||
{
|
|
||||||
const uint32_t threadsperblock = 128;
|
|
||||||
|
|
||||||
dim3 grid(threads/threadsperblock);
|
|
||||||
dim3 block(threadsperblock);
|
|
||||||
|
|
||||||
lbry_ripemd160_gpu_hash_32x2 <<<grid, block, 0, stream>>> (threads, (uint64_t*) g_Hash);
|
|
||||||
}
|
|
||||||
|
|
||||||
void lbry_ripemd160_init(int thr_id)
|
|
||||||
{
|
|
||||||
//cudaMemcpyToSymbol(c_IV, IV, sizeof(IV), 0, cudaMemcpyHostToDevice);
|
|
||||||
}
|
|
@ -21,6 +21,10 @@ static __thread uint32_t* d_resNonces;
|
|||||||
__constant__ static uint32_t __align__(8) c_target[2];
|
__constant__ static uint32_t __align__(8) c_target[2];
|
||||||
__device__ uint64_t d_target[1];
|
__device__ uint64_t d_target[1];
|
||||||
|
|
||||||
|
#ifdef __INTELLISENSE__
|
||||||
|
#define atomicExch(p,y) y
|
||||||
|
#endif
|
||||||
|
|
||||||
// ------------------------------------------------------------------------------------------------
|
// ------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
static const uint32_t cpu_H256[8] = {
|
static const uint32_t cpu_H256[8] = {
|
||||||
@ -324,8 +328,8 @@ uint64_t cuda_swab32ll(uint64_t x) {
|
|||||||
return MAKE_ULONGLONG(cuda_swab32(_LODWORD(x)), cuda_swab32(_HIDWORD(x)));
|
return MAKE_ULONGLONG(cuda_swab32(_LODWORD(x)), cuda_swab32(_HIDWORD(x)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
__global__
|
__global__
|
||||||
/*__launch_bounds__(256,3)*/
|
|
||||||
void lbry_sha256_gpu_hash_112(const uint32_t threads, const uint32_t startNonce, const bool swabNonce, uint64_t *outputHash)
|
void lbry_sha256_gpu_hash_112(const uint32_t threads, const uint32_t startNonce, const bool swabNonce, uint64_t *outputHash)
|
||||||
{
|
{
|
||||||
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||||
@ -359,7 +363,6 @@ void lbry_sha256_gpu_hash_112(const uint32_t threads, const uint32_t startNonce,
|
|||||||
}
|
}
|
||||||
|
|
||||||
__global__
|
__global__
|
||||||
/*__launch_bounds__(256,3)*/
|
|
||||||
void lbry_sha256_gpu_hash_32(uint32_t threads, uint64_t *Hash512)
|
void lbry_sha256_gpu_hash_32(uint32_t threads, uint64_t *Hash512)
|
||||||
{
|
{
|
||||||
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||||
@ -396,8 +399,31 @@ void lbry_sha256_gpu_hash_32(uint32_t threads, uint64_t *Hash512)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__host__
|
||||||
|
void lbry_sha256_hash_112(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_outputHash, bool swabNonce, cudaStream_t stream)
|
||||||
|
{
|
||||||
|
const int threadsperblock = 256;
|
||||||
|
|
||||||
|
dim3 grid(threads/threadsperblock);
|
||||||
|
dim3 block(threadsperblock);
|
||||||
|
|
||||||
|
lbry_sha256_gpu_hash_112 <<<grid, block, 0, stream>>> (threads, startNonce, swabNonce, (uint64_t*) d_outputHash);
|
||||||
|
cudaGetLastError();
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__
|
||||||
|
void lbry_sha256_hash_32(int thr_id, uint32_t threads, uint32_t *d_Hash, cudaStream_t stream)
|
||||||
|
{
|
||||||
|
const int threadsperblock = 256;
|
||||||
|
|
||||||
|
dim3 grid(threads/threadsperblock);
|
||||||
|
dim3 block(threadsperblock);
|
||||||
|
|
||||||
|
lbry_sha256_gpu_hash_32 <<<grid, block, 0, stream>>> (threads, (uint64_t*) d_Hash);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
__global__
|
__global__
|
||||||
/*__launch_bounds__(256,3)*/
|
|
||||||
void lbry_sha256d_gpu_hash_112(const uint32_t threads, const uint32_t startNonce, const bool swabNonce, uint64_t *outputHash)
|
void lbry_sha256d_gpu_hash_112(const uint32_t threads, const uint32_t startNonce, const bool swabNonce, uint64_t *outputHash)
|
||||||
{
|
{
|
||||||
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||||
@ -447,106 +473,15 @@ void lbry_sha256d_gpu_hash_112(const uint32_t threads, const uint32_t startNonce
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__
|
__host__
|
||||||
/*__launch_bounds__(256,3)*/
|
void lbry_sha256d_hash_112(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_outputHash, bool swabNonce, cudaStream_t stream)
|
||||||
void lbry_sha256_gpu_hash_20x2(uint32_t threads, uint64_t *Hash512)
|
|
||||||
{
|
{
|
||||||
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
const int threadsperblock = 256;
|
||||||
if (thread < threads)
|
|
||||||
{
|
|
||||||
uint32_t __align__(8) buf[8]; // align for vectorize
|
|
||||||
#pragma unroll
|
|
||||||
for (int i=0;i<8;i++) buf[i] = c_H256[i];
|
|
||||||
|
|
||||||
uint32_t* input = (uint32_t*) (&Hash512[thread * 8U]);
|
dim3 grid(threads/threadsperblock);
|
||||||
|
dim3 block(threadsperblock);
|
||||||
|
|
||||||
uint32_t dat[16];
|
lbry_sha256d_gpu_hash_112 <<<grid, block, 64*4, stream>>> (threads, startNonce, swabNonce, (uint64_t*) d_outputHash);
|
||||||
#pragma unroll
|
|
||||||
for (int i=0;i<5;i++) dat[i] = cuda_swab32(input[i]);
|
|
||||||
#pragma unroll
|
|
||||||
for (int i=0;i<5;i++) dat[i+5] = cuda_swab32(input[i+8]);
|
|
||||||
dat[10] = 0x80000000;
|
|
||||||
#pragma unroll
|
|
||||||
for (int i=11;i<15;i++) dat[i] = 0;
|
|
||||||
dat[15] = 0x140;
|
|
||||||
|
|
||||||
sha256_round_body(dat, buf, c_K);
|
|
||||||
|
|
||||||
// output
|
|
||||||
uint2* output = (uint2*) input;
|
|
||||||
#pragma unroll
|
|
||||||
for (int i=0;i<4;i++) {
|
|
||||||
//output[i] = vectorize(cuda_swab32ll(((uint64_t*)buf)[i]));
|
|
||||||
output[i] = vectorize(((uint64_t*)buf)[i]);
|
|
||||||
}
|
|
||||||
#ifdef PAD_ZEROS
|
|
||||||
#pragma unroll
|
|
||||||
for (int i=4; i<8; i++) output[i] = vectorize(0);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
__global__
|
|
||||||
/*__launch_bounds__(256,3)*/
|
|
||||||
void lbry_sha256d_gpu_hash_20x2(uint32_t threads, uint64_t *Hash512)
|
|
||||||
{
|
|
||||||
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
|
||||||
extern __shared__ uint32_t s_K[];
|
|
||||||
if (threadIdx.x < 64U) s_K[threadIdx.x] = c_K[threadIdx.x];
|
|
||||||
if (thread < threads)
|
|
||||||
{
|
|
||||||
uint32_t __align__(8) buf[8]; // align for vectorize
|
|
||||||
#pragma unroll
|
|
||||||
for (int i=0;i<8;i++) buf[i] = c_H256[i];
|
|
||||||
|
|
||||||
uint32_t* input = (uint32_t*) (&Hash512[thread * 8U]);
|
|
||||||
|
|
||||||
uint32_t dat[16];
|
|
||||||
#pragma unroll
|
|
||||||
for (int i=0; i<5; i++) dat[i] = cuda_swab32(input[i]);
|
|
||||||
#pragma unroll
|
|
||||||
for (int i=0; i<5; i++) dat[i+5] = cuda_swab32(input[i+8]);
|
|
||||||
dat[10] = 0x80000000;
|
|
||||||
#pragma unroll
|
|
||||||
for (int i=11;i<15;i++) dat[i] = 0;
|
|
||||||
dat[15] = 0x140;
|
|
||||||
|
|
||||||
sha256_round_body(dat, buf, s_K);
|
|
||||||
|
|
||||||
// second sha256
|
|
||||||
|
|
||||||
#pragma unroll
|
|
||||||
for (int i=0; i<8; i++) dat[i] = buf[i];
|
|
||||||
dat[8] = 0x80000000;
|
|
||||||
#pragma unroll
|
|
||||||
for (int i=9; i<15; i++) dat[i] = 0;
|
|
||||||
dat[15] = 0x100;
|
|
||||||
|
|
||||||
#pragma unroll
|
|
||||||
for (int i=0;i<8;i++) buf[i] = c_H256[i];
|
|
||||||
|
|
||||||
sha256_round_body(dat, buf, s_K);
|
|
||||||
|
|
||||||
// output
|
|
||||||
uint2* output = (uint2*) input;
|
|
||||||
|
|
||||||
#ifdef FULL_HASH
|
|
||||||
#pragma unroll
|
|
||||||
for (int i=0;i<4;i++) {
|
|
||||||
output[i] = vectorize(cuda_swab32ll(((uint64_t*)buf)[i]));
|
|
||||||
//output[i] = vectorize(((uint64_t*)buf)[i]);
|
|
||||||
}
|
|
||||||
# ifdef PAD_ZEROS
|
|
||||||
#pragma unroll
|
|
||||||
for (int i=4; i<8; i++) output[i] = vectorize(0);
|
|
||||||
# endif
|
|
||||||
|
|
||||||
#else
|
|
||||||
//input[6] = cuda_swab32(buf[6]);
|
|
||||||
//input[7] = cuda_swab32(buf[7]);
|
|
||||||
output[3] = vectorize(cuda_swab32ll(((uint64_t*)buf)[3]));
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__
|
__host__
|
||||||
@ -578,60 +513,243 @@ void lbry_sha256_setBlock_112(uint32_t *pdata, uint32_t *ptarget)
|
|||||||
CUDA_SAFE_CALL(cudaMemcpyToSymbol(d_target, &ptarget[6], sizeof(uint64_t), 0, cudaMemcpyHostToDevice));
|
CUDA_SAFE_CALL(cudaMemcpyToSymbol(d_target, &ptarget[6], sizeof(uint64_t), 0, cudaMemcpyHostToDevice));
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__
|
// ------------------------------------------------------------------------------------------
|
||||||
void lbry_sha256_hash_112(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_outputHash, bool swabNonce, cudaStream_t stream)
|
|
||||||
{
|
|
||||||
const int threadsperblock = 256;
|
|
||||||
|
|
||||||
dim3 grid(threads/threadsperblock);
|
|
||||||
dim3 block(threadsperblock);
|
|
||||||
|
|
||||||
lbry_sha256_gpu_hash_112 <<<grid, block, 0, stream>>> (threads, startNonce, swabNonce, (uint64_t*) d_outputHash);
|
static __constant__ uint32_t c_IV[5] = {
|
||||||
cudaGetLastError();
|
0x67452301u, 0xEFCDAB89u, 0x98BADCFEu, 0x10325476u, 0xC3D2E1F0u
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Round functions for RIPEMD-128 and RIPEMD-160.
|
||||||
|
*/
|
||||||
|
#if 1
|
||||||
|
#define F1(x, y, z) ((x) ^ (y) ^ (z))
|
||||||
|
#define F2(x, y, z) ((((y) ^ (z)) & (x)) ^ (z))
|
||||||
|
#define F3(x, y, z) (((x) | ~(y)) ^ (z))
|
||||||
|
#define F4(x, y, z) ((((x) ^ (y)) & (z)) ^ (y))
|
||||||
|
#define F5(x, y, z) ((x) ^ ((y) | ~(z)))
|
||||||
|
#else
|
||||||
|
#define F1(x, y, z) xor3b(x,y,z)
|
||||||
|
#define F2(x, y, z) xandx(x,y,z)
|
||||||
|
#define F3(x, y, z) xornot64(x,y,z)
|
||||||
|
#define F4(x, y, z) xandx(z,x,y)
|
||||||
|
#define F5(x, y, z) xornt64(x,y,z)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Round constants for RIPEMD-160.
|
||||||
|
*/
|
||||||
|
#define K11 0x00000000u
|
||||||
|
#define K12 0x5A827999u
|
||||||
|
#define K13 0x6ED9EBA1u
|
||||||
|
#define K14 0x8F1BBCDCu
|
||||||
|
#define K15 0xA953FD4Eu
|
||||||
|
|
||||||
|
#define K21 0x50A28BE6u
|
||||||
|
#define K22 0x5C4DD124u
|
||||||
|
#define K23 0x6D703EF3u
|
||||||
|
#define K24 0x7A6D76E9u
|
||||||
|
#define K25 0x00000000u
|
||||||
|
|
||||||
|
#define RR(a, b, c, d, e, f, s, r, k) { \
|
||||||
|
a = SPH_T32(ROTL32(SPH_T32(a + f(b, c, d) + r + k), s) + e); \
|
||||||
|
c = ROTL32(c, 10); \
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__
|
#define ROUND1(a, b, c, d, e, f, s, r, k) \
|
||||||
void lbry_sha256_hash_32(int thr_id, uint32_t threads, uint32_t *d_Hash, cudaStream_t stream)
|
RR(a ## 1, b ## 1, c ## 1, d ## 1, e ## 1, f, s, r, K1 ## k)
|
||||||
{
|
|
||||||
const int threadsperblock = 256;
|
|
||||||
|
|
||||||
dim3 grid(threads/threadsperblock);
|
#define ROUND2(a, b, c, d, e, f, s, r, k) \
|
||||||
dim3 block(threadsperblock);
|
RR(a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k)
|
||||||
|
|
||||||
lbry_sha256_gpu_hash_32 <<<grid, block, 0, stream>>> (threads, (uint64_t*) d_Hash);
|
#define RIPEMD160_ROUND_BODY(in, h) { \
|
||||||
}
|
uint32_t A1, B1, C1, D1, E1; \
|
||||||
|
uint32_t A2, B2, C2, D2, E2; \
|
||||||
__host__
|
uint32_t tmp; \
|
||||||
void lbry_sha256d_hash_112(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_outputHash, bool swabNonce, cudaStream_t stream)
|
\
|
||||||
{
|
A1 = A2 = h[0]; \
|
||||||
const int threadsperblock = 256;
|
B1 = B2 = h[1]; \
|
||||||
|
C1 = C2 = h[2]; \
|
||||||
dim3 grid(threads/threadsperblock);
|
D1 = D2 = h[3]; \
|
||||||
dim3 block(threadsperblock);
|
E1 = E2 = h[4]; \
|
||||||
|
\
|
||||||
lbry_sha256d_gpu_hash_112 <<<grid, block, 64*4, stream>>> (threads, startNonce, swabNonce, (uint64_t*) d_outputHash);
|
ROUND1(A, B, C, D, E, F1, 11, in[ 0], 1); \
|
||||||
}
|
ROUND1(E, A, B, C, D, F1, 14, in[ 1], 1); \
|
||||||
|
ROUND1(D, E, A, B, C, F1, 15, in[ 2], 1); \
|
||||||
__host__
|
ROUND1(C, D, E, A, B, F1, 12, in[ 3], 1); \
|
||||||
void lbry_sha256_hash_20x2(int thr_id, uint32_t threads, uint32_t *d_Hash, cudaStream_t stream)
|
ROUND1(B, C, D, E, A, F1, 5, in[ 4], 1); \
|
||||||
{
|
ROUND1(A, B, C, D, E, F1, 8, in[ 5], 1); \
|
||||||
const int threadsperblock = 256;
|
ROUND1(E, A, B, C, D, F1, 7, in[ 6], 1); \
|
||||||
|
ROUND1(D, E, A, B, C, F1, 9, in[ 7], 1); \
|
||||||
dim3 grid(threads/threadsperblock);
|
ROUND1(C, D, E, A, B, F1, 11, in[ 8], 1); \
|
||||||
dim3 block(threadsperblock);
|
ROUND1(B, C, D, E, A, F1, 13, in[ 9], 1); \
|
||||||
|
ROUND1(A, B, C, D, E, F1, 14, in[10], 1); \
|
||||||
lbry_sha256_gpu_hash_20x2 <<<grid, block, 0, stream>>> (threads, (uint64_t*) d_Hash);
|
ROUND1(E, A, B, C, D, F1, 15, in[11], 1); \
|
||||||
}
|
ROUND1(D, E, A, B, C, F1, 6, in[12], 1); \
|
||||||
|
ROUND1(C, D, E, A, B, F1, 7, in[13], 1); \
|
||||||
__host__
|
ROUND1(B, C, D, E, A, F1, 9, in[14], 1); \
|
||||||
void lbry_sha256d_hash_20x2(int thr_id, uint32_t threads, uint32_t *d_Hash, cudaStream_t stream)
|
ROUND1(A, B, C, D, E, F1, 8, in[15], 1); \
|
||||||
{
|
\
|
||||||
const int threadsperblock = 256;
|
ROUND1(E, A, B, C, D, F2, 7, in[ 7], 2); \
|
||||||
|
ROUND1(D, E, A, B, C, F2, 6, in[ 4], 2); \
|
||||||
dim3 grid(threads/threadsperblock);
|
ROUND1(C, D, E, A, B, F2, 8, in[13], 2); \
|
||||||
dim3 block(threadsperblock);
|
ROUND1(B, C, D, E, A, F2, 13, in[ 1], 2); \
|
||||||
|
ROUND1(A, B, C, D, E, F2, 11, in[10], 2); \
|
||||||
lbry_sha256d_gpu_hash_20x2 <<<grid, block, 64*4, stream>>> (threads, (uint64_t*) d_Hash);
|
ROUND1(E, A, B, C, D, F2, 9, in[ 6], 2); \
|
||||||
|
ROUND1(D, E, A, B, C, F2, 7, in[15], 2); \
|
||||||
|
ROUND1(C, D, E, A, B, F2, 15, in[ 3], 2); \
|
||||||
|
ROUND1(B, C, D, E, A, F2, 7, in[12], 2); \
|
||||||
|
ROUND1(A, B, C, D, E, F2, 12, in[ 0], 2); \
|
||||||
|
ROUND1(E, A, B, C, D, F2, 15, in[ 9], 2); \
|
||||||
|
ROUND1(D, E, A, B, C, F2, 9, in[ 5], 2); \
|
||||||
|
ROUND1(C, D, E, A, B, F2, 11, in[ 2], 2); \
|
||||||
|
ROUND1(B, C, D, E, A, F2, 7, in[14], 2); \
|
||||||
|
ROUND1(A, B, C, D, E, F2, 13, in[11], 2); \
|
||||||
|
ROUND1(E, A, B, C, D, F2, 12, in[ 8], 2); \
|
||||||
|
\
|
||||||
|
ROUND1(D, E, A, B, C, F3, 11, in[ 3], 3); \
|
||||||
|
ROUND1(C, D, E, A, B, F3, 13, in[10], 3); \
|
||||||
|
ROUND1(B, C, D, E, A, F3, 6, in[14], 3); \
|
||||||
|
ROUND1(A, B, C, D, E, F3, 7, in[ 4], 3); \
|
||||||
|
ROUND1(E, A, B, C, D, F3, 14, in[ 9], 3); \
|
||||||
|
ROUND1(D, E, A, B, C, F3, 9, in[15], 3); \
|
||||||
|
ROUND1(C, D, E, A, B, F3, 13, in[ 8], 3); \
|
||||||
|
ROUND1(B, C, D, E, A, F3, 15, in[ 1], 3); \
|
||||||
|
ROUND1(A, B, C, D, E, F3, 14, in[ 2], 3); \
|
||||||
|
ROUND1(E, A, B, C, D, F3, 8, in[ 7], 3); \
|
||||||
|
ROUND1(D, E, A, B, C, F3, 13, in[ 0], 3); \
|
||||||
|
ROUND1(C, D, E, A, B, F3, 6, in[ 6], 3); \
|
||||||
|
ROUND1(B, C, D, E, A, F3, 5, in[13], 3); \
|
||||||
|
ROUND1(A, B, C, D, E, F3, 12, in[11], 3); \
|
||||||
|
ROUND1(E, A, B, C, D, F3, 7, in[ 5], 3); \
|
||||||
|
ROUND1(D, E, A, B, C, F3, 5, in[12], 3); \
|
||||||
|
\
|
||||||
|
ROUND1(C, D, E, A, B, F4, 11, in[ 1], 4); \
|
||||||
|
ROUND1(B, C, D, E, A, F4, 12, in[ 9], 4); \
|
||||||
|
ROUND1(A, B, C, D, E, F4, 14, in[11], 4); \
|
||||||
|
ROUND1(E, A, B, C, D, F4, 15, in[10], 4); \
|
||||||
|
ROUND1(D, E, A, B, C, F4, 14, in[ 0], 4); \
|
||||||
|
ROUND1(C, D, E, A, B, F4, 15, in[ 8], 4); \
|
||||||
|
ROUND1(B, C, D, E, A, F4, 9, in[12], 4); \
|
||||||
|
ROUND1(A, B, C, D, E, F4, 8, in[ 4], 4); \
|
||||||
|
ROUND1(E, A, B, C, D, F4, 9, in[13], 4); \
|
||||||
|
ROUND1(D, E, A, B, C, F4, 14, in[ 3], 4); \
|
||||||
|
ROUND1(C, D, E, A, B, F4, 5, in[ 7], 4); \
|
||||||
|
ROUND1(B, C, D, E, A, F4, 6, in[15], 4); \
|
||||||
|
ROUND1(A, B, C, D, E, F4, 8, in[14], 4); \
|
||||||
|
ROUND1(E, A, B, C, D, F4, 6, in[ 5], 4); \
|
||||||
|
ROUND1(D, E, A, B, C, F4, 5, in[ 6], 4); \
|
||||||
|
ROUND1(C, D, E, A, B, F4, 12, in[ 2], 4); \
|
||||||
|
\
|
||||||
|
ROUND1(B, C, D, E, A, F5, 9, in[ 4], 5); \
|
||||||
|
ROUND1(A, B, C, D, E, F5, 15, in[ 0], 5); \
|
||||||
|
ROUND1(E, A, B, C, D, F5, 5, in[ 5], 5); \
|
||||||
|
ROUND1(D, E, A, B, C, F5, 11, in[ 9], 5); \
|
||||||
|
ROUND1(C, D, E, A, B, F5, 6, in[ 7], 5); \
|
||||||
|
ROUND1(B, C, D, E, A, F5, 8, in[12], 5); \
|
||||||
|
ROUND1(A, B, C, D, E, F5, 13, in[ 2], 5); \
|
||||||
|
ROUND1(E, A, B, C, D, F5, 12, in[10], 5); \
|
||||||
|
ROUND1(D, E, A, B, C, F5, 5, in[14], 5); \
|
||||||
|
ROUND1(C, D, E, A, B, F5, 12, in[ 1], 5); \
|
||||||
|
ROUND1(B, C, D, E, A, F5, 13, in[ 3], 5); \
|
||||||
|
ROUND1(A, B, C, D, E, F5, 14, in[ 8], 5); \
|
||||||
|
ROUND1(E, A, B, C, D, F5, 11, in[11], 5); \
|
||||||
|
ROUND1(D, E, A, B, C, F5, 8, in[ 6], 5); \
|
||||||
|
ROUND1(C, D, E, A, B, F5, 5, in[15], 5); \
|
||||||
|
ROUND1(B, C, D, E, A, F5, 6, in[13], 5); \
|
||||||
|
\
|
||||||
|
ROUND2(A, B, C, D, E, F5, 8, in[ 5], 1); \
|
||||||
|
ROUND2(E, A, B, C, D, F5, 9, in[14], 1); \
|
||||||
|
ROUND2(D, E, A, B, C, F5, 9, in[ 7], 1); \
|
||||||
|
ROUND2(C, D, E, A, B, F5, 11, in[ 0], 1); \
|
||||||
|
ROUND2(B, C, D, E, A, F5, 13, in[ 9], 1); \
|
||||||
|
ROUND2(A, B, C, D, E, F5, 15, in[ 2], 1); \
|
||||||
|
ROUND2(E, A, B, C, D, F5, 15, in[11], 1); \
|
||||||
|
ROUND2(D, E, A, B, C, F5, 5, in[ 4], 1); \
|
||||||
|
ROUND2(C, D, E, A, B, F5, 7, in[13], 1); \
|
||||||
|
ROUND2(B, C, D, E, A, F5, 7, in[ 6], 1); \
|
||||||
|
ROUND2(A, B, C, D, E, F5, 8, in[15], 1); \
|
||||||
|
ROUND2(E, A, B, C, D, F5, 11, in[ 8], 1); \
|
||||||
|
ROUND2(D, E, A, B, C, F5, 14, in[ 1], 1); \
|
||||||
|
ROUND2(C, D, E, A, B, F5, 14, in[10], 1); \
|
||||||
|
ROUND2(B, C, D, E, A, F5, 12, in[ 3], 1); \
|
||||||
|
ROUND2(A, B, C, D, E, F5, 6, in[12], 1); \
|
||||||
|
\
|
||||||
|
ROUND2(E, A, B, C, D, F4, 9, in[ 6], 2); \
|
||||||
|
ROUND2(D, E, A, B, C, F4, 13, in[11], 2); \
|
||||||
|
ROUND2(C, D, E, A, B, F4, 15, in[ 3], 2); \
|
||||||
|
ROUND2(B, C, D, E, A, F4, 7, in[ 7], 2); \
|
||||||
|
ROUND2(A, B, C, D, E, F4, 12, in[ 0], 2); \
|
||||||
|
ROUND2(E, A, B, C, D, F4, 8, in[13], 2); \
|
||||||
|
ROUND2(D, E, A, B, C, F4, 9, in[ 5], 2); \
|
||||||
|
ROUND2(C, D, E, A, B, F4, 11, in[10], 2); \
|
||||||
|
ROUND2(B, C, D, E, A, F4, 7, in[14], 2); \
|
||||||
|
ROUND2(A, B, C, D, E, F4, 7, in[15], 2); \
|
||||||
|
ROUND2(E, A, B, C, D, F4, 12, in[ 8], 2); \
|
||||||
|
ROUND2(D, E, A, B, C, F4, 7, in[12], 2); \
|
||||||
|
ROUND2(C, D, E, A, B, F4, 6, in[ 4], 2); \
|
||||||
|
ROUND2(B, C, D, E, A, F4, 15, in[ 9], 2); \
|
||||||
|
ROUND2(A, B, C, D, E, F4, 13, in[ 1], 2); \
|
||||||
|
ROUND2(E, A, B, C, D, F4, 11, in[ 2], 2); \
|
||||||
|
\
|
||||||
|
ROUND2(D, E, A, B, C, F3, 9, in[15], 3); \
|
||||||
|
ROUND2(C, D, E, A, B, F3, 7, in[ 5], 3); \
|
||||||
|
ROUND2(B, C, D, E, A, F3, 15, in[ 1], 3); \
|
||||||
|
ROUND2(A, B, C, D, E, F3, 11, in[ 3], 3); \
|
||||||
|
ROUND2(E, A, B, C, D, F3, 8, in[ 7], 3); \
|
||||||
|
ROUND2(D, E, A, B, C, F3, 6, in[14], 3); \
|
||||||
|
ROUND2(C, D, E, A, B, F3, 6, in[ 6], 3); \
|
||||||
|
ROUND2(B, C, D, E, A, F3, 14, in[ 9], 3); \
|
||||||
|
ROUND2(A, B, C, D, E, F3, 12, in[11], 3); \
|
||||||
|
ROUND2(E, A, B, C, D, F3, 13, in[ 8], 3); \
|
||||||
|
ROUND2(D, E, A, B, C, F3, 5, in[12], 3); \
|
||||||
|
ROUND2(C, D, E, A, B, F3, 14, in[ 2], 3); \
|
||||||
|
ROUND2(B, C, D, E, A, F3, 13, in[10], 3); \
|
||||||
|
ROUND2(A, B, C, D, E, F3, 13, in[ 0], 3); \
|
||||||
|
ROUND2(E, A, B, C, D, F3, 7, in[ 4], 3); \
|
||||||
|
ROUND2(D, E, A, B, C, F3, 5, in[13], 3); \
|
||||||
|
\
|
||||||
|
ROUND2(C, D, E, A, B, F2, 15, in[ 8], 4); \
|
||||||
|
ROUND2(B, C, D, E, A, F2, 5, in[ 6], 4); \
|
||||||
|
ROUND2(A, B, C, D, E, F2, 8, in[ 4], 4); \
|
||||||
|
ROUND2(E, A, B, C, D, F2, 11, in[ 1], 4); \
|
||||||
|
ROUND2(D, E, A, B, C, F2, 14, in[ 3], 4); \
|
||||||
|
ROUND2(C, D, E, A, B, F2, 14, in[11], 4); \
|
||||||
|
ROUND2(B, C, D, E, A, F2, 6, in[15], 4); \
|
||||||
|
ROUND2(A, B, C, D, E, F2, 14, in[ 0], 4); \
|
||||||
|
ROUND2(E, A, B, C, D, F2, 6, in[ 5], 4); \
|
||||||
|
ROUND2(D, E, A, B, C, F2, 9, in[12], 4); \
|
||||||
|
ROUND2(C, D, E, A, B, F2, 12, in[ 2], 4); \
|
||||||
|
ROUND2(B, C, D, E, A, F2, 9, in[13], 4); \
|
||||||
|
ROUND2(A, B, C, D, E, F2, 12, in[ 9], 4); \
|
||||||
|
ROUND2(E, A, B, C, D, F2, 5, in[ 7], 4); \
|
||||||
|
ROUND2(D, E, A, B, C, F2, 15, in[10], 4); \
|
||||||
|
ROUND2(C, D, E, A, B, F2, 8, in[14], 4); \
|
||||||
|
\
|
||||||
|
ROUND2(B, C, D, E, A, F1, 8, in[12], 5); \
|
||||||
|
ROUND2(A, B, C, D, E, F1, 5, in[15], 5); \
|
||||||
|
ROUND2(E, A, B, C, D, F1, 12, in[10], 5); \
|
||||||
|
ROUND2(D, E, A, B, C, F1, 9, in[ 4], 5); \
|
||||||
|
ROUND2(C, D, E, A, B, F1, 12, in[ 1], 5); \
|
||||||
|
ROUND2(B, C, D, E, A, F1, 5, in[ 5], 5); \
|
||||||
|
ROUND2(A, B, C, D, E, F1, 14, in[ 8], 5); \
|
||||||
|
ROUND2(E, A, B, C, D, F1, 6, in[ 7], 5); \
|
||||||
|
ROUND2(D, E, A, B, C, F1, 8, in[ 6], 5); \
|
||||||
|
ROUND2(C, D, E, A, B, F1, 13, in[ 2], 5); \
|
||||||
|
ROUND2(B, C, D, E, A, F1, 6, in[13], 5); \
|
||||||
|
ROUND2(A, B, C, D, E, F1, 5, in[14], 5); \
|
||||||
|
ROUND2(E, A, B, C, D, F1, 15, in[ 0], 5); \
|
||||||
|
ROUND2(D, E, A, B, C, F1, 13, in[ 3], 5); \
|
||||||
|
ROUND2(C, D, E, A, B, F1, 11, in[ 9], 5); \
|
||||||
|
ROUND2(B, C, D, E, A, F1, 11, in[11], 5); \
|
||||||
|
\
|
||||||
|
tmp = (h[1] + C1 + D2); \
|
||||||
|
h[1] = (h[2] + D1 + E2); \
|
||||||
|
h[2] = (h[3] + E1 + A2); \
|
||||||
|
h[3] = (h[4] + A1 + B2); \
|
||||||
|
h[4] = (h[0] + B1 + C2); \
|
||||||
|
h[0] = tmp; \
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__
|
__global__
|
||||||
@ -641,22 +759,63 @@ void lbry_sha256d_gpu_hash_final(const uint32_t threads, const uint32_t startNon
|
|||||||
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||||
if (thread < threads)
|
if (thread < threads)
|
||||||
{
|
{
|
||||||
uint32_t __align__(8) buf[8]; // align for vectorize
|
|
||||||
#pragma unroll
|
|
||||||
for (int i=0;i<8;i++) buf[i] = c_H256[i];
|
|
||||||
|
|
||||||
uint32_t* input = (uint32_t*) (&Hash512[thread * 8U]);
|
uint32_t* input = (uint32_t*) (&Hash512[thread * 8U]);
|
||||||
|
|
||||||
uint32_t __align__(8) dat[16];
|
uint32_t __align__(8) buf[8]; // align for vectorize
|
||||||
|
uint32_t dat[16];
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (int i=0;i<5;i++) dat[i] = cuda_swab32(input[i]);
|
for (int i=0; i<8; i++)
|
||||||
|
dat[i] = (input[i]);
|
||||||
|
dat[8] = 0x80;
|
||||||
|
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (int i=0;i<5;i++) dat[i+5] = cuda_swab32(input[i+8]);
|
for (int i=9;i<16;i++) dat[i] = 0;
|
||||||
|
|
||||||
|
dat[14] = 0x100; // size in bits
|
||||||
|
|
||||||
|
uint32_t h[5];
|
||||||
|
#pragma unroll
|
||||||
|
for (int i=0; i<5; i++)
|
||||||
|
h[i] = c_IV[i];
|
||||||
|
|
||||||
|
RIPEMD160_ROUND_BODY(dat, h);
|
||||||
|
|
||||||
|
#pragma unroll
|
||||||
|
for (int i=0; i<5; i++)
|
||||||
|
buf[i] = h[i];
|
||||||
|
|
||||||
|
// second 32 bytes block hash
|
||||||
|
|
||||||
|
#pragma unroll
|
||||||
|
for (int i=0; i<8; i++)
|
||||||
|
dat[i] = (input[8+i]);
|
||||||
|
dat[8] = 0x80;
|
||||||
|
|
||||||
|
#pragma unroll
|
||||||
|
for (int i=9;i<16;i++) dat[i] = 0;
|
||||||
|
|
||||||
|
dat[14] = 0x100; // size in bits
|
||||||
|
|
||||||
|
#pragma unroll
|
||||||
|
for (int i=0; i<5; i++)
|
||||||
|
h[i] = c_IV[i];
|
||||||
|
|
||||||
|
RIPEMD160_ROUND_BODY(dat, h);
|
||||||
|
|
||||||
|
// first final sha256
|
||||||
|
|
||||||
|
#pragma unroll
|
||||||
|
for (int i=0;i<5;i++) dat[i] = cuda_swab32(buf[i]);
|
||||||
|
#pragma unroll
|
||||||
|
for (int i=0;i<5;i++) dat[i+5] = cuda_swab32(h[i]);
|
||||||
dat[10] = 0x80000000;
|
dat[10] = 0x80000000;
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (int i=11;i<15;i++) dat[i] = 0;
|
for (int i=11;i<15;i++) dat[i] = 0;
|
||||||
dat[15] = 0x140;
|
dat[15] = 0x140;
|
||||||
|
|
||||||
|
#pragma unroll
|
||||||
|
for (int i=0;i<8;i++) buf[i] = c_H256[i];
|
||||||
|
|
||||||
sha256_round_body(dat, buf, c_K);
|
sha256_round_body(dat, buf, c_K);
|
||||||
|
|
||||||
// second sha256
|
// second sha256
|
||||||
|
16
lbry/lbry.cu
16
lbry/lbry.cu
@ -62,7 +62,6 @@ extern "C" void lbry_hash(void* output, const void* input)
|
|||||||
|
|
||||||
/* ############################################################################################################################### */
|
/* ############################################################################################################################### */
|
||||||
|
|
||||||
extern void lbry_ripemd160_init(int thr_id);
|
|
||||||
extern void lbry_sha256_init(int thr_id);
|
extern void lbry_sha256_init(int thr_id);
|
||||||
extern void lbry_sha256_free(int thr_id);
|
extern void lbry_sha256_free(int thr_id);
|
||||||
extern void lbry_sha256_setBlock_112(uint32_t *pdata, uint32_t *ptarget);
|
extern void lbry_sha256_setBlock_112(uint32_t *pdata, uint32_t *ptarget);
|
||||||
@ -71,9 +70,6 @@ extern void lbry_sha256d_hash_112(int thr_id, uint32_t threads, uint32_t startNo
|
|||||||
extern void lbry_sha256_hash_32(int thr_id, uint32_t threads, uint32_t *d_hash, cudaStream_t stream);
|
extern void lbry_sha256_hash_32(int thr_id, uint32_t threads, uint32_t *d_hash, cudaStream_t stream);
|
||||||
extern void lbry_sha512_init(int thr_id);
|
extern void lbry_sha512_init(int thr_id);
|
||||||
extern void lbry_sha512_hash_32(int thr_id, uint32_t threads, uint32_t *d_hash, cudaStream_t stream);
|
extern void lbry_sha512_hash_32(int thr_id, uint32_t threads, uint32_t *d_hash, cudaStream_t stream);
|
||||||
extern void lbry_ripemd160_hash_32x2(int thr_id, uint32_t threads, uint32_t *g_Hash, cudaStream_t stream);
|
|
||||||
extern void lbry_sha256_hash_20x2(int thr_id, uint32_t threads, uint32_t *g_Hash, cudaStream_t stream);
|
|
||||||
extern void lbry_sha256d_hash_20x2(int thr_id, uint32_t threads, uint32_t *g_Hash, cudaStream_t stream);
|
|
||||||
extern void lbry_sha256d_hash_final(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_inputHash, uint32_t *resNonces, cudaStream_t stream);
|
extern void lbry_sha256d_hash_final(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_inputHash, uint32_t *resNonces, cudaStream_t stream);
|
||||||
|
|
||||||
static __inline uint32_t swab32_if(uint32_t val, bool iftrue) {
|
static __inline uint32_t swab32_if(uint32_t val, bool iftrue) {
|
||||||
@ -122,7 +118,6 @@ extern "C" int scanhash_lbry(int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
|
|
||||||
lbry_sha256_init(thr_id);
|
lbry_sha256_init(thr_id);
|
||||||
lbry_sha512_init(thr_id);
|
lbry_sha512_init(thr_id);
|
||||||
lbry_ripemd160_init(thr_id);
|
|
||||||
cuda_check_cpu_init(thr_id, throughput);
|
cuda_check_cpu_init(thr_id, throughput);
|
||||||
CUDA_LOG_ERROR();
|
CUDA_LOG_ERROR();
|
||||||
|
|
||||||
@ -149,17 +144,10 @@ extern "C" int scanhash_lbry(int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
|
|
||||||
lbry_sha512_hash_32(thr_id, throughput, d_hash[thr_id], 0);
|
lbry_sha512_hash_32(thr_id, throughput, d_hash[thr_id], 0);
|
||||||
|
|
||||||
lbry_ripemd160_hash_32x2(thr_id, throughput, d_hash[thr_id], 0);
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
lbry_sha256d_hash_20x2(thr_id, throughput, d_hash[thr_id], 0);
|
|
||||||
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[LBC_NONCE_OFT32], d_hash[thr_id]);
|
|
||||||
#else
|
|
||||||
uint32_t resNonces[2] = { UINT32_MAX, UINT32_MAX };
|
uint32_t resNonces[2] = { UINT32_MAX, UINT32_MAX };
|
||||||
lbry_sha256d_hash_final(thr_id, throughput, pdata[LBC_NONCE_OFT32], d_hash[thr_id], resNonces, 0);
|
lbry_sha256d_hash_final(thr_id, throughput, pdata[LBC_NONCE_OFT32], d_hash[thr_id], resNonces, 0);
|
||||||
uint32_t foundNonce = resNonces[0];
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
uint32_t foundNonce = resNonces[0];
|
||||||
*hashes_done = pdata[LBC_NONCE_OFT32] - first_nonce + throughput;
|
*hashes_done = pdata[LBC_NONCE_OFT32] - first_nonce + throughput;
|
||||||
|
|
||||||
if (foundNonce != UINT32_MAX)
|
if (foundNonce != UINT32_MAX)
|
||||||
@ -170,11 +158,9 @@ extern "C" int scanhash_lbry(int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
|
if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
|
||||||
int res = 1;
|
int res = 1;
|
||||||
uint32_t secNonce = resNonces[1];
|
uint32_t secNonce = resNonces[1];
|
||||||
//uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[LBC_NONCE_OFT32], d_hash[thr_id], 1);
|
|
||||||
work->nonces[0] = swab32_if(foundNonce, swap);
|
work->nonces[0] = swab32_if(foundNonce, swap);
|
||||||
work_set_target_ratio(work, vhash);
|
work_set_target_ratio(work, vhash);
|
||||||
if (secNonce != UINT32_MAX) {
|
if (secNonce != UINT32_MAX) {
|
||||||
//if (secNonce) {
|
|
||||||
if (opt_debug)
|
if (opt_debug)
|
||||||
gpulog(LOG_BLUE, thr_id, "found second nonce %08x", swab32(secNonce));
|
gpulog(LOG_BLUE, thr_id, "found second nonce %08x", swab32(secNonce));
|
||||||
endiandata[LBC_NONCE_OFT32] = swab32_if(secNonce, !swap);
|
endiandata[LBC_NONCE_OFT32] = swab32_if(secNonce, !swap);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user