mirror of
https://github.com/GOSTSec/ccminer
synced 2025-01-24 13:34:19 +00:00
x16r algo and new kernels
Was a very long work but finally working, and unlike xevan these new kernels are reusable.. Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>
This commit is contained in:
parent
b71f85ec31
commit
78dad7dd65
@ -76,6 +76,8 @@ ccminer_SOURCES = elist.h miner.h compat.h \
|
||||
x13/hsr.cu x13/cuda_hsr_sm3.cu x13/sm3.c \
|
||||
x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu \
|
||||
x15/whirlpool.cu x15/cuda_x15_whirlpool_sm3.cu \
|
||||
x16r/x16r.cu x16r/cuda_x16_echo512.cu x16r/cuda_x16_fugue512.cu \
|
||||
x16r/cuda_x16_shabal512.cu x16r/cuda_x16_simd512_80.cu \
|
||||
x17/x17.cu x17/hmq17.cu x17/cuda_x17_haval256.cu x17/cuda_x17_sha512.cu \
|
||||
x11/phi.cu x11/cuda_streebog_maxwell.cu \
|
||||
x11/c11.cu x11/s3.cu x11/sib.cu x11/veltor.cu x11/cuda_streebog.cu
|
||||
|
@ -1,5 +1,5 @@
|
||||
|
||||
ccminer 2.2.4 (Jan. 2018) "lyra2v2 and keccak improvements"
|
||||
ccminer 2.2.5 (Feb 2018) "x16r algo"
|
||||
---------------------------------------------------------------
|
||||
|
||||
***************************************************************
|
||||
@ -122,6 +122,7 @@ its command line interface and options.
|
||||
x11 use to mine DarkCoin
|
||||
x14 use to mine X14Coin
|
||||
x15 use to mine Halcyon
|
||||
x16r use to mine Raven
|
||||
x17 use to mine X17
|
||||
vanilla use to mine Vanilla (Blake256)
|
||||
veltor use to mine VeltorCoin
|
||||
@ -277,6 +278,9 @@ so we can more efficiently implement new algorithms using the latest hardware
|
||||
features.
|
||||
|
||||
>>> RELEASE HISTORY <<<
|
||||
Feb. 2017 v2.2.5
|
||||
New x16r algo
|
||||
|
||||
Jan. 04th 2017 v2.2.4
|
||||
Improve lyra2v2
|
||||
Higher keccak default intensity
|
||||
|
2
algos.h
2
algos.h
@ -59,6 +59,7 @@ enum sha_algos {
|
||||
ALGO_X13,
|
||||
ALGO_X14,
|
||||
ALGO_X15,
|
||||
ALGO_X16R,
|
||||
ALGO_X17,
|
||||
ALGO_VANILLA,
|
||||
ALGO_VELTOR,
|
||||
@ -128,6 +129,7 @@ static const char *algo_names[] = {
|
||||
"x13",
|
||||
"x14",
|
||||
"x15",
|
||||
"x16r",
|
||||
"x17",
|
||||
"vanilla",
|
||||
"veltor",
|
||||
|
@ -102,6 +102,7 @@ void algo_free_all(int thr_id)
|
||||
free_x13(thr_id);
|
||||
free_x14(thr_id);
|
||||
free_x15(thr_id);
|
||||
free_x16r(thr_id);
|
||||
free_x17(thr_id);
|
||||
free_zr5(thr_id);
|
||||
free_scrypt(thr_id);
|
||||
|
@ -293,6 +293,7 @@ Options:\n\
|
||||
x13 X13 (MaruCoin)\n\
|
||||
x14 X14\n\
|
||||
x15 X15\n\
|
||||
x16r X16R (Raven)\n\
|
||||
x17 X17\n\
|
||||
wildkeccak Boolberry\n\
|
||||
zr5 ZR5 (ZiftrCoin)\n\
|
||||
@ -1705,6 +1706,7 @@ static bool stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
|
||||
case ALGO_LYRA2Z:
|
||||
case ALGO_TIMETRAVEL:
|
||||
case ALGO_BITCORE:
|
||||
case ALGO_X16R:
|
||||
work_set_target(work, sctx->job.diff / (256.0 * opt_difficulty));
|
||||
break;
|
||||
case ALGO_KECCAK:
|
||||
@ -2499,6 +2501,9 @@ static void *miner_thread(void *userdata)
|
||||
case ALGO_X15:
|
||||
rc = scanhash_x15(thr_id, &work, max_nonce, &hashes_done);
|
||||
break;
|
||||
case ALGO_X16R:
|
||||
rc = scanhash_x16r(thr_id, &work, max_nonce, &hashes_done);
|
||||
break;
|
||||
case ALGO_X17:
|
||||
rc = scanhash_x17(thr_id, &work, max_nonce, &hashes_done);
|
||||
break;
|
||||
|
@ -269,6 +269,7 @@
|
||||
<ClCompile Include="neoscrypt\neoscrypt-cpu.c" />
|
||||
<ClInclude Include="neoscrypt\cuda_vectors.h" />
|
||||
<ClInclude Include="x11\cuda_x11_simd512_sm2.cuh" />
|
||||
<ClInclude Include="x16r\cuda_x16r.h" />
|
||||
<CudaCompile Include="Algo256\bmw.cu" />
|
||||
<CudaCompile Include="Algo256\cuda_bmw.cu">
|
||||
<MaxRegCount>76</MaxRegCount>
|
||||
@ -434,6 +435,12 @@
|
||||
<CudaCompile Include="sha256\sha256d.cu" />
|
||||
<CudaCompile Include="sha256\cuda_sha256t.cu" />
|
||||
<CudaCompile Include="sha256\sha256t.cu" />
|
||||
<CudaCompile Include="x15\cuda_x15_whirlpool_sm3.cu" />
|
||||
<CudaCompile Include="x16r\x16r.cu" />
|
||||
<CudaCompile Include="x16r\cuda_x16_echo512.cu" />
|
||||
<CudaCompile Include="x16r\cuda_x16_fugue512.cu" />
|
||||
<CudaCompile Include="x16r\cuda_x16_shabal512.cu" />
|
||||
<CudaCompile Include="x16r\cuda_x16_simd512_80.cu" />
|
||||
<CudaCompile Include="zr5.cu" />
|
||||
<CudaCompile Include="heavy\cuda_blake512.cu">
|
||||
</CudaCompile>
|
||||
@ -587,8 +594,7 @@
|
||||
<CudaCompile Include="x17\hmq17.cu" />
|
||||
<CudaCompile Include="x15\x15.cu" />
|
||||
<CudaCompile Include="x15\whirlpool.cu" />
|
||||
<CudaCompile Include="x17\x17.cu">
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="x17\x17.cu" />
|
||||
<CudaCompile Include="x17\cuda_x17_haval256.cu">
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="x17\cuda_x17_sha512.cu">
|
||||
@ -615,4 +621,4 @@
|
||||
<Target Name="AfterClean">
|
||||
<Delete Files="@(FilesToCopy->'$(OutDir)%(Filename)%(Extension)')" TreatErrorsAsWarnings="true" />
|
||||
</Target>
|
||||
</Project>
|
||||
</Project>
|
@ -58,6 +58,9 @@
|
||||
<Filter Include="Source Files\CUDA\x15">
|
||||
<UniqueIdentifier>{a2403c22-6777-46ab-a55a-3fcc7386c974}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="Source Files\CUDA\x16r">
|
||||
<UniqueIdentifier>{55dfae6a-66ba-43e2-8ceb-98ee70cbdf16}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="Source Files\CUDA\x17">
|
||||
<UniqueIdentifier>{85dfae6a-66ca-4332-8cec-98ee70cbdf2f}</UniqueIdentifier>
|
||||
</Filter>
|
||||
@ -596,6 +599,9 @@
|
||||
<ClInclude Include="equi\equihash.h">
|
||||
<Filter>Source Files\equi</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="x16r\cuda_x16r.h">
|
||||
<Filter>Header Files\CUDA</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CudaCompile Include="cuda.cpp">
|
||||
@ -967,6 +973,24 @@
|
||||
<CudaCompile Include="equi\cuda_equi.cu">
|
||||
<Filter>Source Files\equi</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="x15\cuda_x15_whirlpool_sm3.cu">
|
||||
<Filter>Source Files\CUDA\x15</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="x16r\cuda_x16_echo512.cu">
|
||||
<Filter>Source Files\CUDA\x16r</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="x16r\cuda_x16_fugue512.cu">
|
||||
<Filter>Source Files\CUDA\x16r</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="x16r\cuda_x16_shabal512.cu">
|
||||
<Filter>Source Files\CUDA\x16r</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="x16r\cuda_x16_simd512_80.cu">
|
||||
<Filter>Source Files\CUDA\x16r</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="x16r\x16r.cu">
|
||||
<Filter>Source Files\CUDA\x16r</Filter>
|
||||
</CudaCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Image Include="res\ccminer.ico">
|
||||
@ -983,4 +1007,4 @@
|
||||
<Filter>Ressources</Filter>
|
||||
</Text>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
</Project>
|
@ -164,7 +164,7 @@
|
||||
#define PACKAGE_URL "http://github.com/tpruvot/ccminer"
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "2.2.4"
|
||||
#define PACKAGE_VERSION "2.2.5"
|
||||
|
||||
/* If using the C implementation of alloca, define if you know the
|
||||
direction of stack growth for your system; otherwise it will be
|
||||
|
3
miner.h
3
miner.h
@ -325,6 +325,7 @@ extern int scanhash_x11(int thr_id, struct work* work, uint32_t max_nonce, unsig
|
||||
extern int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
|
||||
extern int scanhash_x14(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
|
||||
extern int scanhash_x15(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
|
||||
extern int scanhash_x16r(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
|
||||
extern int scanhash_x17(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
|
||||
extern int scanhash_zr5(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done);
|
||||
|
||||
@ -389,6 +390,7 @@ extern void free_x11(int thr_id);
|
||||
extern void free_x13(int thr_id);
|
||||
extern void free_x14(int thr_id);
|
||||
extern void free_x15(int thr_id);
|
||||
extern void free_x16r(int thr_id);
|
||||
extern void free_x17(int thr_id);
|
||||
extern void free_zr5(int thr_id);
|
||||
//extern void free_sha256d(int thr_id);
|
||||
@ -933,6 +935,7 @@ void x11hash(void *output, const void *input);
|
||||
void x13hash(void *output, const void *input);
|
||||
void x14hash(void *output, const void *input);
|
||||
void x15hash(void *output, const void *input);
|
||||
void x16r_hash(void *output, const void *input);
|
||||
void x17hash(void *output, const void *input);
|
||||
void wildkeccak_hash(void *output, const void *input, uint64_t* scratchpad, uint64_t ssize);
|
||||
void zr5hash(void *output, const void *input);
|
||||
|
@ -60,8 +60,8 @@ IDI_ICON1 ICON "ccminer.ico"
|
||||
//
|
||||
|
||||
VS_VERSION_INFO VERSIONINFO
|
||||
FILEVERSION 2,2,4,0
|
||||
PRODUCTVERSION 2,2,4,0
|
||||
FILEVERSION 2,2,5,0
|
||||
PRODUCTVERSION 2,2,5,0
|
||||
FILEFLAGSMASK 0x3fL
|
||||
#ifdef _DEBUG
|
||||
FILEFLAGS 0x21L
|
||||
@ -76,10 +76,10 @@ BEGIN
|
||||
BEGIN
|
||||
BLOCK "040904e4"
|
||||
BEGIN
|
||||
VALUE "FileVersion", "2.2.4"
|
||||
VALUE "FileVersion", "2.2.5"
|
||||
VALUE "LegalCopyright", "Copyright (C) 2018"
|
||||
VALUE "ProductName", "ccminer"
|
||||
VALUE "ProductVersion", "2.2.4"
|
||||
VALUE "ProductVersion", "2.2.5"
|
||||
END
|
||||
END
|
||||
BLOCK "VarFileInfo"
|
||||
|
3
util.cpp
3
util.cpp
@ -2325,6 +2325,9 @@ void print_hash_tests(void)
|
||||
x15hash(&hash[0], &buf[0]);
|
||||
printpfx("X15", hash);
|
||||
|
||||
x16r_hash(&hash[0], &buf[0]);
|
||||
printpfx("X16r", hash);
|
||||
|
||||
x17hash(&hash[0], &buf[0]);
|
||||
printpfx("X17", hash);
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Quick Hamsi-512 for X13
|
||||
* by tsiv - 2014
|
||||
* Quick Hamsi-512 for X13 by tsiv - 2014
|
||||
* + Hamsi-512 80 by tpruvot - 2018
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
@ -16,31 +16,17 @@ static __constant__ uint32_t d_alpha_f[32];
|
||||
static __constant__ uint32_t d_T512[64][16];
|
||||
|
||||
static const uint32_t alpha_n[] = {
|
||||
SPH_C32(0xff00f0f0), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0cccc),
|
||||
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00),
|
||||
SPH_C32(0xaaaacccc), SPH_C32(0xf0f0ff00), SPH_C32(0xf0f0cccc),
|
||||
SPH_C32(0xaaaaff00), SPH_C32(0xccccff00), SPH_C32(0xaaaaf0f0),
|
||||
SPH_C32(0xaaaaf0f0), SPH_C32(0xff00cccc), SPH_C32(0xccccf0f0),
|
||||
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xff00f0f0),
|
||||
SPH_C32(0xff00aaaa), SPH_C32(0xf0f0cccc), SPH_C32(0xf0f0ff00),
|
||||
SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00), SPH_C32(0xaaaacccc),
|
||||
SPH_C32(0xaaaaff00), SPH_C32(0xf0f0cccc), SPH_C32(0xaaaaf0f0),
|
||||
SPH_C32(0xccccff00), SPH_C32(0xff00cccc), SPH_C32(0xaaaaf0f0),
|
||||
SPH_C32(0xff00aaaa), SPH_C32(0xccccf0f0)
|
||||
0xff00f0f0, 0xccccaaaa, 0xf0f0cccc, 0xff00aaaa, 0xccccaaaa, 0xf0f0ff00, 0xaaaacccc, 0xf0f0ff00,
|
||||
0xf0f0cccc, 0xaaaaff00, 0xccccff00, 0xaaaaf0f0, 0xaaaaf0f0, 0xff00cccc, 0xccccf0f0, 0xff00aaaa,
|
||||
0xccccaaaa, 0xff00f0f0, 0xff00aaaa, 0xf0f0cccc, 0xf0f0ff00, 0xccccaaaa, 0xf0f0ff00, 0xaaaacccc,
|
||||
0xaaaaff00, 0xf0f0cccc, 0xaaaaf0f0, 0xccccff00, 0xff00cccc, 0xaaaaf0f0, 0xff00aaaa, 0xccccf0f0
|
||||
};
|
||||
|
||||
static const uint32_t alpha_f[] = {
|
||||
SPH_C32(0xcaf9639c), SPH_C32(0x0ff0f9c0), SPH_C32(0x639c0ff0),
|
||||
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9),
|
||||
SPH_C32(0xf9c00ff0), SPH_C32(0x639ccaf9), SPH_C32(0x639c0ff0),
|
||||
SPH_C32(0xf9c0caf9), SPH_C32(0x0ff0caf9), SPH_C32(0xf9c0639c),
|
||||
SPH_C32(0xf9c0639c), SPH_C32(0xcaf90ff0), SPH_C32(0x0ff0639c),
|
||||
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0xcaf9639c),
|
||||
SPH_C32(0xcaf9f9c0), SPH_C32(0x639c0ff0), SPH_C32(0x639ccaf9),
|
||||
SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9), SPH_C32(0xf9c00ff0),
|
||||
SPH_C32(0xf9c0caf9), SPH_C32(0x639c0ff0), SPH_C32(0xf9c0639c),
|
||||
SPH_C32(0x0ff0caf9), SPH_C32(0xcaf90ff0), SPH_C32(0xf9c0639c),
|
||||
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0639c)
|
||||
0xcaf9639c, 0x0ff0f9c0, 0x639c0ff0, 0xcaf9f9c0, 0x0ff0f9c0, 0x639ccaf9, 0xf9c00ff0, 0x639ccaf9,
|
||||
0x639c0ff0, 0xf9c0caf9, 0x0ff0caf9, 0xf9c0639c, 0xf9c0639c, 0xcaf90ff0, 0x0ff0639c, 0xcaf9f9c0,
|
||||
0x0ff0f9c0, 0xcaf9639c, 0xcaf9f9c0, 0x639c0ff0, 0x639ccaf9, 0x0ff0f9c0, 0x639ccaf9, 0xf9c00ff0,
|
||||
0xf9c0caf9, 0x639c0ff0, 0xf9c0639c, 0x0ff0caf9, 0xcaf90ff0, 0xf9c0639c, 0xcaf9f9c0, 0x0ff0639c
|
||||
};
|
||||
|
||||
#define hamsi_s00 m0
|
||||
@ -200,390 +186,134 @@ static const uint32_t alpha_f[] = {
|
||||
|
||||
|
||||
static const uint32_t T512[64][16] = {
|
||||
{ SPH_C32(0xef0b0270), SPH_C32(0x3afd0000), SPH_C32(0x5dae0000),
|
||||
SPH_C32(0x69490000), SPH_C32(0x9b0f3c06), SPH_C32(0x4405b5f9),
|
||||
SPH_C32(0x66140a51), SPH_C32(0x924f5d0a), SPH_C32(0xc96b0030),
|
||||
SPH_C32(0xe7250000), SPH_C32(0x2f840000), SPH_C32(0x264f0000),
|
||||
SPH_C32(0x08695bf9), SPH_C32(0x6dfcf137), SPH_C32(0x509f6984),
|
||||
SPH_C32(0x9e69af68) },
|
||||
{ SPH_C32(0xc96b0030), SPH_C32(0xe7250000), SPH_C32(0x2f840000),
|
||||
SPH_C32(0x264f0000), SPH_C32(0x08695bf9), SPH_C32(0x6dfcf137),
|
||||
SPH_C32(0x509f6984), SPH_C32(0x9e69af68), SPH_C32(0x26600240),
|
||||
SPH_C32(0xddd80000), SPH_C32(0x722a0000), SPH_C32(0x4f060000),
|
||||
SPH_C32(0x936667ff), SPH_C32(0x29f944ce), SPH_C32(0x368b63d5),
|
||||
SPH_C32(0x0c26f262) },
|
||||
{ SPH_C32(0x145a3c00), SPH_C32(0xb9e90000), SPH_C32(0x61270000),
|
||||
SPH_C32(0xf1610000), SPH_C32(0xce613d6c), SPH_C32(0xb0493d78),
|
||||
SPH_C32(0x47a96720), SPH_C32(0xe18e24c5), SPH_C32(0x23671400),
|
||||
SPH_C32(0xc8b90000), SPH_C32(0xf4c70000), SPH_C32(0xfb750000),
|
||||
SPH_C32(0x73cd2465), SPH_C32(0xf8a6a549), SPH_C32(0x02c40a3f),
|
||||
SPH_C32(0xdc24e61f) },
|
||||
{ SPH_C32(0x23671400), SPH_C32(0xc8b90000), SPH_C32(0xf4c70000),
|
||||
SPH_C32(0xfb750000), SPH_C32(0x73cd2465), SPH_C32(0xf8a6a549),
|
||||
SPH_C32(0x02c40a3f), SPH_C32(0xdc24e61f), SPH_C32(0x373d2800),
|
||||
SPH_C32(0x71500000), SPH_C32(0x95e00000), SPH_C32(0x0a140000),
|
||||
SPH_C32(0xbdac1909), SPH_C32(0x48ef9831), SPH_C32(0x456d6d1f),
|
||||
SPH_C32(0x3daac2da) },
|
||||
{ SPH_C32(0x54285c00), SPH_C32(0xeaed0000), SPH_C32(0xc5d60000),
|
||||
SPH_C32(0xa1c50000), SPH_C32(0xb3a26770), SPH_C32(0x94a5c4e1),
|
||||
SPH_C32(0x6bb0419d), SPH_C32(0x551b3782), SPH_C32(0x9cbb1800),
|
||||
SPH_C32(0xb0d30000), SPH_C32(0x92510000), SPH_C32(0xed930000),
|
||||
SPH_C32(0x593a4345), SPH_C32(0xe114d5f4), SPH_C32(0x430633da),
|
||||
SPH_C32(0x78cace29) },
|
||||
{ SPH_C32(0x9cbb1800), SPH_C32(0xb0d30000), SPH_C32(0x92510000),
|
||||
SPH_C32(0xed930000), SPH_C32(0x593a4345), SPH_C32(0xe114d5f4),
|
||||
SPH_C32(0x430633da), SPH_C32(0x78cace29), SPH_C32(0xc8934400),
|
||||
SPH_C32(0x5a3e0000), SPH_C32(0x57870000), SPH_C32(0x4c560000),
|
||||
SPH_C32(0xea982435), SPH_C32(0x75b11115), SPH_C32(0x28b67247),
|
||||
SPH_C32(0x2dd1f9ab) },
|
||||
{ SPH_C32(0x29449c00), SPH_C32(0x64e70000), SPH_C32(0xf24b0000),
|
||||
SPH_C32(0xc2f30000), SPH_C32(0x0ede4e8f), SPH_C32(0x56c23745),
|
||||
SPH_C32(0xf3e04259), SPH_C32(0x8d0d9ec4), SPH_C32(0x466d0c00),
|
||||
SPH_C32(0x08620000), SPH_C32(0xdd5d0000), SPH_C32(0xbadd0000),
|
||||
SPH_C32(0x6a927942), SPH_C32(0x441f2b93), SPH_C32(0x218ace6f),
|
||||
SPH_C32(0xbf2c0be2) },
|
||||
{ SPH_C32(0x466d0c00), SPH_C32(0x08620000), SPH_C32(0xdd5d0000),
|
||||
SPH_C32(0xbadd0000), SPH_C32(0x6a927942), SPH_C32(0x441f2b93),
|
||||
SPH_C32(0x218ace6f), SPH_C32(0xbf2c0be2), SPH_C32(0x6f299000),
|
||||
SPH_C32(0x6c850000), SPH_C32(0x2f160000), SPH_C32(0x782e0000),
|
||||
SPH_C32(0x644c37cd), SPH_C32(0x12dd1cd6), SPH_C32(0xd26a8c36),
|
||||
SPH_C32(0x32219526) },
|
||||
{ SPH_C32(0xf6800005), SPH_C32(0x3443c000), SPH_C32(0x24070000),
|
||||
SPH_C32(0x8f3d0000), SPH_C32(0x21373bfb), SPH_C32(0x0ab8d5ae),
|
||||
SPH_C32(0xcdc58b19), SPH_C32(0xd795ba31), SPH_C32(0xa67f0001),
|
||||
SPH_C32(0x71378000), SPH_C32(0x19fc0000), SPH_C32(0x96db0000),
|
||||
SPH_C32(0x3a8b6dfd), SPH_C32(0xebcaaef3), SPH_C32(0x2c6d478f),
|
||||
SPH_C32(0xac8e6c88) },
|
||||
{ SPH_C32(0xa67f0001), SPH_C32(0x71378000), SPH_C32(0x19fc0000),
|
||||
SPH_C32(0x96db0000), SPH_C32(0x3a8b6dfd), SPH_C32(0xebcaaef3),
|
||||
SPH_C32(0x2c6d478f), SPH_C32(0xac8e6c88), SPH_C32(0x50ff0004),
|
||||
SPH_C32(0x45744000), SPH_C32(0x3dfb0000), SPH_C32(0x19e60000),
|
||||
SPH_C32(0x1bbc5606), SPH_C32(0xe1727b5d), SPH_C32(0xe1a8cc96),
|
||||
SPH_C32(0x7b1bd6b9) },
|
||||
{ SPH_C32(0xf7750009), SPH_C32(0xcf3cc000), SPH_C32(0xc3d60000),
|
||||
SPH_C32(0x04920000), SPH_C32(0x029519a9), SPH_C32(0xf8e836ba),
|
||||
SPH_C32(0x7a87f14e), SPH_C32(0x9e16981a), SPH_C32(0xd46a0000),
|
||||
SPH_C32(0x8dc8c000), SPH_C32(0xa5af0000), SPH_C32(0x4a290000),
|
||||
SPH_C32(0xfc4e427a), SPH_C32(0xc9b4866c), SPH_C32(0x98369604),
|
||||
SPH_C32(0xf746c320) },
|
||||
{ SPH_C32(0xd46a0000), SPH_C32(0x8dc8c000), SPH_C32(0xa5af0000),
|
||||
SPH_C32(0x4a290000), SPH_C32(0xfc4e427a), SPH_C32(0xc9b4866c),
|
||||
SPH_C32(0x98369604), SPH_C32(0xf746c320), SPH_C32(0x231f0009),
|
||||
SPH_C32(0x42f40000), SPH_C32(0x66790000), SPH_C32(0x4ebb0000),
|
||||
SPH_C32(0xfedb5bd3), SPH_C32(0x315cb0d6), SPH_C32(0xe2b1674a),
|
||||
SPH_C32(0x69505b3a) },
|
||||
{ SPH_C32(0x774400f0), SPH_C32(0xf15a0000), SPH_C32(0xf5b20000),
|
||||
SPH_C32(0x34140000), SPH_C32(0x89377e8c), SPH_C32(0x5a8bec25),
|
||||
SPH_C32(0x0bc3cd1e), SPH_C32(0xcf3775cb), SPH_C32(0xf46c0050),
|
||||
SPH_C32(0x96180000), SPH_C32(0x14a50000), SPH_C32(0x031f0000),
|
||||
SPH_C32(0x42947eb8), SPH_C32(0x66bf7e19), SPH_C32(0x9ca470d2),
|
||||
SPH_C32(0x8a341574) },
|
||||
{ SPH_C32(0xf46c0050), SPH_C32(0x96180000), SPH_C32(0x14a50000),
|
||||
SPH_C32(0x031f0000), SPH_C32(0x42947eb8), SPH_C32(0x66bf7e19),
|
||||
SPH_C32(0x9ca470d2), SPH_C32(0x8a341574), SPH_C32(0x832800a0),
|
||||
SPH_C32(0x67420000), SPH_C32(0xe1170000), SPH_C32(0x370b0000),
|
||||
SPH_C32(0xcba30034), SPH_C32(0x3c34923c), SPH_C32(0x9767bdcc),
|
||||
SPH_C32(0x450360bf) },
|
||||
{ SPH_C32(0xe8870170), SPH_C32(0x9d720000), SPH_C32(0x12db0000),
|
||||
SPH_C32(0xd4220000), SPH_C32(0xf2886b27), SPH_C32(0xa921e543),
|
||||
SPH_C32(0x4ef8b518), SPH_C32(0x618813b1), SPH_C32(0xb4370060),
|
||||
SPH_C32(0x0c4c0000), SPH_C32(0x56c20000), SPH_C32(0x5cae0000),
|
||||
SPH_C32(0x94541f3f), SPH_C32(0x3b3ef825), SPH_C32(0x1b365f3d),
|
||||
SPH_C32(0xf3d45758) },
|
||||
{ SPH_C32(0xb4370060), SPH_C32(0x0c4c0000), SPH_C32(0x56c20000),
|
||||
SPH_C32(0x5cae0000), SPH_C32(0x94541f3f), SPH_C32(0x3b3ef825),
|
||||
SPH_C32(0x1b365f3d), SPH_C32(0xf3d45758), SPH_C32(0x5cb00110),
|
||||
SPH_C32(0x913e0000), SPH_C32(0x44190000), SPH_C32(0x888c0000),
|
||||
SPH_C32(0x66dc7418), SPH_C32(0x921f1d66), SPH_C32(0x55ceea25),
|
||||
SPH_C32(0x925c44e9) },
|
||||
{ SPH_C32(0x0c720000), SPH_C32(0x49e50f00), SPH_C32(0x42790000),
|
||||
SPH_C32(0x5cea0000), SPH_C32(0x33aa301a), SPH_C32(0x15822514),
|
||||
SPH_C32(0x95a34b7b), SPH_C32(0xb44b0090), SPH_C32(0xfe220000),
|
||||
SPH_C32(0xa7580500), SPH_C32(0x25d10000), SPH_C32(0xf7600000),
|
||||
SPH_C32(0x893178da), SPH_C32(0x1fd4f860), SPH_C32(0x4ed0a315),
|
||||
SPH_C32(0xa123ff9f) },
|
||||
{ SPH_C32(0xfe220000), SPH_C32(0xa7580500), SPH_C32(0x25d10000),
|
||||
SPH_C32(0xf7600000), SPH_C32(0x893178da), SPH_C32(0x1fd4f860),
|
||||
SPH_C32(0x4ed0a315), SPH_C32(0xa123ff9f), SPH_C32(0xf2500000),
|
||||
SPH_C32(0xeebd0a00), SPH_C32(0x67a80000), SPH_C32(0xab8a0000),
|
||||
SPH_C32(0xba9b48c0), SPH_C32(0x0a56dd74), SPH_C32(0xdb73e86e),
|
||||
SPH_C32(0x1568ff0f) },
|
||||
{ SPH_C32(0x45180000), SPH_C32(0xa5b51700), SPH_C32(0xf96a0000),
|
||||
SPH_C32(0x3b480000), SPH_C32(0x1ecc142c), SPH_C32(0x231395d6),
|
||||
SPH_C32(0x16bca6b0), SPH_C32(0xdf33f4df), SPH_C32(0xb83d0000),
|
||||
SPH_C32(0x16710600), SPH_C32(0x379a0000), SPH_C32(0xf5b10000),
|
||||
SPH_C32(0x228161ac), SPH_C32(0xae48f145), SPH_C32(0x66241616),
|
||||
SPH_C32(0xc5c1eb3e) },
|
||||
{ SPH_C32(0xb83d0000), SPH_C32(0x16710600), SPH_C32(0x379a0000),
|
||||
SPH_C32(0xf5b10000), SPH_C32(0x228161ac), SPH_C32(0xae48f145),
|
||||
SPH_C32(0x66241616), SPH_C32(0xc5c1eb3e), SPH_C32(0xfd250000),
|
||||
SPH_C32(0xb3c41100), SPH_C32(0xcef00000), SPH_C32(0xcef90000),
|
||||
SPH_C32(0x3c4d7580), SPH_C32(0x8d5b6493), SPH_C32(0x7098b0a6),
|
||||
SPH_C32(0x1af21fe1) },
|
||||
{ SPH_C32(0x75a40000), SPH_C32(0xc28b2700), SPH_C32(0x94a40000),
|
||||
SPH_C32(0x90f50000), SPH_C32(0xfb7857e0), SPH_C32(0x49ce0bae),
|
||||
SPH_C32(0x1767c483), SPH_C32(0xaedf667e), SPH_C32(0xd1660000),
|
||||
SPH_C32(0x1bbc0300), SPH_C32(0x9eec0000), SPH_C32(0xf6940000),
|
||||
SPH_C32(0x03024527), SPH_C32(0xcf70fcf2), SPH_C32(0xb4431b17),
|
||||
SPH_C32(0x857f3c2b) },
|
||||
{ SPH_C32(0xd1660000), SPH_C32(0x1bbc0300), SPH_C32(0x9eec0000),
|
||||
SPH_C32(0xf6940000), SPH_C32(0x03024527), SPH_C32(0xcf70fcf2),
|
||||
SPH_C32(0xb4431b17), SPH_C32(0x857f3c2b), SPH_C32(0xa4c20000),
|
||||
SPH_C32(0xd9372400), SPH_C32(0x0a480000), SPH_C32(0x66610000),
|
||||
SPH_C32(0xf87a12c7), SPH_C32(0x86bef75c), SPH_C32(0xa324df94),
|
||||
SPH_C32(0x2ba05a55) },
|
||||
{ SPH_C32(0x75c90003), SPH_C32(0x0e10c000), SPH_C32(0xd1200000),
|
||||
SPH_C32(0xbaea0000), SPH_C32(0x8bc42f3e), SPH_C32(0x8758b757),
|
||||
SPH_C32(0xbb28761d), SPH_C32(0x00b72e2b), SPH_C32(0xeecf0001),
|
||||
SPH_C32(0x6f564000), SPH_C32(0xf33e0000), SPH_C32(0xa79e0000),
|
||||
SPH_C32(0xbdb57219), SPH_C32(0xb711ebc5), SPH_C32(0x4a3b40ba),
|
||||
SPH_C32(0xfeabf254) },
|
||||
{ SPH_C32(0xeecf0001), SPH_C32(0x6f564000), SPH_C32(0xf33e0000),
|
||||
SPH_C32(0xa79e0000), SPH_C32(0xbdb57219), SPH_C32(0xb711ebc5),
|
||||
SPH_C32(0x4a3b40ba), SPH_C32(0xfeabf254), SPH_C32(0x9b060002),
|
||||
SPH_C32(0x61468000), SPH_C32(0x221e0000), SPH_C32(0x1d740000),
|
||||
SPH_C32(0x36715d27), SPH_C32(0x30495c92), SPH_C32(0xf11336a7),
|
||||
SPH_C32(0xfe1cdc7f) },
|
||||
{ SPH_C32(0x86790000), SPH_C32(0x3f390002), SPH_C32(0xe19ae000),
|
||||
SPH_C32(0x98560000), SPH_C32(0x9565670e), SPH_C32(0x4e88c8ea),
|
||||
SPH_C32(0xd3dd4944), SPH_C32(0x161ddab9), SPH_C32(0x30b70000),
|
||||
SPH_C32(0xe5d00000), SPH_C32(0xf4f46000), SPH_C32(0x42c40000),
|
||||
SPH_C32(0x63b83d6a), SPH_C32(0x78ba9460), SPH_C32(0x21afa1ea),
|
||||
SPH_C32(0xb0a51834) },
|
||||
{ SPH_C32(0x30b70000), SPH_C32(0xe5d00000), SPH_C32(0xf4f46000),
|
||||
SPH_C32(0x42c40000), SPH_C32(0x63b83d6a), SPH_C32(0x78ba9460),
|
||||
SPH_C32(0x21afa1ea), SPH_C32(0xb0a51834), SPH_C32(0xb6ce0000),
|
||||
SPH_C32(0xdae90002), SPH_C32(0x156e8000), SPH_C32(0xda920000),
|
||||
SPH_C32(0xf6dd5a64), SPH_C32(0x36325c8a), SPH_C32(0xf272e8ae),
|
||||
SPH_C32(0xa6b8c28d) },
|
||||
{ SPH_C32(0x14190000), SPH_C32(0x23ca003c), SPH_C32(0x50df0000),
|
||||
SPH_C32(0x44b60000), SPH_C32(0x1b6c67b0), SPH_C32(0x3cf3ac75),
|
||||
SPH_C32(0x61e610b0), SPH_C32(0xdbcadb80), SPH_C32(0xe3430000),
|
||||
SPH_C32(0x3a4e0014), SPH_C32(0xf2c60000), SPH_C32(0xaa4e0000),
|
||||
SPH_C32(0xdb1e42a6), SPH_C32(0x256bbe15), SPH_C32(0x123db156),
|
||||
SPH_C32(0x3a4e99d7) },
|
||||
{ SPH_C32(0xe3430000), SPH_C32(0x3a4e0014), SPH_C32(0xf2c60000),
|
||||
SPH_C32(0xaa4e0000), SPH_C32(0xdb1e42a6), SPH_C32(0x256bbe15),
|
||||
SPH_C32(0x123db156), SPH_C32(0x3a4e99d7), SPH_C32(0xf75a0000),
|
||||
SPH_C32(0x19840028), SPH_C32(0xa2190000), SPH_C32(0xeef80000),
|
||||
SPH_C32(0xc0722516), SPH_C32(0x19981260), SPH_C32(0x73dba1e6),
|
||||
SPH_C32(0xe1844257) },
|
||||
{ SPH_C32(0x54500000), SPH_C32(0x0671005c), SPH_C32(0x25ae0000),
|
||||
SPH_C32(0x6a1e0000), SPH_C32(0x2ea54edf), SPH_C32(0x664e8512),
|
||||
SPH_C32(0xbfba18c3), SPH_C32(0x7e715d17), SPH_C32(0xbc8d0000),
|
||||
SPH_C32(0xfc3b0018), SPH_C32(0x19830000), SPH_C32(0xd10b0000),
|
||||
SPH_C32(0xae1878c4), SPH_C32(0x42a69856), SPH_C32(0x0012da37),
|
||||
SPH_C32(0x2c3b504e) },
|
||||
{ SPH_C32(0xbc8d0000), SPH_C32(0xfc3b0018), SPH_C32(0x19830000),
|
||||
SPH_C32(0xd10b0000), SPH_C32(0xae1878c4), SPH_C32(0x42a69856),
|
||||
SPH_C32(0x0012da37), SPH_C32(0x2c3b504e), SPH_C32(0xe8dd0000),
|
||||
SPH_C32(0xfa4a0044), SPH_C32(0x3c2d0000), SPH_C32(0xbb150000),
|
||||
SPH_C32(0x80bd361b), SPH_C32(0x24e81d44), SPH_C32(0xbfa8c2f4),
|
||||
SPH_C32(0x524a0d59) },
|
||||
{ SPH_C32(0x69510000), SPH_C32(0xd4e1009c), SPH_C32(0xc3230000),
|
||||
SPH_C32(0xac2f0000), SPH_C32(0xe4950bae), SPH_C32(0xcea415dc),
|
||||
SPH_C32(0x87ec287c), SPH_C32(0xbce1a3ce), SPH_C32(0xc6730000),
|
||||
SPH_C32(0xaf8d000c), SPH_C32(0xa4c10000), SPH_C32(0x218d0000),
|
||||
SPH_C32(0x23111587), SPH_C32(0x7913512f), SPH_C32(0x1d28ac88),
|
||||
SPH_C32(0x378dd173) },
|
||||
{ SPH_C32(0xc6730000), SPH_C32(0xaf8d000c), SPH_C32(0xa4c10000),
|
||||
SPH_C32(0x218d0000), SPH_C32(0x23111587), SPH_C32(0x7913512f),
|
||||
SPH_C32(0x1d28ac88), SPH_C32(0x378dd173), SPH_C32(0xaf220000),
|
||||
SPH_C32(0x7b6c0090), SPH_C32(0x67e20000), SPH_C32(0x8da20000),
|
||||
SPH_C32(0xc7841e29), SPH_C32(0xb7b744f3), SPH_C32(0x9ac484f4),
|
||||
SPH_C32(0x8b6c72bd) },
|
||||
{ SPH_C32(0xcc140000), SPH_C32(0xa5630000), SPH_C32(0x5ab90780),
|
||||
SPH_C32(0x3b500000), SPH_C32(0x4bd013ff), SPH_C32(0x879b3418),
|
||||
SPH_C32(0x694348c1), SPH_C32(0xca5a87fe), SPH_C32(0x819e0000),
|
||||
SPH_C32(0xec570000), SPH_C32(0x66320280), SPH_C32(0x95f30000),
|
||||
SPH_C32(0x5da92802), SPH_C32(0x48f43cbc), SPH_C32(0xe65aa22d),
|
||||
SPH_C32(0x8e67b7fa) },
|
||||
{ SPH_C32(0x819e0000), SPH_C32(0xec570000), SPH_C32(0x66320280),
|
||||
SPH_C32(0x95f30000), SPH_C32(0x5da92802), SPH_C32(0x48f43cbc),
|
||||
SPH_C32(0xe65aa22d), SPH_C32(0x8e67b7fa), SPH_C32(0x4d8a0000),
|
||||
SPH_C32(0x49340000), SPH_C32(0x3c8b0500), SPH_C32(0xaea30000),
|
||||
SPH_C32(0x16793bfd), SPH_C32(0xcf6f08a4), SPH_C32(0x8f19eaec),
|
||||
SPH_C32(0x443d3004) },
|
||||
{ SPH_C32(0x78230000), SPH_C32(0x12fc0000), SPH_C32(0xa93a0b80),
|
||||
SPH_C32(0x90a50000), SPH_C32(0x713e2879), SPH_C32(0x7ee98924),
|
||||
SPH_C32(0xf08ca062), SPH_C32(0x636f8bab), SPH_C32(0x02af0000),
|
||||
SPH_C32(0xb7280000), SPH_C32(0xba1c0300), SPH_C32(0x56980000),
|
||||
SPH_C32(0xba8d45d3), SPH_C32(0x8048c667), SPH_C32(0xa95c149a),
|
||||
SPH_C32(0xf4f6ea7b) },
|
||||
{ SPH_C32(0x02af0000), SPH_C32(0xb7280000), SPH_C32(0xba1c0300),
|
||||
SPH_C32(0x56980000), SPH_C32(0xba8d45d3), SPH_C32(0x8048c667),
|
||||
SPH_C32(0xa95c149a), SPH_C32(0xf4f6ea7b), SPH_C32(0x7a8c0000),
|
||||
SPH_C32(0xa5d40000), SPH_C32(0x13260880), SPH_C32(0xc63d0000),
|
||||
SPH_C32(0xcbb36daa), SPH_C32(0xfea14f43), SPH_C32(0x59d0b4f8),
|
||||
SPH_C32(0x979961d0) },
|
||||
{ SPH_C32(0xac480000), SPH_C32(0x1ba60000), SPH_C32(0x45fb1380),
|
||||
SPH_C32(0x03430000), SPH_C32(0x5a85316a), SPH_C32(0x1fb250b6),
|
||||
SPH_C32(0xfe72c7fe), SPH_C32(0x91e478f6), SPH_C32(0x1e4e0000),
|
||||
SPH_C32(0xdecf0000), SPH_C32(0x6df80180), SPH_C32(0x77240000),
|
||||
SPH_C32(0xec47079e), SPH_C32(0xf4a0694e), SPH_C32(0xcda31812),
|
||||
SPH_C32(0x98aa496e) },
|
||||
{ SPH_C32(0x1e4e0000), SPH_C32(0xdecf0000), SPH_C32(0x6df80180),
|
||||
SPH_C32(0x77240000), SPH_C32(0xec47079e), SPH_C32(0xf4a0694e),
|
||||
SPH_C32(0xcda31812), SPH_C32(0x98aa496e), SPH_C32(0xb2060000),
|
||||
SPH_C32(0xc5690000), SPH_C32(0x28031200), SPH_C32(0x74670000),
|
||||
SPH_C32(0xb6c236f4), SPH_C32(0xeb1239f8), SPH_C32(0x33d1dfec),
|
||||
SPH_C32(0x094e3198) },
|
||||
{ SPH_C32(0xaec30000), SPH_C32(0x9c4f0001), SPH_C32(0x79d1e000),
|
||||
SPH_C32(0x2c150000), SPH_C32(0x45cc75b3), SPH_C32(0x6650b736),
|
||||
SPH_C32(0xab92f78f), SPH_C32(0xa312567b), SPH_C32(0xdb250000),
|
||||
SPH_C32(0x09290000), SPH_C32(0x49aac000), SPH_C32(0x81e10000),
|
||||
SPH_C32(0xcafe6b59), SPH_C32(0x42793431), SPH_C32(0x43566b76),
|
||||
SPH_C32(0xe86cba2e) },
|
||||
{ SPH_C32(0xdb250000), SPH_C32(0x09290000), SPH_C32(0x49aac000),
|
||||
SPH_C32(0x81e10000), SPH_C32(0xcafe6b59), SPH_C32(0x42793431),
|
||||
SPH_C32(0x43566b76), SPH_C32(0xe86cba2e), SPH_C32(0x75e60000),
|
||||
SPH_C32(0x95660001), SPH_C32(0x307b2000), SPH_C32(0xadf40000),
|
||||
SPH_C32(0x8f321eea), SPH_C32(0x24298307), SPH_C32(0xe8c49cf9),
|
||||
SPH_C32(0x4b7eec55) },
|
||||
{ SPH_C32(0x58430000), SPH_C32(0x807e0000), SPH_C32(0x78330001),
|
||||
SPH_C32(0xc66b3800), SPH_C32(0xe7375cdc), SPH_C32(0x79ad3fdd),
|
||||
SPH_C32(0xac73fe6f), SPH_C32(0x3a4479b1), SPH_C32(0x1d5a0000),
|
||||
SPH_C32(0x2b720000), SPH_C32(0x488d0000), SPH_C32(0xaf611800),
|
||||
SPH_C32(0x25cb2ec5), SPH_C32(0xc879bfd0), SPH_C32(0x81a20429),
|
||||
SPH_C32(0x1e7536a6) },
|
||||
{ SPH_C32(0x1d5a0000), SPH_C32(0x2b720000), SPH_C32(0x488d0000),
|
||||
SPH_C32(0xaf611800), SPH_C32(0x25cb2ec5), SPH_C32(0xc879bfd0),
|
||||
SPH_C32(0x81a20429), SPH_C32(0x1e7536a6), SPH_C32(0x45190000),
|
||||
SPH_C32(0xab0c0000), SPH_C32(0x30be0001), SPH_C32(0x690a2000),
|
||||
SPH_C32(0xc2fc7219), SPH_C32(0xb1d4800d), SPH_C32(0x2dd1fa46),
|
||||
SPH_C32(0x24314f17) },
|
||||
{ SPH_C32(0xa53b0000), SPH_C32(0x14260000), SPH_C32(0x4e30001e),
|
||||
SPH_C32(0x7cae0000), SPH_C32(0x8f9e0dd5), SPH_C32(0x78dfaa3d),
|
||||
SPH_C32(0xf73168d8), SPH_C32(0x0b1b4946), SPH_C32(0x07ed0000),
|
||||
SPH_C32(0xb2500000), SPH_C32(0x8774000a), SPH_C32(0x970d0000),
|
||||
SPH_C32(0x437223ae), SPH_C32(0x48c76ea4), SPH_C32(0xf4786222),
|
||||
SPH_C32(0x9075b1ce) },
|
||||
{ SPH_C32(0x07ed0000), SPH_C32(0xb2500000), SPH_C32(0x8774000a),
|
||||
SPH_C32(0x970d0000), SPH_C32(0x437223ae), SPH_C32(0x48c76ea4),
|
||||
SPH_C32(0xf4786222), SPH_C32(0x9075b1ce), SPH_C32(0xa2d60000),
|
||||
SPH_C32(0xa6760000), SPH_C32(0xc9440014), SPH_C32(0xeba30000),
|
||||
SPH_C32(0xccec2e7b), SPH_C32(0x3018c499), SPH_C32(0x03490afa),
|
||||
SPH_C32(0x9b6ef888) },
|
||||
{ SPH_C32(0x88980000), SPH_C32(0x1f940000), SPH_C32(0x7fcf002e),
|
||||
SPH_C32(0xfb4e0000), SPH_C32(0xf158079a), SPH_C32(0x61ae9167),
|
||||
SPH_C32(0xa895706c), SPH_C32(0xe6107494), SPH_C32(0x0bc20000),
|
||||
SPH_C32(0xdb630000), SPH_C32(0x7e88000c), SPH_C32(0x15860000),
|
||||
SPH_C32(0x91fd48f3), SPH_C32(0x7581bb43), SPH_C32(0xf460449e),
|
||||
SPH_C32(0xd8b61463) },
|
||||
{ SPH_C32(0x0bc20000), SPH_C32(0xdb630000), SPH_C32(0x7e88000c),
|
||||
SPH_C32(0x15860000), SPH_C32(0x91fd48f3), SPH_C32(0x7581bb43),
|
||||
SPH_C32(0xf460449e), SPH_C32(0xd8b61463), SPH_C32(0x835a0000),
|
||||
SPH_C32(0xc4f70000), SPH_C32(0x01470022), SPH_C32(0xeec80000),
|
||||
SPH_C32(0x60a54f69), SPH_C32(0x142f2a24), SPH_C32(0x5cf534f2),
|
||||
SPH_C32(0x3ea660f7) },
|
||||
{ SPH_C32(0x52500000), SPH_C32(0x29540000), SPH_C32(0x6a61004e),
|
||||
SPH_C32(0xf0ff0000), SPH_C32(0x9a317eec), SPH_C32(0x452341ce),
|
||||
SPH_C32(0xcf568fe5), SPH_C32(0x5303130f), SPH_C32(0x538d0000),
|
||||
SPH_C32(0xa9fc0000), SPH_C32(0x9ef70006), SPH_C32(0x56ff0000),
|
||||
SPH_C32(0x0ae4004e), SPH_C32(0x92c5cdf9), SPH_C32(0xa9444018),
|
||||
SPH_C32(0x7f975691) },
|
||||
{ SPH_C32(0x538d0000), SPH_C32(0xa9fc0000), SPH_C32(0x9ef70006),
|
||||
SPH_C32(0x56ff0000), SPH_C32(0x0ae4004e), SPH_C32(0x92c5cdf9),
|
||||
SPH_C32(0xa9444018), SPH_C32(0x7f975691), SPH_C32(0x01dd0000),
|
||||
SPH_C32(0x80a80000), SPH_C32(0xf4960048), SPH_C32(0xa6000000),
|
||||
SPH_C32(0x90d57ea2), SPH_C32(0xd7e68c37), SPH_C32(0x6612cffd),
|
||||
SPH_C32(0x2c94459e) },
|
||||
{ SPH_C32(0xe6280000), SPH_C32(0x4c4b0000), SPH_C32(0xa8550000),
|
||||
SPH_C32(0xd3d002e0), SPH_C32(0xd86130b8), SPH_C32(0x98a7b0da),
|
||||
SPH_C32(0x289506b4), SPH_C32(0xd75a4897), SPH_C32(0xf0c50000),
|
||||
SPH_C32(0x59230000), SPH_C32(0x45820000), SPH_C32(0xe18d00c0),
|
||||
SPH_C32(0x3b6d0631), SPH_C32(0xc2ed5699), SPH_C32(0xcbe0fe1c),
|
||||
SPH_C32(0x56a7b19f) },
|
||||
{ SPH_C32(0xf0c50000), SPH_C32(0x59230000), SPH_C32(0x45820000),
|
||||
SPH_C32(0xe18d00c0), SPH_C32(0x3b6d0631), SPH_C32(0xc2ed5699),
|
||||
SPH_C32(0xcbe0fe1c), SPH_C32(0x56a7b19f), SPH_C32(0x16ed0000),
|
||||
SPH_C32(0x15680000), SPH_C32(0xedd70000), SPH_C32(0x325d0220),
|
||||
SPH_C32(0xe30c3689), SPH_C32(0x5a4ae643), SPH_C32(0xe375f8a8),
|
||||
SPH_C32(0x81fdf908) },
|
||||
{ SPH_C32(0xb4310000), SPH_C32(0x77330000), SPH_C32(0xb15d0000),
|
||||
SPH_C32(0x7fd004e0), SPH_C32(0x78a26138), SPH_C32(0xd116c35d),
|
||||
SPH_C32(0xd256d489), SPH_C32(0x4e6f74de), SPH_C32(0xe3060000),
|
||||
SPH_C32(0xbdc10000), SPH_C32(0x87130000), SPH_C32(0xbff20060),
|
||||
SPH_C32(0x2eba0a1a), SPH_C32(0x8db53751), SPH_C32(0x73c5ab06),
|
||||
SPH_C32(0x5bd61539) },
|
||||
{ SPH_C32(0xe3060000), SPH_C32(0xbdc10000), SPH_C32(0x87130000),
|
||||
SPH_C32(0xbff20060), SPH_C32(0x2eba0a1a), SPH_C32(0x8db53751),
|
||||
SPH_C32(0x73c5ab06), SPH_C32(0x5bd61539), SPH_C32(0x57370000),
|
||||
SPH_C32(0xcaf20000), SPH_C32(0x364e0000), SPH_C32(0xc0220480),
|
||||
SPH_C32(0x56186b22), SPH_C32(0x5ca3f40c), SPH_C32(0xa1937f8f),
|
||||
SPH_C32(0x15b961e7) },
|
||||
{ SPH_C32(0x02f20000), SPH_C32(0xa2810000), SPH_C32(0x873f0000),
|
||||
SPH_C32(0xe36c7800), SPH_C32(0x1e1d74ef), SPH_C32(0x073d2bd6),
|
||||
SPH_C32(0xc4c23237), SPH_C32(0x7f32259e), SPH_C32(0xbadd0000),
|
||||
SPH_C32(0x13ad0000), SPH_C32(0xb7e70000), SPH_C32(0xf7282800),
|
||||
SPH_C32(0xdf45144d), SPH_C32(0x361ac33a), SPH_C32(0xea5a8d14),
|
||||
SPH_C32(0x2a2c18f0) },
|
||||
{ SPH_C32(0xbadd0000), SPH_C32(0x13ad0000), SPH_C32(0xb7e70000),
|
||||
SPH_C32(0xf7282800), SPH_C32(0xdf45144d), SPH_C32(0x361ac33a),
|
||||
SPH_C32(0xea5a8d14), SPH_C32(0x2a2c18f0), SPH_C32(0xb82f0000),
|
||||
SPH_C32(0xb12c0000), SPH_C32(0x30d80000), SPH_C32(0x14445000),
|
||||
SPH_C32(0xc15860a2), SPH_C32(0x3127e8ec), SPH_C32(0x2e98bf23),
|
||||
SPH_C32(0x551e3d6e) },
|
||||
{ SPH_C32(0x1e6c0000), SPH_C32(0xc4420000), SPH_C32(0x8a2e0000),
|
||||
SPH_C32(0xbcb6b800), SPH_C32(0x2c4413b6), SPH_C32(0x8bfdd3da),
|
||||
SPH_C32(0x6a0c1bc8), SPH_C32(0xb99dc2eb), SPH_C32(0x92560000),
|
||||
SPH_C32(0x1eda0000), SPH_C32(0xea510000), SPH_C32(0xe8b13000),
|
||||
SPH_C32(0xa93556a5), SPH_C32(0xebfb6199), SPH_C32(0xb15c2254),
|
||||
SPH_C32(0x33c5244f) },
|
||||
{ SPH_C32(0x92560000), SPH_C32(0x1eda0000), SPH_C32(0xea510000),
|
||||
SPH_C32(0xe8b13000), SPH_C32(0xa93556a5), SPH_C32(0xebfb6199),
|
||||
SPH_C32(0xb15c2254), SPH_C32(0x33c5244f), SPH_C32(0x8c3a0000),
|
||||
SPH_C32(0xda980000), SPH_C32(0x607f0000), SPH_C32(0x54078800),
|
||||
SPH_C32(0x85714513), SPH_C32(0x6006b243), SPH_C32(0xdb50399c),
|
||||
SPH_C32(0x8a58e6a4) },
|
||||
{ SPH_C32(0x033d0000), SPH_C32(0x08b30000), SPH_C32(0xf33a0000),
|
||||
SPH_C32(0x3ac20007), SPH_C32(0x51298a50), SPH_C32(0x6b6e661f),
|
||||
SPH_C32(0x0ea5cfe3), SPH_C32(0xe6da7ffe), SPH_C32(0xa8da0000),
|
||||
SPH_C32(0x96be0000), SPH_C32(0x5c1d0000), SPH_C32(0x07da0002),
|
||||
SPH_C32(0x7d669583), SPH_C32(0x1f98708a), SPH_C32(0xbb668808),
|
||||
SPH_C32(0xda878000) },
|
||||
{ SPH_C32(0xa8da0000), SPH_C32(0x96be0000), SPH_C32(0x5c1d0000),
|
||||
SPH_C32(0x07da0002), SPH_C32(0x7d669583), SPH_C32(0x1f98708a),
|
||||
SPH_C32(0xbb668808), SPH_C32(0xda878000), SPH_C32(0xabe70000),
|
||||
SPH_C32(0x9e0d0000), SPH_C32(0xaf270000), SPH_C32(0x3d180005),
|
||||
SPH_C32(0x2c4f1fd3), SPH_C32(0x74f61695), SPH_C32(0xb5c347eb),
|
||||
SPH_C32(0x3c5dfffe) },
|
||||
{ SPH_C32(0x01930000), SPH_C32(0xe7820000), SPH_C32(0xedfb0000),
|
||||
SPH_C32(0xcf0c000b), SPH_C32(0x8dd08d58), SPH_C32(0xbca3b42e),
|
||||
SPH_C32(0x063661e1), SPH_C32(0x536f9e7b), SPH_C32(0x92280000),
|
||||
SPH_C32(0xdc850000), SPH_C32(0x57fa0000), SPH_C32(0x56dc0003),
|
||||
SPH_C32(0xbae92316), SPH_C32(0x5aefa30c), SPH_C32(0x90cef752),
|
||||
SPH_C32(0x7b1675d7) },
|
||||
{ SPH_C32(0x92280000), SPH_C32(0xdc850000), SPH_C32(0x57fa0000),
|
||||
SPH_C32(0x56dc0003), SPH_C32(0xbae92316), SPH_C32(0x5aefa30c),
|
||||
SPH_C32(0x90cef752), SPH_C32(0x7b1675d7), SPH_C32(0x93bb0000),
|
||||
SPH_C32(0x3b070000), SPH_C32(0xba010000), SPH_C32(0x99d00008),
|
||||
SPH_C32(0x3739ae4e), SPH_C32(0xe64c1722), SPH_C32(0x96f896b3),
|
||||
SPH_C32(0x2879ebac) },
|
||||
{ SPH_C32(0x5fa80000), SPH_C32(0x56030000), SPH_C32(0x43ae0000),
|
||||
SPH_C32(0x64f30013), SPH_C32(0x257e86bf), SPH_C32(0x1311944e),
|
||||
SPH_C32(0x541e95bf), SPH_C32(0x8ea4db69), SPH_C32(0x00440000),
|
||||
SPH_C32(0x7f480000), SPH_C32(0xda7c0000), SPH_C32(0x2a230001),
|
||||
SPH_C32(0x3badc9cc), SPH_C32(0xa9b69c87), SPH_C32(0x030a9e60),
|
||||
SPH_C32(0xbe0a679e) },
|
||||
{ SPH_C32(0x00440000), SPH_C32(0x7f480000), SPH_C32(0xda7c0000),
|
||||
SPH_C32(0x2a230001), SPH_C32(0x3badc9cc), SPH_C32(0xa9b69c87),
|
||||
SPH_C32(0x030a9e60), SPH_C32(0xbe0a679e), SPH_C32(0x5fec0000),
|
||||
SPH_C32(0x294b0000), SPH_C32(0x99d20000), SPH_C32(0x4ed00012),
|
||||
SPH_C32(0x1ed34f73), SPH_C32(0xbaa708c9), SPH_C32(0x57140bdf),
|
||||
SPH_C32(0x30aebcf7) },
|
||||
{ SPH_C32(0xee930000), SPH_C32(0xd6070000), SPH_C32(0x92c10000),
|
||||
SPH_C32(0x2b9801e0), SPH_C32(0x9451287c), SPH_C32(0x3b6cfb57),
|
||||
SPH_C32(0x45312374), SPH_C32(0x201f6a64), SPH_C32(0x7b280000),
|
||||
SPH_C32(0x57420000), SPH_C32(0xa9e50000), SPH_C32(0x634300a0),
|
||||
SPH_C32(0x9edb442f), SPH_C32(0x6d9995bb), SPH_C32(0x27f83b03),
|
||||
SPH_C32(0xc7ff60f0) },
|
||||
{ SPH_C32(0x7b280000), SPH_C32(0x57420000), SPH_C32(0xa9e50000),
|
||||
SPH_C32(0x634300a0), SPH_C32(0x9edb442f), SPH_C32(0x6d9995bb),
|
||||
SPH_C32(0x27f83b03), SPH_C32(0xc7ff60f0), SPH_C32(0x95bb0000),
|
||||
SPH_C32(0x81450000), SPH_C32(0x3b240000), SPH_C32(0x48db0140),
|
||||
SPH_C32(0x0a8a6c53), SPH_C32(0x56f56eec), SPH_C32(0x62c91877),
|
||||
SPH_C32(0xe7e00a94) }
|
||||
{ 0xef0b0270, 0x3afd0000, 0x5dae0000, 0x69490000, 0x9b0f3c06, 0x4405b5f9, 0x66140a51, 0x924f5d0a, // 0
|
||||
0xc96b0030, 0xe7250000, 0x2f840000, 0x264f0000, 0x08695bf9, 0x6dfcf137, 0x509f6984, 0x9e69af68 },
|
||||
{ 0xc96b0030, 0xe7250000, 0x2f840000, 0x264f0000, 0x08695bf9, 0x6dfcf137, 0x509f6984, 0x9e69af68,
|
||||
0x26600240, 0xddd80000, 0x722a0000, 0x4f060000, 0x936667ff, 0x29f944ce, 0x368b63d5, 0x0c26f262 },
|
||||
{ 0x145a3c00, 0xb9e90000, 0x61270000, 0xf1610000, 0xce613d6c, 0xb0493d78, 0x47a96720, 0xe18e24c5,
|
||||
0x23671400, 0xc8b90000, 0xf4c70000, 0xfb750000, 0x73cd2465, 0xf8a6a549, 0x02c40a3f, 0xdc24e61f },
|
||||
{ 0x23671400, 0xc8b90000, 0xf4c70000, 0xfb750000, 0x73cd2465, 0xf8a6a549, 0x02c40a3f, 0xdc24e61f,
|
||||
0x373d2800, 0x71500000, 0x95e00000, 0x0a140000, 0xbdac1909, 0x48ef9831, 0x456d6d1f, 0x3daac2da },
|
||||
{ 0x54285c00, 0xeaed0000, 0xc5d60000, 0xa1c50000, 0xb3a26770, 0x94a5c4e1, 0x6bb0419d, 0x551b3782,
|
||||
0x9cbb1800, 0xb0d30000, 0x92510000, 0xed930000, 0x593a4345, 0xe114d5f4, 0x430633da, 0x78cace29 },
|
||||
{ 0x9cbb1800, 0xb0d30000, 0x92510000, 0xed930000, 0x593a4345, 0xe114d5f4, 0x430633da, 0x78cace29,
|
||||
0xc8934400, 0x5a3e0000, 0x57870000, 0x4c560000, 0xea982435, 0x75b11115, 0x28b67247, 0x2dd1f9ab },
|
||||
{ 0x29449c00, 0x64e70000, 0xf24b0000, 0xc2f30000, 0x0ede4e8f, 0x56c23745, 0xf3e04259, 0x8d0d9ec4,
|
||||
0x466d0c00, 0x08620000, 0xdd5d0000, 0xbadd0000, 0x6a927942, 0x441f2b93, 0x218ace6f, 0xbf2c0be2 },
|
||||
{ 0x466d0c00, 0x08620000, 0xdd5d0000, 0xbadd0000, 0x6a927942, 0x441f2b93, 0x218ace6f, 0xbf2c0be2, // 7
|
||||
0x6f299000, 0x6c850000, 0x2f160000, 0x782e0000, 0x644c37cd, 0x12dd1cd6, 0xd26a8c36, 0x32219526 },
|
||||
{ 0xf6800005, 0x3443c000, 0x24070000, 0x8f3d0000, 0x21373bfb, 0x0ab8d5ae, 0xcdc58b19, 0xd795ba31,
|
||||
0xa67f0001, 0x71378000, 0x19fc0000, 0x96db0000, 0x3a8b6dfd, 0xebcaaef3, 0x2c6d478f, 0xac8e6c88 },
|
||||
{ 0xa67f0001, 0x71378000, 0x19fc0000, 0x96db0000, 0x3a8b6dfd, 0xebcaaef3, 0x2c6d478f, 0xac8e6c88,
|
||||
0x50ff0004, 0x45744000, 0x3dfb0000, 0x19e60000, 0x1bbc5606, 0xe1727b5d, 0xe1a8cc96, 0x7b1bd6b9 },
|
||||
{ 0xf7750009, 0xcf3cc000, 0xc3d60000, 0x04920000, 0x029519a9, 0xf8e836ba, 0x7a87f14e, 0x9e16981a,
|
||||
0xd46a0000, 0x8dc8c000, 0xa5af0000, 0x4a290000, 0xfc4e427a, 0xc9b4866c, 0x98369604, 0xf746c320 },
|
||||
{ 0xd46a0000, 0x8dc8c000, 0xa5af0000, 0x4a290000, 0xfc4e427a, 0xc9b4866c, 0x98369604, 0xf746c320,
|
||||
0x231f0009, 0x42f40000, 0x66790000, 0x4ebb0000, 0xfedb5bd3, 0x315cb0d6, 0xe2b1674a, 0x69505b3a },
|
||||
{ 0x774400f0, 0xf15a0000, 0xf5b20000, 0x34140000, 0x89377e8c, 0x5a8bec25, 0x0bc3cd1e, 0xcf3775cb,
|
||||
0xf46c0050, 0x96180000, 0x14a50000, 0x031f0000, 0x42947eb8, 0x66bf7e19, 0x9ca470d2, 0x8a341574 },
|
||||
{ 0xf46c0050, 0x96180000, 0x14a50000, 0x031f0000, 0x42947eb8, 0x66bf7e19, 0x9ca470d2, 0x8a341574,
|
||||
0x832800a0, 0x67420000, 0xe1170000, 0x370b0000, 0xcba30034, 0x3c34923c, 0x9767bdcc, 0x450360bf },
|
||||
{ 0xe8870170, 0x9d720000, 0x12db0000, 0xd4220000, 0xf2886b27, 0xa921e543, 0x4ef8b518, 0x618813b1, // 14
|
||||
0xb4370060, 0x0c4c0000, 0x56c20000, 0x5cae0000, 0x94541f3f, 0x3b3ef825, 0x1b365f3d, 0xf3d45758 },
|
||||
{ 0xb4370060, 0x0c4c0000, 0x56c20000, 0x5cae0000, 0x94541f3f, 0x3b3ef825, 0x1b365f3d, 0xf3d45758,
|
||||
0x5cb00110, 0x913e0000, 0x44190000, 0x888c0000, 0x66dc7418, 0x921f1d66, 0x55ceea25, 0x925c44e9 },
|
||||
{ 0x0c720000, 0x49e50f00, 0x42790000, 0x5cea0000, 0x33aa301a, 0x15822514, 0x95a34b7b, 0xb44b0090,
|
||||
0xfe220000, 0xa7580500, 0x25d10000, 0xf7600000, 0x893178da, 0x1fd4f860, 0x4ed0a315, 0xa123ff9f },
|
||||
{ 0xfe220000, 0xa7580500, 0x25d10000, 0xf7600000, 0x893178da, 0x1fd4f860, 0x4ed0a315, 0xa123ff9f,
|
||||
0xf2500000, 0xeebd0a00, 0x67a80000, 0xab8a0000, 0xba9b48c0, 0x0a56dd74, 0xdb73e86e, 0x1568ff0f },
|
||||
{ 0x45180000, 0xa5b51700, 0xf96a0000, 0x3b480000, 0x1ecc142c, 0x231395d6, 0x16bca6b0, 0xdf33f4df,
|
||||
0xb83d0000, 0x16710600, 0x379a0000, 0xf5b10000, 0x228161ac, 0xae48f145, 0x66241616, 0xc5c1eb3e },
|
||||
{ 0xb83d0000, 0x16710600, 0x379a0000, 0xf5b10000, 0x228161ac, 0xae48f145, 0x66241616, 0xc5c1eb3e,
|
||||
0xfd250000, 0xb3c41100, 0xcef00000, 0xcef90000, 0x3c4d7580, 0x8d5b6493, 0x7098b0a6, 0x1af21fe1 },
|
||||
{ 0x75a40000, 0xc28b2700, 0x94a40000, 0x90f50000, 0xfb7857e0, 0x49ce0bae, 0x1767c483, 0xaedf667e,
|
||||
0xd1660000, 0x1bbc0300, 0x9eec0000, 0xf6940000, 0x03024527, 0xcf70fcf2, 0xb4431b17, 0x857f3c2b },
|
||||
{ 0xd1660000, 0x1bbc0300, 0x9eec0000, 0xf6940000, 0x03024527, 0xcf70fcf2, 0xb4431b17, 0x857f3c2b, // 21
|
||||
0xa4c20000, 0xd9372400, 0x0a480000, 0x66610000, 0xf87a12c7, 0x86bef75c, 0xa324df94, 0x2ba05a55 },
|
||||
{ 0x75c90003, 0x0e10c000, 0xd1200000, 0xbaea0000, 0x8bc42f3e, 0x8758b757, 0xbb28761d, 0x00b72e2b,
|
||||
0xeecf0001, 0x6f564000, 0xf33e0000, 0xa79e0000, 0xbdb57219, 0xb711ebc5, 0x4a3b40ba, 0xfeabf254 },
|
||||
{ 0xeecf0001, 0x6f564000, 0xf33e0000, 0xa79e0000, 0xbdb57219, 0xb711ebc5, 0x4a3b40ba, 0xfeabf254,
|
||||
0x9b060002, 0x61468000, 0x221e0000, 0x1d740000, 0x36715d27, 0x30495c92, 0xf11336a7, 0xfe1cdc7f },
|
||||
{ 0x86790000, 0x3f390002, 0xe19ae000, 0x98560000, 0x9565670e, 0x4e88c8ea, 0xd3dd4944, 0x161ddab9,
|
||||
0x30b70000, 0xe5d00000, 0xf4f46000, 0x42c40000, 0x63b83d6a, 0x78ba9460, 0x21afa1ea, 0xb0a51834 },
|
||||
{ 0x30b70000, 0xe5d00000, 0xf4f46000, 0x42c40000, 0x63b83d6a, 0x78ba9460, 0x21afa1ea, 0xb0a51834,
|
||||
0xb6ce0000, 0xdae90002, 0x156e8000, 0xda920000, 0xf6dd5a64, 0x36325c8a, 0xf272e8ae, 0xa6b8c28d },
|
||||
{ 0x14190000, 0x23ca003c, 0x50df0000, 0x44b60000, 0x1b6c67b0, 0x3cf3ac75, 0x61e610b0, 0xdbcadb80,
|
||||
0xe3430000, 0x3a4e0014, 0xf2c60000, 0xaa4e0000, 0xdb1e42a6, 0x256bbe15, 0x123db156, 0x3a4e99d7 },
|
||||
{ 0xe3430000, 0x3a4e0014, 0xf2c60000, 0xaa4e0000, 0xdb1e42a6, 0x256bbe15, 0x123db156, 0x3a4e99d7,
|
||||
0xf75a0000, 0x19840028, 0xa2190000, 0xeef80000, 0xc0722516, 0x19981260, 0x73dba1e6, 0xe1844257 },
|
||||
{ 0x54500000, 0x0671005c, 0x25ae0000, 0x6a1e0000, 0x2ea54edf, 0x664e8512, 0xbfba18c3, 0x7e715d17, // 28
|
||||
0xbc8d0000, 0xfc3b0018, 0x19830000, 0xd10b0000, 0xae1878c4, 0x42a69856, 0x0012da37, 0x2c3b504e },
|
||||
{ 0xbc8d0000, 0xfc3b0018, 0x19830000, 0xd10b0000, 0xae1878c4, 0x42a69856, 0x0012da37, 0x2c3b504e,
|
||||
0xe8dd0000, 0xfa4a0044, 0x3c2d0000, 0xbb150000, 0x80bd361b, 0x24e81d44, 0xbfa8c2f4, 0x524a0d59 },
|
||||
{ 0x69510000, 0xd4e1009c, 0xc3230000, 0xac2f0000, 0xe4950bae, 0xcea415dc, 0x87ec287c, 0xbce1a3ce,
|
||||
0xc6730000, 0xaf8d000c, 0xa4c10000, 0x218d0000, 0x23111587, 0x7913512f, 0x1d28ac88, 0x378dd173 },
|
||||
{ 0xc6730000, 0xaf8d000c, 0xa4c10000, 0x218d0000, 0x23111587, 0x7913512f, 0x1d28ac88, 0x378dd173,
|
||||
0xaf220000, 0x7b6c0090, 0x67e20000, 0x8da20000, 0xc7841e29, 0xb7b744f3, 0x9ac484f4, 0x8b6c72bd },
|
||||
{ 0xcc140000, 0xa5630000, 0x5ab90780, 0x3b500000, 0x4bd013ff, 0x879b3418, 0x694348c1, 0xca5a87fe,
|
||||
0x819e0000, 0xec570000, 0x66320280, 0x95f30000, 0x5da92802, 0x48f43cbc, 0xe65aa22d, 0x8e67b7fa },
|
||||
{ 0x819e0000, 0xec570000, 0x66320280, 0x95f30000, 0x5da92802, 0x48f43cbc, 0xe65aa22d, 0x8e67b7fa,
|
||||
0x4d8a0000, 0x49340000, 0x3c8b0500, 0xaea30000, 0x16793bfd, 0xcf6f08a4, 0x8f19eaec, 0x443d3004 },
|
||||
{ 0x78230000, 0x12fc0000, 0xa93a0b80, 0x90a50000, 0x713e2879, 0x7ee98924, 0xf08ca062, 0x636f8bab,
|
||||
0x02af0000, 0xb7280000, 0xba1c0300, 0x56980000, 0xba8d45d3, 0x8048c667, 0xa95c149a, 0xf4f6ea7b },
|
||||
{ 0x02af0000, 0xb7280000, 0xba1c0300, 0x56980000, 0xba8d45d3, 0x8048c667, 0xa95c149a, 0xf4f6ea7b, // 35
|
||||
0x7a8c0000, 0xa5d40000, 0x13260880, 0xc63d0000, 0xcbb36daa, 0xfea14f43, 0x59d0b4f8, 0x979961d0 },
|
||||
{ 0xac480000, 0x1ba60000, 0x45fb1380, 0x03430000, 0x5a85316a, 0x1fb250b6, 0xfe72c7fe, 0x91e478f6,
|
||||
0x1e4e0000, 0xdecf0000, 0x6df80180, 0x77240000, 0xec47079e, 0xf4a0694e, 0xcda31812, 0x98aa496e },
|
||||
{ 0x1e4e0000, 0xdecf0000, 0x6df80180, 0x77240000, 0xec47079e, 0xf4a0694e, 0xcda31812, 0x98aa496e,
|
||||
0xb2060000, 0xc5690000, 0x28031200, 0x74670000, 0xb6c236f4, 0xeb1239f8, 0x33d1dfec, 0x094e3198 },
|
||||
{ 0xaec30000, 0x9c4f0001, 0x79d1e000, 0x2c150000, 0x45cc75b3, 0x6650b736, 0xab92f78f, 0xa312567b,
|
||||
0xdb250000, 0x09290000, 0x49aac000, 0x81e10000, 0xcafe6b59, 0x42793431, 0x43566b76, 0xe86cba2e },
|
||||
{ 0xdb250000, 0x09290000, 0x49aac000, 0x81e10000, 0xcafe6b59, 0x42793431, 0x43566b76, 0xe86cba2e,
|
||||
0x75e60000, 0x95660001, 0x307b2000, 0xadf40000, 0x8f321eea, 0x24298307, 0xe8c49cf9, 0x4b7eec55 },
|
||||
{ 0x58430000, 0x807e0000, 0x78330001, 0xc66b3800, 0xe7375cdc, 0x79ad3fdd, 0xac73fe6f, 0x3a4479b1,
|
||||
0x1d5a0000, 0x2b720000, 0x488d0000, 0xaf611800, 0x25cb2ec5, 0xc879bfd0, 0x81a20429, 0x1e7536a6 },
|
||||
{ 0x1d5a0000, 0x2b720000, 0x488d0000, 0xaf611800, 0x25cb2ec5, 0xc879bfd0, 0x81a20429, 0x1e7536a6,
|
||||
0x45190000, 0xab0c0000, 0x30be0001, 0x690a2000, 0xc2fc7219, 0xb1d4800d, 0x2dd1fa46, 0x24314f17 },
|
||||
{ 0xa53b0000, 0x14260000, 0x4e30001e, 0x7cae0000, 0x8f9e0dd5, 0x78dfaa3d, 0xf73168d8, 0x0b1b4946, // 42
|
||||
0x07ed0000, 0xb2500000, 0x8774000a, 0x970d0000, 0x437223ae, 0x48c76ea4, 0xf4786222, 0x9075b1ce },
|
||||
{ 0x07ed0000, 0xb2500000, 0x8774000a, 0x970d0000, 0x437223ae, 0x48c76ea4, 0xf4786222, 0x9075b1ce,
|
||||
0xa2d60000, 0xa6760000, 0xc9440014, 0xeba30000, 0xccec2e7b, 0x3018c499, 0x03490afa, 0x9b6ef888 },
|
||||
{ 0x88980000, 0x1f940000, 0x7fcf002e, 0xfb4e0000, 0xf158079a, 0x61ae9167, 0xa895706c, 0xe6107494,
|
||||
0x0bc20000, 0xdb630000, 0x7e88000c, 0x15860000, 0x91fd48f3, 0x7581bb43, 0xf460449e, 0xd8b61463 },
|
||||
{ 0x0bc20000, 0xdb630000, 0x7e88000c, 0x15860000, 0x91fd48f3, 0x7581bb43, 0xf460449e, 0xd8b61463,
|
||||
0x835a0000, 0xc4f70000, 0x01470022, 0xeec80000, 0x60a54f69, 0x142f2a24, 0x5cf534f2, 0x3ea660f7 },
|
||||
{ 0x52500000, 0x29540000, 0x6a61004e, 0xf0ff0000, 0x9a317eec, 0x452341ce, 0xcf568fe5, 0x5303130f,
|
||||
0x538d0000, 0xa9fc0000, 0x9ef70006, 0x56ff0000, 0x0ae4004e, 0x92c5cdf9, 0xa9444018, 0x7f975691 },
|
||||
{ 0x538d0000, 0xa9fc0000, 0x9ef70006, 0x56ff0000, 0x0ae4004e, 0x92c5cdf9, 0xa9444018, 0x7f975691,
|
||||
0x01dd0000, 0x80a80000, 0xf4960048, 0xa6000000, 0x90d57ea2, 0xd7e68c37, 0x6612cffd, 0x2c94459e },
|
||||
{ 0xe6280000, 0x4c4b0000, 0xa8550000, 0xd3d002e0, 0xd86130b8, 0x98a7b0da, 0x289506b4, 0xd75a4897,
|
||||
0xf0c50000, 0x59230000, 0x45820000, 0xe18d00c0, 0x3b6d0631, 0xc2ed5699, 0xcbe0fe1c, 0x56a7b19f },
|
||||
{ 0xf0c50000, 0x59230000, 0x45820000, 0xe18d00c0, 0x3b6d0631, 0xc2ed5699, 0xcbe0fe1c, 0x56a7b19f, // 49
|
||||
0x16ed0000, 0x15680000, 0xedd70000, 0x325d0220, 0xe30c3689, 0x5a4ae643, 0xe375f8a8, 0x81fdf908 },
|
||||
{ 0xb4310000, 0x77330000, 0xb15d0000, 0x7fd004e0, 0x78a26138, 0xd116c35d, 0xd256d489, 0x4e6f74de,
|
||||
0xe3060000, 0xbdc10000, 0x87130000, 0xbff20060, 0x2eba0a1a, 0x8db53751, 0x73c5ab06, 0x5bd61539 },
|
||||
{ 0xe3060000, 0xbdc10000, 0x87130000, 0xbff20060, 0x2eba0a1a, 0x8db53751, 0x73c5ab06, 0x5bd61539,
|
||||
0x57370000, 0xcaf20000, 0x364e0000, 0xc0220480, 0x56186b22, 0x5ca3f40c, 0xa1937f8f, 0x15b961e7 },
|
||||
{ 0x02f20000, 0xa2810000, 0x873f0000, 0xe36c7800, 0x1e1d74ef, 0x073d2bd6, 0xc4c23237, 0x7f32259e,
|
||||
0xbadd0000, 0x13ad0000, 0xb7e70000, 0xf7282800, 0xdf45144d, 0x361ac33a, 0xea5a8d14, 0x2a2c18f0 },
|
||||
{ 0xbadd0000, 0x13ad0000, 0xb7e70000, 0xf7282800, 0xdf45144d, 0x361ac33a, 0xea5a8d14, 0x2a2c18f0,
|
||||
0xb82f0000, 0xb12c0000, 0x30d80000, 0x14445000, 0xc15860a2, 0x3127e8ec, 0x2e98bf23, 0x551e3d6e },
|
||||
{ 0x1e6c0000, 0xc4420000, 0x8a2e0000, 0xbcb6b800, 0x2c4413b6, 0x8bfdd3da, 0x6a0c1bc8, 0xb99dc2eb,
|
||||
0x92560000, 0x1eda0000, 0xea510000, 0xe8b13000, 0xa93556a5, 0xebfb6199, 0xb15c2254, 0x33c5244f },
|
||||
{ 0x92560000, 0x1eda0000, 0xea510000, 0xe8b13000, 0xa93556a5, 0xebfb6199, 0xb15c2254, 0x33c5244f,
|
||||
0x8c3a0000, 0xda980000, 0x607f0000, 0x54078800, 0x85714513, 0x6006b243, 0xdb50399c, 0x8a58e6a4 },
|
||||
{ 0x033d0000, 0x08b30000, 0xf33a0000, 0x3ac20007, 0x51298a50, 0x6b6e661f, 0x0ea5cfe3, 0xe6da7ffe, // 56
|
||||
0xa8da0000, 0x96be0000, 0x5c1d0000, 0x07da0002, 0x7d669583, 0x1f98708a, 0xbb668808, 0xda878000 },
|
||||
{ 0xa8da0000, 0x96be0000, 0x5c1d0000, 0x07da0002, 0x7d669583, 0x1f98708a, 0xbb668808, 0xda878000,
|
||||
0xabe70000, 0x9e0d0000, 0xaf270000, 0x3d180005, 0x2c4f1fd3, 0x74f61695, 0xb5c347eb, 0x3c5dfffe },
|
||||
{ 0x01930000, 0xe7820000, 0xedfb0000, 0xcf0c000b, 0x8dd08d58, 0xbca3b42e, 0x063661e1, 0x536f9e7b,
|
||||
0x92280000, 0xdc850000, 0x57fa0000, 0x56dc0003, 0xbae92316, 0x5aefa30c, 0x90cef752, 0x7b1675d7 },
|
||||
{ 0x92280000, 0xdc850000, 0x57fa0000, 0x56dc0003, 0xbae92316, 0x5aefa30c, 0x90cef752, 0x7b1675d7,
|
||||
0x93bb0000, 0x3b070000, 0xba010000, 0x99d00008, 0x3739ae4e, 0xe64c1722, 0x96f896b3, 0x2879ebac },
|
||||
{ 0x5fa80000, 0x56030000, 0x43ae0000, 0x64f30013, 0x257e86bf, 0x1311944e, 0x541e95bf, 0x8ea4db69,
|
||||
0x00440000, 0x7f480000, 0xda7c0000, 0x2a230001, 0x3badc9cc, 0xa9b69c87, 0x030a9e60, 0xbe0a679e },
|
||||
{ 0x00440000, 0x7f480000, 0xda7c0000, 0x2a230001, 0x3badc9cc, 0xa9b69c87, 0x030a9e60, 0xbe0a679e,
|
||||
0x5fec0000, 0x294b0000, 0x99d20000, 0x4ed00012, 0x1ed34f73, 0xbaa708c9, 0x57140bdf, 0x30aebcf7 },
|
||||
{ 0xee930000, 0xd6070000, 0x92c10000, 0x2b9801e0, 0x9451287c, 0x3b6cfb57, 0x45312374, 0x201f6a64,
|
||||
0x7b280000, 0x57420000, 0xa9e50000, 0x634300a0, 0x9edb442f, 0x6d9995bb, 0x27f83b03, 0xc7ff60f0 },
|
||||
{ 0x7b280000, 0x57420000, 0xa9e50000, 0x634300a0, 0x9edb442f, 0x6d9995bb, 0x27f83b03, 0xc7ff60f0,
|
||||
0x95bb0000, 0x81450000, 0x3b240000, 0x48db0140, 0x0a8a6c53, 0x56f56eec, 0x62c91877, 0xe7e00a94 }
|
||||
};
|
||||
|
||||
__global__
|
||||
@ -598,12 +328,12 @@ void x13_hamsi512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t *
|
||||
uint32_t *Hash = (uint32_t*)&g_hash[hashPosition<<3];
|
||||
unsigned char *h1 = (unsigned char *)Hash;
|
||||
|
||||
uint32_t c0 = SPH_C32(0x73746565), c1 = SPH_C32(0x6c706172), c2 = SPH_C32(0x6b204172), c3 = SPH_C32(0x656e6265);
|
||||
uint32_t c4 = SPH_C32(0x72672031), c5 = SPH_C32(0x302c2062), c6 = SPH_C32(0x75732032), c7 = SPH_C32(0x3434362c);
|
||||
uint32_t c8 = SPH_C32(0x20422d33), c9 = SPH_C32(0x30303120), cA = SPH_C32(0x4c657576), cB = SPH_C32(0x656e2d48);
|
||||
uint32_t cC = SPH_C32(0x65766572), cD = SPH_C32(0x6c65652c), cE = SPH_C32(0x2042656c), cF = SPH_C32(0x6769756d);
|
||||
uint32_t m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, mA, mB, mC, mD, mE, mF;
|
||||
uint32_t c0 = 0x73746565, c1 = 0x6c706172, c2 = 0x6b204172, c3 = 0x656e6265;
|
||||
uint32_t c4 = 0x72672031, c5 = 0x302c2062, c6 = 0x75732032, c7 = 0x3434362c;
|
||||
uint32_t c8 = 0x20422d33, c9 = 0x30303120, cA = 0x4c657576, cB = 0x656e2d48;
|
||||
uint32_t cC = 0x65766572, cD = 0x6c65652c, cE = 0x2042656c, cF = 0x6769756d;
|
||||
uint32_t h[16] = { c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, cA, cB, cC, cD, cE, cF };
|
||||
uint32_t m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, mA, mB, mC, mD, mE, mF;
|
||||
uint32_t *tp, db, dm;
|
||||
|
||||
for(int i = 0; i < 64; i += 8) {
|
||||
@ -637,16 +367,16 @@ void x13_hamsi512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t *
|
||||
T_BIG;
|
||||
}
|
||||
|
||||
// precomputed for 64 bytes blocks ?
|
||||
tp = &d_T512[0][0] + 112;
|
||||
|
||||
m0 = *(tp+ 0); m1 = *(tp+ 1);
|
||||
m2 = *(tp+ 2); m3 = *(tp+ 3);
|
||||
m4 = *(tp+ 4); m5 = *(tp+ 5);
|
||||
m6 = *(tp+ 6); m7 = *(tp+ 7);
|
||||
m8 = *(tp+ 8); m9 = *(tp+ 9);
|
||||
mA = *(tp+10); mB = *(tp+11);
|
||||
mC = *(tp+12); mD = *(tp+13);
|
||||
mE = *(tp+14); mF = *(tp+15);
|
||||
m0 = tp[ 0]; m1 = tp[ 1];
|
||||
m2 = tp[ 2]; m3 = tp[ 3];
|
||||
m4 = tp[ 4]; m5 = tp[ 5];
|
||||
m6 = tp[ 6]; m7 = tp[ 7];
|
||||
m8 = tp[ 8]; m9 = tp[ 9];
|
||||
mA = tp[10]; mB = tp[11];
|
||||
mC = tp[12]; mD = tp[13];
|
||||
mE = tp[14]; mF = tp[15];
|
||||
|
||||
for( int r = 0; r < 6; r += 2 ) {
|
||||
ROUND_BIG(r, d_alpha_n);
|
||||
@ -655,15 +385,14 @@ void x13_hamsi512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t *
|
||||
T_BIG;
|
||||
|
||||
tp = &d_T512[0][0] + 784;
|
||||
|
||||
m0 = *(tp+ 0); m1 = *(tp+ 1);
|
||||
m2 = *(tp+ 2); m3 = *(tp+ 3);
|
||||
m4 = *(tp+ 4); m5 = *(tp+ 5);
|
||||
m6 = *(tp+ 6); m7 = *(tp+ 7);
|
||||
m8 = *(tp+ 8); m9 = *(tp+ 9);
|
||||
mA = *(tp+10); mB = *(tp+11);
|
||||
mC = *(tp+12); mD = *(tp+13);
|
||||
mE = *(tp+14); mF = *(tp+15);
|
||||
m0 = tp[ 0]; m1 = tp[ 1];
|
||||
m2 = tp[ 2]; m3 = tp[ 3];
|
||||
m4 = tp[ 4]; m5 = tp[ 5];
|
||||
m6 = tp[ 6]; m7 = tp[ 7];
|
||||
m8 = tp[ 8]; m9 = tp[ 9];
|
||||
mA = tp[10]; mB = tp[11];
|
||||
mC = tp[12]; mD = tp[13];
|
||||
mE = tp[14]; mF = tp[15];
|
||||
|
||||
for( int r = 0; r < 12; r += 2 ) {
|
||||
ROUND_BIG(r, d_alpha_f);
|
||||
@ -696,3 +425,127 @@ void x13_hamsi512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce
|
||||
x13_hamsi512_gpu_hash_64<<<grid, block>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
|
||||
//MyStreamSynchronize(NULL, order, thr_id);
|
||||
}
|
||||
|
||||
__constant__ static uint64_t c_PaddedMessage80[10];
|
||||
|
||||
__host__
|
||||
void x16_hamsi512_setBlock_80(void *pdata)
|
||||
{
|
||||
cudaMemcpyToSymbol(c_PaddedMessage80, pdata, sizeof(c_PaddedMessage80), 0, cudaMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
__global__
|
||||
void x16_hamsi512_gpu_hash_80(const uint32_t threads, const uint32_t startNonce, uint64_t *g_hash)
|
||||
{
|
||||
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||
if (thread < threads)
|
||||
{
|
||||
unsigned char h1[80];
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 10; i++)
|
||||
((uint2*)h1)[i] = ((uint2*)c_PaddedMessage80)[i];
|
||||
//((uint64_t*)h1)[9] = REPLACE_HIDWORD(c_PaddedMessage80[9], cuda_swab32(startNonce + thread));
|
||||
((uint32_t*)h1)[19] = cuda_swab32(startNonce + thread);
|
||||
|
||||
uint32_t c0 = 0x73746565, c1 = 0x6c706172, c2 = 0x6b204172, c3 = 0x656e6265;
|
||||
uint32_t c4 = 0x72672031, c5 = 0x302c2062, c6 = 0x75732032, c7 = 0x3434362c;
|
||||
uint32_t c8 = 0x20422d33, c9 = 0x30303120, cA = 0x4c657576, cB = 0x656e2d48;
|
||||
uint32_t cC = 0x65766572, cD = 0x6c65652c, cE = 0x2042656c, cF = 0x6769756d;
|
||||
uint32_t h[16] = { c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, cA, cB, cC, cD, cE, cF };
|
||||
uint32_t m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, mA, mB, mC, mD, mE, mF;
|
||||
uint32_t *tp, db, dm;
|
||||
|
||||
for(int i = 0; i < 80; i += 8)
|
||||
{
|
||||
m0 = 0; m1 = 0; m2 = 0; m3 = 0; m4 = 0; m5 = 0; m6 = 0; m7 = 0;
|
||||
m8 = 0; m9 = 0; mA = 0; mB = 0; mC = 0; mD = 0; mE = 0; mF = 0;
|
||||
tp = &d_T512[0][0];
|
||||
|
||||
#pragma unroll
|
||||
for (int u = 0; u < 8; u++) {
|
||||
db = h1[i + u];
|
||||
#pragma unroll 2
|
||||
for (int v = 0; v < 8; v++, db >>= 1) {
|
||||
dm = -(uint32_t)(db & 1);
|
||||
m0 ^= dm & tp[ 0]; m1 ^= dm & tp[ 1];
|
||||
m2 ^= dm & tp[ 2]; m3 ^= dm & tp[ 3];
|
||||
m4 ^= dm & tp[ 4]; m5 ^= dm & tp[ 5];
|
||||
m6 ^= dm & tp[ 6]; m7 ^= dm & tp[ 7];
|
||||
m8 ^= dm & tp[ 8]; m9 ^= dm & tp[ 9];
|
||||
mA ^= dm & tp[10]; mB ^= dm & tp[11];
|
||||
mC ^= dm & tp[12]; mD ^= dm & tp[13];
|
||||
mE ^= dm & tp[14]; mF ^= dm & tp[15];
|
||||
tp += 16;
|
||||
}
|
||||
}
|
||||
|
||||
#pragma unroll
|
||||
for (int r = 0; r < 6; r++) {
|
||||
ROUND_BIG(r, d_alpha_n);
|
||||
}
|
||||
T_BIG;
|
||||
}
|
||||
|
||||
#define INPUT_BIG { \
|
||||
m0 = 0; m1 = 0; m2 = 0; m3 = 0; m4 = 0; m5 = 0; m6 = 0; m7 = 0; \
|
||||
m8 = 0; m9 = 0; mA = 0; mB = 0; mC = 0; mD = 0; mE = 0; mF = 0; \
|
||||
tp = &d_T512[0][0]; \
|
||||
for (int u = 0; u < 8; u++) { \
|
||||
db = endtag[u]; \
|
||||
for (int v = 0; v < 8; v++, db >>= 1) { \
|
||||
dm = -(uint32_t)(db & 1); \
|
||||
m0 ^= dm & tp[ 0]; m1 ^= dm & tp[ 1]; \
|
||||
m2 ^= dm & tp[ 2]; m3 ^= dm & tp[ 3]; \
|
||||
m4 ^= dm & tp[ 4]; m5 ^= dm & tp[ 5]; \
|
||||
m6 ^= dm & tp[ 6]; m7 ^= dm & tp[ 7]; \
|
||||
m8 ^= dm & tp[ 8]; m9 ^= dm & tp[ 9]; \
|
||||
mA ^= dm & tp[10]; mB ^= dm & tp[11]; \
|
||||
mC ^= dm & tp[12]; mD ^= dm & tp[13]; \
|
||||
mE ^= dm & tp[14]; mF ^= dm & tp[15]; \
|
||||
tp += 16; \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
// close
|
||||
uint8_t endtag[8] = { 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
|
||||
INPUT_BIG;
|
||||
|
||||
#pragma unroll
|
||||
for (int r = 0; r < 6; r++) {
|
||||
ROUND_BIG(r, d_alpha_n);
|
||||
}
|
||||
T_BIG;
|
||||
|
||||
endtag[0] = endtag[1] = 0x00;
|
||||
endtag[6] = 0x02;
|
||||
endtag[7] = 0x80;
|
||||
INPUT_BIG;
|
||||
|
||||
// PF_BIG
|
||||
#pragma unroll
|
||||
for(int r = 0; r < 12; r++) {
|
||||
ROUND_BIG(r, d_alpha_f);
|
||||
}
|
||||
T_BIG;
|
||||
|
||||
uint64_t hashPosition = thread;
|
||||
uint32_t *Hash = (uint32_t*)&g_hash[hashPosition << 3];
|
||||
#pragma unroll 16
|
||||
for(int i = 0; i < 16; i++)
|
||||
Hash[i] = cuda_swab32(h[i]);
|
||||
|
||||
#undef INPUT_BIG
|
||||
}
|
||||
}
|
||||
|
||||
__host__
|
||||
void x16_hamsi512_cuda_hash_80(int thr_id, const uint32_t threads, const uint32_t startNounce, uint32_t *d_hash)
|
||||
{
|
||||
const uint32_t threadsperblock = 128;
|
||||
|
||||
dim3 grid((threads + threadsperblock - 1) / threadsperblock);
|
||||
dim3 block(threadsperblock);
|
||||
|
||||
x16_hamsi512_gpu_hash_80 <<<grid, block>>> (threads, startNounce, (uint64_t*)d_hash);
|
||||
}
|
||||
|
@ -1998,7 +1998,7 @@ const int i0, const int i1, const int i2, const int i3, const int i4, const int
|
||||
|
||||
|
||||
__global__
|
||||
void oldwhirlpool_gpu_hash_80(uint32_t threads, uint32_t startNounce, void *outputHash, int swab)
|
||||
void oldwhirlpool_gpu_hash_80(const uint32_t threads, const uint32_t startNounce, void *outputHash, int swab)
|
||||
{
|
||||
__shared__ uint64_t sharedMemory[2048];
|
||||
|
||||
@ -2014,7 +2014,8 @@ void oldwhirlpool_gpu_hash_80(uint32_t threads, uint32_t startNounce, void *outp
|
||||
sharedMemory[threadIdx.x+1792] = mixTob7Tox[threadIdx.x];
|
||||
#endif
|
||||
}
|
||||
__threadfence_block(); // ensure shared mem is ready
|
||||
//__threadfence_block(); // ensure shared mem is ready
|
||||
__syncthreads();
|
||||
|
||||
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||
if (thread < threads)
|
||||
@ -2028,7 +2029,8 @@ void oldwhirlpool_gpu_hash_80(uint32_t threads, uint32_t startNounce, void *outp
|
||||
uint64_t state[8];
|
||||
#pragma unroll 8
|
||||
for (int i=0; i < 8; i++) {
|
||||
state[i] = c_PaddedMessage80[i];
|
||||
//state[i] = c_PaddedMessage80[i];
|
||||
AS_UINT2(&state[i]) = AS_UINT2(&c_PaddedMessage80[i]);
|
||||
}
|
||||
#else
|
||||
#pragma unroll 8
|
||||
@ -2050,6 +2052,7 @@ void oldwhirlpool_gpu_hash_80(uint32_t threads, uint32_t startNounce, void *outp
|
||||
state[i] = xor1(n[i],c_PaddedMessage80[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
/// round 2 ///////
|
||||
//////////////////////////////////
|
||||
n[0] = c_PaddedMessage80[8]; //read data
|
||||
@ -2331,7 +2334,7 @@ extern uint32_t whirlpool512_finalhash_64(int thr_id, uint32_t threads, uint32_t
|
||||
}
|
||||
|
||||
__host__
|
||||
void whirlpool512_hash_80_sm3(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_outputHash)
|
||||
void whirlpool512_hash_80_sm3(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_outputHash)
|
||||
{
|
||||
dim3 grid((threads + threadsperblock-1) / threadsperblock);
|
||||
dim3 block(threadsperblock);
|
||||
@ -2339,7 +2342,7 @@ void whirlpool512_hash_80_sm3(int thr_id, uint32_t threads, uint32_t startNounce
|
||||
if (threads < 256)
|
||||
applog(LOG_WARNING, "whirlpool requires a minimum of 256 threads to fetch constant tables!");
|
||||
|
||||
oldwhirlpool_gpu_hash_80<<<grid, block>>>(threads, startNounce, d_outputHash, 1);
|
||||
oldwhirlpool_gpu_hash_80<<<grid, block>>>(threads, startNonce, d_outputHash, 1);
|
||||
}
|
||||
|
||||
extern void whirl_midstate(void *state, const void *input);
|
||||
@ -2363,3 +2366,54 @@ void whirlpool512_setBlock_80_sm3(void *pdata, const void *ptarget)
|
||||
cudaMemcpyToSymbol(c_PaddedMessage80, PaddedMessage, 128, 0, cudaMemcpyHostToDevice);
|
||||
cudaMemcpyToSymbol(pTarget, ptarget, 32, 0, cudaMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
|
||||
__host__
|
||||
void x16_whirlpool512_init(int thr_id, uint32_t threads)
|
||||
{
|
||||
cudaMemcpyToSymbol(InitVector_RC, plain_RC, sizeof(plain_RC), 0, cudaMemcpyHostToDevice);
|
||||
cudaMemcpyToSymbol(mixTob0Tox, plain_T0, sizeof(plain_T0), 0, cudaMemcpyHostToDevice);
|
||||
#if USE_ALL_TABLES
|
||||
cudaMemcpyToSymbol(mixTob1Tox, plain_T1, (256 * 8), 0, cudaMemcpyHostToDevice);
|
||||
cudaMemcpyToSymbol(mixTob2Tox, plain_T2, (256 * 8), 0, cudaMemcpyHostToDevice);
|
||||
cudaMemcpyToSymbol(mixTob3Tox, plain_T3, (256 * 8), 0, cudaMemcpyHostToDevice);
|
||||
cudaMemcpyToSymbol(mixTob4Tox, plain_T4, (256 * 8), 0, cudaMemcpyHostToDevice);
|
||||
cudaMemcpyToSymbol(mixTob5Tox, plain_T5, (256 * 8), 0, cudaMemcpyHostToDevice);
|
||||
cudaMemcpyToSymbol(mixTob6Tox, plain_T6, (256 * 8), 0, cudaMemcpyHostToDevice);
|
||||
cudaMemcpyToSymbol(mixTob7Tox, plain_T7, (256 * 8), 0, cudaMemcpyHostToDevice);
|
||||
#endif
|
||||
}
|
||||
|
||||
extern void whirlpool_midstate(void *state, const void *input);
|
||||
|
||||
__host__
|
||||
void x16_whirlpool512_setBlock_80(void *pdata)
|
||||
{
|
||||
unsigned char PaddedMessage[128];
|
||||
|
||||
memcpy(PaddedMessage, pdata, 80);
|
||||
memset(PaddedMessage + 80, 0, 48);
|
||||
PaddedMessage[80] = 0x80; /* ending */
|
||||
|
||||
#if HOST_MIDSTATE
|
||||
// compute constant first block
|
||||
unsigned char midstate[64] = { 0 };
|
||||
whirlpool_midstate(midstate, pdata);
|
||||
memcpy(PaddedMessage, midstate, 64);
|
||||
#endif
|
||||
|
||||
cudaMemcpyToSymbol(c_PaddedMessage80, PaddedMessage, 128, 0, cudaMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
__host__
|
||||
void x16_whirlpool512_hash_80(int thr_id, const uint32_t threads, const uint32_t startNonce, uint32_t *d_outputHash)
|
||||
{
|
||||
dim3 grid((threads + threadsperblock - 1) / threadsperblock);
|
||||
dim3 block(threadsperblock);
|
||||
|
||||
if (threads < 256)
|
||||
applog(LOG_WARNING, "whirlpool requires a minimum of 256 threads to fetch constant tables!");
|
||||
|
||||
oldwhirlpool_gpu_hash_80 <<<grid, block>>> (threads, startNonce, d_outputHash, 1);
|
||||
}
|
||||
|
214
x16r/cuda_x16_echo512.cu
Normal file
214
x16r/cuda_x16_echo512.cu
Normal file
@ -0,0 +1,214 @@
|
||||
/**
|
||||
* echo512-80 cuda kernel for X16R algorithm
|
||||
*
|
||||
* tpruvot 2018 - GPL code
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <memory.h>
|
||||
|
||||
#include "cuda_helper.h"
|
||||
|
||||
extern __device__ __device_builtin__ void __threadfence_block(void);
|
||||
|
||||
#include "../x11/cuda_x11_aes.cuh"
|
||||
|
||||
__device__ __forceinline__ void AES_2ROUND(const uint32_t* __restrict__ sharedMemory,
|
||||
uint32_t &x0, uint32_t &x1, uint32_t &x2, uint32_t &x3,
|
||||
uint32_t &k0)
|
||||
{
|
||||
uint32_t y0, y1, y2, y3;
|
||||
|
||||
aes_round(sharedMemory,
|
||||
x0, x1, x2, x3,
|
||||
k0,
|
||||
y0, y1, y2, y3);
|
||||
|
||||
aes_round(sharedMemory,
|
||||
y0, y1, y2, y3,
|
||||
x0, x1, x2, x3);
|
||||
|
||||
k0++;
|
||||
}
|
||||
|
||||
__device__
|
||||
static void echo_round(uint32_t* const sharedMemory, uint32_t *W, uint32_t &k0)
|
||||
{
|
||||
// Big Sub Words
|
||||
#pragma unroll 16
|
||||
for (int idx = 0; idx < 16; idx++) {
|
||||
AES_2ROUND(sharedMemory, W[(idx << 2) + 0], W[(idx << 2) + 1], W[(idx << 2) + 2], W[(idx << 2) + 3], k0);
|
||||
}
|
||||
|
||||
// Shift Rows
|
||||
#pragma unroll 4
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
uint32_t t[4];
|
||||
/// 1, 5, 9, 13
|
||||
t[0] = W[i + 4];
|
||||
t[1] = W[i + 8];
|
||||
t[2] = W[i + 24];
|
||||
t[3] = W[i + 60];
|
||||
|
||||
W[i + 4] = W[i + 20];
|
||||
W[i + 8] = W[i + 40];
|
||||
W[i + 24] = W[i + 56];
|
||||
W[i + 60] = W[i + 44];
|
||||
|
||||
W[i + 20] = W[i + 36];
|
||||
W[i + 40] = t[1];
|
||||
W[i + 56] = t[2];
|
||||
W[i + 44] = W[i + 28];
|
||||
|
||||
W[i + 28] = W[i + 12];
|
||||
W[i + 12] = t[3];
|
||||
W[i + 36] = W[i + 52];
|
||||
W[i + 52] = t[0];
|
||||
}
|
||||
|
||||
// Mix Columns
|
||||
#pragma unroll 4
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
#pragma unroll 4
|
||||
for (int idx = 0; idx < 64; idx += 16)
|
||||
{
|
||||
uint32_t a[4];
|
||||
a[0] = W[idx + i];
|
||||
a[1] = W[idx + i + 4];
|
||||
a[2] = W[idx + i + 8];
|
||||
a[3] = W[idx + i + 12];
|
||||
|
||||
uint32_t ab = a[0] ^ a[1];
|
||||
uint32_t bc = a[1] ^ a[2];
|
||||
uint32_t cd = a[2] ^ a[3];
|
||||
|
||||
uint32_t t, t2, t3;
|
||||
t = (ab & 0x80808080);
|
||||
t2 = (bc & 0x80808080);
|
||||
t3 = (cd & 0x80808080);
|
||||
|
||||
uint32_t abx = (t >> 7) * 27U ^ ((ab^t) << 1);
|
||||
uint32_t bcx = (t2 >> 7) * 27U ^ ((bc^t2) << 1);
|
||||
uint32_t cdx = (t3 >> 7) * 27U ^ ((cd^t3) << 1);
|
||||
|
||||
W[idx + i] = bc ^ a[3] ^ abx;
|
||||
W[idx + i + 4] = a[0] ^ cd ^ bcx;
|
||||
W[idx + i + 8] = ab ^ a[3] ^ cdx;
|
||||
W[idx + i + 12] = ab ^ a[2] ^ (abx ^ bcx ^ cdx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__device__ __forceinline__
|
||||
void cuda_echo_round_80(uint32_t *const __restrict__ sharedMemory, uint32_t *const __restrict__ data, const uint32_t nonce, uint32_t *hash)
|
||||
{
|
||||
uint32_t h[29]; // <= 127 bytes input
|
||||
|
||||
#pragma unroll 8
|
||||
for (int i = 0; i < 18; i += 2)
|
||||
AS_UINT2(&h[i]) = AS_UINT2(&data[i]);
|
||||
h[18] = data[18];
|
||||
h[19] = cuda_swab32(nonce);
|
||||
h[20] = 0x80;
|
||||
h[21] = h[22] = h[23] = h[24] = h[25] = h[26] = 0;
|
||||
//((uint8_t*)h)[80] = 0x80;
|
||||
//((uint8_t*)h)[128-17] = 0x02;
|
||||
//((uint8_t*)h)[128-16] = 0x80;
|
||||
//((uint8_t*)h)[128-15] = 0x02;
|
||||
h[27] = 0x2000000;
|
||||
h[28] = 0x280;
|
||||
//h[29] = h[30] = h[31] = 0;
|
||||
|
||||
uint32_t k0 = 640; // bitlen
|
||||
uint32_t W[64];
|
||||
|
||||
#pragma unroll 8
|
||||
for (int i = 0; i < 32; i+=4) {
|
||||
W[i] = 512; // L
|
||||
W[i+1] = 0; // H
|
||||
W[i+2] = 0; // X
|
||||
W[i+3] = 0;
|
||||
}
|
||||
|
||||
uint32_t Z[16];
|
||||
#pragma unroll
|
||||
for (int i = 0; i<16; i++) Z[i] = W[i];
|
||||
#pragma unroll
|
||||
for (int i = 32; i<61; i++) W[i] = h[i - 32];
|
||||
#pragma unroll
|
||||
for (int i = 61; i<64; i++) W[i] = 0;
|
||||
|
||||
for (int i = 0; i < 10; i++)
|
||||
echo_round(sharedMemory, W, k0);
|
||||
|
||||
#pragma unroll 16
|
||||
for (int i = 0; i < 16; i++) {
|
||||
Z[i] ^= h[i] ^ W[i] ^ W[i + 32];
|
||||
}
|
||||
|
||||
#pragma unroll 8
|
||||
for (int i = 0; i < 16; i += 2)
|
||||
AS_UINT2(&hash[i]) = AS_UINT2(&Z[i]);
|
||||
}
|
||||
|
||||
__device__ __forceinline__
|
||||
void echo_gpu_init(uint32_t *const __restrict__ sharedMemory)
|
||||
{
|
||||
/* each thread startup will fill a uint32 */
|
||||
if (threadIdx.x < 128) {
|
||||
sharedMemory[threadIdx.x] = d_AES0[threadIdx.x];
|
||||
sharedMemory[threadIdx.x + 256] = d_AES1[threadIdx.x];
|
||||
sharedMemory[threadIdx.x + 512] = d_AES2[threadIdx.x];
|
||||
sharedMemory[threadIdx.x + 768] = d_AES3[threadIdx.x];
|
||||
|
||||
sharedMemory[threadIdx.x + 64 * 2] = d_AES0[threadIdx.x + 64 * 2];
|
||||
sharedMemory[threadIdx.x + 64 * 2 + 256] = d_AES1[threadIdx.x + 64 * 2];
|
||||
sharedMemory[threadIdx.x + 64 * 2 + 512] = d_AES2[threadIdx.x + 64 * 2];
|
||||
sharedMemory[threadIdx.x + 64 * 2 + 768] = d_AES3[threadIdx.x + 64 * 2];
|
||||
}
|
||||
}
|
||||
|
||||
__host__
|
||||
void x16_echo512_cuda_init(int thr_id, const uint32_t threads)
|
||||
{
|
||||
aes_cpu_init(thr_id);
|
||||
}
|
||||
|
||||
__constant__ static uint32_t c_PaddedMessage80[20];
|
||||
|
||||
__host__
|
||||
void x16_echo512_setBlock_80(void *endiandata)
|
||||
{
|
||||
cudaMemcpyToSymbol(c_PaddedMessage80, endiandata, sizeof(c_PaddedMessage80), 0, cudaMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
__global__ __launch_bounds__(128, 7) /* will force 72 registers */
|
||||
void x16_echo512_gpu_hash_80(uint32_t threads, uint32_t startNonce, uint64_t *g_hash)
|
||||
{
|
||||
__shared__ uint32_t sharedMemory[1024];
|
||||
|
||||
echo_gpu_init(sharedMemory);
|
||||
__threadfence_block();
|
||||
|
||||
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||
if (thread < threads)
|
||||
{
|
||||
uint64_t hashPosition = thread;
|
||||
uint32_t *pHash = (uint32_t*)&g_hash[hashPosition<<3];
|
||||
|
||||
cuda_echo_round_80(sharedMemory, c_PaddedMessage80, startNonce + thread, pHash);
|
||||
}
|
||||
}
|
||||
|
||||
__host__
|
||||
void x16_echo512_cuda_hash_80(int thr_id, const uint32_t threads, const uint32_t startNonce, uint32_t *d_hash)
|
||||
{
|
||||
const uint32_t threadsperblock = 128;
|
||||
|
||||
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
||||
dim3 block(threadsperblock);
|
||||
|
||||
x16_echo512_gpu_hash_80<<<grid, block>>>(threads, startNonce, (uint64_t*)d_hash);
|
||||
}
|
467
x16r/cuda_x16_fugue512.cu
Normal file
467
x16r/cuda_x16_fugue512.cu
Normal file
@ -0,0 +1,467 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <cuda_helper.h>
|
||||
|
||||
#define TPB 256
|
||||
|
||||
/*
|
||||
* fugue512-80 x16r kernel implementation.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2018 tpruvot
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*/
|
||||
|
||||
#ifdef __INTELLISENSE__
|
||||
#define __byte_perm(x, y, m) (x|y)
|
||||
#define tex1Dfetch(t, n) (n)
|
||||
#define __CUDACC__
|
||||
#include <cuda_texture_types.h>
|
||||
#endif
|
||||
|
||||
// store allocated textures device addresses
|
||||
static unsigned int* d_textures[MAX_GPUS][1];
|
||||
|
||||
#define mixtab0(x) mixtabs[(x)]
|
||||
#define mixtab1(x) mixtabs[(x)+256]
|
||||
#define mixtab2(x) mixtabs[(x)+512]
|
||||
#define mixtab3(x) mixtabs[(x)+768]
|
||||
|
||||
static texture<unsigned int, 1, cudaReadModeElementType> mixTab0Tex;
|
||||
|
||||
static const uint32_t mixtab0[] = {
|
||||
0x63633297, 0x7c7c6feb, 0x77775ec7, 0x7b7b7af7, 0xf2f2e8e5, 0x6b6b0ab7, 0x6f6f16a7, 0xc5c56d39,
|
||||
0x303090c0, 0x01010704, 0x67672e87, 0x2b2bd1ac, 0xfefeccd5, 0xd7d71371, 0xabab7c9a, 0x767659c3,
|
||||
0xcaca4005, 0x8282a33e, 0xc9c94909, 0x7d7d68ef, 0xfafad0c5, 0x5959947f, 0x4747ce07, 0xf0f0e6ed,
|
||||
0xadad6e82, 0xd4d41a7d, 0xa2a243be, 0xafaf608a, 0x9c9cf946, 0xa4a451a6, 0x727245d3, 0xc0c0762d,
|
||||
0xb7b728ea, 0xfdfdc5d9, 0x9393d47a, 0x2626f298, 0x363682d8, 0x3f3fbdfc, 0xf7f7f3f1, 0xcccc521d,
|
||||
0x34348cd0, 0xa5a556a2, 0xe5e58db9, 0xf1f1e1e9, 0x71714cdf, 0xd8d83e4d, 0x313197c4, 0x15156b54,
|
||||
0x04041c10, 0xc7c76331, 0x2323e98c, 0xc3c37f21, 0x18184860, 0x9696cf6e, 0x05051b14, 0x9a9aeb5e,
|
||||
0x0707151c, 0x12127e48, 0x8080ad36, 0xe2e298a5, 0xebeba781, 0x2727f59c, 0xb2b233fe, 0x757550cf,
|
||||
0x09093f24, 0x8383a43a, 0x2c2cc4b0, 0x1a1a4668, 0x1b1b416c, 0x6e6e11a3, 0x5a5a9d73, 0xa0a04db6,
|
||||
0x5252a553, 0x3b3ba1ec, 0xd6d61475, 0xb3b334fa, 0x2929dfa4, 0xe3e39fa1, 0x2f2fcdbc, 0x8484b126,
|
||||
0x5353a257, 0xd1d10169, 0x00000000, 0xededb599, 0x2020e080, 0xfcfcc2dd, 0xb1b13af2, 0x5b5b9a77,
|
||||
0x6a6a0db3, 0xcbcb4701, 0xbebe17ce, 0x3939afe4, 0x4a4aed33, 0x4c4cff2b, 0x5858937b, 0xcfcf5b11,
|
||||
0xd0d0066d, 0xefefbb91, 0xaaaa7b9e, 0xfbfbd7c1, 0x4343d217, 0x4d4df82f, 0x333399cc, 0x8585b622,
|
||||
0x4545c00f, 0xf9f9d9c9, 0x02020e08, 0x7f7f66e7, 0x5050ab5b, 0x3c3cb4f0, 0x9f9ff04a, 0xa8a87596,
|
||||
0x5151ac5f, 0xa3a344ba, 0x4040db1b, 0x8f8f800a, 0x9292d37e, 0x9d9dfe42, 0x3838a8e0, 0xf5f5fdf9,
|
||||
0xbcbc19c6, 0xb6b62fee, 0xdada3045, 0x2121e784, 0x10107040, 0xffffcbd1, 0xf3f3efe1, 0xd2d20865,
|
||||
0xcdcd5519, 0x0c0c2430, 0x1313794c, 0xececb29d, 0x5f5f8667, 0x9797c86a, 0x4444c70b, 0x1717655c,
|
||||
0xc4c46a3d, 0xa7a758aa, 0x7e7e61e3, 0x3d3db3f4, 0x6464278b, 0x5d5d886f, 0x19194f64, 0x737342d7,
|
||||
0x60603b9b, 0x8181aa32, 0x4f4ff627, 0xdcdc225d, 0x2222ee88, 0x2a2ad6a8, 0x9090dd76, 0x88889516,
|
||||
0x4646c903, 0xeeeebc95, 0xb8b805d6, 0x14146c50, 0xdede2c55, 0x5e5e8163, 0x0b0b312c, 0xdbdb3741,
|
||||
0xe0e096ad, 0x32329ec8, 0x3a3aa6e8, 0x0a0a3628, 0x4949e43f, 0x06061218, 0x2424fc90, 0x5c5c8f6b,
|
||||
0xc2c27825, 0xd3d30f61, 0xacac6986, 0x62623593, 0x9191da72, 0x9595c662, 0xe4e48abd, 0x797974ff,
|
||||
0xe7e783b1, 0xc8c84e0d, 0x373785dc, 0x6d6d18af, 0x8d8d8e02, 0xd5d51d79, 0x4e4ef123, 0xa9a97292,
|
||||
0x6c6c1fab, 0x5656b943, 0xf4f4fafd, 0xeaeaa085, 0x6565208f, 0x7a7a7df3, 0xaeae678e, 0x08083820,
|
||||
0xbaba0bde, 0x787873fb, 0x2525fb94, 0x2e2ecab8, 0x1c1c5470, 0xa6a65fae, 0xb4b421e6, 0xc6c66435,
|
||||
0xe8e8ae8d, 0xdddd2559, 0x747457cb, 0x1f1f5d7c, 0x4b4bea37, 0xbdbd1ec2, 0x8b8b9c1a, 0x8a8a9b1e,
|
||||
0x70704bdb, 0x3e3ebaf8, 0xb5b526e2, 0x66662983, 0x4848e33b, 0x0303090c, 0xf6f6f4f5, 0x0e0e2a38,
|
||||
0x61613c9f, 0x35358bd4, 0x5757be47, 0xb9b902d2, 0x8686bf2e, 0xc1c17129, 0x1d1d5374, 0x9e9ef74e,
|
||||
0xe1e191a9, 0xf8f8decd, 0x9898e556, 0x11117744, 0x696904bf, 0xd9d93949, 0x8e8e870e, 0x9494c166,
|
||||
0x9b9bec5a, 0x1e1e5a78, 0x8787b82a, 0xe9e9a989, 0xcece5c15, 0x5555b04f, 0x2828d8a0, 0xdfdf2b51,
|
||||
0x8c8c8906, 0xa1a14ab2, 0x89899212, 0x0d0d2334, 0xbfbf10ca, 0xe6e684b5, 0x4242d513, 0x686803bb,
|
||||
0x4141dc1f, 0x9999e252, 0x2d2dc3b4, 0x0f0f2d3c, 0xb0b03df6, 0x5454b74b, 0xbbbb0cda, 0x16166258
|
||||
};
|
||||
|
||||
#define TIX4(q, x00, x01, x04, x07, x08, x22, x24, x27, x30) { \
|
||||
x22 ^= x00; \
|
||||
x00 = (q); \
|
||||
x08 ^= x00; \
|
||||
x01 ^= x24; \
|
||||
x04 ^= x27; \
|
||||
x07 ^= x30; \
|
||||
}
|
||||
|
||||
#define CMIX36(x00, x01, x02, x04, x05, x06, x18, x19, x20) { \
|
||||
x00 ^= x04; \
|
||||
x01 ^= x05; \
|
||||
x02 ^= x06; \
|
||||
x18 ^= x04; \
|
||||
x19 ^= x05; \
|
||||
x20 ^= x06; \
|
||||
}
|
||||
|
||||
#define SMIX(x0, x1, x2, x3) { \
|
||||
uint32_t tmp; \
|
||||
uint32_t r0 = 0; \
|
||||
uint32_t r1 = 0; \
|
||||
uint32_t r2 = 0; \
|
||||
uint32_t r3 = 0; \
|
||||
uint32_t c0 = mixtab0(x0 >> 24); \
|
||||
tmp = mixtab1((x0 >> 16) & 0xFF); \
|
||||
c0 ^= tmp; \
|
||||
r1 ^= tmp; \
|
||||
tmp = mixtab2((x0 >> 8) & 0xFF); \
|
||||
c0 ^= tmp; \
|
||||
r2 ^= tmp; \
|
||||
tmp = mixtab3(x0 & 0xFF); \
|
||||
c0 ^= tmp; \
|
||||
r3 ^= tmp; \
|
||||
tmp = mixtab0(x1 >> 24); \
|
||||
uint32_t c1 = tmp; \
|
||||
r0 ^= tmp; \
|
||||
tmp = mixtab1((x1 >> 16) & 0xFF); \
|
||||
c1 ^= tmp; \
|
||||
tmp = mixtab2((x1 >> 8) & 0xFF); \
|
||||
c1 ^= tmp; \
|
||||
r2 ^= tmp; \
|
||||
tmp = mixtab3(x1 & 0xFF); \
|
||||
c1 ^= tmp; \
|
||||
r3 ^= tmp; \
|
||||
tmp = mixtab0(x2 >> 24); \
|
||||
uint32_t c2 = tmp; \
|
||||
r0 ^= tmp; \
|
||||
tmp = mixtab1((x2 >> 16) & 0xFF); \
|
||||
c2 ^= tmp; \
|
||||
r1 ^= tmp; \
|
||||
tmp = mixtab2((x2 >> 8) & 0xFF); \
|
||||
c2 ^= tmp; \
|
||||
tmp = mixtab3(x2 & 0xFF); \
|
||||
c2 ^= tmp; \
|
||||
r3 ^= tmp; \
|
||||
tmp = mixtab0(x3 >> 24); \
|
||||
uint32_t c3 = tmp; \
|
||||
r0 ^= tmp; \
|
||||
tmp = mixtab1((x3 >> 16) & 0xFF); \
|
||||
c3 ^= tmp; \
|
||||
r1 ^= tmp; \
|
||||
tmp = mixtab2((x3 >> 8) & 0xFF); \
|
||||
c3 ^= tmp; \
|
||||
r2 ^= tmp; \
|
||||
tmp = mixtab3(x3 & 0xFF); \
|
||||
c3 ^= tmp; \
|
||||
x0 = ((c0 ^ r0) & 0xFF000000) | ((c1 ^ r1) & 0x00FF0000) \
|
||||
| ((c2 ^ r2) & 0x0000FF00) | ((c3 ^ r3) & 0x000000FF); \
|
||||
x1 = ((c1 ^ (r0 << 8)) & 0xFF000000) | ((c2 ^ (r1 << 8)) & 0x00FF0000) \
|
||||
| ((c3 ^ (r2 << 8)) & 0x0000FF00) | ((c0 ^ (r3 >> 24)) & 0x000000FF); \
|
||||
x2 = ((c2 ^ (r0 << 16)) & 0xFF000000) | ((c3 ^ (r1 << 16)) & 0x00FF0000) \
|
||||
| ((c0 ^ (r2 >> 16)) & 0x0000FF00) | ((c1 ^ (r3 >> 16)) & 0x000000FF); \
|
||||
x3 = ((c3 ^ (r0 << 24)) & 0xFF000000) | ((c0 ^ (r1 >> 8)) & 0x00FF0000) \
|
||||
| ((c1 ^ (r2 >> 8)) & 0x0000FF00) | ((c2 ^ (r3 >> 8)) & 0x000000FF); \
|
||||
}
|
||||
|
||||
#define SUB_ROR3 { \
|
||||
B33 = S33, B34 = S34, B35 = S35; \
|
||||
S35 = S32; S34 = S31; S33 = S30; S32 = S29; S31 = S28; S30 = S27; S29 = S26; S28 = S25; S27 = S24; \
|
||||
S26 = S23; S25 = S22; S24 = S21; S23 = S20; S22 = S19; S21 = S18; S20 = S17; S19 = S16; S18 = S15; \
|
||||
S17 = S14; S16 = S13; S15 = S12; S14 = S11; S13 = S10; S12 = S09; S11 = S08; S10 = S07; S09 = S06; \
|
||||
S08 = S05; S07 = S04; S06 = S03; S05 = S02; S04 = S01; S03 = S00; S02 = B35; S01 = B34; S00 = B33; \
|
||||
}
|
||||
|
||||
#define SUB_ROR8 { \
|
||||
B28 = S28, B29 = S29, B30 = S30, B31 = S31, B32 = S32, B33 = S33, B34 = S34, B35 = S35; \
|
||||
S35 = S27; S34 = S26; S33 = S25; S32 = S24; S31 = S23; S30 = S22; S29 = S21; S28 = S20; S27 = S19; \
|
||||
S26 = S18; S25 = S17; S24 = S16; S23 = S15; S22 = S14; S21 = S13; S20 = S12; S19 = S11; S18 = S10; \
|
||||
S17 = S09; S16 = S08; S15 = S07; S14 = S06; S13 = S05; S12 = S04; S11 = S03; S10 = S02; S09 = S01; \
|
||||
S08 = S00; S07 = B35; S06 = B34; S05 = B33; S04 = B32; S03 = B31; S02 = B30; S01 = B29; S00 = B28; \
|
||||
}
|
||||
|
||||
#define SUB_ROR9 { \
|
||||
B27 = S27, B28 = S28, B29 = S29, B30 = S30, B31 = S31, B32 = S32, B33 = S33, B34 = S34, B35 = S35; \
|
||||
S35 = S26; S34 = S25; S33 = S24; S32 = S23; S31 = S22; S30 = S21; S29 = S20; S28 = S19; S27 = S18; \
|
||||
S26 = S17; S25 = S16; S24 = S15; S23 = S14; S22 = S13; S21 = S12; S20 = S11; S19 = S10; S18 = S09; \
|
||||
S17 = S08; S16 = S07; S15 = S06; S14 = S05; S13 = S04; S12 = S03; S11 = S02; S10 = S01; S09 = S00; \
|
||||
S08 = B35; S07 = B34; S06 = B33; S05 = B32; S04 = B31; S03 = B30; S02 = B29; S01 = B28; S00 = B27; \
|
||||
}
|
||||
|
||||
#define SUB_ROR9_3 { \
|
||||
SUB_ROR3; SUB_ROR3; SUB_ROR3; \
|
||||
}
|
||||
|
||||
#define SUB_ROR12 { /* to fix */ \
|
||||
B24 = S00; B25 = S01; B26 = S02; B27 = S03; B28 = S04; B29 = S05; B30 = S06; B31 = S07; B32 = S08; B33 = S09; B34 = S10; B35 = S11; \
|
||||
S00 = S12; S01 = S13; S02 = S14; S03 = S15; S04 = S16; S05 = S17; S06 = S18; S07 = S19; S08 = S20; S09 = S21; S10 = S22; S11 = S23; \
|
||||
S12 = S24; S13 = S25; S14 = S26; S15 = S27; S16 = S28; S17 = S29; S18 = S30; S19 = S31; S20 = S32; S21 = S33; S22 = S34; S23 = S35; \
|
||||
S24 = B24; S25 = B25; S26 = B26; S27 = B27; S28 = B28; S29 = B29; S30 = B30; S31 = B31; S32 = B32; S33 = B33; S34 = B34; S35 = B35; \
|
||||
}
|
||||
|
||||
#define FUGUE512_3(x, y, z) { \
|
||||
TIX4(x, S00, S01, S04, S07, S08, S22, S24, S27, S30); \
|
||||
CMIX36(S33, S34, S35, S01, S02, S03, S15, S16, S17); \
|
||||
SMIX(S33, S34, S35, S00); \
|
||||
CMIX36(S30, S31, S32, S34, S35, S00, S12, S13, S14); \
|
||||
SMIX(S30, S31, S32, S33); \
|
||||
CMIX36(S27, S28, S29, S31, S32, S33, S09, S10, S11); \
|
||||
SMIX(S27, S28, S29, S30); \
|
||||
CMIX36(S24, S25, S26, S28, S29, S30, S06, S07, S08); \
|
||||
SMIX(S24, S25, S26, S27); \
|
||||
\
|
||||
TIX4(y, S24, S25, S28, S31, S32, S10, S12, S15, S18); \
|
||||
CMIX36(S21, S22, S23, S25, S26, S27, S03, S04, S05); \
|
||||
SMIX(S21, S22, S23, S24); \
|
||||
CMIX36(S18, S19, S20, S22, S23, S24, S00, S01, S02); \
|
||||
SMIX(S18, S19, S20, S21); \
|
||||
CMIX36(S15, S16, S17, S19, S20, S21, S33, S34, S35); \
|
||||
SMIX(S15, S16, S17, S18); \
|
||||
CMIX36(S12, S13, S14, S16, S17, S18, S30, S31, S32); \
|
||||
SMIX(S12, S13, S14, S15); \
|
||||
\
|
||||
TIX4(z, S12, S13, S16, S19, S20, S34, S00, S03, S06); \
|
||||
CMIX36(S09, S10, S11, S13, S14, S15, S27, S28, S29); \
|
||||
SMIX(S09, S10, S11, S12); \
|
||||
CMIX36(S06, S07, S08, S10, S11, S12, S24, S25, S26); \
|
||||
SMIX(S06, S07, S08, S09); \
|
||||
CMIX36(S03, S04, S05, S07, S08, S09, S21, S22, S23); \
|
||||
SMIX(S03, S04, S05, S06); \
|
||||
CMIX36(S00, S01, S02, S04, S05, S06, S18, S19, S20); \
|
||||
SMIX(S00, S01, S02, S03); \
|
||||
}
|
||||
|
||||
#define FUGUE512_F(w, x, y, z) { \
|
||||
TIX4(w, S00, S01, S04, S07, S08, S22, S24, S27, S30); \
|
||||
CMIX36(S33, S34, S35, S01, S02, S03, S15, S16, S17); \
|
||||
SMIX(S33, S34, S35, S00); \
|
||||
CMIX36(S30, S31, S32, S34, S35, S00, S12, S13, S14); \
|
||||
SMIX(S30, S31, S32, S33); \
|
||||
CMIX36(S27, S28, S29, S31, S32, S33, S09, S10, S11); \
|
||||
SMIX(S27, S28, S29, S30); \
|
||||
CMIX36(S24, S25, S26, S28, S29, S30, S06, S07, S08); \
|
||||
SMIX(S24, S25, S26, S27); \
|
||||
\
|
||||
TIX4(x, S24, S25, S28, S31, S32, S10, S12, S15, S18); \
|
||||
CMIX36(S21, S22, S23, S25, S26, S27, S03, S04, S05); \
|
||||
SMIX(S21, S22, S23, S24); \
|
||||
CMIX36(S18, S19, S20, S22, S23, S24, S00, S01, S02); \
|
||||
SMIX(S18, S19, S20, S21); \
|
||||
CMIX36(S15, S16, S17, S19, S20, S21, S33, S34, S35); \
|
||||
SMIX(S15, S16, S17, S18); \
|
||||
CMIX36(S12, S13, S14, S16, S17, S18, S30, S31, S32); \
|
||||
SMIX(S12, S13, S14, S15); \
|
||||
\
|
||||
TIX4(y, S12, S13, S16, S19, S20, S34, S00, S03, S06); \
|
||||
CMIX36(S09, S10, S11, S13, S14, S15, S27, S28, S29); \
|
||||
SMIX(S09, S10, S11, S12); \
|
||||
CMIX36(S06, S07, S08, S10, S11, S12, S24, S25, S26); \
|
||||
SMIX(S06, S07, S08, S09); \
|
||||
CMIX36(S03, S04, S05, S07, S08, S09, S21, S22, S23); \
|
||||
SMIX(S03, S04, S05, S06); \
|
||||
CMIX36(S00, S01, S02, S04, S05, S06, S18, S19, S20); \
|
||||
SMIX(S00, S01, S02, S03); \
|
||||
\
|
||||
TIX4(z, S00, S01, S04, S07, S08, S22, S24, S27, S30); \
|
||||
CMIX36(S33, S34, S35, S01, S02, S03, S15, S16, S17); \
|
||||
SMIX(S33, S34, S35, S00); \
|
||||
CMIX36(S30, S31, S32, S34, S35, S00, S12, S13, S14); \
|
||||
SMIX(S30, S31, S32, S33); \
|
||||
CMIX36(S27, S28, S29, S31, S32, S33, S09, S10, S11); \
|
||||
SMIX(S27, S28, S29, S30); \
|
||||
CMIX36(S24, S25, S26, S28, S29, S30, S06, S07, S08); \
|
||||
SMIX(S24, S25, S26, S27); \
|
||||
}
|
||||
|
||||
#undef ROL8
|
||||
#ifdef __CUDA_ARCH__
|
||||
__device__ __forceinline__
|
||||
uint32_t ROL8(const uint32_t a) {
|
||||
return __byte_perm(a, 0, 0x2103);
|
||||
}
|
||||
__device__ __forceinline__
|
||||
uint32_t ROR8(const uint32_t a) {
|
||||
return __byte_perm(a, 0, 0x0321);
|
||||
}
|
||||
__device__ __forceinline__
|
||||
uint32_t ROL16(const uint32_t a) {
|
||||
return __byte_perm(a, 0, 0x1032);
|
||||
}
|
||||
#else
|
||||
#define ROL8(u) ROTL32(u, 8)
|
||||
#define ROR8(u) ROTR32(u, 8)
|
||||
#define ROL16(u) ROTL32(u,16)
|
||||
#endif
|
||||
|
||||
//#define AS_UINT4(addr) *((uint4*)(addr))
|
||||
|
||||
__constant__ static uint64_t c_PaddedMessage80[10];
|
||||
|
||||
__host__
|
||||
void x16_fugue512_setBlock_80(void *pdata)
|
||||
{
|
||||
cudaMemcpyToSymbol(c_PaddedMessage80, pdata, sizeof(c_PaddedMessage80), 0, cudaMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
/***************************************************/
|
||||
|
||||
__global__
|
||||
__launch_bounds__(TPB)
|
||||
void x16_fugue512_gpu_hash_80(const uint32_t threads, const uint32_t startNonce, uint64_t *g_hash)
|
||||
{
|
||||
__shared__ uint32_t mixtabs[1024];
|
||||
|
||||
// load shared mem (with 256 threads)
|
||||
const uint32_t thr = threadIdx.x & 0xFF;
|
||||
const uint32_t tmp = tex1Dfetch(mixTab0Tex, thr);
|
||||
mixtabs[thr] = tmp;
|
||||
mixtabs[thr+256] = ROR8(tmp);
|
||||
mixtabs[thr+512] = ROL16(tmp);
|
||||
mixtabs[thr+768] = ROL8(tmp);
|
||||
#if TPB <= 256
|
||||
if (blockDim.x < 256) {
|
||||
const uint32_t thr = (threadIdx.x + 0x80) & 0xFF;
|
||||
const uint32_t tmp = tex1Dfetch(mixTab0Tex, thr);
|
||||
mixtabs[thr] = tmp;
|
||||
mixtabs[thr + 256] = ROR8(tmp);
|
||||
mixtabs[thr + 512] = ROL16(tmp);
|
||||
mixtabs[thr + 768] = ROL8(tmp);
|
||||
}
|
||||
#endif
|
||||
|
||||
__syncthreads();
|
||||
|
||||
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||
if (thread < threads)
|
||||
{
|
||||
uint32_t Data[20];
|
||||
|
||||
#pragma unroll
|
||||
for(int i = 0; i < 10; i++)
|
||||
AS_UINT2(&Data[i * 2]) = AS_UINT2(&c_PaddedMessage80[i]);
|
||||
Data[19] = (startNonce + thread);
|
||||
|
||||
uint32_t S00, S01, S02, S03, S04, S05, S06, S07, S08, S09, S10, S11;
|
||||
uint32_t S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, S23;
|
||||
uint32_t S24, S25, S26, S27, S28, S29, S30, S31, S32, S33, S34, S35;
|
||||
//uint32_t B24, B25, B26,
|
||||
uint32_t B27, B28, B29, B30, B31, B32, B33, B34, B35;
|
||||
//const uint64_t bc = 640 bits to hash
|
||||
//const uint32_t bclo = (uint32_t)(bc);
|
||||
//const uint32_t bchi = (uint32_t)(bc >> 32);
|
||||
|
||||
S00 = S01 = S02 = S03 = S04 = S05 = S06 = S07 = S08 = S09 = 0;
|
||||
S10 = S11 = S12 = S13 = S14 = S15 = S16 = S17 = S18 = S19 = 0;
|
||||
S20 = 0x8807a57e; S21 = 0xe616af75; S22 = 0xc5d3e4db; S23 = 0xac9ab027;
|
||||
S24 = 0xd915f117; S25 = 0xb6eecc54; S26 = 0x06e8020b; S27 = 0x4a92efd1;
|
||||
S28 = 0xaac6e2c9; S29 = 0xddb21398; S30 = 0xcae65838; S31 = 0x437f203f;
|
||||
S32 = 0x25ea78e7; S33 = 0x951fddd6; S34 = 0xda6ed11d; S35 = 0xe13e3567;
|
||||
|
||||
FUGUE512_3((Data[ 0]), (Data[ 1]), (Data[ 2]));
|
||||
FUGUE512_3((Data[ 3]), (Data[ 4]), (Data[ 5]));
|
||||
FUGUE512_3((Data[ 6]), (Data[ 7]), (Data[ 8]));
|
||||
FUGUE512_3((Data[ 9]), (Data[10]), (Data[11]));
|
||||
FUGUE512_3((Data[12]), (Data[13]), (Data[14]));
|
||||
FUGUE512_3((Data[15]), (Data[16]), (Data[17]));
|
||||
FUGUE512_F((Data[18]), (Data[19]), 0/*bchi*/, (80*8)/*bclo*/);
|
||||
|
||||
// rotate right state by 3 dwords (S00 = S33, S03 = S00)
|
||||
SUB_ROR3;
|
||||
SUB_ROR9;
|
||||
|
||||
#pragma unroll 32
|
||||
for (int i = 0; i < 32; i++) {
|
||||
SUB_ROR3;
|
||||
CMIX36(S00, S01, S02, S04, S05, S06, S18, S19, S20);
|
||||
SMIX(S00, S01, S02, S03);
|
||||
}
|
||||
#pragma unroll 13
|
||||
for (int i = 0; i < 13; i++) {
|
||||
S04 ^= S00;
|
||||
S09 ^= S00;
|
||||
S18 ^= S00;
|
||||
S27 ^= S00;
|
||||
SUB_ROR9;
|
||||
SMIX(S00, S01, S02, S03);
|
||||
S04 ^= S00;
|
||||
S10 ^= S00;
|
||||
S18 ^= S00;
|
||||
S27 ^= S00;
|
||||
SUB_ROR9;
|
||||
SMIX(S00, S01, S02, S03);
|
||||
S04 ^= S00;
|
||||
S10 ^= S00;
|
||||
S19 ^= S00;
|
||||
S27 ^= S00;
|
||||
SUB_ROR9;
|
||||
SMIX(S00, S01, S02, S03);
|
||||
S04 ^= S00;
|
||||
S10 ^= S00;
|
||||
S19 ^= S00;
|
||||
S28 ^= S00;
|
||||
SUB_ROR8;
|
||||
SMIX(S00, S01, S02, S03);
|
||||
}
|
||||
S04 ^= S00;
|
||||
S09 ^= S00;
|
||||
S18 ^= S00;
|
||||
S27 ^= S00;
|
||||
|
||||
Data[ 0] = cuda_swab32(S01);
|
||||
Data[ 1] = cuda_swab32(S02);
|
||||
Data[ 2] = cuda_swab32(S03);
|
||||
Data[ 3] = cuda_swab32(S04);
|
||||
Data[ 4] = cuda_swab32(S09);
|
||||
Data[ 5] = cuda_swab32(S10);
|
||||
Data[ 6] = cuda_swab32(S11);
|
||||
Data[ 7] = cuda_swab32(S12);
|
||||
Data[ 8] = cuda_swab32(S18);
|
||||
Data[ 9] = cuda_swab32(S19);
|
||||
Data[10] = cuda_swab32(S20);
|
||||
Data[11] = cuda_swab32(S21);
|
||||
Data[12] = cuda_swab32(S27);
|
||||
Data[13] = cuda_swab32(S28);
|
||||
Data[14] = cuda_swab32(S29);
|
||||
Data[15] = cuda_swab32(S30);
|
||||
|
||||
const size_t hashPosition = thread;
|
||||
uint64_t* pHash = &g_hash[hashPosition << 3];
|
||||
#pragma unroll 4
|
||||
for(int i = 0; i < 4; i++)
|
||||
AS_UINT4(&pHash[i * 2]) = AS_UINT4(&Data[i * 4]);
|
||||
}
|
||||
}
|
||||
|
||||
#define texDef(id, texname, texmem, texsource, texsize) { \
|
||||
unsigned int *texmem; \
|
||||
cudaMalloc(&texmem, texsize); \
|
||||
d_textures[thr_id][id] = texmem; \
|
||||
cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \
|
||||
texname.normalized = 0; \
|
||||
texname.filterMode = cudaFilterModePoint; \
|
||||
texname.addressMode[0] = cudaAddressModeClamp; \
|
||||
{ cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned int>(); \
|
||||
cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); \
|
||||
} \
|
||||
}
|
||||
|
||||
__host__
|
||||
void x16_fugue512_cpu_init(int thr_id, uint32_t threads)
|
||||
{
|
||||
texDef(0, mixTab0Tex, mixTab0m, mixtab0, sizeof(uint32_t)*256);
|
||||
}
|
||||
|
||||
__host__
|
||||
void x16_fugue512_cpu_free(int thr_id)
|
||||
{
|
||||
cudaFree(d_textures[thr_id][0]);
|
||||
}
|
||||
|
||||
__host__
|
||||
void x16_fugue512_cuda_hash_80(int thr_id, const uint32_t threads, const uint32_t startNonce, uint32_t *d_hash)
|
||||
{
|
||||
const uint32_t threadsperblock = TPB;
|
||||
|
||||
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
||||
dim3 block(threadsperblock);
|
||||
|
||||
x16_fugue512_gpu_hash_80 <<<grid, block>>> (threads, startNonce, (uint64_t*)d_hash);
|
||||
}
|
350
x16r/cuda_x16_shabal512.cu
Normal file
350
x16r/cuda_x16_shabal512.cu
Normal file
@ -0,0 +1,350 @@
|
||||
/*
|
||||
* Shabal-512 for X16R
|
||||
* tpruvot 2018, based on alexis x14 and xevan kernlx code
|
||||
*/
|
||||
|
||||
#include <cuda_helper.h>
|
||||
#include <cuda_vectors.h>
|
||||
#include <cuda_vector_uint2x4.h>
|
||||
|
||||
typedef uint32_t sph_u32;
|
||||
|
||||
#define C32(x) (x)
|
||||
#define T32(x) (x)
|
||||
|
||||
#define INPUT_BLOCK_ADD do { \
|
||||
B0 = T32(B0 + M0); \
|
||||
B1 = T32(B1 + M1); \
|
||||
B2 = T32(B2 + M2); \
|
||||
B3 = T32(B3 + M3); \
|
||||
B4 = T32(B4 + M4); \
|
||||
B5 = T32(B5 + M5); \
|
||||
B6 = T32(B6 + M6); \
|
||||
B7 = T32(B7 + M7); \
|
||||
B8 = T32(B8 + M8); \
|
||||
B9 = T32(B9 + M9); \
|
||||
BA = T32(BA + MA); \
|
||||
BB = T32(BB + MB); \
|
||||
BC = T32(BC + MC); \
|
||||
BD = T32(BD + MD); \
|
||||
BE = T32(BE + ME); \
|
||||
BF = T32(BF + MF); \
|
||||
} while (0)
|
||||
|
||||
#define INPUT_BLOCK_SUB do { \
|
||||
C0 = T32(C0 - M0); \
|
||||
C1 = T32(C1 - M1); \
|
||||
C2 = T32(C2 - M2); \
|
||||
C3 = T32(C3 - M3); \
|
||||
C4 = T32(C4 - M4); \
|
||||
C5 = T32(C5 - M5); \
|
||||
C6 = T32(C6 - M6); \
|
||||
C7 = T32(C7 - M7); \
|
||||
C8 = T32(C8 - M8); \
|
||||
C9 = T32(C9 - M9); \
|
||||
CA = T32(CA - MA); \
|
||||
CB = T32(CB - MB); \
|
||||
CC = T32(CC - MC); \
|
||||
CD = T32(CD - MD); \
|
||||
CE = T32(CE - ME); \
|
||||
CF = T32(CF - MF); \
|
||||
} while (0)
|
||||
|
||||
#define XOR_W do { \
|
||||
A00 ^= Wlow; \
|
||||
A01 ^= Whigh; \
|
||||
} while (0)
|
||||
|
||||
#define SWAP(v1, v2) do { \
|
||||
sph_u32 tmp = (v1); \
|
||||
(v1) = (v2); \
|
||||
(v2) = tmp; \
|
||||
} while (0)
|
||||
|
||||
#define SWAP_BC do { \
|
||||
SWAP(B0, C0); \
|
||||
SWAP(B1, C1); \
|
||||
SWAP(B2, C2); \
|
||||
SWAP(B3, C3); \
|
||||
SWAP(B4, C4); \
|
||||
SWAP(B5, C5); \
|
||||
SWAP(B6, C6); \
|
||||
SWAP(B7, C7); \
|
||||
SWAP(B8, C8); \
|
||||
SWAP(B9, C9); \
|
||||
SWAP(BA, CA); \
|
||||
SWAP(BB, CB); \
|
||||
SWAP(BC, CC); \
|
||||
SWAP(BD, CD); \
|
||||
SWAP(BE, CE); \
|
||||
SWAP(BF, CF); \
|
||||
} while (0)
|
||||
|
||||
#define PERM_ELT(xa0, xa1, xb0, xb1, xb2, xb3, xc, xm) do { \
|
||||
xa0 = T32((xa0 \
|
||||
^ (((xa1 << 15) | (xa1 >> 17)) * 5U) \
|
||||
^ xc) * 3U) \
|
||||
^ xb1 ^ (xb2 & ~xb3) ^ xm; \
|
||||
xb0 = T32(~(((xb0 << 1) | (xb0 >> 31)) ^ xa0)); \
|
||||
} while (0)
|
||||
|
||||
#define PERM_STEP_0 do { \
|
||||
PERM_ELT(A00, A0B, B0, BD, B9, B6, C8, M0); \
|
||||
PERM_ELT(A01, A00, B1, BE, BA, B7, C7, M1); \
|
||||
PERM_ELT(A02, A01, B2, BF, BB, B8, C6, M2); \
|
||||
PERM_ELT(A03, A02, B3, B0, BC, B9, C5, M3); \
|
||||
PERM_ELT(A04, A03, B4, B1, BD, BA, C4, M4); \
|
||||
PERM_ELT(A05, A04, B5, B2, BE, BB, C3, M5); \
|
||||
PERM_ELT(A06, A05, B6, B3, BF, BC, C2, M6); \
|
||||
PERM_ELT(A07, A06, B7, B4, B0, BD, C1, M7); \
|
||||
PERM_ELT(A08, A07, B8, B5, B1, BE, C0, M8); \
|
||||
PERM_ELT(A09, A08, B9, B6, B2, BF, CF, M9); \
|
||||
PERM_ELT(A0A, A09, BA, B7, B3, B0, CE, MA); \
|
||||
PERM_ELT(A0B, A0A, BB, B8, B4, B1, CD, MB); \
|
||||
PERM_ELT(A00, A0B, BC, B9, B5, B2, CC, MC); \
|
||||
PERM_ELT(A01, A00, BD, BA, B6, B3, CB, MD); \
|
||||
PERM_ELT(A02, A01, BE, BB, B7, B4, CA, ME); \
|
||||
PERM_ELT(A03, A02, BF, BC, B8, B5, C9, MF); \
|
||||
} while (0)
|
||||
|
||||
#define PERM_STEP_1 do { \
|
||||
PERM_ELT(A04, A03, B0, BD, B9, B6, C8, M0); \
|
||||
PERM_ELT(A05, A04, B1, BE, BA, B7, C7, M1); \
|
||||
PERM_ELT(A06, A05, B2, BF, BB, B8, C6, M2); \
|
||||
PERM_ELT(A07, A06, B3, B0, BC, B9, C5, M3); \
|
||||
PERM_ELT(A08, A07, B4, B1, BD, BA, C4, M4); \
|
||||
PERM_ELT(A09, A08, B5, B2, BE, BB, C3, M5); \
|
||||
PERM_ELT(A0A, A09, B6, B3, BF, BC, C2, M6); \
|
||||
PERM_ELT(A0B, A0A, B7, B4, B0, BD, C1, M7); \
|
||||
PERM_ELT(A00, A0B, B8, B5, B1, BE, C0, M8); \
|
||||
PERM_ELT(A01, A00, B9, B6, B2, BF, CF, M9); \
|
||||
PERM_ELT(A02, A01, BA, B7, B3, B0, CE, MA); \
|
||||
PERM_ELT(A03, A02, BB, B8, B4, B1, CD, MB); \
|
||||
PERM_ELT(A04, A03, BC, B9, B5, B2, CC, MC); \
|
||||
PERM_ELT(A05, A04, BD, BA, B6, B3, CB, MD); \
|
||||
PERM_ELT(A06, A05, BE, BB, B7, B4, CA, ME); \
|
||||
PERM_ELT(A07, A06, BF, BC, B8, B5, C9, MF); \
|
||||
} while (0)
|
||||
|
||||
#define PERM_STEP_2 do { \
|
||||
PERM_ELT(A08, A07, B0, BD, B9, B6, C8, M0); \
|
||||
PERM_ELT(A09, A08, B1, BE, BA, B7, C7, M1); \
|
||||
PERM_ELT(A0A, A09, B2, BF, BB, B8, C6, M2); \
|
||||
PERM_ELT(A0B, A0A, B3, B0, BC, B9, C5, M3); \
|
||||
PERM_ELT(A00, A0B, B4, B1, BD, BA, C4, M4); \
|
||||
PERM_ELT(A01, A00, B5, B2, BE, BB, C3, M5); \
|
||||
PERM_ELT(A02, A01, B6, B3, BF, BC, C2, M6); \
|
||||
PERM_ELT(A03, A02, B7, B4, B0, BD, C1, M7); \
|
||||
PERM_ELT(A04, A03, B8, B5, B1, BE, C0, M8); \
|
||||
PERM_ELT(A05, A04, B9, B6, B2, BF, CF, M9); \
|
||||
PERM_ELT(A06, A05, BA, B7, B3, B0, CE, MA); \
|
||||
PERM_ELT(A07, A06, BB, B8, B4, B1, CD, MB); \
|
||||
PERM_ELT(A08, A07, BC, B9, B5, B2, CC, MC); \
|
||||
PERM_ELT(A09, A08, BD, BA, B6, B3, CB, MD); \
|
||||
PERM_ELT(A0A, A09, BE, BB, B7, B4, CA, ME); \
|
||||
PERM_ELT(A0B, A0A, BF, BC, B8, B5, C9, MF); \
|
||||
} while (0)
|
||||
|
||||
#define APPLY_P do { \
|
||||
B0 = T32(B0 << 17) | (B0 >> 15); \
|
||||
B1 = T32(B1 << 17) | (B1 >> 15); \
|
||||
B2 = T32(B2 << 17) | (B2 >> 15); \
|
||||
B3 = T32(B3 << 17) | (B3 >> 15); \
|
||||
B4 = T32(B4 << 17) | (B4 >> 15); \
|
||||
B5 = T32(B5 << 17) | (B5 >> 15); \
|
||||
B6 = T32(B6 << 17) | (B6 >> 15); \
|
||||
B7 = T32(B7 << 17) | (B7 >> 15); \
|
||||
B8 = T32(B8 << 17) | (B8 >> 15); \
|
||||
B9 = T32(B9 << 17) | (B9 >> 15); \
|
||||
BA = T32(BA << 17) | (BA >> 15); \
|
||||
BB = T32(BB << 17) | (BB >> 15); \
|
||||
BC = T32(BC << 17) | (BC >> 15); \
|
||||
BD = T32(BD << 17) | (BD >> 15); \
|
||||
BE = T32(BE << 17) | (BE >> 15); \
|
||||
BF = T32(BF << 17) | (BF >> 15); \
|
||||
PERM_STEP_0; \
|
||||
PERM_STEP_1; \
|
||||
PERM_STEP_2; \
|
||||
A0B = T32(A0B + C6); \
|
||||
A0A = T32(A0A + C5); \
|
||||
A09 = T32(A09 + C4); \
|
||||
A08 = T32(A08 + C3); \
|
||||
A07 = T32(A07 + C2); \
|
||||
A06 = T32(A06 + C1); \
|
||||
A05 = T32(A05 + C0); \
|
||||
A04 = T32(A04 + CF); \
|
||||
A03 = T32(A03 + CE); \
|
||||
A02 = T32(A02 + CD); \
|
||||
A01 = T32(A01 + CC); \
|
||||
A00 = T32(A00 + CB); \
|
||||
A0B = T32(A0B + CA); \
|
||||
A0A = T32(A0A + C9); \
|
||||
A09 = T32(A09 + C8); \
|
||||
A08 = T32(A08 + C7); \
|
||||
A07 = T32(A07 + C6); \
|
||||
A06 = T32(A06 + C5); \
|
||||
A05 = T32(A05 + C4); \
|
||||
A04 = T32(A04 + C3); \
|
||||
A03 = T32(A03 + C2); \
|
||||
A02 = T32(A02 + C1); \
|
||||
A01 = T32(A01 + C0); \
|
||||
A00 = T32(A00 + CF); \
|
||||
A0B = T32(A0B + CE); \
|
||||
A0A = T32(A0A + CD); \
|
||||
A09 = T32(A09 + CC); \
|
||||
A08 = T32(A08 + CB); \
|
||||
A07 = T32(A07 + CA); \
|
||||
A06 = T32(A06 + C9); \
|
||||
A05 = T32(A05 + C8); \
|
||||
A04 = T32(A04 + C7); \
|
||||
A03 = T32(A03 + C6); \
|
||||
A02 = T32(A02 + C5); \
|
||||
A01 = T32(A01 + C4); \
|
||||
A00 = T32(A00 + C3); \
|
||||
} while (0)
|
||||
|
||||
#define INCR_W do { \
|
||||
if ((Wlow = T32(Wlow + 1)) == 0) \
|
||||
Whigh = T32(Whigh + 1); \
|
||||
} while (0)
|
||||
|
||||
__constant__ static const sph_u32 A_init_512[] = {
|
||||
C32(0x20728DFD), C32(0x46C0BD53), C32(0xE782B699), C32(0x55304632),
|
||||
C32(0x71B4EF90), C32(0x0EA9E82C), C32(0xDBB930F1), C32(0xFAD06B8B),
|
||||
C32(0xBE0CAE40), C32(0x8BD14410), C32(0x76D2ADAC), C32(0x28ACAB7F)
|
||||
};
|
||||
|
||||
__constant__ static const sph_u32 B_init_512[] = {
|
||||
C32(0xC1099CB7), C32(0x07B385F3), C32(0xE7442C26), C32(0xCC8AD640),
|
||||
C32(0xEB6F56C7), C32(0x1EA81AA9), C32(0x73B9D314), C32(0x1DE85D08),
|
||||
C32(0x48910A5A), C32(0x893B22DB), C32(0xC5A0DF44), C32(0xBBC4324E),
|
||||
C32(0x72D2F240), C32(0x75941D99), C32(0x6D8BDE82), C32(0xA1A7502B)
|
||||
};
|
||||
|
||||
__constant__ static const sph_u32 C_init_512[] = {
|
||||
C32(0xD9BF68D1), C32(0x58BAD750), C32(0x56028CB2), C32(0x8134F359),
|
||||
C32(0xB5D469D8), C32(0x941A8CC2), C32(0x418B2A6E), C32(0x04052780),
|
||||
C32(0x7F07D787), C32(0x5194358F), C32(0x3C60D665), C32(0xBE97D79A),
|
||||
C32(0x950C3434), C32(0xAED9A06D), C32(0x2537DC8D), C32(0x7CDB5969)
|
||||
};
|
||||
|
||||
__constant__ static uint32_t c_PaddedMessage80[20];
|
||||
|
||||
__host__
|
||||
void x16_shabal512_setBlock_80(void *pdata)
|
||||
{
|
||||
cudaMemcpyToSymbol(c_PaddedMessage80, pdata, sizeof(c_PaddedMessage80), 0, cudaMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
#define TPB_SHABAL 256
|
||||
|
||||
__global__ __launch_bounds__(TPB_SHABAL, 2)
|
||||
void x16_shabal512_gpu_hash_80(uint32_t threads, const uint32_t startNonce, uint32_t *g_hash)
|
||||
{
|
||||
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||
|
||||
uint32_t B[] = {
|
||||
0xC1099CB7, 0x07B385F3, 0xE7442C26, 0xCC8AD640, 0xEB6F56C7, 0x1EA81AA9, 0x73B9D314, 0x1DE85D08,
|
||||
0x48910A5A, 0x893B22DB, 0xC5A0DF44, 0xBBC4324E, 0x72D2F240, 0x75941D99, 0x6D8BDE82, 0xA1A7502B
|
||||
};
|
||||
uint32_t M[16];
|
||||
|
||||
if (thread < threads)
|
||||
{
|
||||
// todo: try __ldc
|
||||
*(uint2x4*)&M[0] = *(uint2x4*)&c_PaddedMessage80[0];
|
||||
*(uint2x4*)&M[8] = *(uint2x4*)&c_PaddedMessage80[8];
|
||||
|
||||
sph_u32 A00 = A_init_512[0], A01 = A_init_512[1], A02 = A_init_512[ 2], A03 = A_init_512[ 3];
|
||||
sph_u32 A04 = A_init_512[4], A05 = A_init_512[5], A06 = A_init_512[ 6], A07 = A_init_512[ 7];
|
||||
sph_u32 A08 = A_init_512[8], A09 = A_init_512[9], A0A = A_init_512[10], A0B = A_init_512[11];
|
||||
|
||||
sph_u32 B0 = B_init_512[ 0], B1 = B_init_512[ 1], B2 = B_init_512[ 2], B3 = B_init_512 [3];
|
||||
sph_u32 B4 = B_init_512[ 4], B5 = B_init_512[ 5], B6 = B_init_512[ 6], B7 = B_init_512[ 7];
|
||||
sph_u32 B8 = B_init_512[ 8], B9 = B_init_512[ 9], BA = B_init_512[10], BB = B_init_512[11];
|
||||
sph_u32 BC = B_init_512[12], BD = B_init_512[13], BE = B_init_512[14], BF = B_init_512[15];
|
||||
|
||||
sph_u32 C0 = C_init_512[ 0], C1 = C_init_512[ 1], C2 = C_init_512[ 2], C3 = C_init_512[ 3];
|
||||
sph_u32 C4 = C_init_512[ 4], C5 = C_init_512[ 5], C6 = C_init_512[ 6], C7 = C_init_512[ 7];
|
||||
sph_u32 C8 = C_init_512[ 8], C9 = C_init_512[ 9], CA = C_init_512[10], CB = C_init_512[11];
|
||||
sph_u32 CC = C_init_512[12], CD = C_init_512[13], CE = C_init_512[14], CF = C_init_512[15];
|
||||
|
||||
sph_u32 M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, MA, MB, MC, MD, ME, MF;
|
||||
sph_u32 Wlow = 1, Whigh = 0;
|
||||
|
||||
M0 = M[ 0];
|
||||
M1 = M[ 1];
|
||||
M2 = M[ 2];
|
||||
M3 = M[ 3];
|
||||
M4 = M[ 4];
|
||||
M5 = M[ 5];
|
||||
M6 = M[ 6];
|
||||
M7 = M[ 7];
|
||||
M8 = M[ 8];
|
||||
M9 = M[ 9];
|
||||
MA = M[10];
|
||||
MB = M[11];
|
||||
MC = M[12];
|
||||
MD = M[13];
|
||||
ME = M[14];
|
||||
MF = M[15];
|
||||
|
||||
INPUT_BLOCK_ADD;
|
||||
XOR_W;
|
||||
APPLY_P;
|
||||
INPUT_BLOCK_SUB;
|
||||
SWAP_BC;
|
||||
INCR_W;
|
||||
|
||||
M0 = c_PaddedMessage80[16];
|
||||
M1 = c_PaddedMessage80[17];
|
||||
M2 = c_PaddedMessage80[18];
|
||||
M3 = cuda_swab32(startNonce + thread);
|
||||
M4 = 0x80;
|
||||
M5 = M6 = M7 = M8 = M9 = MA = MB = MC = MD = ME = MF = 0;
|
||||
|
||||
INPUT_BLOCK_ADD;
|
||||
XOR_W;
|
||||
APPLY_P;
|
||||
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
SWAP_BC;
|
||||
XOR_W;
|
||||
APPLY_P;
|
||||
}
|
||||
|
||||
B[ 0] = B0;
|
||||
B[ 1] = B1;
|
||||
B[ 2] = B2;
|
||||
B[ 3] = B3;
|
||||
B[ 4] = B4;
|
||||
B[ 5] = B5;
|
||||
B[ 6] = B6;
|
||||
B[ 7] = B7;
|
||||
B[ 8] = B8;
|
||||
B[ 9] = B9;
|
||||
B[10] = BA;
|
||||
B[11] = BB;
|
||||
B[12] = BC;
|
||||
B[13] = BD;
|
||||
B[14] = BE;
|
||||
B[15] = BF;
|
||||
|
||||
// output
|
||||
uint64_t hashPosition = thread;
|
||||
uint32_t *Hash = &g_hash[hashPosition << 4];
|
||||
*(uint2x4*)&Hash[0] = *(uint2x4*)&B[0];
|
||||
*(uint2x4*)&Hash[8] = *(uint2x4*)&B[8];
|
||||
}
|
||||
}
|
||||
|
||||
__host__
|
||||
void x16_shabal512_cuda_hash_80(int thr_id, const uint32_t threads, const uint32_t startNonce, uint32_t *d_hash)
|
||||
{
|
||||
const uint32_t threadsperblock = TPB_SHABAL;
|
||||
|
||||
dim3 grid((threads + threadsperblock - 1) / threadsperblock);
|
||||
dim3 block(threadsperblock);
|
||||
|
||||
x16_shabal512_gpu_hash_80 <<<grid, block >>>(threads, startNonce, d_hash);
|
||||
}
|
1836
x16r/cuda_x16_simd512_80.cu
Normal file
1836
x16r/cuda_x16_simd512_80.cu
Normal file
File diff suppressed because it is too large
Load Diff
75
x16r/cuda_x16r.h
Normal file
75
x16r/cuda_x16r.h
Normal file
@ -0,0 +1,75 @@
|
||||
#include "x11/cuda_x11.h"
|
||||
|
||||
extern void x13_hamsi512_cpu_init(int thr_id, uint32_t threads);
|
||||
extern void x13_hamsi512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
|
||||
extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads);
|
||||
extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
extern void x13_fugue512_cpu_free(int thr_id);
|
||||
|
||||
extern void x14_shabal512_cpu_init(int thr_id, uint32_t threads);
|
||||
extern void x14_shabal512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
|
||||
extern void x15_whirlpool_cpu_init(int thr_id, uint32_t threads, int flag);
|
||||
extern void x15_whirlpool_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
extern void x15_whirlpool_cpu_free(int thr_id);
|
||||
|
||||
extern void x17_sha512_cpu_init(int thr_id, uint32_t threads);
|
||||
extern void x17_sha512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_hash);
|
||||
|
||||
extern void x17_haval256_cpu_init(int thr_id, uint32_t threads);
|
||||
extern void x17_haval256_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_hash, const int outlen);
|
||||
|
||||
void quark_blake512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_nonceVector, uint32_t *d_outputHash, int order);
|
||||
|
||||
// ---- 80 bytes kernels
|
||||
|
||||
void quark_bmw512_cpu_setBlock_80(void *pdata);
|
||||
void quark_bmw512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_hash, int order);
|
||||
|
||||
void groestl512_setBlock_80(int thr_id, uint32_t *endiandata);
|
||||
void groestl512_cuda_hash_80(const int thr_id, const uint32_t threads, const uint32_t startNonce, uint32_t *d_hash);
|
||||
|
||||
void skein512_cpu_setBlock_80(void *pdata);
|
||||
void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_hash, int swap);
|
||||
|
||||
void qubit_luffa512_cpu_init(int thr_id, uint32_t threads);
|
||||
void qubit_luffa512_cpu_setBlock_80(void *pdata);
|
||||
void qubit_luffa512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_hash, int order);
|
||||
|
||||
void jh512_setBlock_80(int thr_id, uint32_t *endiandata);
|
||||
void jh512_cuda_hash_80(const int thr_id, const uint32_t threads, const uint32_t startNonce, uint32_t *d_hash);
|
||||
|
||||
void keccak512_setBlock_80(int thr_id, uint32_t *endiandata);
|
||||
void keccak512_cuda_hash_80(const int thr_id, const uint32_t threads, const uint32_t startNonce, uint32_t *d_hash);
|
||||
|
||||
void cubehash512_setBlock_80(int thr_id, uint32_t* endiandata);
|
||||
void cubehash512_cuda_hash_80(const int thr_id, const uint32_t threads, const uint32_t startNonce, uint32_t *d_hash);
|
||||
|
||||
void x11_shavite512_setBlock_80(void *pdata);
|
||||
void x11_shavite512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_hash, int order);
|
||||
|
||||
void x16_shabal512_setBlock_80(void *pdata);
|
||||
void x16_shabal512_cuda_hash_80(int thr_id, const uint32_t threads, const uint32_t startNonce, uint32_t *d_hash);
|
||||
|
||||
void x16_simd512_setBlock_80(void *pdata);
|
||||
void x16_simd512_cuda_hash_80(int thr_id, const uint32_t threads, const uint32_t startNonce, uint32_t *d_hash);
|
||||
|
||||
void x16_echo512_cuda_init(int thr_id, const uint32_t threads);
|
||||
void x16_echo512_setBlock_80(void *pdata);
|
||||
void x16_echo512_cuda_hash_80(int thr_id, const uint32_t threads, const uint32_t startNonce, uint32_t *d_hash);
|
||||
|
||||
void x16_hamsi512_setBlock_80(void *pdata);
|
||||
void x16_hamsi512_cuda_hash_80(int thr_id, const uint32_t threads, const uint32_t startNonce, uint32_t *d_hash);
|
||||
|
||||
void x16_fugue512_cpu_init(int thr_id, uint32_t threads);
|
||||
void x16_fugue512_cpu_free(int thr_id);
|
||||
void x16_fugue512_setBlock_80(void *pdata);
|
||||
void x16_fugue512_cuda_hash_80(int thr_id, const uint32_t threads, const uint32_t startNonce, uint32_t *d_hash);
|
||||
|
||||
void x16_whirlpool512_init(int thr_id, uint32_t threads);
|
||||
void x16_whirlpool512_setBlock_80(void* endiandata);
|
||||
void x16_whirlpool512_hash_80(int thr_id, const uint32_t threads, const uint32_t startNonce, uint32_t *d_hash);
|
||||
|
||||
void x16_sha512_setBlock_80(void *pdata);
|
||||
void x16_sha512_cuda_hash_80(int thr_id, const uint32_t threads, const uint32_t startNonce, uint32_t *d_hash);
|
625
x16r/x16r.cu
Normal file
625
x16r/x16r.cu
Normal file
@ -0,0 +1,625 @@
|
||||
/**
|
||||
* X16R algorithm (X16 with Randomized chain order)
|
||||
*
|
||||
* tpruvot 2018 - GPL code
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <memory.h>
|
||||
#include <unistd.h>
|
||||
|
||||
extern "C" {
|
||||
#include "sph/sph_blake.h"
|
||||
#include "sph/sph_bmw.h"
|
||||
#include "sph/sph_groestl.h"
|
||||
#include "sph/sph_skein.h"
|
||||
#include "sph/sph_jh.h"
|
||||
#include "sph/sph_keccak.h"
|
||||
|
||||
#include "sph/sph_luffa.h"
|
||||
#include "sph/sph_cubehash.h"
|
||||
#include "sph/sph_shavite.h"
|
||||
#include "sph/sph_simd.h"
|
||||
#include "sph/sph_echo.h"
|
||||
|
||||
#include "sph/sph_hamsi.h"
|
||||
#include "sph/sph_fugue.h"
|
||||
#include "sph/sph_shabal.h"
|
||||
#include "sph/sph_whirlpool.h"
|
||||
#include "sph/sph_sha2.h"
|
||||
}
|
||||
|
||||
#include "miner.h"
|
||||
#include "cuda_helper.h"
|
||||
#include "cuda_x16r.h"
|
||||
|
||||
static uint32_t *d_hash[MAX_GPUS];
|
||||
|
||||
enum Algo {
|
||||
BLAKE = 0,
|
||||
BMW,
|
||||
GROESTL,
|
||||
JH,
|
||||
KECCAK,
|
||||
SKEIN,
|
||||
LUFFA,
|
||||
CUBEHASH,
|
||||
SHAVITE,
|
||||
SIMD,
|
||||
ECHO,
|
||||
HAMSI,
|
||||
FUGUE,
|
||||
SHABAL,
|
||||
WHIRLPOOL,
|
||||
SHA512,
|
||||
HASH_FUNC_COUNT
|
||||
};
|
||||
|
||||
static const char* algo_strings[] = {
|
||||
"blake",
|
||||
"bmw512",
|
||||
"groestl",
|
||||
"jh512",
|
||||
"keccak",
|
||||
"skein",
|
||||
"luffa",
|
||||
"cube",
|
||||
"shavite",
|
||||
"simd",
|
||||
"echo",
|
||||
"hamsi",
|
||||
"fugue",
|
||||
"shabal",
|
||||
"whirlpool",
|
||||
"sha512",
|
||||
NULL
|
||||
};
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
static __thread bool s_implemented = false;
|
||||
static __thread char hashOrder[HASH_FUNC_COUNT + 1] = { 0 };
|
||||
|
||||
static void getAlgoString(const uint32_t* prevblock, char *output)
|
||||
{
|
||||
char *sptr = output;
|
||||
uint8_t* data = (uint8_t*)prevblock;
|
||||
|
||||
for (uint8_t j = 0; j < HASH_FUNC_COUNT; j++) {
|
||||
uint8_t b = (15 - j) >> 1; // 16 ascii hex chars, reversed
|
||||
uint8_t algoDigit = (j & 1) ? data[b] & 0xF : data[b] >> 4;
|
||||
if (algoDigit >= 10)
|
||||
sprintf(sptr, "%c", 'A' + (algoDigit - 10));
|
||||
else
|
||||
sprintf(sptr, "%u", (uint32_t) algoDigit);
|
||||
sptr++;
|
||||
}
|
||||
*sptr = '\0';
|
||||
}
|
||||
|
||||
// X16R CPU Hash (Validation)
|
||||
extern "C" void x16r_hash(void *output, const void *input)
|
||||
{
|
||||
unsigned char _ALIGN(64) hash[128];
|
||||
|
||||
sph_blake512_context ctx_blake;
|
||||
sph_bmw512_context ctx_bmw;
|
||||
sph_groestl512_context ctx_groestl;
|
||||
sph_jh512_context ctx_jh;
|
||||
sph_keccak512_context ctx_keccak;
|
||||
sph_skein512_context ctx_skein;
|
||||
sph_luffa512_context ctx_luffa;
|
||||
sph_cubehash512_context ctx_cubehash;
|
||||
sph_shavite512_context ctx_shavite;
|
||||
sph_simd512_context ctx_simd;
|
||||
sph_echo512_context ctx_echo;
|
||||
sph_hamsi512_context ctx_hamsi;
|
||||
sph_fugue512_context ctx_fugue;
|
||||
sph_shabal512_context ctx_shabal;
|
||||
sph_whirlpool_context ctx_whirlpool;
|
||||
sph_sha512_context ctx_sha512;
|
||||
|
||||
void *in = (void*) input;
|
||||
int size = 80;
|
||||
|
||||
uint32_t *in32 = (uint32_t*) input;
|
||||
getAlgoString(&in32[1], hashOrder);
|
||||
|
||||
for (int i = 0; i < 16; i++)
|
||||
{
|
||||
const char elem = hashOrder[i];
|
||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
|
||||
switch (algo) {
|
||||
case BLAKE:
|
||||
sph_blake512_init(&ctx_blake);
|
||||
sph_blake512(&ctx_blake, in, size);
|
||||
sph_blake512_close(&ctx_blake, hash);
|
||||
break;
|
||||
case BMW:
|
||||
sph_bmw512_init(&ctx_bmw);
|
||||
sph_bmw512(&ctx_bmw, in, size);
|
||||
sph_bmw512_close(&ctx_bmw, hash);
|
||||
break;
|
||||
case GROESTL:
|
||||
sph_groestl512_init(&ctx_groestl);
|
||||
sph_groestl512(&ctx_groestl, in, size);
|
||||
sph_groestl512_close(&ctx_groestl, hash);
|
||||
break;
|
||||
case SKEIN:
|
||||
sph_skein512_init(&ctx_skein);
|
||||
sph_skein512(&ctx_skein, in, size);
|
||||
sph_skein512_close(&ctx_skein, hash);
|
||||
break;
|
||||
case JH:
|
||||
sph_jh512_init(&ctx_jh);
|
||||
sph_jh512(&ctx_jh, in, size);
|
||||
sph_jh512_close(&ctx_jh, hash);
|
||||
break;
|
||||
case KECCAK:
|
||||
sph_keccak512_init(&ctx_keccak);
|
||||
sph_keccak512(&ctx_keccak, in, size);
|
||||
sph_keccak512_close(&ctx_keccak, hash);
|
||||
break;
|
||||
case LUFFA:
|
||||
sph_luffa512_init(&ctx_luffa);
|
||||
sph_luffa512(&ctx_luffa, in, size);
|
||||
sph_luffa512_close(&ctx_luffa, hash);
|
||||
break;
|
||||
case CUBEHASH:
|
||||
sph_cubehash512_init(&ctx_cubehash);
|
||||
sph_cubehash512(&ctx_cubehash, in, size);
|
||||
sph_cubehash512_close(&ctx_cubehash, hash);
|
||||
break;
|
||||
case SHAVITE:
|
||||
sph_shavite512_init(&ctx_shavite);
|
||||
sph_shavite512(&ctx_shavite, in, size);
|
||||
sph_shavite512_close(&ctx_shavite, hash);
|
||||
break;
|
||||
case SIMD:
|
||||
sph_simd512_init(&ctx_simd);
|
||||
sph_simd512(&ctx_simd, in, size);
|
||||
sph_simd512_close(&ctx_simd, hash);
|
||||
break;
|
||||
case ECHO:
|
||||
sph_echo512_init(&ctx_echo);
|
||||
sph_echo512(&ctx_echo, in, size);
|
||||
sph_echo512_close(&ctx_echo, hash);
|
||||
break;
|
||||
case HAMSI:
|
||||
sph_hamsi512_init(&ctx_hamsi);
|
||||
sph_hamsi512(&ctx_hamsi, in, size);
|
||||
sph_hamsi512_close(&ctx_hamsi, hash);
|
||||
break;
|
||||
case FUGUE:
|
||||
sph_fugue512_init(&ctx_fugue);
|
||||
sph_fugue512(&ctx_fugue, in, size);
|
||||
sph_fugue512_close(&ctx_fugue, hash);
|
||||
break;
|
||||
case SHABAL:
|
||||
sph_shabal512_init(&ctx_shabal);
|
||||
sph_shabal512(&ctx_shabal, in, size);
|
||||
sph_shabal512_close(&ctx_shabal, hash);
|
||||
break;
|
||||
case WHIRLPOOL:
|
||||
sph_whirlpool_init(&ctx_whirlpool);
|
||||
sph_whirlpool(&ctx_whirlpool, in, size);
|
||||
sph_whirlpool_close(&ctx_whirlpool, hash);
|
||||
break;
|
||||
case SHA512:
|
||||
sph_sha512_init(&ctx_sha512);
|
||||
sph_sha512(&ctx_sha512,(const void*) in, size);
|
||||
sph_sha512_close(&ctx_sha512,(void*) hash);
|
||||
break;
|
||||
}
|
||||
in = (void*) hash;
|
||||
size = 64;
|
||||
}
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
void whirlpool_midstate(void *state, const void *input)
|
||||
{
|
||||
sph_whirlpool_context ctx;
|
||||
|
||||
sph_whirlpool_init(&ctx);
|
||||
sph_whirlpool(&ctx, input, 64);
|
||||
|
||||
memcpy(state, ctx.state, 64);
|
||||
}
|
||||
|
||||
static bool init[MAX_GPUS] = { 0 };
|
||||
|
||||
//#define _DEBUG
|
||||
#define _DEBUG_PREFIX "x16r-"
|
||||
#include "cuda_debug.cuh"
|
||||
|
||||
//static int algo80_tests[HASH_FUNC_COUNT] = { 0 };
|
||||
//static int algo64_tests[HASH_FUNC_COUNT] = { 0 };
|
||||
static int algo80_fails[HASH_FUNC_COUNT] = { 0 };
|
||||
|
||||
extern "C" int scanhash_x16r(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const int dev_id = device_map[thr_id];
|
||||
int intensity = (device_sm[dev_id] > 500 && !is_windows()) ? 20 : 19;
|
||||
if (strstr(device_name[dev_id], "GTX 1080")) intensity = 20;
|
||||
uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity);
|
||||
//if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
|
||||
|
||||
if (!init[thr_id])
|
||||
{
|
||||
cudaSetDevice(device_map[thr_id]);
|
||||
if (opt_cudaschedule == -1 && gpu_threads == 1) {
|
||||
cudaDeviceReset();
|
||||
// reduce cpu usage
|
||||
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
|
||||
}
|
||||
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);
|
||||
|
||||
quark_blake512_cpu_init(thr_id, throughput);
|
||||
quark_bmw512_cpu_init(thr_id, throughput);
|
||||
quark_groestl512_cpu_init(thr_id, throughput);
|
||||
quark_skein512_cpu_init(thr_id, throughput);
|
||||
quark_jh512_cpu_init(thr_id, throughput);
|
||||
quark_keccak512_cpu_init(thr_id, throughput);
|
||||
qubit_luffa512_cpu_init(thr_id, throughput);
|
||||
x11_luffa512_cpu_init(thr_id, throughput); // 64
|
||||
x11_shavite512_cpu_init(thr_id, throughput);
|
||||
x11_simd512_cpu_init(thr_id, throughput); // 64
|
||||
x11_echo512_cpu_init(thr_id, throughput);
|
||||
x16_echo512_cuda_init(thr_id, throughput);
|
||||
x13_hamsi512_cpu_init(thr_id, throughput);
|
||||
x13_fugue512_cpu_init(thr_id, throughput);
|
||||
x16_fugue512_cpu_init(thr_id, throughput);
|
||||
x14_shabal512_cpu_init(thr_id, throughput);
|
||||
x15_whirlpool_cpu_init(thr_id, throughput, 0);
|
||||
x16_whirlpool512_init(thr_id, throughput);
|
||||
x17_sha512_cpu_init(thr_id, throughput);
|
||||
|
||||
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), 0);
|
||||
|
||||
cuda_check_cpu_init(thr_id, throughput);
|
||||
|
||||
init[thr_id] = true;
|
||||
}
|
||||
|
||||
if (opt_benchmark) {
|
||||
((uint32_t*)ptarget)[7] = 0x003f;
|
||||
((uint8_t*)pdata)[8] = 0x90; // hashOrder[0] = '9'; for simd 80 + blake512 64
|
||||
//((uint8_t*)pdata)[8] = 0xA0; // hashOrder[0] = 'A'; for echo 80 + blake512 64
|
||||
//((uint8_t*)pdata)[8] = 0xB0; // hashOrder[0] = 'B'; for hamsi 80 + blake512 64
|
||||
//((uint8_t*)pdata)[8] = 0xC0; // hashOrder[0] = 'C'; for fugue 80 + blake512 64
|
||||
//((uint8_t*)pdata)[8] = 0xE0; // hashOrder[0] = 'E'; for whirlpool 80 + blake512 64
|
||||
}
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
|
||||
for (int k=0; k < 19; k++)
|
||||
be32enc(&endiandata[k], pdata[k]);
|
||||
|
||||
uint32_t ntime = swab32(pdata[17]);
|
||||
if (s_ntime != ntime) {
|
||||
getAlgoString(&endiandata[1], hashOrder);
|
||||
s_ntime = ntime;
|
||||
s_implemented = true;
|
||||
if (opt_debug && !thr_id) applog(LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime);
|
||||
}
|
||||
|
||||
if (!s_implemented) {
|
||||
sleep(1);
|
||||
return -1;
|
||||
}
|
||||
|
||||
cuda_check_cpu_setTarget(ptarget);
|
||||
|
||||
char elem = hashOrder[0];
|
||||
const uint8_t algo80 = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
|
||||
switch (algo80) {
|
||||
case BLAKE:
|
||||
quark_blake512_cpu_setBlock_80(thr_id, endiandata);
|
||||
break;
|
||||
case BMW:
|
||||
quark_bmw512_cpu_setBlock_80(endiandata);
|
||||
break;
|
||||
case GROESTL:
|
||||
groestl512_setBlock_80(thr_id, endiandata);
|
||||
break;
|
||||
case JH:
|
||||
jh512_setBlock_80(thr_id, endiandata);
|
||||
break;
|
||||
case KECCAK:
|
||||
keccak512_setBlock_80(thr_id, endiandata);
|
||||
break;
|
||||
case SKEIN:
|
||||
skein512_cpu_setBlock_80((void*)endiandata);
|
||||
break;
|
||||
case LUFFA:
|
||||
qubit_luffa512_cpu_setBlock_80((void*)endiandata);
|
||||
break;
|
||||
case CUBEHASH:
|
||||
cubehash512_setBlock_80(thr_id, endiandata);
|
||||
break;
|
||||
case SHAVITE:
|
||||
x11_shavite512_setBlock_80((void*)endiandata);
|
||||
break;
|
||||
case SIMD:
|
||||
x16_simd512_setBlock_80((void*)endiandata);
|
||||
break;
|
||||
case ECHO:
|
||||
x16_echo512_setBlock_80((void*)endiandata);
|
||||
break;
|
||||
case HAMSI:
|
||||
x16_hamsi512_setBlock_80((void*)endiandata);
|
||||
break;
|
||||
case FUGUE:
|
||||
x16_fugue512_setBlock_80((void*)pdata);
|
||||
break;
|
||||
case SHABAL:
|
||||
x16_shabal512_setBlock_80((void*)endiandata);
|
||||
break;
|
||||
case WHIRLPOOL:
|
||||
x16_whirlpool512_setBlock_80((void*)endiandata);
|
||||
break;
|
||||
case SHA512:
|
||||
x16_sha512_setBlock_80(endiandata);
|
||||
break;
|
||||
default: {
|
||||
if (!thr_id)
|
||||
applog(LOG_WARNING, "kernel %s %c unimplemented, order %s", algo_strings[algo80], elem, hashOrder);
|
||||
s_implemented = false;
|
||||
sleep(5);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
int warn = 0;
|
||||
|
||||
do {
|
||||
int order = 0;
|
||||
|
||||
// Hash with CUDA
|
||||
|
||||
switch (algo80) {
|
||||
case BLAKE:
|
||||
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
|
||||
TRACE("blake80:");
|
||||
break;
|
||||
case BMW:
|
||||
quark_bmw512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||
TRACE("bmw80 :");
|
||||
break;
|
||||
case GROESTL:
|
||||
groestl512_cuda_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
|
||||
TRACE("grstl80:");
|
||||
break;
|
||||
case JH:
|
||||
jh512_cuda_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
|
||||
TRACE("jh51280:");
|
||||
break;
|
||||
case KECCAK:
|
||||
keccak512_cuda_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
|
||||
TRACE("kecck80:");
|
||||
break;
|
||||
case SKEIN:
|
||||
skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], 1); order++;
|
||||
TRACE("skein80:");
|
||||
break;
|
||||
case LUFFA:
|
||||
qubit_luffa512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||
TRACE("luffa80:");
|
||||
break;
|
||||
case CUBEHASH:
|
||||
cubehash512_cuda_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
|
||||
TRACE("cube 80:");
|
||||
break;
|
||||
case SHAVITE:
|
||||
x11_shavite512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||
TRACE("shavite:");
|
||||
break;
|
||||
case SIMD:
|
||||
x16_simd512_cuda_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
|
||||
TRACE("simd512:");
|
||||
break;
|
||||
case ECHO:
|
||||
x16_echo512_cuda_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
|
||||
TRACE("echo :");
|
||||
break;
|
||||
case HAMSI:
|
||||
x16_hamsi512_cuda_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
|
||||
TRACE("hamsi :");
|
||||
break;
|
||||
case FUGUE:
|
||||
x16_fugue512_cuda_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
|
||||
TRACE("fugue :");
|
||||
break;
|
||||
case SHABAL:
|
||||
x16_shabal512_cuda_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
|
||||
TRACE("shabal :");
|
||||
break;
|
||||
case WHIRLPOOL:
|
||||
x16_whirlpool512_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
|
||||
TRACE("whirl :");
|
||||
break;
|
||||
case SHA512:
|
||||
x16_sha512_cuda_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
|
||||
TRACE("sha512 :");
|
||||
break;
|
||||
}
|
||||
|
||||
for (int i = 1; i < 16; i++)
|
||||
{
|
||||
const char elem = hashOrder[i];
|
||||
const uint8_t algo64 = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
|
||||
switch (algo64) {
|
||||
case BLAKE:
|
||||
quark_blake512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||
TRACE("blake :");
|
||||
break;
|
||||
case BMW:
|
||||
quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||
TRACE("bmw :");
|
||||
break;
|
||||
case GROESTL:
|
||||
quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||
TRACE("groestl:");
|
||||
break;
|
||||
case JH:
|
||||
quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||
TRACE("jh512 :");
|
||||
break;
|
||||
case KECCAK:
|
||||
quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||
TRACE("keccak :");
|
||||
break;
|
||||
case SKEIN:
|
||||
quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||
TRACE("skein :");
|
||||
break;
|
||||
case LUFFA:
|
||||
x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||
TRACE("luffa :");
|
||||
break;
|
||||
case CUBEHASH:
|
||||
x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||
TRACE("cube :");
|
||||
break;
|
||||
case SHAVITE:
|
||||
x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||
TRACE("shavite:");
|
||||
break;
|
||||
case SIMD:
|
||||
x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||
TRACE("simd :");
|
||||
break;
|
||||
case ECHO:
|
||||
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||
TRACE("echo :");
|
||||
break;
|
||||
case HAMSI:
|
||||
x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||
TRACE("hamsi :");
|
||||
break;
|
||||
case FUGUE:
|
||||
x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||
TRACE("fugue :");
|
||||
break;
|
||||
case SHABAL:
|
||||
x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||
TRACE("shabal :");
|
||||
break;
|
||||
case WHIRLPOOL:
|
||||
x15_whirlpool_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||
TRACE("shabal :");
|
||||
break;
|
||||
case SHA512:
|
||||
x17_sha512_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
|
||||
TRACE("sha512 :");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
*hashes_done = pdata[19] - first_nonce + throughput;
|
||||
|
||||
work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
|
||||
#ifdef _DEBUG
|
||||
uint32_t _ALIGN(64) dhash[8];
|
||||
be32enc(&endiandata[19], pdata[19]);
|
||||
x16r_hash(dhash, endiandata);
|
||||
applog_hash(dhash);
|
||||
return -1;
|
||||
#endif
|
||||
if (work->nonces[0] != UINT32_MAX)
|
||||
{
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t _ALIGN(64) vhash[8];
|
||||
be32enc(&endiandata[19], work->nonces[0]);
|
||||
x16r_hash(vhash, endiandata);
|
||||
|
||||
if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
|
||||
work->valid_nonces = 1;
|
||||
work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
|
||||
work_set_target_ratio(work, vhash);
|
||||
if (work->nonces[1] != 0) {
|
||||
be32enc(&endiandata[19], work->nonces[1]);
|
||||
x16r_hash(vhash, endiandata);
|
||||
bn_set_target_ratio(work, vhash, 1);
|
||||
work->valid_nonces++;
|
||||
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
|
||||
} else {
|
||||
pdata[19] = work->nonces[0] + 1; // cursor
|
||||
}
|
||||
#if 0
|
||||
gpulog(LOG_INFO, thr_id, "hash found with %s 80!", algo_strings[algo80]);
|
||||
|
||||
algo80_tests[algo80] += work->valid_nonces;
|
||||
char oks64[128] = { 0 };
|
||||
char oks80[128] = { 0 };
|
||||
char fails[128] = { 0 };
|
||||
for (int a = 0; a < HASH_FUNC_COUNT; a++) {
|
||||
const char elem = hashOrder[a];
|
||||
const uint8_t algo64 = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
if (a > 0) algo64_tests[algo64] += work->valid_nonces;
|
||||
sprintf(&oks64[strlen(oks64)], "|%X:%2d", a, algo64_tests[a] < 100 ? algo64_tests[a] : 99);
|
||||
sprintf(&oks80[strlen(oks80)], "|%X:%2d", a, algo80_tests[a] < 100 ? algo80_tests[a] : 99);
|
||||
sprintf(&fails[strlen(fails)], "|%X:%2d", a, algo80_fails[a] < 100 ? algo80_fails[a] : 99);
|
||||
}
|
||||
applog(LOG_INFO, "K64: %s", oks64);
|
||||
applog(LOG_INFO, "K80: %s", oks80);
|
||||
applog(LOG_ERR, "F80: %s", fails);
|
||||
#endif
|
||||
return work->valid_nonces;
|
||||
}
|
||||
else if (vhash[7] > Htarg) {
|
||||
// x11+ coins could do some random error, but not on retry
|
||||
gpu_increment_reject(thr_id);
|
||||
algo80_fails[algo80]++;
|
||||
if (!warn) {
|
||||
warn++;
|
||||
pdata[19] = work->nonces[0] + 1;
|
||||
continue;
|
||||
} else {
|
||||
if (!opt_quiet) gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU! %s %s",
|
||||
work->nonces[0], algo_strings[algo80], hashOrder);
|
||||
warn = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ((uint64_t)throughput + pdata[19] >= max_nonce) {
|
||||
pdata[19] = max_nonce;
|
||||
break;
|
||||
}
|
||||
|
||||
pdata[19] += throughput;
|
||||
|
||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// cleanup
|
||||
extern "C" void free_x16r(int thr_id)
|
||||
{
|
||||
if (!init[thr_id])
|
||||
return;
|
||||
|
||||
cudaThreadSynchronize();
|
||||
|
||||
cudaFree(d_hash[thr_id]);
|
||||
|
||||
quark_blake512_cpu_free(thr_id);
|
||||
quark_groestl512_cpu_free(thr_id);
|
||||
x11_simd512_cpu_free(thr_id);
|
||||
x13_fugue512_cpu_free(thr_id);
|
||||
x16_fugue512_cpu_free(thr_id); // to merge with x13_fugue512 ?
|
||||
x15_whirlpool_cpu_free(thr_id);
|
||||
|
||||
cuda_check_cpu_free(thr_id);
|
||||
|
||||
cudaDeviceSynchronize();
|
||||
init[thr_id] = false;
|
||||
}
|
@ -169,3 +169,80 @@ void x17_sha512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce,
|
||||
|
||||
x17_sha512_gpu_hash_64 <<<grid, block>>> (threads, (uint64_t*)d_hash);
|
||||
}
|
||||
|
||||
__constant__
|
||||
static uint64_t c_PaddedMessage80[10];
|
||||
|
||||
__global__
|
||||
/*__launch_bounds__(256, 4)*/
|
||||
void x16_sha512_gpu_hash_80(const uint32_t threads, const uint32_t startNonce, uint64_t *g_hash)
|
||||
{
|
||||
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||
if (thread < threads)
|
||||
{
|
||||
uint64_t W[80];
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 9; i ++) {
|
||||
W[i] = SWAP64(c_PaddedMessage80[i]);
|
||||
}
|
||||
const uint32_t nonce = startNonce + thread;
|
||||
//((uint32_t*)W)[19] = cuda_swab32(nonce);
|
||||
W[9] = REPLACE_HIDWORD(c_PaddedMessage80[9], cuda_swab32(nonce));
|
||||
W[9] = cuda_swab64(W[9]);
|
||||
W[10] = 0x8000000000000000;
|
||||
|
||||
#pragma unroll
|
||||
for (int i = 11; i<15; i++) {
|
||||
W[i] = 0U;
|
||||
}
|
||||
W[15] = 0x0000000000000280;
|
||||
|
||||
#pragma unroll 64
|
||||
for (int i = 16; i < 80; i ++) {
|
||||
W[i] = SSG5_1(W[i-2]) + W[i-7];
|
||||
W[i] += SSG5_0(W[i-15]) + W[i-16];
|
||||
}
|
||||
|
||||
const uint64_t IV512[8] = {
|
||||
0x6A09E667F3BCC908, 0xBB67AE8584CAA73B,
|
||||
0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
|
||||
0x510E527FADE682D1, 0x9B05688C2B3E6C1F,
|
||||
0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179
|
||||
};
|
||||
|
||||
uint64_t r[8];
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 8; i++) {
|
||||
r[i] = IV512[i];
|
||||
}
|
||||
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 80; i++) {
|
||||
SHA3_STEP(c_WB, r, W, i&7, i);
|
||||
}
|
||||
|
||||
const uint64_t hashPosition = thread;
|
||||
uint64_t *pHash = &g_hash[hashPosition << 3];
|
||||
#pragma unroll
|
||||
for (int u = 0; u < 8; u ++) {
|
||||
pHash[u] = SWAP64(r[u] + IV512[u]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__host__
|
||||
void x16_sha512_cuda_hash_80(int thr_id, const uint32_t threads, const uint32_t startNounce, uint32_t *d_hash)
|
||||
{
|
||||
const uint32_t threadsperblock = 256;
|
||||
|
||||
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
||||
dim3 block(threadsperblock);
|
||||
|
||||
x16_sha512_gpu_hash_80 <<<grid, block >>> (threads, startNounce, (uint64_t*)d_hash);
|
||||
}
|
||||
|
||||
__host__
|
||||
void x16_sha512_setBlock_80(void *pdata)
|
||||
{
|
||||
cudaMemcpyToSymbol(c_PaddedMessage80, pdata, sizeof(c_PaddedMessage80), 0, cudaMemcpyHostToDevice);
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user