Browse Source

adding third party X13 and Diamond Groestl code contributions.

2upstream
Christian Buchner 10 years ago
parent
commit
d99b91ea65
  1. 34
      ccminer.vcxproj
  2. 23
      ccminer.vcxproj.filters
  3. 18
      cpu-miner.c
  4. 4
      miner.h
  5. 867
      sph/hamsi.c
  6. 39648
      sph/hamsi_helper.c
  7. 321
      sph/sph_hamsi.h
  8. 8
      x11/cuda_x11_echo.cu
  9. 711
      x13/cuda_x13_fugue512.cu
  10. 764
      x13/cuda_x13_hamsi512.cu
  11. 284
      x13/x13.cu

34
ccminer.vcxproj

@ -245,6 +245,8 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command> @@ -245,6 +245,8 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
<ClCompile Include="sph\shavite.c" />
<ClCompile Include="sph\simd.c" />
<ClCompile Include="sph\skein.c" />
<ClCompile Include="sph\hamsi.c" />
<ClCompile Include="sph\hamsi_helper.c" />
<ClCompile Include="util.c">
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">/TP %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">/TP %(AdditionalOptions)</AdditionalOptions>
@ -275,7 +277,6 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command> @@ -275,7 +277,6 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
<ClInclude Include="sph\sph_bmw.h" />
<ClInclude Include="sph\sph_cubehash.h" />
<ClInclude Include="sph\sph_echo.h" />
<ClInclude Include="sph\sph_fugue.h" />
<ClInclude Include="sph\sph_groestl.h" />
<ClInclude Include="sph\sph_jh.h" />
<ClInclude Include="sph\sph_keccak.h" />
@ -283,16 +284,11 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command> @@ -283,16 +284,11 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
<ClInclude Include="sph\sph_shavite.h" />
<ClInclude Include="sph\sph_simd.h" />
<ClInclude Include="sph\sph_skein.h" />
<ClInclude Include="sph\sph_hamsi.h" />
<ClInclude Include="sph\sph_types.h" />
<ClInclude Include="uint256.h" />
</ItemGroup>
<ItemGroup>
<CudaCompile Include="bitslice_transformations_quad.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</CudaCompile>
<CudaCompile Include="cuda_fugue256.cu">
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
@ -317,12 +313,6 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command> @@ -317,12 +313,6 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
</CudaCompile>
<CudaCompile Include="groestl_functions_quad.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</CudaCompile>
<CudaCompile Include="heavy\cuda_blake512.cu">
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
@ -501,6 +491,24 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command> @@ -501,6 +491,24 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
</CudaCompile>
<CudaCompile Include="x13\cuda_x13_hamsi512.cu">
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
</CudaCompile>
<CudaCompile Include="x13\cuda_x13_fugue512.cu">
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
</CudaCompile>
<CudaCompile Include="x13\x13.cu">
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
</CudaCompile>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">

23
ccminer.vcxproj.filters

@ -55,6 +55,9 @@ @@ -55,6 +55,9 @@
<Filter Include="Source Files\CUDA\x11">
<UniqueIdentifier>{dd751f2d-bfd6-42c1-8f9b-cbe94e539353}</UniqueIdentifier>
</Filter>
<Filter Include="Source Files\CUDA\x13">
<UniqueIdentifier>{d67a2af7-4851-4d21-910e-87791bc8ee35}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="compat\jansson\dump.c">
@ -144,6 +147,12 @@ @@ -144,6 +147,12 @@
<ClCompile Include="myriadgroestl.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="sph\hamsi.c">
<Filter>Source Files\sph</Filter>
</ClCompile>
<ClCompile Include="sph\hamsi_helper.c">
<Filter>Source Files\sph</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="compat.h">
@ -242,6 +251,9 @@ @@ -242,6 +251,9 @@
<ClInclude Include="cuda_helper.h">
<Filter>Header Files\CUDA</Filter>
</ClInclude>
<ClInclude Include="sph\sph_hamsi.h">
<Filter>Header Files\sph</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<CudaCompile Include="cuda_fugue256.cu">
@ -340,11 +352,14 @@ @@ -340,11 +352,14 @@
<CudaCompile Include="x11\simd_functions.cu">
<Filter>Source Files\CUDA\x11</Filter>
</CudaCompile>
<CudaCompile Include="bitslice_transformations_quad.cu">
<Filter>Source Files\CUDA</Filter>
<CudaCompile Include="x13\cuda_x13_fugue512.cu">
<Filter>Source Files\CUDA\x13</Filter>
</CudaCompile>
<CudaCompile Include="groestl_functions_quad.cu">
<Filter>Source Files\CUDA</Filter>
<CudaCompile Include="x13\cuda_x13_hamsi512.cu">
<Filter>Source Files\CUDA\x13</Filter>
</CudaCompile>
<CudaCompile Include="x13\x13.cu">
<Filter>Source Files\CUDA\x13</Filter>
</CudaCompile>
</ItemGroup>
</Project>

18
cpu-miner.c

@ -131,7 +131,9 @@ typedef enum { @@ -131,7 +131,9 @@ typedef enum {
ALGO_QUARK,
ALGO_ANIME,
ALGO_NIST5,
ALGO_X11
ALGO_X11,
ALGO_X13,
ALGO_DMD_GR,
} sha256_algos;
static const char *algo_names[] = {
@ -144,7 +146,9 @@ static const char *algo_names[] = { @@ -144,7 +146,9 @@ static const char *algo_names[] = {
"quark",
"anime",
"nist5",
"x11"
"x11",
"x13",
"dmd-gr",
};
bool opt_debug = false;
@ -217,6 +221,8 @@ Options:\n\ @@ -217,6 +221,8 @@ Options:\n\
anime Animecoin hash\n\
nist5 NIST5 (TalkCoin) hash\n\
x11 X11 (DarkCoin) hash\n\
x13 X13 (MaruCoin) hash\n\
dmd-gr Diamond-Groestl hash\n\
-d, --devices takes a comma separated list of CUDA devices to use.\n\
Device IDs start counting from 0! Alternatively takes\n\
string names of your cards like gtx780ti or gt640#2\n\
@ -766,7 +772,7 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work) @@ -766,7 +772,7 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
if (opt_algo == ALGO_JACKPOT)
diff_to_target(work->target, sctx->job.diff / (65536.0 * opt_difficulty));
else if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL)
else if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL || opt_algo == ALGO_DMD_GR)
diff_to_target(work->target, sctx->job.diff / (256.0 * opt_difficulty));
else
diff_to_target(work->target, sctx->job.diff / opt_difficulty);
@ -878,6 +884,7 @@ static void *miner_thread(void *userdata) @@ -878,6 +884,7 @@ static void *miner_thread(void *userdata)
break;
case ALGO_GROESTL:
case ALGO_DMD_GR:
rc = scanhash_groestlcoin(thr_id, work.data, work.target,
max_nonce, &hashes_done);
break;
@ -912,6 +919,11 @@ static void *miner_thread(void *userdata) @@ -912,6 +919,11 @@ static void *miner_thread(void *userdata)
max_nonce, &hashes_done);
break;
case ALGO_X13:
rc = scanhash_x13(thr_id, work.data, work.target,
max_nonce, &hashes_done);
break;
default:
/* should never happen */
goto out;

4
miner.h

@ -239,6 +239,10 @@ extern int scanhash_x11(int thr_id, uint32_t *pdata, @@ -239,6 +239,10 @@ extern int scanhash_x11(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done);
extern int scanhash_x13(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done);
extern void fugue256_hash(unsigned char* output, const unsigned char* input, int len);
extern void heavycoin_hash(unsigned char* output, const unsigned char* input, int len);
extern void groestlcoin_hash(unsigned char* output, const unsigned char* input, int len);

867
sph/hamsi.c

@ -0,0 +1,867 @@ @@ -0,0 +1,867 @@
/* $Id: hamsi.c 251 2010-10-19 14:31:51Z tp $ */
/*
* Hamsi implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#include <stddef.h>
#include <string.h>
#include "sph_hamsi.h"
#ifdef __cplusplus
extern "C"{
#endif
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_HAMSI
#define SPH_SMALL_FOOTPRINT_HAMSI 1
#endif
/*
* The SPH_HAMSI_EXPAND_* define how many input bits we handle in one
* table lookup during message expansion (1 to 8, inclusive). If we note
* w the number of bits per message word (w=32 for Hamsi-224/256, w=64
* for Hamsi-384/512), r the size of a "row" in 32-bit words (r=8 for
* Hamsi-224/256, r=16 for Hamsi-384/512), and n the expansion level,
* then we will get t tables (where t=ceil(w/n)) of individual size
* 2^n*r*4 (in bytes). The last table may be shorter (e.g. with w=32 and
* n=5, there are 7 tables, but the last one uses only two bits on
* input, not five).
*
* Also, we read t rows of r words from RAM. Words in a given row are
* concatenated in RAM in that order, so most of the cost is about
* reading the first row word; comparatively, cache misses are thus
* less expensive with Hamsi-512 (r=16) than with Hamsi-256 (r=8).
*
* When n=1, tables are "special" in that we omit the first entry of
* each table (which always contains 0), so that total table size is
* halved.
*
* We thus have the following (size1 is the cumulative table size of
* Hamsi-224/256; size2 is for Hamsi-384/512; similarly, t1 and t2
* are for Hamsi-224/256 and Hamsi-384/512, respectively).
*
* n size1 size2 t1 t2
* ---------------------------------------
* 1 1024 4096 32 64
* 2 2048 8192 16 32
* 3 2688 10880 11 22
* 4 4096 16384 8 16
* 5 6272 25600 7 13
* 6 10368 41984 6 11
* 7 16896 73856 5 10
* 8 32768 131072 4 8
*
* So there is a trade-off: a lower n makes the tables fit better in
* L1 cache, but increases the number of memory accesses. The optimal
* value depends on the amount of available L1 cache and the relative
* impact of a cache miss.
*
* Experimentally, in ideal benchmark conditions (which are not necessarily
* realistic with regards to L1 cache contention), it seems that n=8 is
* the best value on "big" architectures (those with 32 kB or more of L1
* cache), while n=4 is better on "small" architectures. This was tested
* on an Intel Core2 Q6600 (both 32-bit and 64-bit mode), a PowerPC G3
* (32 kB L1 cache, hence "big"), and a MIPS-compatible Broadcom BCM3302
* (8 kB L1 cache).
*
* Note: with n=1, the 32 tables (actually implemented as one big table)
* are read entirely and sequentially, regardless of the input data,
* thus avoiding any data-dependent table access pattern.
*/
#if !defined SPH_HAMSI_EXPAND_SMALL
#if SPH_SMALL_FOOTPRINT_HAMSI
#define SPH_HAMSI_EXPAND_SMALL 4
#else
#define SPH_HAMSI_EXPAND_SMALL 8
#endif
#endif
#if !defined SPH_HAMSI_EXPAND_BIG
#define SPH_HAMSI_EXPAND_BIG 8
#endif
#ifdef _MSC_VER
#pragma warning (disable: 4146)
#endif
#include "hamsi_helper.c"
static const sph_u32 IV224[] = {
SPH_C32(0xc3967a67), SPH_C32(0xc3bc6c20), SPH_C32(0x4bc3bcc3),
SPH_C32(0xa7c3bc6b), SPH_C32(0x2c204b61), SPH_C32(0x74686f6c),
SPH_C32(0x69656b65), SPH_C32(0x20556e69)
};
/*
* This version is the one used in the Hamsi submission package for
* round 2 of the SHA-3 competition; the UTF-8 encoding is wrong and
* shall soon be corrected in the official Hamsi specification.
*
static const sph_u32 IV224[] = {
SPH_C32(0x3c967a67), SPH_C32(0x3cbc6c20), SPH_C32(0xb4c343c3),
SPH_C32(0xa73cbc6b), SPH_C32(0x2c204b61), SPH_C32(0x74686f6c),
SPH_C32(0x69656b65), SPH_C32(0x20556e69)
};
*/
static const sph_u32 IV256[] = {
SPH_C32(0x76657273), SPH_C32(0x69746569), SPH_C32(0x74204c65),
SPH_C32(0x7576656e), SPH_C32(0x2c204465), SPH_C32(0x70617274),
SPH_C32(0x656d656e), SPH_C32(0x7420456c)
};
static const sph_u32 IV384[] = {
SPH_C32(0x656b7472), SPH_C32(0x6f746563), SPH_C32(0x686e6965),
SPH_C32(0x6b2c2043), SPH_C32(0x6f6d7075), SPH_C32(0x74657220),
SPH_C32(0x53656375), SPH_C32(0x72697479), SPH_C32(0x20616e64),
SPH_C32(0x20496e64), SPH_C32(0x75737472), SPH_C32(0x69616c20),
SPH_C32(0x43727970), SPH_C32(0x746f6772), SPH_C32(0x61706879),
SPH_C32(0x2c204b61)
};
static const sph_u32 IV512[] = {
SPH_C32(0x73746565), SPH_C32(0x6c706172), SPH_C32(0x6b204172),
SPH_C32(0x656e6265), SPH_C32(0x72672031), SPH_C32(0x302c2062),
SPH_C32(0x75732032), SPH_C32(0x3434362c), SPH_C32(0x20422d33),
SPH_C32(0x30303120), SPH_C32(0x4c657576), SPH_C32(0x656e2d48),
SPH_C32(0x65766572), SPH_C32(0x6c65652c), SPH_C32(0x2042656c),
SPH_C32(0x6769756d)
};
static const sph_u32 alpha_n[] = {
SPH_C32(0xff00f0f0), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0cccc),
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00),
SPH_C32(0xaaaacccc), SPH_C32(0xf0f0ff00), SPH_C32(0xf0f0cccc),
SPH_C32(0xaaaaff00), SPH_C32(0xccccff00), SPH_C32(0xaaaaf0f0),
SPH_C32(0xaaaaf0f0), SPH_C32(0xff00cccc), SPH_C32(0xccccf0f0),
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xff00f0f0),
SPH_C32(0xff00aaaa), SPH_C32(0xf0f0cccc), SPH_C32(0xf0f0ff00),
SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00), SPH_C32(0xaaaacccc),
SPH_C32(0xaaaaff00), SPH_C32(0xf0f0cccc), SPH_C32(0xaaaaf0f0),
SPH_C32(0xccccff00), SPH_C32(0xff00cccc), SPH_C32(0xaaaaf0f0),
SPH_C32(0xff00aaaa), SPH_C32(0xccccf0f0)
};
static const sph_u32 alpha_f[] = {
SPH_C32(0xcaf9639c), SPH_C32(0x0ff0f9c0), SPH_C32(0x639c0ff0),
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9),
SPH_C32(0xf9c00ff0), SPH_C32(0x639ccaf9), SPH_C32(0x639c0ff0),
SPH_C32(0xf9c0caf9), SPH_C32(0x0ff0caf9), SPH_C32(0xf9c0639c),
SPH_C32(0xf9c0639c), SPH_C32(0xcaf90ff0), SPH_C32(0x0ff0639c),
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0xcaf9639c),
SPH_C32(0xcaf9f9c0), SPH_C32(0x639c0ff0), SPH_C32(0x639ccaf9),
SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9), SPH_C32(0xf9c00ff0),
SPH_C32(0xf9c0caf9), SPH_C32(0x639c0ff0), SPH_C32(0xf9c0639c),
SPH_C32(0x0ff0caf9), SPH_C32(0xcaf90ff0), SPH_C32(0xf9c0639c),
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0639c)
};
#define DECL_STATE_SMALL \
sph_u32 c0, c1, c2, c3, c4, c5, c6, c7;
#define READ_STATE_SMALL(sc) do { \
c0 = sc->h[0x0]; \
c1 = sc->h[0x1]; \
c2 = sc->h[0x2]; \
c3 = sc->h[0x3]; \
c4 = sc->h[0x4]; \
c5 = sc->h[0x5]; \
c6 = sc->h[0x6]; \
c7 = sc->h[0x7]; \
} while (0)
#define WRITE_STATE_SMALL(sc) do { \
sc->h[0x0] = c0; \
sc->h[0x1] = c1; \
sc->h[0x2] = c2; \
sc->h[0x3] = c3; \
sc->h[0x4] = c4; \
sc->h[0x5] = c5; \
sc->h[0x6] = c6; \
sc->h[0x7] = c7; \
} while (0)
#define s0 m0
#define s1 m1
#define s2 c0
#define s3 c1
#define s4 c2
#define s5 c3
#define s6 m2
#define s7 m3
#define s8 m4
#define s9 m5
#define sA c4
#define sB c5
#define sC c6
#define sD c7
#define sE m6
#define sF m7
#define SBOX(a, b, c, d) do { \
sph_u32 t; \
t = (a); \
(a) &= (c); \
(a) ^= (d); \
(c) ^= (b); \
(c) ^= (a); \
(d) |= t; \
(d) ^= (b); \
t ^= (c); \
(b) = (d); \
(d) |= t; \
(d) ^= (a); \
(a) &= (b); \
t ^= (a); \
(b) ^= (d); \
(b) ^= t; \
(a) = (c); \
(c) = (b); \
(b) = (d); \
(d) = SPH_T32(~t); \
} while (0)
#define L(a, b, c, d) do { \
(a) = SPH_ROTL32(a, 13); \
(c) = SPH_ROTL32(c, 3); \
(b) ^= (a) ^ (c); \
(d) ^= (c) ^ SPH_T32((a) << 3); \
(b) = SPH_ROTL32(b, 1); \
(d) = SPH_ROTL32(d, 7); \
(a) ^= (b) ^ (d); \
(c) ^= (d) ^ SPH_T32((b) << 7); \
(a) = SPH_ROTL32(a, 5); \
(c) = SPH_ROTL32(c, 22); \
} while (0)
#define ROUND_SMALL(rc, alpha) do { \
s0 ^= alpha[0x00]; \
s1 ^= alpha[0x01] ^ (sph_u32)(rc); \
s2 ^= alpha[0x02]; \
s3 ^= alpha[0x03]; \
s4 ^= alpha[0x08]; \
s5 ^= alpha[0x09]; \
s6 ^= alpha[0x0A]; \
s7 ^= alpha[0x0B]; \
s8 ^= alpha[0x10]; \
s9 ^= alpha[0x11]; \
sA ^= alpha[0x12]; \
sB ^= alpha[0x13]; \
sC ^= alpha[0x18]; \
sD ^= alpha[0x19]; \
sE ^= alpha[0x1A]; \
sF ^= alpha[0x1B]; \
SBOX(s0, s4, s8, sC); \
SBOX(s1, s5, s9, sD); \
SBOX(s2, s6, sA, sE); \
SBOX(s3, s7, sB, sF); \
L(s0, s5, sA, sF); \
L(s1, s6, sB, sC); \
L(s2, s7, s8, sD); \
L(s3, s4, s9, sE); \
} while (0)
#define P_SMALL do { \
ROUND_SMALL(0, alpha_n); \
ROUND_SMALL(1, alpha_n); \
ROUND_SMALL(2, alpha_n); \
} while (0)
#define PF_SMALL do { \
ROUND_SMALL(0, alpha_f); \
ROUND_SMALL(1, alpha_f); \
ROUND_SMALL(2, alpha_f); \
ROUND_SMALL(3, alpha_f); \
ROUND_SMALL(4, alpha_f); \
ROUND_SMALL(5, alpha_f); \
} while (0)
#define T_SMALL do { \
/* order is important */ \
c7 = (sc->h[7] ^= sB); \
c6 = (sc->h[6] ^= sA); \
c5 = (sc->h[5] ^= s9); \
c4 = (sc->h[4] ^= s8); \
c3 = (sc->h[3] ^= s3); \
c2 = (sc->h[2] ^= s2); \
c1 = (sc->h[1] ^= s1); \
c0 = (sc->h[0] ^= s0); \
} while (0)
static void
hamsi_small(sph_hamsi_small_context *sc, const unsigned char *buf, size_t num)
{
DECL_STATE_SMALL
#if !SPH_64
sph_u32 tmp;
#endif
#if SPH_64
sc->count += (sph_u64)num << 5;
#else
tmp = SPH_T32((sph_u32)num << 5);
sc->count_low = SPH_T32(sc->count_low + tmp);
sc->count_high += (sph_u32)((num >> 13) >> 14);
if (sc->count_low < tmp)
sc->count_high ++;
#endif
READ_STATE_SMALL(sc);
while (num -- > 0) {
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
INPUT_SMALL;
P_SMALL;
T_SMALL;
buf += 4;
}
WRITE_STATE_SMALL(sc);
}
static void
hamsi_small_final(sph_hamsi_small_context *sc, const unsigned char *buf)
{
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
DECL_STATE_SMALL
READ_STATE_SMALL(sc);
INPUT_SMALL;
PF_SMALL;
T_SMALL;
WRITE_STATE_SMALL(sc);
}
static void
hamsi_small_init(sph_hamsi_small_context *sc, const sph_u32 *iv)
{
sc->partial_len = 0;
memcpy(sc->h, iv, sizeof sc->h);
#if SPH_64
sc->count = 0;
#else
sc->count_high = sc->count_low = 0;
#endif
}
static void
hamsi_small_core(sph_hamsi_small_context *sc, const void *data, size_t len)
{
if (sc->partial_len != 0) {
size_t mlen;
mlen = 4 - sc->partial_len;
if (len < mlen) {
memcpy(sc->partial + sc->partial_len, data, len);
sc->partial_len += len;
return;
} else {
memcpy(sc->partial + sc->partial_len, data, mlen);
len -= mlen;
data = (const unsigned char *)data + mlen;
hamsi_small(sc, sc->partial, 1);
sc->partial_len = 0;
}
}
hamsi_small(sc, data, (len >> 2));
data = (const unsigned char *)data + (len & ~(size_t)3);
len &= (size_t)3;
memcpy(sc->partial, data, len);
sc->partial_len = len;
}
static void
hamsi_small_close(sph_hamsi_small_context *sc,
unsigned ub, unsigned n, void *dst, size_t out_size_w32)
{
unsigned char pad[12];
size_t ptr, u;
unsigned z;
unsigned char *out;
ptr = sc->partial_len;
memcpy(pad, sc->partial, ptr);
#if SPH_64
sph_enc64be(pad + 4, sc->count + (ptr << 3) + n);
#else
sph_enc32be(pad + 4, sc->count_high);
sph_enc32be(pad + 8, sc->count_low + (ptr << 3) + n);
#endif
z = 0x80 >> n;
pad[ptr ++] = ((ub & -z) | z) & 0xFF;
while (ptr < 4)
pad[ptr ++] = 0;
hamsi_small(sc, pad, 2);
hamsi_small_final(sc, pad + 8);
out = dst;
for (u = 0; u < out_size_w32; u ++)
sph_enc32be(out + (u << 2), sc->h[u]);
}
#define DECL_STATE_BIG \
sph_u32 c0, c1, c2, c3, c4, c5, c6, c7; \
sph_u32 c8, c9, cA, cB, cC, cD, cE, cF;
#define READ_STATE_BIG(sc) do { \
c0 = sc->h[0x0]; \
c1 = sc->h[0x1]; \
c2 = sc->h[0x2]; \
c3 = sc->h[0x3]; \
c4 = sc->h[0x4]; \
c5 = sc->h[0x5]; \
c6 = sc->h[0x6]; \
c7 = sc->h[0x7]; \
c8 = sc->h[0x8]; \
c9 = sc->h[0x9]; \
cA = sc->h[0xA]; \
cB = sc->h[0xB]; \
cC = sc->h[0xC]; \
cD = sc->h[0xD]; \
cE = sc->h[0xE]; \
cF = sc->h[0xF]; \
} while (0)
#define WRITE_STATE_BIG(sc) do { \
sc->h[0x0] = c0; \
sc->h[0x1] = c1; \
sc->h[0x2] = c2; \
sc->h[0x3] = c3; \
sc->h[0x4] = c4; \
sc->h[0x5] = c5; \
sc->h[0x6] = c6; \
sc->h[0x7] = c7; \
sc->h[0x8] = c8; \
sc->h[0x9] = c9; \
sc->h[0xA] = cA; \
sc->h[0xB] = cB; \
sc->h[0xC] = cC; \
sc->h[0xD] = cD; \
sc->h[0xE] = cE; \
sc->h[0xF] = cF; \
} while (0)
#define s00 m0
#define s01 m1
#define s02 c0
#define s03 c1
#define s04 m2
#define s05 m3
#define s06 c2
#define s07 c3
#define s08 c4
#define s09 c5
#define s0A m4
#define s0B m5
#define s0C c6
#define s0D c7
#define s0E m6
#define s0F m7
#define s10 m8
#define s11 m9
#define s12 c8
#define s13 c9
#define s14 mA
#define s15 mB
#define s16 cA
#define s17 cB
#define s18 cC
#define s19 cD
#define s1A mC
#define s1B mD
#define s1C cE
#define s1D cF
#define s1E mE
#define s1F mF
#define ROUND_BIG(rc, alpha) do { \
s00 ^= alpha[0x00]; \
s01 ^= alpha[0x01] ^ (sph_u32)(rc); \
s02 ^= alpha[0x02]; \
s03 ^= alpha[0x03]; \
s04 ^= alpha[0x04]; \
s05 ^= alpha[0x05]; \
s06 ^= alpha[0x06]; \
s07 ^= alpha[0x07]; \
s08 ^= alpha[0x08]; \
s09 ^= alpha[0x09]; \
s0A ^= alpha[0x0A]; \
s0B ^= alpha[0x0B]; \
s0C ^= alpha[0x0C]; \
s0D ^= alpha[0x0D]; \
s0E ^= alpha[0x0E]; \
s0F ^= alpha[0x0F]; \
s10 ^= alpha[0x10]; \
s11 ^= alpha[0x11]; \
s12 ^= alpha[0x12]; \
s13 ^= alpha[0x13]; \
s14 ^= alpha[0x14]; \
s15 ^= alpha[0x15]; \
s16 ^= alpha[0x16]; \
s17 ^= alpha[0x17]; \
s18 ^= alpha[0x18]; \
s19 ^= alpha[0x19]; \
s1A ^= alpha[0x1A]; \
s1B ^= alpha[0x1B]; \
s1C ^= alpha[0x1C]; \
s1D ^= alpha[0x1D]; \
s1E ^= alpha[0x1E]; \
s1F ^= alpha[0x1F]; \
SBOX(s00, s08, s10, s18); \
SBOX(s01, s09, s11, s19); \
SBOX(s02, s0A, s12, s1A); \
SBOX(s03, s0B, s13, s1B); \
SBOX(s04, s0C, s14, s1C); \
SBOX(s05, s0D, s15, s1D); \
SBOX(s06, s0E, s16, s1E); \
SBOX(s07, s0F, s17, s1F); \
L(s00, s09, s12, s1B); \
L(s01, s0A, s13, s1C); \
L(s02, s0B, s14, s1D); \
L(s03, s0C, s15, s1E); \
L(s04, s0D, s16, s1F); \
L(s05, s0E, s17, s18); \
L(s06, s0F, s10, s19); \
L(s07, s08, s11, s1A); \
L(s00, s02, s05, s07); \
L(s10, s13, s15, s16); \
L(s09, s0B, s0C, s0E); \
L(s19, s1A, s1C, s1F); \
} while (0)
#if SPH_SMALL_FOOTPRINT_HAMSI
#define P_BIG do { \
unsigned r; \
for (r = 0; r < 6; r ++) \
ROUND_BIG(r, alpha_n); \
} while (0)
#define PF_BIG do { \
unsigned r; \
for (r = 0; r < 12; r ++) \
ROUND_BIG(r, alpha_f); \
} while (0)
#else
#define P_BIG do { \
ROUND_BIG(0, alpha_n); \
ROUND_BIG(1, alpha_n); \
ROUND_BIG(2, alpha_n); \
ROUND_BIG(3, alpha_n); \
ROUND_BIG(4, alpha_n); \
ROUND_BIG(5, alpha_n); \
} while (0)
#define PF_BIG do { \
ROUND_BIG(0, alpha_f); \
ROUND_BIG(1, alpha_f); \
ROUND_BIG(2, alpha_f); \
ROUND_BIG(3, alpha_f); \
ROUND_BIG(4, alpha_f); \
ROUND_BIG(5, alpha_f); \
ROUND_BIG(6, alpha_f); \
ROUND_BIG(7, alpha_f); \
ROUND_BIG(8, alpha_f); \
ROUND_BIG(9, alpha_f); \
ROUND_BIG(10, alpha_f); \
ROUND_BIG(11, alpha_f); \
} while (0)
#endif
#define T_BIG do { \
/* order is important */ \
cF = (sc->h[0xF] ^= s17); \
cE = (sc->h[0xE] ^= s16); \
cD = (sc->h[0xD] ^= s15); \
cC = (sc->h[0xC] ^= s14); \
cB = (sc->h[0xB] ^= s13); \
cA = (sc->h[0xA] ^= s12); \
c9 = (sc->h[0x9] ^= s11); \
c8 = (sc->h[0x8] ^= s10); \
c7 = (sc->h[0x7] ^= s07); \
c6 = (sc->h[0x6] ^= s06); \
c5 = (sc->h[0x5] ^= s05); \
c4 = (sc->h[0x4] ^= s04); \
c3 = (sc->h[0x3] ^= s03); \
c2 = (sc->h[0x2] ^= s02); \
c1 = (sc->h[0x1] ^= s01); \
c0 = (sc->h[0x0] ^= s00); \
} while (0)
static void
hamsi_big(sph_hamsi_big_context *sc, const unsigned char *buf, size_t num)
{
DECL_STATE_BIG
#if !SPH_64
sph_u32 tmp;
#endif
#if SPH_64
sc->count += (sph_u64)num << 6;
#else
tmp = SPH_T32((sph_u32)num << 6);
sc->count_low = SPH_T32(sc->count_low + tmp);
sc->count_high += (sph_u32)((num >> 13) >> 13);
if (sc->count_low < tmp)
sc->count_high ++;
#endif
READ_STATE_BIG(sc);
while (num -- > 0) {
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
sph_u32 m8, m9, mA, mB, mC, mD, mE, mF;
INPUT_BIG;
P_BIG;
T_BIG;
buf += 8;
}
WRITE_STATE_BIG(sc);
}
static void
hamsi_big_final(sph_hamsi_big_context *sc, const unsigned char *buf)
{
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
sph_u32 m8, m9, mA, mB, mC, mD, mE, mF;
DECL_STATE_BIG
READ_STATE_BIG(sc);
INPUT_BIG;
PF_BIG;
T_BIG;
WRITE_STATE_BIG(sc);
}
static void
hamsi_big_init(sph_hamsi_big_context *sc, const sph_u32 *iv)
{
sc->partial_len = 0;
memcpy(sc->h, iv, sizeof sc->h);
#if SPH_64
sc->count = 0;
#else
sc->count_high = sc->count_low = 0;
#endif
}
static void
hamsi_big_core(sph_hamsi_big_context *sc, const void *data, size_t len)
{
if (sc->partial_len != 0) {
size_t mlen;
mlen = 8 - sc->partial_len;
if (len < mlen) {
memcpy(sc->partial + sc->partial_len, data, len);
sc->partial_len += len;
return;
} else {
memcpy(sc->partial + sc->partial_len, data, mlen);
len -= mlen;
data = (const unsigned char *)data + mlen;
hamsi_big(sc, sc->partial, 1);
sc->partial_len = 0;
}
}
hamsi_big(sc, data, (len >> 3));
data = (const unsigned char *)data + (len & ~(size_t)7);
len &= (size_t)7;
memcpy(sc->partial, data, len);
sc->partial_len = len;
}
static void
hamsi_big_close(sph_hamsi_big_context *sc,
unsigned ub, unsigned n, void *dst, size_t out_size_w32)
{
unsigned char pad[8];
size_t ptr, u;
unsigned z;
unsigned char *out;
ptr = sc->partial_len;
#if SPH_64
sph_enc64be(pad, sc->count + (ptr << 3) + n);
#else
sph_enc32be(pad, sc->count_high);
sph_enc32be(pad + 4, sc->count_low + (ptr << 3) + n);
#endif
z = 0x80 >> n;
sc->partial[ptr ++] = ((ub & -z) | z) & 0xFF;
while (ptr < 8)
sc->partial[ptr ++] = 0;
hamsi_big(sc, sc->partial, 1);
hamsi_big_final(sc, pad);
out = dst;
if (out_size_w32 == 12) {
sph_enc32be(out + 0, sc->h[ 0]);
sph_enc32be(out + 4, sc->h[ 1]);
sph_enc32be(out + 8, sc->h[ 3]);
sph_enc32be(out + 12, sc->h[ 4]);
sph_enc32be(out + 16, sc->h[ 5]);
sph_enc32be(out + 20, sc->h[ 6]);
sph_enc32be(out + 24, sc->h[ 8]);
sph_enc32be(out + 28, sc->h[ 9]);
sph_enc32be(out + 32, sc->h[10]);
sph_enc32be(out + 36, sc->h[12]);
sph_enc32be(out + 40, sc->h[13]);
sph_enc32be(out + 44, sc->h[15]);
} else {
for (u = 0; u < 16; u ++)
sph_enc32be(out + (u << 2), sc->h[u]);
}
}
/* see sph_hamsi.h */
void
sph_hamsi224_init(void *cc)
{
hamsi_small_init(cc, IV224);
}
/* see sph_hamsi.h */
void
sph_hamsi224(void *cc, const void *data, size_t len)
{
hamsi_small_core(cc, data, len);
}
/* see sph_hamsi.h */
void
sph_hamsi224_close(void *cc, void *dst)
{
hamsi_small_close(cc, 0, 0, dst, 7);
hamsi_small_init(cc, IV224);
}
/* see sph_hamsi.h */
void
sph_hamsi224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
hamsi_small_close(cc, ub, n, dst, 7);
hamsi_small_init(cc, IV224);
}
/* see sph_hamsi.h */
void
sph_hamsi256_init(void *cc)
{
hamsi_small_init(cc, IV256);
}
/* see sph_hamsi.h */
void
sph_hamsi256(void *cc, const void *data, size_t len)
{
hamsi_small_core(cc, data, len);
}
/* see sph_hamsi.h */
void
sph_hamsi256_close(void *cc, void *dst)
{
hamsi_small_close(cc, 0, 0, dst, 8);
hamsi_small_init(cc, IV256);
}
/* see sph_hamsi.h */
void
sph_hamsi256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
hamsi_small_close(cc, ub, n, dst, 8);
hamsi_small_init(cc, IV256);
}
/* see sph_hamsi.h */
void
sph_hamsi384_init(void *cc)
{
hamsi_big_init(cc, IV384);
}
/* see sph_hamsi.h */
void
sph_hamsi384(void *cc, const void *data, size_t len)
{
hamsi_big_core(cc, data, len);
}
/* see sph_hamsi.h */
void
sph_hamsi384_close(void *cc, void *dst)
{
hamsi_big_close(cc, 0, 0, dst, 12);
hamsi_big_init(cc, IV384);
}
/* see sph_hamsi.h */
void
sph_hamsi384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
hamsi_big_close(cc, ub, n, dst, 12);
hamsi_big_init(cc, IV384);
}
/* see sph_hamsi.h */
void
sph_hamsi512_init(void *cc)
{
hamsi_big_init(cc, IV512);
}
/* see sph_hamsi.h */
void
sph_hamsi512(void *cc, const void *data, size_t len)
{
hamsi_big_core(cc, data, len);
}
/* see sph_hamsi.h */
void
sph_hamsi512_close(void *cc, void *dst)
{
hamsi_big_close(cc, 0, 0, dst, 16);
hamsi_big_init(cc, IV512);
}
/* see sph_hamsi.h */
void
sph_hamsi512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
hamsi_big_close(cc, ub, n, dst, 16);
hamsi_big_init(cc, IV512);
}
#ifdef __cplusplus
}
#endif

39648
sph/hamsi_helper.c

File diff suppressed because it is too large Load Diff

321
sph/sph_hamsi.h

@ -0,0 +1,321 @@ @@ -0,0 +1,321 @@
/* $Id: sph_hamsi.h 216 2010-06-08 09:46:57Z tp $ */
/**
* Hamsi interface. This code implements Hamsi with the recommended
* parameters for SHA-3, with outputs of 224, 256, 384 and 512 bits.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file sph_hamsi.h
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifndef SPH_HAMSI_H__
#define SPH_HAMSI_H__
#include <stddef.h>
#include "sph_types.h"
#ifdef __cplusplus
extern "C"{
#endif
/**
* Output size (in bits) for Hamsi-224.
*/
#define SPH_SIZE_hamsi224 224
/**
* Output size (in bits) for Hamsi-256.
*/
#define SPH_SIZE_hamsi256 256
/**
* Output size (in bits) for Hamsi-384.
*/
#define SPH_SIZE_hamsi384 384
/**
* Output size (in bits) for Hamsi-512.
*/
#define SPH_SIZE_hamsi512 512
/**
* This structure is a context for Hamsi-224 and Hamsi-256 computations:
* it contains the intermediate values and some data from the last
* entered block. Once a Hamsi computation has been performed, the
* context can be reused for another computation.
*
* The contents of this structure are private. A running Hamsi
* computation can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char partial[4];
size_t partial_len;
sph_u32 h[8];
#if SPH_64
sph_u64 count;
#else
sph_u32 count_high, count_low;
#endif
#endif
} sph_hamsi_small_context;
/**
* This structure is a context for Hamsi-224 computations. It is
* identical to the common <code>sph_hamsi_small_context</code>.
*/
typedef sph_hamsi_small_context sph_hamsi224_context;
/**
* This structure is a context for Hamsi-256 computations. It is
* identical to the common <code>sph_hamsi_small_context</code>.
*/
typedef sph_hamsi_small_context sph_hamsi256_context;
/**
* This structure is a context for Hamsi-384 and Hamsi-512 computations:
* it contains the intermediate values and some data from the last
* entered block. Once a Hamsi computation has been performed, the
* context can be reused for another computation.
*
* The contents of this structure are private. A running Hamsi
* computation can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char partial[8];
size_t partial_len;
sph_u32 h[16];
#if SPH_64
sph_u64 count;
#else
sph_u32 count_high, count_low;
#endif
#endif
} sph_hamsi_big_context;
/**
* This structure is a context for Hamsi-384 computations. It is
* identical to the common <code>sph_hamsi_small_context</code>.
*/
typedef sph_hamsi_big_context sph_hamsi384_context;
/**
* This structure is a context for Hamsi-512 computations. It is
* identical to the common <code>sph_hamsi_small_context</code>.
*/
typedef sph_hamsi_big_context sph_hamsi512_context;
/**
* Initialize a Hamsi-224 context. This process performs no memory allocation.
*
* @param cc the Hamsi-224 context (pointer to a
* <code>sph_hamsi224_context</code>)
*/
void sph_hamsi224_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the Hamsi-224 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_hamsi224(void *cc, const void *data, size_t len);
/**
* Terminate the current Hamsi-224 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (28 bytes). The context is automatically
* reinitialized.
*
* @param cc the Hamsi-224 context
* @param dst the destination buffer
*/
void sph_hamsi224_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (28 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the Hamsi-224 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_hamsi224_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a Hamsi-256 context. This process performs no memory allocation.
*
* @param cc the Hamsi-256 context (pointer to a
* <code>sph_hamsi256_context</code>)
*/
void sph_hamsi256_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the Hamsi-256 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_hamsi256(void *cc, const void *data, size_t len);
/**
* Terminate the current Hamsi-256 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (32 bytes). The context is automatically
* reinitialized.
*
* @param cc the Hamsi-256 context
* @param dst the destination buffer
*/
void sph_hamsi256_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (32 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the Hamsi-256 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_hamsi256_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a Hamsi-384 context. This process performs no memory allocation.
*
* @param cc the Hamsi-384 context (pointer to a
* <code>sph_hamsi384_context</code>)
*/
void sph_hamsi384_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the Hamsi-384 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_hamsi384(void *cc, const void *data, size_t len);
/**
* Terminate the current Hamsi-384 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (48 bytes). The context is automatically
* reinitialized.
*
* @param cc the Hamsi-384 context
* @param dst the destination buffer
*/
void sph_hamsi384_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (48 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the Hamsi-384 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_hamsi384_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a Hamsi-512 context. This process performs no memory allocation.
*
* @param cc the Hamsi-512 context (pointer to a
* <code>sph_hamsi512_context</code>)
*/
void sph_hamsi512_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the Hamsi-512 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_hamsi512(void *cc, const void *data, size_t len);
/**
* Terminate the current Hamsi-512 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (64 bytes). The context is automatically
* reinitialized.
*
* @param cc the Hamsi-512 context
* @param dst the destination buffer
*/
void sph_hamsi512_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (64 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the Hamsi-512 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_hamsi512_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
#ifdef __cplusplus
}
#endif
#endif

8
x11/cuda_x11_echo.cu

@ -198,7 +198,11 @@ __global__ void x11_echo512_gpu_hash_64(int threads, uint32_t startNounce, uint6 @@ -198,7 +198,11 @@ __global__ void x11_echo512_gpu_hash_64(int threads, uint32_t startNounce, uint6
for(int i=0;i<16;i++)
W[i] ^= Hash[i];
W[8] ^= 0x10;
// tsiv: I feel iffy about removing this, but it seems to break the full hash
// fortunately for X11 the flipped bit lands outside the first 32 bytes used as the final X11 hash
// try chaining more algos after echo (X13) and boom
//W[8] ^= 0x10;
W[27] ^= 0x02000000;
W[28] ^= k0;
@ -225,6 +229,8 @@ __host__ void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNou @@ -225,6 +229,8 @@ __host__ void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNou
// Größe des dynamischen Shared Memory Bereichs
size_t shared_size = 0;
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
x11_echo512_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
MyStreamSynchronize(NULL, order, thr_id);
}

711
x13/cuda_x13_fugue512.cu

@ -0,0 +1,711 @@ @@ -0,0 +1,711 @@
/*
* Quick and dirty addition of Fugue-512 for X13
*
* Built on cbuchner1's implementation, actual hashing code
* heavily based on phm's sgminer
*
*/
/*
* X13 kernel implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2014 phm
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author phm <phm@inbox.com>
*/
// aus heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
#include <stdint.h>
#define SPH_C64(x) ((uint64_t)(x ## ULL))
#define SPH_C32(x) ((uint32_t)(x ## U))
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
#define SWAB32(x) ( __byte_perm(x, x, 0x0123) )
#if __CUDA_ARCH__ < 350
// Kepler (Compute 3.0)
#define ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
#else
// Kepler (Compute 3.5)
#define ROTL32(x, n) __funnelshift_l( (x), (x), (n) )
#endif
#define mixtab0(x) (*((uint32_t*)mixtabs + ( (x))))
#define mixtab1(x) (*((uint32_t*)mixtabs + (256+(x))))
#define mixtab2(x) (*((uint32_t*)mixtabs + (512+(x))))
#define mixtab3(x) (*((uint32_t*)mixtabs + (768+(x))))
texture<unsigned int, 1, cudaReadModeElementType> mixTab0Tex;
texture<unsigned int, 1, cudaReadModeElementType> mixTab1Tex;
texture<unsigned int, 1, cudaReadModeElementType> mixTab2Tex;
texture<unsigned int, 1, cudaReadModeElementType> mixTab3Tex;
static const uint32_t mixtab0_cpu[] = {
SPH_C32(0x63633297), SPH_C32(0x7c7c6feb), SPH_C32(0x77775ec7),
SPH_C32(0x7b7b7af7), SPH_C32(0xf2f2e8e5), SPH_C32(0x6b6b0ab7),
SPH_C32(0x6f6f16a7), SPH_C32(0xc5c56d39), SPH_C32(0x303090c0),
SPH_C32(0x01010704), SPH_C32(0x67672e87), SPH_C32(0x2b2bd1ac),
SPH_C32(0xfefeccd5), SPH_C32(0xd7d71371), SPH_C32(0xabab7c9a),
SPH_C32(0x767659c3), SPH_C32(0xcaca4005), SPH_C32(0x8282a33e),
SPH_C32(0xc9c94909), SPH_C32(0x7d7d68ef), SPH_C32(0xfafad0c5),
SPH_C32(0x5959947f), SPH_C32(0x4747ce07), SPH_C32(0xf0f0e6ed),
SPH_C32(0xadad6e82), SPH_C32(0xd4d41a7d), SPH_C32(0xa2a243be),
SPH_C32(0xafaf608a), SPH_C32(0x9c9cf946), SPH_C32(0xa4a451a6),
SPH_C32(0x727245d3), SPH_C32(0xc0c0762d), SPH_C32(0xb7b728ea),
SPH_C32(0xfdfdc5d9), SPH_C32(0x9393d47a), SPH_C32(0x2626f298),
SPH_C32(0x363682d8), SPH_C32(0x3f3fbdfc), SPH_C32(0xf7f7f3f1),
SPH_C32(0xcccc521d), SPH_C32(0x34348cd0), SPH_C32(0xa5a556a2),
SPH_C32(0xe5e58db9), SPH_C32(0xf1f1e1e9), SPH_C32(0x71714cdf),
SPH_C32(0xd8d83e4d), SPH_C32(0x313197c4), SPH_C32(0x15156b54),
SPH_C32(0x04041c10), SPH_C32(0xc7c76331), SPH_C32(0x2323e98c),
SPH_C32(0xc3c37f21), SPH_C32(0x18184860), SPH_C32(0x9696cf6e),
SPH_C32(0x05051b14), SPH_C32(0x9a9aeb5e), SPH_C32(0x0707151c),
SPH_C32(0x12127e48), SPH_C32(0x8080ad36), SPH_C32(0xe2e298a5),
SPH_C32(0xebeba781), SPH_C32(0x2727f59c), SPH_C32(0xb2b233fe),
SPH_C32(0x757550cf), SPH_C32(0x09093f24), SPH_C32(0x8383a43a),
SPH_C32(0x2c2cc4b0), SPH_C32(0x1a1a4668), SPH_C32(0x1b1b416c),
SPH_C32(0x6e6e11a3), SPH_C32(0x5a5a9d73), SPH_C32(0xa0a04db6),
SPH_C32(0x5252a553), SPH_C32(0x3b3ba1ec), SPH_C32(0xd6d61475),
SPH_C32(0xb3b334fa), SPH_C32(0x2929dfa4), SPH_C32(0xe3e39fa1),
SPH_C32(0x2f2fcdbc), SPH_C32(0x8484b126), SPH_C32(0x5353a257),
SPH_C32(0xd1d10169), SPH_C32(0x00000000), SPH_C32(0xededb599),
SPH_C32(0x2020e080), SPH_C32(0xfcfcc2dd), SPH_C32(0xb1b13af2),
SPH_C32(0x5b5b9a77), SPH_C32(0x6a6a0db3), SPH_C32(0xcbcb4701),
SPH_C32(0xbebe17ce), SPH_C32(0x3939afe4), SPH_C32(0x4a4aed33),
SPH_C32(0x4c4cff2b), SPH_C32(0x5858937b), SPH_C32(0xcfcf5b11),
SPH_C32(0xd0d0066d), SPH_C32(0xefefbb91), SPH_C32(0xaaaa7b9e),
SPH_C32(0xfbfbd7c1), SPH_C32(0x4343d217), SPH_C32(0x4d4df82f),
SPH_C32(0x333399cc), SPH_C32(0x8585b622), SPH_C32(0x4545c00f),
SPH_C32(0xf9f9d9c9), SPH_C32(0x02020e08), SPH_C32(0x7f7f66e7),
SPH_C32(0x5050ab5b), SPH_C32(0x3c3cb4f0), SPH_C32(0x9f9ff04a),
SPH_C32(0xa8a87596), SPH_C32(0x5151ac5f), SPH_C32(0xa3a344ba),
SPH_C32(0x4040db1b), SPH_C32(0x8f8f800a), SPH_C32(0x9292d37e),
SPH_C32(0x9d9dfe42), SPH_C32(0x3838a8e0), SPH_C32(0xf5f5fdf9),
SPH_C32(0xbcbc19c6), SPH_C32(0xb6b62fee), SPH_C32(0xdada3045),
SPH_C32(0x2121e784), SPH_C32(0x10107040), SPH_C32(0xffffcbd1),
SPH_C32(0xf3f3efe1), SPH_C32(0xd2d20865), SPH_C32(0xcdcd5519),
SPH_C32(0x0c0c2430), SPH_C32(0x1313794c), SPH_C32(0xececb29d),
SPH_C32(0x5f5f8667), SPH_C32(0x9797c86a), SPH_C32(0x4444c70b),
SPH_C32(0x1717655c), SPH_C32(0xc4c46a3d), SPH_C32(0xa7a758aa),
SPH_C32(0x7e7e61e3), SPH_C32(0x3d3db3f4), SPH_C32(0x6464278b),
SPH_C32(0x5d5d886f), SPH_C32(0x19194f64), SPH_C32(0x737342d7),
SPH_C32(0x60603b9b), SPH_C32(0x8181aa32), SPH_C32(0x4f4ff627),
SPH_C32(0xdcdc225d), SPH_C32(0x2222ee88), SPH_C32(0x2a2ad6a8),
SPH_C32(0x9090dd76), SPH_C32(0x88889516), SPH_C32(0x4646c903),
SPH_C32(0xeeeebc95), SPH_C32(0xb8b805d6), SPH_C32(0x14146c50),
SPH_C32(0xdede2c55), SPH_C32(0x5e5e8163), SPH_C32(0x0b0b312c),
SPH_C32(0xdbdb3741), SPH_C32(0xe0e096ad), SPH_C32(0x32329ec8),
SPH_C32(0x3a3aa6e8), SPH_C32(0x0a0a3628), SPH_C32(0x4949e43f),
SPH_C32(0x06061218), SPH_C32(0x2424fc90), SPH_C32(0x5c5c8f6b),
SPH_C32(0xc2c27825), SPH_C32(0xd3d30f61), SPH_C32(0xacac6986),
SPH_C32(0x62623593), SPH_C32(0x9191da72), SPH_C32(0x9595c662),
SPH_C32(0xe4e48abd), SPH_C32(0x797974ff), SPH_C32(0xe7e783b1),
SPH_C32(0xc8c84e0d), SPH_C32(0x373785dc), SPH_C32(0x6d6d18af),
SPH_C32(0x8d8d8e02), SPH_C32(0xd5d51d79), SPH_C32(0x4e4ef123),
SPH_C32(0xa9a97292), SPH_C32(0x6c6c1fab), SPH_C32(0x5656b943),
SPH_C32(0xf4f4fafd), SPH_C32(0xeaeaa085), SPH_C32(0x6565208f),
SPH_C32(0x7a7a7df3), SPH_C32(0xaeae678e), SPH_C32(0x08083820),
SPH_C32(0xbaba0bde), SPH_C32(0x787873fb), SPH_C32(0x2525fb94),
SPH_C32(0x2e2ecab8), SPH_C32(0x1c1c5470), SPH_C32(0xa6a65fae),
SPH_C32(0xb4b421e6), SPH_C32(0xc6c66435), SPH_C32(0xe8e8ae8d),
SPH_C32(0xdddd2559), SPH_C32(0x747457cb), SPH_C32(0x1f1f5d7c),
SPH_C32(0x4b4bea37), SPH_C32(0xbdbd1ec2), SPH_C32(0x8b8b9c1a),
SPH_C32(0x8a8a9b1e), SPH_C32(0x70704bdb), SPH_C32(0x3e3ebaf8),
SPH_C32(0xb5b526e2), SPH_C32(0x66662983), SPH_C32(0x4848e33b),
SPH_C32(0x0303090c), SPH_C32(0xf6f6f4f5), SPH_C32(0x0e0e2a38),
SPH_C32(0x61613c9f), SPH_C32(0x35358bd4), SPH_C32(0x5757be47),
SPH_C32(0xb9b902d2), SPH_C32(0x8686bf2e), SPH_C32(0xc1c17129),
SPH_C32(0x1d1d5374), SPH_C32(0x9e9ef74e), SPH_C32(0xe1e191a9),
SPH_C32(0xf8f8decd), SPH_C32(0x9898e556), SPH_C32(0x11117744),
SPH_C32(0x696904bf), SPH_C32(0xd9d93949), SPH_C32(0x8e8e870e),
SPH_C32(0x9494c166), SPH_C32(0x9b9bec5a), SPH_C32(0x1e1e5a78),
SPH_C32(0x8787b82a), SPH_C32(0xe9e9a989), SPH_C32(0xcece5c15),
SPH_C32(0x5555b04f), SPH_C32(0x2828d8a0), SPH_C32(0xdfdf2b51),
SPH_C32(0x8c8c8906), SPH_C32(0xa1a14ab2), SPH_C32(0x89899212),
SPH_C32(0x0d0d2334), SPH_C32(0xbfbf10ca), SPH_C32(0xe6e684b5),
SPH_C32(0x4242d513), SPH_C32(0x686803bb), SPH_C32(0x4141dc1f),
SPH_C32(0x9999e252), SPH_C32(0x2d2dc3b4), SPH_C32(0x0f0f2d3c),
SPH_C32(0xb0b03df6), SPH_C32(0x5454b74b), SPH_C32(0xbbbb0cda),
SPH_C32(0x16166258)
};
static const uint32_t mixtab1_cpu[] = {
SPH_C32(0x97636332), SPH_C32(0xeb7c7c6f), SPH_C32(0xc777775e),
SPH_C32(0xf77b7b7a), SPH_C32(0xe5f2f2e8), SPH_C32(0xb76b6b0a),
SPH_C32(0xa76f6f16), SPH_C32(0x39c5c56d), SPH_C32(0xc0303090),
SPH_C32(0x04010107), SPH_C32(0x8767672e), SPH_C32(0xac2b2bd1),
SPH_C32(0xd5fefecc), SPH_C32(0x71d7d713), SPH_C32(0x9aabab7c),
SPH_C32(0xc3767659), SPH_C32(0x05caca40), SPH_C32(0x3e8282a3),
SPH_C32(0x09c9c949), SPH_C32(0xef7d7d68), SPH_C32(0xc5fafad0),
SPH_C32(0x7f595994), SPH_C32(0x074747ce), SPH_C32(0xedf0f0e6),
SPH_C32(0x82adad6e), SPH_C32(0x7dd4d41a), SPH_C32(0xbea2a243),
SPH_C32(0x8aafaf60), SPH_C32(0x469c9cf9), SPH_C32(0xa6a4a451),
SPH_C32(0xd3727245), SPH_C32(0x2dc0c076), SPH_C32(0xeab7b728),
SPH_C32(0xd9fdfdc5), SPH_C32(0x7a9393d4), SPH_C32(0x982626f2),
SPH_C32(0xd8363682), SPH_C32(0xfc3f3fbd), SPH_C32(0xf1f7f7f3),
SPH_C32(0x1dcccc52), SPH_C32(0xd034348c), SPH_C32(0xa2a5a556),
SPH_C32(0xb9e5e58d), SPH_C32(0xe9f1f1e1), SPH_C32(0xdf71714c),
SPH_C32(0x4dd8d83e), SPH_C32(0xc4313197), SPH_C32(0x5415156b),
SPH_C32(0x1004041c), SPH_C32(0x31c7c763), SPH_C32(0x8c2323e9),
SPH_C32(0x21c3c37f), SPH_C32(0x60181848), SPH_C32(0x6e9696cf),
SPH_C32(0x1405051b), SPH_C32(0x5e9a9aeb), SPH_C32(0x1c070715),
SPH_C32(0x4812127e), SPH_C32(0x368080ad), SPH_C32(0xa5e2e298),
SPH_C32(0x81ebeba7), SPH_C32(0x9c2727f5), SPH_C32(0xfeb2b233),
SPH_C32(0xcf757550), SPH_C32(0x2409093f), SPH_C32(0x3a8383a4),
SPH_C32(0xb02c2cc4), SPH_C32(0x681a1a46), SPH_C32(0x6c1b1b41),
SPH_C32(0xa36e6e11), SPH_C32(0x735a5a9d), SPH_C32(0xb6a0a04d),
SPH_C32(0x535252a5), SPH_C32(0xec3b3ba1), SPH_C32(0x75d6d614),
SPH_C32(0xfab3b334), SPH_C32(0xa42929df), SPH_C32(0xa1e3e39f),
SPH_C32(0xbc2f2fcd), SPH_C32(0x268484b1), SPH_C32(0x575353a2),
SPH_C32(0x69d1d101), SPH_C32(0x00000000), SPH_C32(0x99ededb5),
SPH_C32(0x802020e0), SPH_C32(0xddfcfcc2), SPH_C32(0xf2b1b13a),
SPH_C32(0x775b5b9a), SPH_C32(0xb36a6a0d), SPH_C32(0x01cbcb47),
SPH_C32(0xcebebe17), SPH_C32(0xe43939af), SPH_C32(0x334a4aed),
SPH_C32(0x2b4c4cff), SPH_C32(0x7b585893), SPH_C32(0x11cfcf5b),
SPH_C32(0x6dd0d006), SPH_C32(0x91efefbb), SPH_C32(0x9eaaaa7b),
SPH_C32(0xc1fbfbd7), SPH_C32(0x174343d2), SPH_C32(0x2f4d4df8),
SPH_C32(0xcc333399), SPH_C32(0x228585b6), SPH_C32(0x0f4545c0),
SPH_C32(0xc9f9f9d9), SPH_C32(0x0802020e), SPH_C32(0xe77f7f66),
SPH_C32(0x5b5050ab), SPH_C32(0xf03c3cb4), SPH_C32(0x4a9f9ff0),
SPH_C32(0x96a8a875), SPH_C32(0x5f5151ac), SPH_C32(0xbaa3a344),
SPH_C32(0x1b4040db), SPH_C32(0x0a8f8f80), SPH_C32(0x7e9292d3),
SPH_C32(0x429d9dfe), SPH_C32(0xe03838a8), SPH_C32(0xf9f5f5fd),
SPH_C32(0xc6bcbc19), SPH_C32(0xeeb6b62f), SPH_C32(0x45dada30),
SPH_C32(0x842121e7), SPH_C32(0x40101070), SPH_C32(0xd1ffffcb),
SPH_C32(0xe1f3f3ef), SPH_C32(0x65d2d208), SPH_C32(0x19cdcd55),
SPH_C32(0x300c0c24), SPH_C32(0x4c131379), SPH_C32(0x9dececb2),
SPH_C32(0x675f5f86), SPH_C32(0x6a9797c8), SPH_C32(0x0b4444c7),
SPH_C32(0x5c171765), SPH_C32(0x3dc4c46a), SPH_C32(0xaaa7a758),
SPH_C32(0xe37e7e61), SPH_C32(0xf43d3db3), SPH_C32(0x8b646427),
SPH_C32(0x6f5d5d88), SPH_C32(0x6419194f), SPH_C32(0xd7737342),
SPH_C32(0x9b60603b), SPH_C32(0x328181aa), SPH_C32(0x274f4ff6),
SPH_C32(0x5ddcdc22), SPH_C32(0x882222ee), SPH_C32(0xa82a2ad6),
SPH_C32(0x769090dd), SPH_C32(0x16888895), SPH_C32(0x034646c9),
SPH_C32(0x95eeeebc), SPH_C32(0xd6b8b805), SPH_C32(0x5014146c),
SPH_C32(0x55dede2c), SPH_C32(0x635e5e81), SPH_C32(0x2c0b0b31),
SPH_C32(0x41dbdb37), SPH_C32(0xade0e096), SPH_C32(0xc832329e),
SPH_C32(0xe83a3aa6), SPH_C32(0x280a0a36), SPH_C32(0x3f4949e4),
SPH_C32(0x18060612), SPH_C32(0x902424fc), SPH_C32(0x6b5c5c8f),
SPH_C32(0x25c2c278), SPH_C32(0x61d3d30f), SPH_C32(0x86acac69),
SPH_C32(0x93626235), SPH_C32(0x729191da), SPH_C32(0x629595c6),
SPH_C32(0xbde4e48a), SPH_C32(0xff797974), SPH_C32(0xb1e7e783),
SPH_C32(0x0dc8c84e), SPH_C32(0xdc373785), SPH_C32(0xaf6d6d18),
SPH_C32(0x028d8d8e), SPH_C32(0x79d5d51d), SPH_C32(0x234e4ef1),
SPH_C32(0x92a9a972), SPH_C32(0xab6c6c1f), SPH_C32(0x435656b9),
SPH_C32(0xfdf4f4fa), SPH_C32(0x85eaeaa0), SPH_C32(0x8f656520),
SPH_C32(0xf37a7a7d), SPH_C32(0x8eaeae67), SPH_C32(0x20080838),
SPH_C32(0xdebaba0b), SPH_C32(0xfb787873), SPH_C32(0x942525fb),
SPH_C32(0xb82e2eca), SPH_C32(0x701c1c54), SPH_C32(0xaea6a65f),
SPH_C32(0xe6b4b421), SPH_C32(0x35c6c664), SPH_C32(0x8de8e8ae),
SPH_C32(0x59dddd25), SPH_C32(0xcb747457), SPH_C32(0x7c1f1f5d),
SPH_C32(0x374b4bea), SPH_C32(0xc2bdbd1e), SPH_C32(0x1a8b8b9c),
SPH_C32(0x1e8a8a9b), SPH_C32(0xdb70704b), SPH_C32(0xf83e3eba),
SPH_C32(0xe2b5b526), SPH_C32(0x83666629), SPH_C32(0x3b4848e3),
SPH_C32(0x0c030309), SPH_C32(0xf5f6f6f4), SPH_C32(0x380e0e2a),
SPH_C32(0x9f61613c), SPH_C32(0xd435358b), SPH_C32(0x475757be),
SPH_C32(0xd2b9b902), SPH_C32(0x2e8686bf), SPH_C32(0x29c1c171),
SPH_C32(0x741d1d53), SPH_C32(0x4e9e9ef7), SPH_C32(0xa9e1e191),
SPH_C32(0xcdf8f8de), SPH_C32(0x569898e5), SPH_C32(0x44111177),
SPH_C32(0xbf696904), SPH_C32(0x49d9d939), SPH_C32(0x0e8e8e87),
SPH_C32(0x669494c1), SPH_C32(0x5a9b9bec), SPH_C32(0x781e1e5a),
SPH_C32(0x2a8787b8), SPH_C32(0x89e9e9a9), SPH_C32(0x15cece5c),
SPH_C32(0x4f5555b0), SPH_C32(0xa02828d8), SPH_C32(0x51dfdf2b),
SPH_C32(0x068c8c89), SPH_C32(0xb2a1a14a), SPH_C32(0x12898992),
SPH_C32(0x340d0d23), SPH_C32(0xcabfbf10), SPH_C32(0xb5e6e684),
SPH_C32(0x134242d5), SPH_C32(0xbb686803), SPH_C32(0x1f4141dc),
SPH_C32(0x529999e2), SPH_C32(0xb42d2dc3), SPH_C32(0x3c0f0f2d),
SPH_C32(0xf6b0b03d), SPH_C32(0x4b5454b7), SPH_C32(0xdabbbb0c),
SPH_C32(0x58161662)
};
static const uint32_t mixtab2_cpu[] = {
SPH_C32(0x32976363), SPH_C32(0x6feb7c7c), SPH_C32(0x5ec77777),
SPH_C32(0x7af77b7b), SPH_C32(0xe8e5f2f2), SPH_C32(0x0ab76b6b),
SPH_C32(0x16a76f6f), SPH_C32(0x6d39c5c5), SPH_C32(0x90c03030),
SPH_C32(0x07040101), SPH_C32(0x2e876767), SPH_C32(0xd1ac2b2b),
SPH_C32(0xccd5fefe), SPH_C32(0x1371d7d7), SPH_C32(0x7c9aabab),
SPH_C32(0x59c37676), SPH_C32(0x4005caca), SPH_C32(0xa33e8282),
SPH_C32(0x4909c9c9), SPH_C32(0x68ef7d7d), SPH_C32(0xd0c5fafa),
SPH_C32(0x947f5959), SPH_C32(0xce074747), SPH_C32(0xe6edf0f0),
SPH_C32(0x6e82adad), SPH_C32(0x1a7dd4d4), SPH_C32(0x43bea2a2),
SPH_C32(0x608aafaf), SPH_C32(0xf9469c9c), SPH_C32(0x51a6a4a4),
SPH_C32(0x45d37272), SPH_C32(0x762dc0c0), SPH_C32(0x28eab7b7),
SPH_C32(0xc5d9fdfd), SPH_C32(0xd47a9393), SPH_C32(0xf2982626),
SPH_C32(0x82d83636), SPH_C32(0xbdfc3f3f), SPH_C32(0xf3f1f7f7),
SPH_C32(0x521dcccc), SPH_C32(0x8cd03434), SPH_C32(0x56a2a5a5),
SPH_C32(0x8db9e5e5), SPH_C32(0xe1e9f1f1), SPH_C32(0x4cdf7171),
SPH_C32(0x3e4dd8d8), SPH_C32(0x97c43131), SPH_C32(0x6b541515),
SPH_C32(0x1c100404), SPH_C32(0x6331c7c7), SPH_C32(0xe98c2323),
SPH_C32(0x7f21c3c3), SPH_C32(0x48601818), SPH_C32(0xcf6e9696),
SPH_C32(0x1b140505), SPH_C32(0xeb5e9a9a), SPH_C32(0x151c0707),
SPH_C32(0x7e481212), SPH_C32(0xad368080), SPH_C32(0x98a5e2e2),
SPH_C32(0xa781ebeb), SPH_C32(0xf59c2727), SPH_C32(0x33feb2b2),
SPH_C32(0x50cf7575), SPH_C32(0x3f240909), SPH_C32(0xa43a8383),
SPH_C32(0xc4b02c2c), SPH_C32(0x46681a1a), SPH_C32(0x416c1b1b),
SPH_C32(0x11a36e6e), SPH_C32(0x9d735a5a), SPH_C32(0x4db6a0a0),
SPH_C32(0xa5535252), SPH_C32(0xa1ec3b3b), SPH_C32(0x1475d6d6),
SPH_C32(0x34fab3b3), SPH_C32(0xdfa42929), SPH_C32(0x9fa1e3e3),
SPH_C32(0xcdbc2f2f), SPH_C32(0xb1268484), SPH_C32(0xa2575353),
SPH_C32(0x0169d1d1), SPH_C32(0x00000000), SPH_C32(0xb599eded),
SPH_C32(0xe0802020), SPH_C32(0xc2ddfcfc), SPH_C32(0x3af2b1b1),
SPH_C32(0x9a775b5b), SPH_C32(0x0db36a6a), SPH_C32(0x4701cbcb),
SPH_C32(0x17cebebe), SPH_C32(0xafe43939), SPH_C32(0xed334a4a),
SPH_C32(0xff2b4c4c), SPH_C32(0x937b5858), SPH_C32(0x5b11cfcf),
SPH_C32(0x066dd0d0), SPH_C32(0xbb91efef), SPH_C32(0x7b9eaaaa),
SPH_C32(0xd7c1fbfb), SPH_C32(0xd2174343), SPH_C32(0xf82f4d4d),
SPH_C32(0x99cc3333), SPH_C32(0xb6228585), SPH_C32(0xc00f4545),
SPH_C32(0xd9c9f9f9), SPH_C32(0x0e080202), SPH_C32(0x66e77f7f),
SPH_C32(0xab5b5050), SPH_C32(0xb4f03c3c), SPH_C32(0xf04a9f9f),
SPH_C32(0x7596a8a8), SPH_C32(0xac5f5151), SPH_C32(0x44baa3a3),
SPH_C32(0xdb1b4040), SPH_C32(0x800a8f8f), SPH_C32(0xd37e9292),
SPH_C32(0xfe429d9d), SPH_C32(0xa8e03838), SPH_C32(0xfdf9f5f5),
SPH_C32(0x19c6bcbc), SPH_C32(0x2feeb6b6), SPH_C32(0x3045dada),
SPH_C32(0xe7842121), SPH_C32(0x70401010), SPH_C32(0xcbd1ffff),
SPH_C32(0xefe1f3f3), SPH_C32(0x0865d2d2), SPH_C32(0x5519cdcd),
SPH_C32(0x24300c0c), SPH_C32(0x794c1313), SPH_C32(0xb29decec),
SPH_C32(0x86675f5f), SPH_C32(0xc86a9797), SPH_C32(0xc70b4444),
SPH_C32(0x655c1717), SPH_C32(0x6a3dc4c4), SPH_C32(0x58aaa7a7),
SPH_C32(0x61e37e7e), SPH_C32(0xb3f43d3d), SPH_C32(0x278b6464),
SPH_C32(0x886f5d5d), SPH_C32(0x4f641919), SPH_C32(0x42d77373),
SPH_C32(0x3b9b6060), SPH_C32(0xaa328181), SPH_C32(0xf6274f4f),
SPH_C32(0x225ddcdc), SPH_C32(0xee882222), SPH_C32(0xd6a82a2a),
SPH_C32(0xdd769090), SPH_C32(0x95168888), SPH_C32(0xc9034646),
SPH_C32(0xbc95eeee), SPH_C32(0x05d6b8b8), SPH_C32(0x6c501414),
SPH_C32(0x2c55dede), SPH_C32(0x81635e5e), SPH_C32(0x312c0b0b),
SPH_C32(0x3741dbdb), SPH_C32(0x96ade0e0), SPH_C32(0x9ec83232),
SPH_C32(0xa6e83a3a), SPH_C32(0x36280a0a), SPH_C32(0xe43f4949),
SPH_C32(0x12180606), SPH_C32(0xfc902424), SPH_C32(0x8f6b5c5c),
SPH_C32(0x7825c2c2), SPH_C32(0x0f61d3d3), SPH_C32(0x6986acac),
SPH_C32(0x35936262), SPH_C32(0xda729191), SPH_C32(0xc6629595),
SPH_C32(0x8abde4e4), SPH_C32(0x74ff7979), SPH_C32(0x83b1e7e7),
SPH_C32(0x4e0dc8c8), SPH_C32(0x85dc3737), SPH_C32(0x18af6d6d),
SPH_C32(0x8e028d8d), SPH_C32(0x1d79d5d5), SPH_C32(0xf1234e4e),
SPH_C32(0x7292a9a9), SPH_C32(0x1fab6c6c), SPH_C32(0xb9435656),
SPH_C32(0xfafdf4f4), SPH_C32(0xa085eaea), SPH_C32(0x208f6565),
SPH_C32(0x7df37a7a), SPH_C32(0x678eaeae), SPH_C32(0x38200808),
SPH_C32(0x0bdebaba), SPH_C32(0x73fb7878), SPH_C32(0xfb942525),
SPH_C32(0xcab82e2e), SPH_C32(0x54701c1c), SPH_C32(0x5faea6a6),
SPH_C32(0x21e6b4b4), SPH_C32(0x6435c6c6), SPH_C32(0xae8de8e8),
SPH_C32(0x2559dddd), SPH_C32(0x57cb7474), SPH_C32(0x5d7c1f1f),
SPH_C32(0xea374b4b), SPH_C32(0x1ec2bdbd), SPH_C32(0x9c1a8b8b),
SPH_C32(0x9b1e8a8a), SPH_C32(0x4bdb7070), SPH_C32(0xbaf83e3e),
SPH_C32(0x26e2b5b5), SPH_C32(0x29836666), SPH_C32(0xe33b4848),
SPH_C32(0x090c0303), SPH_C32(0xf4f5f6f6), SPH_C32(0x2a380e0e),
SPH_C32(0x3c9f6161), SPH_C32(0x8bd43535), SPH_C32(0xbe475757),
SPH_C32(0x02d2b9b9), SPH_C32(0xbf2e8686), SPH_C32(0x7129c1c1),
SPH_C32(0x53741d1d), SPH_C32(0xf74e9e9e), SPH_C32(0x91a9e1e1),
SPH_C32(0xdecdf8f8), SPH_C32(0xe5569898), SPH_C32(0x77441111),
SPH_C32(0x04bf6969), SPH_C32(0x3949d9d9), SPH_C32(0x870e8e8e),
SPH_C32(0xc1669494), SPH_C32(0xec5a9b9b), SPH_C32(0x5a781e1e),
SPH_C32(0xb82a8787), SPH_C32(0xa989e9e9), SPH_C32(0x5c15cece),
SPH_C32(0xb04f5555), SPH_C32(0xd8a02828), SPH_C32(0x2b51dfdf),
SPH_C32(0x89068c8c), SPH_C32(0x4ab2a1a1), SPH_C32(0x92128989),
SPH_C32(0x23340d0d), SPH_C32(0x10cabfbf), SPH_C32(0x84b5e6e6),
SPH_C32(0xd5134242), SPH_C32(0x03bb6868), SPH_C32(0xdc1f4141),
SPH_C32(0xe2529999), SPH_C32(0xc3b42d2d), SPH_C32(0x2d3c0f0f),
SPH_C32(0x3df6b0b0), SPH_C32(0xb74b5454), SPH_C32(0x0cdabbbb),
SPH_C32(0x62581616)
};
static const uint32_t mixtab3_cpu[] = {
SPH_C32(0x63329763), SPH_C32(0x7c6feb7c), SPH_C32(0x775ec777),
SPH_C32(0x7b7af77b), SPH_C32(0xf2e8e5f2), SPH_C32(0x6b0ab76b),
SPH_C32(0x6f16a76f), SPH_C32(0xc56d39c5), SPH_C32(0x3090c030),
SPH_C32(0x01070401), SPH_C32(0x672e8767), SPH_C32(0x2bd1ac2b),
SPH_C32(0xfeccd5fe), SPH_C32(0xd71371d7), SPH_C32(0xab7c9aab),
SPH_C32(0x7659c376), SPH_C32(0xca4005ca), SPH_C32(0x82a33e82),
SPH_C32(0xc94909c9), SPH_C32(0x7d68ef7d), SPH_C32(0xfad0c5fa),
SPH_C32(0x59947f59), SPH_C32(0x47ce0747), SPH_C32(0xf0e6edf0),
SPH_C32(0xad6e82ad), SPH_C32(0xd41a7dd4), SPH_C32(0xa243bea2),
SPH_C32(0xaf608aaf), SPH_C32(0x9cf9469c), SPH_C32(0xa451a6a4),
SPH_C32(0x7245d372), SPH_C32(0xc0762dc0), SPH_C32(0xb728eab7),
SPH_C32(0xfdc5d9fd), SPH_C32(0x93d47a93), SPH_C32(0x26f29826),
SPH_C32(0x3682d836), SPH_C32(0x3fbdfc3f), SPH_C32(0xf7f3f1f7),
SPH_C32(0xcc521dcc), SPH_C32(0x348cd034), SPH_C32(0xa556a2a5),
SPH_C32(0xe58db9e5), SPH_C32(0xf1e1e9f1), SPH_C32(0x714cdf71),
SPH_C32(0xd83e4dd8), SPH_C32(0x3197c431), SPH_C32(0x156b5415),
SPH_C32(0x041c1004), SPH_C32(0xc76331c7), SPH_C32(0x23e98c23),
SPH_C32(0xc37f21c3), SPH_C32(0x18486018), SPH_C32(0x96cf6e96),
SPH_C32(0x051b1405), SPH_C32(0x9aeb5e9a), SPH_C32(0x07151c07),
SPH_C32(0x127e4812), SPH_C32(0x80ad3680), SPH_C32(0xe298a5e2),
SPH_C32(0xeba781eb), SPH_C32(0x27f59c27), SPH_C32(0xb233feb2),
SPH_C32(0x7550cf75), SPH_C32(0x093f2409), SPH_C32(0x83a43a83),
SPH_C32(0x2cc4b02c), SPH_C32(0x1a46681a), SPH_C32(0x1b416c1b),
SPH_C32(0x6e11a36e), SPH_C32(0x5a9d735a), SPH_C32(0xa04db6a0),
SPH_C32(0x52a55352), SPH_C32(0x3ba1ec3b), SPH_C32(0xd61475d6),
SPH_C32(0xb334fab3), SPH_C32(0x29dfa429), SPH_C32(0xe39fa1e3),
SPH_C32(0x2fcdbc2f), SPH_C32(0x84b12684), SPH_C32(0x53a25753),
SPH_C32(0xd10169d1), SPH_C32(0x00000000), SPH_C32(0xedb599ed),
SPH_C32(0x20e08020), SPH_C32(0xfcc2ddfc), SPH_C32(0xb13af2b1),
SPH_C32(0x5b9a775b), SPH_C32(0x6a0db36a), SPH_C32(0xcb4701cb),
SPH_C32(0xbe17cebe), SPH_C32(0x39afe439), SPH_C32(0x4aed334a),
SPH_C32(0x4cff2b4c), SPH_C32(0x58937b58), SPH_C32(0xcf5b11cf),
SPH_C32(0xd0066dd0), SPH_C32(0xefbb91ef), SPH_C32(0xaa7b9eaa),
SPH_C32(0xfbd7c1fb), SPH_C32(0x43d21743), SPH_C32(0x4df82f4d),
SPH_C32(0x3399cc33), SPH_C32(0x85b62285), SPH_C32(0x45c00f45),
SPH_C32(0xf9d9c9f9), SPH_C32(0x020e0802), SPH_C32(0x7f66e77f),
SPH_C32(0x50ab5b50), SPH_C32(0x3cb4f03c), SPH_C32(0x9ff04a9f),
SPH_C32(0xa87596a8), SPH_C32(0x51ac5f51), SPH_C32(0xa344baa3),
SPH_C32(0x40db1b40), SPH_C32(0x8f800a8f), SPH_C32(0x92d37e92),
SPH_C32(0x9dfe429d), SPH_C32(0x38a8e038), SPH_C32(0xf5fdf9f5),
SPH_C32(0xbc19c6bc), SPH_C32(0xb62feeb6), SPH_C32(0xda3045da),
SPH_C32(0x21e78421), SPH_C32(0x10704010), SPH_C32(0xffcbd1ff),
SPH_C32(0xf3efe1f3), SPH_C32(0xd20865d2), SPH_C32(0xcd5519cd),
SPH_C32(0x0c24300c), SPH_C32(0x13794c13), SPH_C32(0xecb29dec),
SPH_C32(0x5f86675f), SPH_C32(0x97c86a97), SPH_C32(0x44c70b44),
SPH_C32(0x17655c17), SPH_C32(0xc46a3dc4), SPH_C32(0xa758aaa7),
SPH_C32(0x7e61e37e), SPH_C32(0x3db3f43d), SPH_C32(0x64278b64),
SPH_C32(0x5d886f5d), SPH_C32(0x194f6419), SPH_C32(0x7342d773),
SPH_C32(0x603b9b60), SPH_C32(0x81aa3281), SPH_C32(0x4ff6274f),
SPH_C32(0xdc225ddc), SPH_C32(0x22ee8822), SPH_C32(0x2ad6a82a),
SPH_C32(0x90dd7690), SPH_C32(0x88951688), SPH_C32(0x46c90346),
SPH_C32(0xeebc95ee), SPH_C32(0xb805d6b8), SPH_C32(0x146c5014),
SPH_C32(0xde2c55de), SPH_C32(0x5e81635e), SPH_C32(0x0b312c0b),
SPH_C32(0xdb3741db), SPH_C32(0xe096ade0), SPH_C32(0x329ec832),
SPH_C32(0x3aa6e83a), SPH_C32(0x0a36280a), SPH_C32(0x49e43f49),
SPH_C32(0x06121806), SPH_C32(0x24fc9024), SPH_C32(0x5c8f6b5c),
SPH_C32(0xc27825c2), SPH_C32(0xd30f61d3), SPH_C32(0xac6986ac),
SPH_C32(0x62359362), SPH_C32(0x91da7291), SPH_C32(0x95c66295),
SPH_C32(0xe48abde4), SPH_C32(0x7974ff79), SPH_C32(0xe783b1e7),
SPH_C32(0xc84e0dc8), SPH_C32(0x3785dc37), SPH_C32(0x6d18af6d),
SPH_C32(0x8d8e028d), SPH_C32(0xd51d79d5), SPH_C32(0x4ef1234e),
SPH_C32(0xa97292a9), SPH_C32(0x6c1fab6c), SPH_C32(0x56b94356),
SPH_C32(0xf4fafdf4), SPH_C32(0xeaa085ea), SPH_C32(0x65208f65),
SPH_C32(0x7a7df37a), SPH_C32(0xae678eae), SPH_C32(0x08382008),
SPH_C32(0xba0bdeba), SPH_C32(0x7873fb78), SPH_C32(0x25fb9425),
SPH_C32(0x2ecab82e), SPH_C32(0x1c54701c), SPH_C32(0xa65faea6),
SPH_C32(0xb421e6b4), SPH_C32(0xc66435c6), SPH_C32(0xe8ae8de8),
SPH_C32(0xdd2559dd), SPH_C32(0x7457cb74), SPH_C32(0x1f5d7c1f),
SPH_C32(0x4bea374b), SPH_C32(0xbd1ec2bd), SPH_C32(0x8b9c1a8b),
SPH_C32(0x8a9b1e8a), SPH_C32(0x704bdb70), SPH_C32(0x3ebaf83e),
SPH_C32(0xb526e2b5), SPH_C32(0x66298366), SPH_C32(0x48e33b48),
SPH_C32(0x03090c03), SPH_C32(0xf6f4f5f6), SPH_C32(0x0e2a380e),
SPH_C32(0x613c9f61), SPH_C32(0x358bd435), SPH_C32(0x57be4757),
SPH_C32(0xb902d2b9), SPH_C32(0x86bf2e86), SPH_C32(0xc17129c1),
SPH_C32(0x1d53741d), SPH_C32(0x9ef74e9e), SPH_C32(0xe191a9e1),
SPH_C32(0xf8decdf8), SPH_C32(0x98e55698), SPH_C32(0x11774411),
SPH_C32(0x6904bf69), SPH_C32(0xd93949d9), SPH_C32(0x8e870e8e),
SPH_C32(0x94c16694), SPH_C32(0x9bec5a9b), SPH_C32(0x1e5a781e),
SPH_C32(0x87b82a87), SPH_C32(0xe9a989e9), SPH_C32(0xce5c15ce),
SPH_C32(0x55b04f55), SPH_C32(0x28d8a028), SPH_C32(0xdf2b51df),
SPH_C32(0x8c89068c), SPH_C32(0xa14ab2a1), SPH_C32(0x89921289),
SPH_C32(0x0d23340d), SPH_C32(0xbf10cabf), SPH_C32(0xe684b5e6),
SPH_C32(0x42d51342), SPH_C32(0x6803bb68), SPH_C32(0x41dc1f41),
SPH_C32(0x99e25299), SPH_C32(0x2dc3b42d), SPH_C32(0x0f2d3c0f),
SPH_C32(0xb03df6b0), SPH_C32(0x54b74b54), SPH_C32(0xbb0cdabb),
SPH_C32(0x16625816)
};
#define TIX4(q, x00, x01, x04, x07, x08, x22, x24, x27, x30) { \
x22 ^= x00; \
x00 = (q); \
x08 ^= x00; \
x01 ^= x24; \
x04 ^= x27; \
x07 ^= x30; \
}
#define CMIX36(x00, x01, x02, x04, x05, x06, x18, x19, x20) { \
x00 ^= x04; \
x01 ^= x05; \
x02 ^= x06; \
x18 ^= x04; \
x19 ^= x05; \
x20 ^= x06; \
}
#define SMIX(x0, x1, x2, x3) { \
uint32_t c0 = 0; \
uint32_t c1 = 0; \
uint32_t c2 = 0; \
uint32_t c3 = 0; \
uint32_t r0 = 0; \
uint32_t r1 = 0; \
uint32_t r2 = 0; \
uint32_t r3 = 0; \
uint32_t tmp; \
tmp = mixtab0(x0 >> 24); \
c0 ^= tmp; \
tmp = mixtab1((x0 >> 16) & 0xFF); \
c0 ^= tmp; \
r1 ^= tmp; \
tmp = mixtab2((x0 >> 8) & 0xFF); \
c0 ^= tmp; \
r2 ^= tmp; \
tmp = mixtab3(x0 & 0xFF); \
c0 ^= tmp; \
r3 ^= tmp; \
tmp = mixtab0(x1 >> 24); \
c1 ^= tmp; \
r0 ^= tmp; \
tmp = mixtab1((x1 >> 16) & 0xFF); \
c1 ^= tmp; \
tmp = mixtab2((x1 >> 8) & 0xFF); \
c1 ^= tmp; \
r2 ^= tmp; \
tmp = mixtab3(x1 & 0xFF); \
c1 ^= tmp; \
r3 ^= tmp; \
tmp = mixtab0(x2 >> 24); \
c2 ^= tmp; \
r0 ^= tmp; \
tmp = mixtab1((x2 >> 16) & 0xFF); \
c2 ^= tmp; \
r1 ^= tmp; \
tmp = mixtab2((x2 >> 8) & 0xFF); \
c2 ^= tmp; \
tmp = mixtab3(x2 & 0xFF); \
c2 ^= tmp; \
r3 ^= tmp; \
tmp = mixtab0(x3 >> 24); \
c3 ^= tmp; \
r0 ^= tmp; \
tmp = mixtab1((x3 >> 16) & 0xFF); \
c3 ^= tmp; \
r1 ^= tmp; \
tmp = mixtab2((x3 >> 8) & 0xFF); \
c3 ^= tmp; \
r2 ^= tmp; \
tmp = mixtab3(x3 & 0xFF); \
c3 ^= tmp; \
x0 = ((c0 ^ r0) & SPH_C32(0xFF000000)) \
| ((c1 ^ r1) & SPH_C32(0x00FF0000)) \
| ((c2 ^ r2) & SPH_C32(0x0000FF00)) \
| ((c3 ^ r3) & SPH_C32(0x000000FF)); \
x1 = ((c1 ^ (r0 << 8)) & SPH_C32(0xFF000000)) \
| ((c2 ^ (r1 << 8)) & SPH_C32(0x00FF0000)) \
| ((c3 ^ (r2 << 8)) & SPH_C32(0x0000FF00)) \
| ((c0 ^ (r3 >> 24)) & SPH_C32(0x000000FF)); \
x2 = ((c2 ^ (r0 << 16)) & SPH_C32(0xFF000000)) \
| ((c3 ^ (r1 << 16)) & SPH_C32(0x00FF0000)) \
| ((c0 ^ (r2 >> 16)) & SPH_C32(0x0000FF00)) \
| ((c1 ^ (r3 >> 16)) & SPH_C32(0x000000FF)); \
x3 = ((c3 ^ (r0 << 24)) & SPH_C32(0xFF000000)) \
| ((c0 ^ (r1 >> 8)) & SPH_C32(0x00FF0000)) \
| ((c1 ^ (r2 >> 8)) & SPH_C32(0x0000FF00)) \
| ((c2 ^ (r3 >> 8)) & SPH_C32(0x000000FF)); \
}
#define ROR3 { \
B33 = S33, B34 = S34, B35 = S35; \
S35 = S32; S34 = S31; S33 = S30; S32 = S29; S31 = S28; S30 = S27; S29 = S26; S28 = S25; S27 = S24; \
S26 = S23; S25 = S22; S24 = S21; S23 = S20; S22 = S19; S21 = S18; S20 = S17; S19 = S16; S18 = S15; \
S17 = S14; S16 = S13; S15 = S12; S14 = S11; S13 = S10; S12 = S09; S11 = S08; S10 = S07; S09 = S06; \
S08 = S05; S07 = S04; S06 = S03; S05 = S02; S04 = S01; S03 = S00; S02 = B35; S01 = B34; S00 = B33; \
}
#define ROR8 { \
B28 = S28, B29 = S29, B30 = S30, B31 = S31, B32 = S32, B33 = S33, B34 = S34, B35 = S35; \
S35 = S27; S34 = S26; S33 = S25; S32 = S24; S31 = S23; S30 = S22; S29 = S21; S28 = S20; S27 = S19; \
S26 = S18; S25 = S17; S24 = S16; S23 = S15; S22 = S14; S21 = S13; S20 = S12; S19 = S11; S18 = S10; \
S17 = S09; S16 = S08; S15 = S07; S14 = S06; S13 = S05; S12 = S04; S11 = S03; S10 = S02; S09 = S01; \
S08 = S00; S07 = B35; S06 = B34; S05 = B33; S04 = B32; S03 = B31; S02 = B30; S01 = B29; S00 = B28; \
}
#define ROR9 { \
B27 = S27, B28 = S28, B29 = S29, B30 = S30, B31 = S31, B32 = S32, B33 = S33, B34 = S34, B35 = S35; \
S35 = S26; S34 = S25; S33 = S24; S32 = S23; S31 = S22; S30 = S21; S29 = S20; S28 = S19; S27 = S18; \
S26 = S17; S25 = S16; S24 = S15; S23 = S14; S22 = S13; S21 = S12; S20 = S11; S19 = S10; S18 = S09; \
S17 = S08; S16 = S07; S15 = S06; S14 = S05; S13 = S04; S12 = S03; S11 = S02; S10 = S01; S09 = S00; \
S08 = B35; S07 = B34; S06 = B33; S05 = B32; S04 = B31; S03 = B30; S02 = B29; S01 = B28; S00 = B27; \
}
#define FUGUE512_3(x, y, z) { \
TIX4(x, S00, S01, S04, S07, S08, S22, S24, S27, S30); \
CMIX36(S33, S34, S35, S01, S02, S03, S15, S16, S17); \
SMIX(S33, S34, S35, S00); \
CMIX36(S30, S31, S32, S34, S35, S00, S12, S13, S14); \
SMIX(S30, S31, S32, S33); \
CMIX36(S27, S28, S29, S31, S32, S33, S09, S10, S11); \
SMIX(S27, S28, S29, S30); \
CMIX36(S24, S25, S26, S28, S29, S30, S06, S07, S08); \
SMIX(S24, S25, S26, S27); \
\
TIX4(y, S24, S25, S28, S31, S32, S10, S12, S15, S18); \
CMIX36(S21, S22, S23, S25, S26, S27, S03, S04, S05); \
SMIX(S21, S22, S23, S24); \
CMIX36(S18, S19, S20, S22, S23, S24, S00, S01, S02); \
SMIX(S18, S19, S20, S21); \
CMIX36(S15, S16, S17, S19, S20, S21, S33, S34, S35); \
SMIX(S15, S16, S17, S18); \
CMIX36(S12, S13, S14, S16, S17, S18, S30, S31, S32); \
SMIX(S12, S13, S14, S15); \
\
TIX4(z, S12, S13, S16, S19, S20, S34, S00, S03, S06); \
CMIX36(S09, S10, S11, S13, S14, S15, S27, S28, S29); \
SMIX(S09, S10, S11, S12); \
CMIX36(S06, S07, S08, S10, S11, S12, S24, S25, S26); \
SMIX(S06, S07, S08, S09); \
CMIX36(S03, S04, S05, S07, S08, S09, S21, S22, S23); \
SMIX(S03, S04, S05, S06); \
CMIX36(S00, S01, S02, S04, S05, S06, S18, S19, S20); \
SMIX(S00, S01, S02, S03); \
}
/***************************************************/
// Die Hash-Funktion
__global__ void x13_fugue512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector)
{
extern __shared__ char mixtabs[];
*((uint32_t*)mixtabs + ( threadIdx.x)) = tex1Dfetch(mixTab0Tex, threadIdx.x);
*((uint32_t*)mixtabs + (256+threadIdx.x)) = tex1Dfetch(mixTab1Tex, threadIdx.x);
*((uint32_t*)mixtabs + (512+threadIdx.x)) = tex1Dfetch(mixTab2Tex, threadIdx.x);
*((uint32_t*)mixtabs + (768+threadIdx.x)) = tex1Dfetch(mixTab3Tex, threadIdx.x);
__syncthreads();
int i;
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
{
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
int hashPosition = nounce - startNounce;
uint32_t *Hash = (uint32_t*)&g_hash[hashPosition<<3];
#pragma unroll 16
for( i = 0; i < 16; i++ )
Hash[i] = SWAB32(Hash[i]);
uint32_t S00, S01, S02, S03, S04, S05, S06, S07, S08, S09;
uint32_t S10, S11, S12, S13, S14, S15, S16, S17, S18, S19;
uint32_t S20, S21, S22, S23, S24, S25, S26, S27, S28, S29;
uint32_t S30, S31, S32, S33, S34, S35;
uint32_t B27, B28, B29, B30, B31, B32, B33, B34, B35;
uint64_t bc = (uint64_t) 64 << 3;
uint32_t bclo = (uint32_t)(bc & 0xFFFFFFFFULL);
uint32_t bchi = (uint32_t)(bc >> 32);
S00 = S01 = S02 = S03 = S04 = S05 = S06 = S07 = S08 = S09 = S10 = S11 = S12 = S13 = S14 = S15 = S16 = S17 = S18 = S19 = 0;
S20 = SPH_C32(0x8807a57e); S21 = SPH_C32(0xe616af75); S22 = SPH_C32(0xc5d3e4db); S23 = SPH_C32(0xac9ab027);
S24 = SPH_C32(0xd915f117); S25 = SPH_C32(0xb6eecc54); S26 = SPH_C32(0x06e8020b); S27 = SPH_C32(0x4a92efd1);
S28 = SPH_C32(0xaac6e2c9); S29 = SPH_C32(0xddb21398); S30 = SPH_C32(0xcae65838); S31 = SPH_C32(0x437f203f);
S32 = SPH_C32(0x25ea78e7); S33 = SPH_C32(0x951fddd6); S34 = SPH_C32(0xda6ed11d); S35 = SPH_C32(0xe13e3567);
FUGUE512_3((Hash[0x0]), (Hash[0x1]), (Hash[0x2]));
FUGUE512_3((Hash[0x3]), (Hash[0x4]), (Hash[0x5]));
FUGUE512_3((Hash[0x6]), (Hash[0x7]), (Hash[0x8]));
FUGUE512_3((Hash[0x9]), (Hash[0xA]), (Hash[0xB]));
FUGUE512_3((Hash[0xC]), (Hash[0xD]), (Hash[0xE]));
FUGUE512_3((Hash[0xF]), bchi, bclo);
#pragma unroll 32
for (i = 0; i < 32; i ++) {
ROR3;
CMIX36(S00, S01, S02, S04, S05, S06, S18, S19, S20);
SMIX(S00, S01, S02, S03);
}
#pragma unroll 13
for (i = 0; i < 13; i ++) {
S04 ^= S00;
S09 ^= S00;
S18 ^= S00;
S27 ^= S00;
ROR9;
SMIX(S00, S01, S02, S03);
S04 ^= S00;
S10 ^= S00;
S18 ^= S00;
S27 ^= S00;
ROR9;
SMIX(S00, S01, S02, S03);
S04 ^= S00;
S10 ^= S00;
S19 ^= S00;
S27 ^= S00;
ROR9;
SMIX(S00, S01, S02, S03);
S04 ^= S00;
S10 ^= S00;
S19 ^= S00;
S28 ^= S00;
ROR8;
SMIX(S00, S01, S02, S03);
}
S04 ^= S00;
S09 ^= S00;
S18 ^= S00;
S27 ^= S00;
Hash[0] = SWAB32(S01);
Hash[1] = SWAB32(S02);
Hash[2] = SWAB32(S03);
Hash[3] = SWAB32(S04);
Hash[4] = SWAB32(S09);
Hash[5] = SWAB32(S10);
Hash[6] = SWAB32(S11);
Hash[7] = SWAB32(S12);
Hash[8] = SWAB32(S18);
Hash[9] = SWAB32(S19);
Hash[10] = SWAB32(S20);
Hash[11] = SWAB32(S21);
Hash[12] = SWAB32(S27);
Hash[13] = SWAB32(S28);
Hash[14] = SWAB32(S29);
Hash[15] = SWAB32(S30);
}
}
#define texDef(texname, texmem, texsource, texsize) \
unsigned int *texmem; \
cudaMalloc(&texmem, texsize); \
cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \
texname.normalized = 0; \
texname.filterMode = cudaFilterModePoint; \
texname.addressMode[0] = cudaAddressModeClamp; \
{ cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned int>(); \
cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); }
__host__ void x13_fugue512_cpu_init(int thr_id, int threads)
{
texDef(mixTab0Tex, mixTab0m, mixtab0_cpu, sizeof(uint32_t)*256);
texDef(mixTab1Tex, mixTab1m, mixtab1_cpu, sizeof(uint32_t)*256);
texDef(mixTab2Tex, mixTab2m, mixtab2_cpu, sizeof(uint32_t)*256);
texDef(mixTab3Tex, mixTab3m, mixtab3_cpu, sizeof(uint32_t)*256);
}
__host__ void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
{
const int threadsperblock = 256;
// berechne wie viele Thread Blocks wir brauchen
dim3 grid((threads + threadsperblock-1)/threadsperblock);
dim3 block(threadsperblock);
// Größe des dynamischen Shared Memory Bereichs
size_t shared_size = 4 * 256 * sizeof(uint32_t);
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
x13_fugue512_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
MyStreamSynchronize(NULL, order, thr_id);
}

764
x13/cuda_x13_hamsi512.cu

@ -0,0 +1,764 @@ @@ -0,0 +1,764 @@
/*
* Quick and dirty addition of Hamsi-512 for X13
*
* Built on cbuchner1's implementation, actual hashing code
* heavily based on phm's sgminer
*
*/
/*
* X13 kernel implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2014 phm
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author phm <phm@inbox.com>
*/
// aus heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
typedef unsigned long long uint64_t;
#define SPH_C64(x) ((uint64_t)(x ## ULL))
#define SPH_C32(x) ((uint32_t)(x ## U))
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
#define SWAB32(x) ( __byte_perm(x, x, 0x0123) )
#if __CUDA_ARCH__ < 350
// Kepler (Compute 3.0)
#define ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
#else
// Kepler (Compute 3.5)
#define ROTL32(x, n) __funnelshift_l( (x), (x), (n) )
#endif
static __constant__ uint32_t d_alpha_n[32];
static __constant__ uint32_t d_alpha_f[32];
static __constant__ uint32_t d_T512[64][16];
static const uint32_t alpha_n[] = {
SPH_C32(0xff00f0f0), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0cccc),
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00),
SPH_C32(0xaaaacccc), SPH_C32(0xf0f0ff00), SPH_C32(0xf0f0cccc),
SPH_C32(0xaaaaff00), SPH_C32(0xccccff00), SPH_C32(0xaaaaf0f0),
SPH_C32(0xaaaaf0f0), SPH_C32(0xff00cccc), SPH_C32(0xccccf0f0),
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xff00f0f0),
SPH_C32(0xff00aaaa), SPH_C32(0xf0f0cccc), SPH_C32(0xf0f0ff00),
SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00), SPH_C32(0xaaaacccc),
SPH_C32(0xaaaaff00), SPH_C32(0xf0f0cccc), SPH_C32(0xaaaaf0f0),
SPH_C32(0xccccff00), SPH_C32(0xff00cccc), SPH_C32(0xaaaaf0f0),
SPH_C32(0xff00aaaa), SPH_C32(0xccccf0f0)
};
static const uint32_t alpha_f[] = {
SPH_C32(0xcaf9639c), SPH_C32(0x0ff0f9c0), SPH_C32(0x639c0ff0),
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9),
SPH_C32(0xf9c00ff0), SPH_C32(0x639ccaf9), SPH_C32(0x639c0ff0),
SPH_C32(0xf9c0caf9), SPH_C32(0x0ff0caf9), SPH_C32(0xf9c0639c),
SPH_C32(0xf9c0639c), SPH_C32(0xcaf90ff0), SPH_C32(0x0ff0639c),
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0xcaf9639c),
SPH_C32(0xcaf9f9c0), SPH_C32(0x639c0ff0), SPH_C32(0x639ccaf9),
SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9), SPH_C32(0xf9c00ff0),
SPH_C32(0xf9c0caf9), SPH_C32(0x639c0ff0), SPH_C32(0xf9c0639c),
SPH_C32(0x0ff0caf9), SPH_C32(0xcaf90ff0), SPH_C32(0xf9c0639c),
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0639c)
};
#define hamsi_s00 m0
#define hamsi_s01 m1
#define hamsi_s02 c0
#define hamsi_s03 c1
#define hamsi_s04 m2
#define hamsi_s05 m3
#define hamsi_s06 c2
#define hamsi_s07 c3
#define hamsi_s08 c4
#define hamsi_s09 c5
#define hamsi_s0A m4
#define hamsi_s0B m5
#define hamsi_s0C c6
#define hamsi_s0D c7
#define hamsi_s0E m6
#define hamsi_s0F m7
#define hamsi_s10 m8
#define hamsi_s11 m9
#define hamsi_s12 c8
#define hamsi_s13 c9
#define hamsi_s14 mA
#define hamsi_s15 mB
#define hamsi_s16 cA
#define hamsi_s17 cB
#define hamsi_s18 cC
#define hamsi_s19 cD
#define hamsi_s1A mC
#define hamsi_s1B mD
#define hamsi_s1C cE
#define hamsi_s1D cF
#define hamsi_s1E mE
#define hamsi_s1F mF
#define SBOX(a, b, c, d) { \
uint32_t t; \
t = (a); \
(a) &= (c); \
(a) ^= (d); \
(c) ^= (b); \
(c) ^= (a); \
(d) |= t; \
(d) ^= (b); \
t ^= (c); \
(b) = (d); \
(d) |= t; \
(d) ^= (a); \
(a) &= (b); \
t ^= (a); \
(b) ^= (d); \
(b) ^= t; \
(a) = (c); \
(c) = (b); \
(b) = (d); \
(d) = SPH_T32(~t); \
}
#define HAMSI_L(a, b, c, d) { \
(a) = ROTL32(a, 13); \
(c) = ROTL32(c, 3); \
(b) ^= (a) ^ (c); \
(d) ^= (c) ^ SPH_T32((a) << 3); \
(b) = ROTL32(b, 1); \
(d) = ROTL32(d, 7); \
(a) ^= (b) ^ (d); \
(c) ^= (d) ^ SPH_T32((b) << 7); \
(a) = ROTL32(a, 5); \
(c) = ROTL32(c, 22); \
}
#define ROUND_BIG(rc, alpha) { \
hamsi_s00 ^= alpha[0x00]; \
hamsi_s01 ^= alpha[0x01] ^ (uint32_t)(rc); \
hamsi_s02 ^= alpha[0x02]; \
hamsi_s03 ^= alpha[0x03]; \
hamsi_s04 ^= alpha[0x04]; \
hamsi_s05 ^= alpha[0x05]; \
hamsi_s06 ^= alpha[0x06]; \
hamsi_s07 ^= alpha[0x07]; \
hamsi_s08 ^= alpha[0x08]; \
hamsi_s09 ^= alpha[0x09]; \
hamsi_s0A ^= alpha[0x0A]; \
hamsi_s0B ^= alpha[0x0B]; \
hamsi_s0C ^= alpha[0x0C]; \
hamsi_s0D ^= alpha[0x0D]; \
hamsi_s0E ^= alpha[0x0E]; \
hamsi_s0F ^= alpha[0x0F]; \
hamsi_s10 ^= alpha[0x10]; \
hamsi_s11 ^= alpha[0x11]; \
hamsi_s12 ^= alpha[0x12]; \
hamsi_s13 ^= alpha[0x13]; \
hamsi_s14 ^= alpha[0x14]; \
hamsi_s15 ^= alpha[0x15]; \
hamsi_s16 ^= alpha[0x16]; \
hamsi_s17 ^= alpha[0x17]; \
hamsi_s18 ^= alpha[0x18]; \
hamsi_s19 ^= alpha[0x19]; \
hamsi_s1A ^= alpha[0x1A]; \
hamsi_s1B ^= alpha[0x1B]; \
hamsi_s1C ^= alpha[0x1C]; \
hamsi_s1D ^= alpha[0x1D]; \
hamsi_s1E ^= alpha[0x1E]; \
hamsi_s1F ^= alpha[0x1F]; \
SBOX(hamsi_s00, hamsi_s08, hamsi_s10, hamsi_s18); \
SBOX(hamsi_s01, hamsi_s09, hamsi_s11, hamsi_s19); \
SBOX(hamsi_s02, hamsi_s0A, hamsi_s12, hamsi_s1A); \
SBOX(hamsi_s03, hamsi_s0B, hamsi_s13, hamsi_s1B); \
SBOX(hamsi_s04, hamsi_s0C, hamsi_s14, hamsi_s1C); \
SBOX(hamsi_s05, hamsi_s0D, hamsi_s15, hamsi_s1D); \
SBOX(hamsi_s06, hamsi_s0E, hamsi_s16, hamsi_s1E); \
SBOX(hamsi_s07, hamsi_s0F, hamsi_s17, hamsi_s1F); \
HAMSI_L(hamsi_s00, hamsi_s09, hamsi_s12, hamsi_s1B); \
HAMSI_L(hamsi_s01, hamsi_s0A, hamsi_s13, hamsi_s1C); \
HAMSI_L(hamsi_s02, hamsi_s0B, hamsi_s14, hamsi_s1D); \
HAMSI_L(hamsi_s03, hamsi_s0C, hamsi_s15, hamsi_s1E); \
HAMSI_L(hamsi_s04, hamsi_s0D, hamsi_s16, hamsi_s1F); \
HAMSI_L(hamsi_s05, hamsi_s0E, hamsi_s17, hamsi_s18); \
HAMSI_L(hamsi_s06, hamsi_s0F, hamsi_s10, hamsi_s19); \
HAMSI_L(hamsi_s07, hamsi_s08, hamsi_s11, hamsi_s1A); \
HAMSI_L(hamsi_s00, hamsi_s02, hamsi_s05, hamsi_s07); \
HAMSI_L(hamsi_s10, hamsi_s13, hamsi_s15, hamsi_s16); \
HAMSI_L(hamsi_s09, hamsi_s0B, hamsi_s0C, hamsi_s0E); \
HAMSI_L(hamsi_s19, hamsi_s1A, hamsi_s1C, hamsi_s1F); \
}
#define P_BIG { \
ROUND_BIG(0, d_alpha_n); \
ROUND_BIG(1, d_alpha_n); \
ROUND_BIG(2, d_alpha_n); \
ROUND_BIG(3, d_alpha_n); \
ROUND_BIG(4, d_alpha_n); \
ROUND_BIG(5, d_alpha_n); \
}
#define PF_BIG { \
ROUND_BIG(0, d_alpha_f); \
ROUND_BIG(1, d_alpha_f); \
ROUND_BIG(2, d_alpha_f); \
ROUND_BIG(3, d_alpha_f); \
ROUND_BIG(4, d_alpha_f); \
ROUND_BIG(5, d_alpha_f); \
ROUND_BIG(6, d_alpha_f); \
ROUND_BIG(7, d_alpha_f); \
ROUND_BIG(8, d_alpha_f); \
ROUND_BIG(9, d_alpha_f); \
ROUND_BIG(10, d_alpha_f); \
ROUND_BIG(11, d_alpha_f); \
}
#define T_BIG { \
/* order is important */ \
cF = (h[0xF] ^= hamsi_s17); \
cE = (h[0xE] ^= hamsi_s16); \
cD = (h[0xD] ^= hamsi_s15); \
cC = (h[0xC] ^= hamsi_s14); \
cB = (h[0xB] ^= hamsi_s13); \
cA = (h[0xA] ^= hamsi_s12); \
c9 = (h[0x9] ^= hamsi_s11); \
c8 = (h[0x8] ^= hamsi_s10); \
c7 = (h[0x7] ^= hamsi_s07); \
c6 = (h[0x6] ^= hamsi_s06); \
c5 = (h[0x5] ^= hamsi_s05); \
c4 = (h[0x4] ^= hamsi_s04); \
c3 = (h[0x3] ^= hamsi_s03); \
c2 = (h[0x2] ^= hamsi_s02); \
c1 = (h[0x1] ^= hamsi_s01); \
c0 = (h[0x0] ^= hamsi_s00); \
}
static const uint32_t T512[64][16] = {
{ SPH_C32(0xef0b0270), SPH_C32(0x3afd0000), SPH_C32(0x5dae0000),
SPH_C32(0x69490000), SPH_C32(0x9b0f3c06), SPH_C32(0x4405b5f9),
SPH_C32(0x66140a51), SPH_C32(0x924f5d0a), SPH_C32(0xc96b0030),
SPH_C32(0xe7250000), SPH_C32(0x2f840000), SPH_C32(0x264f0000),
SPH_C32(0x08695bf9), SPH_C32(0x6dfcf137), SPH_C32(0x509f6984),
SPH_C32(0x9e69af68) },
{ SPH_C32(0xc96b0030), SPH_C32(0xe7250000), SPH_C32(0x2f840000),
SPH_C32(0x264f0000), SPH_C32(0x08695bf9), SPH_C32(0x6dfcf137),
SPH_C32(0x509f6984), SPH_C32(0x9e69af68), SPH_C32(0x26600240),
SPH_C32(0xddd80000), SPH_C32(0x722a0000), SPH_C32(0x4f060000),
SPH_C32(0x936667ff), SPH_C32(0x29f944ce), SPH_C32(0x368b63d5),
SPH_C32(0x0c26f262) },
{ SPH_C32(0x145a3c00), SPH_C32(0xb9e90000), SPH_C32(0x61270000),
SPH_C32(0xf1610000), SPH_C32(0xce613d6c), SPH_C32(0xb0493d78),
SPH_C32(0x47a96720), SPH_C32(0xe18e24c5), SPH_C32(0x23671400),
SPH_C32(0xc8b90000), SPH_C32(0xf4c70000), SPH_C32(0xfb750000),
SPH_C32(0x73cd2465), SPH_C32(0xf8a6a549), SPH_C32(0x02c40a3f),
SPH_C32(0xdc24e61f) },
{ SPH_C32(0x23671400), SPH_C32(0xc8b90000), SPH_C32(0xf4c70000),
SPH_C32(0xfb750000), SPH_C32(0x73cd2465), SPH_C32(0xf8a6a549),
SPH_C32(0x02c40a3f), SPH_C32(0xdc24e61f), SPH_C32(0x373d2800),
SPH_C32(0x71500000), SPH_C32(0x95e00000), SPH_C32(0x0a140000),
SPH_C32(0xbdac1909), SPH_C32(0x48ef9831), SPH_C32(0x456d6d1f),
SPH_C32(0x3daac2da) },
{ SPH_C32(0x54285c00), SPH_C32(0xeaed0000), SPH_C32(0xc5d60000),
SPH_C32(0xa1c50000), SPH_C32(0xb3a26770), SPH_C32(0x94a5c4e1),
SPH_C32(0x6bb0419d), SPH_C32(0x551b3782), SPH_C32(0x9cbb1800),
SPH_C32(0xb0d30000), SPH_C32(0x92510000), SPH_C32(0xed930000),
SPH_C32(0x593a4345), SPH_C32(0xe114d5f4), SPH_C32(0x430633da),
SPH_C32(0x78cace29) },
{ SPH_C32(0x9cbb1800), SPH_C32(0xb0d30000), SPH_C32(0x92510000),
SPH_C32(0xed930000), SPH_C32(0x593a4345), SPH_C32(0xe114d5f4),
SPH_C32(0x430633da), SPH_C32(0x78cace29), SPH_C32(0xc8934400),
SPH_C32(0x5a3e0000), SPH_C32(0x57870000), SPH_C32(0x4c560000),
SPH_C32(0xea982435), SPH_C32(0x75b11115), SPH_C32(0x28b67247),
SPH_C32(0x2dd1f9ab) },
{ SPH_C32(0x29449c00), SPH_C32(0x64e70000), SPH_C32(0xf24b0000),
SPH_C32(0xc2f30000), SPH_C32(0x0ede4e8f), SPH_C32(0x56c23745),
SPH_C32(0xf3e04259), SPH_C32(0x8d0d9ec4), SPH_C32(0x466d0c00),
SPH_C32(0x08620000), SPH_C32(0xdd5d0000), SPH_C32(0xbadd0000),
SPH_C32(0x6a927942), SPH_C32(0x441f2b93), SPH_C32(0x218ace6f),
SPH_C32(0xbf2c0be2) },
{ SPH_C32(0x466d0c00), SPH_C32(0x08620000), SPH_C32(0xdd5d0000),
SPH_C32(0xbadd0000), SPH_C32(0x6a927942), SPH_C32(0x441f2b93),
SPH_C32(0x218ace6f), SPH_C32(0xbf2c0be2), SPH_C32(0x6f299000),
SPH_C32(0x6c850000), SPH_C32(0x2f160000), SPH_C32(0x782e0000),
SPH_C32(0x644c37cd), SPH_C32(0x12dd1cd6), SPH_C32(0xd26a8c36),
SPH_C32(0x32219526) },
{ SPH_C32(0xf6800005), SPH_C32(0x3443c000), SPH_C32(0x24070000),
SPH_C32(0x8f3d0000), SPH_C32(0x21373bfb), SPH_C32(0x0ab8d5ae),
SPH_C32(0xcdc58b19), SPH_C32(0xd795ba31), SPH_C32(0xa67f0001),
SPH_C32(0x71378000), SPH_C32(0x19fc0000), SPH_C32(0x96db0000),
SPH_C32(0x3a8b6dfd), SPH_C32(0xebcaaef3), SPH_C32(0x2c6d478f),
SPH_C32(0xac8e6c88) },
{ SPH_C32(0xa67f0001), SPH_C32(0x71378000), SPH_C32(0x19fc0000),
SPH_C32(0x96db0000), SPH_C32(0x3a8b6dfd), SPH_C32(0xebcaaef3),
SPH_C32(0x2c6d478f), SPH_C32(0xac8e6c88), SPH_C32(0x50ff0004),
SPH_C32(0x45744000), SPH_C32(0x3dfb0000), SPH_C32(0x19e60000),
SPH_C32(0x1bbc5606), SPH_C32(0xe1727b5d), SPH_C32(0xe1a8cc96),
SPH_C32(0x7b1bd6b9) },
{ SPH_C32(0xf7750009), SPH_C32(0xcf3cc000), SPH_C32(0xc3d60000),
SPH_C32(0x04920000), SPH_C32(0x029519a9), SPH_C32(0xf8e836ba),
SPH_C32(0x7a87f14e), SPH_C32(0x9e16981a), SPH_C32(0xd46a0000),
SPH_C32(0x8dc8c000), SPH_C32(0xa5af0000), SPH_C32(0x4a290000),
SPH_C32(0xfc4e427a), SPH_C32(0xc9b4866c), SPH_C32(0x98369604),
SPH_C32(0xf746c320) },
{ SPH_C32(0xd46a0000), SPH_C32(0x8dc8c000), SPH_C32(0xa5af0000),
SPH_C32(0x4a290000), SPH_C32(0xfc4e427a), SPH_C32(0xc9b4866c),
SPH_C32(0x98369604), SPH_C32(0xf746c320), SPH_C32(0x231f0009),
SPH_C32(0x42f40000), SPH_C32(0x66790000), SPH_C32(0x4ebb0000),
SPH_C32(0xfedb5bd3), SPH_C32(0x315cb0d6), SPH_C32(0xe2b1674a),
SPH_C32(0x69505b3a) },
{ SPH_C32(0x774400f0), SPH_C32(0xf15a0000), SPH_C32(0xf5b20000),
SPH_C32(0x34140000), SPH_C32(0x89377e8c), SPH_C32(0x5a8bec25),
SPH_C32(0x0bc3cd1e), SPH_C32(0xcf3775cb), SPH_C32(0xf46c0050),
SPH_C32(0x96180000), SPH_C32(0x14a50000), SPH_C32(0x031f0000),
SPH_C32(0x42947eb8), SPH_C32(0x66bf7e19), SPH_C32(0x9ca470d2),
SPH_C32(0x8a341574) },
{ SPH_C32(0xf46c0050), SPH_C32(0x96180000), SPH_C32(0x14a50000),
SPH_C32(0x031f0000), SPH_C32(0x42947eb8), SPH_C32(0x66bf7e19),
SPH_C32(0x9ca470d2), SPH_C32(0x8a341574), SPH_C32(0x832800a0),
SPH_C32(0x67420000), SPH_C32(0xe1170000), SPH_C32(0x370b0000),
SPH_C32(0xcba30034), SPH_C32(0x3c34923c), SPH_C32(0x9767bdcc),
SPH_C32(0x450360bf) },
{ SPH_C32(0xe8870170), SPH_C32(0x9d720000), SPH_C32(0x12db0000),
SPH_C32(0xd4220000), SPH_C32(0xf2886b27), SPH_C32(0xa921e543),
SPH_C32(0x4ef8b518), SPH_C32(0x618813b1), SPH_C32(0xb4370060),
SPH_C32(0x0c4c0000), SPH_C32(0x56c20000), SPH_C32(0x5cae0000),
SPH_C32(0x94541f3f), SPH_C32(0x3b3ef825), SPH_C32(0x1b365f3d),
SPH_C32(0xf3d45758) },
{ SPH_C32(0xb4370060), SPH_C32(0x0c4c0000), SPH_C32(0x56c20000),
SPH_C32(0x5cae0000), SPH_C32(0x94541f3f), SPH_C32(0x3b3ef825),
SPH_C32(0x1b365f3d), SPH_C32(0xf3d45758), SPH_C32(0x5cb00110),
SPH_C32(0x913e0000), SPH_C32(0x44190000), SPH_C32(0x888c0000),
SPH_C32(0x66dc7418), SPH_C32(0x921f1d66), SPH_C32(0x55ceea25),
SPH_C32(0x925c44e9) },
{ SPH_C32(0x0c720000), SPH_C32(0x49e50f00), SPH_C32(0x42790000),
SPH_C32(0x5cea0000), SPH_C32(0x33aa301a), SPH_C32(0x15822514),
SPH_C32(0x95a34b7b), SPH_C32(0xb44b0090), SPH_C32(0xfe220000),
SPH_C32(0xa7580500), SPH_C32(0x25d10000), SPH_C32(0xf7600000),
SPH_C32(0x893178da), SPH_C32(0x1fd4f860), SPH_C32(0x4ed0a315),
SPH_C32(0xa123ff9f) },
{ SPH_C32(0xfe220000), SPH_C32(0xa7580500), SPH_C32(0x25d10000),
SPH_C32(0xf7600000), SPH_C32(0x893178da), SPH_C32(0x1fd4f860),
SPH_C32(0x4ed0a315), SPH_C32(0xa123ff9f), SPH_C32(0xf2500000),
SPH_C32(0xeebd0a00), SPH_C32(0x67a80000), SPH_C32(0xab8a0000),
SPH_C32(0xba9b48c0), SPH_C32(0x0a56dd74), SPH_C32(0xdb73e86e),
SPH_C32(0x1568ff0f) },
{ SPH_C32(0x45180000), SPH_C32(0xa5b51700), SPH_C32(0xf96a0000),
SPH_C32(0x3b480000), SPH_C32(0x1ecc142c), SPH_C32(0x231395d6),
SPH_C32(0x16bca6b0), SPH_C32(0xdf33f4df), SPH_C32(0xb83d0000),
SPH_C32(0x16710600), SPH_C32(0x379a0000), SPH_C32(0xf5b10000),
SPH_C32(0x228161ac), SPH_C32(0xae48f145), SPH_C32(0x66241616),
SPH_C32(0xc5c1eb3e) },
{ SPH_C32(0xb83d0000), SPH_C32(0x16710600), SPH_C32(0x379a0000),
SPH_C32(0xf5b10000), SPH_C32(0x228161ac), SPH_C32(0xae48f145),
SPH_C32(0x66241616), SPH_C32(0xc5c1eb3e), SPH_C32(0xfd250000),
SPH_C32(0xb3c41100), SPH_C32(0xcef00000), SPH_C32(0xcef90000),
SPH_C32(0x3c4d7580), SPH_C32(0x8d5b6493), SPH_C32(0x7098b0a6),
SPH_C32(0x1af21fe1) },
{ SPH_C32(0x75a40000), SPH_C32(0xc28b2700), SPH_C32(0x94a40000),
SPH_C32(0x90f50000), SPH_C32(0xfb7857e0), SPH_C32(0x49ce0bae),
SPH_C32(0x1767c483), SPH_C32(0xaedf667e), SPH_C32(0xd1660000),
SPH_C32(0x1bbc0300), SPH_C32(0x9eec0000), SPH_C32(0xf6940000),
SPH_C32(0x03024527), SPH_C32(0xcf70fcf2), SPH_C32(0xb4431b17),
SPH_C32(0x857f3c2b) },
{ SPH_C32(0xd1660000), SPH_C32(0x1bbc0300), SPH_C32(0x9eec0000),
SPH_C32(0xf6940000), SPH_C32(0x03024527), SPH_C32(0xcf70fcf2),
SPH_C32(0xb4431b17), SPH_C32(0x857f3c2b), SPH_C32(0xa4c20000),
SPH_C32(0xd9372400), SPH_C32(0x0a480000), SPH_C32(0x66610000),
SPH_C32(0xf87a12c7), SPH_C32(0x86bef75c), SPH_C32(0xa324df94),
SPH_C32(0x2ba05a55) },
{ SPH_C32(0x75c90003), SPH_C32(0x0e10c000), SPH_C32(0xd1200000),
SPH_C32(0xbaea0000), SPH_C32(0x8bc42f3e), SPH_C32(0x8758b757),
SPH_C32(0xbb28761d), SPH_C32(0x00b72e2b), SPH_C32(0xeecf0001),
SPH_C32(0x6f564000), SPH_C32(0xf33e0000), SPH_C32(0xa79e0000),
SPH_C32(0xbdb57219), SPH_C32(0xb711ebc5), SPH_C32(0x4a3b40ba),
SPH_C32(0xfeabf254) },
{ SPH_C32(0xeecf0001), SPH_C32(0x6f564000), SPH_C32(0xf33e0000),
SPH_C32(0xa79e0000), SPH_C32(0xbdb57219), SPH_C32(0xb711ebc5),
SPH_C32(0x4a3b40ba), SPH_C32(0xfeabf254), SPH_C32(0x9b060002),
SPH_C32(0x61468000), SPH_C32(0x221e0000), SPH_C32(0x1d740000),
SPH_C32(0x36715d27), SPH_C32(0x30495c92), SPH_C32(0xf11336a7),
SPH_C32(0xfe1cdc7f) },
{ SPH_C32(0x86790000), SPH_C32(0x3f390002), SPH_C32(0xe19ae000),
SPH_C32(0x98560000), SPH_C32(0x9565670e), SPH_C32(0x4e88c8ea),
SPH_C32(0xd3dd4944), SPH_C32(0x161ddab9), SPH_C32(0x30b70000),
SPH_C32(0xe5d00000), SPH_C32(0xf4f46000), SPH_C32(0x42c40000),
SPH_C32(0x63b83d6a), SPH_C32(0x78ba9460), SPH_C32(0x21afa1ea),
SPH_C32(0xb0a51834) },
{ SPH_C32(0x30b70000), SPH_C32(0xe5d00000), SPH_C32(0xf4f46000),
SPH_C32(0x42c40000), SPH_C32(0x63b83d6a), SPH_C32(0x78ba9460),
SPH_C32(0x21afa1ea), SPH_C32(0xb0a51834), SPH_C32(0xb6ce0000),
SPH_C32(0xdae90002), SPH_C32(0x156e8000), SPH_C32(0xda920000),
SPH_C32(0xf6dd5a64), SPH_C32(0x36325c8a), SPH_C32(0xf272e8ae),
SPH_C32(0xa6b8c28d) },
{ SPH_C32(0x14190000), SPH_C32(0x23ca003c), SPH_C32(0x50df0000),
SPH_C32(0x44b60000), SPH_C32(0x1b6c67b0), SPH_C32(0x3cf3ac75),
SPH_C32(0x61e610b0), SPH_C32(0xdbcadb80), SPH_C32(0xe3430000),
SPH_C32(0x3a4e0014), SPH_C32(0xf2c60000), SPH_C32(0xaa4e0000),
SPH_C32(0xdb1e42a6), SPH_C32(0x256bbe15), SPH_C32(0x123db156),
SPH_C32(0x3a4e99d7) },
{ SPH_C32(0xe3430000), SPH_C32(0x3a4e0014), SPH_C32(0xf2c60000),
SPH_C32(0xaa4e0000), SPH_C32(0xdb1e42a6), SPH_C32(0x256bbe15),
SPH_C32(0x123db156), SPH_C32(0x3a4e99d7), SPH_C32(0xf75a0000),
SPH_C32(0x19840028), SPH_C32(0xa2190000), SPH_C32(0xeef80000),
SPH_C32(0xc0722516), SPH_C32(0x19981260), SPH_C32(0x73dba1e6),
SPH_C32(0xe1844257) },
{ SPH_C32(0x54500000), SPH_C32(0x0671005c), SPH_C32(0x25ae0000),
SPH_C32(0x6a1e0000), SPH_C32(0x2ea54edf), SPH_C32(0x664e8512),
SPH_C32(0xbfba18c3), SPH_C32(0x7e715d17), SPH_C32(0xbc8d0000),
SPH_C32(0xfc3b0018), SPH_C32(0x19830000), SPH_C32(0xd10b0000),
SPH_C32(0xae1878c4), SPH_C32(0x42a69856), SPH_C32(0x0012da37),
SPH_C32(0x2c3b504e) },
{ SPH_C32(0xbc8d0000), SPH_C32(0xfc3b0018), SPH_C32(0x19830000),
SPH_C32(0xd10b0000), SPH_C32(0xae1878c4), SPH_C32(0x42a69856),
SPH_C32(0x0012da37), SPH_C32(0x2c3b504e), SPH_C32(0xe8dd0000),
SPH_C32(0xfa4a0044), SPH_C32(0x3c2d0000), SPH_C32(0xbb150000),
SPH_C32(0x80bd361b), SPH_C32(0x24e81d44), SPH_C32(0xbfa8c2f4),
SPH_C32(0x524a0d59) },
{ SPH_C32(0x69510000), SPH_C32(0xd4e1009c), SPH_C32(0xc3230000),
SPH_C32(0xac2f0000), SPH_C32(0xe4950bae), SPH_C32(0xcea415dc),
SPH_C32(0x87ec287c), SPH_C32(0xbce1a3ce), SPH_C32(0xc6730000),
SPH_C32(0xaf8d000c), SPH_C32(0xa4c10000), SPH_C32(0x218d0000),
SPH_C32(0x23111587), SPH_C32(0x7913512f), SPH_C32(0x1d28ac88),
SPH_C32(0x378dd173) },
{ SPH_C32(0xc6730000), SPH_C32(0xaf8d000c), SPH_C32(0xa4c10000),
SPH_C32(0x218d0000), SPH_C32(0x23111587), SPH_C32(0x7913512f),
SPH_C32(0x1d28ac88), SPH_C32(0x378dd173), SPH_C32(0xaf220000),
SPH_C32(0x7b6c0090), SPH_C32(0x67e20000), SPH_C32(0x8da20000),
SPH_C32(0xc7841e29), SPH_C32(0xb7b744f3), SPH_C32(0x9ac484f4),
SPH_C32(0x8b6c72bd) },
{ SPH_C32(0xcc140000), SPH_C32(0xa5630000), SPH_C32(0x5ab90780),
SPH_C32(0x3b500000), SPH_C32(0x4bd013ff), SPH_C32(0x879b3418),
SPH_C32(0x694348c1), SPH_C32(0xca5a87fe), SPH_C32(0x819e0000),
SPH_C32(0xec570000), SPH_C32(0x66320280), SPH_C32(0x95f30000),
SPH_C32(0x5da92802), SPH_C32(0x48f43cbc), SPH_C32(0xe65aa22d),
SPH_C32(0x8e67b7fa) },
{ SPH_C32(0x819e0000), SPH_C32(0xec570000), SPH_C32(0x66320280),
SPH_C32(0x95f30000), SPH_C32(0x5da92802), SPH_C32(0x48f43cbc),
SPH_C32(0xe65aa22d), SPH_C32(0x8e67b7fa), SPH_C32(0x4d8a0000),
SPH_C32(0x49340000), SPH_C32(0x3c8b0500), SPH_C32(0xaea30000),
SPH_C32(0x16793bfd), SPH_C32(0xcf6f08a4), SPH_C32(0x8f19eaec),
SPH_C32(0x443d3004) },
{ SPH_C32(0x78230000), SPH_C32(0x12fc0000), SPH_C32(0xa93a0b80),
SPH_C32(0x90a50000), SPH_C32(0x713e2879), SPH_C32(0x7ee98924),
SPH_C32(0xf08ca062), SPH_C32(0x636f8bab), SPH_C32(0x02af0000),
SPH_C32(0xb7280000), SPH_C32(0xba1c0300), SPH_C32(0x56980000),
SPH_C32(0xba8d45d3), SPH_C32(0x8048c667), SPH_C32(0xa95c149a),
SPH_C32(0xf4f6ea7b) },
{ SPH_C32(0x02af0000), SPH_C32(0xb7280000), SPH_C32(0xba1c0300),
SPH_C32(0x56980000), SPH_C32(0xba8d45d3), SPH_C32(0x8048c667),
SPH_C32(0xa95c149a), SPH_C32(0xf4f6ea7b), SPH_C32(0x7a8c0000),
SPH_C32(0xa5d40000), SPH_C32(0x13260880), SPH_C32(0xc63d0000),
SPH_C32(0xcbb36daa), SPH_C32(0xfea14f43), SPH_C32(0x59d0b4f8),
SPH_C32(0x979961d0) },
{ SPH_C32(0xac480000), SPH_C32(0x1ba60000), SPH_C32(0x45fb1380),
SPH_C32(0x03430000), SPH_C32(0x5a85316a), SPH_C32(0x1fb250b6),
SPH_C32(0xfe72c7fe), SPH_C32(0x91e478f6), SPH_C32(0x1e4e0000),
SPH_C32(0xdecf0000), SPH_C32(0x6df80180), SPH_C32(0x77240000),
SPH_C32(0xec47079e), SPH_C32(0xf4a0694e), SPH_C32(0xcda31812),
SPH_C32(0x98aa496e) },
{ SPH_C32(0x1e4e0000), SPH_C32(0xdecf0000), SPH_C32(0x6df80180),
SPH_C32(0x77240000), SPH_C32(0xec47079e), SPH_C32(0xf4a0694e),
SPH_C32(0xcda31812), SPH_C32(0x98aa496e), SPH_C32(0xb2060000),
SPH_C32(0xc5690000), SPH_C32(0x28031200), SPH_C32(0x74670000),
SPH_C32(0xb6c236f4), SPH_C32(0xeb1239f8), SPH_C32(0x33d1dfec),
SPH_C32(0x094e3198) },
{ SPH_C32(0xaec30000), SPH_C32(0x9c4f0001), SPH_C32(0x79d1e000),
SPH_C32(0x2c150000), SPH_C32(0x45cc75b3), SPH_C32(0x6650b736),
SPH_C32(0xab92f78f), SPH_C32(0xa312567b), SPH_C32(0xdb250000),
SPH_C32(0x09290000), SPH_C32(0x49aac000), SPH_C32(0x81e10000),
SPH_C32(0xcafe6b59), SPH_C32(0x42793431), SPH_C32(0x43566b76),
SPH_C32(0xe86cba2e) },
{ SPH_C32(0xdb250000), SPH_C32(0x09290000), SPH_C32(0x49aac000),
SPH_C32(0x81e10000), SPH_C32(0xcafe6b59), SPH_C32(0x42793431),
SPH_C32(0x43566b76), SPH_C32(0xe86cba2e), SPH_C32(0x75e60000),
SPH_C32(0x95660001), SPH_C32(0x307b2000), SPH_C32(0xadf40000),
SPH_C32(0x8f321eea), SPH_C32(0x24298307), SPH_C32(0xe8c49cf9),
SPH_C32(0x4b7eec55) },
{ SPH_C32(0x58430000), SPH_C32(0x807e0000), SPH_C32(0x78330001),
SPH_C32(0xc66b3800), SPH_C32(0xe7375cdc), SPH_C32(0x79ad3fdd),
SPH_C32(0xac73fe6f), SPH_C32(0x3a4479b1), SPH_C32(0x1d5a0000),
SPH_C32(0x2b720000), SPH_C32(0x488d0000), SPH_C32(0xaf611800),
SPH_C32(0x25cb2ec5), SPH_C32(0xc879bfd0), SPH_C32(0x81a20429),
SPH_C32(0x1e7536a6) },
{ SPH_C32(0x1d5a0000), SPH_C32(0x2b720000), SPH_C32(0x488d0000),
SPH_C32(0xaf611800), SPH_C32(0x25cb2ec5), SPH_C32(0xc879bfd0),
SPH_C32(0x81a20429), SPH_C32(0x1e7536a6), SPH_C32(0x45190000),
SPH_C32(0xab0c0000), SPH_C32(0x30be0001), SPH_C32(0x690a2000),
SPH_C32(0xc2fc7219), SPH_C32(0xb1d4800d), SPH_C32(0x2dd1fa46),
SPH_C32(0x24314f17) },
{ SPH_C32(0xa53b0000), SPH_C32(0x14260000), SPH_C32(0x4e30001e),
SPH_C32(0x7cae0000), SPH_C32(0x8f9e0dd5), SPH_C32(0x78dfaa3d),
SPH_C32(0xf73168d8), SPH_C32(0x0b1b4946), SPH_C32(0x07ed0000),
SPH_C32(0xb2500000), SPH_C32(0x8774000a), SPH_C32(0x970d0000),
SPH_C32(0x437223ae), SPH_C32(0x48c76ea4), SPH_C32(0xf4786222),
SPH_C32(0x9075b1ce) },
{ SPH_C32(0x07ed0000), SPH_C32(0xb2500000), SPH_C32(0x8774000a),
SPH_C32(0x970d0000), SPH_C32(0x437223ae), SPH_C32(0x48c76ea4),
SPH_C32(0xf4786222), SPH_C32(0x9075b1ce), SPH_C32(0xa2d60000),
SPH_C32(0xa6760000), SPH_C32(0xc9440014), SPH_C32(0xeba30000),
SPH_C32(0xccec2e7b), SPH_C32(0x3018c499), SPH_C32(0x03490afa),
SPH_C32(0x9b6ef888) },
{ SPH_C32(0x88980000), SPH_C32(0x1f940000), SPH_C32(0x7fcf002e),
SPH_C32(0xfb4e0000), SPH_C32(0xf158079a), SPH_C32(0x61ae9167),
SPH_C32(0xa895706c), SPH_C32(0xe6107494), SPH_C32(0x0bc20000),
SPH_C32(0xdb630000), SPH_C32(0x7e88000c), SPH_C32(0x15860000),
SPH_C32(0x91fd48f3), SPH_C32(0x7581bb43), SPH_C32(0xf460449e),
SPH_C32(0xd8b61463) },
{ SPH_C32(0x0bc20000), SPH_C32(0xdb630000), SPH_C32(0x7e88000c),
SPH_C32(0x15860000), SPH_C32(0x91fd48f3), SPH_C32(0x7581bb43),
SPH_C32(0xf460449e), SPH_C32(0xd8b61463), SPH_C32(0x835a0000),
SPH_C32(0xc4f70000), SPH_C32(0x01470022), SPH_C32(0xeec80000),
SPH_C32(0x60a54f69), SPH_C32(0x142f2a24), SPH_C32(0x5cf534f2),
SPH_C32(0x3ea660f7) },
{ SPH_C32(0x52500000), SPH_C32(0x29540000), SPH_C32(0x6a61004e),
SPH_C32(0xf0ff0000), SPH_C32(0x9a317eec), SPH_C32(0x452341ce),
SPH_C32(0xcf568fe5), SPH_C32(0x5303130f), SPH_C32(0x538d0000),
SPH_C32(0xa9fc0000), SPH_C32(0x9ef70006), SPH_C32(0x56ff0000),
SPH_C32(0x0ae4004e), SPH_C32(0x92c5cdf9), SPH_C32(0xa9444018),
SPH_C32(0x7f975691) },
{ SPH_C32(0x538d0000), SPH_C32(0xa9fc0000), SPH_C32(0x9ef70006),
SPH_C32(0x56ff0000), SPH_C32(0x0ae4004e), SPH_C32(0x92c5cdf9),
SPH_C32(0xa9444018), SPH_C32(0x7f975691), SPH_C32(0x01dd0000),
SPH_C32(0x80a80000), SPH_C32(0xf4960048), SPH_C32(0xa6000000),
SPH_C32(0x90d57ea2), SPH_C32(0xd7e68c37), SPH_C32(0x6612cffd),
SPH_C32(0x2c94459e) },
{ SPH_C32(0xe6280000), SPH_C32(0x4c4b0000), SPH_C32(0xa8550000),
SPH_C32(0xd3d002e0), SPH_C32(0xd86130b8), SPH_C32(0x98a7b0da),
SPH_C32(0x289506b4), SPH_C32(0xd75a4897), SPH_C32(0xf0c50000),
SPH_C32(0x59230000), SPH_C32(0x45820000), SPH_C32(0xe18d00c0),
SPH_C32(0x3b6d0631), SPH_C32(0xc2ed5699), SPH_C32(0xcbe0fe1c),
SPH_C32(0x56a7b19f) },
{ SPH_C32(0xf0c50000), SPH_C32(0x59230000), SPH_C32(0x45820000),
SPH_C32(0xe18d00c0), SPH_C32(0x3b6d0631), SPH_C32(0xc2ed5699),
SPH_C32(0xcbe0fe1c), SPH_C32(0x56a7b19f), SPH_C32(0x16ed0000),
SPH_C32(0x15680000), SPH_C32(0xedd70000), SPH_C32(0x325d0220),
SPH_C32(0xe30c3689), SPH_C32(0x5a4ae643), SPH_C32(0xe375f8a8),
SPH_C32(0x81fdf908) },
{ SPH_C32(0xb4310000), SPH_C32(0x77330000), SPH_C32(0xb15d0000),
SPH_C32(0x7fd004e0), SPH_C32(0x78a26138), SPH_C32(0xd116c35d),
SPH_C32(0xd256d489), SPH_C32(0x4e6f74de), SPH_C32(0xe3060000),
SPH_C32(0xbdc10000), SPH_C32(0x87130000), SPH_C32(0xbff20060),
SPH_C32(0x2eba0a1a), SPH_C32(0x8db53751), SPH_C32(0x73c5ab06),
SPH_C32(0x5bd61539) },
{ SPH_C32(0xe3060000), SPH_C32(0xbdc10000), SPH_C32(0x87130000),
SPH_C32(0xbff20060), SPH_C32(0x2eba0a1a), SPH_C32(0x8db53751),
SPH_C32(0x73c5ab06), SPH_C32(0x5bd61539), SPH_C32(0x57370000),
SPH_C32(0xcaf20000), SPH_C32(0x364e0000), SPH_C32(0xc0220480),
SPH_C32(0x56186b22), SPH_C32(0x5ca3f40c), SPH_C32(0xa1937f8f),
SPH_C32(0x15b961e7) },
{ SPH_C32(0x02f20000), SPH_C32(0xa2810000), SPH_C32(0x873f0000),
SPH_C32(0xe36c7800), SPH_C32(0x1e1d74ef), SPH_C32(0x073d2bd6),
SPH_C32(0xc4c23237), SPH_C32(0x7f32259e), SPH_C32(0xbadd0000),
SPH_C32(0x13ad0000), SPH_C32(0xb7e70000), SPH_C32(0xf7282800),
SPH_C32(0xdf45144d), SPH_C32(0x361ac33a), SPH_C32(0xea5a8d14),
SPH_C32(0x2a2c18f0) },
{ SPH_C32(0xbadd0000), SPH_C32(0x13ad0000), SPH_C32(0xb7e70000),
SPH_C32(0xf7282800), SPH_C32(0xdf45144d), SPH_C32(0x361ac33a),
SPH_C32(0xea5a8d14), SPH_C32(0x2a2c18f0), SPH_C32(0xb82f0000),
SPH_C32(0xb12c0000), SPH_C32(0x30d80000), SPH_C32(0x14445000),
SPH_C32(0xc15860a2), SPH_C32(0x3127e8ec), SPH_C32(0x2e98bf23),
SPH_C32(0x551e3d6e) },
{ SPH_C32(0x1e6c0000), SPH_C32(0xc4420000), SPH_C32(0x8a2e0000),
SPH_C32(0xbcb6b800), SPH_C32(0x2c4413b6), SPH_C32(0x8bfdd3da),
SPH_C32(0x6a0c1bc8), SPH_C32(0xb99dc2eb), SPH_C32(0x92560000),
SPH_C32(0x1eda0000), SPH_C32(0xea510000), SPH_C32(0xe8b13000),
SPH_C32(0xa93556a5), SPH_C32(0xebfb6199), SPH_C32(0xb15c2254),
SPH_C32(0x33c5244f) },
{ SPH_C32(0x92560000), SPH_C32(0x1eda0000), SPH_C32(0xea510000),
SPH_C32(0xe8b13000), SPH_C32(0xa93556a5), SPH_C32(0xebfb6199),
SPH_C32(0xb15c2254), SPH_C32(0x33c5244f), SPH_C32(0x8c3a0000),
SPH_C32(0xda980000), SPH_C32(0x607f0000), SPH_C32(0x54078800),
SPH_C32(0x85714513), SPH_C32(0x6006b243), SPH_C32(0xdb50399c),
SPH_C32(0x8a58e6a4) },
{ SPH_C32(0x033d0000), SPH_C32(0x08b30000), SPH_C32(0xf33a0000),
SPH_C32(0x3ac20007), SPH_C32(0x51298a50), SPH_C32(0x6b6e661f),
SPH_C32(0x0ea5cfe3), SPH_C32(0xe6da7ffe), SPH_C32(0xa8da0000),
SPH_C32(0x96be0000), SPH_C32(0x5c1d0000), SPH_C32(0x07da0002),
SPH_C32(0x7d669583), SPH_C32(0x1f98708a), SPH_C32(0xbb668808),
SPH_C32(0xda878000) },
{ SPH_C32(0xa8da0000), SPH_C32(0x96be0000), SPH_C32(0x5c1d0000),
SPH_C32(0x07da0002), SPH_C32(0x7d669583), SPH_C32(0x1f98708a),
SPH_C32(0xbb668808), SPH_C32(0xda878000), SPH_C32(0xabe70000),
SPH_C32(0x9e0d0000), SPH_C32(0xaf270000), SPH_C32(0x3d180005),
SPH_C32(0x2c4f1fd3), SPH_C32(0x74f61695), SPH_C32(0xb5c347eb),
SPH_C32(0x3c5dfffe) },
{ SPH_C32(0x01930000), SPH_C32(0xe7820000), SPH_C32(0xedfb0000),
SPH_C32(0xcf0c000b), SPH_C32(0x8dd08d58), SPH_C32(0xbca3b42e),
SPH_C32(0x063661e1), SPH_C32(0x536f9e7b), SPH_C32(0x92280000),
SPH_C32(0xdc850000), SPH_C32(0x57fa0000), SPH_C32(0x56dc0003),
SPH_C32(0xbae92316), SPH_C32(0x5aefa30c), SPH_C32(0x90cef752),
SPH_C32(0x7b1675d7) },
{ SPH_C32(0x92280000), SPH_C32(0xdc850000), SPH_C32(0x57fa0000),
SPH_C32(0x56dc0003), SPH_C32(0xbae92316), SPH_C32(0x5aefa30c),
SPH_C32(0x90cef752), SPH_C32(0x7b1675d7), SPH_C32(0x93bb0000),
SPH_C32(0x3b070000), SPH_C32(0xba010000), SPH_C32(0x99d00008),
SPH_C32(0x3739ae4e), SPH_C32(0xe64c1722), SPH_C32(0x96f896b3),
SPH_C32(0x2879ebac) },
{ SPH_C32(0x5fa80000), SPH_C32(0x56030000), SPH_C32(0x43ae0000),
SPH_C32(0x64f30013), SPH_C32(0x257e86bf), SPH_C32(0x1311944e),
SPH_C32(0x541e95bf), SPH_C32(0x8ea4db69), SPH_C32(0x00440000),
SPH_C32(0x7f480000), SPH_C32(0xda7c0000), SPH_C32(0x2a230001),
SPH_C32(0x3badc9cc), SPH_C32(0xa9b69c87), SPH_C32(0x030a9e60),
SPH_C32(0xbe0a679e) },
{ SPH_C32(0x00440000), SPH_C32(0x7f480000), SPH_C32(0xda7c0000),
SPH_C32(0x2a230001), SPH_C32(0x3badc9cc), SPH_C32(0xa9b69c87),
SPH_C32(0x030a9e60), SPH_C32(0xbe0a679e), SPH_C32(0x5fec0000),
SPH_C32(0x294b0000), SPH_C32(0x99d20000), SPH_C32(0x4ed00012),
SPH_C32(0x1ed34f73), SPH_C32(0xbaa708c9), SPH_C32(0x57140bdf),
SPH_C32(0x30aebcf7) },
{ SPH_C32(0xee930000), SPH_C32(0xd6070000), SPH_C32(0x92c10000),
SPH_C32(0x2b9801e0), SPH_C32(0x9451287c), SPH_C32(0x3b6cfb57),
SPH_C32(0x45312374), SPH_C32(0x201f6a64), SPH_C32(0x7b280000),
SPH_C32(0x57420000), SPH_C32(0xa9e50000), SPH_C32(0x634300a0),
SPH_C32(0x9edb442f), SPH_C32(0x6d9995bb), SPH_C32(0x27f83b03),
SPH_C32(0xc7ff60f0) },
{ SPH_C32(0x7b280000), SPH_C32(0x57420000), SPH_C32(0xa9e50000),
SPH_C32(0x634300a0), SPH_C32(0x9edb442f), SPH_C32(0x6d9995bb),
SPH_C32(0x27f83b03), SPH_C32(0xc7ff60f0), SPH_C32(0x95bb0000),
SPH_C32(0x81450000), SPH_C32(0x3b240000), SPH_C32(0x48db0140),
SPH_C32(0x0a8a6c53), SPH_C32(0x56f56eec), SPH_C32(0x62c91877),
SPH_C32(0xe7e00a94) }
};
#define INPUT_BIG { \
const uint32_t *tp = &d_T512[0][0]; \
unsigned u, v; \
m0 = 0; \
m1 = 0; \
m2 = 0; \
m3 = 0; \
m4 = 0; \
m5 = 0; \
m6 = 0; \
m7 = 0; \
m8 = 0; \
m9 = 0; \
mA = 0; \
mB = 0; \
mC = 0; \
mD = 0; \
mE = 0; \
mF = 0; \
for (u = 0; u < 8; u ++) { \
unsigned db = buf(u); \
for (v = 0; v < 8; v ++, db >>= 1) { \
uint32_t dm = SPH_T32(-(uint32_t)(db & 1)); \
m0 ^= dm & *tp ++; \
m1 ^= dm & *tp ++; \
m2 ^= dm & *tp ++; \
m3 ^= dm & *tp ++; \
m4 ^= dm & *tp ++; \
m5 ^= dm & *tp ++; \
m6 ^= dm & *tp ++; \
m7 ^= dm & *tp ++; \
m8 ^= dm & *tp ++; \
m9 ^= dm & *tp ++; \
mA ^= dm & *tp ++; \
mB ^= dm & *tp ++; \
mC ^= dm & *tp ++; \
mD ^= dm & *tp ++; \
mE ^= dm & *tp ++; \
mF ^= dm & *tp ++; \
} \
} \
}
/***************************************************/
// Die Hash-Funktion
__global__ void x13_hamsi512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector)
{
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
{
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
int hashPosition = nounce - startNounce;
uint32_t *Hash = (uint32_t*)&g_hash[hashPosition<<3];
unsigned char *h1 = (unsigned char *)Hash;
uint32_t c0 = SPH_C32(0x73746565), c1 = SPH_C32(0x6c706172), c2 = SPH_C32(0x6b204172), c3 = SPH_C32(0x656e6265);
uint32_t c4 = SPH_C32(0x72672031), c5 = SPH_C32(0x302c2062), c6 = SPH_C32(0x75732032), c7 = SPH_C32(0x3434362c);
uint32_t c8 = SPH_C32(0x20422d33), c9 = SPH_C32(0x30303120), cA = SPH_C32(0x4c657576), cB = SPH_C32(0x656e2d48);
uint32_t cC = SPH_C32(0x65766572), cD = SPH_C32(0x6c65652c), cE = SPH_C32(0x2042656c), cF = SPH_C32(0x6769756d);
uint32_t m0, m1, m2, m3, m4, m5, m6, m7;
uint32_t m8, m9, mA, mB, mC, mD, mE, mF;
uint32_t h[16] = { c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, cA, cB, cC, cD, cE, cF };
#define buf(u) (h1[i+u])
#pragma unroll 8
for(int i = 0; i < 64; i += 8) {
INPUT_BIG;
P_BIG;
T_BIG;
}
#undef buf
#define buf(u) (u == 0 ? 0x80 : 0)
INPUT_BIG;
P_BIG;
T_BIG;
#undef buf
#define buf(u) (u == 6 ? 2 : 0)
INPUT_BIG;
PF_BIG;
T_BIG;
#pragma unroll 16
for (int i = 0; i < 16; i++)
Hash[i] = SWAB32(h[i]);
}
}
__host__ void x13_hamsi512_cpu_init(int thr_id, int threads)
{
cudaMemcpyToSymbol( d_alpha_n, alpha_n, sizeof(uint32_t)*32, 0, cudaMemcpyHostToDevice);
cudaMemcpyToSymbol( d_alpha_f, alpha_f, sizeof(uint32_t)*32, 0, cudaMemcpyHostToDevice);
cudaMemcpyToSymbol( d_T512, T512, sizeof(uint32_t)*64*16, 0, cudaMemcpyHostToDevice);
}
__host__ void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
{
const int threadsperblock = 256;
// berechne wie viele Thread Blocks wir brauchen
dim3 grid((threads + threadsperblock-1)/threadsperblock);
dim3 block(threadsperblock);
// Größe des dynamischen Shared Memory Bereichs
size_t shared_size = 0;
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
x13_hamsi512_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
MyStreamSynchronize(NULL, order, thr_id);
}

284
x13/x13.cu

@ -0,0 +1,284 @@ @@ -0,0 +1,284 @@
/*
* X13 algorithm built on cbuchner1's original X11
*
*/
extern "C"
{
#include "sph/sph_blake.h"
#include "sph/sph_bmw.h"
#include "sph/sph_groestl.h"
#include "sph/sph_skein.h"
#include "sph/sph_jh.h"
#include "sph/sph_keccak.h"
#include "sph/sph_luffa.h"
#include "sph/sph_cubehash.h"
#include "sph/sph_shavite.h"
#include "sph/sph_simd.h"
#include "sph/sph_echo.h"
#include "sph/sph_hamsi.h"
#include "sph/sph_fugue.h"
#include "miner.h"
}
// aus cpu-miner.c
extern int device_map[8];
// Speicher für Input/Output der verketteten Hashfunktionen
static uint32_t *d_hash[8];
extern void quark_blake512_cpu_init(int thr_id, int threads);
extern void quark_blake512_cpu_setBlock_80(void *pdata);
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_bmw512_cpu_init(int thr_id, int threads);
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_groestl512_cpu_init(int thr_id, int threads);
extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
//extern void quark_doublegroestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_skein512_cpu_init(int thr_id, int threads);
extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_keccak512_cpu_init(int thr_id, int threads);
extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_jh512_cpu_init(int thr_id, int threads);
extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_luffa512_cpu_init(int thr_id, int threads);
extern void x11_luffa512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_cubehash512_cpu_init(int thr_id, int threads);
extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_shavite512_cpu_init(int thr_id, int threads);
extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_simd512_cpu_init(int thr_id, int threads);
extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_echo512_cpu_init(int thr_id, int threads);
extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x13_hamsi512_cpu_init(int thr_id, int threads);
extern void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x13_fugue512_cpu_init(int thr_id, int threads);
extern void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_check_cpu_init(int thr_id, int threads);
extern void quark_check_cpu_setTarget(const void *ptarget);
extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
extern void quark_compactTest_cpu_init(int thr_id, int threads);
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
int order);
// X13 Hashfunktion
inline void x13hash(void *state, const void *input)
{
// blake1-bmw2-grs3-skein4-jh5-keccak6-luffa7-cubehash8-shavite9-simd10-echo11-hamsi12-fugue13
sph_blake512_context ctx_blake;
sph_bmw512_context ctx_bmw;
sph_groestl512_context ctx_groestl;
sph_jh512_context ctx_jh;
sph_keccak512_context ctx_keccak;
sph_skein512_context ctx_skein;
sph_luffa512_context ctx_luffa;
sph_cubehash512_context ctx_cubehash;
sph_shavite512_context ctx_shavite;
sph_simd512_context ctx_simd;
sph_echo512_context ctx_echo;
sph_hamsi512_context ctx_hamsi;
sph_fugue512_context ctx_fugue;
uint32_t hash[16];
sph_blake512_init(&ctx_blake);
// ZBLAKE;
sph_blake512 (&ctx_blake, input, 80);
sph_blake512_close(&ctx_blake, (void*) hash);
sph_bmw512_init(&ctx_bmw);
// ZBMW;
sph_bmw512 (&ctx_bmw, (const void*) hash, 64);
sph_bmw512_close(&ctx_bmw, (void*) hash);
sph_groestl512_init(&ctx_groestl);
// ZGROESTL;
sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
sph_groestl512_close(&ctx_groestl, (void*) hash);
sph_skein512_init(&ctx_skein);
// ZSKEIN;
sph_skein512 (&ctx_skein, (const void*) hash, 64);
sph_skein512_close(&ctx_skein, (void*) hash);
sph_jh512_init(&ctx_jh);
// ZJH;
sph_jh512 (&ctx_jh, (const void*) hash, 64);
sph_jh512_close(&ctx_jh, (void*) hash);
sph_keccak512_init(&ctx_keccak);
// ZKECCAK;
sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
sph_keccak512_close(&ctx_keccak, (void*) hash);
sph_luffa512_init(&ctx_luffa);
// ZLUFFA;
sph_luffa512 (&ctx_luffa, (const void*) hash, 64);
sph_luffa512_close (&ctx_luffa, (void*) hash);
sph_cubehash512_init(&ctx_cubehash);
// ZCUBEHASH;
sph_cubehash512 (&ctx_cubehash, (const void*) hash, 64);
sph_cubehash512_close(&ctx_cubehash, (void*) hash);
sph_shavite512_init(&ctx_shavite);
// ZSHAVITE;
sph_shavite512 (&ctx_shavite, (const void*) hash, 64);
sph_shavite512_close(&ctx_shavite, (void*) hash);
sph_simd512_init(&ctx_simd);
// ZSIMD
sph_simd512 (&ctx_simd, (const void*) hash, 64);
sph_simd512_close(&ctx_simd, (void*) hash);
sph_echo512_init(&ctx_echo);
// ZECHO
sph_echo512 (&ctx_echo, (const void*) hash, 64);
sph_echo512_close(&ctx_echo, (void*) hash);
sph_hamsi512_init(&ctx_hamsi);
sph_hamsi512 (&ctx_hamsi, (const void*) hash, 64);
sph_hamsi512_close(&ctx_hamsi, (void*) hash);
sph_fugue512_init(&ctx_fugue);
sph_fugue512 (&ctx_fugue, (const void*) hash, 64);
sph_fugue512_close(&ctx_fugue, (void*) hash);
memcpy(state, hash, 32);
}
extern bool opt_benchmark;
extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done)
{
const uint32_t first_nonce = pdata[19];
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x0000ff;
const uint32_t Htarg = ptarget[7];
const int throughput = 256*256*8;
static bool init[8] = {0,0,0,0,0,0,0,0};
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
// Konstanten kopieren, Speicher belegen
cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput);
quark_skein512_cpu_init(thr_id, throughput);
quark_bmw512_cpu_init(thr_id, throughput);
quark_keccak512_cpu_init(thr_id, throughput);
quark_jh512_cpu_init(thr_id, throughput);
x11_luffa512_cpu_init(thr_id, throughput);
x11_cubehash512_cpu_init(thr_id, throughput);
x11_shavite512_cpu_init(thr_id, throughput);
x11_simd512_cpu_init(thr_id, throughput);
x11_echo512_cpu_init(thr_id, throughput);
x13_hamsi512_cpu_init(thr_id, throughput);
x13_fugue512_cpu_init(thr_id, throughput);
quark_check_cpu_init(thr_id, throughput);
init[thr_id] = true;
}
//unsigned char echobefore[64], echoafter[64];
uint32_t endiandata[20];
for (int k=0; k < 20; k++)
be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
quark_blake512_cpu_setBlock_80((void*)endiandata);
quark_check_cpu_setTarget(ptarget);
do {
int order = 0;
// erstes Blake512 Hash mit CUDA
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
// das ist der unbedingte Branch für BMW512
quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Groestl512
quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Skein512
quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für JH512
quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Keccak512
quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Luffa512
x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Cubehash512
x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Shavite512
x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für SIMD512
x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für ECHO512
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// Scan nach Gewinner Hashes auf der GPU
uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
if (foundNonce != 0xffffffff)
{
uint32_t vhash64[8];
be32enc(&endiandata[19], foundNonce);
x13hash(vhash64, endiandata);
if( (vhash64[7]<=Htarg) && fulltest(vhash64, ptarget) ) {
pdata[19] = foundNonce;
*hashes_done = foundNonce - first_nonce + 1;
return 1;
} else {
applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
}
}
pdata[19] += throughput;
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}
Loading…
Cancel
Save