Browse Source

adding third party X13 and Diamond Groestl code contributions.

2upstream
Christian Buchner 10 years ago
parent
commit
d99b91ea65
  1. 34
      ccminer.vcxproj
  2. 23
      ccminer.vcxproj.filters
  3. 18
      cpu-miner.c
  4. 4
      miner.h
  5. 867
      sph/hamsi.c
  6. 39648
      sph/hamsi_helper.c
  7. 321
      sph/sph_hamsi.h
  8. 8
      x11/cuda_x11_echo.cu
  9. 711
      x13/cuda_x13_fugue512.cu
  10. 764
      x13/cuda_x13_hamsi512.cu
  11. 284
      x13/x13.cu

34
ccminer.vcxproj

@ -245,6 +245,8 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
<ClCompile Include="sph\shavite.c" /> <ClCompile Include="sph\shavite.c" />
<ClCompile Include="sph\simd.c" /> <ClCompile Include="sph\simd.c" />
<ClCompile Include="sph\skein.c" /> <ClCompile Include="sph\skein.c" />
<ClCompile Include="sph\hamsi.c" />
<ClCompile Include="sph\hamsi_helper.c" />
<ClCompile Include="util.c"> <ClCompile Include="util.c">
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">/TP %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">/TP %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">/TP %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">/TP %(AdditionalOptions)</AdditionalOptions>
@ -275,7 +277,6 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
<ClInclude Include="sph\sph_bmw.h" /> <ClInclude Include="sph\sph_bmw.h" />
<ClInclude Include="sph\sph_cubehash.h" /> <ClInclude Include="sph\sph_cubehash.h" />
<ClInclude Include="sph\sph_echo.h" /> <ClInclude Include="sph\sph_echo.h" />
<ClInclude Include="sph\sph_fugue.h" />
<ClInclude Include="sph\sph_groestl.h" /> <ClInclude Include="sph\sph_groestl.h" />
<ClInclude Include="sph\sph_jh.h" /> <ClInclude Include="sph\sph_jh.h" />
<ClInclude Include="sph\sph_keccak.h" /> <ClInclude Include="sph\sph_keccak.h" />
@ -283,16 +284,11 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
<ClInclude Include="sph\sph_shavite.h" /> <ClInclude Include="sph\sph_shavite.h" />
<ClInclude Include="sph\sph_simd.h" /> <ClInclude Include="sph\sph_simd.h" />
<ClInclude Include="sph\sph_skein.h" /> <ClInclude Include="sph\sph_skein.h" />
<ClInclude Include="sph\sph_hamsi.h" />
<ClInclude Include="sph\sph_types.h" /> <ClInclude Include="sph\sph_types.h" />
<ClInclude Include="uint256.h" /> <ClInclude Include="uint256.h" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<CudaCompile Include="bitslice_transformations_quad.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</CudaCompile>
<CudaCompile Include="cuda_fugue256.cu"> <CudaCompile Include="cuda_fugue256.cu">
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
@ -317,12 +313,6 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
</CudaCompile> </CudaCompile>
<CudaCompile Include="groestl_functions_quad.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</CudaCompile>
<CudaCompile Include="heavy\cuda_blake512.cu"> <CudaCompile Include="heavy\cuda_blake512.cu">
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
@ -501,6 +491,24 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
</CudaCompile> </CudaCompile>
<CudaCompile Include="x13\cuda_x13_hamsi512.cu">
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
</CudaCompile>
<CudaCompile Include="x13\cuda_x13_fugue512.cu">
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
</CudaCompile>
<CudaCompile Include="x13\x13.cu">
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
</CudaCompile>
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">

23
ccminer.vcxproj.filters

@ -55,6 +55,9 @@
<Filter Include="Source Files\CUDA\x11"> <Filter Include="Source Files\CUDA\x11">
<UniqueIdentifier>{dd751f2d-bfd6-42c1-8f9b-cbe94e539353}</UniqueIdentifier> <UniqueIdentifier>{dd751f2d-bfd6-42c1-8f9b-cbe94e539353}</UniqueIdentifier>
</Filter> </Filter>
<Filter Include="Source Files\CUDA\x13">
<UniqueIdentifier>{d67a2af7-4851-4d21-910e-87791bc8ee35}</UniqueIdentifier>
</Filter>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClCompile Include="compat\jansson\dump.c"> <ClCompile Include="compat\jansson\dump.c">
@ -144,6 +147,12 @@
<ClCompile Include="myriadgroestl.cpp"> <ClCompile Include="myriadgroestl.cpp">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="sph\hamsi.c">
<Filter>Source Files\sph</Filter>
</ClCompile>
<ClCompile Include="sph\hamsi_helper.c">
<Filter>Source Files\sph</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="compat.h"> <ClInclude Include="compat.h">
@ -242,6 +251,9 @@
<ClInclude Include="cuda_helper.h"> <ClInclude Include="cuda_helper.h">
<Filter>Header Files\CUDA</Filter> <Filter>Header Files\CUDA</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="sph\sph_hamsi.h">
<Filter>Header Files\sph</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<CudaCompile Include="cuda_fugue256.cu"> <CudaCompile Include="cuda_fugue256.cu">
@ -340,11 +352,14 @@
<CudaCompile Include="x11\simd_functions.cu"> <CudaCompile Include="x11\simd_functions.cu">
<Filter>Source Files\CUDA\x11</Filter> <Filter>Source Files\CUDA\x11</Filter>
</CudaCompile> </CudaCompile>
<CudaCompile Include="bitslice_transformations_quad.cu"> <CudaCompile Include="x13\cuda_x13_fugue512.cu">
<Filter>Source Files\CUDA</Filter> <Filter>Source Files\CUDA\x13</Filter>
</CudaCompile> </CudaCompile>
<CudaCompile Include="groestl_functions_quad.cu"> <CudaCompile Include="x13\cuda_x13_hamsi512.cu">
<Filter>Source Files\CUDA</Filter> <Filter>Source Files\CUDA\x13</Filter>
</CudaCompile>
<CudaCompile Include="x13\x13.cu">
<Filter>Source Files\CUDA\x13</Filter>
</CudaCompile> </CudaCompile>
</ItemGroup> </ItemGroup>
</Project> </Project>

18
cpu-miner.c

@ -131,7 +131,9 @@ typedef enum {
ALGO_QUARK, ALGO_QUARK,
ALGO_ANIME, ALGO_ANIME,
ALGO_NIST5, ALGO_NIST5,
ALGO_X11 ALGO_X11,
ALGO_X13,
ALGO_DMD_GR,
} sha256_algos; } sha256_algos;
static const char *algo_names[] = { static const char *algo_names[] = {
@ -144,7 +146,9 @@ static const char *algo_names[] = {
"quark", "quark",
"anime", "anime",
"nist5", "nist5",
"x11" "x11",
"x13",
"dmd-gr",
}; };
bool opt_debug = false; bool opt_debug = false;
@ -217,6 +221,8 @@ Options:\n\
anime Animecoin hash\n\ anime Animecoin hash\n\
nist5 NIST5 (TalkCoin) hash\n\ nist5 NIST5 (TalkCoin) hash\n\
x11 X11 (DarkCoin) hash\n\ x11 X11 (DarkCoin) hash\n\
x13 X13 (MaruCoin) hash\n\
dmd-gr Diamond-Groestl hash\n\
-d, --devices takes a comma separated list of CUDA devices to use.\n\ -d, --devices takes a comma separated list of CUDA devices to use.\n\
Device IDs start counting from 0! Alternatively takes\n\ Device IDs start counting from 0! Alternatively takes\n\
string names of your cards like gtx780ti or gt640#2\n\ string names of your cards like gtx780ti or gt640#2\n\
@ -766,7 +772,7 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
if (opt_algo == ALGO_JACKPOT) if (opt_algo == ALGO_JACKPOT)
diff_to_target(work->target, sctx->job.diff / (65536.0 * opt_difficulty)); diff_to_target(work->target, sctx->job.diff / (65536.0 * opt_difficulty));
else if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL) else if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL || opt_algo == ALGO_DMD_GR)
diff_to_target(work->target, sctx->job.diff / (256.0 * opt_difficulty)); diff_to_target(work->target, sctx->job.diff / (256.0 * opt_difficulty));
else else
diff_to_target(work->target, sctx->job.diff / opt_difficulty); diff_to_target(work->target, sctx->job.diff / opt_difficulty);
@ -878,6 +884,7 @@ static void *miner_thread(void *userdata)
break; break;
case ALGO_GROESTL: case ALGO_GROESTL:
case ALGO_DMD_GR:
rc = scanhash_groestlcoin(thr_id, work.data, work.target, rc = scanhash_groestlcoin(thr_id, work.data, work.target,
max_nonce, &hashes_done); max_nonce, &hashes_done);
break; break;
@ -912,6 +919,11 @@ static void *miner_thread(void *userdata)
max_nonce, &hashes_done); max_nonce, &hashes_done);
break; break;
case ALGO_X13:
rc = scanhash_x13(thr_id, work.data, work.target,
max_nonce, &hashes_done);
break;
default: default:
/* should never happen */ /* should never happen */
goto out; goto out;

4
miner.h

@ -239,6 +239,10 @@ extern int scanhash_x11(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done); unsigned long *hashes_done);
extern int scanhash_x13(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done);
extern void fugue256_hash(unsigned char* output, const unsigned char* input, int len); extern void fugue256_hash(unsigned char* output, const unsigned char* input, int len);
extern void heavycoin_hash(unsigned char* output, const unsigned char* input, int len); extern void heavycoin_hash(unsigned char* output, const unsigned char* input, int len);
extern void groestlcoin_hash(unsigned char* output, const unsigned char* input, int len); extern void groestlcoin_hash(unsigned char* output, const unsigned char* input, int len);

867
sph/hamsi.c

@ -0,0 +1,867 @@
/* $Id: hamsi.c 251 2010-10-19 14:31:51Z tp $ */
/*
* Hamsi implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#include <stddef.h>
#include <string.h>
#include "sph_hamsi.h"
#ifdef __cplusplus
extern "C"{
#endif
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_HAMSI
#define SPH_SMALL_FOOTPRINT_HAMSI 1
#endif
/*
* The SPH_HAMSI_EXPAND_* define how many input bits we handle in one
* table lookup during message expansion (1 to 8, inclusive). If we note
* w the number of bits per message word (w=32 for Hamsi-224/256, w=64
* for Hamsi-384/512), r the size of a "row" in 32-bit words (r=8 for
* Hamsi-224/256, r=16 for Hamsi-384/512), and n the expansion level,
* then we will get t tables (where t=ceil(w/n)) of individual size
* 2^n*r*4 (in bytes). The last table may be shorter (e.g. with w=32 and
* n=5, there are 7 tables, but the last one uses only two bits on
* input, not five).
*
* Also, we read t rows of r words from RAM. Words in a given row are
* concatenated in RAM in that order, so most of the cost is about
* reading the first row word; comparatively, cache misses are thus
* less expensive with Hamsi-512 (r=16) than with Hamsi-256 (r=8).
*
* When n=1, tables are "special" in that we omit the first entry of
* each table (which always contains 0), so that total table size is
* halved.
*
* We thus have the following (size1 is the cumulative table size of
* Hamsi-224/256; size2 is for Hamsi-384/512; similarly, t1 and t2
* are for Hamsi-224/256 and Hamsi-384/512, respectively).
*
* n size1 size2 t1 t2
* ---------------------------------------
* 1 1024 4096 32 64
* 2 2048 8192 16 32
* 3 2688 10880 11 22
* 4 4096 16384 8 16
* 5 6272 25600 7 13
* 6 10368 41984 6 11
* 7 16896 73856 5 10
* 8 32768 131072 4 8
*
* So there is a trade-off: a lower n makes the tables fit better in
* L1 cache, but increases the number of memory accesses. The optimal
* value depends on the amount of available L1 cache and the relative
* impact of a cache miss.
*
* Experimentally, in ideal benchmark conditions (which are not necessarily
* realistic with regards to L1 cache contention), it seems that n=8 is
* the best value on "big" architectures (those with 32 kB or more of L1
* cache), while n=4 is better on "small" architectures. This was tested
* on an Intel Core2 Q6600 (both 32-bit and 64-bit mode), a PowerPC G3
* (32 kB L1 cache, hence "big"), and a MIPS-compatible Broadcom BCM3302
* (8 kB L1 cache).
*
* Note: with n=1, the 32 tables (actually implemented as one big table)
* are read entirely and sequentially, regardless of the input data,
* thus avoiding any data-dependent table access pattern.
*/
#if !defined SPH_HAMSI_EXPAND_SMALL
#if SPH_SMALL_FOOTPRINT_HAMSI
#define SPH_HAMSI_EXPAND_SMALL 4
#else
#define SPH_HAMSI_EXPAND_SMALL 8
#endif
#endif
#if !defined SPH_HAMSI_EXPAND_BIG
#define SPH_HAMSI_EXPAND_BIG 8
#endif
#ifdef _MSC_VER
#pragma warning (disable: 4146)
#endif
#include "hamsi_helper.c"
static const sph_u32 IV224[] = {
SPH_C32(0xc3967a67), SPH_C32(0xc3bc6c20), SPH_C32(0x4bc3bcc3),
SPH_C32(0xa7c3bc6b), SPH_C32(0x2c204b61), SPH_C32(0x74686f6c),
SPH_C32(0x69656b65), SPH_C32(0x20556e69)
};
/*
* This version is the one used in the Hamsi submission package for
* round 2 of the SHA-3 competition; the UTF-8 encoding is wrong and
* shall soon be corrected in the official Hamsi specification.
*
static const sph_u32 IV224[] = {
SPH_C32(0x3c967a67), SPH_C32(0x3cbc6c20), SPH_C32(0xb4c343c3),
SPH_C32(0xa73cbc6b), SPH_C32(0x2c204b61), SPH_C32(0x74686f6c),
SPH_C32(0x69656b65), SPH_C32(0x20556e69)
};
*/
static const sph_u32 IV256[] = {
SPH_C32(0x76657273), SPH_C32(0x69746569), SPH_C32(0x74204c65),
SPH_C32(0x7576656e), SPH_C32(0x2c204465), SPH_C32(0x70617274),
SPH_C32(0x656d656e), SPH_C32(0x7420456c)
};
static const sph_u32 IV384[] = {
SPH_C32(0x656b7472), SPH_C32(0x6f746563), SPH_C32(0x686e6965),
SPH_C32(0x6b2c2043), SPH_C32(0x6f6d7075), SPH_C32(0x74657220),
SPH_C32(0x53656375), SPH_C32(0x72697479), SPH_C32(0x20616e64),
SPH_C32(0x20496e64), SPH_C32(0x75737472), SPH_C32(0x69616c20),
SPH_C32(0x43727970), SPH_C32(0x746f6772), SPH_C32(0x61706879),
SPH_C32(0x2c204b61)
};
static const sph_u32 IV512[] = {
SPH_C32(0x73746565), SPH_C32(0x6c706172), SPH_C32(0x6b204172),
SPH_C32(0x656e6265), SPH_C32(0x72672031), SPH_C32(0x302c2062),
SPH_C32(0x75732032), SPH_C32(0x3434362c), SPH_C32(0x20422d33),
SPH_C32(0x30303120), SPH_C32(0x4c657576), SPH_C32(0x656e2d48),
SPH_C32(0x65766572), SPH_C32(0x6c65652c), SPH_C32(0x2042656c),
SPH_C32(0x6769756d)
};
static const sph_u32 alpha_n[] = {
SPH_C32(0xff00f0f0), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0cccc),
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00),
SPH_C32(0xaaaacccc), SPH_C32(0xf0f0ff00), SPH_C32(0xf0f0cccc),
SPH_C32(0xaaaaff00), SPH_C32(0xccccff00), SPH_C32(0xaaaaf0f0),
SPH_C32(0xaaaaf0f0), SPH_C32(0xff00cccc), SPH_C32(0xccccf0f0),
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xff00f0f0),
SPH_C32(0xff00aaaa), SPH_C32(0xf0f0cccc), SPH_C32(0xf0f0ff00),
SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00), SPH_C32(0xaaaacccc),
SPH_C32(0xaaaaff00), SPH_C32(0xf0f0cccc), SPH_C32(0xaaaaf0f0),
SPH_C32(0xccccff00), SPH_C32(0xff00cccc), SPH_C32(0xaaaaf0f0),
SPH_C32(0xff00aaaa), SPH_C32(0xccccf0f0)
};
static const sph_u32 alpha_f[] = {
SPH_C32(0xcaf9639c), SPH_C32(0x0ff0f9c0), SPH_C32(0x639c0ff0),
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9),
SPH_C32(0xf9c00ff0), SPH_C32(0x639ccaf9), SPH_C32(0x639c0ff0),
SPH_C32(0xf9c0caf9), SPH_C32(0x0ff0caf9), SPH_C32(0xf9c0639c),
SPH_C32(0xf9c0639c), SPH_C32(0xcaf90ff0), SPH_C32(0x0ff0639c),
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0xcaf9639c),
SPH_C32(0xcaf9f9c0), SPH_C32(0x639c0ff0), SPH_C32(0x639ccaf9),
SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9), SPH_C32(0xf9c00ff0),
SPH_C32(0xf9c0caf9), SPH_C32(0x639c0ff0), SPH_C32(0xf9c0639c),
SPH_C32(0x0ff0caf9), SPH_C32(0xcaf90ff0), SPH_C32(0xf9c0639c),
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0639c)
};
#define DECL_STATE_SMALL \
sph_u32 c0, c1, c2, c3, c4, c5, c6, c7;
#define READ_STATE_SMALL(sc) do { \
c0 = sc->h[0x0]; \
c1 = sc->h[0x1]; \
c2 = sc->h[0x2]; \
c3 = sc->h[0x3]; \
c4 = sc->h[0x4]; \
c5 = sc->h[0x5]; \
c6 = sc->h[0x6]; \
c7 = sc->h[0x7]; \
} while (0)
#define WRITE_STATE_SMALL(sc) do { \
sc->h[0x0] = c0; \
sc->h[0x1] = c1; \
sc->h[0x2] = c2; \
sc->h[0x3] = c3; \
sc->h[0x4] = c4; \
sc->h[0x5] = c5; \
sc->h[0x6] = c6; \
sc->h[0x7] = c7; \
} while (0)
#define s0 m0
#define s1 m1
#define s2 c0
#define s3 c1
#define s4 c2
#define s5 c3
#define s6 m2
#define s7 m3
#define s8 m4
#define s9 m5
#define sA c4
#define sB c5
#define sC c6
#define sD c7
#define sE m6
#define sF m7
#define SBOX(a, b, c, d) do { \
sph_u32 t; \
t = (a); \
(a) &= (c); \
(a) ^= (d); \
(c) ^= (b); \
(c) ^= (a); \
(d) |= t; \
(d) ^= (b); \
t ^= (c); \
(b) = (d); \
(d) |= t; \
(d) ^= (a); \
(a) &= (b); \
t ^= (a); \
(b) ^= (d); \
(b) ^= t; \
(a) = (c); \
(c) = (b); \
(b) = (d); \
(d) = SPH_T32(~t); \
} while (0)
#define L(a, b, c, d) do { \
(a) = SPH_ROTL32(a, 13); \
(c) = SPH_ROTL32(c, 3); \
(b) ^= (a) ^ (c); \
(d) ^= (c) ^ SPH_T32((a) << 3); \
(b) = SPH_ROTL32(b, 1); \
(d) = SPH_ROTL32(d, 7); \
(a) ^= (b) ^ (d); \
(c) ^= (d) ^ SPH_T32((b) << 7); \
(a) = SPH_ROTL32(a, 5); \
(c) = SPH_ROTL32(c, 22); \
} while (0)
#define ROUND_SMALL(rc, alpha) do { \
s0 ^= alpha[0x00]; \
s1 ^= alpha[0x01] ^ (sph_u32)(rc); \
s2 ^= alpha[0x02]; \
s3 ^= alpha[0x03]; \
s4 ^= alpha[0x08]; \
s5 ^= alpha[0x09]; \
s6 ^= alpha[0x0A]; \
s7 ^= alpha[0x0B]; \
s8 ^= alpha[0x10]; \
s9 ^= alpha[0x11]; \
sA ^= alpha[0x12]; \
sB ^= alpha[0x13]; \
sC ^= alpha[0x18]; \
sD ^= alpha[0x19]; \
sE ^= alpha[0x1A]; \
sF ^= alpha[0x1B]; \
SBOX(s0, s4, s8, sC); \
SBOX(s1, s5, s9, sD); \
SBOX(s2, s6, sA, sE); \
SBOX(s3, s7, sB, sF); \
L(s0, s5, sA, sF); \
L(s1, s6, sB, sC); \
L(s2, s7, s8, sD); \
L(s3, s4, s9, sE); \
} while (0)
#define P_SMALL do { \
ROUND_SMALL(0, alpha_n); \
ROUND_SMALL(1, alpha_n); \
ROUND_SMALL(2, alpha_n); \
} while (0)
#define PF_SMALL do { \
ROUND_SMALL(0, alpha_f); \
ROUND_SMALL(1, alpha_f); \
ROUND_SMALL(2, alpha_f); \
ROUND_SMALL(3, alpha_f); \
ROUND_SMALL(4, alpha_f); \
ROUND_SMALL(5, alpha_f); \
} while (0)
#define T_SMALL do { \
/* order is important */ \
c7 = (sc->h[7] ^= sB); \
c6 = (sc->h[6] ^= sA); \
c5 = (sc->h[5] ^= s9); \
c4 = (sc->h[4] ^= s8); \
c3 = (sc->h[3] ^= s3); \
c2 = (sc->h[2] ^= s2); \
c1 = (sc->h[1] ^= s1); \
c0 = (sc->h[0] ^= s0); \
} while (0)
static void
hamsi_small(sph_hamsi_small_context *sc, const unsigned char *buf, size_t num)
{
DECL_STATE_SMALL
#if !SPH_64
sph_u32 tmp;
#endif
#if SPH_64
sc->count += (sph_u64)num << 5;
#else
tmp = SPH_T32((sph_u32)num << 5);
sc->count_low = SPH_T32(sc->count_low + tmp);
sc->count_high += (sph_u32)((num >> 13) >> 14);
if (sc->count_low < tmp)
sc->count_high ++;
#endif
READ_STATE_SMALL(sc);
while (num -- > 0) {
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
INPUT_SMALL;
P_SMALL;
T_SMALL;
buf += 4;
}
WRITE_STATE_SMALL(sc);
}
static void
hamsi_small_final(sph_hamsi_small_context *sc, const unsigned char *buf)
{
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
DECL_STATE_SMALL
READ_STATE_SMALL(sc);
INPUT_SMALL;
PF_SMALL;
T_SMALL;
WRITE_STATE_SMALL(sc);
}
static void
hamsi_small_init(sph_hamsi_small_context *sc, const sph_u32 *iv)
{
sc->partial_len = 0;
memcpy(sc->h, iv, sizeof sc->h);
#if SPH_64
sc->count = 0;
#else
sc->count_high = sc->count_low = 0;
#endif
}
static void
hamsi_small_core(sph_hamsi_small_context *sc, const void *data, size_t len)
{
if (sc->partial_len != 0) {
size_t mlen;
mlen = 4 - sc->partial_len;
if (len < mlen) {
memcpy(sc->partial + sc->partial_len, data, len);
sc->partial_len += len;
return;
} else {
memcpy(sc->partial + sc->partial_len, data, mlen);
len -= mlen;
data = (const unsigned char *)data + mlen;
hamsi_small(sc, sc->partial, 1);
sc->partial_len = 0;
}
}
hamsi_small(sc, data, (len >> 2));
data = (const unsigned char *)data + (len & ~(size_t)3);
len &= (size_t)3;
memcpy(sc->partial, data, len);
sc->partial_len = len;
}
static void
hamsi_small_close(sph_hamsi_small_context *sc,
unsigned ub, unsigned n, void *dst, size_t out_size_w32)
{
unsigned char pad[12];
size_t ptr, u;
unsigned z;
unsigned char *out;
ptr = sc->partial_len;
memcpy(pad, sc->partial, ptr);
#if SPH_64
sph_enc64be(pad + 4, sc->count + (ptr << 3) + n);
#else
sph_enc32be(pad + 4, sc->count_high);
sph_enc32be(pad + 8, sc->count_low + (ptr << 3) + n);
#endif
z = 0x80 >> n;
pad[ptr ++] = ((ub & -z) | z) & 0xFF;
while (ptr < 4)
pad[ptr ++] = 0;
hamsi_small(sc, pad, 2);
hamsi_small_final(sc, pad + 8);
out = dst;
for (u = 0; u < out_size_w32; u ++)
sph_enc32be(out + (u << 2), sc->h[u]);
}
#define DECL_STATE_BIG \
sph_u32 c0, c1, c2, c3, c4, c5, c6, c7; \
sph_u32 c8, c9, cA, cB, cC, cD, cE, cF;
#define READ_STATE_BIG(sc) do { \
c0 = sc->h[0x0]; \
c1 = sc->h[0x1]; \
c2 = sc->h[0x2]; \
c3 = sc->h[0x3]; \
c4 = sc->h[0x4]; \
c5 = sc->h[0x5]; \
c6 = sc->h[0x6]; \
c7 = sc->h[0x7]; \
c8 = sc->h[0x8]; \
c9 = sc->h[0x9]; \
cA = sc->h[0xA]; \
cB = sc->h[0xB]; \
cC = sc->h[0xC]; \
cD = sc->h[0xD]; \
cE = sc->h[0xE]; \
cF = sc->h[0xF]; \
} while (0)
#define WRITE_STATE_BIG(sc) do { \
sc->h[0x0] = c0; \
sc->h[0x1] = c1; \
sc->h[0x2] = c2; \
sc->h[0x3] = c3; \
sc->h[0x4] = c4; \
sc->h[0x5] = c5; \
sc->h[0x6] = c6; \
sc->h[0x7] = c7; \
sc->h[0x8] = c8; \
sc->h[0x9] = c9; \
sc->h[0xA] = cA; \
sc->h[0xB] = cB; \
sc->h[0xC] = cC; \
sc->h[0xD] = cD; \
sc->h[0xE] = cE; \
sc->h[0xF] = cF; \
} while (0)
#define s00 m0
#define s01 m1
#define s02 c0
#define s03 c1
#define s04 m2
#define s05 m3
#define s06 c2
#define s07 c3
#define s08 c4
#define s09 c5
#define s0A m4
#define s0B m5
#define s0C c6
#define s0D c7
#define s0E m6
#define s0F m7
#define s10 m8
#define s11 m9
#define s12 c8
#define s13 c9
#define s14 mA
#define s15 mB
#define s16 cA
#define s17 cB
#define s18 cC
#define s19 cD
#define s1A mC
#define s1B mD
#define s1C cE
#define s1D cF
#define s1E mE
#define s1F mF
#define ROUND_BIG(rc, alpha) do { \
s00 ^= alpha[0x00]; \
s01 ^= alpha[0x01] ^ (sph_u32)(rc); \
s02 ^= alpha[0x02]; \
s03 ^= alpha[0x03]; \
s04 ^= alpha[0x04]; \
s05 ^= alpha[0x05]; \
s06 ^= alpha[0x06]; \
s07 ^= alpha[0x07]; \
s08 ^= alpha[0x08]; \
s09 ^= alpha[0x09]; \
s0A ^= alpha[0x0A]; \
s0B ^= alpha[0x0B]; \
s0C ^= alpha[0x0C]; \
s0D ^= alpha[0x0D]; \
s0E ^= alpha[0x0E]; \
s0F ^= alpha[0x0F]; \
s10 ^= alpha[0x10]; \
s11 ^= alpha[0x11]; \
s12 ^= alpha[0x12]; \
s13 ^= alpha[0x13]; \
s14 ^= alpha[0x14]; \
s15 ^= alpha[0x15]; \
s16 ^= alpha[0x16]; \
s17 ^= alpha[0x17]; \
s18 ^= alpha[0x18]; \
s19 ^= alpha[0x19]; \
s1A ^= alpha[0x1A]; \
s1B ^= alpha[0x1B]; \
s1C ^= alpha[0x1C]; \
s1D ^= alpha[0x1D]; \
s1E ^= alpha[0x1E]; \
s1F ^= alpha[0x1F]; \
SBOX(s00, s08, s10, s18); \
SBOX(s01, s09, s11, s19); \
SBOX(s02, s0A, s12, s1A); \
SBOX(s03, s0B, s13, s1B); \
SBOX(s04, s0C, s14, s1C); \
SBOX(s05, s0D, s15, s1D); \
SBOX(s06, s0E, s16, s1E); \
SBOX(s07, s0F, s17, s1F); \
L(s00, s09, s12, s1B); \
L(s01, s0A, s13, s1C); \
L(s02, s0B, s14, s1D); \
L(s03, s0C, s15, s1E); \
L(s04, s0D, s16, s1F); \
L(s05, s0E, s17, s18); \
L(s06, s0F, s10, s19); \
L(s07, s08, s11, s1A); \
L(s00, s02, s05, s07); \
L(s10, s13, s15, s16); \
L(s09, s0B, s0C, s0E); \
L(s19, s1A, s1C, s1F); \
} while (0)
#if SPH_SMALL_FOOTPRINT_HAMSI
#define P_BIG do { \
unsigned r; \
for (r = 0; r < 6; r ++) \
ROUND_BIG(r, alpha_n); \
} while (0)
#define PF_BIG do { \
unsigned r; \
for (r = 0; r < 12; r ++) \
ROUND_BIG(r, alpha_f); \
} while (0)
#else
#define P_BIG do { \
ROUND_BIG(0, alpha_n); \
ROUND_BIG(1, alpha_n); \
ROUND_BIG(2, alpha_n); \
ROUND_BIG(3, alpha_n); \
ROUND_BIG(4, alpha_n); \
ROUND_BIG(5, alpha_n); \
} while (0)
#define PF_BIG do { \
ROUND_BIG(0, alpha_f); \
ROUND_BIG(1, alpha_f); \
ROUND_BIG(2, alpha_f); \
ROUND_BIG(3, alpha_f); \
ROUND_BIG(4, alpha_f); \
ROUND_BIG(5, alpha_f); \
ROUND_BIG(6, alpha_f); \
ROUND_BIG(7, alpha_f); \
ROUND_BIG(8, alpha_f); \
ROUND_BIG(9, alpha_f); \
ROUND_BIG(10, alpha_f); \
ROUND_BIG(11, alpha_f); \
} while (0)
#endif
#define T_BIG do { \
/* order is important */ \
cF = (sc->h[0xF] ^= s17); \
cE = (sc->h[0xE] ^= s16); \
cD = (sc->h[0xD] ^= s15); \
cC = (sc->h[0xC] ^= s14); \
cB = (sc->h[0xB] ^= s13); \
cA = (sc->h[0xA] ^= s12); \
c9 = (sc->h[0x9] ^= s11); \
c8 = (sc->h[0x8] ^= s10); \
c7 = (sc->h[0x7] ^= s07); \
c6 = (sc->h[0x6] ^= s06); \
c5 = (sc->h[0x5] ^= s05); \
c4 = (sc->h[0x4] ^= s04); \
c3 = (sc->h[0x3] ^= s03); \
c2 = (sc->h[0x2] ^= s02); \
c1 = (sc->h[0x1] ^= s01); \
c0 = (sc->h[0x0] ^= s00); \
} while (0)
static void
hamsi_big(sph_hamsi_big_context *sc, const unsigned char *buf, size_t num)
{
DECL_STATE_BIG
#if !SPH_64
sph_u32 tmp;
#endif
#if SPH_64
sc->count += (sph_u64)num << 6;
#else
tmp = SPH_T32((sph_u32)num << 6);
sc->count_low = SPH_T32(sc->count_low + tmp);
sc->count_high += (sph_u32)((num >> 13) >> 13);
if (sc->count_low < tmp)
sc->count_high ++;
#endif
READ_STATE_BIG(sc);
while (num -- > 0) {
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
sph_u32 m8, m9, mA, mB, mC, mD, mE, mF;
INPUT_BIG;
P_BIG;
T_BIG;
buf += 8;
}
WRITE_STATE_BIG(sc);
}
static void
hamsi_big_final(sph_hamsi_big_context *sc, const unsigned char *buf)
{
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
sph_u32 m8, m9, mA, mB, mC, mD, mE, mF;
DECL_STATE_BIG
READ_STATE_BIG(sc);
INPUT_BIG;
PF_BIG;
T_BIG;
WRITE_STATE_BIG(sc);
}
static void
hamsi_big_init(sph_hamsi_big_context *sc, const sph_u32 *iv)
{
sc->partial_len = 0;
memcpy(sc->h, iv, sizeof sc->h);
#if SPH_64
sc->count = 0;
#else
sc->count_high = sc->count_low = 0;
#endif
}
static void
hamsi_big_core(sph_hamsi_big_context *sc, const void *data, size_t len)
{
if (sc->partial_len != 0) {
size_t mlen;
mlen = 8 - sc->partial_len;
if (len < mlen) {
memcpy(sc->partial + sc->partial_len, data, len);
sc->partial_len += len;
return;
} else {
memcpy(sc->partial + sc->partial_len, data, mlen);
len -= mlen;
data = (const unsigned char *)data + mlen;
hamsi_big(sc, sc->partial, 1);
sc->partial_len = 0;
}
}
hamsi_big(sc, data, (len >> 3));
data = (const unsigned char *)data + (len & ~(size_t)7);
len &= (size_t)7;
memcpy(sc->partial, data, len);
sc->partial_len = len;
}
static void
hamsi_big_close(sph_hamsi_big_context *sc,
unsigned ub, unsigned n, void *dst, size_t out_size_w32)
{
unsigned char pad[8];
size_t ptr, u;
unsigned z;
unsigned char *out;
ptr = sc->partial_len;
#if SPH_64
sph_enc64be(pad, sc->count + (ptr << 3) + n);
#else
sph_enc32be(pad, sc->count_high);
sph_enc32be(pad + 4, sc->count_low + (ptr << 3) + n);
#endif
z = 0x80 >> n;
sc->partial[ptr ++] = ((ub & -z) | z) & 0xFF;
while (ptr < 8)
sc->partial[ptr ++] = 0;
hamsi_big(sc, sc->partial, 1);
hamsi_big_final(sc, pad);
out = dst;
if (out_size_w32 == 12) {
sph_enc32be(out + 0, sc->h[ 0]);
sph_enc32be(out + 4, sc->h[ 1]);
sph_enc32be(out + 8, sc->h[ 3]);
sph_enc32be(out + 12, sc->h[ 4]);
sph_enc32be(out + 16, sc->h[ 5]);
sph_enc32be(out + 20, sc->h[ 6]);
sph_enc32be(out + 24, sc->h[ 8]);
sph_enc32be(out + 28, sc->h[ 9]);
sph_enc32be(out + 32, sc->h[10]);
sph_enc32be(out + 36, sc->h[12]);
sph_enc32be(out + 40, sc->h[13]);
sph_enc32be(out + 44, sc->h[15]);
} else {
for (u = 0; u < 16; u ++)
sph_enc32be(out + (u << 2), sc->h[u]);
}
}
/* see sph_hamsi.h */
void
sph_hamsi224_init(void *cc)
{
hamsi_small_init(cc, IV224);
}
/* see sph_hamsi.h */
void
sph_hamsi224(void *cc, const void *data, size_t len)
{
hamsi_small_core(cc, data, len);
}
/* see sph_hamsi.h */
void
sph_hamsi224_close(void *cc, void *dst)
{
hamsi_small_close(cc, 0, 0, dst, 7);
hamsi_small_init(cc, IV224);
}
/* see sph_hamsi.h */
void
sph_hamsi224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
hamsi_small_close(cc, ub, n, dst, 7);
hamsi_small_init(cc, IV224);
}
/* see sph_hamsi.h */
void
sph_hamsi256_init(void *cc)
{
hamsi_small_init(cc, IV256);
}
/* see sph_hamsi.h */
void
sph_hamsi256(void *cc, const void *data, size_t len)
{
hamsi_small_core(cc, data, len);
}
/* see sph_hamsi.h */
void
sph_hamsi256_close(void *cc, void *dst)
{
hamsi_small_close(cc, 0, 0, dst, 8);
hamsi_small_init(cc, IV256);
}
/* see sph_hamsi.h */
void
sph_hamsi256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
hamsi_small_close(cc, ub, n, dst, 8);
hamsi_small_init(cc, IV256);
}
/* see sph_hamsi.h */
void
sph_hamsi384_init(void *cc)
{
hamsi_big_init(cc, IV384);
}
/* see sph_hamsi.h */
void
sph_hamsi384(void *cc, const void *data, size_t len)
{
hamsi_big_core(cc, data, len);
}
/* see sph_hamsi.h */
void
sph_hamsi384_close(void *cc, void *dst)
{
hamsi_big_close(cc, 0, 0, dst, 12);
hamsi_big_init(cc, IV384);
}
/* see sph_hamsi.h */
void
sph_hamsi384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
hamsi_big_close(cc, ub, n, dst, 12);
hamsi_big_init(cc, IV384);
}
/* see sph_hamsi.h */
void
sph_hamsi512_init(void *cc)
{
hamsi_big_init(cc, IV512);
}
/* see sph_hamsi.h */
void
sph_hamsi512(void *cc, const void *data, size_t len)
{
hamsi_big_core(cc, data, len);
}
/* see sph_hamsi.h */
void
sph_hamsi512_close(void *cc, void *dst)
{
hamsi_big_close(cc, 0, 0, dst, 16);
hamsi_big_init(cc, IV512);
}
/* see sph_hamsi.h */
void
sph_hamsi512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
hamsi_big_close(cc, ub, n, dst, 16);
hamsi_big_init(cc, IV512);
}
#ifdef __cplusplus
}
#endif

39648
sph/hamsi_helper.c

File diff suppressed because it is too large Load Diff

321
sph/sph_hamsi.h

@ -0,0 +1,321 @@
/* $Id: sph_hamsi.h 216 2010-06-08 09:46:57Z tp $ */
/**
* Hamsi interface. This code implements Hamsi with the recommended
* parameters for SHA-3, with outputs of 224, 256, 384 and 512 bits.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file sph_hamsi.h
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifndef SPH_HAMSI_H__
#define SPH_HAMSI_H__
#include <stddef.h>
#include "sph_types.h"
#ifdef __cplusplus
extern "C"{
#endif
/**
* Output size (in bits) for Hamsi-224.
*/
#define SPH_SIZE_hamsi224 224
/**
* Output size (in bits) for Hamsi-256.
*/
#define SPH_SIZE_hamsi256 256
/**
* Output size (in bits) for Hamsi-384.
*/
#define SPH_SIZE_hamsi384 384
/**
* Output size (in bits) for Hamsi-512.
*/
#define SPH_SIZE_hamsi512 512
/**
* This structure is a context for Hamsi-224 and Hamsi-256 computations:
* it contains the intermediate values and some data from the last
* entered block. Once a Hamsi computation has been performed, the
* context can be reused for another computation.
*
* The contents of this structure are private. A running Hamsi
* computation can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char partial[4];
size_t partial_len;
sph_u32 h[8];
#if SPH_64
sph_u64 count;
#else
sph_u32 count_high, count_low;
#endif
#endif
} sph_hamsi_small_context;
/**
* This structure is a context for Hamsi-224 computations. It is
* identical to the common <code>sph_hamsi_small_context</code>.
*/
typedef sph_hamsi_small_context sph_hamsi224_context;
/**
* This structure is a context for Hamsi-256 computations. It is
* identical to the common <code>sph_hamsi_small_context</code>.
*/
typedef sph_hamsi_small_context sph_hamsi256_context;
/**
* This structure is a context for Hamsi-384 and Hamsi-512 computations:
* it contains the intermediate values and some data from the last
* entered block. Once a Hamsi computation has been performed, the
* context can be reused for another computation.
*
* The contents of this structure are private. A running Hamsi
* computation can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char partial[8];
size_t partial_len;
sph_u32 h[16];
#if SPH_64
sph_u64 count;
#else
sph_u32 count_high, count_low;
#endif
#endif
} sph_hamsi_big_context;
/**
* This structure is a context for Hamsi-384 computations. It is
* identical to the common <code>sph_hamsi_small_context</code>.
*/
typedef sph_hamsi_big_context sph_hamsi384_context;
/**
* This structure is a context for Hamsi-512 computations. It is
* identical to the common <code>sph_hamsi_small_context</code>.
*/
typedef sph_hamsi_big_context sph_hamsi512_context;
/**
* Initialize a Hamsi-224 context. This process performs no memory allocation.
*
* @param cc the Hamsi-224 context (pointer to a
* <code>sph_hamsi224_context</code>)
*/
void sph_hamsi224_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the Hamsi-224 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_hamsi224(void *cc, const void *data, size_t len);
/**
* Terminate the current Hamsi-224 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (28 bytes). The context is automatically
* reinitialized.
*
* @param cc the Hamsi-224 context
* @param dst the destination buffer
*/
void sph_hamsi224_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (28 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the Hamsi-224 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_hamsi224_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a Hamsi-256 context. This process performs no memory allocation.
*
* @param cc the Hamsi-256 context (pointer to a
* <code>sph_hamsi256_context</code>)
*/
void sph_hamsi256_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the Hamsi-256 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_hamsi256(void *cc, const void *data, size_t len);
/**
* Terminate the current Hamsi-256 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (32 bytes). The context is automatically
* reinitialized.
*
* @param cc the Hamsi-256 context
* @param dst the destination buffer
*/
void sph_hamsi256_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (32 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the Hamsi-256 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_hamsi256_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a Hamsi-384 context. This process performs no memory allocation.
*
* @param cc the Hamsi-384 context (pointer to a
* <code>sph_hamsi384_context</code>)
*/
void sph_hamsi384_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the Hamsi-384 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_hamsi384(void *cc, const void *data, size_t len);
/**
* Terminate the current Hamsi-384 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (48 bytes). The context is automatically
* reinitialized.
*
* @param cc the Hamsi-384 context
* @param dst the destination buffer
*/
void sph_hamsi384_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (48 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the Hamsi-384 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_hamsi384_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a Hamsi-512 context. This process performs no memory allocation.
*
* @param cc the Hamsi-512 context (pointer to a
* <code>sph_hamsi512_context</code>)
*/
void sph_hamsi512_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the Hamsi-512 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_hamsi512(void *cc, const void *data, size_t len);
/**
* Terminate the current Hamsi-512 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (64 bytes). The context is automatically
* reinitialized.
*
* @param cc the Hamsi-512 context
* @param dst the destination buffer
*/
void sph_hamsi512_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (64 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the Hamsi-512 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_hamsi512_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
#ifdef __cplusplus
}
#endif
#endif

8
x11/cuda_x11_echo.cu

@ -198,7 +198,11 @@ __global__ void x11_echo512_gpu_hash_64(int threads, uint32_t startNounce, uint6
for(int i=0;i<16;i++) for(int i=0;i<16;i++)
W[i] ^= Hash[i]; W[i] ^= Hash[i];
W[8] ^= 0x10; // tsiv: I feel iffy about removing this, but it seems to break the full hash
// fortunately for X11 the flipped bit lands outside the first 32 bytes used as the final X11 hash
// try chaining more algos after echo (X13) and boom
//W[8] ^= 0x10;
W[27] ^= 0x02000000; W[27] ^= 0x02000000;
W[28] ^= k0; W[28] ^= k0;
@ -225,6 +229,8 @@ __host__ void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNou
// Größe des dynamischen Shared Memory Bereichs // Größe des dynamischen Shared Memory Bereichs
size_t shared_size = 0; size_t shared_size = 0;
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
x11_echo512_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector); x11_echo512_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
MyStreamSynchronize(NULL, order, thr_id); MyStreamSynchronize(NULL, order, thr_id);
} }

711
x13/cuda_x13_fugue512.cu

@ -0,0 +1,711 @@
/*
* Quick and dirty addition of Fugue-512 for X13
*
* Built on cbuchner1's implementation, actual hashing code
* heavily based on phm's sgminer
*
*/
/*
* X13 kernel implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2014 phm
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author phm <phm@inbox.com>
*/
// aus heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
#include <stdint.h>
#define SPH_C64(x) ((uint64_t)(x ## ULL))
#define SPH_C32(x) ((uint32_t)(x ## U))
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
#define SWAB32(x) ( __byte_perm(x, x, 0x0123) )
#if __CUDA_ARCH__ < 350
// Kepler (Compute 3.0)
#define ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
#else
// Kepler (Compute 3.5)
#define ROTL32(x, n) __funnelshift_l( (x), (x), (n) )
#endif
#define mixtab0(x) (*((uint32_t*)mixtabs + ( (x))))
#define mixtab1(x) (*((uint32_t*)mixtabs + (256+(x))))
#define mixtab2(x) (*((uint32_t*)mixtabs + (512+(x))))
#define mixtab3(x) (*((uint32_t*)mixtabs + (768+(x))))
texture<unsigned int, 1, cudaReadModeElementType> mixTab0Tex;
texture<unsigned int, 1, cudaReadModeElementType> mixTab1Tex;
texture<unsigned int, 1, cudaReadModeElementType> mixTab2Tex;
texture<unsigned int, 1, cudaReadModeElementType> mixTab3Tex;
static const uint32_t mixtab0_cpu[] = {
SPH_C32(0x63633297), SPH_C32(0x7c7c6feb), SPH_C32(0x77775ec7),
SPH_C32(0x7b7b7af7), SPH_C32(0xf2f2e8e5), SPH_C32(0x6b6b0ab7),
SPH_C32(0x6f6f16a7), SPH_C32(0xc5c56d39), SPH_C32(0x303090c0),
SPH_C32(0x01010704), SPH_C32(0x67672e87), SPH_C32(0x2b2bd1ac),
SPH_C32(0xfefeccd5), SPH_C32(0xd7d71371), SPH_C32(0xabab7c9a),
SPH_C32(0x767659c3), SPH_C32(0xcaca4005), SPH_C32(0x8282a33e),
SPH_C32(0xc9c94909), SPH_C32(0x7d7d68ef), SPH_C32(0xfafad0c5),
SPH_C32(0x5959947f), SPH_C32(0x4747ce07), SPH_C32(0xf0f0e6ed),
SPH_C32(0xadad6e82), SPH_C32(0xd4d41a7d), SPH_C32(0xa2a243be),
SPH_C32(0xafaf608a), SPH_C32(0x9c9cf946), SPH_C32(0xa4a451a6),
SPH_C32(0x727245d3), SPH_C32(0xc0c0762d), SPH_C32(0xb7b728ea),
SPH_C32(0xfdfdc5d9), SPH_C32(0x9393d47a), SPH_C32(0x2626f298),
SPH_C32(0x363682d8), SPH_C32(0x3f3fbdfc), SPH_C32(0xf7f7f3f1),
SPH_C32(0xcccc521d), SPH_C32(0x34348cd0), SPH_C32(0xa5a556a2),
SPH_C32(0xe5e58db9), SPH_C32(0xf1f1e1e9), SPH_C32(0x71714cdf),
SPH_C32(0xd8d83e4d), SPH_C32(0x313197c4), SPH_C32(0x15156b54),
SPH_C32(0x04041c10), SPH_C32(0xc7c76331), SPH_C32(0x2323e98c),
SPH_C32(0xc3c37f21), SPH_C32(0x18184860), SPH_C32(0x9696cf6e),
SPH_C32(0x05051b14), SPH_C32(0x9a9aeb5e), SPH_C32(0x0707151c),
SPH_C32(0x12127e48), SPH_C32(0x8080ad36), SPH_C32(0xe2e298a5),
SPH_C32(0xebeba781), SPH_C32(0x2727f59c), SPH_C32(0xb2b233fe),
SPH_C32(0x757550cf), SPH_C32(0x09093f24), SPH_C32(0x8383a43a),
SPH_C32(0x2c2cc4b0), SPH_C32(0x1a1a4668), SPH_C32(0x1b1b416c),
SPH_C32(0x6e6e11a3), SPH_C32(0x5a5a9d73), SPH_C32(0xa0a04db6),
SPH_C32(0x5252a553), SPH_C32(0x3b3ba1ec), SPH_C32(0xd6d61475),
SPH_C32(0xb3b334fa), SPH_C32(0x2929dfa4), SPH_C32(0xe3e39fa1),
SPH_C32(0x2f2fcdbc), SPH_C32(0x8484b126), SPH_C32(0x5353a257),
SPH_C32(0xd1d10169), SPH_C32(0x00000000), SPH_C32(0xededb599),
SPH_C32(0x2020e080), SPH_C32(0xfcfcc2dd), SPH_C32(0xb1b13af2),
SPH_C32(0x5b5b9a77), SPH_C32(0x6a6a0db3), SPH_C32(0xcbcb4701),
SPH_C32(0xbebe17ce), SPH_C32(0x3939afe4), SPH_C32(0x4a4aed33),
SPH_C32(0x4c4cff2b), SPH_C32(0x5858937b), SPH_C32(0xcfcf5b11),
SPH_C32(0xd0d0066d), SPH_C32(0xefefbb91), SPH_C32(0xaaaa7b9e),
SPH_C32(0xfbfbd7c1), SPH_C32(0x4343d217), SPH_C32(0x4d4df82f),
SPH_C32(0x333399cc), SPH_C32(0x8585b622), SPH_C32(0x4545c00f),
SPH_C32(0xf9f9d9c9), SPH_C32(0x02020e08), SPH_C32(0x7f7f66e7),
SPH_C32(0x5050ab5b), SPH_C32(0x3c3cb4f0), SPH_C32(0x9f9ff04a),
SPH_C32(0xa8a87596), SPH_C32(0x5151ac5f), SPH_C32(0xa3a344ba),
SPH_C32(0x4040db1b), SPH_C32(0x8f8f800a), SPH_C32(0x9292d37e),
SPH_C32(0x9d9dfe42), SPH_C32(0x3838a8e0), SPH_C32(0xf5f5fdf9),
SPH_C32(0xbcbc19c6), SPH_C32(0xb6b62fee), SPH_C32(0xdada3045),
SPH_C32(0x2121e784), SPH_C32(0x10107040), SPH_C32(0xffffcbd1),
SPH_C32(0xf3f3efe1), SPH_C32(0xd2d20865), SPH_C32(0xcdcd5519),
SPH_C32(0x0c0c2430), SPH_C32(0x1313794c), SPH_C32(0xececb29d),
SPH_C32(0x5f5f8667), SPH_C32(0x9797c86a), SPH_C32(0x4444c70b),
SPH_C32(0x1717655c), SPH_C32(0xc4c46a3d), SPH_C32(0xa7a758aa),
SPH_C32(0x7e7e61e3), SPH_C32(0x3d3db3f4), SPH_C32(0x6464278b),
SPH_C32(0x5d5d886f), SPH_C32(0x19194f64), SPH_C32(0x737342d7),
SPH_C32(0x60603b9b), SPH_C32(0x8181aa32), SPH_C32(0x4f4ff627),
SPH_C32(0xdcdc225d), SPH_C32(0x2222ee88), SPH_C32(0x2a2ad6a8),
SPH_C32(0x9090dd76), SPH_C32(0x88889516), SPH_C32(0x4646c903),
SPH_C32(0xeeeebc95), SPH_C32(0xb8b805d6), SPH_C32(0x14146c50),
SPH_C32(0xdede2c55), SPH_C32(0x5e5e8163), SPH_C32(0x0b0b312c),
SPH_C32(0xdbdb3741), SPH_C32(0xe0e096ad), SPH_C32(0x32329ec8),
SPH_C32(0x3a3aa6e8), SPH_C32(0x0a0a3628), SPH_C32(0x4949e43f),
SPH_C32(0x06061218), SPH_C32(0x2424fc90), SPH_C32(0x5c5c8f6b),
SPH_C32(0xc2c27825), SPH_C32(0xd3d30f61), SPH_C32(0xacac6986),
SPH_C32(0x62623593), SPH_C32(0x9191da72), SPH_C32(0x9595c662),
SPH_C32(0xe4e48abd), SPH_C32(0x797974ff), SPH_C32(0xe7e783b1),
SPH_C32(0xc8c84e0d), SPH_C32(0x373785dc), SPH_C32(0x6d6d18af),
SPH_C32(0x8d8d8e02), SPH_C32(0xd5d51d79), SPH_C32(0x4e4ef123),
SPH_C32(0xa9a97292), SPH_C32(0x6c6c1fab), SPH_C32(0x5656b943),
SPH_C32(0xf4f4fafd), SPH_C32(0xeaeaa085), SPH_C32(0x6565208f),
SPH_C32(0x7a7a7df3), SPH_C32(0xaeae678e), SPH_C32(0x08083820),
SPH_C32(0xbaba0bde), SPH_C32(0x787873fb), SPH_C32(0x2525fb94),
SPH_C32(0x2e2ecab8), SPH_C32(0x1c1c5470), SPH_C32(0xa6a65fae),
SPH_C32(0xb4b421e6), SPH_C32(0xc6c66435), SPH_C32(0xe8e8ae8d),
SPH_C32(0xdddd2559), SPH_C32(0x747457cb), SPH_C32(0x1f1f5d7c),
SPH_C32(0x4b4bea37), SPH_C32(0xbdbd1ec2), SPH_C32(0x8b8b9c1a),
SPH_C32(0x8a8a9b1e), SPH_C32(0x70704bdb), SPH_C32(0x3e3ebaf8),
SPH_C32(0xb5b526e2), SPH_C32(0x66662983), SPH_C32(0x4848e33b),
SPH_C32(0x0303090c), SPH_C32(0xf6f6f4f5), SPH_C32(0x0e0e2a38),
SPH_C32(0x61613c9f), SPH_C32(0x35358bd4), SPH_C32(0x5757be47),
SPH_C32(0xb9b902d2), SPH_C32(0x8686bf2e), SPH_C32(0xc1c17129),
SPH_C32(0x1d1d5374), SPH_C32(0x9e9ef74e), SPH_C32(0xe1e191a9),
SPH_C32(0xf8f8decd), SPH_C32(0x9898e556), SPH_C32(0x11117744),
SPH_C32(0x696904bf), SPH_C32(0xd9d93949), SPH_C32(0x8e8e870e),
SPH_C32(0x9494c166), SPH_C32(0x9b9bec5a), SPH_C32(0x1e1e5a78),
SPH_C32(0x8787b82a), SPH_C32(0xe9e9a989), SPH_C32(0xcece5c15),
SPH_C32(0x5555b04f), SPH_C32(0x2828d8a0), SPH_C32(0xdfdf2b51),
SPH_C32(0x8c8c8906), SPH_C32(0xa1a14ab2), SPH_C32(0x89899212),
SPH_C32(0x0d0d2334), SPH_C32(0xbfbf10ca), SPH_C32(0xe6e684b5),
SPH_C32(0x4242d513), SPH_C32(0x686803bb), SPH_C32(0x4141dc1f),
SPH_C32(0x9999e252), SPH_C32(0x2d2dc3b4), SPH_C32(0x0f0f2d3c),
SPH_C32(0xb0b03df6), SPH_C32(0x5454b74b), SPH_C32(0xbbbb0cda),
SPH_C32(0x16166258)
};
static const uint32_t mixtab1_cpu[] = {
SPH_C32(0x97636332), SPH_C32(0xeb7c7c6f), SPH_C32(0xc777775e),
SPH_C32(0xf77b7b7a), SPH_C32(0xe5f2f2e8), SPH_C32(0xb76b6b0a),
SPH_C32(0xa76f6f16), SPH_C32(0x39c5c56d), SPH_C32(0xc0303090),
SPH_C32(0x04010107), SPH_C32(0x8767672e), SPH_C32(0xac2b2bd1),
SPH_C32(0xd5fefecc), SPH_C32(0x71d7d713), SPH_C32(0x9aabab7c),
SPH_C32(0xc3767659), SPH_C32(0x05caca40), SPH_C32(0x3e8282a3),
SPH_C32(0x09c9c949), SPH_C32(0xef7d7d68), SPH_C32(0xc5fafad0),
SPH_C32(0x7f595994), SPH_C32(0x074747ce), SPH_C32(0xedf0f0e6),
SPH_C32(0x82adad6e), SPH_C32(0x7dd4d41a), SPH_C32(0xbea2a243),
SPH_C32(0x8aafaf60), SPH_C32(0x469c9cf9), SPH_C32(0xa6a4a451),
SPH_C32(0xd3727245), SPH_C32(0x2dc0c076), SPH_C32(0xeab7b728),
SPH_C32(0xd9fdfdc5), SPH_C32(0x7a9393d4), SPH_C32(0x982626f2),
SPH_C32(0xd8363682), SPH_C32(0xfc3f3fbd), SPH_C32(0xf1f7f7f3),
SPH_C32(0x1dcccc52), SPH_C32(0xd034348c), SPH_C32(0xa2a5a556),
SPH_C32(0xb9e5e58d), SPH_C32(0xe9f1f1e1), SPH_C32(0xdf71714c),
SPH_C32(0x4dd8d83e), SPH_C32(0xc4313197), SPH_C32(0x5415156b),
SPH_C32(0x1004041c), SPH_C32(0x31c7c763), SPH_C32(0x8c2323e9),
SPH_C32(0x21c3c37f), SPH_C32(0x60181848), SPH_C32(0x6e9696cf),
SPH_C32(0x1405051b), SPH_C32(0x5e9a9aeb), SPH_C32(0x1c070715),
SPH_C32(0x4812127e), SPH_C32(0x368080ad), SPH_C32(0xa5e2e298),
SPH_C32(0x81ebeba7), SPH_C32(0x9c2727f5), SPH_C32(0xfeb2b233),
SPH_C32(0xcf757550), SPH_C32(0x2409093f), SPH_C32(0x3a8383a4),
SPH_C32(0xb02c2cc4), SPH_C32(0x681a1a46), SPH_C32(0x6c1b1b41),
SPH_C32(0xa36e6e11), SPH_C32(0x735a5a9d), SPH_C32(0xb6a0a04d),
SPH_C32(0x535252a5), SPH_C32(0xec3b3ba1), SPH_C32(0x75d6d614),
SPH_C32(0xfab3b334), SPH_C32(0xa42929df), SPH_C32(0xa1e3e39f),
SPH_C32(0xbc2f2fcd), SPH_C32(0x268484b1), SPH_C32(0x575353a2),
SPH_C32(0x69d1d101), SPH_C32(0x00000000), SPH_C32(0x99ededb5),
SPH_C32(0x802020e0), SPH_C32(0xddfcfcc2), SPH_C32(0xf2b1b13a),
SPH_C32(0x775b5b9a), SPH_C32(0xb36a6a0d), SPH_C32(0x01cbcb47),
SPH_C32(0xcebebe17), SPH_C32(0xe43939af), SPH_C32(0x334a4aed),
SPH_C32(0x2b4c4cff), SPH_C32(0x7b585893), SPH_C32(0x11cfcf5b),
SPH_C32(0x6dd0d006), SPH_C32(0x91efefbb), SPH_C32(0x9eaaaa7b),
SPH_C32(0xc1fbfbd7), SPH_C32(0x174343d2), SPH_C32(0x2f4d4df8),
SPH_C32(0xcc333399), SPH_C32(0x228585b6), SPH_C32(0x0f4545c0),
SPH_C32(0xc9f9f9d9), SPH_C32(0x0802020e), SPH_C32(0xe77f7f66),
SPH_C32(0x5b5050ab), SPH_C32(0xf03c3cb4), SPH_C32(0x4a9f9ff0),
SPH_C32(0x96a8a875), SPH_C32(0x5f5151ac), SPH_C32(0xbaa3a344),
SPH_C32(0x1b4040db), SPH_C32(0x0a8f8f80), SPH_C32(0x7e9292d3),
SPH_C32(0x429d9dfe), SPH_C32(0xe03838a8), SPH_C32(0xf9f5f5fd),
SPH_C32(0xc6bcbc19), SPH_C32(0xeeb6b62f), SPH_C32(0x45dada30),
SPH_C32(0x842121e7), SPH_C32(0x40101070), SPH_C32(0xd1ffffcb),
SPH_C32(0xe1f3f3ef), SPH_C32(0x65d2d208), SPH_C32(0x19cdcd55),
SPH_C32(0x300c0c24), SPH_C32(0x4c131379), SPH_C32(0x9dececb2),
SPH_C32(0x675f5f86), SPH_C32(0x6a9797c8), SPH_C32(0x0b4444c7),
SPH_C32(0x5c171765), SPH_C32(0x3dc4c46a), SPH_C32(0xaaa7a758),
SPH_C32(0xe37e7e61), SPH_C32(0xf43d3db3), SPH_C32(0x8b646427),
SPH_C32(0x6f5d5d88), SPH_C32(0x6419194f), SPH_C32(0xd7737342),
SPH_C32(0x9b60603b), SPH_C32(0x328181aa), SPH_C32(0x274f4ff6),
SPH_C32(0x5ddcdc22), SPH_C32(0x882222ee), SPH_C32(0xa82a2ad6),
SPH_C32(0x769090dd), SPH_C32(0x16888895), SPH_C32(0x034646c9),
SPH_C32(0x95eeeebc), SPH_C32(0xd6b8b805), SPH_C32(0x5014146c),
SPH_C32(0x55dede2c), SPH_C32(0x635e5e81), SPH_C32(0x2c0b0b31),
SPH_C32(0x41dbdb37), SPH_C32(0xade0e096), SPH_C32(0xc832329e),
SPH_C32(0xe83a3aa6), SPH_C32(0x280a0a36), SPH_C32(0x3f4949e4),
SPH_C32(0x18060612), SPH_C32(0x902424fc), SPH_C32(0x6b5c5c8f),
SPH_C32(0x25c2c278), SPH_C32(0x61d3d30f), SPH_C32(0x86acac69),
SPH_C32(0x93626235), SPH_C32(0x729191da), SPH_C32(0x629595c6),
SPH_C32(0xbde4e48a), SPH_C32(0xff797974), SPH_C32(0xb1e7e783),
SPH_C32(0x0dc8c84e), SPH_C32(0xdc373785), SPH_C32(0xaf6d6d18),
SPH_C32(0x028d8d8e), SPH_C32(0x79d5d51d), SPH_C32(0x234e4ef1),
SPH_C32(0x92a9a972), SPH_C32(0xab6c6c1f), SPH_C32(0x435656b9),
SPH_C32(0xfdf4f4fa), SPH_C32(0x85eaeaa0), SPH_C32(0x8f656520),
SPH_C32(0xf37a7a7d), SPH_C32(0x8eaeae67), SPH_C32(0x20080838),
SPH_C32(0xdebaba0b), SPH_C32(0xfb787873), SPH_C32(0x942525fb),
SPH_C32(0xb82e2eca), SPH_C32(0x701c1c54), SPH_C32(0xaea6a65f),
SPH_C32(0xe6b4b421), SPH_C32(0x35c6c664), SPH_C32(0x8de8e8ae),
SPH_C32(0x59dddd25), SPH_C32(0xcb747457), SPH_C32(0x7c1f1f5d),
SPH_C32(0x374b4bea), SPH_C32(0xc2bdbd1e), SPH_C32(0x1a8b8b9c),
SPH_C32(0x1e8a8a9b), SPH_C32(0xdb70704b), SPH_C32(0xf83e3eba),
SPH_C32(0xe2b5b526), SPH_C32(0x83666629), SPH_C32(0x3b4848e3),
SPH_C32(0x0c030309), SPH_C32(0xf5f6f6f4), SPH_C32(0x380e0e2a),
SPH_C32(0x9f61613c), SPH_C32(0xd435358b), SPH_C32(0x475757be),
SPH_C32(0xd2b9b902), SPH_C32(0x2e8686bf), SPH_C32(0x29c1c171),
SPH_C32(0x741d1d53), SPH_C32(0x4e9e9ef7), SPH_C32(0xa9e1e191),
SPH_C32(0xcdf8f8de), SPH_C32(0x569898e5), SPH_C32(0x44111177),
SPH_C32(0xbf696904), SPH_C32(0x49d9d939), SPH_C32(0x0e8e8e87),
SPH_C32(0x669494c1), SPH_C32(0x5a9b9bec), SPH_C32(0x781e1e5a),
SPH_C32(0x2a8787b8), SPH_C32(0x89e9e9a9), SPH_C32(0x15cece5c),
SPH_C32(0x4f5555b0), SPH_C32(0xa02828d8), SPH_C32(0x51dfdf2b),
SPH_C32(0x068c8c89), SPH_C32(0xb2a1a14a), SPH_C32(0x12898992),
SPH_C32(0x340d0d23), SPH_C32(0xcabfbf10), SPH_C32(0xb5e6e684),
SPH_C32(0x134242d5), SPH_C32(0xbb686803), SPH_C32(0x1f4141dc),
SPH_C32(0x529999e2), SPH_C32(0xb42d2dc3), SPH_C32(0x3c0f0f2d),
SPH_C32(0xf6b0b03d), SPH_C32(0x4b5454b7), SPH_C32(0xdabbbb0c),
SPH_C32(0x58161662)
};
static const uint32_t mixtab2_cpu[] = {
SPH_C32(0x32976363), SPH_C32(0x6feb7c7c), SPH_C32(0x5ec77777),
SPH_C32(0x7af77b7b), SPH_C32(0xe8e5f2f2), SPH_C32(0x0ab76b6b),
SPH_C32(0x16a76f6f), SPH_C32(0x6d39c5c5), SPH_C32(0x90c03030),
SPH_C32(0x07040101), SPH_C32(0x2e876767), SPH_C32(0xd1ac2b2b),
SPH_C32(0xccd5fefe), SPH_C32(0x1371d7d7), SPH_C32(0x7c9aabab),
SPH_C32(0x59c37676), SPH_C32(0x4005caca), SPH_C32(0xa33e8282),
SPH_C32(0x4909c9c9), SPH_C32(0x68ef7d7d), SPH_C32(0xd0c5fafa),
SPH_C32(0x947f5959), SPH_C32(0xce074747), SPH_C32(0xe6edf0f0),
SPH_C32(0x6e82adad), SPH_C32(0x1a7dd4d4), SPH_C32(0x43bea2a2),
SPH_C32(0x608aafaf), SPH_C32(0xf9469c9c), SPH_C32(0x51a6a4a4),
SPH_C32(0x45d37272), SPH_C32(0x762dc0c0), SPH_C32(0x28eab7b7),
SPH_C32(0xc5d9fdfd), SPH_C32(0xd47a9393), SPH_C32(0xf2982626),
SPH_C32(0x82d83636), SPH_C32(0xbdfc3f3f), SPH_C32(0xf3f1f7f7),
SPH_C32(0x521dcccc), SPH_C32(0x8cd03434), SPH_C32(0x56a2a5a5),
SPH_C32(0x8db9e5e5), SPH_C32(0xe1e9f1f1), SPH_C32(0x4cdf7171),
SPH_C32(0x3e4dd8d8), SPH_C32(0x97c43131), SPH_C32(0x6b541515),
SPH_C32(0x1c100404), SPH_C32(0x6331c7c7), SPH_C32(0xe98c2323),
SPH_C32(0x7f21c3c3), SPH_C32(0x48601818), SPH_C32(0xcf6e9696),
SPH_C32(0x1b140505), SPH_C32(0xeb5e9a9a), SPH_C32(0x151c0707),
SPH_C32(0x7e481212), SPH_C32(0xad368080), SPH_C32(0x98a5e2e2),
SPH_C32(0xa781ebeb), SPH_C32(0xf59c2727), SPH_C32(0x33feb2b2),
SPH_C32(0x50cf7575), SPH_C32(0x3f240909), SPH_C32(0xa43a8383),
SPH_C32(0xc4b02c2c), SPH_C32(0x46681a1a), SPH_C32(0x416c1b1b),
SPH_C32(0x11a36e6e), SPH_C32(0x9d735a5a), SPH_C32(0x4db6a0a0),
SPH_C32(0xa5535252), SPH_C32(0xa1ec3b3b), SPH_C32(0x1475d6d6),
SPH_C32(0x34fab3b3), SPH_C32(0xdfa42929), SPH_C32(0x9fa1e3e3),
SPH_C32(0xcdbc2f2f), SPH_C32(0xb1268484), SPH_C32(0xa2575353),
SPH_C32(0x0169d1d1), SPH_C32(0x00000000), SPH_C32(0xb599eded),
SPH_C32(0xe0802020), SPH_C32(0xc2ddfcfc), SPH_C32(0x3af2b1b1),
SPH_C32(0x9a775b5b), SPH_C32(0x0db36a6a), SPH_C32(0x4701cbcb),
SPH_C32(0x17cebebe), SPH_C32(0xafe43939), SPH_C32(0xed334a4a),
SPH_C32(0xff2b4c4c), SPH_C32(0x937b5858), SPH_C32(0x5b11cfcf),
SPH_C32(0x066dd0d0), SPH_C32(0xbb91efef), SPH_C32(0x7b9eaaaa),
SPH_C32(0xd7c1fbfb), SPH_C32(0xd2174343), SPH_C32(0xf82f4d4d),
SPH_C32(0x99cc3333), SPH_C32(0xb6228585), SPH_C32(0xc00f4545),
SPH_C32(0xd9c9f9f9), SPH_C32(0x0e080202), SPH_C32(0x66e77f7f),
SPH_C32(0xab5b5050), SPH_C32(0xb4f03c3c), SPH_C32(0xf04a9f9f),
SPH_C32(0x7596a8a8), SPH_C32(0xac5f5151), SPH_C32(0x44baa3a3),
SPH_C32(0xdb1b4040), SPH_C32(0x800a8f8f), SPH_C32(0xd37e9292),
SPH_C32(0xfe429d9d), SPH_C32(0xa8e03838), SPH_C32(0xfdf9f5f5),
SPH_C32(0x19c6bcbc), SPH_C32(0x2feeb6b6), SPH_C32(0x3045dada),
SPH_C32(0xe7842121), SPH_C32(0x70401010), SPH_C32(0xcbd1ffff),
SPH_C32(0xefe1f3f3), SPH_C32(0x0865d2d2), SPH_C32(0x5519cdcd),
SPH_C32(0x24300c0c), SPH_C32(0x794c1313), SPH_C32(0xb29decec),
SPH_C32(0x86675f5f), SPH_C32(0xc86a9797), SPH_C32(0xc70b4444),
SPH_C32(0x655c1717), SPH_C32(0x6a3dc4c4), SPH_C32(0x58aaa7a7),
SPH_C32(0x61e37e7e), SPH_C32(0xb3f43d3d), SPH_C32(0x278b6464),
SPH_C32(0x886f5d5d), SPH_C32(0x4f641919), SPH_C32(0x42d77373),
SPH_C32(0x3b9b6060), SPH_C32(0xaa328181), SPH_C32(0xf6274f4f),
SPH_C32(0x225ddcdc), SPH_C32(0xee882222), SPH_C32(0xd6a82a2a),
SPH_C32(0xdd769090), SPH_C32(0x95168888), SPH_C32(0xc9034646),
SPH_C32(0xbc95eeee), SPH_C32(0x05d6b8b8), SPH_C32(0x6c501414),
SPH_C32(0x2c55dede), SPH_C32(0x81635e5e), SPH_C32(0x312c0b0b),
SPH_C32(0x3741dbdb), SPH_C32(0x96ade0e0), SPH_C32(0x9ec83232),
SPH_C32(0xa6e83a3a), SPH_C32(0x36280a0a), SPH_C32(0xe43f4949),
SPH_C32(0x12180606), SPH_C32(0xfc902424), SPH_C32(0x8f6b5c5c),
SPH_C32(0x7825c2c2), SPH_C32(0x0f61d3d3), SPH_C32(0x6986acac),
SPH_C32(0x35936262), SPH_C32(0xda729191), SPH_C32(0xc6629595),
SPH_C32(0x8abde4e4), SPH_C32(0x74ff7979), SPH_C32(0x83b1e7e7),
SPH_C32(0x4e0dc8c8), SPH_C32(0x85dc3737), SPH_C32(0x18af6d6d),
SPH_C32(0x8e028d8d), SPH_C32(0x1d79d5d5), SPH_C32(0xf1234e4e),
SPH_C32(0x7292a9a9), SPH_C32(0x1fab6c6c), SPH_C32(0xb9435656),
SPH_C32(0xfafdf4f4), SPH_C32(0xa085eaea), SPH_C32(0x208f6565),
SPH_C32(0x7df37a7a), SPH_C32(0x678eaeae), SPH_C32(0x38200808),
SPH_C32(0x0bdebaba), SPH_C32(0x73fb7878), SPH_C32(0xfb942525),
SPH_C32(0xcab82e2e), SPH_C32(0x54701c1c), SPH_C32(0x5faea6a6),
SPH_C32(0x21e6b4b4), SPH_C32(0x6435c6c6), SPH_C32(0xae8de8e8),
SPH_C32(0x2559dddd), SPH_C32(0x57cb7474), SPH_C32(0x5d7c1f1f),
SPH_C32(0xea374b4b), SPH_C32(0x1ec2bdbd), SPH_C32(0x9c1a8b8b),
SPH_C32(0x9b1e8a8a), SPH_C32(0x4bdb7070), SPH_C32(0xbaf83e3e),
SPH_C32(0x26e2b5b5), SPH_C32(0x29836666), SPH_C32(0xe33b4848),
SPH_C32(0x090c0303), SPH_C32(0xf4f5f6f6), SPH_C32(0x2a380e0e),
SPH_C32(0x3c9f6161), SPH_C32(0x8bd43535), SPH_C32(0xbe475757),
SPH_C32(0x02d2b9b9), SPH_C32(0xbf2e8686), SPH_C32(0x7129c1c1),
SPH_C32(0x53741d1d), SPH_C32(0xf74e9e9e), SPH_C32(0x91a9e1e1),
SPH_C32(0xdecdf8f8), SPH_C32(0xe5569898), SPH_C32(0x77441111),
SPH_C32(0x04bf6969), SPH_C32(0x3949d9d9), SPH_C32(0x870e8e8e),
SPH_C32(0xc1669494), SPH_C32(0xec5a9b9b), SPH_C32(0x5a781e1e),
SPH_C32(0xb82a8787), SPH_C32(0xa989e9e9), SPH_C32(0x5c15cece),
SPH_C32(0xb04f5555), SPH_C32(0xd8a02828), SPH_C32(0x2b51dfdf),
SPH_C32(0x89068c8c), SPH_C32(0x4ab2a1a1), SPH_C32(0x92128989),
SPH_C32(0x23340d0d), SPH_C32(0x10cabfbf), SPH_C32(0x84b5e6e6),
SPH_C32(0xd5134242), SPH_C32(0x03bb6868), SPH_C32(0xdc1f4141),
SPH_C32(0xe2529999), SPH_C32(0xc3b42d2d), SPH_C32(0x2d3c0f0f),
SPH_C32(0x3df6b0b0), SPH_C32(0xb74b5454), SPH_C32(0x0cdabbbb),
SPH_C32(0x62581616)
};
static const uint32_t mixtab3_cpu[] = {
SPH_C32(0x63329763), SPH_C32(0x7c6feb7c), SPH_C32(0x775ec777),
SPH_C32(0x7b7af77b), SPH_C32(0xf2e8e5f2), SPH_C32(0x6b0ab76b),
SPH_C32(0x6f16a76f), SPH_C32(0xc56d39c5), SPH_C32(0x3090c030),
SPH_C32(0x01070401), SPH_C32(0x672e8767), SPH_C32(0x2bd1ac2b),
SPH_C32(0xfeccd5fe), SPH_C32(0xd71371d7), SPH_C32(0xab7c9aab),
SPH_C32(0x7659c376), SPH_C32(0xca4005ca), SPH_C32(0x82a33e82),
SPH_C32(0xc94909c9), SPH_C32(0x7d68ef7d), SPH_C32(0xfad0c5fa),
SPH_C32(0x59947f59), SPH_C32(0x47ce0747), SPH_C32(0xf0e6edf0),
SPH_C32(0xad6e82ad), SPH_C32(0xd41a7dd4), SPH_C32(0xa243bea2),
SPH_C32(0xaf608aaf), SPH_C32(0x9cf9469c), SPH_C32(0xa451a6a4),
SPH_C32(0x7245d372), SPH_C32(0xc0762dc0), SPH_C32(0xb728eab7),
SPH_C32(0xfdc5d9fd), SPH_C32(0x93d47a93), SPH_C32(0x26f29826),
SPH_C32(0x3682d836), SPH_C32(0x3fbdfc3f), SPH_C32(0xf7f3f1f7),
SPH_C32(0xcc521dcc), SPH_C32(0x348cd034), SPH_C32(0xa556a2a5),
SPH_C32(0xe58db9e5), SPH_C32(0xf1e1e9f1), SPH_C32(0x714cdf71),
SPH_C32(0xd83e4dd8), SPH_C32(0x3197c431), SPH_C32(0x156b5415),
SPH_C32(0x041c1004), SPH_C32(0xc76331c7), SPH_C32(0x23e98c23),
SPH_C32(0xc37f21c3), SPH_C32(0x18486018), SPH_C32(0x96cf6e96),
SPH_C32(0x051b1405), SPH_C32(0x9aeb5e9a), SPH_C32(0x07151c07),
SPH_C32(0x127e4812), SPH_C32(0x80ad3680), SPH_C32(0xe298a5e2),
SPH_C32(0xeba781eb), SPH_C32(0x27f59c27), SPH_C32(0xb233feb2),
SPH_C32(0x7550cf75), SPH_C32(0x093f2409), SPH_C32(0x83a43a83),
SPH_C32(0x2cc4b02c), SPH_C32(0x1a46681a), SPH_C32(0x1b416c1b),
SPH_C32(0x6e11a36e), SPH_C32(0x5a9d735a), SPH_C32(0xa04db6a0),
SPH_C32(0x52a55352), SPH_C32(0x3ba1ec3b), SPH_C32(0xd61475d6),
SPH_C32(0xb334fab3), SPH_C32(0x29dfa429), SPH_C32(0xe39fa1e3),
SPH_C32(0x2fcdbc2f), SPH_C32(0x84b12684), SPH_C32(0x53a25753),
SPH_C32(0xd10169d1), SPH_C32(0x00000000), SPH_C32(0xedb599ed),
SPH_C32(0x20e08020), SPH_C32(0xfcc2ddfc), SPH_C32(0xb13af2b1),
SPH_C32(0x5b9a775b), SPH_C32(0x6a0db36a), SPH_C32(0xcb4701cb),
SPH_C32(0xbe17cebe), SPH_C32(0x39afe439), SPH_C32(0x4aed334a),
SPH_C32(0x4cff2b4c), SPH_C32(0x58937b58), SPH_C32(0xcf5b11cf),
SPH_C32(0xd0066dd0), SPH_C32(0xefbb91ef), SPH_C32(0xaa7b9eaa),
SPH_C32(0xfbd7c1fb), SPH_C32(0x43d21743), SPH_C32(0x4df82f4d),
SPH_C32(0x3399cc33), SPH_C32(0x85b62285), SPH_C32(0x45c00f45),
SPH_C32(0xf9d9c9f9), SPH_C32(0x020e0802), SPH_C32(0x7f66e77f),
SPH_C32(0x50ab5b50), SPH_C32(0x3cb4f03c), SPH_C32(0x9ff04a9f),
SPH_C32(0xa87596a8), SPH_C32(0x51ac5f51), SPH_C32(0xa344baa3),
SPH_C32(0x40db1b40), SPH_C32(0x8f800a8f), SPH_C32(0x92d37e92),
SPH_C32(0x9dfe429d), SPH_C32(0x38a8e038), SPH_C32(0xf5fdf9f5),
SPH_C32(0xbc19c6bc), SPH_C32(0xb62feeb6), SPH_C32(0xda3045da),
SPH_C32(0x21e78421), SPH_C32(0x10704010), SPH_C32(0xffcbd1ff),
SPH_C32(0xf3efe1f3), SPH_C32(0xd20865d2), SPH_C32(0xcd5519cd),
SPH_C32(0x0c24300c), SPH_C32(0x13794c13), SPH_C32(0xecb29dec),
SPH_C32(0x5f86675f), SPH_C32(0x97c86a97), SPH_C32(0x44c70b44),
SPH_C32(0x17655c17), SPH_C32(0xc46a3dc4), SPH_C32(0xa758aaa7),
SPH_C32(0x7e61e37e), SPH_C32(0x3db3f43d), SPH_C32(0x64278b64),
SPH_C32(0x5d886f5d), SPH_C32(0x194f6419), SPH_C32(0x7342d773),
SPH_C32(0x603b9b60), SPH_C32(0x81aa3281), SPH_C32(0x4ff6274f),
SPH_C32(0xdc225ddc), SPH_C32(0x22ee8822), SPH_C32(0x2ad6a82a),
SPH_C32(0x90dd7690), SPH_C32(0x88951688), SPH_C32(0x46c90346),
SPH_C32(0xeebc95ee), SPH_C32(0xb805d6b8), SPH_C32(0x146c5014),
SPH_C32(0xde2c55de), SPH_C32(0x5e81635e), SPH_C32(0x0b312c0b),
SPH_C32(0xdb3741db), SPH_C32(0xe096ade0), SPH_C32(0x329ec832),
SPH_C32(0x3aa6e83a), SPH_C32(0x0a36280a), SPH_C32(0x49e43f49),
SPH_C32(0x06121806), SPH_C32(0x24fc9024), SPH_C32(0x5c8f6b5c),
SPH_C32(0xc27825c2), SPH_C32(0xd30f61d3), SPH_C32(0xac6986ac),
SPH_C32(0x62359362), SPH_C32(0x91da7291), SPH_C32(0x95c66295),
SPH_C32(0xe48abde4), SPH_C32(0x7974ff79), SPH_C32(0xe783b1e7),
SPH_C32(0xc84e0dc8), SPH_C32(0x3785dc37), SPH_C32(0x6d18af6d),
SPH_C32(0x8d8e028d), SPH_C32(0xd51d79d5), SPH_C32(0x4ef1234e),
SPH_C32(0xa97292a9), SPH_C32(0x6c1fab6c), SPH_C32(0x56b94356),
SPH_C32(0xf4fafdf4), SPH_C32(0xeaa085ea), SPH_C32(0x65208f65),
SPH_C32(0x7a7df37a), SPH_C32(0xae678eae), SPH_C32(0x08382008),
SPH_C32(0xba0bdeba), SPH_C32(0x7873fb78), SPH_C32(0x25fb9425),
SPH_C32(0x2ecab82e), SPH_C32(0x1c54701c), SPH_C32(0xa65faea6),
SPH_C32(0xb421e6b4), SPH_C32(0xc66435c6), SPH_C32(0xe8ae8de8),
SPH_C32(0xdd2559dd), SPH_C32(0x7457cb74), SPH_C32(0x1f5d7c1f),
SPH_C32(0x4bea374b), SPH_C32(0xbd1ec2bd), SPH_C32(0x8b9c1a8b),
SPH_C32(0x8a9b1e8a), SPH_C32(0x704bdb70), SPH_C32(0x3ebaf83e),
SPH_C32(0xb526e2b5), SPH_C32(0x66298366), SPH_C32(0x48e33b48),
SPH_C32(0x03090c03), SPH_C32(0xf6f4f5f6), SPH_C32(0x0e2a380e),
SPH_C32(0x613c9f61), SPH_C32(0x358bd435), SPH_C32(0x57be4757),
SPH_C32(0xb902d2b9), SPH_C32(0x86bf2e86), SPH_C32(0xc17129c1),
SPH_C32(0x1d53741d), SPH_C32(0x9ef74e9e), SPH_C32(0xe191a9e1),
SPH_C32(0xf8decdf8), SPH_C32(0x98e55698), SPH_C32(0x11774411),
SPH_C32(0x6904bf69), SPH_C32(0xd93949d9), SPH_C32(0x8e870e8e),
SPH_C32(0x94c16694), SPH_C32(0x9bec5a9b), SPH_C32(0x1e5a781e),
SPH_C32(0x87b82a87), SPH_C32(0xe9a989e9), SPH_C32(0xce5c15ce),
SPH_C32(0x55b04f55), SPH_C32(0x28d8a028), SPH_C32(0xdf2b51df),
SPH_C32(0x8c89068c), SPH_C32(0xa14ab2a1), SPH_C32(0x89921289),
SPH_C32(0x0d23340d), SPH_C32(0xbf10cabf), SPH_C32(0xe684b5e6),
SPH_C32(0x42d51342), SPH_C32(0x6803bb68), SPH_C32(0x41dc1f41),
SPH_C32(0x99e25299), SPH_C32(0x2dc3b42d), SPH_C32(0x0f2d3c0f),
SPH_C32(0xb03df6b0), SPH_C32(0x54b74b54), SPH_C32(0xbb0cdabb),
SPH_C32(0x16625816)
};
#define TIX4(q, x00, x01, x04, x07, x08, x22, x24, x27, x30) { \
x22 ^= x00; \
x00 = (q); \
x08 ^= x00; \
x01 ^= x24; \
x04 ^= x27; \
x07 ^= x30; \
}
#define CMIX36(x00, x01, x02, x04, x05, x06, x18, x19, x20) { \
x00 ^= x04; \
x01 ^= x05; \
x02 ^= x06; \
x18 ^= x04; \
x19 ^= x05; \
x20 ^= x06; \
}
#define SMIX(x0, x1, x2, x3) { \
uint32_t c0 = 0; \
uint32_t c1 = 0; \
uint32_t c2 = 0; \
uint32_t c3 = 0; \
uint32_t r0 = 0; \
uint32_t r1 = 0; \
uint32_t r2 = 0; \
uint32_t r3 = 0; \
uint32_t tmp; \
tmp = mixtab0(x0 >> 24); \
c0 ^= tmp; \
tmp = mixtab1((x0 >> 16) & 0xFF); \
c0 ^= tmp; \
r1 ^= tmp; \
tmp = mixtab2((x0 >> 8) & 0xFF); \
c0 ^= tmp; \
r2 ^= tmp; \
tmp = mixtab3(x0 & 0xFF); \
c0 ^= tmp; \
r3 ^= tmp; \
tmp = mixtab0(x1 >> 24); \
c1 ^= tmp; \
r0 ^= tmp; \
tmp = mixtab1((x1 >> 16) & 0xFF); \
c1 ^= tmp; \
tmp = mixtab2((x1 >> 8) & 0xFF); \
c1 ^= tmp; \
r2 ^= tmp; \
tmp = mixtab3(x1 & 0xFF); \
c1 ^= tmp; \
r3 ^= tmp; \
tmp = mixtab0(x2 >> 24); \
c2 ^= tmp; \
r0 ^= tmp; \
tmp = mixtab1((x2 >> 16) & 0xFF); \
c2 ^= tmp; \
r1 ^= tmp; \
tmp = mixtab2((x2 >> 8) & 0xFF); \
c2 ^= tmp; \
tmp = mixtab3(x2 & 0xFF); \
c2 ^= tmp; \
r3 ^= tmp; \
tmp = mixtab0(x3 >> 24); \
c3 ^= tmp; \
r0 ^= tmp; \
tmp = mixtab1((x3 >> 16) & 0xFF); \
c3 ^= tmp; \
r1 ^= tmp; \
tmp = mixtab2((x3 >> 8) & 0xFF); \
c3 ^= tmp; \
r2 ^= tmp; \
tmp = mixtab3(x3 & 0xFF); \
c3 ^= tmp; \
x0 = ((c0 ^ r0) & SPH_C32(0xFF000000)) \
| ((c1 ^ r1) & SPH_C32(0x00FF0000)) \
| ((c2 ^ r2) & SPH_C32(0x0000FF00)) \
| ((c3 ^ r3) & SPH_C32(0x000000FF)); \
x1 = ((c1 ^ (r0 << 8)) & SPH_C32(0xFF000000)) \
| ((c2 ^ (r1 << 8)) & SPH_C32(0x00FF0000)) \
| ((c3 ^ (r2 << 8)) & SPH_C32(0x0000FF00)) \
| ((c0 ^ (r3 >> 24)) & SPH_C32(0x000000FF)); \
x2 = ((c2 ^ (r0 << 16)) & SPH_C32(0xFF000000)) \
| ((c3 ^ (r1 << 16)) & SPH_C32(0x00FF0000)) \
| ((c0 ^ (r2 >> 16)) & SPH_C32(0x0000FF00)) \
| ((c1 ^ (r3 >> 16)) & SPH_C32(0x000000FF)); \
x3 = ((c3 ^ (r0 << 24)) & SPH_C32(0xFF000000)) \
| ((c0 ^ (r1 >> 8)) & SPH_C32(0x00FF0000)) \
| ((c1 ^ (r2 >> 8)) & SPH_C32(0x0000FF00)) \
| ((c2 ^ (r3 >> 8)) & SPH_C32(0x000000FF)); \
}
#define ROR3 { \
B33 = S33, B34 = S34, B35 = S35; \
S35 = S32; S34 = S31; S33 = S30; S32 = S29; S31 = S28; S30 = S27; S29 = S26; S28 = S25; S27 = S24; \
S26 = S23; S25 = S22; S24 = S21; S23 = S20; S22 = S19; S21 = S18; S20 = S17; S19 = S16; S18 = S15; \
S17 = S14; S16 = S13; S15 = S12; S14 = S11; S13 = S10; S12 = S09; S11 = S08; S10 = S07; S09 = S06; \
S08 = S05; S07 = S04; S06 = S03; S05 = S02; S04 = S01; S03 = S00; S02 = B35; S01 = B34; S00 = B33; \
}
#define ROR8 { \
B28 = S28, B29 = S29, B30 = S30, B31 = S31, B32 = S32, B33 = S33, B34 = S34, B35 = S35; \
S35 = S27; S34 = S26; S33 = S25; S32 = S24; S31 = S23; S30 = S22; S29 = S21; S28 = S20; S27 = S19; \
S26 = S18; S25 = S17; S24 = S16; S23 = S15; S22 = S14; S21 = S13; S20 = S12; S19 = S11; S18 = S10; \
S17 = S09; S16 = S08; S15 = S07; S14 = S06; S13 = S05; S12 = S04; S11 = S03; S10 = S02; S09 = S01; \
S08 = S00; S07 = B35; S06 = B34; S05 = B33; S04 = B32; S03 = B31; S02 = B30; S01 = B29; S00 = B28; \
}
#define ROR9 { \
B27 = S27, B28 = S28, B29 = S29, B30 = S30, B31 = S31, B32 = S32, B33 = S33, B34 = S34, B35 = S35; \
S35 = S26; S34 = S25; S33 = S24; S32 = S23; S31 = S22; S30 = S21; S29 = S20; S28 = S19; S27 = S18; \
S26 = S17; S25 = S16; S24 = S15; S23 = S14; S22 = S13; S21 = S12; S20 = S11; S19 = S10; S18 = S09; \
S17 = S08; S16 = S07; S15 = S06; S14 = S05; S13 = S04; S12 = S03; S11 = S02; S10 = S01; S09 = S00; \
S08 = B35; S07 = B34; S06 = B33; S05 = B32; S04 = B31; S03 = B30; S02 = B29; S01 = B28; S00 = B27; \
}
#define FUGUE512_3(x, y, z) { \
TIX4(x, S00, S01, S04, S07, S08, S22, S24, S27, S30); \
CMIX36(S33, S34, S35, S01, S02, S03, S15, S16, S17); \
SMIX(S33, S34, S35, S00); \
CMIX36(S30, S31, S32, S34, S35, S00, S12, S13, S14); \
SMIX(S30, S31, S32, S33); \
CMIX36(S27, S28, S29, S31, S32, S33, S09, S10, S11); \
SMIX(S27, S28, S29, S30); \
CMIX36(S24, S25, S26, S28, S29, S30, S06, S07, S08); \
SMIX(S24, S25, S26, S27); \
\
TIX4(y, S24, S25, S28, S31, S32, S10, S12, S15, S18); \
CMIX36(S21, S22, S23, S25, S26, S27, S03, S04, S05); \
SMIX(S21, S22, S23, S24); \
CMIX36(S18, S19, S20, S22, S23, S24, S00, S01, S02); \
SMIX(S18, S19, S20, S21); \
CMIX36(S15, S16, S17, S19, S20, S21, S33, S34, S35); \
SMIX(S15, S16, S17, S18); \
CMIX36(S12, S13, S14, S16, S17, S18, S30, S31, S32); \
SMIX(S12, S13, S14, S15); \
\
TIX4(z, S12, S13, S16, S19, S20, S34, S00, S03, S06); \
CMIX36(S09, S10, S11, S13, S14, S15, S27, S28, S29); \
SMIX(S09, S10, S11, S12); \
CMIX36(S06, S07, S08, S10, S11, S12, S24, S25, S26); \
SMIX(S06, S07, S08, S09); \
CMIX36(S03, S04, S05, S07, S08, S09, S21, S22, S23); \
SMIX(S03, S04, S05, S06); \
CMIX36(S00, S01, S02, S04, S05, S06, S18, S19, S20); \
SMIX(S00, S01, S02, S03); \
}
/***************************************************/
// Die Hash-Funktion
__global__ void x13_fugue512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector)
{
extern __shared__ char mixtabs[];
*((uint32_t*)mixtabs + ( threadIdx.x)) = tex1Dfetch(mixTab0Tex, threadIdx.x);
*((uint32_t*)mixtabs + (256+threadIdx.x)) = tex1Dfetch(mixTab1Tex, threadIdx.x);
*((uint32_t*)mixtabs + (512+threadIdx.x)) = tex1Dfetch(mixTab2Tex, threadIdx.x);
*((uint32_t*)mixtabs + (768+threadIdx.x)) = tex1Dfetch(mixTab3Tex, threadIdx.x);
__syncthreads();
int i;
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
{
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
int hashPosition = nounce - startNounce;
uint32_t *Hash = (uint32_t*)&g_hash[hashPosition<<3];
#pragma unroll 16
for( i = 0; i < 16; i++ )
Hash[i] = SWAB32(Hash[i]);
uint32_t S00, S01, S02, S03, S04, S05, S06, S07, S08, S09;
uint32_t S10, S11, S12, S13, S14, S15, S16, S17, S18, S19;
uint32_t S20, S21, S22, S23, S24, S25, S26, S27, S28, S29;
uint32_t S30, S31, S32, S33, S34, S35;
uint32_t B27, B28, B29, B30, B31, B32, B33, B34, B35;
uint64_t bc = (uint64_t) 64 << 3;
uint32_t bclo = (uint32_t)(bc & 0xFFFFFFFFULL);
uint32_t bchi = (uint32_t)(bc >> 32);
S00 = S01 = S02 = S03 = S04 = S05 = S06 = S07 = S08 = S09 = S10 = S11 = S12 = S13 = S14 = S15 = S16 = S17 = S18 = S19 = 0;
S20 = SPH_C32(0x8807a57e); S21 = SPH_C32(0xe616af75); S22 = SPH_C32(0xc5d3e4db); S23 = SPH_C32(0xac9ab027);
S24 = SPH_C32(0xd915f117); S25 = SPH_C32(0xb6eecc54); S26 = SPH_C32(0x06e8020b); S27 = SPH_C32(0x4a92efd1);
S28 = SPH_C32(0xaac6e2c9); S29 = SPH_C32(0xddb21398); S30 = SPH_C32(0xcae65838); S31 = SPH_C32(0x437f203f);
S32 = SPH_C32(0x25ea78e7); S33 = SPH_C32(0x951fddd6); S34 = SPH_C32(0xda6ed11d); S35 = SPH_C32(0xe13e3567);
FUGUE512_3((Hash[0x0]), (Hash[0x1]), (Hash[0x2]));
FUGUE512_3((Hash[0x3]), (Hash[0x4]), (Hash[0x5]));
FUGUE512_3((Hash[0x6]), (Hash[0x7]), (Hash[0x8]));
FUGUE512_3((Hash[0x9]), (Hash[0xA]), (Hash[0xB]));
FUGUE512_3((Hash[0xC]), (Hash[0xD]), (Hash[0xE]));
FUGUE512_3((Hash[0xF]), bchi, bclo);
#pragma unroll 32
for (i = 0; i < 32; i ++) {
ROR3;
CMIX36(S00, S01, S02, S04, S05, S06, S18, S19, S20);
SMIX(S00, S01, S02, S03);
}
#pragma unroll 13
for (i = 0; i < 13; i ++) {
S04 ^= S00;
S09 ^= S00;
S18 ^= S00;
S27 ^= S00;
ROR9;
SMIX(S00, S01, S02, S03);
S04 ^= S00;
S10 ^= S00;
S18 ^= S00;
S27 ^= S00;
ROR9;
SMIX(S00, S01, S02, S03);
S04 ^= S00;
S10 ^= S00;
S19 ^= S00;
S27 ^= S00;
ROR9;
SMIX(S00, S01, S02, S03);
S04 ^= S00;
S10 ^= S00;
S19 ^= S00;
S28 ^= S00;
ROR8;
SMIX(S00, S01, S02, S03);
}
S04 ^= S00;
S09 ^= S00;
S18 ^= S00;
S27 ^= S00;
Hash[0] = SWAB32(S01);
Hash[1] = SWAB32(S02);
Hash[2] = SWAB32(S03);
Hash[3] = SWAB32(S04);
Hash[4] = SWAB32(S09);
Hash[5] = SWAB32(S10);
Hash[6] = SWAB32(S11);
Hash[7] = SWAB32(S12);
Hash[8] = SWAB32(S18);
Hash[9] = SWAB32(S19);
Hash[10] = SWAB32(S20);
Hash[11] = SWAB32(S21);
Hash[12] = SWAB32(S27);
Hash[13] = SWAB32(S28);
Hash[14] = SWAB32(S29);
Hash[15] = SWAB32(S30);
}
}
#define texDef(texname, texmem, texsource, texsize) \
unsigned int *texmem; \
cudaMalloc(&texmem, texsize); \
cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \
texname.normalized = 0; \
texname.filterMode = cudaFilterModePoint; \
texname.addressMode[0] = cudaAddressModeClamp; \
{ cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned int>(); \
cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); }
__host__ void x13_fugue512_cpu_init(int thr_id, int threads)
{
texDef(mixTab0Tex, mixTab0m, mixtab0_cpu, sizeof(uint32_t)*256);
texDef(mixTab1Tex, mixTab1m, mixtab1_cpu, sizeof(uint32_t)*256);
texDef(mixTab2Tex, mixTab2m, mixtab2_cpu, sizeof(uint32_t)*256);
texDef(mixTab3Tex, mixTab3m, mixtab3_cpu, sizeof(uint32_t)*256);
}
__host__ void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
{
const int threadsperblock = 256;
// berechne wie viele Thread Blocks wir brauchen
dim3 grid((threads + threadsperblock-1)/threadsperblock);
dim3 block(threadsperblock);
// Größe des dynamischen Shared Memory Bereichs
size_t shared_size = 4 * 256 * sizeof(uint32_t);
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
x13_fugue512_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
MyStreamSynchronize(NULL, order, thr_id);
}

764
x13/cuda_x13_hamsi512.cu

@ -0,0 +1,764 @@
/*
* Quick and dirty addition of Hamsi-512 for X13
*
* Built on cbuchner1's implementation, actual hashing code
* heavily based on phm's sgminer
*
*/
/*
* X13 kernel implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2014 phm
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author phm <phm@inbox.com>
*/
// aus heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
typedef unsigned long long uint64_t;
#define SPH_C64(x) ((uint64_t)(x ## ULL))
#define SPH_C32(x) ((uint32_t)(x ## U))
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
#define SWAB32(x) ( __byte_perm(x, x, 0x0123) )
#if __CUDA_ARCH__ < 350
// Kepler (Compute 3.0)
#define ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
#else
// Kepler (Compute 3.5)
#define ROTL32(x, n) __funnelshift_l( (x), (x), (n) )
#endif
static __constant__ uint32_t d_alpha_n[32];
static __constant__ uint32_t d_alpha_f[32];
static __constant__ uint32_t d_T512[64][16];
static const uint32_t alpha_n[] = {
SPH_C32(0xff00f0f0), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0cccc),
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00),
SPH_C32(0xaaaacccc), SPH_C32(0xf0f0ff00), SPH_C32(0xf0f0cccc),
SPH_C32(0xaaaaff00), SPH_C32(0xccccff00), SPH_C32(0xaaaaf0f0),
SPH_C32(0xaaaaf0f0), SPH_C32(0xff00cccc), SPH_C32(0xccccf0f0),
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xff00f0f0),
SPH_C32(0xff00aaaa), SPH_C32(0xf0f0cccc), SPH_C32(0xf0f0ff00),
SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00), SPH_C32(0xaaaacccc),
SPH_C32(0xaaaaff00), SPH_C32(0xf0f0cccc), SPH_C32(0xaaaaf0f0),
SPH_C32(0xccccff00), SPH_C32(0xff00cccc), SPH_C32(0xaaaaf0f0),
SPH_C32(0xff00aaaa), SPH_C32(0xccccf0f0)
};
static const uint32_t alpha_f[] = {
SPH_C32(0xcaf9639c), SPH_C32(0x0ff0f9c0), SPH_C32(0x639c0ff0),
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9),
SPH_C32(0xf9c00ff0), SPH_C32(0x639ccaf9), SPH_C32(0x639c0ff0),
SPH_C32(0xf9c0caf9), SPH_C32(0x0ff0caf9), SPH_C32(0xf9c0639c),
SPH_C32(0xf9c0639c), SPH_C32(0xcaf90ff0), SPH_C32(0x0ff0639c),
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0xcaf9639c),
SPH_C32(0xcaf9f9c0), SPH_C32(0x639c0ff0), SPH_C32(0x639ccaf9),
SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9), SPH_C32(0xf9c00ff0),
SPH_C32(0xf9c0caf9), SPH_C32(0x639c0ff0), SPH_C32(0xf9c0639c),
SPH_C32(0x0ff0caf9), SPH_C32(0xcaf90ff0), SPH_C32(0xf9c0639c),
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0639c)
};
#define hamsi_s00 m0
#define hamsi_s01 m1
#define hamsi_s02 c0
#define hamsi_s03 c1
#define hamsi_s04 m2
#define hamsi_s05 m3
#define hamsi_s06 c2
#define hamsi_s07 c3
#define hamsi_s08 c4
#define hamsi_s09 c5
#define hamsi_s0A m4
#define hamsi_s0B m5
#define hamsi_s0C c6
#define hamsi_s0D c7
#define hamsi_s0E m6
#define hamsi_s0F m7
#define hamsi_s10 m8
#define hamsi_s11 m9
#define hamsi_s12 c8
#define hamsi_s13 c9
#define hamsi_s14 mA
#define hamsi_s15 mB
#define hamsi_s16 cA
#define hamsi_s17 cB
#define hamsi_s18 cC
#define hamsi_s19 cD
#define hamsi_s1A mC
#define hamsi_s1B mD
#define hamsi_s1C cE
#define hamsi_s1D cF
#define hamsi_s1E mE
#define hamsi_s1F mF
#define SBOX(a, b, c, d) { \
uint32_t t; \
t = (a); \
(a) &= (c); \
(a) ^= (d); \
(c) ^= (b); \
(c) ^= (a); \
(d) |= t; \
(d) ^= (b); \
t ^= (c); \
(b) = (d); \
(d) |= t; \
(d) ^= (a); \
(a) &= (b); \
t ^= (a); \
(b) ^= (d); \
(b) ^= t; \
(a) = (c); \
(c) = (b); \
(b) = (d); \
(d) = SPH_T32(~t); \
}
#define HAMSI_L(a, b, c, d) { \
(a) = ROTL32(a, 13); \
(c) = ROTL32(c, 3); \
(b) ^= (a) ^ (c); \
(d) ^= (c) ^ SPH_T32((a) << 3); \
(b) = ROTL32(b, 1); \
(d) = ROTL32(d, 7); \
(a) ^= (b) ^ (d); \
(c) ^= (d) ^ SPH_T32((b) << 7); \
(a) = ROTL32(a, 5); \
(c) = ROTL32(c, 22); \
}
#define ROUND_BIG(rc, alpha) { \
hamsi_s00 ^= alpha[0x00]; \
hamsi_s01 ^= alpha[0x01] ^ (uint32_t)(rc); \
hamsi_s02 ^= alpha[0x02]; \
hamsi_s03 ^= alpha[0x03]; \
hamsi_s04 ^= alpha[0x04]; \
hamsi_s05 ^= alpha[0x05]; \
hamsi_s06 ^= alpha[0x06]; \
hamsi_s07 ^= alpha[0x07]; \
hamsi_s08 ^= alpha[0x08]; \
hamsi_s09 ^= alpha[0x09]; \
hamsi_s0A ^= alpha[0x0A]; \
hamsi_s0B ^= alpha[0x0B]; \
hamsi_s0C ^= alpha[0x0C]; \
hamsi_s0D ^= alpha[0x0D]; \
hamsi_s0E ^= alpha[0x0E]; \
hamsi_s0F ^= alpha[0x0F]; \
hamsi_s10 ^= alpha[0x10]; \
hamsi_s11 ^= alpha[0x11]; \
hamsi_s12 ^= alpha[0x12]; \
hamsi_s13 ^= alpha[0x13]; \
hamsi_s14 ^= alpha[0x14]; \
hamsi_s15 ^= alpha[0x15]; \
hamsi_s16 ^= alpha[0x16]; \
hamsi_s17 ^= alpha[0x17]; \
hamsi_s18 ^= alpha[0x18]; \
hamsi_s19 ^= alpha[0x19]; \
hamsi_s1A ^= alpha[0x1A]; \
hamsi_s1B ^= alpha[0x1B]; \
hamsi_s1C ^= alpha[0x1C]; \
hamsi_s1D ^= alpha[0x1D]; \
hamsi_s1E ^= alpha[0x1E]; \
hamsi_s1F ^= alpha[0x1F]; \
SBOX(hamsi_s00, hamsi_s08, hamsi_s10, hamsi_s18); \
SBOX(hamsi_s01, hamsi_s09, hamsi_s11, hamsi_s19); \
SBOX(hamsi_s02, hamsi_s0A, hamsi_s12, hamsi_s1A); \
SBOX(hamsi_s03, hamsi_s0B, hamsi_s13, hamsi_s1B); \
SBOX(hamsi_s04, hamsi_s0C, hamsi_s14, hamsi_s1C); \
SBOX(hamsi_s05, hamsi_s0D, hamsi_s15, hamsi_s1D); \
SBOX(hamsi_s06, hamsi_s0E, hamsi_s16, hamsi_s1E); \
SBOX(hamsi_s07, hamsi_s0F, hamsi_s17, hamsi_s1F); \
HAMSI_L(hamsi_s00, hamsi_s09, hamsi_s12, hamsi_s1B); \
HAMSI_L(hamsi_s01, hamsi_s0A, hamsi_s13, hamsi_s1C); \
HAMSI_L(hamsi_s02, hamsi_s0B, hamsi_s14, hamsi_s1D); \
HAMSI_L(hamsi_s03, hamsi_s0C, hamsi_s15, hamsi_s1E); \
HAMSI_L(hamsi_s04, hamsi_s0D, hamsi_s16, hamsi_s1F); \
HAMSI_L(hamsi_s05, hamsi_s0E, hamsi_s17, hamsi_s18); \
HAMSI_L(hamsi_s06, hamsi_s0F, hamsi_s10, hamsi_s19); \
HAMSI_L(hamsi_s07, hamsi_s08, hamsi_s11, hamsi_s1A); \
HAMSI_L(hamsi_s00, hamsi_s02, hamsi_s05, hamsi_s07); \
HAMSI_L(hamsi_s10, hamsi_s13, hamsi_s15, hamsi_s16); \
HAMSI_L(hamsi_s09, hamsi_s0B, hamsi_s0C, hamsi_s0E); \
HAMSI_L(hamsi_s19, hamsi_s1A, hamsi_s1C, hamsi_s1F); \
}
#define P_BIG { \
ROUND_BIG(0, d_alpha_n); \
ROUND_BIG(1, d_alpha_n); \
ROUND_BIG(2, d_alpha_n); \
ROUND_BIG(3, d_alpha_n); \
ROUND_BIG(4, d_alpha_n); \
ROUND_BIG(5, d_alpha_n); \
}
#define PF_BIG { \
ROUND_BIG(0, d_alpha_f); \
ROUND_BIG(1, d_alpha_f); \
ROUND_BIG(2, d_alpha_f); \
ROUND_BIG(3, d_alpha_f); \
ROUND_BIG(4, d_alpha_f); \
ROUND_BIG(5, d_alpha_f); \
ROUND_BIG(6, d_alpha_f); \
ROUND_BIG(7, d_alpha_f); \
ROUND_BIG(8, d_alpha_f); \
ROUND_BIG(9, d_alpha_f); \
ROUND_BIG(10, d_alpha_f); \
ROUND_BIG(11, d_alpha_f); \
}
#define T_BIG { \
/* order is important */ \
cF = (h[0xF] ^= hamsi_s17); \
cE = (h[0xE] ^= hamsi_s16); \
cD = (h[0xD] ^= hamsi_s15); \
cC = (h[0xC] ^= hamsi_s14); \
cB = (h[0xB] ^= hamsi_s13); \
cA = (h[0xA] ^= hamsi_s12); \
c9 = (h[0x9] ^= hamsi_s11); \
c8 = (h[0x8] ^= hamsi_s10); \
c7 = (h[0x7] ^= hamsi_s07); \
c6 = (h[0x6] ^= hamsi_s06); \
c5 = (h[0x5] ^= hamsi_s05); \
c4 = (h[0x4] ^= hamsi_s04); \
c3 = (h[0x3] ^= hamsi_s03); \
c2 = (h[0x2] ^= hamsi_s02); \
c1 = (h[0x1] ^= hamsi_s01); \
c0 = (h[0x0] ^= hamsi_s00); \
}
static const uint32_t T512[64][16] = {
{ SPH_C32(0xef0b0270), SPH_C32(0x3afd0000), SPH_C32(0x5dae0000),
SPH_C32(0x69490000), SPH_C32(0x9b0f3c06), SPH_C32(0x4405b5f9),
SPH_C32(0x66140a51), SPH_C32(0x924f5d0a), SPH_C32(0xc96b0030),
SPH_C32(0xe7250000), SPH_C32(0x2f840000), SPH_C32(0x264f0000),
SPH_C32(0x08695bf9), SPH_C32(0x6dfcf137), SPH_C32(0x509f6984),
SPH_C32(0x9e69af68) },
{ SPH_C32(0xc96b0030), SPH_C32(0xe7250000), SPH_C32(0x2f840000),
SPH_C32(0x264f0000), SPH_C32(0x08695bf9), SPH_C32(0x6dfcf137),
SPH_C32(0x509f6984), SPH_C32(0x9e69af68), SPH_C32(0x26600240),
SPH_C32(0xddd80000), SPH_C32(0x722a0000), SPH_C32(0x4f060000),
SPH_C32(0x936667ff), SPH_C32(0x29f944ce), SPH_C32(0x368b63d5),
SPH_C32(0x0c26f262) },
{ SPH_C32(0x145a3c00), SPH_C32(0xb9e90000), SPH_C32(0x61270000),
SPH_C32(0xf1610000), SPH_C32(0xce613d6c), SPH_C32(0xb0493d78),
SPH_C32(0x47a96720), SPH_C32(0xe18e24c5), SPH_C32(0x23671400),
SPH_C32(0xc8b90000), SPH_C32(0xf4c70000), SPH_C32(0xfb750000),
SPH_C32(0x73cd2465), SPH_C32(0xf8a6a549), SPH_C32(0x02c40a3f),
SPH_C32(0xdc24e61f) },
{ SPH_C32(0x23671400), SPH_C32(0xc8b90000), SPH_C32(0xf4c70000),
SPH_C32(0xfb750000), SPH_C32(0x73cd2465), SPH_C32(0xf8a6a549),
SPH_C32(0x02c40a3f), SPH_C32(0xdc24e61f), SPH_C32(0x373d2800),
SPH_C32(0x71500000), SPH_C32(0x95e00000), SPH_C32(0x0a140000),
SPH_C32(0xbdac1909), SPH_C32(0x48ef9831), SPH_C32(0x456d6d1f),
SPH_C32(0x3daac2da) },
{ SPH_C32(0x54285c00), SPH_C32(0xeaed0000), SPH_C32(0xc5d60000),
SPH_C32(0xa1c50000), SPH_C32(0xb3a26770), SPH_C32(0x94a5c4e1),
SPH_C32(0x6bb0419d), SPH_C32(0x551b3782), SPH_C32(0x9cbb1800),
SPH_C32(0xb0d30000), SPH_C32(0x92510000), SPH_C32(0xed930000),
SPH_C32(0x593a4345), SPH_C32(0xe114d5f4), SPH_C32(0x430633da),
SPH_C32(0x78cace29) },
{ SPH_C32(0x9cbb1800), SPH_C32(0xb0d30000), SPH_C32(0x92510000),
SPH_C32(0xed930000), SPH_C32(0x593a4345), SPH_C32(0xe114d5f4),
SPH_C32(0x430633da), SPH_C32(0x78cace29), SPH_C32(0xc8934400),
SPH_C32(0x5a3e0000), SPH_C32(0x57870000), SPH_C32(0x4c560000),
SPH_C32(0xea982435), SPH_C32(0x75b11115), SPH_C32(0x28b67247),
SPH_C32(0x2dd1f9ab) },
{ SPH_C32(0x29449c00), SPH_C32(0x64e70000), SPH_C32(0xf24b0000),
SPH_C32(0xc2f30000), SPH_C32(0x0ede4e8f), SPH_C32(0x56c23745),
SPH_C32(0xf3e04259), SPH_C32(0x8d0d9ec4), SPH_C32(0x466d0c00),
SPH_C32(0x08620000), SPH_C32(0xdd5d0000), SPH_C32(0xbadd0000),
SPH_C32(0x6a927942), SPH_C32(0x441f2b93), SPH_C32(0x218ace6f),
SPH_C32(0xbf2c0be2) },
{ SPH_C32(0x466d0c00), SPH_C32(0x08620000), SPH_C32(0xdd5d0000),
SPH_C32(0xbadd0000), SPH_C32(0x6a927942), SPH_C32(0x441f2b93),
SPH_C32(0x218ace6f), SPH_C32(0xbf2c0be2), SPH_C32(0x6f299000),
SPH_C32(0x6c850000), SPH_C32(0x2f160000), SPH_C32(0x782e0000),
SPH_C32(0x644c37cd), SPH_C32(0x12dd1cd6), SPH_C32(0xd26a8c36),
SPH_C32(0x32219526) },
{ SPH_C32(0xf6800005), SPH_C32(0x3443c000), SPH_C32(0x24070000),
SPH_C32(0x8f3d0000), SPH_C32(0x21373bfb), SPH_C32(0x0ab8d5ae),
SPH_C32(0xcdc58b19), SPH_C32(0xd795ba31), SPH_C32(0xa67f0001),
SPH_C32(0x71378000), SPH_C32(0x19fc0000), SPH_C32(0x96db0000),
SPH_C32(0x3a8b6dfd), SPH_C32(0xebcaaef3), SPH_C32(0x2c6d478f),
SPH_C32(0xac8e6c88) },
{ SPH_C32(0xa67f0001), SPH_C32(0x71378000), SPH_C32(0x19fc0000),
SPH_C32(0x96db0000), SPH_C32(0x3a8b6dfd), SPH_C32(0xebcaaef3),
SPH_C32(0x2c6d478f), SPH_C32(0xac8e6c88), SPH_C32(0x50ff0004),
SPH_C32(0x45744000), SPH_C32(0x3dfb0000), SPH_C32(0x19e60000),
SPH_C32(0x1bbc5606), SPH_C32(0xe1727b5d), SPH_C32(0xe1a8cc96),
SPH_C32(0x7b1bd6b9) },
{ SPH_C32(0xf7750009), SPH_C32(0xcf3cc000), SPH_C32(0xc3d60000),
SPH_C32(0x04920000), SPH_C32(0x029519a9), SPH_C32(0xf8e836ba),
SPH_C32(0x7a87f14e), SPH_C32(0x9e16981a), SPH_C32(0xd46a0000),
SPH_C32(0x8dc8c000), SPH_C32(0xa5af0000), SPH_C32(0x4a290000),
SPH_C32(0xfc4e427a), SPH_C32(0xc9b4866c), SPH_C32(0x98369604),
SPH_C32(0xf746c320) },
{ SPH_C32(0xd46a0000), SPH_C32(0x8dc8c000), SPH_C32(0xa5af0000),
SPH_C32(0x4a290000), SPH_C32(0xfc4e427a), SPH_C32(0xc9b4866c),
SPH_C32(0x98369604), SPH_C32(0xf746c320), SPH_C32(0x231f0009),
SPH_C32(0x42f40000), SPH_C32(0x66790000), SPH_C32(0x4ebb0000),
SPH_C32(0xfedb5bd3), SPH_C32(0x315cb0d6), SPH_C32(0xe2b1674a),
SPH_C32(0x69505b3a) },
{ SPH_C32(0x774400f0), SPH_C32(0xf15a0000), SPH_C32(0xf5b20000),
SPH_C32(0x34140000), SPH_C32(0x89377e8c), SPH_C32(0x5a8bec25),
SPH_C32(0x0bc3cd1e), SPH_C32(0xcf3775cb), SPH_C32(0xf46c0050),
SPH_C32(0x96180000), SPH_C32(0x14a50000), SPH_C32(0x031f0000),
SPH_C32(0x42947eb8), SPH_C32(0x66bf7e19), SPH_C32(0x9ca470d2),
SPH_C32(0x8a341574) },
{ SPH_C32(0xf46c0050), SPH_C32(0x96180000), SPH_C32(0x14a50000),
SPH_C32(0x031f0000), SPH_C32(0x42947eb8), SPH_C32(0x66bf7e19),
SPH_C32(0x9ca470d2), SPH_C32(0x8a341574), SPH_C32(0x832800a0),
SPH_C32(0x67420000), SPH_C32(0xe1170000), SPH_C32(0x370b0000),
SPH_C32(0xcba30034), SPH_C32(0x3c34923c), SPH_C32(0x9767bdcc),
SPH_C32(0x450360bf) },
{ SPH_C32(0xe8870170), SPH_C32(0x9d720000), SPH_C32(0x12db0000),
SPH_C32(0xd4220000), SPH_C32(0xf2886b27), SPH_C32(0xa921e543),
SPH_C32(0x4ef8b518), SPH_C32(0x618813b1), SPH_C32(0xb4370060),
SPH_C32(0x0c4c0000), SPH_C32(0x56c20000), SPH_C32(0x5cae0000),
SPH_C32(0x94541f3f), SPH_C32(0x3b3ef825), SPH_C32(0x1b365f3d),
SPH_C32(0xf3d45758) },
{ SPH_C32(0xb4370060), SPH_C32(0x0c4c0000), SPH_C32(0x56c20000),
SPH_C32(0x5cae0000), SPH_C32(0x94541f3f), SPH_C32(0x3b3ef825),
SPH_C32(0x1b365f3d), SPH_C32(0xf3d45758), SPH_C32(0x5cb00110),
SPH_C32(0x913e0000), SPH_C32(0x44190000), SPH_C32(0x888c0000),
SPH_C32(0x66dc7418), SPH_C32(0x921f1d66), SPH_C32(0x55ceea25),
SPH_C32(0x925c44e9) },
{ SPH_C32(0x0c720000), SPH_C32(0x49e50f00), SPH_C32(0x42790000),
SPH_C32(0x5cea0000), SPH_C32(0x33aa301a), SPH_C32(0x15822514),
SPH_C32(0x95a34b7b), SPH_C32(0xb44b0090), SPH_C32(0xfe220000),
SPH_C32(0xa7580500), SPH_C32(0x25d10000), SPH_C32(0xf7600000),
SPH_C32(0x893178da), SPH_C32(0x1fd4f860), SPH_C32(0x4ed0a315),
SPH_C32(0xa123ff9f) },
{ SPH_C32(0xfe220000), SPH_C32(0xa7580500), SPH_C32(0x25d10000),
SPH_C32(0xf7600000), SPH_C32(0x893178da), SPH_C32(0x1fd4f860),
SPH_C32(0x4ed0a315), SPH_C32(0xa123ff9f), SPH_C32(0xf2500000),
SPH_C32(0xeebd0a00), SPH_C32(0x67a80000), SPH_C32(0xab8a0000),
SPH_C32(0xba9b48c0), SPH_C32(0x0a56dd74), SPH_C32(0xdb73e86e),
SPH_C32(0x1568ff0f) },
{ SPH_C32(0x45180000), SPH_C32(0xa5b51700), SPH_C32(0xf96a0000),
SPH_C32(0x3b480000), SPH_C32(0x1ecc142c), SPH_C32(0x231395d6),
SPH_C32(0x16bca6b0), SPH_C32(0xdf33f4df), SPH_C32(0xb83d0000),
SPH_C32(0x16710600), SPH_C32(0x379a0000), SPH_C32(0xf5b10000),
SPH_C32(0x228161ac), SPH_C32(0xae48f145), SPH_C32(0x66241616),
SPH_C32(0xc5c1eb3e) },
{ SPH_C32(0xb83d0000), SPH_C32(0x16710600), SPH_C32(0x379a0000),
SPH_C32(0xf5b10000), SPH_C32(0x228161ac), SPH_C32(0xae48f145),
SPH_C32(0x66241616), SPH_C32(0xc5c1eb3e), SPH_C32(0xfd250000),
SPH_C32(0xb3c41100), SPH_C32(0xcef00000), SPH_C32(0xcef90000),
SPH_C32(0x3c4d7580), SPH_C32(0x8d5b6493), SPH_C32(0x7098b0a6),
SPH_C32(0x1af21fe1) },
{ SPH_C32(0x75a40000), SPH_C32(0xc28b2700), SPH_C32(0x94a40000),
SPH_C32(0x90f50000), SPH_C32(0xfb7857e0), SPH_C32(0x49ce0bae),
SPH_C32(0x1767c483), SPH_C32(0xaedf667e), SPH_C32(0xd1660000),
SPH_C32(0x1bbc0300), SPH_C32(0x9eec0000), SPH_C32(0xf6940000),
SPH_C32(0x03024527), SPH_C32(0xcf70fcf2), SPH_C32(0xb4431b17),
SPH_C32(0x857f3c2b) },
{ SPH_C32(0xd1660000), SPH_C32(0x1bbc0300), SPH_C32(0x9eec0000),
SPH_C32(0xf6940000), SPH_C32(0x03024527), SPH_C32(0xcf70fcf2),
SPH_C32(0xb4431b17), SPH_C32(0x857f3c2b), SPH_C32(0xa4c20000),
SPH_C32(0xd9372400), SPH_C32(0x0a480000), SPH_C32(0x66610000),
SPH_C32(0xf87a12c7), SPH_C32(0x86bef75c), SPH_C32(0xa324df94),
SPH_C32(0x2ba05a55) },
{ SPH_C32(0x75c90003), SPH_C32(0x0e10c000), SPH_C32(0xd1200000),
SPH_C32(0xbaea0000), SPH_C32(0x8bc42f3e), SPH_C32(0x8758b757),
SPH_C32(0xbb28761d), SPH_C32(0x00b72e2b), SPH_C32(0xeecf0001),
SPH_C32(0x6f564000), SPH_C32(0xf33e0000), SPH_C32(0xa79e0000),
SPH_C32(0xbdb57219), SPH_C32(0xb711ebc5), SPH_C32(0x4a3b40ba),
SPH_C32(0xfeabf254) },
{ SPH_C32(0xeecf0001), SPH_C32(0x6f564000), SPH_C32(0xf33e0000),
SPH_C32(0xa79e0000), SPH_C32(0xbdb57219), SPH_C32(0xb711ebc5),
SPH_C32(0x4a3b40ba), SPH_C32(0xfeabf254), SPH_C32(0x9b060002),
SPH_C32(0x61468000), SPH_C32(0x221e0000), SPH_C32(0x1d740000),
SPH_C32(0x36715d27), SPH_C32(0x30495c92), SPH_C32(0xf11336a7),
SPH_C32(0xfe1cdc7f) },
{ SPH_C32(0x86790000), SPH_C32(0x3f390002), SPH_C32(0xe19ae000),
SPH_C32(0x98560000), SPH_C32(0x9565670e), SPH_C32(0x4e88c8ea),
SPH_C32(0xd3dd4944), SPH_C32(0x161ddab9), SPH_C32(0x30b70000),
SPH_C32(0xe5d00000), SPH_C32(0xf4f46000), SPH_C32(0x42c40000),
SPH_C32(0x63b83d6a), SPH_C32(0x78ba9460), SPH_C32(0x21afa1ea),
SPH_C32(0xb0a51834) },
{ SPH_C32(0x30b70000), SPH_C32(0xe5d00000), SPH_C32(0xf4f46000),
SPH_C32(0x42c40000), SPH_C32(0x63b83d6a), SPH_C32(0x78ba9460),
SPH_C32(0x21afa1ea), SPH_C32(0xb0a51834), SPH_C32(0xb6ce0000),
SPH_C32(0xdae90002), SPH_C32(0x156e8000), SPH_C32(0xda920000),
SPH_C32(0xf6dd5a64), SPH_C32(0x36325c8a), SPH_C32(0xf272e8ae),
SPH_C32(0xa6b8c28d) },
{ SPH_C32(0x14190000), SPH_C32(0x23ca003c), SPH_C32(0x50df0000),
SPH_C32(0x44b60000), SPH_C32(0x1b6c67b0), SPH_C32(0x3cf3ac75),
SPH_C32(0x61e610b0), SPH_C32(0xdbcadb80), SPH_C32(0xe3430000),
SPH_C32(0x3a4e0014), SPH_C32(0xf2c60000), SPH_C32(0xaa4e0000),
SPH_C32(0xdb1e42a6), SPH_C32(0x256bbe15), SPH_C32(0x123db156),
SPH_C32(0x3a4e99d7) },
{ SPH_C32(0xe3430000), SPH_C32(0x3a4e0014), SPH_C32(0xf2c60000),
SPH_C32(0xaa4e0000), SPH_C32(0xdb1e42a6), SPH_C32(0x256bbe15),
SPH_C32(0x123db156), SPH_C32(0x3a4e99d7), SPH_C32(0xf75a0000),
SPH_C32(0x19840028), SPH_C32(0xa2190000), SPH_C32(0xeef80000),
SPH_C32(0xc0722516), SPH_C32(0x19981260), SPH_C32(0x73dba1e6),
SPH_C32(0xe1844257) },
{ SPH_C32(0x54500000), SPH_C32(0x0671005c), SPH_C32(0x25ae0000),
SPH_C32(0x6a1e0000), SPH_C32(0x2ea54edf), SPH_C32(0x664e8512),
SPH_C32(0xbfba18c3), SPH_C32(0x7e715d17), SPH_C32(0xbc8d0000),
SPH_C32(0xfc3b0018), SPH_C32(0x19830000), SPH_C32(0xd10b0000),
SPH_C32(0xae1878c4), SPH_C32(0x42a69856), SPH_C32(0x0012da37),
SPH_C32(0x2c3b504e) },
{ SPH_C32(0xbc8d0000), SPH_C32(0xfc3b0018), SPH_C32(0x19830000),
SPH_C32(0xd10b0000), SPH_C32(0xae1878c4), SPH_C32(0x42a69856),
SPH_C32(0x0012da37), SPH_C32(0x2c3b504e), SPH_C32(0xe8dd0000),
SPH_C32(0xfa4a0044), SPH_C32(0x3c2d0000), SPH_C32(0xbb150000),
SPH_C32(0x80bd361b), SPH_C32(0x24e81d44), SPH_C32(0xbfa8c2f4),
SPH_C32(0x524a0d59) },
{ SPH_C32(0x69510000), SPH_C32(0xd4e1009c), SPH_C32(0xc3230000),
SPH_C32(0xac2f0000), SPH_C32(0xe4950bae), SPH_C32(0xcea415dc),
SPH_C32(0x87ec287c), SPH_C32(0xbce1a3ce), SPH_C32(0xc6730000),
SPH_C32(0xaf8d000c), SPH_C32(0xa4c10000), SPH_C32(0x218d0000),
SPH_C32(0x23111587), SPH_C32(0x7913512f), SPH_C32(0x1d28ac88),
SPH_C32(0x378dd173) },
{ SPH_C32(0xc6730000), SPH_C32(0xaf8d000c), SPH_C32(0xa4c10000),
SPH_C32(0x218d0000), SPH_C32(0x23111587), SPH_C32(0x7913512f),
SPH_C32(0x1d28ac88), SPH_C32(0x378dd173), SPH_C32(0xaf220000),
SPH_C32(0x7b6c0090), SPH_C32(0x67e20000), SPH_C32(0x8da20000),
SPH_C32(0xc7841e29), SPH_C32(0xb7b744f3), SPH_C32(0x9ac484f4),
SPH_C32(0x8b6c72bd) },
{ SPH_C32(0xcc140000), SPH_C32(0xa5630000), SPH_C32(0x5ab90780),
SPH_C32(0x3b500000), SPH_C32(0x4bd013ff), SPH_C32(0x879b3418),
SPH_C32(0x694348c1), SPH_C32(0xca5a87fe), SPH_C32(0x819e0000),
SPH_C32(0xec570000), SPH_C32(0x66320280), SPH_C32(0x95f30000),
SPH_C32(0x5da92802), SPH_C32(0x48f43cbc), SPH_C32(0xe65aa22d),
SPH_C32(0x8e67b7fa) },
{ SPH_C32(0x819e0000), SPH_C32(0xec570000), SPH_C32(0x66320280),
SPH_C32(0x95f30000), SPH_C32(0x5da92802), SPH_C32(0x48f43cbc),
SPH_C32(0xe65aa22d), SPH_C32(0x8e67b7fa), SPH_C32(0x4d8a0000),
SPH_C32(0x49340000), SPH_C32(0x3c8b0500), SPH_C32(0xaea30000),
SPH_C32(0x16793bfd), SPH_C32(0xcf6f08a4), SPH_C32(0x8f19eaec),
SPH_C32(0x443d3004) },
{ SPH_C32(0x78230000), SPH_C32(0x12fc0000), SPH_C32(0xa93a0b80),
SPH_C32(0x90a50000), SPH_C32(0x713e2879), SPH_C32(0x7ee98924),
SPH_C32(0xf08ca062), SPH_C32(0x636f8bab), SPH_C32(0x02af0000),
SPH_C32(0xb7280000), SPH_C32(0xba1c0300), SPH_C32(0x56980000),
SPH_C32(0xba8d45d3), SPH_C32(0x8048c667), SPH_C32(0xa95c149a),
SPH_C32(0xf4f6ea7b) },
{ SPH_C32(0x02af0000), SPH_C32(0xb7280000), SPH_C32(0xba1c0300),
SPH_C32(0x56980000), SPH_C32(0xba8d45d3), SPH_C32(0x8048c667),
SPH_C32(0xa95c149a), SPH_C32(0xf4f6ea7b), SPH_C32(0x7a8c0000),
SPH_C32(0xa5d40000), SPH_C32(0x13260880), SPH_C32(0xc63d0000),
SPH_C32(0xcbb36daa), SPH_C32(0xfea14f43), SPH_C32(0x59d0b4f8),
SPH_C32(0x979961d0) },
{ SPH_C32(0xac480000), SPH_C32(0x1ba60000), SPH_C32(0x45fb1380),
SPH_C32(0x03430000), SPH_C32(0x5a85316a), SPH_C32(0x1fb250b6),
SPH_C32(0xfe72c7fe), SPH_C32(0x91e478f6), SPH_C32(0x1e4e0000),
SPH_C32(0xdecf0000), SPH_C32(0x6df80180), SPH_C32(0x77240000),
SPH_C32(0xec47079e), SPH_C32(0xf4a0694e), SPH_C32(0xcda31812),
SPH_C32(0x98aa496e) },
{ SPH_C32(0x1e4e0000), SPH_C32(0xdecf0000), SPH_C32(0x6df80180),
SPH_C32(0x77240000), SPH_C32(0xec47079e), SPH_C32(0xf4a0694e),
SPH_C32(0xcda31812), SPH_C32(0x98aa496e), SPH_C32(0xb2060000),
SPH_C32(0xc5690000), SPH_C32(0x28031200), SPH_C32(0x74670000),
SPH_C32(0xb6c236f4), SPH_C32(0xeb1239f8), SPH_C32(0x33d1dfec),
SPH_C32(0x094e3198) },
{ SPH_C32(0xaec30000), SPH_C32(0x9c4f0001), SPH_C32(0x79d1e000),
SPH_C32(0x2c150000), SPH_C32(0x45cc75b3), SPH_C32(0x6650b736),
SPH_C32(0xab92f78f), SPH_C32(0xa312567b), SPH_C32(0xdb250000),
SPH_C32(0x09290000), SPH_C32(0x49aac000), SPH_C32(0x81e10000),
SPH_C32(0xcafe6b59), SPH_C32(0x42793431), SPH_C32(0x43566b76),
SPH_C32(0xe86cba2e) },
{ SPH_C32(0xdb250000), SPH_C32(0x09290000), SPH_C32(0x49aac000),
SPH_C32(0x81e10000), SPH_C32(0xcafe6b59), SPH_C32(0x42793431),
SPH_C32(0x43566b76), SPH_C32(0xe86cba2e), SPH_C32(0x75e60000),
SPH_C32(0x95660001), SPH_C32(0x307b2000), SPH_C32(0xadf40000),
SPH_C32(0x8f321eea), SPH_C32(0x24298307), SPH_C32(0xe8c49cf9),
SPH_C32(0x4b7eec55) },
{ SPH_C32(0x58430000), SPH_C32(0x807e0000), SPH_C32(0x78330001),
SPH_C32(0xc66b3800), SPH_C32(0xe7375cdc), SPH_C32(0x79ad3fdd),
SPH_C32(0xac73fe6f), SPH_C32(0x3a4479b1), SPH_C32(0x1d5a0000),
SPH_C32(0x2b720000), SPH_C32(0x488d0000), SPH_C32(0xaf611800),
SPH_C32(0x25cb2ec5), SPH_C32(0xc879bfd0), SPH_C32(0x81a20429),
SPH_C32(0x1e7536a6) },
{ SPH_C32(0x1d5a0000), SPH_C32(0x2b720000), SPH_C32(0x488d0000),
SPH_C32(0xaf611800), SPH_C32(0x25cb2ec5), SPH_C32(0xc879bfd0),
SPH_C32(0x81a20429), SPH_C32(0x1e7536a6), SPH_C32(0x45190000),
SPH_C32(0xab0c0000), SPH_C32(0x30be0001), SPH_C32(0x690a2000),
SPH_C32(0xc2fc7219), SPH_C32(0xb1d4800d), SPH_C32(0x2dd1fa46),
SPH_C32(0x24314f17) },
{ SPH_C32(0xa53b0000), SPH_C32(0x14260000), SPH_C32(0x4e30001e),
SPH_C32(0x7cae0000), SPH_C32(0x8f9e0dd5), SPH_C32(0x78dfaa3d),
SPH_C32(0xf73168d8), SPH_C32(0x0b1b4946), SPH_C32(0x07ed0000),
SPH_C32(0xb2500000), SPH_C32(0x8774000a), SPH_C32(0x970d0000),
SPH_C32(0x437223ae), SPH_C32(0x48c76ea4), SPH_C32(0xf4786222),
SPH_C32(0x9075b1ce) },
{ SPH_C32(0x07ed0000), SPH_C32(0xb2500000), SPH_C32(0x8774000a),
SPH_C32(0x970d0000), SPH_C32(0x437223ae), SPH_C32(0x48c76ea4),
SPH_C32(0xf4786222), SPH_C32(0x9075b1ce), SPH_C32(0xa2d60000),
SPH_C32(0xa6760000), SPH_C32(0xc9440014), SPH_C32(0xeba30000),
SPH_C32(0xccec2e7b), SPH_C32(0x3018c499), SPH_C32(0x03490afa),
SPH_C32(0x9b6ef888) },
{ SPH_C32(0x88980000), SPH_C32(0x1f940000), SPH_C32(0x7fcf002e),
SPH_C32(0xfb4e0000), SPH_C32(0xf158079a), SPH_C32(0x61ae9167),
SPH_C32(0xa895706c), SPH_C32(0xe6107494), SPH_C32(0x0bc20000),
SPH_C32(0xdb630000), SPH_C32(0x7e88000c), SPH_C32(0x15860000),
SPH_C32(0x91fd48f3), SPH_C32(0x7581bb43), SPH_C32(0xf460449e),
SPH_C32(0xd8b61463) },
{ SPH_C32(0x0bc20000), SPH_C32(0xdb630000), SPH_C32(0x7e88000c),
SPH_C32(0x15860000), SPH_C32(0x91fd48f3), SPH_C32(0x7581bb43),
SPH_C32(0xf460449e), SPH_C32(0xd8b61463), SPH_C32(0x835a0000),
SPH_C32(0xc4f70000), SPH_C32(0x01470022), SPH_C32(0xeec80000),
SPH_C32(0x60a54f69), SPH_C32(0x142f2a24), SPH_C32(0x5cf534f2),
SPH_C32(0x3ea660f7) },
{ SPH_C32(0x52500000), SPH_C32(0x29540000), SPH_C32(0x6a61004e),
SPH_C32(0xf0ff0000), SPH_C32(0x9a317eec), SPH_C32(0x452341ce),
SPH_C32(0xcf568fe5), SPH_C32(0x5303130f), SPH_C32(0x538d0000),
SPH_C32(0xa9fc0000), SPH_C32(0x9ef70006), SPH_C32(0x56ff0000),
SPH_C32(0x0ae4004e), SPH_C32(0x92c5cdf9), SPH_C32(0xa9444018),
SPH_C32(0x7f975691) },
{ SPH_C32(0x538d0000), SPH_C32(0xa9fc0000), SPH_C32(0x9ef70006),
SPH_C32(0x56ff0000), SPH_C32(0x0ae4004e), SPH_C32(0x92c5cdf9),
SPH_C32(0xa9444018), SPH_C32(0x7f975691), SPH_C32(0x01dd0000),
SPH_C32(0x80a80000), SPH_C32(0xf4960048), SPH_C32(0xa6000000),
SPH_C32(0x90d57ea2), SPH_C32(0xd7e68c37), SPH_C32(0x6612cffd),
SPH_C32(0x2c94459e) },
{ SPH_C32(0xe6280000), SPH_C32(0x4c4b0000), SPH_C32(0xa8550000),
SPH_C32(0xd3d002e0), SPH_C32(0xd86130b8), SPH_C32(0x98a7b0da),
SPH_C32(0x289506b4), SPH_C32(0xd75a4897), SPH_C32(0xf0c50000),
SPH_C32(0x59230000), SPH_C32(0x45820000), SPH_C32(0xe18d00c0),
SPH_C32(0x3b6d0631), SPH_C32(0xc2ed5699), SPH_C32(0xcbe0fe1c),
SPH_C32(0x56a7b19f) },
{ SPH_C32(0xf0c50000), SPH_C32(0x59230000), SPH_C32(0x45820000),
SPH_C32(0xe18d00c0), SPH_C32(0x3b6d0631), SPH_C32(0xc2ed5699),
SPH_C32(0xcbe0fe1c), SPH_C32(0x56a7b19f), SPH_C32(0x16ed0000),
SPH_C32(0x15680000), SPH_C32(0xedd70000), SPH_C32(0x325d0220),
SPH_C32(0xe30c3689), SPH_C32(0x5a4ae643), SPH_C32(0xe375f8a8),
SPH_C32(0x81fdf908) },
{ SPH_C32(0xb4310000), SPH_C32(0x77330000), SPH_C32(0xb15d0000),
SPH_C32(0x7fd004e0), SPH_C32(0x78a26138), SPH_C32(0xd116c35d),
SPH_C32(0xd256d489), SPH_C32(0x4e6f74de), SPH_C32(0xe3060000),
SPH_C32(0xbdc10000), SPH_C32(0x87130000), SPH_C32(0xbff20060),
SPH_C32(0x2eba0a1a), SPH_C32(0x8db53751), SPH_C32(0x73c5ab06),
SPH_C32(0x5bd61539) },
{ SPH_C32(0xe3060000), SPH_C32(0xbdc10000), SPH_C32(0x87130000),
SPH_C32(0xbff20060), SPH_C32(0x2eba0a1a), SPH_C32(0x8db53751),
SPH_C32(0x73c5ab06), SPH_C32(0x5bd61539), SPH_C32(0x57370000),
SPH_C32(0xcaf20000), SPH_C32(0x364e0000), SPH_C32(0xc0220480),
SPH_C32(0x56186b22), SPH_C32(0x5ca3f40c), SPH_C32(0xa1937f8f),
SPH_C32(0x15b961e7) },
{ SPH_C32(0x02f20000), SPH_C32(0xa2810000), SPH_C32(0x873f0000),
SPH_C32(0xe36c7800), SPH_C32(0x1e1d74ef), SPH_C32(0x073d2bd6),
SPH_C32(0xc4c23237), SPH_C32(0x7f32259e), SPH_C32(0xbadd0000),
SPH_C32(0x13ad0000), SPH_C32(0xb7e70000), SPH_C32(0xf7282800),
SPH_C32(0xdf45144d), SPH_C32(0x361ac33a), SPH_C32(0xea5a8d14),
SPH_C32(0x2a2c18f0) },
{ SPH_C32(0xbadd0000), SPH_C32(0x13ad0000), SPH_C32(0xb7e70000),
SPH_C32(0xf7282800), SPH_C32(0xdf45144d), SPH_C32(0x361ac33a),
SPH_C32(0xea5a8d14), SPH_C32(0x2a2c18f0), SPH_C32(0xb82f0000),
SPH_C32(0xb12c0000), SPH_C32(0x30d80000), SPH_C32(0x14445000),
SPH_C32(0xc15860a2), SPH_C32(0x3127e8ec), SPH_C32(0x2e98bf23),
SPH_C32(0x551e3d6e) },
{ SPH_C32(0x1e6c0000), SPH_C32(0xc4420000), SPH_C32(0x8a2e0000),
SPH_C32(0xbcb6b800), SPH_C32(0x2c4413b6), SPH_C32(0x8bfdd3da),
SPH_C32(0x6a0c1bc8), SPH_C32(0xb99dc2eb), SPH_C32(0x92560000),
SPH_C32(0x1eda0000), SPH_C32(0xea510000), SPH_C32(0xe8b13000),
SPH_C32(0xa93556a5), SPH_C32(0xebfb6199), SPH_C32(0xb15c2254),
SPH_C32(0x33c5244f) },
{ SPH_C32(0x92560000), SPH_C32(0x1eda0000), SPH_C32(0xea510000),
SPH_C32(0xe8b13000), SPH_C32(0xa93556a5), SPH_C32(0xebfb6199),
SPH_C32(0xb15c2254), SPH_C32(0x33c5244f), SPH_C32(0x8c3a0000),
SPH_C32(0xda980000), SPH_C32(0x607f0000), SPH_C32(0x54078800),
SPH_C32(0x85714513), SPH_C32(0x6006b243), SPH_C32(0xdb50399c),
SPH_C32(0x8a58e6a4) },
{ SPH_C32(0x033d0000), SPH_C32(0x08b30000), SPH_C32(0xf33a0000),
SPH_C32(0x3ac20007), SPH_C32(0x51298a50), SPH_C32(0x6b6e661f),
SPH_C32(0x0ea5cfe3), SPH_C32(0xe6da7ffe), SPH_C32(0xa8da0000),
SPH_C32(0x96be0000), SPH_C32(0x5c1d0000), SPH_C32(0x07da0002),
SPH_C32(0x7d669583), SPH_C32(0x1f98708a), SPH_C32(0xbb668808),
SPH_C32(0xda878000) },
{ SPH_C32(0xa8da0000), SPH_C32(0x96be0000), SPH_C32(0x5c1d0000),
SPH_C32(0x07da0002), SPH_C32(0x7d669583), SPH_C32(0x1f98708a),
SPH_C32(0xbb668808), SPH_C32(0xda878000), SPH_C32(0xabe70000),
SPH_C32(0x9e0d0000), SPH_C32(0xaf270000), SPH_C32(0x3d180005),
SPH_C32(0x2c4f1fd3), SPH_C32(0x74f61695), SPH_C32(0xb5c347eb),
SPH_C32(0x3c5dfffe) },
{ SPH_C32(0x01930000), SPH_C32(0xe7820000), SPH_C32(0xedfb0000),
SPH_C32(0xcf0c000b), SPH_C32(0x8dd08d58), SPH_C32(0xbca3b42e),
SPH_C32(0x063661e1), SPH_C32(0x536f9e7b), SPH_C32(0x92280000),
SPH_C32(0xdc850000), SPH_C32(0x57fa0000), SPH_C32(0x56dc0003),
SPH_C32(0xbae92316), SPH_C32(0x5aefa30c), SPH_C32(0x90cef752),
SPH_C32(0x7b1675d7) },
{ SPH_C32(0x92280000), SPH_C32(0xdc850000), SPH_C32(0x57fa0000),
SPH_C32(0x56dc0003), SPH_C32(0xbae92316), SPH_C32(0x5aefa30c),
SPH_C32(0x90cef752), SPH_C32(0x7b1675d7), SPH_C32(0x93bb0000),
SPH_C32(0x3b070000), SPH_C32(0xba010000), SPH_C32(0x99d00008),
SPH_C32(0x3739ae4e), SPH_C32(0xe64c1722), SPH_C32(0x96f896b3),
SPH_C32(0x2879ebac) },
{ SPH_C32(0x5fa80000), SPH_C32(0x56030000), SPH_C32(0x43ae0000),
SPH_C32(0x64f30013), SPH_C32(0x257e86bf), SPH_C32(0x1311944e),
SPH_C32(0x541e95bf), SPH_C32(0x8ea4db69), SPH_C32(0x00440000),
SPH_C32(0x7f480000), SPH_C32(0xda7c0000), SPH_C32(0x2a230001),
SPH_C32(0x3badc9cc), SPH_C32(0xa9b69c87), SPH_C32(0x030a9e60),
SPH_C32(0xbe0a679e) },
{ SPH_C32(0x00440000), SPH_C32(0x7f480000), SPH_C32(0xda7c0000),
SPH_C32(0x2a230001), SPH_C32(0x3badc9cc), SPH_C32(0xa9b69c87),
SPH_C32(0x030a9e60), SPH_C32(0xbe0a679e), SPH_C32(0x5fec0000),
SPH_C32(0x294b0000), SPH_C32(0x99d20000), SPH_C32(0x4ed00012),
SPH_C32(0x1ed34f73), SPH_C32(0xbaa708c9), SPH_C32(0x57140bdf),
SPH_C32(0x30aebcf7) },
{ SPH_C32(0xee930000), SPH_C32(0xd6070000), SPH_C32(0x92c10000),
SPH_C32(0x2b9801e0), SPH_C32(0x9451287c), SPH_C32(0x3b6cfb57),
SPH_C32(0x45312374), SPH_C32(0x201f6a64), SPH_C32(0x7b280000),
SPH_C32(0x57420000), SPH_C32(0xa9e50000), SPH_C32(0x634300a0),
SPH_C32(0x9edb442f), SPH_C32(0x6d9995bb), SPH_C32(0x27f83b03),
SPH_C32(0xc7ff60f0) },
{ SPH_C32(0x7b280000), SPH_C32(0x57420000), SPH_C32(0xa9e50000),
SPH_C32(0x634300a0), SPH_C32(0x9edb442f), SPH_C32(0x6d9995bb),
SPH_C32(0x27f83b03), SPH_C32(0xc7ff60f0), SPH_C32(0x95bb0000),
SPH_C32(0x81450000), SPH_C32(0x3b240000), SPH_C32(0x48db0140),
SPH_C32(0x0a8a6c53), SPH_C32(0x56f56eec), SPH_C32(0x62c91877),
SPH_C32(0xe7e00a94) }
};
#define INPUT_BIG { \
const uint32_t *tp = &d_T512[0][0]; \
unsigned u, v; \
m0 = 0; \
m1 = 0; \
m2 = 0; \
m3 = 0; \
m4 = 0; \
m5 = 0; \
m6 = 0; \
m7 = 0; \
m8 = 0; \
m9 = 0; \
mA = 0; \
mB = 0; \
mC = 0; \
mD = 0; \
mE = 0; \
mF = 0; \
for (u = 0; u < 8; u ++) { \
unsigned db = buf(u); \
for (v = 0; v < 8; v ++, db >>= 1) { \
uint32_t dm = SPH_T32(-(uint32_t)(db & 1)); \
m0 ^= dm & *tp ++; \
m1 ^= dm & *tp ++; \
m2 ^= dm & *tp ++; \
m3 ^= dm & *tp ++; \
m4 ^= dm & *tp ++; \
m5 ^= dm & *tp ++; \
m6 ^= dm & *tp ++; \
m7 ^= dm & *tp ++; \
m8 ^= dm & *tp ++; \
m9 ^= dm & *tp ++; \
mA ^= dm & *tp ++; \
mB ^= dm & *tp ++; \
mC ^= dm & *tp ++; \
mD ^= dm & *tp ++; \
mE ^= dm & *tp ++; \
mF ^= dm & *tp ++; \
} \
} \
}
/***************************************************/
// Die Hash-Funktion
__global__ void x13_hamsi512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector)
{
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
{
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
int hashPosition = nounce - startNounce;
uint32_t *Hash = (uint32_t*)&g_hash[hashPosition<<3];
unsigned char *h1 = (unsigned char *)Hash;
uint32_t c0 = SPH_C32(0x73746565), c1 = SPH_C32(0x6c706172), c2 = SPH_C32(0x6b204172), c3 = SPH_C32(0x656e6265);
uint32_t c4 = SPH_C32(0x72672031), c5 = SPH_C32(0x302c2062), c6 = SPH_C32(0x75732032), c7 = SPH_C32(0x3434362c);
uint32_t c8 = SPH_C32(0x20422d33), c9 = SPH_C32(0x30303120), cA = SPH_C32(0x4c657576), cB = SPH_C32(0x656e2d48);
uint32_t cC = SPH_C32(0x65766572), cD = SPH_C32(0x6c65652c), cE = SPH_C32(0x2042656c), cF = SPH_C32(0x6769756d);
uint32_t m0, m1, m2, m3, m4, m5, m6, m7;
uint32_t m8, m9, mA, mB, mC, mD, mE, mF;
uint32_t h[16] = { c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, cA, cB, cC, cD, cE, cF };
#define buf(u) (h1[i+u])
#pragma unroll 8
for(int i = 0; i < 64; i += 8) {
INPUT_BIG;
P_BIG;
T_BIG;
}
#undef buf
#define buf(u) (u == 0 ? 0x80 : 0)
INPUT_BIG;
P_BIG;
T_BIG;
#undef buf
#define buf(u) (u == 6 ? 2 : 0)
INPUT_BIG;
PF_BIG;
T_BIG;
#pragma unroll 16
for (int i = 0; i < 16; i++)
Hash[i] = SWAB32(h[i]);
}
}
__host__ void x13_hamsi512_cpu_init(int thr_id, int threads)
{
cudaMemcpyToSymbol( d_alpha_n, alpha_n, sizeof(uint32_t)*32, 0, cudaMemcpyHostToDevice);
cudaMemcpyToSymbol( d_alpha_f, alpha_f, sizeof(uint32_t)*32, 0, cudaMemcpyHostToDevice);
cudaMemcpyToSymbol( d_T512, T512, sizeof(uint32_t)*64*16, 0, cudaMemcpyHostToDevice);
}
__host__ void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
{
const int threadsperblock = 256;
// berechne wie viele Thread Blocks wir brauchen
dim3 grid((threads + threadsperblock-1)/threadsperblock);
dim3 block(threadsperblock);
// Größe des dynamischen Shared Memory Bereichs
size_t shared_size = 0;
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
x13_hamsi512_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
MyStreamSynchronize(NULL, order, thr_id);
}

284
x13/x13.cu

@ -0,0 +1,284 @@
/*
* X13 algorithm built on cbuchner1's original X11
*
*/
extern "C"
{
#include "sph/sph_blake.h"
#include "sph/sph_bmw.h"
#include "sph/sph_groestl.h"
#include "sph/sph_skein.h"
#include "sph/sph_jh.h"
#include "sph/sph_keccak.h"
#include "sph/sph_luffa.h"
#include "sph/sph_cubehash.h"
#include "sph/sph_shavite.h"
#include "sph/sph_simd.h"
#include "sph/sph_echo.h"
#include "sph/sph_hamsi.h"
#include "sph/sph_fugue.h"
#include "miner.h"
}
// aus cpu-miner.c
extern int device_map[8];
// Speicher für Input/Output der verketteten Hashfunktionen
static uint32_t *d_hash[8];
extern void quark_blake512_cpu_init(int thr_id, int threads);
extern void quark_blake512_cpu_setBlock_80(void *pdata);
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_bmw512_cpu_init(int thr_id, int threads);
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_groestl512_cpu_init(int thr_id, int threads);
extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
//extern void quark_doublegroestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_skein512_cpu_init(int thr_id, int threads);
extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_keccak512_cpu_init(int thr_id, int threads);
extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_jh512_cpu_init(int thr_id, int threads);
extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_luffa512_cpu_init(int thr_id, int threads);
extern void x11_luffa512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_cubehash512_cpu_init(int thr_id, int threads);
extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_shavite512_cpu_init(int thr_id, int threads);
extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_simd512_cpu_init(int thr_id, int threads);
extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_echo512_cpu_init(int thr_id, int threads);
extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x13_hamsi512_cpu_init(int thr_id, int threads);
extern void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x13_fugue512_cpu_init(int thr_id, int threads);
extern void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_check_cpu_init(int thr_id, int threads);
extern void quark_check_cpu_setTarget(const void *ptarget);
extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
extern void quark_compactTest_cpu_init(int thr_id, int threads);
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
int order);
// X13 Hashfunktion
inline void x13hash(void *state, const void *input)
{
// blake1-bmw2-grs3-skein4-jh5-keccak6-luffa7-cubehash8-shavite9-simd10-echo11-hamsi12-fugue13
sph_blake512_context ctx_blake;
sph_bmw512_context ctx_bmw;
sph_groestl512_context ctx_groestl;
sph_jh512_context ctx_jh;
sph_keccak512_context ctx_keccak;
sph_skein512_context ctx_skein;
sph_luffa512_context ctx_luffa;
sph_cubehash512_context ctx_cubehash;
sph_shavite512_context ctx_shavite;
sph_simd512_context ctx_simd;
sph_echo512_context ctx_echo;
sph_hamsi512_context ctx_hamsi;
sph_fugue512_context ctx_fugue;
uint32_t hash[16];
sph_blake512_init(&ctx_blake);
// ZBLAKE;
sph_blake512 (&ctx_blake, input, 80);
sph_blake512_close(&ctx_blake, (void*) hash);
sph_bmw512_init(&ctx_bmw);
// ZBMW;
sph_bmw512 (&ctx_bmw, (const void*) hash, 64);
sph_bmw512_close(&ctx_bmw, (void*) hash);
sph_groestl512_init(&ctx_groestl);
// ZGROESTL;
sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
sph_groestl512_close(&ctx_groestl, (void*) hash);
sph_skein512_init(&ctx_skein);
// ZSKEIN;
sph_skein512 (&ctx_skein, (const void*) hash, 64);
sph_skein512_close(&ctx_skein, (void*) hash);
sph_jh512_init(&ctx_jh);
// ZJH;
sph_jh512 (&ctx_jh, (const void*) hash, 64);
sph_jh512_close(&ctx_jh, (void*) hash);
sph_keccak512_init(&ctx_keccak);
// ZKECCAK;
sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
sph_keccak512_close(&ctx_keccak, (void*) hash);
sph_luffa512_init(&ctx_luffa);
// ZLUFFA;
sph_luffa512 (&ctx_luffa, (const void*) hash, 64);
sph_luffa512_close (&ctx_luffa, (void*) hash);
sph_cubehash512_init(&ctx_cubehash);
// ZCUBEHASH;
sph_cubehash512 (&ctx_cubehash, (const void*) hash, 64);
sph_cubehash512_close(&ctx_cubehash, (void*) hash);
sph_shavite512_init(&ctx_shavite);
// ZSHAVITE;
sph_shavite512 (&ctx_shavite, (const void*) hash, 64);
sph_shavite512_close(&ctx_shavite, (void*) hash);
sph_simd512_init(&ctx_simd);
// ZSIMD
sph_simd512 (&ctx_simd, (const void*) hash, 64);
sph_simd512_close(&ctx_simd, (void*) hash);
sph_echo512_init(&ctx_echo);
// ZECHO
sph_echo512 (&ctx_echo, (const void*) hash, 64);
sph_echo512_close(&ctx_echo, (void*) hash);
sph_hamsi512_init(&ctx_hamsi);
sph_hamsi512 (&ctx_hamsi, (const void*) hash, 64);
sph_hamsi512_close(&ctx_hamsi, (void*) hash);
sph_fugue512_init(&ctx_fugue);
sph_fugue512 (&ctx_fugue, (const void*) hash, 64);
sph_fugue512_close(&ctx_fugue, (void*) hash);
memcpy(state, hash, 32);
}
extern bool opt_benchmark;
extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done)
{
const uint32_t first_nonce = pdata[19];
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x0000ff;
const uint32_t Htarg = ptarget[7];
const int throughput = 256*256*8;
static bool init[8] = {0,0,0,0,0,0,0,0};
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
// Konstanten kopieren, Speicher belegen
cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput);
quark_skein512_cpu_init(thr_id, throughput);
quark_bmw512_cpu_init(thr_id, throughput);
quark_keccak512_cpu_init(thr_id, throughput);
quark_jh512_cpu_init(thr_id, throughput);
x11_luffa512_cpu_init(thr_id, throughput);
x11_cubehash512_cpu_init(thr_id, throughput);
x11_shavite512_cpu_init(thr_id, throughput);
x11_simd512_cpu_init(thr_id, throughput);
x11_echo512_cpu_init(thr_id, throughput);
x13_hamsi512_cpu_init(thr_id, throughput);
x13_fugue512_cpu_init(thr_id, throughput);
quark_check_cpu_init(thr_id, throughput);
init[thr_id] = true;
}
//unsigned char echobefore[64], echoafter[64];
uint32_t endiandata[20];
for (int k=0; k < 20; k++)
be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
quark_blake512_cpu_setBlock_80((void*)endiandata);
quark_check_cpu_setTarget(ptarget);
do {
int order = 0;
// erstes Blake512 Hash mit CUDA
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
// das ist der unbedingte Branch für BMW512
quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Groestl512
quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Skein512
quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für JH512
quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Keccak512
quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Luffa512
x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Cubehash512
x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Shavite512
x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für SIMD512
x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für ECHO512
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// Scan nach Gewinner Hashes auf der GPU
uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
if (foundNonce != 0xffffffff)
{
uint32_t vhash64[8];
be32enc(&endiandata[19], foundNonce);
x13hash(vhash64, endiandata);
if( (vhash64[7]<=Htarg) && fulltest(vhash64, ptarget) ) {
pdata[19] = foundNonce;
*hashes_done = foundNonce - first_nonce + 1;
return 1;
} else {
applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
}
}
pdata[19] += throughput;
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}
Loading…
Cancel
Save