Implement x14 (cuda + cpu functions)

Project was updated for VS2013 and CUDA SDK 6.5. Also add a --cputest function to dump CPU hash results. TODO: x15 is not fully functional, but the first loop seems OK. Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>
10 years ago · 06763c20b1
32 changed files with 8179 additions and 436 deletions
--- a/Makefile.am
+++ b/Makefile.am
@ -29,16 +29,18 @@ ccminer_SOURCES		= elist.h miner.h compat.h \
 			  groestlcoin.cpp cuda_groestlcoin.cu cuda_groestlcoin.h \
 			  myriadgroestl.cpp cuda_myriadgroestl.cu \
 			  JHA/jackpotcoin.cu JHA/cuda_jha_keccak512.cu \
-			  JHA/cuda_jha_compactionTest.cu quark/cuda_quark_checkhash.cu \
+			  JHA/cuda_jha_compactionTest.cu quark/cuda_checkhash.cu \
 			  quark/cuda_jh512.cu quark/cuda_quark_blake512.cu quark/cuda_quark_groestl512.cu quark/cuda_skein512.cu \
 			  quark/cuda_bmw512.cu quark/cuda_quark_keccak512.cu quark/quarkcoin.cu quark/animecoin.cu \
 			  quark/cuda_quark_compactionTest.cu \
 			  cuda_nist5.cu \
 			  sph/cubehash.c sph/echo.c sph/luffa.c sph/shavite.c sph/simd.c \
 			  sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \
 			  sph/shabal.c sph/whirlpool.c \
 			  x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu \
 			  x11/x11.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
-			  x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu
+			  x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \
 			  x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu
 ccminer_LDFLAGS		= $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
 ccminer_LDADD		= @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@
--- a/README.md
+++ b/README.md
@ -2,3 +2,6 @@ ccminer
 =======
 Christian Buchner's & Christian H.'s CUDA miner project
 Fork by tpruvot@github with X14 support
   BTC donation address: 1AJdfCpLWPNoAMDfHF1wD5y8VgKSSTHxPo
--- a/README.txt
+++ b/README.txt
@ -19,6 +19,10 @@ If you find this tool useful and like to support its continued
   SFR donation address: SR4b87aEnPfTs77bo9NnnaV21fiF6jQpAp
   MNC donation address: MShgNUSYwybEbXLvJUtdNg1a7rUeiNgooK
   BTQ donation address: 13GFwLiZL2DaA9XeE733PNrQX5QYLFsonS
 X14/X15 (tpruvot@github)
   BTC donation address: 1AJdfCpLWPNoAMDfHF1wD5y8VgKSSTHxPo
 ***************************************************************
 >>> Introduction <<<
@ -88,6 +92,7 @@ its command line interface and options.
  -P, --protocol-dump   verbose dump of protocol-level activities
  -B, --background      run the miner in the background
      --benchmark       run in offline benchmark mode
      --cputest         debug hashes from cpu algorithms
  -c, --config=FILE     load a JSON-format configuration file
  -V, --version         display version information and exit
  -h, --help            display this help text and exit
--- a/ccminer.sln
+++ b/ccminer.sln
@ -1,6 +1,7 @@
-
+Microsoft Visual Studio Solution File, Format Version 12.00
-Microsoft Visual Studio Solution File, Format Version 11.00
+# Visual Studio 2013
-# Visual Studio 2010
+VisualStudioVersion = 12.0.30723.0
 MinimumVisualStudioVersion = 10.0.40219.1
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ccminer", "ccminer.vcxproj", "{36DC07F9-A4A6-4877-A146-1B960083CF6F}"
 EndProject
 Global
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@ -1,53 +1,61 @@
 <?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ItemGroup Label="ProjectConfigurations">
-    <ProjectConfiguration Include="Debug|Win32">
+    <ProjectConfiguration Include="Release|Win32">
-      <Configuration>Debug</Configuration>
+      <Configuration>Release</Configuration>
      <Platform>Win32</Platform>
    </ProjectConfiguration>
-    <ProjectConfiguration Include="Debug|x64">
+    <ProjectConfiguration Include="Debug|Win32">
      <Configuration>Debug</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|Win32">
      <Configuration>Release</Configuration>
      <Platform>Win32</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|x64">
      <Configuration>Release</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Debug|x64">
      <Configuration>Debug</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
  </ItemGroup>
  <PropertyGroup Label="Globals">
    <ProjectGuid>{36DC07F9-A4A6-4877-A146-1B960083CF6F}</ProjectGuid>
    <RootNamespace>ccminer</RootNamespace>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
-    <UseDebugLibraries>true</UseDebugLibraries>
+    <UseDebugLibraries>false</UseDebugLibraries>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>MultiByte</CharacterSet>
    <PlatformToolset>v120</PlatformToolset>
    <CLRSupport>false</CLRSupport>
  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
    <CharacterSet>MultiByte</CharacterSet>
    <PlatformToolset>v120</PlatformToolset>
    <CLRSupport>false</CLRSupport>
  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>MultiByte</CharacterSet>
    <PlatformToolset>v120</PlatformToolset>
    <CLRSupport>false</CLRSupport>
  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
-    <UseDebugLibraries>false</UseDebugLibraries>
+    <UseDebugLibraries>true</UseDebugLibraries>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>MultiByte</CharacterSet>
    <PlatformToolset>v120</PlatformToolset>
    <CLRSupport>false</CLRSupport>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
  <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 5.5.props" />
+    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.props" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
@ -64,6 +72,8 @@
  <PropertyGroup Label="UserMacros" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <LinkIncremental>true</LinkIncremental>
    <IncludePath>$(CUDA_INC_PATH);$(IncludePath)</IncludePath>
    <LibraryPath>$(CUDA_LIB_PATH);$(LibraryPath)</LibraryPath>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <LinkIncremental>true</LinkIncremental>
@ -92,17 +102,15 @@
 copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
    </PostBuildEvent>
    <CudaCompile>
-      <CInterleavedPTX>true</CInterleavedPTX>
+      <CInterleavedPTX>false</CInterleavedPTX>
    </CudaCompile>
    <CudaCompile>
      <MaxRegCount>80</MaxRegCount>
    </CudaCompile>
    <CudaCompile>
      <PtxAsOptionV>true</PtxAsOptionV>
-      <Keep>true</Keep>
+      <Keep>false</Keep>
-      <CodeGeneration>compute_35,sm_35</CodeGeneration>
+      <CodeGeneration>compute_50,sm_50</CodeGeneration>
      <Include>
      </Include>
    </CudaCompile>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
@ -123,7 +131,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
 copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
    </PostBuildEvent>
    <CudaCompile>
-      <CInterleavedPTX>true</CInterleavedPTX>
+      <CInterleavedPTX>false</CInterleavedPTX>
    </CudaCompile>
    <CudaCompile>
      <MaxRegCount>80</MaxRegCount>
@ -142,7 +150,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
      <Optimization>MaxSpeed</Optimization>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;MAXWELL_OR_FERMI=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <AdditionalIncludeDirectories>.;compat;compat\jansson;compat\getopt;..\pthreads\Pre-built.2\include;..\curl-7.29.0\include;..\OpenSSL-Win32\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)</AdditionalIncludeDirectories>
    </ClCompile>
    <Link>
@ -158,18 +166,23 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
 copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
    </PostBuildEvent>
    <CudaCompile>
-      <CInterleavedPTX>true</CInterleavedPTX>
+      <CInterleavedPTX>false</CInterleavedPTX>
    </CudaCompile>
    <CudaCompile>
      <MaxRegCount>80</MaxRegCount>
    </CudaCompile>
    <CudaCompile>
      <PtxAsOptionV>true</PtxAsOptionV>
-      <Keep>true</Keep>
+      <Keep>false</Keep>
-      <CodeGeneration>compute_35,sm_35</CodeGeneration>
+      <CodeGeneration>compute_50,sm_50</CodeGeneration>
-      <Include>
+      <Defines>--optimize 2</Defines>
-      </Include>
+    </CudaCompile>
-    </CudaCompile>
+    <CudaLink>
      <GPUDebugInfo>false</GPUDebugInfo>
    </CudaLink>
    <CudaLink>
      <Optimization>O2</Optimization>
    </CudaLink>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <ClCompile>
@ -193,7 +206,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
 copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
    </PostBuildEvent>
    <CudaCompile>
-      <CInterleavedPTX>true</CInterleavedPTX>
+      <CInterleavedPTX>false</CInterleavedPTX>
    </CudaCompile>
    <CudaCompile>
      <MaxRegCount>80</MaxRegCount>
@ -201,7 +214,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
    <CudaCompile>
      <PtxAsOptionV>true</PtxAsOptionV>
      <Keep>true</Keep>
-      <CodeGeneration>compute_35,sm_35</CodeGeneration>
+      <CodeGeneration>compute_50,sm_50</CodeGeneration>
      <Include>
      </Include>
    </CudaCompile>
@ -216,10 +229,12 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
    <ClCompile Include="compat\jansson\utf.c" />
    <ClCompile Include="compat\jansson\value.c" />
    <ClCompile Include="cpu-miner.c">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">/TP %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">/Tp %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">/TP %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">/Tp %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">/TP %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">/Tp %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">/TP %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">/Tp %(AdditionalOptions)</AdditionalOptions>
      <TreatWChar_tAsBuiltInType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</TreatWChar_tAsBuiltInType>
      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
    </ClCompile>
    <ClCompile Include="fuguecoin.cpp" />
    <ClCompile Include="groestlcoin.cpp" />
@ -227,10 +242,10 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
    <ClCompile Include="myriadgroestl.cpp" />
    <ClCompile Include="scrypt.c" />
    <ClCompile Include="sha2.c">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">/TP %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">/Tp %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">/TP %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">/Tp %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">/TP %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">/Tp %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">/TP %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">/Tp %(AdditionalOptions)</AdditionalOptions>
    </ClCompile>
    <ClCompile Include="sph\aes_helper.c" />
    <ClCompile Include="sph\blake.c" />
@ -242,16 +257,21 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
    <ClCompile Include="sph\jh.c" />
    <ClCompile Include="sph\keccak.c" />
    <ClCompile Include="sph\luffa.c" />
    <ClCompile Include="sph\shabal.c" />
    <ClCompile Include="sph\shavite.c" />
    <ClCompile Include="sph\simd.c" />
    <ClCompile Include="sph\skein.c" />
    <ClCompile Include="sph\hamsi.c" />
    <ClCompile Include="sph\hamsi_helper.c" />
    <ClCompile Include="sph\whirlpool.c" />
    <ClCompile Include="sph\x15_helper.c">
      <ExcludedFromBuild>true</ExcludedFromBuild>
    </ClCompile>
    <ClCompile Include="util.c">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">/TP %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">/Tp %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">/TP %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">/Tp %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">/TP %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">/Tp %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">/TP %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">/Tp %(AdditionalOptions)</AdditionalOptions>
    </ClCompile>
  </ItemGroup>
  <ItemGroup>
@ -277,241 +297,263 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
    <ClInclude Include="sph\sph_bmw.h" />
    <ClInclude Include="sph\sph_cubehash.h" />
    <ClInclude Include="sph\sph_echo.h" />
    <ClInclude Include="sph\sph_fugue.h" />
    <ClInclude Include="sph\sph_groestl.h" />
    <ClInclude Include="sph\sph_jh.h" />
    <ClInclude Include="sph\sph_keccak.h" />
    <ClInclude Include="sph\sph_luffa.h" />
    <ClInclude Include="sph\sph_shabal.h" />
    <ClInclude Include="sph\sph_shavite.h" />
    <ClInclude Include="sph\sph_simd.h" />
    <ClInclude Include="sph\sph_skein.h" />
    <ClInclude Include="sph\sph_hamsi.h" />
    <ClInclude Include="sph\sph_types.h" />
    <ClInclude Include="sph\sph_whirlpool.h" />
    <ClInclude Include="uint256.h" />
  </ItemGroup>
  <ItemGroup>
    <CudaCompile Include="cuda_fugue256.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="cuda_groestlcoin.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="cuda_myriadgroestl.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="cuda_nist5.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="heavy\cuda_blake512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="heavy\cuda_combine.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="heavy\cuda_groestl512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="heavy\cuda_hefty1.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="heavy\cuda_keccak512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="heavy\cuda_sha256.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="heavy\heavy.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="JHA\cuda_jha_compactionTest.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=yes -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=yes -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=yes -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=yes -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="JHA\cuda_jha_keccak512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="JHA\jackpotcoin.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="quark\animecoin.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="quark\cuda_bmw512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="quark\cuda_jh512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="quark\cuda_quark_blake512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
-    </CudaCompile>
+    </CudaCompile>
-    <CudaCompile Include="quark\cuda_quark_checkhash.cu">
+    <CudaCompile Include="quark\cuda_checkhash.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <CInterleavedPTX>false</CInterleavedPTX>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="quark\cuda_quark_compactionTest.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=yes -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=yes -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=yes -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=yes -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="quark\cuda_quark_groestl512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="quark\cuda_quark_keccak512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="quark\cuda_skein512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="quark\quarkcoin.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="x11\cuda_x11_aes.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <ExcludedFromBuild>true</ExcludedFromBuild>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
    </CudaCompile>
    <CudaCompile Include="x11\cuda_x11_cubehash512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="x11\cuda_x11_echo.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="x11\cuda_x11_luffa512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="x11\cuda_x11_shavite512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
      <MaxRegCount Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">128</MaxRegCount>
      <MaxRegCount Condition="'$(Configuration)|$(Platform)'=='Release|x64'">128</MaxRegCount>
    </CudaCompile>
    <CudaCompile Include="x11\cuda_x11_simd512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="x11\simd_functions.cu">
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild>true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="x11\x11.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="x13\cuda_x13_hamsi512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="x13\cuda_x13_fugue512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="x13\x13.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="x15\cuda_x14_shabal512.cu">
      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="x15\cuda_x15_whirlpool.cu">
      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="x15\x14.cu">
      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="x15\x15.cu">
      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
  </ItemGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 5.5.targets" />
+    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.targets" />
  </ImportGroup>
 </Project>
--- a/ccminer.vcxproj.filters
+++ b/ccminer.vcxproj.filters
@ -58,6 +58,9 @@
    <Filter Include="Source Files\CUDA\x13">
      <UniqueIdentifier>{d67a2af7-4851-4d21-910e-87791bc8ee35}</UniqueIdentifier>
    </Filter>
    <Filter Include="Source Files\CUDA\x15">
      <UniqueIdentifier>{a2403c22-6777-46ab-a55a-3fcc7386c974}</UniqueIdentifier>
    </Filter>
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="compat\jansson\dump.c">
@ -153,6 +156,15 @@
    <ClCompile Include="sph\hamsi_helper.c">
      <Filter>Source Files\sph</Filter>
    </ClCompile>
    <ClCompile Include="sph\shabal.c">
      <Filter>Source Files\sph</Filter>
    </ClCompile>
    <ClCompile Include="sph\whirlpool.c">
      <Filter>Source Files\sph</Filter>
    </ClCompile>
    <ClCompile Include="sph\x15_helper.c">
      <Filter>Source Files\sph</Filter>
    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="compat.h">
@ -254,6 +266,12 @@
    <ClInclude Include="sph\sph_hamsi.h">
      <Filter>Header Files\sph</Filter>
    </ClInclude>
    <ClInclude Include="sph\sph_shabal.h">
      <Filter>Header Files\sph</Filter>
    </ClInclude>
    <ClInclude Include="sph\sph_whirlpool.h">
      <Filter>Header Files\sph</Filter>
    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <CudaCompile Include="cuda_fugue256.cu">
@ -268,9 +286,6 @@
    <CudaCompile Include="JHA\jackpotcoin.cu">
      <Filter>Source Files\CUDA\JHA</Filter>
    </CudaCompile>
    <CudaCompile Include="quark\cuda_quark_checkhash.cu">
      <Filter>Source Files\CUDA\quark</Filter>
    </CudaCompile>
    <CudaCompile Include="cuda_myriadgroestl.cu">
      <Filter>Source Files\CUDA</Filter>
    </CudaCompile>
@ -361,5 +376,20 @@
    <CudaCompile Include="x13\x13.cu">
      <Filter>Source Files\CUDA\x13</Filter>
    </CudaCompile>
    <CudaCompile Include="quark\cuda_checkhash.cu">
      <Filter>Source Files\CUDA</Filter>
    </CudaCompile>
    <CudaCompile Include="x15\x14.cu">
      <Filter>Source Files\CUDA\x15</Filter>
    </CudaCompile>
    <CudaCompile Include="x15\cuda_x14_shabal512.cu">
      <Filter>Source Files\CUDA\x15</Filter>
    </CudaCompile>
    <CudaCompile Include="x15\x15.cu">
      <Filter>Source Files\CUDA\x15</Filter>
    </CudaCompile>
    <CudaCompile Include="x15\cuda_x15_whirlpool.cu">
      <Filter>Source Files\CUDA\x15</Filter>
    </CudaCompile>
  </ItemGroup>
 </Project>
--- a/compat/thrust/system/detail/generic/find.inl
+++ b/compat/thrust/system/detail/generic/find.inl
@ -86,7 +86,7 @@ InputIterator find_if(thrust::execution_policy<DerivedPolicy> &exec,
    // TODO incorporate sizeof(InputType) into interval_threshold and round to multiple of 32
    const difference_type interval_threshold = 1 << 20;
-    const difference_type interval_size = (std::min)(interval_threshold, n);
+    const difference_type interval_size = min(interval_threshold, n);
    // force transform_iterator output to bool
    typedef thrust::transform_iterator<Predicate, InputIterator, bool> XfrmIterator;
--- a/cpu-miner.c
+++ b/cpu-miner.c
@ -21,6 +21,7 @@
 #include <time.h>
 #ifdef WIN32
 #include <windows.h>
 #include <stdint.h>
 #else
 #include <errno.h>
 #include <signal.h>
@ -133,6 +134,8 @@ typedef enum {
 	ALGO_NIST5,
 	ALGO_X11,
 	ALGO_X13,
 	ALGO_X14,
 	ALGO_X15,
 	ALGO_DMD_GR,
 } sha256_algos;
@ -148,6 +151,8 @@ static const char *algo_names[] = {
 	"nist5",
 	"x11",
 	"x13",
 	"x14",
 	"x15",
 	"dmd-gr",
 };
@ -222,6 +227,8 @@ Options:\n\
                        nist5     NIST5 (TalkCoin) hash\n\
                        x11       X11 (DarkCoin) hash\n\
                        x13       X13 (MaruCoin) hash\n\
                        x14       X14 hash\n\
                        x15       X15 hash\n\
                        dmd-gr    Diamond-Groestl hash\n\
  -d, --devices         takes a comma separated list of CUDA devices to use.\n\
                        Device IDs start counting from 0! Alternatively takes\n\
@ -258,6 +265,7 @@ Options:\n\
 #endif
 "\
      --benchmark       run in offline benchmark mode\n\
      --cputest         debug hashes from cpu algorithms\n\
  -c, --config=FILE     load a JSON-format configuration file\n\
  -V, --version         display version information and exit\n\
  -h, --help            display this help text and exit\n\
@ -278,6 +286,7 @@ static struct option const options[] = {
 	{ "background", 0, NULL, 'B' },
 #endif
 	{ "benchmark", 0, NULL, 1005 },
 	{ "cputest", 0, NULL, 1006 },
 	{ "cert", 1, NULL, 1001 },
 	{ "config", 1, NULL, 'c' },
 	{ "debug", 0, NULL, 'D' },
@ -924,6 +933,17 @@ static void *miner_thread(void *userdata)
 			                      max_nonce, &hashes_done);
 			break;
 		case ALGO_X14:
 			rc = scanhash_x14(thr_id, work.data, work.target,
 				max_nonce, &hashes_done);
 			break;
 		case ALGO_X15:
 			rc = scanhash_x15(thr_id, work.data, work.target,
 				max_nonce, &hashes_done);
 			exit(0);
 			break;
 		default:
 			/* should never happen */
 			goto out;
@ -1345,6 +1365,10 @@ static void parse_arg (int key, char *arg)
 		want_stratum = false;
 		have_stratum = false;
 		break;
 	case 1006:
 		print_hash_tests();
 		exit(0);
 		break;
 	case 1003:
 		want_longpoll = false;
 		break;
@ -1481,26 +1505,26 @@ static void signal_handler(int sig)
 }
 #endif
-#define PROGRAM_VERSION "1.2"
+#define PROGRAM_VERSION "1.2-VC12"
 int main(int argc, char *argv[])
 {
 	struct thr_info *thr;
 	long flags;
 	int i;
 	printf("*** ccMiner for nVidia GPUs by Christian Buchner and Christian H. ***\n");
 	printf("\t This is version "PROGRAM_VERSION" (tpruvot@github)\n");
 #ifdef WIN32
-	SYSTEM_INFO sysinfo;
+	printf("\t  Built with VC++ 2013 and nVidia CUDA SDK 6.5 RC (DC 5.0)\n\n");
 #else
 	printf("\t  Built with the nVidia CUDA SDK 6.5 RC\n\n");
 #endif
 	printf("     *** ccMiner for nVidia GPUs by Christian Buchner and Christian H. ***\n");
 	printf("\t             This is version "PROGRAM_VERSION" (beta)\n");
 	printf("\t  based on pooler-cpuminer 2.3.2 (c) 2010 Jeff Garzik, 2012 pooler\n");
-	printf("\t  based on pooler-cpuminer extension for HVC from\n\t       https://github.com/heavycoin/cpuminer-heavycoin\n");
+	printf("\t  based on pooler-cpuminer extension for HVC from http://hvc.1gh.com/" "\n\n");
 	printf("\t\t\tand\n\t       http://hvc.1gh.com/\n");
 	printf("\tCuda additions Copyright 2014 Christian Buchner, Christian H.\n");
 	printf("\t  LTC donation address: LKS1WDKGED647msBQfLBHV3Ls8sveGncnm\n");
 	printf("\t  BTC donation address: 16hJF5mceSojnTD3ZTUDqdRhDyPJzoRakM\n");
-	printf("\t  YAC donation address: Y87sptDEcpLkLeAuex6qZioDbvy1qXZEj4\n");
+	printf("\tCuda X14 and X15 added by Tanguy Pruvot (also in cpuminer-multi)\n");
 	printf("\t  BTC donation address: 1AJdfCpLWPNoAMDfHF1wD5y8VgKSSTHxPo\n\n");
 	rpc_user = strdup("");
 	rpc_pass = strdup("");
--- a/cpuminer-config.h
+++ b/cpuminer-config.h
@ -152,7 +152,7 @@
 #define PACKAGE_NAME "ccminer"
 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "ccminer 2014.06.15"
+#define PACKAGE_STRING "ccminer 2014.08.12"
 /* Define to the one symbol short name of this package. */
 #undef PACKAGE_TARNAME
@ -161,7 +161,7 @@
 #undef PACKAGE_URL
 /* Define to the version of this package. */
-#define PACKAGE_VERSION "2014.06.15"
+#define PACKAGE_VERSION "2014.08.12-VC12"
 /* If using the C implementation of alloca, define if you know the
   direction of stack growth for your system; otherwise it will be
--- a/heavy/heavy.cu
+++ b/heavy/heavy.cu
@ -1,10 +1,15 @@
 #include <string.h>
 #include <openssl/sha.h>
 #include <cuda.h>
 #include "cuda_runtime.h"
 #include "device_launch_parameters.h"
 #include <stdio.h>
 #include <memory.h>
 #include <string.h>
 #include <map>
 #include <openssl/sha.h>
 #ifndef _WIN32
 #include <unistd.h>
 #endif
@ -337,7 +342,7 @@ int scanhash_heavy_cpp(int thr_id, uint32_t *pdata,
    blake512_cpu_setBlock(pdata, blocklen);
    do {
-        int i;
+        uint32_t i;
        ////// Compaction init
        thrust::device_ptr<uint32_t> devNoncePtr(d_nonceVector[thr_id]);
--- a/miner.h
+++ b/miner.h
@ -243,6 +243,14 @@ extern int scanhash_x13(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done);
 extern int scanhash_x14(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done);
 extern int scanhash_x15(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done);
 extern void fugue256_hash(unsigned char* output, const unsigned char* input, int len);
 extern void heavycoin_hash(unsigned char* output, const unsigned char* input, int len);
 extern void groestlcoin_hash(unsigned char* output, const unsigned char* input, int len);
@ -342,6 +350,12 @@ extern void *tq_pop(struct thread_q *tq, const struct timespec *abstime);
 extern void tq_freeze(struct thread_q *tq);
 extern void tq_thaw(struct thread_q *tq);
 void print_hash_tests(void);
 void x11hash(void *output, const void *input);
 void x13hash(void *output, const void *input);
 void x14hash(void *output, const void *input);
 void x15hash(void *output, const void *input);
 #ifdef __cplusplus
 }
 #endif
--- a/quark/cuda_bmw512.cu
+++ b/quark/cuda_bmw512.cu
@ -23,6 +23,13 @@ static __device__ uint32_t cuda_swab32(uint32_t x)
 {
 	return __byte_perm(x, 0, 0x0123);
 }
 // Replace the high word of a 64-bit value: returns x with its upper
 // 32 bits set to y and its lower 32 bits unchanged.
 static __device__ unsigned long long REPLACE_HIWORD(const unsigned long long &x, const uint32_t &y) {
 	return (x & 0xFFFFFFFFULL) | (((unsigned long long)y) << 32ULL);
 }
 #if 0
 // Endian Drehung für 64 Bit Typen
 static __device__ unsigned long long cuda_swab64(unsigned long long x) {
    uint32_t h = (x >> 32);
@ -39,11 +46,6 @@ static __device__ uint32_t HIWORD(const unsigned long long &x) {
 #endif
 }
 // Replace the high word of a 64-bit value: returns x with its upper
 // 32 bits set to y and its lower 32 bits unchanged.
 static __device__ unsigned long long REPLACE_HIWORD(const unsigned long long &x, const uint32_t &y) {
 	return (x & 0xFFFFFFFFULL) | (((unsigned long long)y) << 32ULL);
 }
 // das Lo Word aus einem 64 Bit Typen extrahieren
 static __device__ uint32_t LOWORD(const unsigned long long &x) {
 #if __CUDA_ARCH__ >= 130
@ -66,6 +68,7 @@ static __device__ unsigned long long MAKE_ULONGLONG(uint32_t LO, uint32_t HI)
 static __device__ unsigned long long REPLACE_LOWORD(const unsigned long long &x, const uint32_t &y) {
 	return (x & 0xFFFFFFFF00000000ULL) | ((unsigned long long)y);
 }
 #endif
 // An attempt to design a wrapper for a uint64_t type assembled from 32-bit registers...
 #if 1
--- a/quark/cuda_quark_checkhash.cu
+++ b/quark/cuda_quark_checkhash.cu
@ -3,14 +3,10 @@
 #include "device_launch_parameters.h"
 #include <stdio.h>
 #include <stdint.h>
 #include <memory.h>
-// Folgende Definitionen später durch header ersetzen
+// Hash Target gegen das wir testen sollen
 typedef unsigned char uint8_t;
 typedef unsigned int uint32_t;
 typedef unsigned long long uint64_t;
 // das Hash Target gegen das wir testen sollen
 __constant__ uint32_t pTarget[8];
 uint32_t *d_resNounce[8];
@ -19,7 +15,7 @@ uint32_t *h_resNounce[8];
 // aus heavy.cu
 extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
-__global__ void quark_check_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_nonceVector, uint32_t *g_hash, uint32_t *resNounce)
+__global__ void cuda_check_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_nonceVector, uint32_t *g_hash, uint32_t *resNounce)
 {
 	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
@ -89,7 +85,7 @@ __host__ uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t star
 	// Größe des dynamischen Shared Memory Bereichs
 	size_t shared_size = 0;
-	quark_check_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, d_nonceVector, d_inputHash, d_resNounce[thr_id]);
+	cuda_check_gpu_hash_64 <<<grid, block, shared_size>>>(threads, startNounce, d_nonceVector, d_inputHash, d_resNounce[thr_id]);
 	// Strategisches Sleep Kommando zur Senkung der CPU Last
 	MyStreamSynchronize(NULL, order, thr_id);
--- a/quark/cuda_quark_blake512.cu
+++ b/quark/cuda_quark_blake512.cu
@ -64,11 +64,12 @@ static __device__ uint32_t LOWORD(const uint64_t &x) {
 	return (uint32_t)(x & 0xFFFFFFFFULL);
 #endif
 }
-
+#if 0
 // Replace the low word of a 64-bit value: returns x with its lower
 // 32 bits set to y and its upper 32 bits unchanged.
 static __device__ uint64_t REPLACE_LOWORD(const uint64_t &x, const uint32_t &y) {
 	return (x & 0xFFFFFFFF00000000ULL) | ((uint64_t)y);
 }
 #endif
 __device__ __forceinline__ uint64_t SWAP64(uint64_t x)
 {
--- a/quark/cuda_quark_groestl512.cu
+++ b/quark/cuda_quark_groestl512.cu
@ -1,7 +1,7 @@
 // Auf QuarkCoin spezialisierte Version von Groestl inkl. Bitslice
 #include <cuda.h>
-#include "cuda_runtime.h"
+#include <cuda_runtime.h>
 #include "device_launch_parameters.h"
 #include <stdio.h>
--- a/sph/shabal.c
+++ b/sph/shabal.c
@ -0,0 +1,810 @@
 /* $Id: shabal.c 175 2010-05-07 16:03:20Z tp $ */
 /*
 * Shabal implementation.
 *
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * ===========================(LICENSE END)=============================
 *
 * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
 */
 #include <stddef.h>
 #include <string.h>
 #include "sph_shabal.h"
 #ifdef __cplusplus
 extern "C"{
 #endif
 #ifdef _MSC_VER
 #pragma warning (disable: 4146)
 #endif
 /*
 * Part of this code was automatically generated (the part between
 * the "BEGIN" and "END" markers).
 */
 /*
  * Short local names. C32 and T32 alias the SPH_C32 / SPH_T32 macros,
  * which are defined outside this file (presumably in sph_types.h --
  * confirm). sM, O1, O2 and O3 are not referenced by the unrolled code
  * in this file. NOTE(review): 16 matches the number of message words
  * M0..MF, and 13/9/6 match the B-word index offsets hard-coded in
  * PERM_STEP_0..2 -- presumably left over from the generator; confirm.
  */
 #define sM    16
 #define C32   SPH_C32
 #define T32   SPH_T32
 #define O1   13
 #define O2    9
 #define O3    6
 /*
 * We copy the state into local variables, so that the compiler knows
 * that it can optimize them at will.
 */
 /* BEGIN -- automatically generated code. */
 /*
  * Declares the local working copy used by the other macros: twelve A
  * words (A00..A0B), sixteen B words (B0..BF), sixteen C words (C0..CF),
  * sixteen message words (M0..MF) and the two counter halves Wlow/Whigh.
  * It expands to plain declarations, so it must appear in the
  * declaration part of a function, and the function must not declare
  * other locals with these names.
  */
 #define DECL_STATE   \
 	sph_u32 A00, A01, A02, A03, A04, A05, A06, A07, \
 	        A08, A09, A0A, A0B; \
 	sph_u32 B0, B1, B2, B3, B4, B5, B6, B7, \
 	        B8, B9, BA, BB, BC, BD, BE, BF; \
 	sph_u32 C0, C1, C2, C3, C4, C5, C6, C7, \
 	        C8, C9, CA, CB, CC, CD, CE, CF; \
 	sph_u32 M0, M1, M2, M3, M4, M5, M6, M7, \
 	        M8, M9, MA, MB, MC, MD, ME, MF; \
 	sph_u32 Wlow, Whigh;
 /*
  * Loads the context pointed to by `state` into the locals declared by
  * DECL_STATE: A[0..11] -> A00..A0B, B[0..15] -> B0..BF,
  * C[0..15] -> C0..CF, plus Wlow and Whigh. The message words M0..MF
  * are not touched here (see DECODE_BLOCK).
  */
 #define READ_STATE(state)   do { \
 		A00 = (state)->A[0]; \
 		A01 = (state)->A[1]; \
 		A02 = (state)->A[2]; \
 		A03 = (state)->A[3]; \
 		A04 = (state)->A[4]; \
 		A05 = (state)->A[5]; \
 		A06 = (state)->A[6]; \
 		A07 = (state)->A[7]; \
 		A08 = (state)->A[8]; \
 		A09 = (state)->A[9]; \
 		A0A = (state)->A[10]; \
 		A0B = (state)->A[11]; \
 		B0 = (state)->B[0]; \
 		B1 = (state)->B[1]; \
 		B2 = (state)->B[2]; \
 		B3 = (state)->B[3]; \
 		B4 = (state)->B[4]; \
 		B5 = (state)->B[5]; \
 		B6 = (state)->B[6]; \
 		B7 = (state)->B[7]; \
 		B8 = (state)->B[8]; \
 		B9 = (state)->B[9]; \
 		BA = (state)->B[10]; \
 		BB = (state)->B[11]; \
 		BC = (state)->B[12]; \
 		BD = (state)->B[13]; \
 		BE = (state)->B[14]; \
 		BF = (state)->B[15]; \
 		C0 = (state)->C[0]; \
 		C1 = (state)->C[1]; \
 		C2 = (state)->C[2]; \
 		C3 = (state)->C[3]; \
 		C4 = (state)->C[4]; \
 		C5 = (state)->C[5]; \
 		C6 = (state)->C[6]; \
 		C7 = (state)->C[7]; \
 		C8 = (state)->C[8]; \
 		C9 = (state)->C[9]; \
 		CA = (state)->C[10]; \
 		CB = (state)->C[11]; \
 		CC = (state)->C[12]; \
 		CD = (state)->C[13]; \
 		CE = (state)->C[14]; \
 		CF = (state)->C[15]; \
 		Wlow = (state)->Wlow; \
 		Whigh = (state)->Whigh; \
 	} while (0)
 /*
  * Inverse of READ_STATE: stores the locals A00..A0B, B0..BF, C0..CF,
  * Wlow and Whigh back into the context pointed to by `state`.
  */
 #define WRITE_STATE(state)   do { \
 		(state)->A[0] = A00; \
 		(state)->A[1] = A01; \
 		(state)->A[2] = A02; \
 		(state)->A[3] = A03; \
 		(state)->A[4] = A04; \
 		(state)->A[5] = A05; \
 		(state)->A[6] = A06; \
 		(state)->A[7] = A07; \
 		(state)->A[8] = A08; \
 		(state)->A[9] = A09; \
 		(state)->A[10] = A0A; \
 		(state)->A[11] = A0B; \
 		(state)->B[0] = B0; \
 		(state)->B[1] = B1; \
 		(state)->B[2] = B2; \
 		(state)->B[3] = B3; \
 		(state)->B[4] = B4; \
 		(state)->B[5] = B5; \
 		(state)->B[6] = B6; \
 		(state)->B[7] = B7; \
 		(state)->B[8] = B8; \
 		(state)->B[9] = B9; \
 		(state)->B[10] = BA; \
 		(state)->B[11] = BB; \
 		(state)->B[12] = BC; \
 		(state)->B[13] = BD; \
 		(state)->B[14] = BE; \
 		(state)->B[15] = BF; \
 		(state)->C[0] = C0; \
 		(state)->C[1] = C1; \
 		(state)->C[2] = C2; \
 		(state)->C[3] = C3; \
 		(state)->C[4] = C4; \
 		(state)->C[5] = C5; \
 		(state)->C[6] = C6; \
 		(state)->C[7] = C7; \
 		(state)->C[8] = C8; \
 		(state)->C[9] = C9; \
 		(state)->C[10] = CA; \
 		(state)->C[11] = CB; \
 		(state)->C[12] = CC; \
 		(state)->C[13] = CD; \
 		(state)->C[14] = CE; \
 		(state)->C[15] = CF; \
 		(state)->Wlow = Wlow; \
 		(state)->Whigh = Whigh; \
 	} while (0)
 /*
  * Fills the message words M0..MF from the 64 bytes at `buf`, four
  * bytes per word, using sph_dec32le_aligned (little-endian decoding,
  * going by the helper's name). Relies on a local pointer named `buf`
  * being in scope at the expansion site.
  */
 #define DECODE_BLOCK   do { \
 		M0 = sph_dec32le_aligned(buf + 0); \
 		M1 = sph_dec32le_aligned(buf + 4); \
 		M2 = sph_dec32le_aligned(buf + 8); \
 		M3 = sph_dec32le_aligned(buf + 12); \
 		M4 = sph_dec32le_aligned(buf + 16); \
 		M5 = sph_dec32le_aligned(buf + 20); \
 		M6 = sph_dec32le_aligned(buf + 24); \
 		M7 = sph_dec32le_aligned(buf + 28); \
 		M8 = sph_dec32le_aligned(buf + 32); \
 		M9 = sph_dec32le_aligned(buf + 36); \
 		MA = sph_dec32le_aligned(buf + 40); \
 		MB = sph_dec32le_aligned(buf + 44); \
 		MC = sph_dec32le_aligned(buf + 48); \
 		MD = sph_dec32le_aligned(buf + 52); \
 		ME = sph_dec32le_aligned(buf + 56); \
 		MF = sph_dec32le_aligned(buf + 60); \
 	} while (0)
 /*
  * Adds the message block into B word by word: Bi = T32(Bi + Mi) for
  * i = 0..F. T32 is applied to every sum.
  */
 #define INPUT_BLOCK_ADD   do { \
 		B0 = T32(B0 + M0); \
 		B1 = T32(B1 + M1); \
 		B2 = T32(B2 + M2); \
 		B3 = T32(B3 + M3); \
 		B4 = T32(B4 + M4); \
 		B5 = T32(B5 + M5); \
 		B6 = T32(B6 + M6); \
 		B7 = T32(B7 + M7); \
 		B8 = T32(B8 + M8); \
 		B9 = T32(B9 + M9); \
 		BA = T32(BA + MA); \
 		BB = T32(BB + MB); \
 		BC = T32(BC + MC); \
 		BD = T32(BD + MD); \
 		BE = T32(BE + ME); \
 		BF = T32(BF + MF); \
 	} while (0)
 /*
  * Subtracts the message block from C word by word: Ci = T32(Ci - Mi)
  * for i = 0..F. T32 is applied to every difference.
  */
 #define INPUT_BLOCK_SUB   do { \
 		C0 = T32(C0 - M0); \
 		C1 = T32(C1 - M1); \
 		C2 = T32(C2 - M2); \
 		C3 = T32(C3 - M3); \
 		C4 = T32(C4 - M4); \
 		C5 = T32(C5 - M5); \
 		C6 = T32(C6 - M6); \
 		C7 = T32(C7 - M7); \
 		C8 = T32(C8 - M8); \
 		C9 = T32(C9 - M9); \
 		CA = T32(CA - MA); \
 		CB = T32(CB - MB); \
 		CC = T32(CC - MC); \
 		CD = T32(CD - MD); \
 		CE = T32(CE - ME); \
 		CF = T32(CF - MF); \
 	} while (0)
 /*
  * Mixes the counter into the A part of the state: Wlow is XORed into
  * A00 and Whigh into A01.
  */
 #define XOR_W   do { \
 		A00 ^= Wlow; \
 		A01 ^= Whigh; \
 	} while (0)
 /*
  * Exchanges two sph_u32 lvalues through a block-local temporary named
  * `tmp`. Each argument is expanded twice, so pass plain variables
  * without side effects (and not a variable itself called `tmp`).
  */
 #define SWAP(v1, v2)   do { \
 		sph_u32 tmp = (v1); \
 		(v1) = (v2); \
 		(v2) = tmp; \
 	} while (0)
 /*
  * Exchanges the B and C parts of the local state word by word
  * (B0<->C0 ... BF<->CF).
  */
 #define SWAP_BC   do { \
 		SWAP(B0, C0); \
 		SWAP(B1, C1); \
 		SWAP(B2, C2); \
 		SWAP(B3, C3); \
 		SWAP(B4, C4); \
 		SWAP(B5, C5); \
 		SWAP(B6, C6); \
 		SWAP(B7, C7); \
 		SWAP(B8, C8); \
 		SWAP(B9, C9); \
 		SWAP(BA, CA); \
 		SWAP(BB, CB); \
 		SWAP(BC, CC); \
 		SWAP(BD, CD); \
 		SWAP(BE, CE); \
 		SWAP(BF, CF); \
 	} while (0)
 /*
  * One elementary step of the permutation.
  *   xa0 <- T32((xa0 ^ (rotl(xa1, 15) * 5) ^ xc) * 3)
  *          ^ xb1 ^ (xb2 & ~xb3) ^ xm
  *   xb0 <- T32(~(rotl(xb0, 1) ^ xa0))      (uses the new xa0)
  * where rotl(v, k) is written as (v << k) | (v >> (32 - k)), i.e. a
  * rotation when the operands hold 32-bit values. The arguments are
  * expanded several times and are not parenthesised, so callers must
  * pass plain variable names.
  */
 #define PERM_ELT(xa0, xa1, xb0, xb1, xb2, xb3, xc, xm)   do { \
 		xa0 = T32((xa0 \
 			^ (((xa1 << 15) | (xa1 >> 17)) * 5U) \
 			^ xc) * 3U) \
 			^ xb1 ^ (xb2 & ~xb3) ^ xm; \
 		xb0 = T32(~(((xb0 << 1) | (xb0 >> 31)) ^ xa0)); \
 	} while (0)
 /*
  * First of three unrolled permutation passes: sixteen PERM_ELT calls,
  * one per B word (B0..BF) and message word (M0..MF). The updated A
  * word starts at A00 and advances cyclically through the twelve A
  * words (A00..A0B, then A00..A03); the C word runs downwards from C8
  * (C8..C0, then CF..C9). The order of the calls matters because each
  * call reads the A word written by the previous one.
  */
 #define PERM_STEP_0   do { \
 		PERM_ELT(A00, A0B, B0, BD, B9, B6, C8, M0); \
 		PERM_ELT(A01, A00, B1, BE, BA, B7, C7, M1); \
 		PERM_ELT(A02, A01, B2, BF, BB, B8, C6, M2); \
 		PERM_ELT(A03, A02, B3, B0, BC, B9, C5, M3); \
 		PERM_ELT(A04, A03, B4, B1, BD, BA, C4, M4); \
 		PERM_ELT(A05, A04, B5, B2, BE, BB, C3, M5); \
 		PERM_ELT(A06, A05, B6, B3, BF, BC, C2, M6); \
 		PERM_ELT(A07, A06, B7, B4, B0, BD, C1, M7); \
 		PERM_ELT(A08, A07, B8, B5, B1, BE, C0, M8); \
 		PERM_ELT(A09, A08, B9, B6, B2, BF, CF, M9); \
 		PERM_ELT(A0A, A09, BA, B7, B3, B0, CE, MA); \
 		PERM_ELT(A0B, A0A, BB, B8, B4, B1, CD, MB); \
 		PERM_ELT(A00, A0B, BC, B9, B5, B2, CC, MC); \
 		PERM_ELT(A01, A00, BD, BA, B6, B3, CB, MD); \
 		PERM_ELT(A02, A01, BE, BB, B7, B4, CA, ME); \
 		PERM_ELT(A03, A02, BF, BC, B8, B5, C9, MF); \
 	} while (0)
 /*
  * Second unrolled permutation pass. Same B/C/M argument pattern as
  * PERM_STEP_0, but the A word sequence continues where that pass
  * stopped: A04..A0B, then A00..A07.
  */
 #define PERM_STEP_1   do { \
 		PERM_ELT(A04, A03, B0, BD, B9, B6, C8, M0); \
 		PERM_ELT(A05, A04, B1, BE, BA, B7, C7, M1); \
 		PERM_ELT(A06, A05, B2, BF, BB, B8, C6, M2); \
 		PERM_ELT(A07, A06, B3, B0, BC, B9, C5, M3); \
 		PERM_ELT(A08, A07, B4, B1, BD, BA, C4, M4); \
 		PERM_ELT(A09, A08, B5, B2, BE, BB, C3, M5); \
 		PERM_ELT(A0A, A09, B6, B3, BF, BC, C2, M6); \
 		PERM_ELT(A0B, A0A, B7, B4, B0, BD, C1, M7); \
 		PERM_ELT(A00, A0B, B8, B5, B1, BE, C0, M8); \
 		PERM_ELT(A01, A00, B9, B6, B2, BF, CF, M9); \
 		PERM_ELT(A02, A01, BA, B7, B3, B0, CE, MA); \
 		PERM_ELT(A03, A02, BB, B8, B4, B1, CD, MB); \
 		PERM_ELT(A04, A03, BC, B9, B5, B2, CC, MC); \
 		PERM_ELT(A05, A04, BD, BA, B6, B3, CB, MD); \
 		PERM_ELT(A06, A05, BE, BB, B7, B4, CA, ME); \
 		PERM_ELT(A07, A06, BF, BC, B8, B5, C9, MF); \
 	} while (0)
 /*
  * Third unrolled permutation pass. Same B/C/M argument pattern as
  * PERM_STEP_0, with the A word sequence continuing from PERM_STEP_1:
  * A08..A0B, then A00..A0B.
  */
 #define PERM_STEP_2   do { \
 		PERM_ELT(A08, A07, B0, BD, B9, B6, C8, M0); \
 		PERM_ELT(A09, A08, B1, BE, BA, B7, C7, M1); \
 		PERM_ELT(A0A, A09, B2, BF, BB, B8, C6, M2); \
 		PERM_ELT(A0B, A0A, B3, B0, BC, B9, C5, M3); \
 		PERM_ELT(A00, A0B, B4, B1, BD, BA, C4, M4); \
 		PERM_ELT(A01, A00, B5, B2, BE, BB, C3, M5); \
 		PERM_ELT(A02, A01, B6, B3, BF, BC, C2, M6); \
 		PERM_ELT(A03, A02, B7, B4, B0, BD, C1, M7); \
 		PERM_ELT(A04, A03, B8, B5, B1, BE, C0, M8); \
 		PERM_ELT(A05, A04, B9, B6, B2, BF, CF, M9); \
 		PERM_ELT(A06, A05, BA, B7, B3, B0, CE, MA); \
 		PERM_ELT(A07, A06, BB, B8, B4, B1, CD, MB); \
 		PERM_ELT(A08, A07, BC, B9, B5, B2, CC, MC); \
 		PERM_ELT(A09, A08, BD, BA, B6, B3, CB, MD); \
 		PERM_ELT(A0A, A09, BE, BB, B7, B4, CA, ME); \
 		PERM_ELT(A0B, A0A, BF, BC, B8, B5, C9, MF); \
 	} while (0)
 /*
  * Applies the full permutation to the local state, in three phases:
  *   1. every B word is replaced by (Bi << 17) | (Bi >> 15), with T32
  *      applied to the left-shifted half (a 17-bit left rotation of a
  *      32-bit value);
  *   2. PERM_STEP_0, PERM_STEP_1 and PERM_STEP_2 run in that order
  *      (48 PERM_ELT steps in total);
  *   3. C words are added into the A words: 36 additions, three per A
  *      word, walking A from A0B down to A00 three times while the C
  *      index steps down cyclically starting at C6.
  * Uses the current M0..MF, so DECODE_BLOCK must have run beforehand.
  */
 #define APPLY_P   do { \
 		B0 = T32(B0 << 17) | (B0 >> 15); \
 		B1 = T32(B1 << 17) | (B1 >> 15); \
 		B2 = T32(B2 << 17) | (B2 >> 15); \
 		B3 = T32(B3 << 17) | (B3 >> 15); \
 		B4 = T32(B4 << 17) | (B4 >> 15); \
 		B5 = T32(B5 << 17) | (B5 >> 15); \
 		B6 = T32(B6 << 17) | (B6 >> 15); \
 		B7 = T32(B7 << 17) | (B7 >> 15); \
 		B8 = T32(B8 << 17) | (B8 >> 15); \
 		B9 = T32(B9 << 17) | (B9 >> 15); \
 		BA = T32(BA << 17) | (BA >> 15); \
 		BB = T32(BB << 17) | (BB >> 15); \
 		BC = T32(BC << 17) | (BC >> 15); \
 		BD = T32(BD << 17) | (BD >> 15); \
 		BE = T32(BE << 17) | (BE >> 15); \
 		BF = T32(BF << 17) | (BF >> 15); \
 		PERM_STEP_0; \
 		PERM_STEP_1; \
 		PERM_STEP_2; \
 		A0B = T32(A0B + C6); \
 		A0A = T32(A0A + C5); \
 		A09 = T32(A09 + C4); \
 		A08 = T32(A08 + C3); \
 		A07 = T32(A07 + C2); \
 		A06 = T32(A06 + C1); \
 		A05 = T32(A05 + C0); \
 		A04 = T32(A04 + CF); \
 		A03 = T32(A03 + CE); \
 		A02 = T32(A02 + CD); \
 		A01 = T32(A01 + CC); \
 		A00 = T32(A00 + CB); \
 		A0B = T32(A0B + CA); \
 		A0A = T32(A0A + C9); \
 		A09 = T32(A09 + C8); \
 		A08 = T32(A08 + C7); \
 		A07 = T32(A07 + C6); \
 		A06 = T32(A06 + C5); \
 		A05 = T32(A05 + C4); \
 		A04 = T32(A04 + C3); \
 		A03 = T32(A03 + C2); \
 		A02 = T32(A02 + C1); \
 		A01 = T32(A01 + C0); \
 		A00 = T32(A00 + CF); \
 		A0B = T32(A0B + CE); \
 		A0A = T32(A0A + CD); \
 		A09 = T32(A09 + CC); \
 		A08 = T32(A08 + CB); \
 		A07 = T32(A07 + CA); \
 		A06 = T32(A06 + C9); \
 		A05 = T32(A05 + C8); \
 		A04 = T32(A04 + C7); \
 		A03 = T32(A03 + C6); \
 		A02 = T32(A02 + C5); \
 		A01 = T32(A01 + C4); \
 		A00 = T32(A00 + C3); \
 	} while (0)
 /*
  * Increments the two-word counter: Wlow is incremented (through T32)
  * and, when it wraps around to 0, the carry is propagated into Whigh.
  */
 #define INCR_W   do { \
 		if ((Wlow = T32(Wlow + 1)) == 0) \
 			Whigh = T32(Whigh + 1); \
 	} while (0)
 #if 0
 static const sph_u32 A_init_192[] = {
 	C32(0xFD749ED4), C32(0xB798E530), C32(0x33904B6F), C32(0x46BDA85E),
 	C32(0x076934B4), C32(0x454B4058), C32(0x77F74527), C32(0xFB4CF465),
 	C32(0x62931DA9), C32(0xE778C8DB), C32(0x22B3998E), C32(0xAC15CFB9)
 };
 static const sph_u32 B_init_192[] = {
 	C32(0x58BCBAC4), C32(0xEC47A08E), C32(0xAEE933B2), C32(0xDFCBC824),
 	C32(0xA7944804), C32(0xBF65BDB0), C32(0x5A9D4502), C32(0x59979AF7),
 	C32(0xC5CEA54E), C32(0x4B6B8150), C32(0x16E71909), C32(0x7D632319),
 	C32(0x930573A0), C32(0xF34C63D1), C32(0xCAF914B4), C32(0xFDD6612C)
 };
 static const sph_u32 C_init_192[] = {
 	C32(0x61550878), C32(0x89EF2B75), C32(0xA1660C46), C32(0x7EF3855B),
 	C32(0x7297B58C), C32(0x1BC67793), C32(0x7FB1C723), C32(0xB66FC640),
 	C32(0x1A48B71C), C32(0xF0976D17), C32(0x088CE80A), C32(0xA454EDF3),
 	C32(0x1C096BF4), C32(0xAC76224B), C32(0x5215781C), C32(0xCD5D2669)
 };
 static const sph_u32 A_init_224[] = {
 	C32(0xA5201467), C32(0xA9B8D94A), C32(0xD4CED997), C32(0x68379D7B),
 	C32(0xA7FC73BA), C32(0xF1A2546B), C32(0x606782BF), C32(0xE0BCFD0F),
 	C32(0x2F25374E), C32(0x069A149F), C32(0x5E2DFF25), C32(0xFAECF061)
 };
 static const sph_u32 B_init_224[] = {
 	C32(0xEC9905D8), C32(0xF21850CF), C32(0xC0A746C8), C32(0x21DAD498),
 	C32(0x35156EEB), C32(0x088C97F2), C32(0x26303E40), C32(0x8A2D4FB5),
 	C32(0xFEEE44B6), C32(0x8A1E9573), C32(0x7B81111A), C32(0xCBC139F0),
 	C32(0xA3513861), C32(0x1D2C362E), C32(0x918C580E), C32(0xB58E1B9C)
 };
 static const sph_u32 C_init_224[] = {
 	C32(0xE4B573A1), C32(0x4C1A0880), C32(0x1E907C51), C32(0x04807EFD),
 	C32(0x3AD8CDE5), C32(0x16B21302), C32(0x02512C53), C32(0x2204CB18),
 	C32(0x99405F2D), C32(0xE5B648A1), C32(0x70AB1D43), C32(0xA10C25C2),
 	C32(0x16F1AC05), C32(0x38BBEB56), C32(0x9B01DC60), C32(0xB1096D83)
 };
 static const sph_u32 A_init_256[] = {
 	C32(0x52F84552), C32(0xE54B7999), C32(0x2D8EE3EC), C32(0xB9645191),
 	C32(0xE0078B86), C32(0xBB7C44C9), C32(0xD2B5C1CA), C32(0xB0D2EB8C),
 	C32(0x14CE5A45), C32(0x22AF50DC), C32(0xEFFDBC6B), C32(0xEB21B74A)
 };
 static const sph_u32 B_init_256[] = {
 	C32(0xB555C6EE), C32(0x3E710596), C32(0xA72A652F), C32(0x9301515F),
 	C32(0xDA28C1FA), C32(0x696FD868), C32(0x9CB6BF72), C32(0x0AFE4002),
 	C32(0xA6E03615), C32(0x5138C1D4), C32(0xBE216306), C32(0xB38B8890),
 	C32(0x3EA8B96B), C32(0x3299ACE4), C32(0x30924DD4), C32(0x55CB34A5)
 };
 static const sph_u32 C_init_256[] = {
 	C32(0xB405F031), C32(0xC4233EBA), C32(0xB3733979), C32(0xC0DD9D55),
 	C32(0xC51C28AE), C32(0xA327B8E1), C32(0x56C56167), C32(0xED614433),
 	C32(0x88B59D60), C32(0x60E2CEBA), C32(0x758B4B8B), C32(0x83E82A7F),
 	C32(0xBC968828), C32(0xE6E00BF7), C32(0xBA839E55), C32(0x9B491C60)
 };
 static const sph_u32 A_init_384[] = {
 	C32(0xC8FCA331), C32(0xE55C504E), C32(0x003EBF26), C32(0xBB6B8D83),
 	C32(0x7B0448C1), C32(0x41B82789), C32(0x0A7C9601), C32(0x8D659CFF),
 	C32(0xB6E2673E), C32(0xCA54C77B), C32(0x1460FD7E), C32(0x3FCB8F2D)
 };
 static const sph_u32 B_init_384[] = {
 	C32(0x527291FC), C32(0x2A16455F), C32(0x78E627E5), C32(0x944F169F),
 	C32(0x1CA6F016), C32(0xA854EA25), C32(0x8DB98ABE), C32(0xF2C62641),
 	C32(0x30117DCB), C32(0xCF5C4309), C32(0x93711A25), C32(0xF9F671B8),
 	C32(0xB01D2116), C32(0x333F4B89), C32(0xB285D165), C32(0x86829B36)
 };
 static const sph_u32 C_init_384[] = {
 	C32(0xF764B11A), C32(0x76172146), C32(0xCEF6934D), C32(0xC6D28399),
 	C32(0xFE095F61), C32(0x5E6018B4), C32(0x5048ECF5), C32(0x51353261),
 	C32(0x6E6E36DC), C32(0x63130DAD), C32(0xA9C69BD6), C32(0x1E90EA0C),
 	C32(0x7C35073B), C32(0x28D95E6D), C32(0xAA340E0D), C32(0xCB3DEE70)
 };
 #endif
 /*
  * Precomputed initial value of the twelve A words for 512-bit output;
  * copied into the context's A array by shabal_init().
  */
 static const sph_u32 A_init_512[] = {
 	C32(0x20728DFD), C32(0x46C0BD53), C32(0xE782B699), C32(0x55304632),
 	C32(0x71B4EF90), C32(0x0EA9E82C), C32(0xDBB930F1), C32(0xFAD06B8B),
 	C32(0xBE0CAE40), C32(0x8BD14410), C32(0x76D2ADAC), C32(0x28ACAB7F)
 };
 /*
  * Precomputed initial value of the sixteen B words for 512-bit output;
  * copied into the context's B array by shabal_init().
  */
 static const sph_u32 B_init_512[] = {
 	C32(0xC1099CB7), C32(0x07B385F3), C32(0xE7442C26), C32(0xCC8AD640),
 	C32(0xEB6F56C7), C32(0x1EA81AA9), C32(0x73B9D314), C32(0x1DE85D08),
 	C32(0x48910A5A), C32(0x893B22DB), C32(0xC5A0DF44), C32(0xBBC4324E),
 	C32(0x72D2F240), C32(0x75941D99), C32(0x6D8BDE82), C32(0xA1A7502B)
 };
 /*
  * Precomputed initial value of the sixteen C words for 512-bit output;
  * copied into the context's C array by shabal_init().
  */
 static const sph_u32 C_init_512[] = {
 	C32(0xD9BF68D1), C32(0x58BAD750), C32(0x56028CB2), C32(0x8134F359),
 	C32(0xB5D469D8), C32(0x941A8CC2), C32(0x418B2A6E), C32(0x04052780),
 	C32(0x7F07D787), C32(0x5194358F), C32(0x3C60D665), C32(0xBE97D79A),
 	C32(0x950C3434), C32(0xAED9A06D), C32(0x2537DC8D), C32(0x7CDB5969)
 };
 /* END -- automatically generated code. */
 static void
 shabal_init(void *cc, unsigned size)
 {
 	/*
 	 * We have precomputed initial states for all the supported
 	 * output bit lengths.
 	 */
 	const sph_u32 *A_init, *B_init, *C_init;
 	sph_shabal_context *sc;
 	switch (size) {
 #if 0
 	case 192:
 		A_init = A_init_192;
 		B_init = B_init_192;
 		C_init = C_init_192;
 		break;
 	case 224:
 		A_init = A_init_224;
 		B_init = B_init_224;
 		C_init = C_init_224;
 		break;
 	case 256:
 		A_init = A_init_256;
 		B_init = B_init_256;
 		C_init = C_init_256;
 		break;
 	case 384:
 		A_init = A_init_384;
 		B_init = B_init_384;
 		C_init = C_init_384;
 		break;
 #endif
 	case 512:
 		A_init = A_init_512;
 		B_init = B_init_512;
 		C_init = C_init_512;
 		break;
 	default:
 		return;
 	}
 	sc = cc;
 	memcpy(sc->A, A_init, sizeof sc->A);
 	memcpy(sc->B, B_init, sizeof sc->B);
 	memcpy(sc->C, C_init, sizeof sc->C);
 	sc->Wlow = 1;
 	sc->Whigh = 0;
 	sc->ptr = 0;
 }
 static void
 shabal_core(void *cc, const unsigned char *data, size_t len)
 {
 	sph_shabal_context *sc;
 	unsigned char *buf;
 	size_t ptr;
 	DECL_STATE
 	sc = cc;
 	buf = sc->buf;
 	ptr = sc->ptr;
 	/*
 	 * We do not want to copy the state to local variables if the
 	 * amount of data is less than what is needed to complete the
 	 * current block. Note that it is anyway suboptimal to call
 	 * this method many times for small chunks of data.
 	 */
 	if (len < (sizeof sc->buf) - ptr) {
 		memcpy(buf + ptr, data, len);
 		ptr += len;
 		sc->ptr = ptr;
 		return;
 	}
 	READ_STATE(sc);
 	while (len > 0) {
 		size_t clen;
 		clen = (sizeof sc->buf) - ptr;
 		if (clen > len)
 			clen = len;
 		memcpy(buf + ptr, data, clen);
 		ptr += clen;
 		data += clen;
 		len -= clen;
 		if (ptr == sizeof sc->buf) {
 			DECODE_BLOCK;
 			INPUT_BLOCK_ADD;
 			XOR_W;
 			APPLY_P;
 			INPUT_BLOCK_SUB;
 			SWAP_BC;
 			INCR_W;
 			ptr = 0;
 		}
 	}
 	WRITE_STATE(sc);
 	sc->ptr = ptr;
 }
 static void
 shabal_close(void *cc, unsigned ub, unsigned n, void *dst, unsigned size_words)
 {
 	sph_shabal_context *sc;
 	unsigned char *buf;
 	size_t ptr;
 	int i;
 	unsigned z;
 	union {
 		unsigned char tmp_out[64];
 		sph_u32 dummy;
 	} u;
 	size_t out_len;
 	DECL_STATE
 	sc = cc;
 	buf = sc->buf;
 	ptr = sc->ptr;
 	z = 0x80 >> n;
 	buf[ptr] = ((ub & -z) | z) & 0xFF;
 	memset(buf + ptr + 1, 0, (sizeof sc->buf) - (ptr + 1));
 	READ_STATE(sc);
 	DECODE_BLOCK;
 	INPUT_BLOCK_ADD;
 	XOR_W;
 	APPLY_P;
 	for (i = 0; i < 3; i ++) {
 		SWAP_BC;
 		XOR_W;
 		APPLY_P;
 	}
 	/*
 	 * We just use our local variables; no need to go through
 	 * the state structure. In order to share some code, we
 	 * emit the relevant words into a temporary buffer, which
 	 * we finally copy into the destination array.
 	 */
 	switch (size_words) {
 	case 16:
 		sph_enc32le_aligned(u.tmp_out +  0, B0);
 		sph_enc32le_aligned(u.tmp_out +  4, B1);
 		sph_enc32le_aligned(u.tmp_out +  8, B2);
 		sph_enc32le_aligned(u.tmp_out + 12, B3);
 		/* fall through */
 	case 12:
 		sph_enc32le_aligned(u.tmp_out + 16, B4);
 		sph_enc32le_aligned(u.tmp_out + 20, B5);
 		sph_enc32le_aligned(u.tmp_out + 24, B6);
 		sph_enc32le_aligned(u.tmp_out + 28, B7);
 		/* fall through */
 	case 8:
 		sph_enc32le_aligned(u.tmp_out + 32, B8);
 		/* fall through */
 	case 7:
 		sph_enc32le_aligned(u.tmp_out + 36, B9);
 		/* fall through */
 	case 6:
 		sph_enc32le_aligned(u.tmp_out + 40, BA);
 		sph_enc32le_aligned(u.tmp_out + 44, BB);
 		sph_enc32le_aligned(u.tmp_out + 48, BC);
 		sph_enc32le_aligned(u.tmp_out + 52, BD);
 		sph_enc32le_aligned(u.tmp_out + 56, BE);
 		sph_enc32le_aligned(u.tmp_out + 60, BF);
 		break;
 	default:
 		return;
 	}
 	out_len = size_words << 2;
 	memcpy(dst, u.tmp_out + (sizeof u.tmp_out) - out_len, out_len);
 	shabal_init(sc, size_words << 5);
 }
 #if 0
 /* see sph_shabal.h */
 void
 sph_shabal192_init(void *cc)
 {
 	shabal_init(cc, 192);
 }
 /* see sph_shabal.h */
 void
 sph_shabal192(void *cc, const void *data, size_t len)
 {
 	shabal_core(cc, data, len);
 }
 /* see sph_shabal.h */
 void
 sph_shabal192_close(void *cc, void *dst)
 {
 	shabal_close(cc, 0, 0, dst, 6);
 }
 /* see sph_shabal.h */
 void
 sph_shabal192_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
 {
 	shabal_close(cc, ub, n, dst, 6);
 }
 /* see sph_shabal.h */
 void
 sph_shabal224_init(void *cc)
 {
 	shabal_init(cc, 224);
 }
 /* see sph_shabal.h */
 void
 sph_shabal224(void *cc, const void *data, size_t len)
 {
 	shabal_core(cc, data, len);
 }
 /* see sph_shabal.h */
 void
 sph_shabal224_close(void *cc, void *dst)
 {
 	shabal_close(cc, 0, 0, dst, 7);
 }
 /* see sph_shabal.h */
 void
 sph_shabal224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
 {
 	shabal_close(cc, ub, n, dst, 7);
 }
 /* see sph_shabal.h */
 void
 sph_shabal256_init(void *cc)
 {
 	shabal_init(cc, 256);
 }
 /* see sph_shabal.h */
 void
 sph_shabal256(void *cc, const void *data, size_t len)
 {
 	shabal_core(cc, data, len);
 }
 /* see sph_shabal.h */
 void
 sph_shabal256_close(void *cc, void *dst)
 {
 	shabal_close(cc, 0, 0, dst, 8);
 }
 /* see sph_shabal.h */
 void
 sph_shabal256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
 {
 	shabal_close(cc, ub, n, dst, 8);
 }
 /* see sph_shabal.h */
 void
 sph_shabal384_init(void *cc)
 {
 	shabal_init(cc, 384);
 }
 /* see sph_shabal.h */
 void
 sph_shabal384(void *cc, const void *data, size_t len)
 {
 	shabal_core(cc, data, len);
 }
 /* see sph_shabal.h */
 void
 sph_shabal384_close(void *cc, void *dst)
 {
 	shabal_close(cc, 0, 0, dst, 12);
 }
 /* see sph_shabal.h */
 void
 sph_shabal384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
 {
 	shabal_close(cc, ub, n, dst, 12);
 }
 #endif
 /*
  * Initialise a context for 512-bit output (declared in sph_shabal.h).
  * Forwards to shabal_init() with size 512.
  */
 void
 sph_shabal512_init(void *cc)
 {
 	shabal_init(cc, 512);
 }
 /*
  * Feed len bytes from data into the context (declared in
  * sph_shabal.h). Forwards unchanged to shabal_core().
  */
 void
 sph_shabal512(void *cc, const void *data, size_t len)
 {
 	shabal_core(cc, data, len);
 }
 /*
  * Finish the computation and write the digest to dst (declared in
  * sph_shabal.h). Calls shabal_close() with no extra bits (ub = 0,
  * n = 0) and 16 output words, i.e. 64 bytes are written to dst.
  */
 void
 sph_shabal512_close(void *cc, void *dst)
 {
 	shabal_close(cc, 0, 0, dst, 16);
 }
 /*
  * Same as sph_shabal512_close(), but first appends extra bits
  * (declared in sph_shabal.h). ub and n are passed through to
  * shabal_close(), which keeps the top n bits of ub ahead of the
  * padding bit; 16 output words (64 bytes) are written to dst.
  */
 void
 sph_shabal512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
 {
 	shabal_close(cc, ub, n, dst, 16);
 }
 #ifdef __cplusplus
 }
 #endif
--- a/sph/sph_shabal.h
+++ b/sph/sph_shabal.h
@ -0,0 +1,344 @@
 /* $Id: sph_shabal.h 175 2010-05-07 16:03:20Z tp $ */
 /**
 * Shabal interface. Shabal is a family of functions which differ by
 * their output size; this implementation defines Shabal for output
 * sizes 192, 224, 256, 384 and 512 bits.
 *
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * ===========================(LICENSE END)=============================
 *
 * @file     sph_shabal.h
 * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
 */
 #ifndef SPH_SHABAL_H__
 #define SPH_SHABAL_H__
 #ifdef __cplusplus
 extern "C"{
 #endif
 #include <stddef.h>
 #include "sph_types.h"
 /**
 * Output size (in bits) for Shabal-192.
 */
 #define SPH_SIZE_shabal192   192
 /**
 * Output size (in bits) for Shabal-224.
 */
 #define SPH_SIZE_shabal224   224
 /**
 * Output size (in bits) for Shabal-256.
 */
 #define SPH_SIZE_shabal256   256
 /**
 * Output size (in bits) for Shabal-384.
 */
 #define SPH_SIZE_shabal384   384
 /**
 * Output size (in bits) for Shabal-512.
 */
 #define SPH_SIZE_shabal512   512
 /**
 * This structure is a context for Shabal computations: it contains the
 * intermediate values and some data from the last entered block. Once
 * a Shabal computation has been performed, the context can be reused for
 * another computation.
 *
 * The contents of this structure are private. A running Shabal computation
 * can be cloned by copying the context (e.g. with a simple
 * <code>memcpy()</code>).
 */
 /*
  * Common context shared by all Shabal output sizes (the per-size context
  * types in this header are typedef aliases of this struct). Fields are
  * hidden from Doxygen via DOXYGEN_IGNORE.
  */
 typedef struct {
 #ifndef DOXYGEN_IGNORE
 	/* 64-byte staging buffer for input data not yet processed */
 	unsigned char buf[64];    /* first field, for alignment */
 	/* NOTE(review): presumably the number of bytes currently held in buf -- confirm in shabal.c */
 	size_t ptr;
 	/* working state: three arrays of 12, 16 and 16 sph_u32 words */
 	sph_u32 A[12], B[16], C[16];
 	/* NOTE(review): looks like the high/low halves of a two-word counter -- confirm in shabal.c */
 	sph_u32 Whigh, Wlow;
 #endif
 } sph_shabal_context;
 /**
 * Type for a Shabal-192 context (identical to the common context).
 */
 typedef sph_shabal_context sph_shabal192_context;
 /**
 * Type for a Shabal-224 context (identical to the common context).
 */
 typedef sph_shabal_context sph_shabal224_context;
 /**
 * Type for a Shabal-256 context (identical to the common context).
 */
 typedef sph_shabal_context sph_shabal256_context;
 /**
 * Type for a Shabal-384 context (identical to the common context).
 */
 typedef sph_shabal_context sph_shabal384_context;
 /**
 * Type for a Shabal-512 context (identical to the common context).
 */
 typedef sph_shabal_context sph_shabal512_context;
 /**
 * Initialize a Shabal-192 context. This process performs no memory allocation.
 *
 * @param cc   the Shabal-192 context (pointer to a
 *             <code>sph_shabal192_context</code>)
 */
 void sph_shabal192_init(void *cc);
 /**
 * Process some data bytes. It is acceptable that <code>len</code> is zero
 * (in which case this function does nothing).
 *
 * @param cc     the Shabal-192 context
 * @param data   the input data
 * @param len    the input data length (in bytes)
 */
 void sph_shabal192(void *cc, const void *data, size_t len);
 /**
 * Terminate the current Shabal-192 computation and output the result into
 * the provided buffer. The destination buffer must be wide enough to
 * accommodate the result (24 bytes). The context is automatically
 * reinitialized.
 *
 * @param cc    the Shabal-192 context
 * @param dst   the destination buffer
 */
 void sph_shabal192_close(void *cc, void *dst);
 /**
 * Add a few additional bits (0 to 7) to the current computation, then
 * terminate it and output the result in the provided buffer, which must
 * be wide enough to accommodate the result (24 bytes). If bit number i
 * in <code>ub</code> has value 2^i, then the extra bits are those
 * numbered 7 downto 8-n (this is the big-endian convention at the byte
 * level). The context is automatically reinitialized.
 *
 * @param cc    the Shabal-192 context
 * @param ub    the extra bits
 * @param n     the number of extra bits (0 to 7)
 * @param dst   the destination buffer
 */
 void sph_shabal192_addbits_and_close(
 	void *cc, unsigned ub, unsigned n, void *dst);
 /**
 * Initialize a Shabal-224 context. This process performs no memory allocation.
 *
 * @param cc   the Shabal-224 context (pointer to a
 *             <code>sph_shabal224_context</code>)
 */
 void sph_shabal224_init(void *cc);
 /**
 * Process some data bytes. It is acceptable that <code>len</code> is zero
 * (in which case this function does nothing).
 *
 * @param cc     the Shabal-224 context
 * @param data   the input data
 * @param len    the input data length (in bytes)
 */
 void sph_shabal224(void *cc, const void *data, size_t len);
 /**
 * Terminate the current Shabal-224 computation and output the result into
 * the provided buffer. The destination buffer must be wide enough to
 * accommodate the result (28 bytes). The context is automatically
 * reinitialized.
 *
 * @param cc    the Shabal-224 context
 * @param dst   the destination buffer
 */
 void sph_shabal224_close(void *cc, void *dst);
 /**
 * Add a few additional bits (0 to 7) to the current computation, then
 * terminate it and output the result in the provided buffer, which must
 * be wide enough to accommodate the result (28 bytes). If bit number i
 * in <code>ub</code> has value 2^i, then the extra bits are those
 * numbered 7 downto 8-n (this is the big-endian convention at the byte
 * level). The context is automatically reinitialized.
 *
 * @param cc    the Shabal-224 context
 * @param ub    the extra bits
 * @param n     the number of extra bits (0 to 7)
 * @param dst   the destination buffer
 */
 void sph_shabal224_addbits_and_close(
 	void *cc, unsigned ub, unsigned n, void *dst);
 /**
 * Initialize a Shabal-256 context. This process performs no memory allocation.
 *
 * @param cc   the Shabal-256 context (pointer to a
 *             <code>sph_shabal256_context</code>)
 */
 void sph_shabal256_init(void *cc);
 /**
 * Process some data bytes. It is acceptable that <code>len</code> is zero
 * (in which case this function does nothing).
 *
 * @param cc     the Shabal-256 context
 * @param data   the input data
 * @param len    the input data length (in bytes)
 */
 void sph_shabal256(void *cc, const void *data, size_t len);
 /**
 * Terminate the current Shabal-256 computation and output the result into
 * the provided buffer. The destination buffer must be wide enough to
 * accommodate the result (32 bytes). The context is automatically
 * reinitialized.
 *
 * @param cc    the Shabal-256 context
 * @param dst   the destination buffer
 */
 void sph_shabal256_close(void *cc, void *dst);
 /**
 * Add a few additional bits (0 to 7) to the current computation, then
 * terminate it and output the result in the provided buffer, which must
 * be wide enough to accommodate the result (32 bytes). If bit number i
 * in <code>ub</code> has value 2^i, then the extra bits are those
 * numbered 7 downto 8-n (this is the big-endian convention at the byte
 * level). The context is automatically reinitialized.
 *
 * @param cc    the Shabal-256 context
 * @param ub    the extra bits
 * @param n     the number of extra bits (0 to 7)
 * @param dst   the destination buffer
 */
 void sph_shabal256_addbits_and_close(
 	void *cc, unsigned ub, unsigned n, void *dst);
 /**
 * Initialize a Shabal-384 context. This process performs no memory allocation.
 *
 * @param cc   the Shabal-384 context (pointer to a
 *             <code>sph_shabal384_context</code>)
 */
 void sph_shabal384_init(void *cc);
 /**
 * Process some data bytes. It is acceptable that <code>len</code> is zero
 * (in which case this function does nothing).
 *
 * @param cc     the Shabal-384 context
 * @param data   the input data
 * @param len    the input data length (in bytes)
 */
 void sph_shabal384(void *cc, const void *data, size_t len);
 /**
 * Terminate the current Shabal-384 computation and output the result into
 * the provided buffer. The destination buffer must be wide enough to
 * accommodate the result (48 bytes). The context is automatically
 * reinitialized.
 *
 * @param cc    the Shabal-384 context
 * @param dst   the destination buffer
 */
 void sph_shabal384_close(void *cc, void *dst);
 /**
 * Add a few additional bits (0 to 7) to the current computation, then
 * terminate it and output the result in the provided buffer, which must
 * be wide enough to accommodate the result (48 bytes). If bit number i
 * in <code>ub</code> has value 2^i, then the extra bits are those
 * numbered 7 downto 8-n (this is the big-endian convention at the byte
 * level). The context is automatically reinitialized.
 *
 * @param cc    the Shabal-384 context
 * @param ub    the extra bits
 * @param n     the number of extra bits (0 to 7)
 * @param dst   the destination buffer
 */
 void sph_shabal384_addbits_and_close(
 	void *cc, unsigned ub, unsigned n, void *dst);
 /**
 * Initialize a Shabal-512 context. This process performs no memory allocation.
 *
 * @param cc   the Shabal-512 context (pointer to a
 *             <code>sph_shabal512_context</code>)
 */
 void sph_shabal512_init(void *cc);
 /**
 * Process some data bytes. It is acceptable that <code>len</code> is zero
 * (in which case this function does nothing).
 *
 * @param cc     the Shabal-512 context
 * @param data   the input data
 * @param len    the input data length (in bytes)
 */
 void sph_shabal512(void *cc, const void *data, size_t len);
 /**
 * Terminate the current Shabal-512 computation and output the result into
 * the provided buffer. The destination buffer must be wide enough to
 * accommodate the result (64 bytes). The context is automatically
 * reinitialized.
 *
 * @param cc    the Shabal-512 context
 * @param dst   the destination buffer
 */
 void sph_shabal512_close(void *cc, void *dst);
 /**
 * Add a few additional bits (0 to 7) to the current computation, then
 * terminate it and output the result in the provided buffer, which must
 * be wide enough to accommodate the result (64 bytes). If bit number i
 * in <code>ub</code> has value 2^i, then the extra bits are those
 * numbered 7 downto 8-n (this is the big-endian convention at the byte
 * level). The context is automatically reinitialized.
 *
 * @param cc    the Shabal-512 context
 * @param ub    the extra bits
 * @param n     the number of extra bits (0 to 7)
 * @param dst   the destination buffer
 */
 void sph_shabal512_addbits_and_close(
 	void *cc, unsigned ub, unsigned n, void *dst);
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/sph/sph_whirlpool.h
+++ b/sph/sph_whirlpool.h
@ -0,0 +1,216 @@
 /* $Id: sph_whirlpool.h 216 2010-06-08 09:46:57Z tp $ */
 /**
 * WHIRLPOOL interface.
 *
 * WHIRLPOOL knows three variants, dubbed "WHIRLPOOL-0" (original
 * version, published in 2000, studied by NESSIE), "WHIRLPOOL-1"
 * (first revision, 2001, with a new S-box) and "WHIRLPOOL" (current
 * version, 2003, with a new diffusion matrix, also described as "plain
 * WHIRLPOOL"). All three variants are implemented here.
 *
 * The original WHIRLPOOL (i.e. WHIRLPOOL-0) was published in: P. S. L.
 * M. Barreto, V. Rijmen, "The Whirlpool Hashing Function", First open
 * NESSIE Workshop, Leuven, Belgium, November 13--14, 2000.
 *
 * The current WHIRLPOOL specification and a reference implementation
 * can be found on the WHIRLPOOL web page:
 * http://paginas.terra.com.br/informatica/paulobarreto/WhirlpoolPage.html
 *
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * ===========================(LICENSE END)=============================
 *
 * @file     sph_whirlpool.h
 * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
 */
 #ifndef SPH_WHIRLPOOL_H__
 #define SPH_WHIRLPOOL_H__
 #include <stddef.h>
 #include "sph_types.h"
 #ifdef __cplusplus
 extern "C"{
 #endif
 #if SPH_64
 /**
 * Output size (in bits) for WHIRLPOOL.
 */
 #define SPH_SIZE_whirlpool   512
 /**
 * Output size (in bits) for WHIRLPOOL-0.
 */
 #define SPH_SIZE_whirlpool0   512
 /**
 * Output size (in bits) for WHIRLPOOL-1.
 */
 #define SPH_SIZE_whirlpool1   512
 /**
 * This structure is a context for WHIRLPOOL computations: it contains the
 * intermediate values and some data from the last entered block. Once
 * a WHIRLPOOL computation has been performed, the context can be reused for
 * another computation.
 *
 * The contents of this structure are private. A running WHIRLPOOL computation
 * can be cloned by copying the context (e.g. with a simple
 * <code>memcpy()</code>).
 */
 /*
  * Context shared by WHIRLPOOL, WHIRLPOOL-0 and WHIRLPOOL-1 (the latter two
  * context types are typedef aliases of this struct). Fields are hidden from
  * Doxygen via DOXYGEN_IGNORE.
  */
 typedef struct {
 #ifndef DOXYGEN_IGNORE
 	/* 64-byte staging buffer for input data not yet processed */
 	unsigned char buf[64];    /* first field, for alignment */
 	/* hash state: eight sph_u64 words */
 	sph_u64 state[8];
 	/*
 	 * Input counter. NOTE(review): presumably counts bytes fed so far --
 	 * confirm in whirlpool.c. This whole struct already sits inside an
 	 * enclosing "#if SPH_64" in this header, so the #else branch below is
 	 * never selected here.
 	 */
 #if SPH_64
 	sph_u64 count;
 #else
 	sph_u32 count_high, count_low;
 #endif
 #endif
 } sph_whirlpool_context;
 /**
 * Initialize a WHIRLPOOL context. This process performs no memory allocation.
 *
 * @param cc   the WHIRLPOOL context (pointer to a
 *             <code>sph_whirlpool_context</code>)
 */
 void sph_whirlpool_init(void *cc);
 /**
 * Process some data bytes. It is acceptable that <code>len</code> is zero
 * (in which case this function does nothing). This function applies the
 * plain WHIRLPOOL algorithm.
 *
 * @param cc     the WHIRLPOOL context
 * @param data   the input data
 * @param len    the input data length (in bytes)
 */
 void sph_whirlpool(void *cc, const void *data, size_t len);
 /**
 * Terminate the current WHIRLPOOL computation and output the result into the
 * provided buffer. The destination buffer must be wide enough to
 * accommodate the result (64 bytes). The context is automatically
 * reinitialized.
 *
 * @param cc    the WHIRLPOOL context
 * @param dst   the destination buffer
 */
 void sph_whirlpool_close(void *cc, void *dst);
 /**
 * WHIRLPOOL-0 uses the same structure as plain WHIRLPOOL.
 */
 typedef sph_whirlpool_context sph_whirlpool0_context;
 #ifdef DOXYGEN_IGNORE
 /**
 * Initialize a WHIRLPOOL-0 context. This function is identical to
 * <code>sph_whirlpool_init()</code>.
 *
 * @param cc   the WHIRLPOOL context (pointer to a
 *             <code>sph_whirlpool0_context</code>)
 */
 void sph_whirlpool0_init(void *cc);
 #endif
 #ifndef DOXYGEN_IGNORE
 #define sph_whirlpool0_init   sph_whirlpool_init
 #endif
 /**
 * Process some data bytes. It is acceptable that <code>len</code> is zero
 * (in which case this function does nothing). This function applies the
 * WHIRLPOOL-0 algorithm.
 *
 * @param cc     the WHIRLPOOL context
 * @param data   the input data
 * @param len    the input data length (in bytes)
 */
 void sph_whirlpool0(void *cc, const void *data, size_t len);
 /**
 * Terminate the current WHIRLPOOL-0 computation and output the result into the
 * provided buffer. The destination buffer must be wide enough to
 * accommodate the result (64 bytes). The context is automatically
 * reinitialized.
 *
 * @param cc    the WHIRLPOOL-0 context
 * @param dst   the destination buffer
 */
 void sph_whirlpool0_close(void *cc, void *dst);
 /**
 * WHIRLPOOL-1 uses the same structure as plain WHIRLPOOL.
 */
 typedef sph_whirlpool_context sph_whirlpool1_context;
 #ifdef DOXYGEN_IGNORE
 /**
 * Initialize a WHIRLPOOL-1 context. This function is identical to
 * <code>sph_whirlpool_init()</code>.
 *
 * @param cc   the WHIRLPOOL context (pointer to a
 *             <code>sph_whirlpool1_context</code>)
 */
 void sph_whirlpool1_init(void *cc);
 #endif
 #ifndef DOXYGEN_IGNORE
 #define sph_whirlpool1_init   sph_whirlpool_init
 #endif
 /**
 * Process some data bytes. It is acceptable that <code>len</code> is zero
 * (in which case this function does nothing). This function applies the
 * WHIRLPOOL-1 algorithm.
 *
 * @param cc     the WHIRLPOOL context
 * @param data   the input data
 * @param len    the input data length (in bytes)
 */
 void sph_whirlpool1(void *cc, const void *data, size_t len);
 /**
 * Terminate the current WHIRLPOOL-1 computation and output the result into the
 * provided buffer. The destination buffer must be wide enough to
 * accommodate the result (64 bytes). The context is automatically
 * reinitialized.
 *
 * @param cc    the WHIRLPOOL-1 context
 * @param dst   the destination buffer
 */
 void sph_whirlpool1_close(void *cc, void *dst);
 #endif
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/sph/whirlpool.c
+++ b/sph/whirlpool.c
--- a/sph/x15_helper.c
+++ b/sph/x15_helper.c
@ -0,0 +1,346 @@
 /* $Id: md_helper.c 216 2010-06-08 09:46:57Z tp $ */
 /*
 * This file contains some functions which implement the external data
 * handling and padding for Merkle-Damgard hash functions which follow
 * the conventions set out by MD4 (little-endian) or SHA-1 (big-endian).
 *
 * API: this file is meant to be included, not compiled as a stand-alone
 * file. Some macros must be defined:
 *   RFUN   name for the round function
 *   HASH   "short name" for the hash function
 *   BE32   defined for big-endian, 32-bit based (e.g. SHA-1)
 *   LE32   defined for little-endian, 32-bit based (e.g. MD5)
 *   BE64   defined for big-endian, 64-bit based (e.g. SHA-512)
 *   LE64   defined for little-endian, 64-bit based (no example yet)
 *   PW01   if defined, append 0x01 instead of 0x80 (for Tiger)
 *   BLEN   if defined, length of a message block (in bytes)
 *   PLW1   if defined, length is defined on one 64-bit word only (for Tiger)
 *   PLW4   if defined, length is defined on four 64-bit words (for WHIRLPOOL)
 *   SVAL   if defined, reference to the context state information
 *
 * BLEN is used when a message block is not 16 (32-bit or 64-bit) words:
 * this is used for instance for Tiger, which works on 64-bit words but
 * uses 512-bit message blocks (eight 64-bit words). PLW1 and PLW4 are
 * ignored if 32-bit words are used; if 64-bit words are used and PLW1 is
 * set, then only one word (64 bits) will be used to encode the input
 * message length (in bits), otherwise two words will be used (as in
 * SHA-384 and SHA-512). If 64-bit words are used and PLW4 is defined (but
 * not PLW1), four 64-bit words will be used to encode the message length
 * (in bits). Note that regardless of those settings, only 64-bit message
 * lengths are supported (in bits): messages longer than 2 Exabytes will be
 * improperly hashed (this is unlikely to happen soon: 2 Exabytes is about
 * 2 million Terabytes, which is huge).
 *
 * If CLOSE_ONLY is defined, then this file defines only the sph_XXX_close()
 * function. This is used for Tiger2, which is identical to Tiger except
 * when it comes to the padding (Tiger2 uses the standard 0x80 byte instead
 * of the 0x01 from original Tiger).
 *
 * The RFUN function is invoked with two arguments, the first pointing to
 * aligned data (as a "const void *"), the second being state information
 * from the context structure. By default, this state information is the
 * "val" field from the context, and this field is assumed to be an array
 * of words ("sph_u32" or "sph_u64", depending on BE32/LE32/BE64/LE64).
 * The "val" field can have any type, except
 * for the output encoding which assumes that it is an array of "sph_u32"
 * values. By defining NO_OUTPUT, this last step is deactivated; the
 * includer code is then responsible for writing out the hash result. When
 * NO_OUTPUT is defined, the third parameter to the "close()" function is
 * ignored.
 *
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
 * 
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 * 
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * ===========================(LICENSE END)=============================
 *
 * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
 */
 #ifdef _MSC_VER
 #pragma warning (disable: 4146)
 #endif
 #undef SPH_XCAT
 #define SPH_XCAT(a, b)     SPH_XCAT_(a, b)
 #undef SPH_XCAT_
 #define SPH_XCAT_(a, b)    a ## b
 #undef SPH_BLEN
 #undef SPH_WLEN
 #if defined BE64 || defined LE64
 #define SPH_BLEN    128U
 #define SPH_WLEN      8U
 #else
 #define SPH_BLEN     64U
 #define SPH_WLEN      4U
 #endif
 #ifdef BLEN
 #undef SPH_BLEN
 #define SPH_BLEN    BLEN
 #endif
 #undef SPH_MAXPAD
 #if defined PLW1
 #define SPH_MAXPAD   (SPH_BLEN - SPH_WLEN)
 #elif defined PLW4
 #define SPH_MAXPAD   (SPH_BLEN - (SPH_WLEN << 2))
 #else
 #define SPH_MAXPAD   (SPH_BLEN - (SPH_WLEN << 1))
 #endif
 #undef SPH_VAL
 #undef SPH_NO_OUTPUT
 #ifdef SVAL
 #define SPH_VAL         SVAL
 #define SPH_NO_OUTPUT   1
 #else
 #define SPH_VAL   sc->val
 #endif
 #ifndef CLOSE_ONLY
 /*
  * Buffered update routine. When SPH_UPTR is defined it is emitted as the
  * static helper <HASH>_short(); otherwise it is the public sph_<HASH>()
  * entry point itself.
  *
  * cc     context (pointer to a sph_<HASH>_context)
  * data   input bytes
  * len    number of input bytes (0 is accepted: the loop does not run)
  *
  * Input is copied into sc->buf; each time the buffer holds SPH_BLEN bytes,
  * RFUN is run on it. The byte counter in the context is advanced by the
  * number of bytes consumed.
  */
 #ifdef SPH_UPTR
 static void
 SPH_XCAT(HASH, _short)(void *cc, const void *data, size_t len)
 #else
 void
 SPH_XCAT(sph_, HASH)(void *cc, const void *data, size_t len)
 #endif
 {
 	SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc;
 	unsigned current;
 	sc = cc;
 	/*
 	 * Current fill level of sc->buf, derived from the byte counter.
 	 * The mask relies on SPH_BLEN being a power of two.
 	 */
 #if SPH_64
 	current = (unsigned)sc->count & (SPH_BLEN - 1U);
 #else
 	current = (unsigned)sc->count_low & (SPH_BLEN - 1U);
 #endif
 	while (len > 0) {
 		unsigned clen;
 #if !SPH_64
 		sph_u32 clow, clow2;
 #endif
 		/* take at most what is needed to complete the current block */
 		clen = SPH_BLEN - current;
 		if (clen > len)
 			clen = len;
 		memcpy(sc->buf + current, data, clen);
 		data = (const unsigned char *)data + clen;
 		current += clen;
 		len -= clen;
 		/* buffer full: process it and start a new block */
 		if (current == SPH_BLEN) {
 			RFUN(sc->buf, SPH_VAL);
 			current = 0;
 		}
 #if SPH_64
 		sc->count += clen;
 #else
 		/* two-word counter: propagate the carry out of the low word */
 		clow = sc->count_low;
 		clow2 = SPH_T32(clow + clen);
 		sc->count_low = clow2;
 		if (clow2 < clow)
 			sc->count_high ++;
 #endif
 	}
 }
 /*
  * Public update entry point used when SPH_UPTR is defined. Short inputs
  * (fewer than 2 * SPH_BLEN bytes) and, unless SPH_UNALIGNED is set,
  * inputs that are not aligned on SPH_WLEN go through <HASH>_short().
  * Otherwise full blocks are passed to RFUN directly from the caller's
  * buffer, without being copied into sc->buf first.
  *
  * cc     context (pointer to a sph_<HASH>_context)
  * data   input bytes
  * len    number of input bytes
  */
 #ifdef SPH_UPTR
 void
 SPH_XCAT(sph_, HASH)(void *cc, const void *data, size_t len)
 {
 	SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc;
 	unsigned current;
 	size_t orig_len;
 #if !SPH_64
 	sph_u32 clow, clow2;
 #endif
 	if (len < (2 * SPH_BLEN)) {
 		SPH_XCAT(HASH, _short)(cc, data, len);
 		return;
 	}
 	sc = cc;
 	/* fill level of sc->buf (mask relies on SPH_BLEN being a power of two) */
 #if SPH_64
 	current = (unsigned)sc->count & (SPH_BLEN - 1U);
 #else
 	current = (unsigned)sc->count_low & (SPH_BLEN - 1U);
 #endif
 	/* first complete the partially filled buffer through the buffered path */
 	if (current > 0) {
 		unsigned t;
 		t = SPH_BLEN - current;
 		SPH_XCAT(HASH, _short)(cc, data, t);
 		data = (const unsigned char *)data + t;
 		len -= t;
 	}
 #if !SPH_UNALIGNED
 	/* misaligned remainder: fall back to the copying path */
 	if (((SPH_UPTR)data & (SPH_WLEN - 1U)) != 0) {
 		SPH_XCAT(HASH, _short)(cc, data, len);
 		return;
 	}
 #endif
 	/* remember the length: len is consumed by the loop, the counter needs the total */
 	orig_len = len;
 	while (len >= SPH_BLEN) {
 		RFUN(data, SPH_VAL);
 		len -= SPH_BLEN;
 		data = (const unsigned char *)data + SPH_BLEN;
 	}
 	/* stash the trailing partial block at the start of the (now empty) buffer */
 	if (len > 0)
 		memcpy(sc->buf, data, len);
 #if SPH_64
 	sc->count += (sph_u64)orig_len;
 #else
 	clow = sc->count_low;
 	clow2 = SPH_T32(clow + orig_len);
 	sc->count_low = clow2;
 	if (clow2 < clow)
 		sc->count_high ++;
 	/*
 	 * This code handles the improbable situation where "size_t" is
 	 * greater than 32 bits, and yet we do not have a 64-bit type.
 	 */
 	/*
 	 * The three shifts total 32 bits; presumably split so that no single
 	 * shift reaches the full width of a 32-bit size_t.
 	 */
 	orig_len >>= 12;
 	orig_len >>= 10;
 	orig_len >>= 10;
 	sc->count_high += orig_len;
 #endif
 }
 #endif
 #endif
 /*
  * Perform padding and produce result. The context is NOT reinitialized
  * by this function.
  *
  * Parameters:
  *   cc    context (pointer to the sph_<HASH>_context being finalized)
  *   ub    extra bits; only the n most significant bits of its low byte
  *         are kept
  *   n     number of extra bits
  *   dst   output buffer (ignored when SPH_NO_OUTPUT is defined)
  *   rnum  number of state words encoded into dst (ignored when
  *         SPH_NO_OUTPUT is defined)
  *
  * Steps: append the padding marker byte, zero-fill up to SPH_MAXPAD
  * (running RFUN on an intermediate block when the marker leaves no room
  * for the length field), encode the message length in bits, run RFUN on
  * the final block and, unless SPH_NO_OUTPUT is defined, encode rnum
  * state words into dst.
  */
 static void
 SPH_XCAT(HASH, _addbits_and_close)(void *cc,
 	unsigned ub, unsigned n, void *dst, unsigned rnum)
 {
 	SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc;
 	unsigned current, u;
 #if !SPH_64
 	sph_u32 low, high;
 #endif
 	sc = cc;
 	/* fill level of sc->buf (mask relies on SPH_BLEN being a power of two) */
 #if SPH_64
 	current = (unsigned)sc->count & (SPH_BLEN - 1U);
 #else
 	current = (unsigned)sc->count_low & (SPH_BLEN - 1U);
 #endif
 #ifdef PW01
 	/* PW01 marker: the top n bits of ub followed by a 1 in the lowest position (0x01 when n == 0) */
 	sc->buf[current ++] = (0x100 | (ub & 0xFF)) >> (8 - n);
 #else
 	{
 		/* standard marker: the top n bits of ub followed by a single 1 bit (0x80 when n == 0) */
 		unsigned z;
 		z = 0x80 >> n;
 		sc->buf[current ++] = ((ub & -z) | z) & 0xFF;
 	}
 #endif
 	if (current > SPH_MAXPAD) {
 		/* no room left for the length field: flush this block, pad a new one */
 		memset(sc->buf + current, 0, SPH_BLEN - current);
 		RFUN(sc->buf, SPH_VAL);
 		memset(sc->buf, 0, SPH_MAXPAD);
 	} else {
 		memset(sc->buf + current, 0, SPH_MAXPAD - current);
 	}
 	/*
 	 * Length field, written at SPH_MAXPAD. The bit length is
 	 * (count << 3) + n; (count >> 61) holds the bits shifted out of the
 	 * low 64-bit word.
 	 */
 #if defined BE64
 #if defined PLW1
 	sph_enc64be_aligned(sc->buf + SPH_MAXPAD,
 		SPH_T64(sc->count << 3) + (sph_u64)n);
 #elif defined PLW4
 	/* four-word big-endian length: two zero words, high word, low word */
 	memset(sc->buf + SPH_MAXPAD, 0, 2 * SPH_WLEN);
 	sph_enc64be_aligned(sc->buf + SPH_MAXPAD + 2 * SPH_WLEN,
 		sc->count >> 61);
 	sph_enc64be_aligned(sc->buf + SPH_MAXPAD + 3 * SPH_WLEN,
 		SPH_T64(sc->count << 3) + (sph_u64)n);
 #else
 	sph_enc64be_aligned(sc->buf + SPH_MAXPAD, sc->count >> 61);
 	sph_enc64be_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN,
 		SPH_T64(sc->count << 3) + (sph_u64)n);
 #endif
 #elif defined LE64
 #if defined PLW1
 	sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
 		SPH_T64(sc->count << 3) + (sph_u64)n);
 #elif defined PLW4
 	/*
 	 * Four-word little-endian length: low word, high word, two zero
 	 * words. SPH_MAXPAD reserves four words under PLW4, so all four
 	 * must be written here.
 	 */
 	sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
 		SPH_T64(sc->count << 3) + (sph_u64)n);
 	sph_enc64le_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, sc->count >> 61);
 	memset(sc->buf + SPH_MAXPAD + 2 * SPH_WLEN, 0, 2 * SPH_WLEN);
 #else
 	sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
 		SPH_T64(sc->count << 3) + (sph_u64)n);
 	sph_enc64le_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, sc->count >> 61);
 #endif
 #else
 #if SPH_64
 #ifdef BE32
 	sph_enc64be_aligned(sc->buf + SPH_MAXPAD,
 		SPH_T64(sc->count << 3) + (sph_u64)n);
 #else
 	sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
 		SPH_T64(sc->count << 3) + (sph_u64)n);
 #endif
 #else
 	/* no 64-bit type: build the bit length from the two 32-bit counter words */
 	low = sc->count_low;
 	high = SPH_T32((sc->count_high << 3) | (low >> 29));
 	low = SPH_T32(low << 3) + (sph_u32)n;
 #ifdef BE32
 	sph_enc32be(sc->buf + SPH_MAXPAD, high);
 	sph_enc32be(sc->buf + SPH_MAXPAD + SPH_WLEN, low);
 #else
 	sph_enc32le(sc->buf + SPH_MAXPAD, low);
 	sph_enc32le(sc->buf + SPH_MAXPAD + SPH_WLEN, high);
 #endif
 #endif
 #endif
 	RFUN(sc->buf, SPH_VAL);
 #ifdef SPH_NO_OUTPUT
 	/* the includer writes the result itself; silence unused warnings */
 	(void)dst;
 	(void)rnum;
 	(void)u;
 #else
 	for (u = 0; u < rnum; u ++) {
 #if defined BE64
 		sph_enc64be((unsigned char *)dst + 8 * u, sc->val[u]);
 #elif defined LE64
 		sph_enc64le((unsigned char *)dst + 8 * u, sc->val[u]);
 #elif defined BE32
 		sph_enc32be((unsigned char *)dst + 4 * u, sc->val[u]);
 #else
 		sph_enc32le((unsigned char *)dst + 4 * u, sc->val[u]);
 #endif
 	}
 #endif
 }
 /*
  * Finalize without extra bits: forwards to <HASH>_addbits_and_close()
  * with ub = 0 and n = 0, passing dst and rnum through unchanged.
  */
 static void SPH_XCAT(HASH, _close)(void *cc, void *dst, unsigned rnum)
 {
 	SPH_XCAT(HASH, _addbits_and_close)(cc, 0, 0, dst, rnum);
 }
--- a/util.c
+++ b/util.c
@ -1318,3 +1318,33 @@ out:
 	pthread_mutex_unlock(&tq->mutex);
 	return rval;
 }
 /*
  * Write the first 32 bytes at hash to stdout as two-digit lowercase hex,
  * with no separators and no trailing newline.
  */
 static void print_hash(unsigned char *hash)
 {
 	const unsigned char *cursor = hash;
 	const unsigned char *stop = hash + 32;
 	while (cursor != stop) {
 		printf("%02x", *cursor);
 		cursor++;
 	}
 }
 /*
  * Run the X11, X13, X14 and X15 CPU hash functions over an all-zero
  * 128-byte input buffer and print each result to stdout via print_hash().
  */
 void print_hash_tests(void)
 {
 	unsigned char input[128];
 	unsigned char digest[128];
 	memset(input, 0, sizeof(input));
 	printf("CPU HASH ON EMPTY BUFFER RESULTS:\n");
 	/* the digest buffer is cleared before every run */
 	memset(digest, 0, sizeof(digest));
 	x11hash(digest, input);
 	printf("\nX11: ");
 	print_hash(digest);
 	memset(digest, 0, sizeof(digest));
 	x13hash(digest, input);
 	printf("\nX13: ");
 	print_hash(digest);
 	memset(digest, 0, sizeof(digest));
 	x14hash(digest, input);
 	printf("\nX14: ");
 	print_hash(digest);
 	memset(digest, 0, sizeof(digest));
 	x15hash(digest, input);
 	printf("\nX15: ");
 	print_hash(digest);
 }
--- a/x11/cuda_x11_cubehash512.cu
+++ b/x11/cuda_x11_cubehash512.cu
@ -1,3 +1,5 @@
 #include <cuda_runtime.h>
 // aus heavy.cu
 extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
@ -8,10 +10,12 @@ typedef unsigned char uint8_t;
 typedef unsigned int uint32_t;
 typedef unsigned long long uint64_t;
-static __device__ uint32_t cuda_swab32(uint32_t x)
+#if 0
 // Device helper: reverse the byte order of a 32-bit word.
 // __byte_perm() with selector 0x0123 picks source bytes 3,2,1,0 of x for
 // result bytes 0,1,2,3. The second operand (0) is not selected.
 // This definition sits between "#if 0" and "#endif", so it is compiled out.
 __device__ static uint32_t cuda_swab32(uint32_t x)
 {
 	return __byte_perm(x, 0, 0x0123);
 }
 #endif
 typedef unsigned char BitSequence;
 typedef unsigned long long DataLength;
--- a/x11/cuda_x11_echo.cu
+++ b/x11/cuda_x11_echo.cu
@ -1,16 +1,13 @@
 #include <cuda.h>
-#include "cuda_runtime.h"
+#include <cuda_runtime.h>
 #include "device_launch_parameters.h"
 #include <stdio.h>
 #include <stdint.h>
 #include <memory.h>
 // Folgende Definitionen später durch header ersetzen
 typedef unsigned char uint8_t;
 typedef unsigned int uint32_t;
 typedef unsigned long long uint64_t;
 // das Hi Word aus einem 64 Bit Typen extrahieren
 #if 0
 static __device__ uint32_t HIWORD(const uint64_t &x) {
 #if __CUDA_ARCH__ >= 130
 	return (uint32_t)__double2hiint(__longlong_as_double(x));
@ -27,6 +24,7 @@ static __device__ uint32_t LOWORD(const uint64_t &x) {
 	return (uint32_t)(x & 0xFFFFFFFFULL);
 #endif
 }
 #endif
 #define SPH_C64(x)    ((uint64_t)(x ## ULL))
 #define SPH_C32(x)    ((uint32_t)(x ## U))
--- a/x11/cuda_x11_luffa512.cu
+++ b/x11/cuda_x11_luffa512.cu
@ -18,6 +18,8 @@
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
 #include <cuda_runtime.h>
 // aus heavy.cu
 extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
--- a/x11/cuda_x11_shavite512.cu
+++ b/x11/cuda_x11_shavite512.cu
@ -1,14 +1,12 @@
 #include <stdint.h>
 #include <cuda_runtime.h>
 // aus heavy.cu
 extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
 typedef unsigned char BitSequence;
 typedef unsigned long long DataLength;
 typedef unsigned char uint8_t;
 typedef unsigned short uint16_t;
 typedef unsigned int uint32_t;
 typedef unsigned long long uint64_t;
 #define SPH_C64(x)    ((uint64_t)(x ## ULL))
 #define SPH_C32(x)    ((uint32_t)(x ## U))
 #define SPH_T32(x)    ((x) & SPH_C32(0xFFFFFFFF))
--- a/x11/x11.cu
+++ b/x11/x11.cu
@ -18,6 +18,7 @@ extern "C"
 }
 #include <stdint.h>
 #include <cuda_runtime.h>
 // aus cpu-miner.c
 extern int device_map[8];
@ -71,7 +72,7 @@ extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t star
 											int order);
 // X11 Hashfunktion
-inline void x11hash(void *state, const void *input)
+extern "C" void x11hash(void *output, const void *input)
 {
 	// blake1-bmw2-grs3-skein4-jh5-keccak6-luffa7-cubehash8-shavite9-simd10-echo11
@ -87,70 +88,54 @@ inline void x11hash(void *state, const void *input)
 	sph_simd512_context ctx_simd;
 	sph_echo512_context ctx_echo;
-    unsigned char hash[64];
+	unsigned char hash[128];
 	memset(hash, 0, sizeof hash);
 	sph_blake512_init(&ctx_blake);
    // ZBLAKE;
 	sph_blake512 (&ctx_blake, input, 80);
 	sph_blake512_close(&ctx_blake, (void*) hash);
 	sph_bmw512_init(&ctx_bmw);
    // ZBMW;
 	sph_bmw512 (&ctx_bmw, (const void*) hash, 64);
 	sph_bmw512_close(&ctx_bmw, (void*) hash);
 	sph_groestl512_init(&ctx_groestl);
    // ZGROESTL;
 	sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
 	sph_groestl512_close(&ctx_groestl, (void*) hash);
 	sph_skein512_init(&ctx_skein);
    // ZSKEIN;
 	sph_skein512 (&ctx_skein, (const void*) hash, 64);
 	sph_skein512_close(&ctx_skein, (void*) hash);
 	sph_jh512_init(&ctx_jh);
    // ZJH;
 	sph_jh512 (&ctx_jh, (const void*) hash, 64);
 	sph_jh512_close(&ctx_jh, (void*) hash);
 	sph_keccak512_init(&ctx_keccak);
    // ZKECCAK;
 	sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
 	sph_keccak512_close(&ctx_keccak, (void*) hash);
 	sph_luffa512_init(&ctx_luffa);
    // ZLUFFA;
 	sph_luffa512 (&ctx_luffa, (const void*) hash, 64);
 	sph_luffa512_close (&ctx_luffa, (void*) hash);
 #if 1
 	sph_cubehash512_init(&ctx_cubehash);
    // ZCUBEHASH;
 	sph_cubehash512 (&ctx_cubehash, (const void*) hash, 64);
 	sph_cubehash512_close(&ctx_cubehash, (void*) hash);
 #endif
 #if 1
 	sph_shavite512_init(&ctx_shavite);
    // ZSHAVITE;
 	sph_shavite512 (&ctx_shavite, (const void*) hash, 64);
 	sph_shavite512_close(&ctx_shavite, (void*) hash);
 #endif
 	sph_simd512_init(&ctx_simd);
    // ZSIMD
 	sph_simd512 (&ctx_simd, (const void*) hash, 64);
 	sph_simd512_close(&ctx_simd, (void*) hash);
 #if 1
 	sph_echo512_init(&ctx_echo);
    // ZECHO
 	sph_echo512 (&ctx_echo, (const void*) hash, 64);
 	sph_echo512_close(&ctx_echo, (void*) hash);
 #endif
-    memcpy(state, hash, 32);
+	memcpy(output, hash, 32);
 }
@ -242,13 +227,16 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
 			be32enc(&endiandata[19], foundNonce);
 			x11hash(vhash64, endiandata);
-			if ((vhash64[7]<=Htarg) && fulltest(vhash64, ptarget)) {
+			if ((vhash64[7] <= Htarg) && fulltest(vhash64, ptarget)) {
 				pdata[19] = foundNonce;
 				*hashes_done = foundNonce - first_nonce + 1;
 				return 1;
-			} else {
+			}
-				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
+			else if (vhash64[7] > Htarg) {
 				applog(LOG_INFO, "GPU #%d: result for %08x is not in range: %x > %x", thr_id, foundNonce, vhash64[7], Htarg);
 			}
 			else {
 				applog(LOG_INFO, "GPU #%d: result for %08x does not validate on CPU!", thr_id, foundNonce);
 			}
 		}
--- a/x13/cuda_x13_fugue512.cu
+++ b/x13/cuda_x13_fugue512.cu
@ -5,6 +5,25 @@
 * heavily based on phm's sgminer
 *
 */
 #include <cuda.h>
 #include <cuda_runtime.h>
 #include "device_launch_parameters.h"
 #include <stdint.h>
 // aus heavy.cu
 extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
 #define SPH_C32(x)    ((uint32_t)(x ## U))
 #define SPH_T32(x)    ((x) & SPH_C32(0xFFFFFFFF))
 #if __CUDA_ARCH__ < 350
 // Kepler (Compute 3.0)
 #define ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
 #else
 // Kepler (Compute 3.5, 5.0)
 #define ROTL32(x, n) __funnelshift_l( (x), (x), (n) )
 #endif
 /*
 * X13 kernel implementation.
@ -37,25 +56,8 @@
 * @author   phm <phm@inbox.com>
 */
 // aus heavy.cu
 extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
 #include <stdint.h>
 #define SPH_C64(x)    ((uint64_t)(x ## ULL))
 #define SPH_C32(x)    ((uint32_t)(x ## U))
 #define SPH_T32(x)    ((x) & SPH_C32(0xFFFFFFFF))
 #define SWAB32(x) ( __byte_perm(x, x, 0x0123) )
 #if __CUDA_ARCH__ < 350 
    // Kepler (Compute 3.0)
    #define ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
 #else
    // Kepler (Compute 3.5)
    #define ROTL32(x, n) __funnelshift_l( (x), (x), (n) )
 #endif
 #define mixtab0(x) (*((uint32_t*)mixtabs + (    (x))))
 #define mixtab1(x) (*((uint32_t*)mixtabs + (256+(x))))
 #define mixtab2(x) (*((uint32_t*)mixtabs + (512+(x))))
--- a/x13/x13.cu
+++ b/x13/x13.cu
@ -1,8 +1,6 @@
 /*
- * X13 algorithm built on cbuchner1's original X11
+ * X13 algorithm
 * 
 */
 extern "C"
 {
 #include "sph/sph_blake.h"
@ -24,6 +22,9 @@ extern "C"
 #include "miner.h"
 }
 #include <stdint.h>
 #include <cuda_runtime.h>
 // aus cpu-miner.c
 extern int device_map[8];
@ -82,7 +83,7 @@ extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t star
 											int order);
 // X13 Hashfunktion
-inline void x13hash(void *state, const void *input)
+extern "C" void x13hash(void *output, const void *input)
 {
 	// blake1-bmw2-grs3-skein4-jh5-keccak6-luffa7-cubehash8-shavite9-simd10-echo11-hamsi12-fugue13
@ -100,60 +101,50 @@ inline void x13hash(void *state, const void *input)
 	sph_hamsi512_context ctx_hamsi;
 	sph_fugue512_context ctx_fugue;
-    uint32_t hash[16];
+	uint32_t hash[32];
 	memset(hash, 0, sizeof hash);
 	sph_blake512_init(&ctx_blake);
    // ZBLAKE;
 	sph_blake512 (&ctx_blake, input, 80);
 	sph_blake512_close(&ctx_blake, (void*) hash);
 	sph_bmw512_init(&ctx_bmw);
    // ZBMW;
 	sph_bmw512 (&ctx_bmw, (const void*) hash, 64);
 	sph_bmw512_close(&ctx_bmw, (void*) hash);
 	sph_groestl512_init(&ctx_groestl);
    // ZGROESTL;
 	sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
 	sph_groestl512_close(&ctx_groestl, (void*) hash);
 	sph_skein512_init(&ctx_skein);
    // ZSKEIN;
 	sph_skein512 (&ctx_skein, (const void*) hash, 64);
 	sph_skein512_close(&ctx_skein, (void*) hash);
 	sph_jh512_init(&ctx_jh);
    // ZJH;
 	sph_jh512 (&ctx_jh, (const void*) hash, 64);
 	sph_jh512_close(&ctx_jh, (void*) hash);
 	sph_keccak512_init(&ctx_keccak);
    // ZKECCAK;
 	sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
 	sph_keccak512_close(&ctx_keccak, (void*) hash);
 	sph_luffa512_init(&ctx_luffa);
    // ZLUFFA;
 	sph_luffa512 (&ctx_luffa, (const void*) hash, 64);
 	sph_luffa512_close (&ctx_luffa, (void*) hash);
 	sph_cubehash512_init(&ctx_cubehash);
    // ZCUBEHASH;
 	sph_cubehash512 (&ctx_cubehash, (const void*) hash, 64);
 	sph_cubehash512_close(&ctx_cubehash, (void*) hash);
 	sph_shavite512_init(&ctx_shavite);
    // ZSHAVITE;
 	sph_shavite512 (&ctx_shavite, (const void*) hash, 64);
 	sph_shavite512_close(&ctx_shavite, (void*) hash);
 	sph_simd512_init(&ctx_simd);
    // ZSIMD
 	sph_simd512 (&ctx_simd, (const void*) hash, 64);
 	sph_simd512_close(&ctx_simd, (void*) hash);
 	sph_echo512_init(&ctx_echo);
    // ZECHO
 	sph_echo512 (&ctx_echo, (const void*) hash, 64);
 	sph_echo512_close(&ctx_echo, (void*) hash);
@ -165,7 +156,7 @@ inline void x13hash(void *state, const void *input)
 	sph_fugue512 (&ctx_fugue, (const void*) hash, 64);
 	sph_fugue512_close(&ctx_fugue, (void*) hash);
-    memcpy(state, hash, 32);
+	memcpy(output, hash, 32);
 }
@ -266,12 +257,15 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
 			x13hash(vhash64, endiandata);
 			if( (vhash64[7]<=Htarg) && fulltest(vhash64, ptarget) ) {
 				pdata[19] = foundNonce;
 				*hashes_done = foundNonce - first_nonce + 1;
 				return 1;
-			} else {
+			}
-				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
+			else if (vhash64[7] > Htarg) {
 				applog(LOG_INFO, "GPU #%d: result for %08x is not in range: %x > %x", thr_id, foundNonce, vhash64[7], Htarg);
 			}
 			else {
 				applog(LOG_INFO, "GPU #%d: result for %08x does not validate on CPU!", thr_id, foundNonce);
 			}
 		}
--- a/x15/cuda_x14_shabal512.cu
+++ b/x15/cuda_x14_shabal512.cu
@ -0,0 +1,492 @@
 /*
 * Shabal-512 for X14/X15 (STUB)
 */
 #include <stdint.h>
 #include <cuda_runtime.h>
 extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
 #define SPH_C64(x)    ((uint64_t)(x ## ULL))
 #define SPH_C32(x)    ((uint32_t)(x ## U))
 #define SPH_T32(x)    ((x) & SPH_C32(0xFFFFFFFF))
 #define SWAB32(x) ( __byte_perm(x, x, 0x0123) )
 #if __CUDA_ARCH__ < 350
 	// Kepler (Compute 3.0)
 	#define ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
 #else
 	// Kepler (Compute 3.5)
 	#define ROTL32(x, n) __funnelshift_l( (x), (x), (n) )
 #endif
 /* $Id: shabal.c 175 2010-05-07 16:03:20Z tp $ */
 /*
 * Shabal implementation.
 *
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010 Projet RNRT SAPHIR
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * ===========================(LICENSE END)=============================
 *
 * @author Thomas Pornin <thomas.pornin@cryptolog.com>
 */
 /*
 * Part of this code was automatically generated (the part between
 * the "BEGIN" and "END" markers).
 */
 #define sM    16
 #define C32   SPH_C32
 #define T32   SPH_T32
 #define O1   13
 #define O2    9
 #define O3    6
 /*
 * We copy the state into local variables, so that the compiler knows
 * that it can optimize them at will.
 */
 /* BEGIN -- automatically generated code. */
 /*
  * Add the sixteen message words M0..MF to the state words B0..BF,
  * each sum truncated to 32 bits by T32. Operates on the caller's locals.
  */
 #define INPUT_BLOCK_ADD   do { \
 		B0 = T32(B0 + M0); \
 		B1 = T32(B1 + M1); \
 		B2 = T32(B2 + M2); \
 		B3 = T32(B3 + M3); \
 		B4 = T32(B4 + M4); \
 		B5 = T32(B5 + M5); \
 		B6 = T32(B6 + M6); \
 		B7 = T32(B7 + M7); \
 		B8 = T32(B8 + M8); \
 		B9 = T32(B9 + M9); \
 		BA = T32(BA + MA); \
 		BB = T32(BB + MB); \
 		BC = T32(BC + MC); \
 		BD = T32(BD + MD); \
 		BE = T32(BE + ME); \
 		BF = T32(BF + MF); \
 	} while (0)
 /*
  * Subtract the sixteen message words M0..MF from the state words C0..CF,
  * each difference truncated to 32 bits by T32. Operates on the caller's locals.
  */
 #define INPUT_BLOCK_SUB   do { \
 		C0 = T32(C0 - M0); \
 		C1 = T32(C1 - M1); \
 		C2 = T32(C2 - M2); \
 		C3 = T32(C3 - M3); \
 		C4 = T32(C4 - M4); \
 		C5 = T32(C5 - M5); \
 		C6 = T32(C6 - M6); \
 		C7 = T32(C7 - M7); \
 		C8 = T32(C8 - M8); \
 		C9 = T32(C9 - M9); \
 		CA = T32(CA - MA); \
 		CB = T32(CB - MB); \
 		CC = T32(CC - MC); \
 		CD = T32(CD - MD); \
 		CE = T32(CE - ME); \
 		CF = T32(CF - MF); \
 	} while (0)
 /* XOR the two halves of the block counter (Wlow, Whigh) into A00 and A01. */
 #define XOR_W   do { \
 		A00 ^= Wlow; \
 		A01 ^= Whigh; \
 	} while (0)
 /* Exchange two uint32_t lvalues through a temporary; both arguments are evaluated twice. */
 #define SWAP(v1, v2)   do { \
 		uint32_t tmp = (v1); \
 		(v1) = (v2); \
 		(v2) = tmp; \
 	} while (0)
 /* Exchange the whole B state with the whole C state, word by word (B0<->C0 ... BF<->CF). */
 #define SWAP_BC   do { \
 		SWAP(B0, C0); \
 		SWAP(B1, C1); \
 		SWAP(B2, C2); \
 		SWAP(B3, C3); \
 		SWAP(B4, C4); \
 		SWAP(B5, C5); \
 		SWAP(B6, C6); \
 		SWAP(B7, C7); \
 		SWAP(B8, C8); \
 		SWAP(B9, C9); \
 		SWAP(BA, CA); \
 		SWAP(BB, CB); \
 		SWAP(BC, CC); \
 		SWAP(BD, CD); \
 		SWAP(BE, CE); \
 		SWAP(BF, CF); \
 	} while (0)
 /*
  * One element of the permutation. Updates xa0 and xb0 in place:
  *   xa0 = ((xa0 ^ (rotl(xa1, 15) * 5) ^ xc) * 3, masked to 32 bits)
  *         ^ xb1 ^ (xb2 & ~xb3) ^ xm
  *   xb0 = ~(rotl(xb0, 1) ^ xa0), masked to 32 bits
  * The rotations are written as shift pairs, so the arguments must be
  * 32-bit unsigned values. xa0 and xb0 must be lvalues.
  */
 #define PERM_ELT(xa0, xa1, xb0, xb1, xb2, xb3, xc, xm)   do { \
 		xa0 = T32((xa0 \
 			^ (((xa1 << 15) | (xa1 >> 17)) * 5U) \
 			^ xc) * 3U) \
 			^ xb1 ^ (xb2 & ~xb3) ^ xm; \
 		xb0 = T32(~(((xb0 << 1) | (xb0 >> 31)) ^ xa0)); \
 	} while (0)
 /*
  * PERM_STEP_0 / PERM_STEP_1 / PERM_STEP_2: each step applies PERM_ELT sixteen
  * times, once per B word (B0..BF) and message word (M0..MF), with the C word
  * index running downwards from C8. The A word being updated advances
  * cyclically through the twelve A words; step 0 starts at A00, step 1 at A04
  * and step 2 at A08, so the three steps together make 48 consecutive updates.
  */
 #define PERM_STEP_0   do { \
 		PERM_ELT(A00, A0B, B0, BD, B9, B6, C8, M0); \
 		PERM_ELT(A01, A00, B1, BE, BA, B7, C7, M1); \
 		PERM_ELT(A02, A01, B2, BF, BB, B8, C6, M2); \
 		PERM_ELT(A03, A02, B3, B0, BC, B9, C5, M3); \
 		PERM_ELT(A04, A03, B4, B1, BD, BA, C4, M4); \
 		PERM_ELT(A05, A04, B5, B2, BE, BB, C3, M5); \
 		PERM_ELT(A06, A05, B6, B3, BF, BC, C2, M6); \
 		PERM_ELT(A07, A06, B7, B4, B0, BD, C1, M7); \
 		PERM_ELT(A08, A07, B8, B5, B1, BE, C0, M8); \
 		PERM_ELT(A09, A08, B9, B6, B2, BF, CF, M9); \
 		PERM_ELT(A0A, A09, BA, B7, B3, B0, CE, MA); \
 		PERM_ELT(A0B, A0A, BB, B8, B4, B1, CD, MB); \
 		PERM_ELT(A00, A0B, BC, B9, B5, B2, CC, MC); \
 		PERM_ELT(A01, A00, BD, BA, B6, B3, CB, MD); \
 		PERM_ELT(A02, A01, BE, BB, B7, B4, CA, ME); \
 		PERM_ELT(A03, A02, BF, BC, B8, B5, C9, MF); \
 	} while (0)
 #define PERM_STEP_1   do { \
 		PERM_ELT(A04, A03, B0, BD, B9, B6, C8, M0); \
 		PERM_ELT(A05, A04, B1, BE, BA, B7, C7, M1); \
 		PERM_ELT(A06, A05, B2, BF, BB, B8, C6, M2); \
 		PERM_ELT(A07, A06, B3, B0, BC, B9, C5, M3); \
 		PERM_ELT(A08, A07, B4, B1, BD, BA, C4, M4); \
 		PERM_ELT(A09, A08, B5, B2, BE, BB, C3, M5); \
 		PERM_ELT(A0A, A09, B6, B3, BF, BC, C2, M6); \
 		PERM_ELT(A0B, A0A, B7, B4, B0, BD, C1, M7); \
 		PERM_ELT(A00, A0B, B8, B5, B1, BE, C0, M8); \
 		PERM_ELT(A01, A00, B9, B6, B2, BF, CF, M9); \
 		PERM_ELT(A02, A01, BA, B7, B3, B0, CE, MA); \
 		PERM_ELT(A03, A02, BB, B8, B4, B1, CD, MB); \
 		PERM_ELT(A04, A03, BC, B9, B5, B2, CC, MC); \
 		PERM_ELT(A05, A04, BD, BA, B6, B3, CB, MD); \
 		PERM_ELT(A06, A05, BE, BB, B7, B4, CA, ME); \
 		PERM_ELT(A07, A06, BF, BC, B8, B5, C9, MF); \
 	} while (0)
 #define PERM_STEP_2   do { \
 		PERM_ELT(A08, A07, B0, BD, B9, B6, C8, M0); \
 		PERM_ELT(A09, A08, B1, BE, BA, B7, C7, M1); \
 		PERM_ELT(A0A, A09, B2, BF, BB, B8, C6, M2); \
 		PERM_ELT(A0B, A0A, B3, B0, BC, B9, C5, M3); \
 		PERM_ELT(A00, A0B, B4, B1, BD, BA, C4, M4); \
 		PERM_ELT(A01, A00, B5, B2, BE, BB, C3, M5); \
 		PERM_ELT(A02, A01, B6, B3, BF, BC, C2, M6); \
 		PERM_ELT(A03, A02, B7, B4, B0, BD, C1, M7); \
 		PERM_ELT(A04, A03, B8, B5, B1, BE, C0, M8); \
 		PERM_ELT(A05, A04, B9, B6, B2, BF, CF, M9); \
 		PERM_ELT(A06, A05, BA, B7, B3, B0, CE, MA); \
 		PERM_ELT(A07, A06, BB, B8, B4, B1, CD, MB); \
 		PERM_ELT(A08, A07, BC, B9, B5, B2, CC, MC); \
 		PERM_ELT(A09, A08, BD, BA, B6, B3, CB, MD); \
 		PERM_ELT(A0A, A09, BE, BB, B7, B4, CA, ME); \
 		PERM_ELT(A0B, A0A, BF, BC, B8, B5, C9, MF); \
 	} while (0)
 /*
  * The full permutation applied to the local state, in three phases:
  *   1. rotate every B word left by 17 bits,
  *   2. run PERM_STEP_0, PERM_STEP_1 and PERM_STEP_2,
  *   3. add C words into the A words (36 additions, three per A word),
  *      each sum truncated to 32 bits.
  * The statement order matters: later phases read values written by earlier ones.
  */
 #define APPLY_P   do { \
 		B0 = T32(B0 << 17) | (B0 >> 15); \
 		B1 = T32(B1 << 17) | (B1 >> 15); \
 		B2 = T32(B2 << 17) | (B2 >> 15); \
 		B3 = T32(B3 << 17) | (B3 >> 15); \
 		B4 = T32(B4 << 17) | (B4 >> 15); \
 		B5 = T32(B5 << 17) | (B5 >> 15); \
 		B6 = T32(B6 << 17) | (B6 >> 15); \
 		B7 = T32(B7 << 17) | (B7 >> 15); \
 		B8 = T32(B8 << 17) | (B8 >> 15); \
 		B9 = T32(B9 << 17) | (B9 >> 15); \
 		BA = T32(BA << 17) | (BA >> 15); \
 		BB = T32(BB << 17) | (BB >> 15); \
 		BC = T32(BC << 17) | (BC >> 15); \
 		BD = T32(BD << 17) | (BD >> 15); \
 		BE = T32(BE << 17) | (BE >> 15); \
 		BF = T32(BF << 17) | (BF >> 15); \
 		PERM_STEP_0; \
 		PERM_STEP_1; \
 		PERM_STEP_2; \
 		A0B = T32(A0B + C6); \
 		A0A = T32(A0A + C5); \
 		A09 = T32(A09 + C4); \
 		A08 = T32(A08 + C3); \
 		A07 = T32(A07 + C2); \
 		A06 = T32(A06 + C1); \
 		A05 = T32(A05 + C0); \
 		A04 = T32(A04 + CF); \
 		A03 = T32(A03 + CE); \
 		A02 = T32(A02 + CD); \
 		A01 = T32(A01 + CC); \
 		A00 = T32(A00 + CB); \
 		A0B = T32(A0B + CA); \
 		A0A = T32(A0A + C9); \
 		A09 = T32(A09 + C8); \
 		A08 = T32(A08 + C7); \
 		A07 = T32(A07 + C6); \
 		A06 = T32(A06 + C5); \
 		A05 = T32(A05 + C4); \
 		A04 = T32(A04 + C3); \
 		A03 = T32(A03 + C2); \
 		A02 = T32(A02 + C1); \
 		A01 = T32(A01 + C0); \
 		A00 = T32(A00 + CF); \
 		A0B = T32(A0B + CE); \
 		A0A = T32(A0A + CD); \
 		A09 = T32(A09 + CC); \
 		A08 = T32(A08 + CB); \
 		A07 = T32(A07 + CA); \
 		A06 = T32(A06 + C9); \
 		A05 = T32(A05 + C8); \
 		A04 = T32(A04 + C7); \
 		A03 = T32(A03 + C6); \
 		A02 = T32(A02 + C5); \
 		A01 = T32(A01 + C4); \
 		A00 = T32(A00 + C3); \
 	} while (0)
 /*
  * Increment the block counter, kept as two 32-bit halves:
  * Wlow is incremented, and when it wraps to zero the carry goes into Whigh.
  */
 #define INCR_W   do { \
 		if ((Wlow = T32(Wlow + 1)) == 0) \
 			Whigh = T32(Whigh + 1); \
 	} while (0)
 #if 0 /* other hash sizes init */
 static const uint32_t A_init_192[] = {
 	C32(0xFD749ED4), C32(0xB798E530), C32(0x33904B6F), C32(0x46BDA85E),
 	C32(0x076934B4), C32(0x454B4058), C32(0x77F74527), C32(0xFB4CF465),
 	C32(0x62931DA9), C32(0xE778C8DB), C32(0x22B3998E), C32(0xAC15CFB9)
 };
 static const uint32_t B_init_192[] = {
 	C32(0x58BCBAC4), C32(0xEC47A08E), C32(0xAEE933B2), C32(0xDFCBC824),
 	C32(0xA7944804), C32(0xBF65BDB0), C32(0x5A9D4502), C32(0x59979AF7),
 	C32(0xC5CEA54E), C32(0x4B6B8150), C32(0x16E71909), C32(0x7D632319),
 	C32(0x930573A0), C32(0xF34C63D1), C32(0xCAF914B4), C32(0xFDD6612C)
 };
 static const uint32_t C_init_192[] = {
 	C32(0x61550878), C32(0x89EF2B75), C32(0xA1660C46), C32(0x7EF3855B),
 	C32(0x7297B58C), C32(0x1BC67793), C32(0x7FB1C723), C32(0xB66FC640),
 	C32(0x1A48B71C), C32(0xF0976D17), C32(0x088CE80A), C32(0xA454EDF3),
 	C32(0x1C096BF4), C32(0xAC76224B), C32(0x5215781C), C32(0xCD5D2669)
 };
 static const uint32_t A_init_224[] = {
 	C32(0xA5201467), C32(0xA9B8D94A), C32(0xD4CED997), C32(0x68379D7B),
 	C32(0xA7FC73BA), C32(0xF1A2546B), C32(0x606782BF), C32(0xE0BCFD0F),
 	C32(0x2F25374E), C32(0x069A149F), C32(0x5E2DFF25), C32(0xFAECF061)
 };
 static const uint32_t B_init_224[] = {
 	C32(0xEC9905D8), C32(0xF21850CF), C32(0xC0A746C8), C32(0x21DAD498),
 	C32(0x35156EEB), C32(0x088C97F2), C32(0x26303E40), C32(0x8A2D4FB5),
 	C32(0xFEEE44B6), C32(0x8A1E9573), C32(0x7B81111A), C32(0xCBC139F0),
 	C32(0xA3513861), C32(0x1D2C362E), C32(0x918C580E), C32(0xB58E1B9C)
 };
 static const uint32_t C_init_224[] = {
 	C32(0xE4B573A1), C32(0x4C1A0880), C32(0x1E907C51), C32(0x04807EFD),
 	C32(0x3AD8CDE5), C32(0x16B21302), C32(0x02512C53), C32(0x2204CB18),
 	C32(0x99405F2D), C32(0xE5B648A1), C32(0x70AB1D43), C32(0xA10C25C2),
 	C32(0x16F1AC05), C32(0x38BBEB56), C32(0x9B01DC60), C32(0xB1096D83)
 };
 static const uint32_t A_init_256[] = {
 	C32(0x52F84552), C32(0xE54B7999), C32(0x2D8EE3EC), C32(0xB9645191),
 	C32(0xE0078B86), C32(0xBB7C44C9), C32(0xD2B5C1CA), C32(0xB0D2EB8C),
 	C32(0x14CE5A45), C32(0x22AF50DC), C32(0xEFFDBC6B), C32(0xEB21B74A)
 };
 static const uint32_t B_init_256[] = {
 	C32(0xB555C6EE), C32(0x3E710596), C32(0xA72A652F), C32(0x9301515F),
 	C32(0xDA28C1FA), C32(0x696FD868), C32(0x9CB6BF72), C32(0x0AFE4002),
 	C32(0xA6E03615), C32(0x5138C1D4), C32(0xBE216306), C32(0xB38B8890),
 	C32(0x3EA8B96B), C32(0x3299ACE4), C32(0x30924DD4), C32(0x55CB34A5)
 };
 static const uint32_t C_init_256[] = {
 	C32(0xB405F031), C32(0xC4233EBA), C32(0xB3733979), C32(0xC0DD9D55),
 	C32(0xC51C28AE), C32(0xA327B8E1), C32(0x56C56167), C32(0xED614433),
 	C32(0x88B59D60), C32(0x60E2CEBA), C32(0x758B4B8B), C32(0x83E82A7F),
 	C32(0xBC968828), C32(0xE6E00BF7), C32(0xBA839E55), C32(0x9B491C60)
 };
 static const uint32_t A_init_384[] = {
 	C32(0xC8FCA331), C32(0xE55C504E), C32(0x003EBF26), C32(0xBB6B8D83),
 	C32(0x7B0448C1), C32(0x41B82789), C32(0x0A7C9601), C32(0x8D659CFF),
 	C32(0xB6E2673E), C32(0xCA54C77B), C32(0x1460FD7E), C32(0x3FCB8F2D)
 };
 static const uint32_t B_init_384[] = {
 	C32(0x527291FC), C32(0x2A16455F), C32(0x78E627E5), C32(0x944F169F),
 	C32(0x1CA6F016), C32(0xA854EA25), C32(0x8DB98ABE), C32(0xF2C62641),
 	C32(0x30117DCB), C32(0xCF5C4309), C32(0x93711A25), C32(0xF9F671B8),
 	C32(0xB01D2116), C32(0x333F4B89), C32(0xB285D165), C32(0x86829B36)
 };
 static const uint32_t C_init_384[] = {
 	C32(0xF764B11A), C32(0x76172146), C32(0xCEF6934D), C32(0xC6D28399),
 	C32(0xFE095F61), C32(0x5E6018B4), C32(0x5048ECF5), C32(0x51353261),
 	C32(0x6E6E36DC), C32(0x63130DAD), C32(0xA9C69BD6), C32(0x1E90EA0C),
 	C32(0x7C35073B), C32(0x28D95E6D), C32(0xAA340E0D), C32(0xCB3DEE70)
 };
 #endif
 /*
  * Initial state for the 512-bit digest, stored in device memory and read by
  * the hashing kernel at the start of every hash: twelve A words followed by
  * sixteen B words and sixteen C words.
  */
 __device__
 static const uint32_t d_A512[] = {
 	C32(0x20728DFD), C32(0x46C0BD53), C32(0xE782B699), C32(0x55304632),
 	C32(0x71B4EF90), C32(0x0EA9E82C), C32(0xDBB930F1), C32(0xFAD06B8B),
 	C32(0xBE0CAE40), C32(0x8BD14410), C32(0x76D2ADAC), C32(0x28ACAB7F)
 };
 /* Initial B words (16) for the 512-bit digest. */
 __device__
 static const uint32_t d_B512[] = {
 	C32(0xC1099CB7), C32(0x07B385F3), C32(0xE7442C26), C32(0xCC8AD640),
 	C32(0xEB6F56C7), C32(0x1EA81AA9), C32(0x73B9D314), C32(0x1DE85D08),
 	C32(0x48910A5A), C32(0x893B22DB), C32(0xC5A0DF44), C32(0xBBC4324E),
 	C32(0x72D2F240), C32(0x75941D99), C32(0x6D8BDE82), C32(0xA1A7502B)
 };
 /* Initial C words (16) for the 512-bit digest. */
 __device__
 static const uint32_t d_C512[] = {
 	C32(0xD9BF68D1), C32(0x58BAD750), C32(0x56028CB2), C32(0x8134F359),
 	C32(0xB5D469D8), C32(0x941A8CC2), C32(0x418B2A6E), C32(0x04052780),
 	C32(0x7F07D787), C32(0x5194358F), C32(0x3C60D665), C32(0xBE97D79A),
 	C32(0x950C3434), C32(0xAED9A06D), C32(0x2537DC8D), C32(0x7CDB5969)
 };
 /***************************************************/
 // GPU Hash Function
 /*
  * Shabal-512 of one 64-byte hash per thread, computed in place.
  *
  * threads        number of hashes to process; threads with a larger index do nothing
  * startNounce    nonce that corresponds to the first 64-byte slot of g_hash
  * g_hash         hash buffer, one 64-byte slot (8 x uint64_t) per nonce; the slot is
  *                read as the message and then overwritten with the 512-bit digest
  * g_nonceVector  optional list of nonces to process; when NULL, thread i handles
  *                nonce startNounce + i
  *
  * Expects a 1D grid of 1D blocks. No shared memory is used, so no block-level
  * barrier is required anywhere in the kernel.
  */
 __global__ void x14_shabal512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector)
 {
 	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
 		uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);

 		// slot index of this nonce; each slot is 8 x uint64_t = 16 x uint32_t
 		int hashPosition = nounce - startNounce;
 		uint32_t *Hash = (uint32_t*)&g_hash[hashPosition<<3]; // [hashPosition * 8]

 		// the state lives in named locals because the permutation macros refer to them by name
 		uint32_t A00 = d_A512[0], A01 = d_A512[1], A02 = d_A512[2], A03 = d_A512[3],
 			A04 = d_A512[4], A05 = d_A512[5], A06 = d_A512[6], A07 = d_A512[7],
 			A08 = d_A512[8], A09 = d_A512[9], A0A = d_A512[10], A0B = d_A512[11];
 		uint32_t B0 = d_B512[0], B1 = d_B512[1], B2 = d_B512[2], B3 = d_B512[3],
 			B4 = d_B512[4], B5 = d_B512[5], B6 = d_B512[6], B7 = d_B512[7],
 			B8 = d_B512[8], B9 = d_B512[9], BA = d_B512[10], BB = d_B512[11],
 			BC = d_B512[12], BD = d_B512[13], BE = d_B512[14], BF = d_B512[15];
 		uint32_t C0 = d_C512[0], C1 = d_C512[1], C2 = d_C512[2], C3 = d_C512[3],
 			C4 = d_C512[4], C5 = d_C512[5], C6 = d_C512[6], C7 = d_C512[7],
 			C8 = d_C512[8], C9 = d_C512[9], CA = d_C512[10], CB = d_C512[11],
 			CC = d_C512[12], CD = d_C512[13], CE = d_C512[14], CF = d_C512[15];
 		uint32_t M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, MA, MB, MC, MD, ME, MF;
 		uint32_t Wlow = 1, Whigh = 0;

 		// first block: the 64 input bytes, taken as sixteen 32-bit words
 		M0 = Hash[0];  M1 = Hash[1];  M2 = Hash[2];  M3 = Hash[3];
 		M4 = Hash[4];  M5 = Hash[5];  M6 = Hash[6];  M7 = Hash[7];
 		M8 = Hash[8];  M9 = Hash[9];  MA = Hash[10]; MB = Hash[11];
 		MC = Hash[12]; MD = Hash[13]; ME = Hash[14]; MF = Hash[15];

 		INPUT_BLOCK_ADD;
 		XOR_W;
 		APPLY_P;
 		INPUT_BLOCK_SUB;
 		SWAP_BC;
 		INCR_W;

 		// second block: a single 0x80 byte followed by zeros
 		M0 = 0x80;
 		M1 = M2 = M3 = M4 = M5 = M6 = M7 = M8 = M9 = MA = MB = MC = MD = ME = MF = 0;

 		INPUT_BLOCK_ADD;
 		XOR_W;
 		APPLY_P;

 		// three extra rounds on the same block; the counter is not incremented
 		for (int i = 0; i < 3; i++)
 		{
 			SWAP_BC;
 			XOR_W;
 			APPLY_P;
 		}

 		// the digest is the B state; write it once, straight into the input slot
 		Hash[0] = B0;   Hash[1] = B1;   Hash[2] = B2;   Hash[3] = B3;
 		Hash[4] = B4;   Hash[5] = B5;   Hash[6] = B6;   Hash[7] = B7;
 		Hash[8] = B8;   Hash[9] = B9;   Hash[10] = BA;  Hash[11] = BB;
 		Hash[12] = BC;  Hash[13] = BD;  Hash[14] = BE;  Hash[15] = BF;
 	}
 }
 // Initialisation hook for the Shabal-512 stage. Intentionally empty: the initial
 // state tables in this file are initialised at their definition, so there is
 // nothing to set up here. Both parameters are unused.
 __host__ void x14_shabal512_cpu_init(int thr_id, int threads)
 {
 }
 #include <stdio.h>
 /*
  * Host-side launcher for x14_shabal512_gpu_hash_64.
  *
  * thr_id         miner thread index, used for logging and passed to MyStreamSynchronize
  * threads        number of hashes to process
  * startNounce    nonce of the first slot in d_hash
  * d_nonceVector  optional device list of nonces, or NULL for a contiguous range
  * d_hash         device hash buffer, 64 bytes per nonce, updated in place
  * order          passed through to MyStreamSynchronize
  *
  * The launch goes to the default stream and the function waits for it through
  * MyStreamSynchronize before returning.
  */
 __host__ void x14_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
 {
 	const int threadsperblock = 192;

 	// number of thread blocks needed to cover all threads (ceiling division)
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
 	dim3 block(threadsperblock);

 	// the kernel uses no dynamic shared memory
 	size_t shared_size = 0;

 	x14_shabal512_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);

 	// a kernel launch returns no status; a rejected launch is only visible here
 	cudaError_t err = cudaGetLastError();
 	if (err != cudaSuccess)
 		fprintf(stderr, "GPU #%d: x14_shabal512_gpu_hash_64 launch failed: %s\n", thr_id, cudaGetErrorString(err));

 	MyStreamSynchronize(NULL, order, thr_id);
 }
--- a/x15/cuda_x15_whirlpool.cu
+++ b/x15/cuda_x15_whirlpool.cu
--- a/x15/x14.cu
+++ b/x15/x14.cu
@ -0,0 +1,259 @@
 /*
 * X14 algorithm
 * Added in ccminer by Tanguy Pruvot - 2014
 */
 extern "C" {
 #include "sph/sph_blake.h"
 #include "sph/sph_bmw.h"
 #include "sph/sph_groestl.h"
 #include "sph/sph_skein.h"
 #include "sph/sph_jh.h"
 #include "sph/sph_keccak.h"
 #include "sph/sph_luffa.h"
 #include "sph/sph_cubehash.h"
 #include "sph/sph_shavite.h"
 #include "sph/sph_simd.h"
 #include "sph/sph_echo.h"
 #include "sph/sph_hamsi.h"
 #include "sph/sph_fugue.h"
 #include "sph/sph_shabal.h"
 #include "miner.h"
 }
 #include <stdint.h>
 #include <cuda_runtime.h>
 // from cpu-miner.c
 extern int device_map[8];
 // Memory for the hash functions
 static uint32_t *d_hash[8];
 extern void quark_blake512_cpu_init(int thr_id, int threads);
 extern void quark_blake512_cpu_setBlock_80(void *pdata);
 extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
 extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void quark_bmw512_cpu_init(int thr_id, int threads);
 extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void quark_groestl512_cpu_init(int thr_id, int threads);
 extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void quark_skein512_cpu_init(int thr_id, int threads);
 extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void quark_keccak512_cpu_init(int thr_id, int threads);
 extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void quark_jh512_cpu_init(int thr_id, int threads);
 extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void x11_luffa512_cpu_init(int thr_id, int threads);
 extern void x11_luffa512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void x11_cubehash512_cpu_init(int thr_id, int threads);
 extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void x11_shavite512_cpu_init(int thr_id, int threads);
 extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void x11_simd512_cpu_init(int thr_id, int threads);
 extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void x11_echo512_cpu_init(int thr_id, int threads);
 extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void x13_hamsi512_cpu_init(int thr_id, int threads);
 extern void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void x13_fugue512_cpu_init(int thr_id, int threads);
 extern void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void x14_shabal512_cpu_init(int thr_id, int threads);
 extern void x14_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void quark_check_cpu_init(int thr_id, int threads);
 extern void quark_check_cpu_setTarget(const void *ptarget);
 extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
 extern void quark_compactTest_cpu_init(int thr_id, int threads);
 extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
 											uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order);
 // X14 CPU Hash function
 /*
  * CPU reference implementation of the X14 hash chain, used to confirm GPU results.
  *
  * output  receives the first 32 bytes of the final digest
  * input   80-byte block header
  *
  * Chain: blake, bmw, groestl, skein, jh, keccak, luffa, cubehash, shavite,
  * simd, echo, hamsi, fugue, shabal (all 512-bit). Each stage hashes the
  * 64-byte output of the previous one; the two halves of a 128-byte buffer
  * are used alternately as source and destination.
  */
 extern "C" void x14hash(void *output, const void *input)
 {
 	unsigned char hash[128]; // two 64-byte halves: hash (A) and hashB
 	// a local pointer instead of a macro, so the name does not leak out of this function
 	unsigned char * const hashB = hash + 64;
 	memset(hash, 0, sizeof hash);

 	sph_blake512_context ctx_blake;
 	sph_bmw512_context ctx_bmw;
 	sph_groestl512_context ctx_groestl;
 	sph_jh512_context ctx_jh;
 	sph_keccak512_context ctx_keccak;
 	sph_skein512_context ctx_skein;
 	sph_luffa512_context ctx_luffa;
 	sph_cubehash512_context ctx_cubehash;
 	sph_shavite512_context ctx_shavite;
 	sph_simd512_context ctx_simd;
 	sph_echo512_context ctx_echo;
 	sph_hamsi512_context ctx_hamsi;
 	sph_fugue512_context ctx_fugue;
 	sph_shabal512_context ctx_shabal;

 	// 1: blake, 80-byte header -> hash
 	sph_blake512_init(&ctx_blake);
 	sph_blake512(&ctx_blake, input, 80);
 	sph_blake512_close(&ctx_blake, hash);

 	// 2: bmw, hash -> hashB
 	sph_bmw512_init(&ctx_bmw);
 	sph_bmw512(&ctx_bmw, hash, 64);
 	sph_bmw512_close(&ctx_bmw, hashB);

 	// 3: groestl, hashB -> hash
 	sph_groestl512_init(&ctx_groestl);
 	sph_groestl512(&ctx_groestl, hashB, 64);
 	sph_groestl512_close(&ctx_groestl, hash);

 	// 4: skein, hash -> hashB
 	sph_skein512_init(&ctx_skein);
 	sph_skein512(&ctx_skein, hash, 64);
 	sph_skein512_close(&ctx_skein, hashB);

 	// 5: jh, hashB -> hash
 	sph_jh512_init(&ctx_jh);
 	sph_jh512(&ctx_jh, hashB, 64);
 	sph_jh512_close(&ctx_jh, hash);

 	// 6: keccak, hash -> hashB
 	sph_keccak512_init(&ctx_keccak);
 	sph_keccak512(&ctx_keccak, hash, 64);
 	sph_keccak512_close(&ctx_keccak, hashB);

 	// 7: luffa, hashB -> hash
 	sph_luffa512_init(&ctx_luffa);
 	sph_luffa512(&ctx_luffa, hashB, 64);
 	sph_luffa512_close(&ctx_luffa, hash);

 	// 8: cubehash, hash -> hashB
 	sph_cubehash512_init(&ctx_cubehash);
 	sph_cubehash512(&ctx_cubehash, hash, 64);
 	sph_cubehash512_close(&ctx_cubehash, hashB);

 	// 9: shavite, hashB -> hash
 	sph_shavite512_init(&ctx_shavite);
 	sph_shavite512(&ctx_shavite, hashB, 64);
 	sph_shavite512_close(&ctx_shavite, hash);

 	// 10: simd, hash -> hashB
 	sph_simd512_init(&ctx_simd);
 	sph_simd512(&ctx_simd, hash, 64);
 	sph_simd512_close(&ctx_simd, hashB);

 	// 11: echo, hashB -> hash
 	sph_echo512_init(&ctx_echo);
 	sph_echo512(&ctx_echo, hashB, 64);
 	sph_echo512_close(&ctx_echo, hash);

 	// 12: hamsi, hash -> hashB
 	sph_hamsi512_init(&ctx_hamsi);
 	sph_hamsi512(&ctx_hamsi, hash, 64);
 	sph_hamsi512_close(&ctx_hamsi, hashB);

 	// 13: fugue, hashB -> hash
 	sph_fugue512_init(&ctx_fugue);
 	sph_fugue512(&ctx_fugue, hashB, 64);
 	sph_fugue512_close(&ctx_fugue, hash);

 	// 14: shabal, hash -> hash (same buffer for input and output, as before)
 	sph_shabal512_init(&ctx_shabal);
 	sph_shabal512(&ctx_shabal, hash, 64);
 	sph_shabal512_close(&ctx_shabal, hash);

 	memcpy(output, hash, 32);
 }
 extern bool opt_benchmark;
 /*
  * Scan a nonce range for an X14 hash that meets the target.
  *
  * thr_id       miner thread index; selects the GPU through device_map and the
  *              per-thread hash buffer
  * pdata        20-word block header; pdata[19] is the starting nonce and is
  *              advanced as the scan progresses (set to the winning nonce on success)
  * ptarget      8-word target; in benchmark mode word 7 is overwritten with 0xff
  * max_nonce    scanning stops once pdata[19] reaches this value
  * hashes_done  receives the number of nonces covered by this call
  *
  * Returns 1 when a nonce found on the GPU is confirmed on the CPU, 0 otherwise.
  */
 extern "C" int scanhash_x14(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done)
 {
 	const uint32_t first_nonce = pdata[19];
 	const int throughput = 256*256*8;
 	static bool init[8] = {0,0,0,0,0,0,0,0};
 	uint32_t endiandata[20];

 	if (opt_benchmark)
 		((uint32_t*)ptarget)[7] = 0xff;

 	// read only after the benchmark override, so that the CPU check below uses
 	// the same target word as the one handed to the GPU check
 	const uint32_t Htarg = ptarget[7];

 	if (!init[thr_id])
 	{
 		cudaSetDevice(device_map[thr_id]);

 		// one 64-byte (16 x uint32_t) hash slot per nonce of a batch
 		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);

 		quark_blake512_cpu_init(thr_id, throughput);
 		quark_groestl512_cpu_init(thr_id, throughput);
 		quark_skein512_cpu_init(thr_id, throughput);
 		quark_bmw512_cpu_init(thr_id, throughput);
 		quark_keccak512_cpu_init(thr_id, throughput);
 		quark_jh512_cpu_init(thr_id, throughput);
 		x11_luffa512_cpu_init(thr_id, throughput);
 		x11_cubehash512_cpu_init(thr_id, throughput);
 		x11_shavite512_cpu_init(thr_id, throughput);
 		x11_simd512_cpu_init(thr_id, throughput);
 		x11_echo512_cpu_init(thr_id, throughput);
 		x13_hamsi512_cpu_init(thr_id, throughput);
 		x13_fugue512_cpu_init(thr_id, throughput);
 		x14_shabal512_cpu_init(thr_id, throughput);
 		quark_check_cpu_init(thr_id, throughput);
 		init[thr_id] = true;
 	}

 	for (int k = 0; k < 20; k++)
 		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);

 	quark_blake512_cpu_setBlock_80((void*)endiandata);
 	quark_check_cpu_setTarget(ptarget);

 	do {
 		int order = 0;

 		// the 14 stages run in sequence on the same device buffer
 		quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
 		quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);

 		// 0xffffffff means that no candidate was found in this batch
 		uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		if (foundNonce != 0xffffffff)
 		{
 			/* check now with the CPU to confirm */
 			uint32_t vhash64[8];
 			be32enc(&endiandata[19], foundNonce);
 			x14hash(vhash64, endiandata);

 			if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
 				pdata[19] = foundNonce;
 				*hashes_done = foundNonce - first_nonce + 1;
 				return 1;
 			}
 			else if (vhash64[7] > Htarg) {
 				applog(LOG_INFO, "GPU #%d: result for %08x is not in range: %x > %x", thr_id, foundNonce, vhash64[7], Htarg);
 			}
 			else {
 				applog(LOG_INFO, "GPU #%d: result for %08x does not validate on CPU!", thr_id, foundNonce);
 			}
 		}

 		// a 32-bit wrap would bring the nonce back below max_nonce and restart the
 		// scan from the bottom of the range; saturate and stop instead
 		if ((uint64_t)pdata[19] + (uint64_t)throughput > 0xffffffffULL) {
 			pdata[19] = 0xffffffffU;
 			break;
 		}
 		pdata[19] += throughput;

 	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);

 	*hashes_done = pdata[19] - first_nonce + 1;
 	return 0;
 }
--- a/x15/x15.cu
+++ b/x15/x15.cu
@ -0,0 +1,295 @@
 /*
 * X15 algorithm (CHC, BBC, X15C)
 * Added in ccminer by Tanguy Pruvot - 2014
 */
 extern "C" {
 #include "sph/sph_blake.h"
 #include "sph/sph_bmw.h"
 #include "sph/sph_groestl.h"
 #include "sph/sph_skein.h"
 #include "sph/sph_jh.h"
 #include "sph/sph_keccak.h"
 #include "sph/sph_luffa.h"
 #include "sph/sph_cubehash.h"
 #include "sph/sph_shavite.h"
 #include "sph/sph_simd.h"
 #include "sph/sph_echo.h"
 #include "sph/sph_hamsi.h"
 #include "sph/sph_fugue.h"
 #include "sph/sph_shabal.h"
 #include "sph/sph_whirlpool.h"
 #include "miner.h"
 }
 #include <stdint.h>
 #include <cuda_runtime.h>
 // Debug switch: when set to 1, scanhash_x15 hashes an all-zero 80-byte
 // header, logs the first 16 words of the GPU hash buffer and returns
 // right after the first batch.
 #define NULLTEST 0
 // from cpu-miner.c: CUDA device index to use for each mining thread
 // (passed to cudaSetDevice in scanhash_x15)
 extern int device_map[8];
 // Memory for the hash functions: one device buffer per mining thread,
 // allocated in scanhash_x15 with 16 uint32_t (64 bytes) per nonce of a batch
 static uint32_t *d_hash[8];
 extern void quark_blake512_cpu_init(int thr_id, int threads);
 extern void quark_blake512_cpu_setBlock_80(void *pdata);
 extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
 extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void quark_bmw512_cpu_init(int thr_id, int threads);
 extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void quark_groestl512_cpu_init(int thr_id, int threads);
 extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void quark_skein512_cpu_init(int thr_id, int threads);
 extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void quark_keccak512_cpu_init(int thr_id, int threads);
 extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void quark_jh512_cpu_init(int thr_id, int threads);
 extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void x11_luffa512_cpu_init(int thr_id, int threads);
 extern void x11_luffa512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void x11_cubehash512_cpu_init(int thr_id, int threads);
 extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void x11_shavite512_cpu_init(int thr_id, int threads);
 extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void x11_simd512_cpu_init(int thr_id, int threads);
 extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void x11_echo512_cpu_init(int thr_id, int threads);
 extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void x13_hamsi512_cpu_init(int thr_id, int threads);
 extern void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void x13_fugue512_cpu_init(int thr_id, int threads);
 extern void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void x14_shabal512_cpu_init(int thr_id, int threads);
 extern void x14_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void x15_whirlpool_cpu_init(int thr_id, int threads);
 extern void x15_whirlpool_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void quark_check_cpu_init(int thr_id, int threads);
 extern void quark_check_cpu_setTarget(const void *ptarget);
 extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
 extern void quark_compactTest_cpu_init(int thr_id, int threads);
 extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
 											uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order);
 extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
 // X15 CPU Hash function
 //
 // Host-side reference implementation of the X15 chain; scanhash_x15 uses it
 // to re-check nonces reported by the GPU.
 //
 //   output: receives the first 32 bytes of the final digest
 //   input:  80 bytes of block header data (already in the byte order the
 //           caller wants hashed)
 //
 // The 15 stages run in this order: blake, bmw, groestl, skein, jh, keccak,
 // luffa, cubehash, shavite, simd, echo, hamsi, fugue, shabal, whirlpool.
 // Every stage after the first consumes the 64-byte digest of the previous one.
 extern "C" void x15hash(void *output, const void *input)
 {
 	sph_blake512_context     ctx_blake;
 	sph_bmw512_context       ctx_bmw;
 	sph_groestl512_context   ctx_groestl;
 	sph_jh512_context        ctx_jh;
 	sph_keccak512_context    ctx_keccak;
 	sph_skein512_context     ctx_skein;
 	sph_luffa512_context     ctx_luffa;
 	sph_cubehash512_context  ctx_cubehash;
 	sph_shavite512_context   ctx_shavite;
 	sph_simd512_context      ctx_simd;
 	sph_echo512_context      ctx_echo;
 	sph_hamsi512_context     ctx_hamsi;
 	sph_fugue512_context     ctx_fugue;
 	sph_shabal512_context    ctx_shabal;
 	sph_whirlpool_context    ctx_whirlpool;
 	// One 128-byte scratch area split into two 64-byte halves. Stages
 	// alternate between them so a stage never writes the buffer it reads.
 	// Scoped pointers are used instead of a #define so nothing leaks into
 	// the rest of the translation unit.
 	unsigned char hash[128];
 	unsigned char *const hashA = hash;
 	unsigned char *const hashB = hash + 64;
 	memset(hash, 0, sizeof hash);
 	// 1. blake512: header (80 bytes) -> A
 	sph_blake512_init(&ctx_blake);
 	sph_blake512(&ctx_blake, input, 80);
 	sph_blake512_close(&ctx_blake, hashA);
 	// 2. bmw512: A -> B
 	sph_bmw512_init(&ctx_bmw);
 	sph_bmw512(&ctx_bmw, hashA, 64);
 	sph_bmw512_close(&ctx_bmw, hashB);
 	// 3. groestl512: B -> A
 	sph_groestl512_init(&ctx_groestl);
 	sph_groestl512(&ctx_groestl, hashB, 64);
 	sph_groestl512_close(&ctx_groestl, hashA);
 	// 4. skein512: A -> B
 	sph_skein512_init(&ctx_skein);
 	sph_skein512(&ctx_skein, hashA, 64);
 	sph_skein512_close(&ctx_skein, hashB);
 	// 5. jh512: B -> A
 	sph_jh512_init(&ctx_jh);
 	sph_jh512(&ctx_jh, hashB, 64);
 	sph_jh512_close(&ctx_jh, hashA);
 	// 6. keccak512: A -> B
 	sph_keccak512_init(&ctx_keccak);
 	sph_keccak512(&ctx_keccak, hashA, 64);
 	sph_keccak512_close(&ctx_keccak, hashB);
 	// 7. luffa512: B -> A
 	sph_luffa512_init(&ctx_luffa);
 	sph_luffa512(&ctx_luffa, hashB, 64);
 	sph_luffa512_close(&ctx_luffa, hashA);
 	// 8. cubehash512: A -> B
 	sph_cubehash512_init(&ctx_cubehash);
 	sph_cubehash512(&ctx_cubehash, hashA, 64);
 	sph_cubehash512_close(&ctx_cubehash, hashB);
 	// 9. shavite512: B -> A
 	sph_shavite512_init(&ctx_shavite);
 	sph_shavite512(&ctx_shavite, hashB, 64);
 	sph_shavite512_close(&ctx_shavite, hashA);
 	// 10. simd512: A -> B
 	sph_simd512_init(&ctx_simd);
 	sph_simd512(&ctx_simd, hashA, 64);
 	sph_simd512_close(&ctx_simd, hashB);
 	// 11. echo512: B -> A
 	sph_echo512_init(&ctx_echo);
 	sph_echo512(&ctx_echo, hashB, 64);
 	sph_echo512_close(&ctx_echo, hashA);
 	// 12. hamsi512: A -> B
 	sph_hamsi512_init(&ctx_hamsi);
 	sph_hamsi512(&ctx_hamsi, hashA, 64);
 	sph_hamsi512_close(&ctx_hamsi, hashB);
 	// 13. fugue512: B -> A
 	sph_fugue512_init(&ctx_fugue);
 	sph_fugue512(&ctx_fugue, hashB, 64);
 	sph_fugue512_close(&ctx_fugue, hashA);
 	// 14. shabal512: A -> B
 	sph_shabal512_init(&ctx_shabal);
 	sph_shabal512(&ctx_shabal, hashA, 64);
 	sph_shabal512_close(&ctx_shabal, hashB);
 	// 15. whirlpool: B -> A
 	sph_whirlpool_init(&ctx_whirlpool);
 	sph_whirlpool(&ctx_whirlpool, hashB, 64);
 	sph_whirlpool_close(&ctx_whirlpool, hashA);
 	// Only the first 256 bits of the final digest are returned.
 	memcpy(output, hashA, 32);
 }
 extern bool opt_benchmark;
 /*
  * Scan a range of nonces for an X15 solution using the GPU bound to thr_id.
  *
  *   thr_id      mining thread index; selects device_map[], d_hash[] and
  *               work_restart[] entries
  *   pdata       block header as 20 32-bit words; pdata[19] is the start
  *               nonce on entry and is updated to the winning nonce (on
  *               success) or to the next nonce to try (otherwise)
  *   ptarget     256-bit target as 8 32-bit words
  *   max_nonce   scanning stops once the next batch would start at or
  *               beyond this nonce
  *   hashes_done receives the number of nonces covered by this call
  *
  * Returns 1 when a nonce was found and confirmed on the CPU, 0 otherwise
  * (range exhausted, work restart requested, or GPU buffer allocation failed).
  */
 extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done)
 {
 	const uint32_t first_nonce = pdata[19];
 	// Number of nonces hashed per batch of kernel launches.
 	const int throughput = 256*256*8;
 	static bool init[8] = {0,0,0,0,0,0,0,0};
 	uint32_t endiandata[20];
 	uint32_t Htarg = ptarget[7];
 	// Benchmark mode overrides the top target word (in the caller's buffer
 	// too) with a fixed, easy value.
 	if (opt_benchmark)
 		((uint32_t*)ptarget)[7] = Htarg = 0x0000ff;
 	// One-time per-thread setup: select the device, allocate the hash buffer
 	// (16 uint32_t per nonce) and initialise every stage of the chain.
 	if (!init[thr_id])
 	{
 		cudaSetDevice(device_map[thr_id]);
 		cudaError_t err = cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
 		if (err != cudaSuccess) {
 			// Without the buffer every kernel below would work on a bad
 			// pointer; report it and leave init[] unset so a later call retries.
 			applog(LOG_ERR, "GPU #%d: unable to allocate hash buffer: %s", thr_id, cudaGetErrorString(err));
 			*hashes_done = 0;
 			return 0;
 		}
 		quark_blake512_cpu_init(thr_id, throughput);
 		quark_groestl512_cpu_init(thr_id, throughput);
 		quark_skein512_cpu_init(thr_id, throughput);
 		quark_bmw512_cpu_init(thr_id, throughput);
 		quark_keccak512_cpu_init(thr_id, throughput);
 		quark_jh512_cpu_init(thr_id, throughput);
 		x11_luffa512_cpu_init(thr_id, throughput);
 		x11_cubehash512_cpu_init(thr_id, throughput);
 		x11_shavite512_cpu_init(thr_id, throughput);
 		x11_simd512_cpu_init(thr_id, throughput);
 		x11_echo512_cpu_init(thr_id, throughput);
 		x13_hamsi512_cpu_init(thr_id, throughput);
 		x13_fugue512_cpu_init(thr_id, throughput);
 		x14_shabal512_cpu_init(thr_id, throughput);
 		x15_whirlpool_cpu_init(thr_id, throughput);
 		quark_check_cpu_init(thr_id, throughput);
 		init[thr_id] = true;
 	}
 	// Build the header copy that is uploaded to the GPU and re-hashed on the
 	// CPU (all zeroes when NULLTEST is enabled).
 	for (int k=0; k < 20; k++)
 #if NULLTEST
 		endiandata[k] = 0;
 #else
 		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
 #endif
 	quark_blake512_cpu_setBlock_80((void*)endiandata);
 	quark_check_cpu_setTarget(ptarget);
 	do {
 		int order = 0;
 		// Run the 15 stages in place on d_hash for nonces
 		// [pdata[19], pdata[19] + throughput).
 		quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
 		quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		x15_whirlpool_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		/* Scan with GPU */
 		uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 #if NULLTEST
 		uint32_t buf[16]; memset(buf, 0, sizeof(buf));
 		cudaMemcpy(buf, d_hash[thr_id], 16 * sizeof(uint32_t), cudaMemcpyDeviceToHost);
 		MyStreamSynchronize(NULL, order, thr_id);
 		applog(LOG_NOTICE, "Hash  %08x %08x %08x %08x", buf[0], buf[1], buf[2], buf[3]);
 		applog(LOG_NOTICE, "Hash  %08x %08x %08x %08x", buf[4], buf[5], buf[6], buf[7]);
 		applog(LOG_NOTICE, "Hash  %08x %08x %08x %08x", buf[8], buf[9], buf[10], buf[11]);
 		applog(LOG_NOTICE, "Hash  %08x %08x %08x %08x", buf[12], buf[13], buf[14], buf[15]);
 		return 0;
 #endif
 		// 0xffffffff is the "nothing found" value of the GPU check.
 		if (foundNonce != 0xffffffff)
 		{
 			/* check now with the CPU to confirm */
 			uint32_t vhash64[8];
 			be32enc(&endiandata[19], foundNonce);
 			x15hash(vhash64, endiandata);
 			// Quick reject on the top word, then the full 256-bit compare
 			// (same acceptance rule as the x14 scanner).
 			if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
 				pdata[19] = foundNonce;
 				*hashes_done = foundNonce - first_nonce + 1;
 				applog(LOG_INFO, "GPU #%d: result for nonce $%08X is in wanted range, %x <= %x", thr_id, foundNonce, vhash64[7], Htarg);
 				return 1;
 			}
 			else if (vhash64[7] > Htarg) {
 				applog(LOG_NOTICE, "Hash0 %08x %08x %08x %08x", vhash64[0], vhash64[1], vhash64[2], vhash64[3]);
 				applog(LOG_NOTICE, "Hash1 %08x %08x %08x %08x", vhash64[4], vhash64[5], vhash64[6], vhash64[7]);
 				applog(LOG_INFO, "GPU #%d: result for %08x is not in range: %x > %x",
 					thr_id, foundNonce, vhash64[7], Htarg);
 			}
 			else {
 				applog(LOG_INFO, "GPU #%d: result for %08x does not validate on CPU!", thr_id, foundNonce);
 			}
 		}
 		// Advance in 64 bits: a 32-bit add could wrap past 0xffffffff, land
 		// below max_nonce again and rescan low nonces instead of stopping.
 		const uint64_t next_nonce = (uint64_t)pdata[19] + throughput;
 		if (next_nonce >= max_nonce) {
 			pdata[19] = (next_nonce > 0xffffffffULL) ? 0xffffffffU : (uint32_t)next_nonce;
 			break;
 		}
 		pdata[19] = (uint32_t)next_nonce;
 	} while (!work_restart[thr_id].restart);
 	*hashes_done = pdata[19] - first_nonce + 1;
 	return 0;
 }