Implement x14 (cuda + cpu functions)

Project was updated for VS2013 and CUDA SDK 6.5. Also add a --cputest function to dump CPU hash results. TODO: x15 is not fully functional, but the first loop seems OK. Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>
2025-02-02 18:04:22 +00:00 · 2014-08-08 08:06:06 +02:00 · 2014-08-08 08:06:06 +02:00 · 06763c20b1
commit 06763c20b1
parent df840b7dfb
32 changed files with 8152 additions and 409 deletions
--- a/Makefile.am
+++ b/Makefile.am
@ -29,16 +29,18 @@ ccminer_SOURCES		= elist.h miner.h compat.h \
 			  groestlcoin.cpp cuda_groestlcoin.cu cuda_groestlcoin.h \
 			  myriadgroestl.cpp cuda_myriadgroestl.cu \
 			  JHA/jackpotcoin.cu JHA/cuda_jha_keccak512.cu \
-			  JHA/cuda_jha_compactionTest.cu quark/cuda_quark_checkhash.cu \
+			  JHA/cuda_jha_compactionTest.cu quark/cuda_checkhash.cu \
 			  quark/cuda_jh512.cu quark/cuda_quark_blake512.cu quark/cuda_quark_groestl512.cu quark/cuda_skein512.cu \
 			  quark/cuda_bmw512.cu quark/cuda_quark_keccak512.cu quark/quarkcoin.cu quark/animecoin.cu \
 			  quark/cuda_quark_compactionTest.cu \
 			  cuda_nist5.cu \
 			  sph/cubehash.c sph/echo.c sph/luffa.c sph/shavite.c sph/simd.c \
 			  sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \
+			  sph/shabal.c sph/whirlpool.c \
 			  x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu \
 			  x11/x11.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
-			  x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu
+			  x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \
+			  x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu

 ccminer_LDFLAGS		= $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
 ccminer_LDADD		= @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@
--- a/README.md
+++ b/README.md
@ -2,3 +2,6 @@ ccminer
 =======

 Christian Buchner's &amp; Christian H.'s CUDA miner project
+
+Fork by tpruvot@github with X14 support
+   BTC donation address: 1AJdfCpLWPNoAMDfHF1wD5y8VgKSSTHxPo
--- a/README.txt
+++ b/README.txt
@ -19,6 +19,10 @@ If you find this tool useful and like to support its continued
   SFR donation address: SR4b87aEnPfTs77bo9NnnaV21fiF6jQpAp
   MNC donation address: MShgNUSYwybEbXLvJUtdNg1a7rUeiNgooK
   BTQ donation address: 13GFwLiZL2DaA9XeE733PNrQX5QYLFsonS
+
+X14/X15 (tpruvot@github)
+   BTC donation address: 1AJdfCpLWPNoAMDfHF1wD5y8VgKSSTHxPo
+
 ***************************************************************

 >>> Introduction <<<
@ -88,6 +92,7 @@ its command line interface and options.
  -P, --protocol-dump   verbose dump of protocol-level activities
  -B, --background      run the miner in the background
      --benchmark       run in offline benchmark mode
+      --cputest         debug hashes from cpu algorithms
  -c, --config=FILE     load a JSON-format configuration file
  -V, --version         display version information and exit
  -h, --help            display this help text and exit
--- a/ccminer.sln
+++ b/ccminer.sln
@ -1,6 +1,7 @@
-
-Microsoft Visual Studio Solution File, Format Version 11.00
-# Visual Studio 2010
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 2013
+VisualStudioVersion = 12.0.30723.0
+MinimumVisualStudioVersion = 10.0.40219.1
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ccminer", "ccminer.vcxproj", "{36DC07F9-A4A6-4877-A146-1B960083CF6F}"
 EndProject
 Global
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@ -1,53 +1,61 @@
 <?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ItemGroup Label="ProjectConfigurations">
-    <ProjectConfiguration Include="Debug|Win32">
-      <Configuration>Debug</Configuration>
-      <Platform>Win32</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Debug|x64">
-      <Configuration>Debug</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|Win32">
      <Configuration>Release</Configuration>
      <Platform>Win32</Platform>
    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|x64">
      <Configuration>Release</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
  </ItemGroup>
  <PropertyGroup Label="Globals">
    <ProjectGuid>{36DC07F9-A4A6-4877-A146-1B960083CF6F}</ProjectGuid>
    <RootNamespace>ccminer</RootNamespace>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <UseDebugLibraries>true</UseDebugLibraries>
-    <CharacterSet>MultiByte</CharacterSet>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <UseDebugLibraries>true</UseDebugLibraries>
-    <CharacterSet>MultiByte</CharacterSet>
-  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+    <CLRSupport>false</CLRSupport>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+    <CLRSupport>false</CLRSupport>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+    <CLRSupport>false</CLRSupport>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+    <CLRSupport>false</CLRSupport>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
  <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 5.5.props" />
+    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.props" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
@ -64,6 +72,8 @@
  <PropertyGroup Label="UserMacros" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>$(CUDA_INC_PATH);$(IncludePath)</IncludePath>
+    <LibraryPath>$(CUDA_LIB_PATH);$(LibraryPath)</LibraryPath>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <LinkIncremental>true</LinkIncremental>
@ -92,17 +102,15 @@
 copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
    </PostBuildEvent>
    <CudaCompile>
-      <CInterleavedPTX>true</CInterleavedPTX>
+      <CInterleavedPTX>false</CInterleavedPTX>
    </CudaCompile>
    <CudaCompile>
      <MaxRegCount>80</MaxRegCount>
    </CudaCompile>
    <CudaCompile>
      <PtxAsOptionV>true</PtxAsOptionV>
-      <Keep>true</Keep>
-      <CodeGeneration>compute_35,sm_35</CodeGeneration>
-      <Include>
-      </Include>
+      <Keep>false</Keep>
+      <CodeGeneration>compute_50,sm_50</CodeGeneration>
    </CudaCompile>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
@ -123,7 +131,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
 copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
    </PostBuildEvent>
    <CudaCompile>
-      <CInterleavedPTX>true</CInterleavedPTX>
+      <CInterleavedPTX>false</CInterleavedPTX>
    </CudaCompile>
    <CudaCompile>
      <MaxRegCount>80</MaxRegCount>
@ -142,7 +150,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
      <Optimization>MaxSpeed</Optimization>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;MAXWELL_OR_FERMI=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <AdditionalIncludeDirectories>.;compat;compat\jansson;compat\getopt;..\pthreads\Pre-built.2\include;..\curl-7.29.0\include;..\OpenSSL-Win32\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)</AdditionalIncludeDirectories>
    </ClCompile>
    <Link>
@ -158,18 +166,23 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
 copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
    </PostBuildEvent>
    <CudaCompile>
-      <CInterleavedPTX>true</CInterleavedPTX>
+      <CInterleavedPTX>false</CInterleavedPTX>
    </CudaCompile>
    <CudaCompile>
      <MaxRegCount>80</MaxRegCount>
    </CudaCompile>
    <CudaCompile>
      <PtxAsOptionV>true</PtxAsOptionV>
-      <Keep>true</Keep>
-      <CodeGeneration>compute_35,sm_35</CodeGeneration>
-      <Include>
-      </Include>
+      <Keep>false</Keep>
+      <CodeGeneration>compute_50,sm_50</CodeGeneration>
+      <Defines>--optimize 2</Defines>
    </CudaCompile>
+    <CudaLink>
+      <GPUDebugInfo>false</GPUDebugInfo>
+    </CudaLink>
+    <CudaLink>
+      <Optimization>O2</Optimization>
+    </CudaLink>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <ClCompile>
@ -193,7 +206,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
 copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
    </PostBuildEvent>
    <CudaCompile>
-      <CInterleavedPTX>true</CInterleavedPTX>
+      <CInterleavedPTX>false</CInterleavedPTX>
    </CudaCompile>
    <CudaCompile>
      <MaxRegCount>80</MaxRegCount>
@ -201,7 +214,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
    <CudaCompile>
      <PtxAsOptionV>true</PtxAsOptionV>
      <Keep>true</Keep>
-      <CodeGeneration>compute_35,sm_35</CodeGeneration>
+      <CodeGeneration>compute_50,sm_50</CodeGeneration>
      <Include>
      </Include>
    </CudaCompile>
@ -216,10 +229,12 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
    <ClCompile Include="compat\jansson\utf.c" />
    <ClCompile Include="compat\jansson\value.c" />
    <ClCompile Include="cpu-miner.c">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">/TP %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">/TP %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">/TP %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">/TP %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">/Tp %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">/Tp %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">/Tp %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">/Tp %(AdditionalOptions)</AdditionalOptions>
+      <TreatWChar_tAsBuiltInType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</TreatWChar_tAsBuiltInType>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Full</Optimization>
    </ClCompile>
    <ClCompile Include="fuguecoin.cpp" />
    <ClCompile Include="groestlcoin.cpp" />
@ -227,10 +242,10 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
    <ClCompile Include="myriadgroestl.cpp" />
    <ClCompile Include="scrypt.c" />
    <ClCompile Include="sha2.c">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">/TP %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">/TP %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">/TP %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">/TP %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">/Tp %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">/Tp %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">/Tp %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">/Tp %(AdditionalOptions)</AdditionalOptions>
    </ClCompile>
    <ClCompile Include="sph\aes_helper.c" />
    <ClCompile Include="sph\blake.c" />
@ -242,16 +257,21 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
    <ClCompile Include="sph\jh.c" />
    <ClCompile Include="sph\keccak.c" />
    <ClCompile Include="sph\luffa.c" />
+    <ClCompile Include="sph\shabal.c" />
    <ClCompile Include="sph\shavite.c" />
    <ClCompile Include="sph\simd.c" />
    <ClCompile Include="sph\skein.c" />
    <ClCompile Include="sph\hamsi.c" />
    <ClCompile Include="sph\hamsi_helper.c" />
+    <ClCompile Include="sph\whirlpool.c" />
+    <ClCompile Include="sph\x15_helper.c">
+      <ExcludedFromBuild>true</ExcludedFromBuild>
+    </ClCompile>
    <ClCompile Include="util.c">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">/TP %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">/TP %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">/TP %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">/TP %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">/Tp %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">/Tp %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">/Tp %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">/Tp %(AdditionalOptions)</AdditionalOptions>
    </ClCompile>
  </ItemGroup>
  <ItemGroup>
@ -277,241 +297,263 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
    <ClInclude Include="sph\sph_bmw.h" />
    <ClInclude Include="sph\sph_cubehash.h" />
    <ClInclude Include="sph\sph_echo.h" />
+    <ClInclude Include="sph\sph_fugue.h" />
    <ClInclude Include="sph\sph_groestl.h" />
    <ClInclude Include="sph\sph_jh.h" />
    <ClInclude Include="sph\sph_keccak.h" />
    <ClInclude Include="sph\sph_luffa.h" />
+    <ClInclude Include="sph\sph_shabal.h" />
    <ClInclude Include="sph\sph_shavite.h" />
    <ClInclude Include="sph\sph_simd.h" />
    <ClInclude Include="sph\sph_skein.h" />
    <ClInclude Include="sph\sph_hamsi.h" />
    <ClInclude Include="sph\sph_types.h" />
+    <ClInclude Include="sph\sph_whirlpool.h" />
    <ClInclude Include="uint256.h" />
  </ItemGroup>
  <ItemGroup>
    <CudaCompile Include="cuda_fugue256.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="cuda_groestlcoin.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="cuda_myriadgroestl.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="cuda_nist5.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="heavy\cuda_blake512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="heavy\cuda_combine.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="heavy\cuda_groestl512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="heavy\cuda_hefty1.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="heavy\cuda_keccak512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="heavy\cuda_sha256.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="heavy\heavy.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="JHA\cuda_jha_compactionTest.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=yes -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=yes -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=yes -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=yes -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="JHA\cuda_jha_keccak512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="JHA\jackpotcoin.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="quark\animecoin.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="quark\cuda_bmw512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="quark\cuda_jh512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="quark\cuda_quark_blake512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
-    <CudaCompile Include="quark\cuda_quark_checkhash.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+    <CudaCompile Include="quark\cuda_checkhash.cu">
+      <CInterleavedPTX>false</CInterleavedPTX>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="quark\cuda_quark_compactionTest.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=yes -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=yes -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=yes -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=yes -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="quark\cuda_quark_groestl512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="quark\cuda_quark_keccak512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="quark\cuda_skein512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="quark\quarkcoin.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="x11\cuda_x11_aes.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild>true</ExcludedFromBuild>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="x11\cuda_x11_cubehash512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="x11\cuda_x11_echo.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="x11\cuda_x11_luffa512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="x11\cuda_x11_shavite512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
      <MaxRegCount Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">128</MaxRegCount>
      <MaxRegCount Condition="'$(Configuration)|$(Platform)'=='Release|x64'">128</MaxRegCount>
    </CudaCompile>
    <CudaCompile Include="x11\cuda_x11_simd512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="x11\simd_functions.cu">
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <ExcludedFromBuild>true</ExcludedFromBuild>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="x11\x11.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="x13\cuda_x13_hamsi512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="x13\cuda_x13_fugue512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
    <CudaCompile Include="x13\x13.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
+    </CudaCompile>
+    <CudaCompile Include="x15\cuda_x14_shabal512.cu">
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
+    </CudaCompile>
+    <CudaCompile Include="x15\cuda_x15_whirlpool.cu">
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
+    </CudaCompile>
+    <CudaCompile Include="x15\x14.cu">
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
+    </CudaCompile>
+    <CudaCompile Include="x15\x15.cu">
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
  </ItemGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 5.5.targets" />
+    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.targets" />
  </ImportGroup>
 </Project>
--- a/ccminer.vcxproj.filters
+++ b/ccminer.vcxproj.filters
@ -58,6 +58,9 @@
    <Filter Include="Source Files\CUDA\x13">
      <UniqueIdentifier>{d67a2af7-4851-4d21-910e-87791bc8ee35}</UniqueIdentifier>
    </Filter>
+    <Filter Include="Source Files\CUDA\x15">
+      <UniqueIdentifier>{a2403c22-6777-46ab-a55a-3fcc7386c974}</UniqueIdentifier>
+    </Filter>
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="compat\jansson\dump.c">
@ -153,6 +156,15 @@
    <ClCompile Include="sph\hamsi_helper.c">
      <Filter>Source Files\sph</Filter>
    </ClCompile>
+    <ClCompile Include="sph\shabal.c">
+      <Filter>Source Files\sph</Filter>
+    </ClCompile>
+    <ClCompile Include="sph\whirlpool.c">
+      <Filter>Source Files\sph</Filter>
+    </ClCompile>
+    <ClCompile Include="sph\x15_helper.c">
+      <Filter>Source Files\sph</Filter>
+    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="compat.h">
@ -254,6 +266,12 @@
    <ClInclude Include="sph\sph_hamsi.h">
      <Filter>Header Files\sph</Filter>
    </ClInclude>
+    <ClInclude Include="sph\sph_shabal.h">
+      <Filter>Header Files\sph</Filter>
+    </ClInclude>
+    <ClInclude Include="sph\sph_whirlpool.h">
+      <Filter>Header Files\sph</Filter>
+    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <CudaCompile Include="cuda_fugue256.cu">
@ -268,9 +286,6 @@
    <CudaCompile Include="JHA\jackpotcoin.cu">
      <Filter>Source Files\CUDA\JHA</Filter>
    </CudaCompile>
-    <CudaCompile Include="quark\cuda_quark_checkhash.cu">
-      <Filter>Source Files\CUDA\quark</Filter>
-    </CudaCompile>
    <CudaCompile Include="cuda_myriadgroestl.cu">
      <Filter>Source Files\CUDA</Filter>
    </CudaCompile>
@ -361,5 +376,20 @@
    <CudaCompile Include="x13\x13.cu">
      <Filter>Source Files\CUDA\x13</Filter>
    </CudaCompile>
+    <CudaCompile Include="quark\cuda_checkhash.cu">
+      <Filter>Source Files\CUDA</Filter>
+    </CudaCompile>
+    <CudaCompile Include="x15\x14.cu">
+      <Filter>Source Files\CUDA\x15</Filter>
+    </CudaCompile>
+    <CudaCompile Include="x15\cuda_x14_shabal512.cu">
+      <Filter>Source Files\CUDA\x15</Filter>
+    </CudaCompile>
+    <CudaCompile Include="x15\x15.cu">
+      <Filter>Source Files\CUDA\x15</Filter>
+    </CudaCompile>
+    <CudaCompile Include="x15\cuda_x15_whirlpool.cu">
+      <Filter>Source Files\CUDA\x15</Filter>
+    </CudaCompile>
  </ItemGroup>
 </Project>
--- a/compat/thrust/system/detail/generic/find.inl
+++ b/compat/thrust/system/detail/generic/find.inl
@ -86,7 +86,7 @@ InputIterator find_if(thrust::execution_policy<DerivedPolicy> &exec,

    // TODO incorporate sizeof(InputType) into interval_threshold and round to multiple of 32
    const difference_type interval_threshold = 1 << 20;
-    const difference_type interval_size = (std::min)(interval_threshold, n);
+    const difference_type interval_size = min(interval_threshold, n);

    // force transform_iterator output to bool
    typedef thrust::transform_iterator<Predicate, InputIterator, bool> XfrmIterator;
--- a/cpu-miner.c
+++ b/cpu-miner.c
@ -21,6 +21,7 @@
 #include <time.h>
 #ifdef WIN32
 #include <windows.h>
+#include <stdint.h>
 #else
 #include <errno.h>
 #include <signal.h>
@ -133,6 +134,8 @@ typedef enum {
 	ALGO_NIST5,
 	ALGO_X11,
 	ALGO_X13,
+	ALGO_X14,
+	ALGO_X15,
 	ALGO_DMD_GR,
 } sha256_algos;

@ -148,6 +151,8 @@ static const char *algo_names[] = {
 	"nist5",
 	"x11",
 	"x13",
+	"x14",
+	"x15",
 	"dmd-gr",
 };

@ -222,6 +227,8 @@ Options:\n\
                        nist5     NIST5 (TalkCoin) hash\n\
                        x11       X11 (DarkCoin) hash\n\
                        x13       X13 (MaruCoin) hash\n\
+                        x14       X14 hash\n\
+                        x15       X15 hash\n\
                        dmd-gr    Diamond-Groestl hash\n\
  -d, --devices         takes a comma separated list of CUDA devices to use.\n\
                        Device IDs start counting from 0! Alternatively takes\n\
@ -258,6 +265,7 @@ Options:\n\
 #endif
 "\
      --benchmark       run in offline benchmark mode\n\
+      --cputest         debug hashes from cpu algorithms\n\
  -c, --config=FILE     load a JSON-format configuration file\n\
  -V, --version         display version information and exit\n\
  -h, --help            display this help text and exit\n\
@ -278,6 +286,7 @@ static struct option const options[] = {
 	{ "background", 0, NULL, 'B' },
 #endif
 	{ "benchmark", 0, NULL, 1005 },
+	{ "cputest", 0, NULL, 1006 },
 	{ "cert", 1, NULL, 1001 },
 	{ "config", 1, NULL, 'c' },
 	{ "debug", 0, NULL, 'D' },
@ -924,6 +933,17 @@ static void *miner_thread(void *userdata)
 			                      max_nonce, &hashes_done);
 			break;

+		case ALGO_X14:
+			rc = scanhash_x14(thr_id, work.data, work.target,
+				max_nonce, &hashes_done);
+			break;
+
+		case ALGO_X15:
+			rc = scanhash_x15(thr_id, work.data, work.target,
+				max_nonce, &hashes_done);
+			exit(0); /* FIXME: debug stop -- x15 is not fully functional yet (see commit message); this terminates the whole process after the first scan, so the break below is never reached */
+			break;
+
 		default:
 			/* should never happen */
 			goto out;
@ -1345,6 +1365,10 @@ static void parse_arg (int key, char *arg)
 		want_stratum = false;
 		have_stratum = false;
 		break;
+	case 1006:
+		print_hash_tests();
+		exit(0);
+		break;
 	case 1003:
 		want_longpoll = false;
 		break;
@ -1481,26 +1505,26 @@ static void signal_handler(int sig)
 }
 #endif

-#define PROGRAM_VERSION "1.2"
+#define PROGRAM_VERSION "1.2-VC12"
 int main(int argc, char *argv[])
 {
 	struct thr_info *thr;
 	long flags;
 	int i;

+	printf("*** ccMiner for nVidia GPUs by Christian Buchner and Christian H. ***\n");
+	printf("\t This is version "PROGRAM_VERSION" (tpruvot@github)\n");
 #ifdef WIN32
-	SYSTEM_INFO sysinfo;
+	printf("\t  Built with VC++ 2013 and nVidia CUDA SDK 6.5 RC (DC 5.0)\n\n");
+#else
+	printf("\t  Built with the nVidia CUDA SDK 6.5 RC\n\n");
 #endif
-
-	printf("     *** ccMiner for nVidia GPUs by Christian Buchner and Christian H. ***\n");
-	printf("\t             This is version "PROGRAM_VERSION" (beta)\n");
 	printf("\t  based on pooler-cpuminer 2.3.2 (c) 2010 Jeff Garzik, 2012 pooler\n");
-	printf("\t  based on pooler-cpuminer extension for HVC from\n\t       https://github.com/heavycoin/cpuminer-heavycoin\n");
-	printf("\t\t\tand\n\t       http://hvc.1gh.com/\n");
+	printf("\t  based on pooler-cpuminer extension for HVC from http://hvc.1gh.com/" "\n\n");
 	printf("\tCuda additions Copyright 2014 Christian Buchner, Christian H.\n");
-	printf("\t  LTC donation address: LKS1WDKGED647msBQfLBHV3Ls8sveGncnm\n");
 	printf("\t  BTC donation address: 16hJF5mceSojnTD3ZTUDqdRhDyPJzoRakM\n");
-	printf("\t  YAC donation address: Y87sptDEcpLkLeAuex6qZioDbvy1qXZEj4\n");
+	printf("\tCuda X14 and X15 added by Tanguy Pruvot (also in cpuminer-multi)\n");
+	printf("\t  BTC donation address: 1AJdfCpLWPNoAMDfHF1wD5y8VgKSSTHxPo\n\n");

 	rpc_user = strdup("");
 	rpc_pass = strdup("");
--- a/cpuminer-config.h
+++ b/cpuminer-config.h
@ -152,7 +152,7 @@
 #define PACKAGE_NAME "ccminer"

 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "ccminer 2014.06.15"
+#define PACKAGE_STRING "ccminer 2014.08.12"

 /* Define to the one symbol short name of this package. */
 #undef PACKAGE_TARNAME
@ -161,7 +161,7 @@
 #undef PACKAGE_URL

 /* Define to the version of this package. */
-#define PACKAGE_VERSION "2014.06.15"
+#define PACKAGE_VERSION "2014.08.12-VC12"

 /* If using the C implementation of alloca, define if you know the
   direction of stack growth for your system; otherwise it will be
--- a/heavy/heavy.cu
+++ b/heavy/heavy.cu
@ -1,10 +1,15 @@
-#include <string.h>
-#include <openssl/sha.h>
 #include <cuda.h>
 #include "cuda_runtime.h"
 #include "device_launch_parameters.h"
+
+#include <stdio.h>
+#include <memory.h>
+#include <string.h>
+
 #include <map>

+#include <openssl/sha.h>
+
 #ifndef _WIN32
 #include <unistd.h>
 #endif
@ -337,7 +342,7 @@ int scanhash_heavy_cpp(int thr_id, uint32_t *pdata,
    blake512_cpu_setBlock(pdata, blocklen);

    do {
-        int i;
+        uint32_t i;

        ////// Compaction init
        thrust::device_ptr<uint32_t> devNoncePtr(d_nonceVector[thr_id]);
--- a/miner.h
+++ b/miner.h
@ -243,6 +243,14 @@ extern int scanhash_x13(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done);

+extern int scanhash_x14(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done);
+
+extern int scanhash_x15(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done);
+
 extern void fugue256_hash(unsigned char* output, const unsigned char* input, int len);
 extern void heavycoin_hash(unsigned char* output, const unsigned char* input, int len);
 extern void groestlcoin_hash(unsigned char* output, const unsigned char* input, int len);
@ -342,6 +350,12 @@ extern void *tq_pop(struct thread_q *tq, const struct timespec *abstime);
 extern void tq_freeze(struct thread_q *tq);
 extern void tq_thaw(struct thread_q *tq);

+void print_hash_tests(void);
+void x11hash(void *output, const void *input);
+void x13hash(void *output, const void *input);
+void x14hash(void *output, const void *input);
+void x15hash(void *output, const void *input);
+
 #ifdef __cplusplus
 }
 #endif
--- a/quark/cuda_bmw512.cu
+++ b/quark/cuda_bmw512.cu
@ -23,6 +23,13 @@ static __device__ uint32_t cuda_swab32(uint32_t x)
 {
 	return __byte_perm(x, 0, 0x0123);
 }
+
+// Replace the high word of a 64-bit value: returns the low 32 bits of x combined with y placed in bits 32..63
+static __device__ unsigned long long REPLACE_HIWORD(const unsigned long long &x, const uint32_t &y) {
+	return (x & 0xFFFFFFFFULL) | (((unsigned long long)y) << 32ULL);
+}
+
+#if 0
 // Endian Drehung für 64 Bit Typen
 static __device__ unsigned long long cuda_swab64(unsigned long long x) {
    uint32_t h = (x >> 32);
@ -39,11 +46,6 @@ static __device__ uint32_t HIWORD(const unsigned long long &x) {
 #endif
 }

-// das Hi Word in einem 64 Bit Typen ersetzen
-static __device__ unsigned long long REPLACE_HIWORD(const unsigned long long &x, const uint32_t &y) {
-	return (x & 0xFFFFFFFFULL) | (((unsigned long long)y) << 32ULL);
-}
-
 // das Lo Word aus einem 64 Bit Typen extrahieren
 static __device__ uint32_t LOWORD(const unsigned long long &x) {
 #if __CUDA_ARCH__ >= 130
@ -66,6 +68,7 @@ static __device__ unsigned long long MAKE_ULONGLONG(uint32_t LO, uint32_t HI)
 static __device__ unsigned long long REPLACE_LOWORD(const unsigned long long &x, const uint32_t &y) {
 	return (x & 0xFFFFFFFF00000000ULL) | ((unsigned long long)y);
 }
+#endif

 // der Versuch, einen Wrapper für einen aus 32 Bit Registern zusammengesetzten uin64_t Typen zu entferfen...
 #if 1
--- a/quark/cuda_quark_checkhash.cu
+++ b/quark/cuda_quark_checkhash.cu
@ -3,14 +3,10 @@
 #include "device_launch_parameters.h"

 #include <stdio.h>
+#include <stdint.h>
 #include <memory.h>

-// Folgende Definitionen später durch header ersetzen
-typedef unsigned char uint8_t;
-typedef unsigned int uint32_t;
-typedef unsigned long long uint64_t;
-
-// das Hash Target gegen das wir testen sollen
+// Hash target that the computed hashes are tested against
 __constant__ uint32_t pTarget[8];

 uint32_t *d_resNounce[8];
@ -19,7 +15,7 @@ uint32_t *h_resNounce[8];
 // aus heavy.cu
 extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);

-__global__ void quark_check_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_nonceVector, uint32_t *g_hash, uint32_t *resNounce)
+__global__ void cuda_check_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_nonceVector, uint32_t *g_hash, uint32_t *resNounce)
 {
 	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
@ -89,7 +85,7 @@ __host__ uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t star
 	// Größe des dynamischen Shared Memory Bereichs
 	size_t shared_size = 0;

-	quark_check_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, d_nonceVector, d_inputHash, d_resNounce[thr_id]);
+	cuda_check_gpu_hash_64 <<<grid, block, shared_size>>>(threads, startNounce, d_nonceVector, d_inputHash, d_resNounce[thr_id]);

 	// Strategisches Sleep Kommando zur Senkung der CPU Last
 	MyStreamSynchronize(NULL, order, thr_id);
--- a/quark/cuda_quark_blake512.cu
+++ b/quark/cuda_quark_blake512.cu
@ -64,11 +64,12 @@ static __device__ uint32_t LOWORD(const uint64_t &x) {
 	return (uint32_t)(x & 0xFFFFFFFFULL);
 #endif
 }
-
+#if 0
 // das Lo Word in einem 64 Bit Typen ersetzen
 static __device__ uint64_t REPLACE_LOWORD(const uint64_t &x, const uint32_t &y) {
 	return (x & 0xFFFFFFFF00000000ULL) | ((uint64_t)y);
 }
+#endif

 __device__ __forceinline__ uint64_t SWAP64(uint64_t x)
 {
--- a/quark/cuda_quark_groestl512.cu
+++ b/quark/cuda_quark_groestl512.cu
@ -1,7 +1,7 @@
 // Auf QuarkCoin spezialisierte Version von Groestl inkl. Bitslice

 #include <cuda.h>
-#include "cuda_runtime.h"
+#include <cuda_runtime.h>
 #include "device_launch_parameters.h"

 #include <stdio.h>
--- a/sph/shabal.c
+++ b/sph/shabal.c
@ -0,0 +1,810 @@
+/* $Id: shabal.c 175 2010-05-07 16:03:20Z tp $ */
+/*
+ * Shabal implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#include <stddef.h>
+#include <string.h>
+
+#include "sph_shabal.h"
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4146)
+#endif
+
+/*
+ * Part of this code was automatically generated (the part between
+ * the "BEGIN" and "END" markers).
+ */
+
+#define sM    16
+
+#define C32   SPH_C32
+#define T32   SPH_T32
+
+#define O1   13
+#define O2    9
+#define O3    6
+
+/*
+ * We copy the state into local variables, so that the compiler knows
+ * that it can optimize them at will.
+ */
+
+/* BEGIN -- automatically generated code. */
+
+#define DECL_STATE   \
+	sph_u32 A00, A01, A02, A03, A04, A05, A06, A07, \
+	        A08, A09, A0A, A0B; \
+	sph_u32 B0, B1, B2, B3, B4, B5, B6, B7, \
+	        B8, B9, BA, BB, BC, BD, BE, BF; \
+	sph_u32 C0, C1, C2, C3, C4, C5, C6, C7, \
+	        C8, C9, CA, CB, CC, CD, CE, CF; \
+	sph_u32 M0, M1, M2, M3, M4, M5, M6, M7, \
+	        M8, M9, MA, MB, MC, MD, ME, MF; \
+	sph_u32 Wlow, Whigh;
+
+#define READ_STATE(state)   do { \
+		A00 = (state)->A[0]; \
+		A01 = (state)->A[1]; \
+		A02 = (state)->A[2]; \
+		A03 = (state)->A[3]; \
+		A04 = (state)->A[4]; \
+		A05 = (state)->A[5]; \
+		A06 = (state)->A[6]; \
+		A07 = (state)->A[7]; \
+		A08 = (state)->A[8]; \
+		A09 = (state)->A[9]; \
+		A0A = (state)->A[10]; \
+		A0B = (state)->A[11]; \
+		B0 = (state)->B[0]; \
+		B1 = (state)->B[1]; \
+		B2 = (state)->B[2]; \
+		B3 = (state)->B[3]; \
+		B4 = (state)->B[4]; \
+		B5 = (state)->B[5]; \
+		B6 = (state)->B[6]; \
+		B7 = (state)->B[7]; \
+		B8 = (state)->B[8]; \
+		B9 = (state)->B[9]; \
+		BA = (state)->B[10]; \
+		BB = (state)->B[11]; \
+		BC = (state)->B[12]; \
+		BD = (state)->B[13]; \
+		BE = (state)->B[14]; \
+		BF = (state)->B[15]; \
+		C0 = (state)->C[0]; \
+		C1 = (state)->C[1]; \
+		C2 = (state)->C[2]; \
+		C3 = (state)->C[3]; \
+		C4 = (state)->C[4]; \
+		C5 = (state)->C[5]; \
+		C6 = (state)->C[6]; \
+		C7 = (state)->C[7]; \
+		C8 = (state)->C[8]; \
+		C9 = (state)->C[9]; \
+		CA = (state)->C[10]; \
+		CB = (state)->C[11]; \
+		CC = (state)->C[12]; \
+		CD = (state)->C[13]; \
+		CE = (state)->C[14]; \
+		CF = (state)->C[15]; \
+		Wlow = (state)->Wlow; \
+		Whigh = (state)->Whigh; \
+	} while (0)
+
+#define WRITE_STATE(state)   do { \
+		(state)->A[0] = A00; \
+		(state)->A[1] = A01; \
+		(state)->A[2] = A02; \
+		(state)->A[3] = A03; \
+		(state)->A[4] = A04; \
+		(state)->A[5] = A05; \
+		(state)->A[6] = A06; \
+		(state)->A[7] = A07; \
+		(state)->A[8] = A08; \
+		(state)->A[9] = A09; \
+		(state)->A[10] = A0A; \
+		(state)->A[11] = A0B; \
+		(state)->B[0] = B0; \
+		(state)->B[1] = B1; \
+		(state)->B[2] = B2; \
+		(state)->B[3] = B3; \
+		(state)->B[4] = B4; \
+		(state)->B[5] = B5; \
+		(state)->B[6] = B6; \
+		(state)->B[7] = B7; \
+		(state)->B[8] = B8; \
+		(state)->B[9] = B9; \
+		(state)->B[10] = BA; \
+		(state)->B[11] = BB; \
+		(state)->B[12] = BC; \
+		(state)->B[13] = BD; \
+		(state)->B[14] = BE; \
+		(state)->B[15] = BF; \
+		(state)->C[0] = C0; \
+		(state)->C[1] = C1; \
+		(state)->C[2] = C2; \
+		(state)->C[3] = C3; \
+		(state)->C[4] = C4; \
+		(state)->C[5] = C5; \
+		(state)->C[6] = C6; \
+		(state)->C[7] = C7; \
+		(state)->C[8] = C8; \
+		(state)->C[9] = C9; \
+		(state)->C[10] = CA; \
+		(state)->C[11] = CB; \
+		(state)->C[12] = CC; \
+		(state)->C[13] = CD; \
+		(state)->C[14] = CE; \
+		(state)->C[15] = CF; \
+		(state)->Wlow = Wlow; \
+		(state)->Whigh = Whigh; \
+	} while (0)
+/*
+ * DECODE_BLOCK: load the sixteen message words M0..MF from the byte
+ * buffer `buf` (which must be in scope at the expansion site), one word
+ * per 4-byte step at offsets 0..60, using sph_dec32le_aligned().
+ */
+#define DECODE_BLOCK   do { \
+		M0 = sph_dec32le_aligned(buf + 0); \
+		M1 = sph_dec32le_aligned(buf + 4); \
+		M2 = sph_dec32le_aligned(buf + 8); \
+		M3 = sph_dec32le_aligned(buf + 12); \
+		M4 = sph_dec32le_aligned(buf + 16); \
+		M5 = sph_dec32le_aligned(buf + 20); \
+		M6 = sph_dec32le_aligned(buf + 24); \
+		M7 = sph_dec32le_aligned(buf + 28); \
+		M8 = sph_dec32le_aligned(buf + 32); \
+		M9 = sph_dec32le_aligned(buf + 36); \
+		MA = sph_dec32le_aligned(buf + 40); \
+		MB = sph_dec32le_aligned(buf + 44); \
+		MC = sph_dec32le_aligned(buf + 48); \
+		MD = sph_dec32le_aligned(buf + 52); \
+		ME = sph_dec32le_aligned(buf + 56); \
+		MF = sph_dec32le_aligned(buf + 60); \
+	} while (0)
+/*
+ * INPUT_BLOCK_ADD: add each message word Mi to the matching state word
+ * Bi (i = 0..F); every sum is passed through T32().
+ */
+#define INPUT_BLOCK_ADD   do { \
+		B0 = T32(B0 + M0); \
+		B1 = T32(B1 + M1); \
+		B2 = T32(B2 + M2); \
+		B3 = T32(B3 + M3); \
+		B4 = T32(B4 + M4); \
+		B5 = T32(B5 + M5); \
+		B6 = T32(B6 + M6); \
+		B7 = T32(B7 + M7); \
+		B8 = T32(B8 + M8); \
+		B9 = T32(B9 + M9); \
+		BA = T32(BA + MA); \
+		BB = T32(BB + MB); \
+		BC = T32(BC + MC); \
+		BD = T32(BD + MD); \
+		BE = T32(BE + ME); \
+		BF = T32(BF + MF); \
+	} while (0)
+/*
+ * INPUT_BLOCK_SUB: subtract each message word Mi from the matching state
+ * word Ci (i = 0..F); every difference is passed through T32().
+ */
+#define INPUT_BLOCK_SUB   do { \
+		C0 = T32(C0 - M0); \
+		C1 = T32(C1 - M1); \
+		C2 = T32(C2 - M2); \
+		C3 = T32(C3 - M3); \
+		C4 = T32(C4 - M4); \
+		C5 = T32(C5 - M5); \
+		C6 = T32(C6 - M6); \
+		C7 = T32(C7 - M7); \
+		C8 = T32(C8 - M8); \
+		C9 = T32(C9 - M9); \
+		CA = T32(CA - MA); \
+		CB = T32(CB - MB); \
+		CC = T32(CC - MC); \
+		CD = T32(CD - MD); \
+		CE = T32(CE - ME); \
+		CF = T32(CF - MF); \
+	} while (0)
+/*
+ * XOR_W: fold the two-word counter into the state by XORing Wlow into
+ * A00 and Whigh into A01.
+ */
+#define XOR_W   do { \
+		A00 ^= Wlow; \
+		A01 ^= Whigh; \
+	} while (0)
+/*
+ * SWAP(v1, v2): exchange two sph_u32 lvalues through a block-local
+ * temporary named `tmp`. Each argument is expanded twice, so it must be
+ * free of side effects and must not itself be named `tmp`.
+ */
+#define SWAP(v1, v2)   do { \
+		sph_u32 tmp = (v1); \
+		(v1) = (v2); \
+		(v2) = tmp; \
+	} while (0)
+/*
+ * SWAP_BC: exchange the B and C state words pairwise (B0 with C0, ...,
+ * BF with CF) using SWAP().
+ */
+#define SWAP_BC   do { \
+		SWAP(B0, C0); \
+		SWAP(B1, C1); \
+		SWAP(B2, C2); \
+		SWAP(B3, C3); \
+		SWAP(B4, C4); \
+		SWAP(B5, C5); \
+		SWAP(B6, C6); \
+		SWAP(B7, C7); \
+		SWAP(B8, C8); \
+		SWAP(B9, C9); \
+		SWAP(BA, CA); \
+		SWAP(BB, CB); \
+		SWAP(BC, CC); \
+		SWAP(BD, CD); \
+		SWAP(BE, CE); \
+		SWAP(BF, CF); \
+	} while (0)
+/*
+ * PERM_ELT: one elementary update of an A word (xa0) and a B word (xb0).
+ *   xa0 <- T32((xa0 ^ (r15(xa1) * 5) ^ xc) * 3) ^ xb1 ^ (xb2 & ~xb3) ^ xm
+ *   xb0 <- T32(~(r1(xb0) ^ xa0))             (uses the updated xa0)
+ * where r15(x) is (x << 15) | (x >> 17) and r1(x) is (x << 1) | (x >> 31),
+ * i.e. left rotations by 15 and 1 bits when the operand is a 32-bit value.
+ * The arguments are pasted unparenthesized and more than once, so they
+ * must be plain variable names.
+ */
+#define PERM_ELT(xa0, xa1, xb0, xb1, xb2, xb3, xc, xm)   do { \
+		xa0 = T32((xa0 \
+			^ (((xa1 << 15) | (xa1 >> 17)) * 5U) \
+			^ xc) * 3U) \
+			^ xb1 ^ (xb2 & ~xb3) ^ xm; \
+		xb0 = T32(~(((xb0 << 1) | (xb0 >> 31)) ^ xa0)); \
+	} while (0)
+/*
+ * PERM_STEP_0: sixteen PERM_ELT applications, in order. Application i
+ * (0..15) updates B[i] with message word M[i], reads B[(i+13)%16],
+ * B[(i+9)%16], B[(i+6)%16] and C[(8-i)%16], updates A[i%12] and takes
+ * A[(i+11)%12] as the rotated input. The order is significant because
+ * later applications read words written by earlier ones.
+ */
+#define PERM_STEP_0   do { \
+		PERM_ELT(A00, A0B, B0, BD, B9, B6, C8, M0); \
+		PERM_ELT(A01, A00, B1, BE, BA, B7, C7, M1); \
+		PERM_ELT(A02, A01, B2, BF, BB, B8, C6, M2); \
+		PERM_ELT(A03, A02, B3, B0, BC, B9, C5, M3); \
+		PERM_ELT(A04, A03, B4, B1, BD, BA, C4, M4); \
+		PERM_ELT(A05, A04, B5, B2, BE, BB, C3, M5); \
+		PERM_ELT(A06, A05, B6, B3, BF, BC, C2, M6); \
+		PERM_ELT(A07, A06, B7, B4, B0, BD, C1, M7); \
+		PERM_ELT(A08, A07, B8, B5, B1, BE, C0, M8); \
+		PERM_ELT(A09, A08, B9, B6, B2, BF, CF, M9); \
+		PERM_ELT(A0A, A09, BA, B7, B3, B0, CE, MA); \
+		PERM_ELT(A0B, A0A, BB, B8, B4, B1, CD, MB); \
+		PERM_ELT(A00, A0B, BC, B9, B5, B2, CC, MC); \
+		PERM_ELT(A01, A00, BD, BA, B6, B3, CB, MD); \
+		PERM_ELT(A02, A01, BE, BB, B7, B4, CA, ME); \
+		PERM_ELT(A03, A02, BF, BC, B8, B5, C9, MF); \
+	} while (0)
+/*
+ * PERM_STEP_1: same sixteen PERM_ELT applications as PERM_STEP_0 with
+ * respect to the B, C and M operands, but the A word updated by
+ * application i is A[(i+4)%12] and the rotated input is A[(i+3)%12],
+ * continuing the A sequence where PERM_STEP_0 stops.
+ */
+#define PERM_STEP_1   do { \
+		PERM_ELT(A04, A03, B0, BD, B9, B6, C8, M0); \
+		PERM_ELT(A05, A04, B1, BE, BA, B7, C7, M1); \
+		PERM_ELT(A06, A05, B2, BF, BB, B8, C6, M2); \
+		PERM_ELT(A07, A06, B3, B0, BC, B9, C5, M3); \
+		PERM_ELT(A08, A07, B4, B1, BD, BA, C4, M4); \
+		PERM_ELT(A09, A08, B5, B2, BE, BB, C3, M5); \
+		PERM_ELT(A0A, A09, B6, B3, BF, BC, C2, M6); \
+		PERM_ELT(A0B, A0A, B7, B4, B0, BD, C1, M7); \
+		PERM_ELT(A00, A0B, B8, B5, B1, BE, C0, M8); \
+		PERM_ELT(A01, A00, B9, B6, B2, BF, CF, M9); \
+		PERM_ELT(A02, A01, BA, B7, B3, B0, CE, MA); \
+		PERM_ELT(A03, A02, BB, B8, B4, B1, CD, MB); \
+		PERM_ELT(A04, A03, BC, B9, B5, B2, CC, MC); \
+		PERM_ELT(A05, A04, BD, BA, B6, B3, CB, MD); \
+		PERM_ELT(A06, A05, BE, BB, B7, B4, CA, ME); \
+		PERM_ELT(A07, A06, BF, BC, B8, B5, C9, MF); \
+	} while (0)
+/*
+ * PERM_STEP_2: same sixteen PERM_ELT applications as PERM_STEP_0 with
+ * respect to the B, C and M operands, but the A word updated by
+ * application i is A[(i+8)%12] and the rotated input is A[(i+7)%12],
+ * continuing the A sequence where PERM_STEP_1 stops.
+ */
+#define PERM_STEP_2   do { \
+		PERM_ELT(A08, A07, B0, BD, B9, B6, C8, M0); \
+		PERM_ELT(A09, A08, B1, BE, BA, B7, C7, M1); \
+		PERM_ELT(A0A, A09, B2, BF, BB, B8, C6, M2); \
+		PERM_ELT(A0B, A0A, B3, B0, BC, B9, C5, M3); \
+		PERM_ELT(A00, A0B, B4, B1, BD, BA, C4, M4); \
+		PERM_ELT(A01, A00, B5, B2, BE, BB, C3, M5); \
+		PERM_ELT(A02, A01, B6, B3, BF, BC, C2, M6); \
+		PERM_ELT(A03, A02, B7, B4, B0, BD, C1, M7); \
+		PERM_ELT(A04, A03, B8, B5, B1, BE, C0, M8); \
+		PERM_ELT(A05, A04, B9, B6, B2, BF, CF, M9); \
+		PERM_ELT(A06, A05, BA, B7, B3, B0, CE, MA); \
+		PERM_ELT(A07, A06, BB, B8, B4, B1, CD, MB); \
+		PERM_ELT(A08, A07, BC, B9, B5, B2, CC, MC); \
+		PERM_ELT(A09, A08, BD, BA, B6, B3, CB, MD); \
+		PERM_ELT(A0A, A09, BE, BB, B7, B4, CA, ME); \
+		PERM_ELT(A0B, A0A, BF, BC, B8, B5, C9, MF); \
+	} while (0)
+/*
+ * APPLY_P: the permutation applied to the working state, in three phases:
+ *   1. every B word becomes T32(B << 17) | (B >> 15), i.e. a 17-bit left
+ *      rotation when the word is a 32-bit value;
+ *   2. PERM_STEP_0, PERM_STEP_1 and PERM_STEP_2 run in that order;
+ *   3. 36 additions fold C words into the A words: A is walked from A0B
+ *      down to A00 three times while the C index starts at C6 and goes
+ *      down by one (modulo 16) on every addition.
+ */
+#define APPLY_P   do { \
+		B0 = T32(B0 << 17) | (B0 >> 15); \
+		B1 = T32(B1 << 17) | (B1 >> 15); \
+		B2 = T32(B2 << 17) | (B2 >> 15); \
+		B3 = T32(B3 << 17) | (B3 >> 15); \
+		B4 = T32(B4 << 17) | (B4 >> 15); \
+		B5 = T32(B5 << 17) | (B5 >> 15); \
+		B6 = T32(B6 << 17) | (B6 >> 15); \
+		B7 = T32(B7 << 17) | (B7 >> 15); \
+		B8 = T32(B8 << 17) | (B8 >> 15); \
+		B9 = T32(B9 << 17) | (B9 >> 15); \
+		BA = T32(BA << 17) | (BA >> 15); \
+		BB = T32(BB << 17) | (BB >> 15); \
+		BC = T32(BC << 17) | (BC >> 15); \
+		BD = T32(BD << 17) | (BD >> 15); \
+		BE = T32(BE << 17) | (BE >> 15); \
+		BF = T32(BF << 17) | (BF >> 15); \
+		PERM_STEP_0; \
+		PERM_STEP_1; \
+		PERM_STEP_2; \
+		A0B = T32(A0B + C6); \
+		A0A = T32(A0A + C5); \
+		A09 = T32(A09 + C4); \
+		A08 = T32(A08 + C3); \
+		A07 = T32(A07 + C2); \
+		A06 = T32(A06 + C1); \
+		A05 = T32(A05 + C0); \
+		A04 = T32(A04 + CF); \
+		A03 = T32(A03 + CE); \
+		A02 = T32(A02 + CD); \
+		A01 = T32(A01 + CC); \
+		A00 = T32(A00 + CB); \
+		A0B = T32(A0B + CA); \
+		A0A = T32(A0A + C9); \
+		A09 = T32(A09 + C8); \
+		A08 = T32(A08 + C7); \
+		A07 = T32(A07 + C6); \
+		A06 = T32(A06 + C5); \
+		A05 = T32(A05 + C4); \
+		A04 = T32(A04 + C3); \
+		A03 = T32(A03 + C2); \
+		A02 = T32(A02 + C1); \
+		A01 = T32(A01 + C0); \
+		A00 = T32(A00 + CF); \
+		A0B = T32(A0B + CE); \
+		A0A = T32(A0A + CD); \
+		A09 = T32(A09 + CC); \
+		A08 = T32(A08 + CB); \
+		A07 = T32(A07 + CA); \
+		A06 = T32(A06 + C9); \
+		A05 = T32(A05 + C8); \
+		A04 = T32(A04 + C7); \
+		A03 = T32(A03 + C6); \
+		A02 = T32(A02 + C5); \
+		A01 = T32(A01 + C4); \
+		A00 = T32(A00 + C3); \
+	} while (0)
+/*
+ * INCR_W: advance the two-word counter by one. Wlow is incremented
+ * through T32(); when the result is 0 the carry is added to Whigh.
+ */
+#define INCR_W   do { \
+		if ((Wlow = T32(Wlow + 1)) == 0) \
+			Whigh = T32(Whigh + 1); \
+	} while (0)
+#if 0
+static const sph_u32 A_init_192[] = {
+	C32(0xFD749ED4), C32(0xB798E530), C32(0x33904B6F), C32(0x46BDA85E),
+	C32(0x076934B4), C32(0x454B4058), C32(0x77F74527), C32(0xFB4CF465),
+	C32(0x62931DA9), C32(0xE778C8DB), C32(0x22B3998E), C32(0xAC15CFB9)
+};
+
+static const sph_u32 B_init_192[] = {
+	C32(0x58BCBAC4), C32(0xEC47A08E), C32(0xAEE933B2), C32(0xDFCBC824),
+	C32(0xA7944804), C32(0xBF65BDB0), C32(0x5A9D4502), C32(0x59979AF7),
+	C32(0xC5CEA54E), C32(0x4B6B8150), C32(0x16E71909), C32(0x7D632319),
+	C32(0x930573A0), C32(0xF34C63D1), C32(0xCAF914B4), C32(0xFDD6612C)
+};
+
+static const sph_u32 C_init_192[] = {
+	C32(0x61550878), C32(0x89EF2B75), C32(0xA1660C46), C32(0x7EF3855B),
+	C32(0x7297B58C), C32(0x1BC67793), C32(0x7FB1C723), C32(0xB66FC640),
+	C32(0x1A48B71C), C32(0xF0976D17), C32(0x088CE80A), C32(0xA454EDF3),
+	C32(0x1C096BF4), C32(0xAC76224B), C32(0x5215781C), C32(0xCD5D2669)
+};
+
+static const sph_u32 A_init_224[] = {
+	C32(0xA5201467), C32(0xA9B8D94A), C32(0xD4CED997), C32(0x68379D7B),
+	C32(0xA7FC73BA), C32(0xF1A2546B), C32(0x606782BF), C32(0xE0BCFD0F),
+	C32(0x2F25374E), C32(0x069A149F), C32(0x5E2DFF25), C32(0xFAECF061)
+};
+
+static const sph_u32 B_init_224[] = {
+	C32(0xEC9905D8), C32(0xF21850CF), C32(0xC0A746C8), C32(0x21DAD498),
+	C32(0x35156EEB), C32(0x088C97F2), C32(0x26303E40), C32(0x8A2D4FB5),
+	C32(0xFEEE44B6), C32(0x8A1E9573), C32(0x7B81111A), C32(0xCBC139F0),
+	C32(0xA3513861), C32(0x1D2C362E), C32(0x918C580E), C32(0xB58E1B9C)
+};
+
+static const sph_u32 C_init_224[] = {
+	C32(0xE4B573A1), C32(0x4C1A0880), C32(0x1E907C51), C32(0x04807EFD),
+	C32(0x3AD8CDE5), C32(0x16B21302), C32(0x02512C53), C32(0x2204CB18),
+	C32(0x99405F2D), C32(0xE5B648A1), C32(0x70AB1D43), C32(0xA10C25C2),
+	C32(0x16F1AC05), C32(0x38BBEB56), C32(0x9B01DC60), C32(0xB1096D83)
+};
+
+static const sph_u32 A_init_256[] = {
+	C32(0x52F84552), C32(0xE54B7999), C32(0x2D8EE3EC), C32(0xB9645191),
+	C32(0xE0078B86), C32(0xBB7C44C9), C32(0xD2B5C1CA), C32(0xB0D2EB8C),
+	C32(0x14CE5A45), C32(0x22AF50DC), C32(0xEFFDBC6B), C32(0xEB21B74A)
+};
+
+static const sph_u32 B_init_256[] = {
+	C32(0xB555C6EE), C32(0x3E710596), C32(0xA72A652F), C32(0x9301515F),
+	C32(0xDA28C1FA), C32(0x696FD868), C32(0x9CB6BF72), C32(0x0AFE4002),
+	C32(0xA6E03615), C32(0x5138C1D4), C32(0xBE216306), C32(0xB38B8890),
+	C32(0x3EA8B96B), C32(0x3299ACE4), C32(0x30924DD4), C32(0x55CB34A5)
+};
+
+static const sph_u32 C_init_256[] = {
+	C32(0xB405F031), C32(0xC4233EBA), C32(0xB3733979), C32(0xC0DD9D55),
+	C32(0xC51C28AE), C32(0xA327B8E1), C32(0x56C56167), C32(0xED614433),
+	C32(0x88B59D60), C32(0x60E2CEBA), C32(0x758B4B8B), C32(0x83E82A7F),
+	C32(0xBC968828), C32(0xE6E00BF7), C32(0xBA839E55), C32(0x9B491C60)
+};
+
+static const sph_u32 A_init_384[] = {
+	C32(0xC8FCA331), C32(0xE55C504E), C32(0x003EBF26), C32(0xBB6B8D83),
+	C32(0x7B0448C1), C32(0x41B82789), C32(0x0A7C9601), C32(0x8D659CFF),
+	C32(0xB6E2673E), C32(0xCA54C77B), C32(0x1460FD7E), C32(0x3FCB8F2D)
+};
+
+static const sph_u32 B_init_384[] = {
+	C32(0x527291FC), C32(0x2A16455F), C32(0x78E627E5), C32(0x944F169F),
+	C32(0x1CA6F016), C32(0xA854EA25), C32(0x8DB98ABE), C32(0xF2C62641),
+	C32(0x30117DCB), C32(0xCF5C4309), C32(0x93711A25), C32(0xF9F671B8),
+	C32(0xB01D2116), C32(0x333F4B89), C32(0xB285D165), C32(0x86829B36)
+};
+
+static const sph_u32 C_init_384[] = {
+	C32(0xF764B11A), C32(0x76172146), C32(0xCEF6934D), C32(0xC6D28399),
+	C32(0xFE095F61), C32(0x5E6018B4), C32(0x5048ECF5), C32(0x51353261),
+	C32(0x6E6E36DC), C32(0x63130DAD), C32(0xA9C69BD6), C32(0x1E90EA0C),
+	C32(0x7C35073B), C32(0x28D95E6D), C32(0xAA340E0D), C32(0xCB3DEE70)
+};
+#endif
+static const sph_u32 A_init_512[] = { /* 12 initial A words; copied into the context by shabal_init() for size 512 */
+	C32(0x20728DFD), C32(0x46C0BD53), C32(0xE782B699), C32(0x55304632),
+	C32(0x71B4EF90), C32(0x0EA9E82C), C32(0xDBB930F1), C32(0xFAD06B8B),
+	C32(0xBE0CAE40), C32(0x8BD14410), C32(0x76D2ADAC), C32(0x28ACAB7F)
+};
+
+static const sph_u32 B_init_512[] = { /* 16 initial B words; copied into the context by shabal_init() for size 512 */
+	C32(0xC1099CB7), C32(0x07B385F3), C32(0xE7442C26), C32(0xCC8AD640),
+	C32(0xEB6F56C7), C32(0x1EA81AA9), C32(0x73B9D314), C32(0x1DE85D08),
+	C32(0x48910A5A), C32(0x893B22DB), C32(0xC5A0DF44), C32(0xBBC4324E),
+	C32(0x72D2F240), C32(0x75941D99), C32(0x6D8BDE82), C32(0xA1A7502B)
+};
+
+static const sph_u32 C_init_512[] = { /* 16 initial C words; copied into the context by shabal_init() for size 512 */
+	C32(0xD9BF68D1), C32(0x58BAD750), C32(0x56028CB2), C32(0x8134F359),
+	C32(0xB5D469D8), C32(0x941A8CC2), C32(0x418B2A6E), C32(0x04052780),
+	C32(0x7F07D787), C32(0x5194358F), C32(0x3C60D665), C32(0xBE97D79A),
+	C32(0x950C3434), C32(0xAED9A06D), C32(0x2537DC8D), C32(0x7CDB5969)
+};
+
+/* END -- automatically generated code. */
+
+/*
+ * Load the precomputed initial state for the requested output size into
+ * the context, then reset the block counter to 1 and the buffer position
+ * to 0.
+ *
+ * cc     the context to set up (a sph_shabal_context)
+ * size   the output length in bits; only 512 is compiled in, the other
+ *        sizes are kept under "#if 0"
+ *
+ * A size with no matching table leaves the context untouched.
+ */
+static void
+shabal_init(void *cc, unsigned size)
+{
+	sph_shabal_context *ctx = cc;
+	const sph_u32 *iv_a, *iv_b, *iv_c;
+
+	/* Pick the table set that matches the output size. */
+	if (size == 512) {
+		iv_a = A_init_512;
+		iv_b = B_init_512;
+		iv_c = C_init_512;
+#if 0
+	} else if (size == 192) {
+		iv_a = A_init_192;
+		iv_b = B_init_192;
+		iv_c = C_init_192;
+	} else if (size == 224) {
+		iv_a = A_init_224;
+		iv_b = B_init_224;
+		iv_c = C_init_224;
+	} else if (size == 256) {
+		iv_a = A_init_256;
+		iv_b = B_init_256;
+		iv_c = C_init_256;
+	} else if (size == 384) {
+		iv_a = A_init_384;
+		iv_b = B_init_384;
+		iv_c = C_init_384;
+#endif
+	} else {
+		return;
+	}
+
+	memcpy(ctx->A, iv_a, sizeof ctx->A);
+	memcpy(ctx->B, iv_b, sizeof ctx->B);
+	memcpy(ctx->C, iv_c, sizeof ctx->C);
+	ctx->Whigh = 0;
+	ctx->Wlow = 1;
+	ctx->ptr = 0;
+}
+
+/*
+ * Absorb `len` bytes of message data into the context.
+ *
+ * cc     the context (a sph_shabal_context)
+ * data   the input bytes
+ * len    the number of input bytes
+ *
+ * Input is accumulated in the context buffer; each time the buffer is
+ * full, one block is processed and the counter is incremented. Bytes
+ * that do not complete a block stay buffered for the next call.
+ */
+static void
+shabal_core(void *cc, const unsigned char *data, size_t len)
+{
+	sph_shabal_context *ctx = cc;
+	unsigned char *buf = ctx->buf;
+	size_t ptr = ctx->ptr;
+	size_t room = (sizeof ctx->buf) - ptr;
+	DECL_STATE
+
+	/*
+	 * Not enough data to complete the pending block: just buffer it,
+	 * without loading the state into local variables.
+	 */
+	if (len < room) {
+		memcpy(buf + ptr, data, len);
+		ctx->ptr = ptr + len;
+		return;
+	}
+
+	READ_STATE(ctx);
+	do {
+		/* Top up the buffer to a full block and process it. */
+		memcpy(buf + ptr, data, room);
+		data += room;
+		len -= room;
+		DECODE_BLOCK;
+		INPUT_BLOCK_ADD;
+		XOR_W;
+		APPLY_P;
+		INPUT_BLOCK_SUB;
+		SWAP_BC;
+		INCR_W;
+		ptr = 0;
+		room = sizeof ctx->buf;
+	} while (len >= room);
+
+	/* Keep the trailing partial block, if any, for later. */
+	if (len > 0) {
+		memcpy(buf, data, len);
+		ptr = len;
+	}
+	WRITE_STATE(ctx);
+	ctx->ptr = ptr;
+}
+
+/*
+ * Finish the computation: pad the pending block, run the final rounds,
+ * write the digest to `dst` and reinitialize the context.
+ *
+ * cc           the context (a sph_shabal_context)
+ * ub           the extra bits to append before the padding bit
+ * n            the number of extra bits
+ * dst          receives size_words * 4 bytes of output
+ * size_words   the output length in 32-bit words (6, 7, 8, 12 or 16)
+ *
+ * With any other size_words, the function returns after the padding has
+ * been written into the context buffer, without producing output and
+ * without reinitializing the context.
+ */
+static void
+shabal_close(void *cc, unsigned ub, unsigned n, void *dst, unsigned size_words)
+{
+	sph_shabal_context *ctx = cc;
+	unsigned char *buf = ctx->buf;
+	size_t ptr = ctx->ptr;
+	unsigned marker = 0x80 >> n;
+	int rounds;
+	size_t out_len;
+	union {
+		unsigned char tmp_out[64];
+		sph_u32 dummy;
+	} u;
+	DECL_STATE
+
+	/* Extra bits, then the padding bit, then zeros up to the block end. */
+	buf[ptr] = ((ub & -marker) | marker) & 0xFF;
+	memset(buf + ptr + 1, 0, (sizeof ctx->buf) - (ptr + 1));
+
+	READ_STATE(ctx);
+	DECODE_BLOCK;
+	INPUT_BLOCK_ADD;
+	XOR_W;
+	APPLY_P;
+	rounds = 3;
+	while (rounds -- > 0) {
+		SWAP_BC;
+		XOR_W;
+		APPLY_P;
+	}
+
+	if (size_words != 6 && size_words != 7 && size_words != 8
+		&& size_words != 12 && size_words != 16)
+		return;
+
+	/*
+	 * The digest is the last size_words words of B. They are encoded
+	 * at their natural offsets in a temporary buffer whose tail is
+	 * then copied to the destination.
+	 */
+	if (size_words == 16) {
+		sph_enc32le_aligned(u.tmp_out +  0, B0);
+		sph_enc32le_aligned(u.tmp_out +  4, B1);
+		sph_enc32le_aligned(u.tmp_out +  8, B2);
+		sph_enc32le_aligned(u.tmp_out + 12, B3);
+	}
+	if (size_words >= 12) {
+		sph_enc32le_aligned(u.tmp_out + 16, B4);
+		sph_enc32le_aligned(u.tmp_out + 20, B5);
+		sph_enc32le_aligned(u.tmp_out + 24, B6);
+		sph_enc32le_aligned(u.tmp_out + 28, B7);
+	}
+	if (size_words >= 8)
+		sph_enc32le_aligned(u.tmp_out + 32, B8);
+	if (size_words >= 7)
+		sph_enc32le_aligned(u.tmp_out + 36, B9);
+	sph_enc32le_aligned(u.tmp_out + 40, BA);
+	sph_enc32le_aligned(u.tmp_out + 44, BB);
+	sph_enc32le_aligned(u.tmp_out + 48, BC);
+	sph_enc32le_aligned(u.tmp_out + 52, BD);
+	sph_enc32le_aligned(u.tmp_out + 56, BE);
+	sph_enc32le_aligned(u.tmp_out + 60, BF);
+
+	out_len = size_words << 2;
+	memcpy(dst, u.tmp_out + ((sizeof u.tmp_out) - out_len), out_len);
+	shabal_init(ctx, size_words << 5);
+}
+
+#if 0
+/* see sph_shabal.h */
+void
+sph_shabal192_init(void *cc)
+{
+	shabal_init(cc, 192);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal192(void *cc, const void *data, size_t len)
+{
+	shabal_core(cc, data, len);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal192_close(void *cc, void *dst)
+{
+	shabal_close(cc, 0, 0, dst, 6);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal192_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	shabal_close(cc, ub, n, dst, 6);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal224_init(void *cc)
+{
+	shabal_init(cc, 224);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal224(void *cc, const void *data, size_t len)
+{
+	shabal_core(cc, data, len);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal224_close(void *cc, void *dst)
+{
+	shabal_close(cc, 0, 0, dst, 7);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	shabal_close(cc, ub, n, dst, 7);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal256_init(void *cc)
+{
+	shabal_init(cc, 256);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal256(void *cc, const void *data, size_t len)
+{
+	shabal_core(cc, data, len);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal256_close(void *cc, void *dst)
+{
+	shabal_close(cc, 0, 0, dst, 8);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	shabal_close(cc, ub, n, dst, 8);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal384_init(void *cc)
+{
+	shabal_init(cc, 384);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal384(void *cc, const void *data, size_t len)
+{
+	shabal_core(cc, data, len);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal384_close(void *cc, void *dst)
+{
+	shabal_close(cc, 0, 0, dst, 12);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	shabal_close(cc, ub, n, dst, 12);
+}
+
+#endif
+
+/*
+ * see sph_shabal.h
+ *
+ * Initializes the context for a 512-bit output by calling shabal_init()
+ * with size 512.
+ */
+void
+sph_shabal512_init(void *cc)
+{
+	shabal_init(cc, 512);
+}
+
+/*
+ * see sph_shabal.h
+ *
+ * Feeds `len` bytes from `data` into the context; a plain forward to
+ * shabal_core().
+ */
+void
+sph_shabal512(void *cc, const void *data, size_t len)
+{
+	shabal_core(cc, data, len);
+}
+
+/*
+ * see sph_shabal.h
+ *
+ * Finishes the computation with no extra bits (ub = 0, n = 0) and asks
+ * shabal_close() for 16 output words, i.e. 64 bytes written to `dst`.
+ */
+void
+sph_shabal512_close(void *cc, void *dst)
+{
+	shabal_close(cc, 0, 0, dst, 16);
+}
+
+/*
+ * see sph_shabal.h
+ *
+ * Same as sph_shabal512_close(), but passes the caller's extra bits
+ * (`ub`, `n`) through to shabal_close(); the output is 16 words
+ * (64 bytes) written to `dst`.
+ */
+void
+sph_shabal512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	shabal_close(cc, ub, n, dst, 16);
+}
+#ifdef __cplusplus
+}
+#endif
--- a/sph/sph_shabal.h
+++ b/sph/sph_shabal.h
@ -0,0 +1,344 @@
+/* $Id: sph_shabal.h 175 2010-05-07 16:03:20Z tp $ */
+/**
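+ * Shabal interface. Shabal is a family of functions which differ by
+ * their output size; this implementation defines Shabal for output
+ * sizes 192, 224, 256, 384 and 512 bits. Note that the accompanying
+ * shabal.c currently compiles only the 512-bit variant: the 192-, 224-,
+ * 256- and 384-bit code is disabled there with "#if 0", so of the
+ * functions declared below only the sph_shabal512* ones are defined.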
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_shabal.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_SHABAL_H__
+#define SPH_SHABAL_H__
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#include <stddef.h>
+#include "sph_types.h"
+
+/**
+ * Output size (in bits) for Shabal-192.
+ */
+#define SPH_SIZE_shabal192   192
+
+/**
+ * Output size (in bits) for Shabal-224.
+ */
+#define SPH_SIZE_shabal224   224
+
+/**
+ * Output size (in bits) for Shabal-256.
+ */
+#define SPH_SIZE_shabal256   256
+
+/**
+ * Output size (in bits) for Shabal-384.
+ */
+#define SPH_SIZE_shabal384   384
+
+/**
+ * Output size (in bits) for Shabal-512.
+ */
+#define SPH_SIZE_shabal512   512
+
+/**
+ * This structure is a context for Shabal computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * a Shabal computation has been performed, the context can be reused for
+ * another computation.
+ *
+ * The contents of this structure are private. A running Shabal computation
+ * can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char buf[64];    /* pending input block; first field, for alignment */
+	size_t ptr;               /* number of bytes currently held in buf */
+	sph_u32 A[12], B[16], C[16];  /* the three arrays of state words */
+	sph_u32 Whigh, Wlow;      /* high and low words of the block counter (set to 0 and 1 on init) */
+#endif
+} sph_shabal_context;
+
+/**
+ * Type for a Shabal-192 context (identical to the common context).
+ */
+typedef sph_shabal_context sph_shabal192_context;
+
+/**
+ * Type for a Shabal-224 context (identical to the common context).
+ */
+typedef sph_shabal_context sph_shabal224_context;
+
+/**
+ * Type for a Shabal-256 context (identical to the common context).
+ */
+typedef sph_shabal_context sph_shabal256_context;
+
+/**
+ * Type for a Shabal-384 context (identical to the common context).
+ */
+typedef sph_shabal_context sph_shabal384_context;
+
+/**
+ * Type for a Shabal-512 context (identical to the common context).
+ */
+typedef sph_shabal_context sph_shabal512_context;
+
+/**
+ * Initialize a Shabal-192 context. This process performs no memory allocation.
+ *
+ * @param cc   the Shabal-192 context (pointer to a
+ *             <code>sph_shabal192_context</code>)
+ */
+void sph_shabal192_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Shabal-192 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_shabal192(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Shabal-192 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (24 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Shabal-192 context
+ * @param dst   the destination buffer
+ */
+void sph_shabal192_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (24 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Shabal-192 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_shabal192_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a Shabal-224 context. This process performs no memory allocation.
+ *
+ * @param cc   the Shabal-224 context (pointer to a
+ *             <code>sph_shabal224_context</code>)
+ */
+void sph_shabal224_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Shabal-224 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_shabal224(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Shabal-224 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (28 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Shabal-224 context
+ * @param dst   the destination buffer
+ */
+void sph_shabal224_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (28 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Shabal-224 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_shabal224_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a Shabal-256 context. This process performs no memory allocation.
+ *
+ * @param cc   the Shabal-256 context (pointer to a
+ *             <code>sph_shabal256_context</code>)
+ */
+void sph_shabal256_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Shabal-256 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_shabal256(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Shabal-256 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (32 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Shabal-256 context
+ * @param dst   the destination buffer
+ */
+void sph_shabal256_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (32 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Shabal-256 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_shabal256_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a Shabal-384 context. This process performs no memory allocation.
+ *
+ * @param cc   the Shabal-384 context (pointer to a
+ *             <code>sph_shabal384_context</code>)
+ */
+void sph_shabal384_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Shabal-384 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_shabal384(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Shabal-384 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (48 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Shabal-384 context
+ * @param dst   the destination buffer
+ */
+void sph_shabal384_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (48 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Shabal-384 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_shabal384_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a Shabal-512 context. This process performs no memory allocation.
+ *
+ * @param cc   the Shabal-512 context (pointer to a
+ *             <code>sph_shabal512_context</code>)
+ */
+void sph_shabal512_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Shabal-512 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_shabal512(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Shabal-512 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (64 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Shabal-512 context
+ * @param dst   the destination buffer
+ */
+void sph_shabal512_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (64 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Shabal-512 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_shabal512_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/sph/sph_whirlpool.h
+++ b/sph/sph_whirlpool.h
@ -0,0 +1,216 @@
+/* $Id: sph_whirlpool.h 216 2010-06-08 09:46:57Z tp $ */
+/**
+ * WHIRLPOOL interface.
+ *
+ * WHIRLPOOL knows three variants, dubbed "WHIRLPOOL-0" (original
+ * version, published in 2000, studied by NESSIE), "WHIRLPOOL-1"
+ * (first revision, 2001, with a new S-box) and "WHIRLPOOL" (current
+ * version, 2003, with a new diffusion matrix, also described as "plain
+ * WHIRLPOOL"). All three variants are implemented here.
+ *
+ * The original WHIRLPOOL (i.e. WHIRLPOOL-0) was published in: P. S. L.
+ * M. Barreto, V. Rijmen, "The Whirlpool Hashing Function", First open
+ * NESSIE Workshop, Leuven, Belgium, November 13--14, 2000.
+ *
+ * The current WHIRLPOOL specification and a reference implementation
+ * can be found on the WHIRLPOOL web page:
+ * http://paginas.terra.com.br/informatica/paulobarreto/WhirlpoolPage.html
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_whirlpool.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_WHIRLPOOL_H__
+#define SPH_WHIRLPOOL_H__
+
+#include <stddef.h>
+#include "sph_types.h"
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#if SPH_64
+
+/**
+ * Output size (in bits) for WHIRLPOOL.
+ */
+#define SPH_SIZE_whirlpool   512
+
+/**
+ * Output size (in bits) for WHIRLPOOL-0.
+ */
+#define SPH_SIZE_whirlpool0   512
+
+/**
+ * Output size (in bits) for WHIRLPOOL-1.
+ */
+#define SPH_SIZE_whirlpool1   512
+
+/**
+ * This structure is a context for WHIRLPOOL computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * a WHIRLPOOL computation has been performed, the context can be reused for
+ * another computation.
+ *
+ * The contents of this structure are private. A running WHIRLPOOL computation
+ * can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char buf[64];    /* first field, for alignment */
+	/*
+	 * Eight sph_u64 words; presumably the running hash state (the
+	 * "intermediate values" of the computation) -- confirm in whirlpool.c.
+	 */
+	sph_u64 state[8];
+	/*
+	 * Counter field(s). This typedef sits inside the header's outer
+	 * "#if SPH_64" region, so only the single 64-bit "count" member is
+	 * ever compiled here; the split 32-bit pair in the #else arm is
+	 * unreachable in this header.
+	 * NOTE(review): presumably the number of input bytes processed so
+	 * far -- confirm against the implementation.
+	 */
+#if SPH_64
+	sph_u64 count;
+#else
+	sph_u32 count_high, count_low;
+#endif
+#endif
+} sph_whirlpool_context;
+
+/**
+ * Initialize a WHIRLPOOL context. This process performs no memory allocation.
+ *
+ * @param cc   the WHIRLPOOL context (pointer to a
+ *             <code>sph_whirlpool_context</code>)
+ */
+void sph_whirlpool_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing). This function applies the
+ * plain WHIRLPOOL algorithm.
+ *
+ * @param cc     the WHIRLPOOL context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_whirlpool(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current WHIRLPOOL computation and output the result into the
+ * provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (64 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the WHIRLPOOL context
+ * @param dst   the destination buffer
+ */
+void sph_whirlpool_close(void *cc, void *dst);
+
+/**
+ * WHIRLPOOL-0 uses the same structure as plain WHIRLPOOL.
+ */
+typedef sph_whirlpool_context sph_whirlpool0_context;
+
+#ifdef DOXYGEN_IGNORE
+/**
+ * Initialize a WHIRLPOOL-0 context. This function is identical to
+ * <code>sph_whirlpool_init()</code>.
+ *
+ * @param cc   the WHIRLPOOL context (pointer to a
+ *             <code>sph_whirlpool0_context</code>)
+ */
+void sph_whirlpool0_init(void *cc);
+#endif
+
+#ifndef DOXYGEN_IGNORE
+#define sph_whirlpool0_init   sph_whirlpool_init
+#endif
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing). This function applies the
+ * WHIRLPOOL-0 algorithm.
+ *
+ * @param cc     the WHIRLPOOL context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_whirlpool0(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current WHIRLPOOL-0 computation and output the result into the
+ * provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (64 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the WHIRLPOOL-0 context
+ * @param dst   the destination buffer
+ */
+void sph_whirlpool0_close(void *cc, void *dst);
+
+/**
+ * WHIRLPOOL-1 uses the same structure as plain WHIRLPOOL.
+ */
+typedef sph_whirlpool_context sph_whirlpool1_context;
+
+#ifdef DOXYGEN_IGNORE
+/**
+ * Initialize a WHIRLPOOL-1 context. This function is identical to
+ * <code>sph_whirlpool_init()</code>.
+ *
+ * @param cc   the WHIRLPOOL context (pointer to a
+ *             <code>sph_whirlpool1_context</code>)
+ */
+void sph_whirlpool1_init(void *cc);
+#endif
+
+#ifndef DOXYGEN_IGNORE
+#define sph_whirlpool1_init   sph_whirlpool_init
+#endif
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing). This function applies the
+ * WHIRLPOOL-1 algorithm.
+ *
+ * @param cc     the WHIRLPOOL context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_whirlpool1(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current WHIRLPOOL-1 computation and output the result into the
+ * provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (64 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the WHIRLPOOL-1 context
+ * @param dst   the destination buffer
+ */
+void sph_whirlpool1_close(void *cc, void *dst);
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/sph/whirlpool.c
+++ b/sph/whirlpool.c
--- a/sph/x15_helper.c
+++ b/sph/x15_helper.c
@ -0,0 +1,346 @@
+/* $Id: md_helper.c 216 2010-06-08 09:46:57Z tp $ */
+/*
+ * This file contains some functions which implement the external data
+ * handling and padding for Merkle-Damgard hash functions which follow
+ * the conventions set out by MD4 (little-endian) or SHA-1 (big-endian).
+ *
+ * API: this file is meant to be included, not compiled as a stand-alone
+ * file. Some macros must be defined:
+ *   RFUN   name for the round function
+ *   HASH   "short name" for the hash function
+ *   BE32   defined for big-endian, 32-bit based (e.g. SHA-1)
+ *   LE32   defined for little-endian, 32-bit based (e.g. MD5)
+ *   BE64   defined for big-endian, 64-bit based (e.g. SHA-512)
+ *   LE64   defined for little-endian, 64-bit based (no example yet)
+ *   PW01   if defined, append 0x01 instead of 0x80 (for Tiger)
+ *   BLEN   if defined, length of a message block (in bytes)
+ *   PLW1   if defined, length is defined on one 64-bit word only (for Tiger)
+ *   PLW4   if defined, length is defined on four 64-bit words (for WHIRLPOOL)
+ *   SVAL   if defined, reference to the context state information
+ *
+ * BLEN is used when a message block is not 16 (32-bit or 64-bit) words:
+ * this is used for instance for Tiger, which works on 64-bit words but
+ * uses 512-bit message blocks (eight 64-bit words). PLW1 and PLW4 are
+ * ignored if 32-bit words are used; if 64-bit words are used and PLW1 is
+ * set, then only one word (64 bits) will be used to encode the input
+ * message length (in bits), otherwise two words will be used (as in
+ * SHA-384 and SHA-512). If 64-bit words are used and PLW4 is defined (but
+ * not PLW1), four 64-bit words will be used to encode the message length
+ * (in bits). Note that regardless of those settings, only 64-bit message
+ * lengths are supported (in bits): messages longer than 2 Exabytes will be
+ * improperly hashed (this is unlikely to happen soon: 2 Exabytes is about
+ * 2 millions Terabytes, which is huge).
+ *
+ * If CLOSE_ONLY is defined, then this file defines only the sph_XXX_close()
+ * function. This is used for Tiger2, which is identical to Tiger except
+ * when it comes to the padding (Tiger2 uses the standard 0x80 byte instead
+ * of the 0x01 from original Tiger).
+ *
+ * The RFUN function is invoked with two arguments, the first pointing to
+ * aligned data (as a "const void *"), the second being state information
+ * from the context structure. By default, this state information is the
+ * "val" field from the context, and this field is assumed to be an array
+ * of words ("sph_u32" or "sph_u64", depending on BE32/LE32/BE64/LE64).
+ * The "val" field can have any type, except
+ * for the output encoding which assumes that it is an array of "sph_u32"
+ * values. By defining NO_OUTPUT, this last step is deactivated; the
+ * includer code is then responsible for writing out the hash result. When
+ * NO_OUTPUT is defined, the third parameter to the "close()" function is
+ * ignored.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+/*
+ * MSVC warning C4146 ("unary minus operator applied to unsigned type") is
+ * silenced: the padding code in this file negates an unsigned value on
+ * purpose ("ub & -z").
+ */
+#ifdef _MSC_VER
+#pragma warning (disable: 4146)
+#endif
+
+/*
+ * Two-level token pasting: SPH_XCAT forwards to SPH_XCAT_ so that macro
+ * arguments (such as HASH) are expanded before being concatenated.
+ */
+#undef SPH_XCAT
+#define SPH_XCAT(a, b)     SPH_XCAT_(a, b)
+#undef SPH_XCAT_
+#define SPH_XCAT_(a, b)    a ## b
+
+/*
+ * SPH_BLEN: message block length in bytes; SPH_WLEN: word length in bytes.
+ * The 64-bit variants (BE64/LE64) default to 128-byte blocks of 8-byte
+ * words, everything else to 64-byte blocks of 4-byte words.
+ */
+#undef SPH_BLEN
+#undef SPH_WLEN
+#if defined BE64 || defined LE64
+#define SPH_BLEN    128U
+#define SPH_WLEN      8U
+#else
+#define SPH_BLEN     64U
+#define SPH_WLEN      4U
+#endif
+
+/* An includer-supplied BLEN overrides the default block length. */
+#ifdef BLEN
+#undef SPH_BLEN
+#define SPH_BLEN    BLEN
+#endif
+
+/*
+ * SPH_MAXPAD: offset within the final block at which the encoded message
+ * length starts, i.e. the block length minus the size of the length field
+ * (one word with PLW1, four words with PLW4, two words otherwise).
+ */
+#undef SPH_MAXPAD
+#if defined PLW1
+#define SPH_MAXPAD   (SPH_BLEN - SPH_WLEN)
+#elif defined PLW4
+#define SPH_MAXPAD   (SPH_BLEN - (SPH_WLEN << 2))
+#else
+#define SPH_MAXPAD   (SPH_BLEN - (SPH_WLEN << 1))
+#endif
+
+/*
+ * SPH_VAL: the state expression handed to RFUN as its second argument.
+ * When the includer provides SVAL it is used as-is and SPH_NO_OUTPUT is
+ * set, which disables the generic output encoding in the close function;
+ * otherwise the context's "val" field is used.
+ */
+#undef SPH_VAL
+#undef SPH_NO_OUTPUT
+#ifdef SVAL
+#define SPH_VAL         SVAL
+#define SPH_NO_OUTPUT   1
+#else
+#define SPH_VAL   sc->val
+#endif
+
+#ifndef CLOSE_ONLY
+
+/*
+ * Buffered update. Input bytes are copied into sc->buf; every time the
+ * buffer holds a full block of SPH_BLEN bytes, RFUN(sc->buf, SPH_VAL) is
+ * invoked and filling restarts at offset 0. The byte counter in the
+ * context is advanced by the number of bytes consumed in each iteration.
+ *
+ * When SPH_UPTR is defined this is a file-local helper named HASH_short;
+ * otherwise it is the public sph_HASH() entry point itself.
+ *
+ * cc     the hash context (a sph_HASH_context)
+ * data   the input data
+ * len    the input data length (in bytes); zero is a no-op
+ */
+#ifdef SPH_UPTR
+static void
+SPH_XCAT(HASH, _short)(void *cc, const void *data, size_t len)
+#else
+void
+SPH_XCAT(sph_, HASH)(void *cc, const void *data, size_t len)
+#endif
+{
+	SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc;
+	unsigned current;
+
+	sc = cc;
+	/*
+	 * Fill level of the buffer, taken from the low bits of the byte
+	 * counter (the mask assumes SPH_BLEN is a power of two).
+	 */
+#if SPH_64
+	current = (unsigned)sc->count & (SPH_BLEN - 1U);
+#else
+	current = (unsigned)sc->count_low & (SPH_BLEN - 1U);
+#endif
+	while (len > 0) {
+		unsigned clen;
+#if !SPH_64
+		sph_u32 clow, clow2;
+#endif
+
+		/* copy at most what is still missing to complete the block */
+		clen = SPH_BLEN - current;
+		if (clen > len)
+			clen = len;
+		memcpy(sc->buf + current, data, clen);
+		data = (const unsigned char *)data + clen;
+		current += clen;
+		len -= clen;
+		if (current == SPH_BLEN) {
+			/* block complete: process it and start refilling */
+			RFUN(sc->buf, SPH_VAL);
+			current = 0;
+		}
+#if SPH_64
+		sc->count += clen;
+#else
+		/* 64-bit counter kept as two 32-bit halves: add with carry */
+		clow = sc->count_low;
+		clow2 = SPH_T32(clow + clen);
+		sc->count_low = clow2;
+		if (clow2 < clow)
+			sc->count_high ++;
+#endif
+	}
+}
+
+#ifdef SPH_UPTR
+/*
+ * Public update entry point used when SPH_UPTR is defined. Inputs shorter
+ * than two blocks are delegated to the buffered helper HASH_short(). For
+ * longer inputs, a partially filled buffer is first completed through the
+ * helper, then whole blocks are processed by calling RFUN directly on the
+ * caller's data (no copy through sc->buf); the remaining tail is stored in
+ * sc->buf and the byte counter is advanced by the amount handled here.
+ *
+ * cc     the hash context (a sph_HASH_context)
+ * data   the input data
+ * len    the input data length (in bytes)
+ */
+void
+SPH_XCAT(sph_, HASH)(void *cc, const void *data, size_t len)
+{
+	SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc;
+	unsigned current;
+	size_t orig_len;
+#if !SPH_64
+	sph_u32 clow, clow2;
+#endif
+
+	if (len < (2 * SPH_BLEN)) {
+		SPH_XCAT(HASH, _short)(cc, data, len);
+		return;
+	}
+	sc = cc;
+#if SPH_64
+	current = (unsigned)sc->count & (SPH_BLEN - 1U);
+#else
+	current = (unsigned)sc->count_low & (SPH_BLEN - 1U);
+#endif
+	if (current > 0) {
+		unsigned t;
+
+		/*
+		 * Complete the pending block through the helper, which also
+		 * accounts for these t bytes in the counter.
+		 */
+		t = SPH_BLEN - current;
+		SPH_XCAT(HASH, _short)(cc, data, t);
+		data = (const unsigned char *)data + t;
+		len -= t;
+	}
+#if !SPH_UNALIGNED
+	/*
+	 * RFUN is given the caller's pointer below; if that pointer is not a
+	 * multiple of the word size, fall back to the copying helper.
+	 */
+	if (((SPH_UPTR)data & (SPH_WLEN - 1U)) != 0) {
+		SPH_XCAT(HASH, _short)(cc, data, len);
+		return;
+	}
+#endif
+	orig_len = len;
+	while (len >= SPH_BLEN) {
+		RFUN(data, SPH_VAL);
+		len -= SPH_BLEN;
+		data = (const unsigned char *)data + SPH_BLEN;
+	}
+	/* keep the incomplete tail for the next update or the close call */
+	if (len > 0)
+		memcpy(sc->buf, data, len);
+#if SPH_64
+	sc->count += (sph_u64)orig_len;
+#else
+	clow = sc->count_low;
+	clow2 = SPH_T32(clow + orig_len);
+	sc->count_low = clow2;
+	if (clow2 < clow)
+		sc->count_high ++;
+	/*
+	 * This code handles the improbable situation where "size_t" is
+	 * greater than 32 bits, and yet we do not have a 64-bit type.
+	 * The shift by 32 is split into three smaller shifts: a single
+	 * shift by 32 would be undefined where size_t is only 32 bits wide.
+	 */
+	orig_len >>= 12;
+	orig_len >>= 10;
+	orig_len >>= 10;
+	sc->count_high += orig_len;
+#endif
+}
+#endif
+
+#endif
+
+/*
+ * Perform padding and produce result. The context is NOT reinitialized
+ * by this function.
+ *
+ * The padding consists of one marker byte (carrying the n extra bits, if
+ * any), zero bytes up to offset SPH_MAXPAD of the last block, and the
+ * message length in bits encoded in the remaining bytes of that block.
+ * The layout of the length field is selected by BE64/LE64/BE32 and
+ * PLW1/PLW4.
+ *
+ * cc     the hash context (a sph_HASH_context)
+ * ub     the extra bits (taken from the most significant end of the byte)
+ * n      the number of extra bits (0 to 7)
+ * dst    the destination buffer; ignored when SPH_NO_OUTPUT is set
+ * rnum   the number of state words to encode into dst
+ */
+static void
+SPH_XCAT(HASH, _addbits_and_close)(void *cc,
+	unsigned ub, unsigned n, void *dst, unsigned rnum)
+{
+	SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc;
+	unsigned current, u;
+#if !SPH_64
+	sph_u32 low, high;
+#endif
+
+	sc = cc;
+	/* number of bytes currently pending in the buffer */
+#if SPH_64
+	current = (unsigned)sc->count & (SPH_BLEN - 1U);
+#else
+	current = (unsigned)sc->count_low & (SPH_BLEN - 1U);
+#endif
+#ifdef PW01
+	/* PW01 padding convention: the marker byte is 0x01 when n == 0 */
+	sc->buf[current ++] = (0x100 | (ub & 0xFF)) >> (8 - n);
+#else
+	{
+		unsigned z;
+
+		/*
+		 * Keep the top n bits of ub and set the bit right below them;
+		 * the marker byte is 0x80 when n == 0.
+		 */
+		z = 0x80 >> n;
+		sc->buf[current ++] = ((ub & -z) | z) & 0xFF;
+	}
+#endif
+	if (current > SPH_MAXPAD) {
+		/*
+		 * No room left for the length field: zero-fill and process
+		 * this block, then continue in a fresh, zeroed block.
+		 */
+		memset(sc->buf + current, 0, SPH_BLEN - current);
+		RFUN(sc->buf, SPH_VAL);
+		memset(sc->buf, 0, SPH_MAXPAD);
+	} else {
+		memset(sc->buf + current, 0, SPH_MAXPAD - current);
+	}
+	/*
+	 * Encode the message length in bits: (count << 3) + n is the low
+	 * 64 bits, count >> 61 the bits that overflow into the next word.
+	 */
+#if defined BE64
+#if defined PLW1
+	sph_enc64be_aligned(sc->buf + SPH_MAXPAD,
+		SPH_T64(sc->count << 3) + (sph_u64)n);
+#elif defined PLW4
+	memset(sc->buf + SPH_MAXPAD, 0, 2 * SPH_WLEN);
+	sph_enc64be_aligned(sc->buf + SPH_MAXPAD + 2 * SPH_WLEN,
+		sc->count >> 61);
+	sph_enc64be_aligned(sc->buf + SPH_MAXPAD + 3 * SPH_WLEN,
+		SPH_T64(sc->count << 3) + (sph_u64)n);
+#else
+	sph_enc64be_aligned(sc->buf + SPH_MAXPAD, sc->count >> 61);
+	sph_enc64be_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN,
+		SPH_T64(sc->count << 3) + (sph_u64)n);
+#endif
+#elif defined LE64
+#if defined PLW1
+	sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
+		SPH_T64(sc->count << 3) + (sph_u64)n);
+#elif defined PLW4
+	/*
+	 * Four-word little-endian length. This arm used to test PLW1 a
+	 * second time and was therefore unreachable, which left the two
+	 * upper length words of the block unwritten for LE64 + PLW4.
+	 */
+	sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
+		SPH_T64(sc->count << 3) + (sph_u64)n);
+	sph_enc64le_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, sc->count >> 61);
+	memset(sc->buf + SPH_MAXPAD + 2 * SPH_WLEN, 0, 2 * SPH_WLEN);
+#else
+	sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
+		SPH_T64(sc->count << 3) + (sph_u64)n);
+	sph_enc64le_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, sc->count >> 61);
+#endif
+#else
+#if SPH_64
+#ifdef BE32
+	sph_enc64be_aligned(sc->buf + SPH_MAXPAD,
+		SPH_T64(sc->count << 3) + (sph_u64)n);
+#else
+	sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
+		SPH_T64(sc->count << 3) + (sph_u64)n);
+#endif
+#else
+	/* no 64-bit type: build the bit length from the two 32-bit halves */
+	low = sc->count_low;
+	high = SPH_T32((sc->count_high << 3) | (low >> 29));
+	low = SPH_T32(low << 3) + (sph_u32)n;
+#ifdef BE32
+	sph_enc32be(sc->buf + SPH_MAXPAD, high);
+	sph_enc32be(sc->buf + SPH_MAXPAD + SPH_WLEN, low);
+#else
+	sph_enc32le(sc->buf + SPH_MAXPAD, low);
+	sph_enc32le(sc->buf + SPH_MAXPAD + SPH_WLEN, high);
+#endif
+#endif
+#endif
+	RFUN(sc->buf, SPH_VAL);
+#ifdef SPH_NO_OUTPUT
+	/* the includer writes the result itself; silence unused warnings */
+	(void)dst;
+	(void)rnum;
+	(void)u;
+#else
+	/* encode the first rnum state words with the variant's endianness */
+	for (u = 0; u < rnum; u ++) {
+#if defined BE64
+		sph_enc64be((unsigned char *)dst + 8 * u, sc->val[u]);
+#elif defined LE64
+		sph_enc64le((unsigned char *)dst + 8 * u, sc->val[u]);
+#elif defined BE32
+		sph_enc32be((unsigned char *)dst + 4 * u, sc->val[u]);
+#else
+		sph_enc32le((unsigned char *)dst + 4 * u, sc->val[u]);
+#endif
+	}
+#endif
+}
+
+/*
+ * Finish a byte-aligned message: same as HASH_addbits_and_close() with no
+ * extra bits. As with that function, the context is not reinitialized.
+ */
+static void
+SPH_XCAT(HASH, _close)(void *cc, void *dst, unsigned rnum)
+{
+	const unsigned extra_bits = 0;
+	const unsigned extra_bit_count = 0;
+
+	SPH_XCAT(HASH, _addbits_and_close)(cc, extra_bits, extra_bit_count,
+		dst, rnum);
+}
--- a/util.c
+++ b/util.c
@ -1318,3 +1318,33 @@ out:
 	pthread_mutex_unlock(&tq->mutex);
 	return rval;
 }
+
+/*
+ * Write the first 32 bytes of hash to stdout as lowercase hexadecimal,
+ * two digits per byte, without separators or a trailing newline.
+ */
+static void print_hash(unsigned char *hash)
+{
+	const unsigned char *cursor = hash;
+	const unsigned char *const end = hash + 32;
+
+	while (cursor != end)
+		printf("%02x", *cursor++);
+}
+
+/*
+ * Print the CPU reference hashes (X11, X13, X14, X15) of an all-zero input
+ * buffer to stdout, one labelled line per algorithm, so that results can
+ * be compared between builds. Only the first 32 bytes of each result are
+ * shown (that is what print_hash() prints).
+ *
+ * The output buffer is cleared before every call so that a hash function
+ * writing fewer bytes than expected cannot leave a previous result behind.
+ */
+void print_hash_tests(void)
+{
+	unsigned char buf[128], hash[128];
+	memset(buf, 0, sizeof buf);
+	printf("CPU HASH ON EMPTY BUFFER RESULTS:\n");
+
+	memset(hash, 0, sizeof hash);
+	x11hash(&hash[0], &buf[0]);
+	printf("\nX11: "); print_hash(hash);
+
+	memset(hash, 0, sizeof hash);
+	x13hash(&hash[0], &buf[0]);
+	printf("\nX13: "); print_hash(hash);
+
+	memset(hash, 0, sizeof hash);
+	x14hash(&hash[0], &buf[0]);
+	printf("\nX14: "); print_hash(hash);
+
+	memset(hash, 0, sizeof hash);
+	x15hash(&hash[0], &buf[0]);
+	printf("\nX15: "); print_hash(hash);
+
+	/*
+	 * Every label starts with a newline, so the last line was left
+	 * unterminated; end it so that later output starts on its own line.
+	 */
+	printf("\n");
+}
--- a/x11/cuda_x11_cubehash512.cu
+++ b/x11/cuda_x11_cubehash512.cu
@ -1,3 +1,5 @@
+#include <cuda_runtime.h>
+
 // aus heavy.cu
 extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);

@ -8,10 +10,12 @@ typedef unsigned char uint8_t;
 typedef unsigned int uint32_t;
 typedef unsigned long long uint64_t;

-static __device__ uint32_t cuda_swab32(uint32_t x)
+#if 0
+__device__ static uint32_t cuda_swab32(uint32_t x)
 {
 	return __byte_perm(x, 0, 0x0123);
 }
+#endif

 typedef unsigned char BitSequence;
 typedef unsigned long long DataLength;
--- a/x11/cuda_x11_echo.cu
+++ b/x11/cuda_x11_echo.cu
@ -1,16 +1,13 @@
 #include <cuda.h>
-#include "cuda_runtime.h"
+#include <cuda_runtime.h>
 #include "device_launch_parameters.h"

 #include <stdio.h>
+#include <stdint.h>
 #include <memory.h>

-// Folgende Definitionen später durch header ersetzen
-typedef unsigned char uint8_t;
-typedef unsigned int uint32_t;
-typedef unsigned long long uint64_t;
-
 // das Hi Word aus einem 64 Bit Typen extrahieren
+#if 0
 static __device__ uint32_t HIWORD(const uint64_t &x) {
 #if __CUDA_ARCH__ >= 130
 	return (uint32_t)__double2hiint(__longlong_as_double(x));
@ -27,6 +24,7 @@ static __device__ uint32_t LOWORD(const uint64_t &x) {
 	return (uint32_t)(x & 0xFFFFFFFFULL);
 #endif
 }
+#endif

 #define SPH_C64(x)    ((uint64_t)(x ## ULL))
 #define SPH_C32(x)    ((uint32_t)(x ## U))
--- a/x11/cuda_x11_luffa512.cu
+++ b/x11/cuda_x11_luffa512.cu
@ -18,6 +18,8 @@
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

+#include <cuda_runtime.h>
+
 // aus heavy.cu
 extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);

--- a/x11/cuda_x11_shavite512.cu
+++ b/x11/cuda_x11_shavite512.cu
@ -1,14 +1,12 @@
+#include <stdint.h>
+#include <cuda_runtime.h>
+
 // aus heavy.cu
 extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);

 typedef unsigned char BitSequence;
 typedef unsigned long long DataLength;

-typedef unsigned char uint8_t;
-typedef unsigned short uint16_t;
-typedef unsigned int uint32_t;
-typedef unsigned long long uint64_t;
-
 #define SPH_C64(x)    ((uint64_t)(x ## ULL))
 #define SPH_C32(x)    ((uint32_t)(x ## U))
 #define SPH_T32(x)    ((x) & SPH_C32(0xFFFFFFFF))
--- a/x11/x11.cu
+++ b/x11/x11.cu
@ -18,6 +18,7 @@ extern "C"
 }

 #include <stdint.h>
+#include <cuda_runtime.h>

 // aus cpu-miner.c
 extern int device_map[8];
@ -71,86 +72,70 @@ extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t star
 											int order);

 // X11 Hashfunktion
-inline void x11hash(void *state, const void *input)
+extern "C" void x11hash(void *output, const void *input)
 {
-    // blake1-bmw2-grs3-skein4-jh5-keccak6-luffa7-cubehash8-shavite9-simd10-echo11
+	// blake1-bmw2-grs3-skein4-jh5-keccak6-luffa7-cubehash8-shavite9-simd10-echo11

-    sph_blake512_context ctx_blake;
-    sph_bmw512_context ctx_bmw;
-    sph_groestl512_context ctx_groestl;
-    sph_jh512_context ctx_jh;
-    sph_keccak512_context ctx_keccak;
-    sph_skein512_context ctx_skein;
-    sph_luffa512_context ctx_luffa;
-    sph_cubehash512_context ctx_cubehash;
-    sph_shavite512_context ctx_shavite;
-    sph_simd512_context ctx_simd;
-    sph_echo512_context ctx_echo;
+	sph_blake512_context ctx_blake;
+	sph_bmw512_context ctx_bmw;
+	sph_groestl512_context ctx_groestl;
+	sph_jh512_context ctx_jh;
+	sph_keccak512_context ctx_keccak;
+	sph_skein512_context ctx_skein;
+	sph_luffa512_context ctx_luffa;
+	sph_cubehash512_context ctx_cubehash;
+	sph_shavite512_context ctx_shavite;
+	sph_simd512_context ctx_simd;
+	sph_echo512_context ctx_echo;

-    unsigned char hash[64];
+	unsigned char hash[128];
+	memset(hash, 0, sizeof hash);

-    sph_blake512_init(&ctx_blake);
-    // ZBLAKE;
-    sph_blake512 (&ctx_blake, input, 80);
-    sph_blake512_close(&ctx_blake, (void*) hash);
-    
-    sph_bmw512_init(&ctx_bmw);
-    // ZBMW;
-    sph_bmw512 (&ctx_bmw, (const void*) hash, 64);
-    sph_bmw512_close(&ctx_bmw, (void*) hash);
+	sph_blake512_init(&ctx_blake);
+	sph_blake512 (&ctx_blake, input, 80);
+	sph_blake512_close(&ctx_blake, (void*) hash);

-    sph_groestl512_init(&ctx_groestl);
-    // ZGROESTL;
-    sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
-    sph_groestl512_close(&ctx_groestl, (void*) hash);
+	sph_bmw512_init(&ctx_bmw);
+	sph_bmw512 (&ctx_bmw, (const void*) hash, 64);
+	sph_bmw512_close(&ctx_bmw, (void*) hash);

-    sph_skein512_init(&ctx_skein);
-    // ZSKEIN;
-    sph_skein512 (&ctx_skein, (const void*) hash, 64);
-    sph_skein512_close(&ctx_skein, (void*) hash);
+	sph_groestl512_init(&ctx_groestl);
+	sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
+	sph_groestl512_close(&ctx_groestl, (void*) hash);

-    sph_jh512_init(&ctx_jh);
-    // ZJH;
-    sph_jh512 (&ctx_jh, (const void*) hash, 64);
-    sph_jh512_close(&ctx_jh, (void*) hash);
+	sph_skein512_init(&ctx_skein);
+	sph_skein512 (&ctx_skein, (const void*) hash, 64);
+	sph_skein512_close(&ctx_skein, (void*) hash);

-    sph_keccak512_init(&ctx_keccak);
-    // ZKECCAK;
-    sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
-    sph_keccak512_close(&ctx_keccak, (void*) hash);
+	sph_jh512_init(&ctx_jh);
+	sph_jh512 (&ctx_jh, (const void*) hash, 64);
+	sph_jh512_close(&ctx_jh, (void*) hash);

-    sph_luffa512_init(&ctx_luffa);
-    // ZLUFFA;
-    sph_luffa512 (&ctx_luffa, (const void*) hash, 64);
-    sph_luffa512_close (&ctx_luffa, (void*) hash);
+	sph_keccak512_init(&ctx_keccak);
+	sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
+	sph_keccak512_close(&ctx_keccak, (void*) hash);

-#if 1
-    sph_cubehash512_init(&ctx_cubehash);
-    // ZCUBEHASH;
-    sph_cubehash512 (&ctx_cubehash, (const void*) hash, 64);
-    sph_cubehash512_close(&ctx_cubehash, (void*) hash);
-#endif
+	sph_luffa512_init(&ctx_luffa);
+	sph_luffa512 (&ctx_luffa, (const void*) hash, 64);
+	sph_luffa512_close (&ctx_luffa, (void*) hash);

-#if 1
-    sph_shavite512_init(&ctx_shavite);
-    // ZSHAVITE;
-    sph_shavite512 (&ctx_shavite, (const void*) hash, 64);
-    sph_shavite512_close(&ctx_shavite, (void*) hash);
-#endif
+	sph_cubehash512_init(&ctx_cubehash);
+	sph_cubehash512 (&ctx_cubehash, (const void*) hash, 64);
+	sph_cubehash512_close(&ctx_cubehash, (void*) hash);

-    sph_simd512_init(&ctx_simd);
-    // ZSIMD
-    sph_simd512 (&ctx_simd, (const void*) hash, 64);
-    sph_simd512_close(&ctx_simd, (void*) hash);
+	sph_shavite512_init(&ctx_shavite);
+	sph_shavite512 (&ctx_shavite, (const void*) hash, 64);
+	sph_shavite512_close(&ctx_shavite, (void*) hash);

-#if 1
-    sph_echo512_init(&ctx_echo);
-    // ZECHO
-    sph_echo512 (&ctx_echo, (const void*) hash, 64);
-    sph_echo512_close(&ctx_echo, (void*) hash); 
-#endif
+	sph_simd512_init(&ctx_simd);
+	sph_simd512 (&ctx_simd, (const void*) hash, 64);
+	sph_simd512_close(&ctx_simd, (void*) hash);

-    memcpy(state, hash, 32);
+	sph_echo512_init(&ctx_echo);
+	sph_echo512 (&ctx_echo, (const void*) hash, 64);
+	sph_echo512_close(&ctx_echo, (void*) hash);
+
+	memcpy(output, hash, 32);
 }


@ -242,13 +227,16 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
 			be32enc(&endiandata[19], foundNonce);
 			x11hash(vhash64, endiandata);

-			if ((vhash64[7]<=Htarg) && fulltest(vhash64, ptarget)) {
-
+			if ((vhash64[7] <= Htarg) && fulltest(vhash64, ptarget)) {
 				pdata[19] = foundNonce;
 				*hashes_done = foundNonce - first_nonce + 1;
 				return 1;
-			} else {
-				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
+			}
+			else if (vhash64[7] > Htarg) {
+				applog(LOG_INFO, "GPU #%d: result for %08x is not in range: %x > %x", thr_id, foundNonce, vhash64[7], Htarg);
+			}
+			else {
+				applog(LOG_INFO, "GPU #%d: result for %08x does not validate on CPU!", thr_id, foundNonce);
 			}
 		}

--- a/x13/cuda_x13_fugue512.cu
+++ b/x13/cuda_x13_fugue512.cu
@ -5,6 +5,25 @@
 * heavily based on phm's sgminer
 *
 */
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include "device_launch_parameters.h"
+
+#include <stdint.h>
+
+// aus heavy.cu
+extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
+
+#define SPH_C32(x)    ((uint32_t)(x ## U))
+#define SPH_T32(x)    ((x) & SPH_C32(0xFFFFFFFF))
+
+#if __CUDA_ARCH__ < 350
+// Kepler (Compute 3.0)
+#define ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
+#else
+// Kepler (Compute 3.5) and Maxwell (Compute 5.0)
+#define ROTL32(x, n) __funnelshift_l( (x), (x), (n) )
+#endif

 /*
 * X13 kernel implementation.
@ -37,25 +56,8 @@
 * @author   phm <phm@inbox.com>
 */

-// aus heavy.cu
-extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
-
-#include <stdint.h>
-
-#define SPH_C64(x)    ((uint64_t)(x ## ULL))
-#define SPH_C32(x)    ((uint32_t)(x ## U))
-#define SPH_T32(x)    ((x) & SPH_C32(0xFFFFFFFF))
-
 #define SWAB32(x) ( __byte_perm(x, x, 0x0123) )

-#if __CUDA_ARCH__ < 350 
-    // Kepler (Compute 3.0)
-    #define ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
-#else
-    // Kepler (Compute 3.5)
-    #define ROTL32(x, n) __funnelshift_l( (x), (x), (n) )
-#endif
-
 #define mixtab0(x) (*((uint32_t*)mixtabs + (    (x))))
 #define mixtab1(x) (*((uint32_t*)mixtabs + (256+(x))))
 #define mixtab2(x) (*((uint32_t*)mixtabs + (512+(x))))
--- a/x13/x13.cu
+++ b/x13/x13.cu
@ -1,8 +1,6 @@
 /*
- * X13 algorithm built on cbuchner1's original X11
- * 
+ * X13 algorithm
 */
-
 extern "C"
 {
 #include "sph/sph_blake.h"
@ -24,6 +22,9 @@ extern "C"
 #include "miner.h"
 }

+#include <stdint.h>
+#include <cuda_runtime.h>
+
 // aus cpu-miner.c
 extern int device_map[8];

@ -82,90 +83,80 @@ extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t star
 											int order);

 // X13 Hashfunktion
-inline void x13hash(void *state, const void *input)
+extern "C" void x13hash(void *output, const void *input)
 {
-    // blake1-bmw2-grs3-skein4-jh5-keccak6-luffa7-cubehash8-shavite9-simd10-echo11-hamsi12-fugue13
+	// blake1-bmw2-grs3-skein4-jh5-keccak6-luffa7-cubehash8-shavite9-simd10-echo11-hamsi12-fugue13

-    sph_blake512_context ctx_blake;
-    sph_bmw512_context ctx_bmw;
-    sph_groestl512_context ctx_groestl;
-    sph_jh512_context ctx_jh;
-    sph_keccak512_context ctx_keccak;
-    sph_skein512_context ctx_skein;
-    sph_luffa512_context ctx_luffa;
-    sph_cubehash512_context ctx_cubehash;
-    sph_shavite512_context ctx_shavite;
-    sph_simd512_context ctx_simd;
-    sph_echo512_context ctx_echo;
-    sph_hamsi512_context ctx_hamsi;
-    sph_fugue512_context ctx_fugue;
+	sph_blake512_context ctx_blake;
+	sph_bmw512_context ctx_bmw;
+	sph_groestl512_context ctx_groestl;
+	sph_jh512_context ctx_jh;
+	sph_keccak512_context ctx_keccak;
+	sph_skein512_context ctx_skein;
+	sph_luffa512_context ctx_luffa;
+	sph_cubehash512_context ctx_cubehash;
+	sph_shavite512_context ctx_shavite;
+	sph_simd512_context ctx_simd;
+	sph_echo512_context ctx_echo;
+	sph_hamsi512_context ctx_hamsi;
+	sph_fugue512_context ctx_fugue;

-    uint32_t hash[16];
+	uint32_t hash[32];
+	memset(hash, 0, sizeof hash);

-    sph_blake512_init(&ctx_blake);
-    // ZBLAKE;
-    sph_blake512 (&ctx_blake, input, 80);
-    sph_blake512_close(&ctx_blake, (void*) hash);
+	sph_blake512_init(&ctx_blake);
+	sph_blake512 (&ctx_blake, input, 80);
+	sph_blake512_close(&ctx_blake, (void*) hash);

-    sph_bmw512_init(&ctx_bmw);
-    // ZBMW;
-    sph_bmw512 (&ctx_bmw, (const void*) hash, 64);
-    sph_bmw512_close(&ctx_bmw, (void*) hash);
+	sph_bmw512_init(&ctx_bmw);
+	sph_bmw512 (&ctx_bmw, (const void*) hash, 64);
+	sph_bmw512_close(&ctx_bmw, (void*) hash);

-    sph_groestl512_init(&ctx_groestl);
-    // ZGROESTL;
-    sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
-    sph_groestl512_close(&ctx_groestl, (void*) hash);
+	sph_groestl512_init(&ctx_groestl);
+	sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
+	sph_groestl512_close(&ctx_groestl, (void*) hash);

-    sph_skein512_init(&ctx_skein);
-    // ZSKEIN;
-    sph_skein512 (&ctx_skein, (const void*) hash, 64);
-    sph_skein512_close(&ctx_skein, (void*) hash);
+	sph_skein512_init(&ctx_skein);
+	sph_skein512 (&ctx_skein, (const void*) hash, 64);
+	sph_skein512_close(&ctx_skein, (void*) hash);

-    sph_jh512_init(&ctx_jh);
-    // ZJH;
-    sph_jh512 (&ctx_jh, (const void*) hash, 64);
-    sph_jh512_close(&ctx_jh, (void*) hash);
+	sph_jh512_init(&ctx_jh);
+	sph_jh512 (&ctx_jh, (const void*) hash, 64);
+	sph_jh512_close(&ctx_jh, (void*) hash);

-    sph_keccak512_init(&ctx_keccak);
-    // ZKECCAK;
-    sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
-    sph_keccak512_close(&ctx_keccak, (void*) hash);
+	sph_keccak512_init(&ctx_keccak);
+	sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
+	sph_keccak512_close(&ctx_keccak, (void*) hash);

-    sph_luffa512_init(&ctx_luffa);
-    // ZLUFFA;
-    sph_luffa512 (&ctx_luffa, (const void*) hash, 64);
-    sph_luffa512_close (&ctx_luffa, (void*) hash);
+	sph_luffa512_init(&ctx_luffa);
+	sph_luffa512 (&ctx_luffa, (const void*) hash, 64);
+	sph_luffa512_close (&ctx_luffa, (void*) hash);

-    sph_cubehash512_init(&ctx_cubehash);
-    // ZCUBEHASH;
-    sph_cubehash512 (&ctx_cubehash, (const void*) hash, 64);
-    sph_cubehash512_close(&ctx_cubehash, (void*) hash);
+	sph_cubehash512_init(&ctx_cubehash);
+	sph_cubehash512 (&ctx_cubehash, (const void*) hash, 64);
+	sph_cubehash512_close(&ctx_cubehash, (void*) hash);

-    sph_shavite512_init(&ctx_shavite);
-    // ZSHAVITE;
-    sph_shavite512 (&ctx_shavite, (const void*) hash, 64);
-    sph_shavite512_close(&ctx_shavite, (void*) hash);
+	sph_shavite512_init(&ctx_shavite);
+	sph_shavite512 (&ctx_shavite, (const void*) hash, 64);
+	sph_shavite512_close(&ctx_shavite, (void*) hash);

-    sph_simd512_init(&ctx_simd);
-    // ZSIMD
-    sph_simd512 (&ctx_simd, (const void*) hash, 64);
-    sph_simd512_close(&ctx_simd, (void*) hash);
+	sph_simd512_init(&ctx_simd);
+	sph_simd512 (&ctx_simd, (const void*) hash, 64);
+	sph_simd512_close(&ctx_simd, (void*) hash);

-    sph_echo512_init(&ctx_echo);
-    // ZECHO
-    sph_echo512 (&ctx_echo, (const void*) hash, 64);
-    sph_echo512_close(&ctx_echo, (void*) hash); 
+	sph_echo512_init(&ctx_echo);
+	sph_echo512 (&ctx_echo, (const void*) hash, 64);
+	sph_echo512_close(&ctx_echo, (void*) hash);

-    sph_hamsi512_init(&ctx_hamsi);
-    sph_hamsi512 (&ctx_hamsi, (const void*) hash, 64);
-    sph_hamsi512_close(&ctx_hamsi, (void*) hash); 
+	sph_hamsi512_init(&ctx_hamsi);
+	sph_hamsi512 (&ctx_hamsi, (const void*) hash, 64);
+	sph_hamsi512_close(&ctx_hamsi, (void*) hash);

-    sph_fugue512_init(&ctx_fugue);
-    sph_fugue512 (&ctx_fugue, (const void*) hash, 64);
-    sph_fugue512_close(&ctx_fugue, (void*) hash); 
+	sph_fugue512_init(&ctx_fugue);
+	sph_fugue512 (&ctx_fugue, (const void*) hash, 64);
+	sph_fugue512_close(&ctx_fugue, (void*) hash);

-    memcpy(state, hash, 32);
+	memcpy(output, hash, 32);
 }


@ -266,12 +257,15 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
 			x13hash(vhash64, endiandata);

 			if( (vhash64[7]<=Htarg) && fulltest(vhash64, ptarget) ) {
-                
-                pdata[19] = foundNonce;
-                *hashes_done = foundNonce - first_nonce + 1;
-                return 1;
-			} else {
-				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
+				pdata[19] = foundNonce;
+				*hashes_done = foundNonce - first_nonce + 1;
+				return 1;
+			}
+			else if (vhash64[7] > Htarg) {
+				applog(LOG_INFO, "GPU #%d: result for %08x is not in range: %x > %x", thr_id, foundNonce, vhash64[7], Htarg);
+			}
+			else {
+				applog(LOG_INFO, "GPU #%d: result for %08x does not validate on CPU!", thr_id, foundNonce);
 			}
 		}

--- a/x15/cuda_x14_shabal512.cu
+++ b/x15/cuda_x14_shabal512.cu
@ -0,0 +1,492 @@
+/*
+ * Shabal-512 for X14/X15 (STUB)
+ */
+#include <stdint.h>
+#include <cuda_runtime.h>
+
+extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
+
+
+/* Typed integer literals and 32-bit truncation, named as in sphlib. */
+#define SPH_C64(x)    ((uint64_t)(x ## ULL))
+#define SPH_C32(x)    ((uint32_t)(x ## U))
+#define SPH_T32(x)    ((x) & SPH_C32(0xFFFFFFFF))
+
+/* Byte-reverses a 32-bit word through __byte_perm (selector 0x0123).
+ * NOTE(review): not referenced anywhere else in this file. */
+#define SWAB32(x) ( __byte_perm(x, x, 0x0123) )
+
+/* 32-bit rotate left by n.
+ * NOTE(review): not referenced anywhere else in this file; PERM_ELT and
+ * APPLY_P spell out their rotations by hand. */
+#if __CUDA_ARCH__ < 350
+	// below compute capability 3.5 (and in the host pass, where
+	// __CUDA_ARCH__ is undefined): plain shift/or rotate
+	#define ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
+#else
+	// compute capability 3.5 and above: funnel-shift intrinsic
+	#define ROTL32(x, n) __funnelshift_l( (x), (x), (n) )
+#endif
+
+/* $Id: shabal.c 175 2010-05-07 16:03:20Z tp $ */
+/*
+ * Shabal implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010 Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+/*
+ * Part of this code was automatically generated (the part between
+ * the "BEGIN" and "END" markers).
+ */
+
+/* presumably the number of 32-bit words in a message block (M0..MF);
+ * NOTE(review): not referenced anywhere else in this file */
+#define sM    16
+
+/* Short aliases used by the generated macros below. */
+#define C32   SPH_C32
+#define T32   SPH_T32
+
+/* These values match the B-word offsets that PERM_STEP_* hard-codes
+ * (B0 is paired with BD, B9 and B6, i.e. offsets 13, 9 and 6).
+ * NOTE(review): the names themselves are not referenced in this file. */
+#define O1   13
+#define O2    9
+#define O3    6
+
+/*
+ * We copy the state into local variables, so that the compiler knows
+ * that it can optimize them at will.
+ */
+
+/* BEGIN -- automatically generated code. */
+
+/*
+ * Adds message word Mi to state word Bi for all sixteen words, truncated
+ * to 32 bits. Expects B0..BF and M0..MF to be in scope where it is expanded.
+ */
+#define INPUT_BLOCK_ADD   do { \
+		B0 = T32(B0 + M0); \
+		B1 = T32(B1 + M1); \
+		B2 = T32(B2 + M2); \
+		B3 = T32(B3 + M3); \
+		B4 = T32(B4 + M4); \
+		B5 = T32(B5 + M5); \
+		B6 = T32(B6 + M6); \
+		B7 = T32(B7 + M7); \
+		B8 = T32(B8 + M8); \
+		B9 = T32(B9 + M9); \
+		BA = T32(BA + MA); \
+		BB = T32(BB + MB); \
+		BC = T32(BC + MC); \
+		BD = T32(BD + MD); \
+		BE = T32(BE + ME); \
+		BF = T32(BF + MF); \
+	} while (0)
+
+/*
+ * Subtracts message word Mi from state word Ci for all sixteen words,
+ * truncated to 32 bits. Expects C0..CF and M0..MF to be in scope where it
+ * is expanded.
+ */
+#define INPUT_BLOCK_SUB   do { \
+		C0 = T32(C0 - M0); \
+		C1 = T32(C1 - M1); \
+		C2 = T32(C2 - M2); \
+		C3 = T32(C3 - M3); \
+		C4 = T32(C4 - M4); \
+		C5 = T32(C5 - M5); \
+		C6 = T32(C6 - M6); \
+		C7 = T32(C7 - M7); \
+		C8 = T32(C8 - M8); \
+		C9 = T32(C9 - M9); \
+		CA = T32(CA - MA); \
+		CB = T32(CB - MB); \
+		CC = T32(CC - MC); \
+		CD = T32(CD - MD); \
+		CE = T32(CE - ME); \
+		CF = T32(CF - MF); \
+	} while (0)
+
+/*
+ * XORs the two counter words into the state: Wlow into A00, Whigh into A01.
+ * Expects A00, A01, Wlow and Whigh to be in scope where it is expanded.
+ */
+#define XOR_W   do { \
+		A00 ^= Wlow; \
+		A01 ^= Whigh; \
+	} while (0)
+
+/* Exchanges two uint32_t lvalues through a block-local temporary named tmp. */
+#define SWAP(v1, v2)   do { \
+		uint32_t tmp = (v1); \
+		(v1) = (v2); \
+		(v2) = tmp; \
+	} while (0)
+
+/*
+ * Exchanges the B and C state words pairwise (B0 with C0 ... BF with CF).
+ * Expects B0..BF and C0..CF to be in scope where it is expanded.
+ */
+#define SWAP_BC   do { \
+		SWAP(B0, C0); \
+		SWAP(B1, C1); \
+		SWAP(B2, C2); \
+		SWAP(B3, C3); \
+		SWAP(B4, C4); \
+		SWAP(B5, C5); \
+		SWAP(B6, C6); \
+		SWAP(B7, C7); \
+		SWAP(B8, C8); \
+		SWAP(B9, C9); \
+		SWAP(BA, CA); \
+		SWAP(BB, CB); \
+		SWAP(BC, CC); \
+		SWAP(BD, CD); \
+		SWAP(BE, CE); \
+		SWAP(BF, CF); \
+	} while (0)
+
+/*
+ * One elementary step of the permutation, updating xa0 and then xb0:
+ *   xa0 = ((xa0 ^ (rotl32(xa1, 15) * 5) ^ xc) * 3) ^ xb1 ^ (xb2 & ~xb3) ^ xm
+ *   xb0 = ~(rotl32(xb0, 1) ^ xa0)      (uses the xa0 just computed)
+ * The arguments are expanded unparenthesised and more than once, so only
+ * plain uint32_t variables should be passed.
+ */
+#define PERM_ELT(xa0, xa1, xb0, xb1, xb2, xb3, xc, xm)   do { \
+		xa0 = T32((xa0 \
+			^ (((xa1 << 15) | (xa1 >> 17)) * 5U) \
+			^ xc) * 3U) \
+			^ xb1 ^ (xb2 & ~xb3) ^ xm; \
+		xb0 = T32(~(((xb0 << 1) | (xb0 >> 31)) ^ xa0)); \
+	} while (0)
+
+/*
+ * First of three permutation passes: sixteen PERM_ELT steps, one per B/M
+ * word. The A target starts at A00 and cycles through the twelve A words
+ * (so A00..A03 are updated twice); each step reads the previously updated
+ * A word. C words are consumed in descending order starting at C8.
+ */
+#define PERM_STEP_0   do { \
+		PERM_ELT(A00, A0B, B0, BD, B9, B6, C8, M0); \
+		PERM_ELT(A01, A00, B1, BE, BA, B7, C7, M1); \
+		PERM_ELT(A02, A01, B2, BF, BB, B8, C6, M2); \
+		PERM_ELT(A03, A02, B3, B0, BC, B9, C5, M3); \
+		PERM_ELT(A04, A03, B4, B1, BD, BA, C4, M4); \
+		PERM_ELT(A05, A04, B5, B2, BE, BB, C3, M5); \
+		PERM_ELT(A06, A05, B6, B3, BF, BC, C2, M6); \
+		PERM_ELT(A07, A06, B7, B4, B0, BD, C1, M7); \
+		PERM_ELT(A08, A07, B8, B5, B1, BE, C0, M8); \
+		PERM_ELT(A09, A08, B9, B6, B2, BF, CF, M9); \
+		PERM_ELT(A0A, A09, BA, B7, B3, B0, CE, MA); \
+		PERM_ELT(A0B, A0A, BB, B8, B4, B1, CD, MB); \
+		PERM_ELT(A00, A0B, BC, B9, B5, B2, CC, MC); \
+		PERM_ELT(A01, A00, BD, BA, B6, B3, CB, MD); \
+		PERM_ELT(A02, A01, BE, BB, B7, B4, CA, ME); \
+		PERM_ELT(A03, A02, BF, BC, B8, B5, C9, MF); \
+	} while (0)
+
+/*
+ * Second permutation pass: same B/C/M schedule as PERM_STEP_0, but the A
+ * target starts at A04 (A04..A0B, then A00..A07).
+ */
+#define PERM_STEP_1   do { \
+		PERM_ELT(A04, A03, B0, BD, B9, B6, C8, M0); \
+		PERM_ELT(A05, A04, B1, BE, BA, B7, C7, M1); \
+		PERM_ELT(A06, A05, B2, BF, BB, B8, C6, M2); \
+		PERM_ELT(A07, A06, B3, B0, BC, B9, C5, M3); \
+		PERM_ELT(A08, A07, B4, B1, BD, BA, C4, M4); \
+		PERM_ELT(A09, A08, B5, B2, BE, BB, C3, M5); \
+		PERM_ELT(A0A, A09, B6, B3, BF, BC, C2, M6); \
+		PERM_ELT(A0B, A0A, B7, B4, B0, BD, C1, M7); \
+		PERM_ELT(A00, A0B, B8, B5, B1, BE, C0, M8); \
+		PERM_ELT(A01, A00, B9, B6, B2, BF, CF, M9); \
+		PERM_ELT(A02, A01, BA, B7, B3, B0, CE, MA); \
+		PERM_ELT(A03, A02, BB, B8, B4, B1, CD, MB); \
+		PERM_ELT(A04, A03, BC, B9, B5, B2, CC, MC); \
+		PERM_ELT(A05, A04, BD, BA, B6, B3, CB, MD); \
+		PERM_ELT(A06, A05, BE, BB, B7, B4, CA, ME); \
+		PERM_ELT(A07, A06, BF, BC, B8, B5, C9, MF); \
+	} while (0)
+
+/*
+ * Third permutation pass: same B/C/M schedule as PERM_STEP_0, but the A
+ * target starts at A08 (A08..A0B, then A00..A0B).
+ */
+#define PERM_STEP_2   do { \
+		PERM_ELT(A08, A07, B0, BD, B9, B6, C8, M0); \
+		PERM_ELT(A09, A08, B1, BE, BA, B7, C7, M1); \
+		PERM_ELT(A0A, A09, B2, BF, BB, B8, C6, M2); \
+		PERM_ELT(A0B, A0A, B3, B0, BC, B9, C5, M3); \
+		PERM_ELT(A00, A0B, B4, B1, BD, BA, C4, M4); \
+		PERM_ELT(A01, A00, B5, B2, BE, BB, C3, M5); \
+		PERM_ELT(A02, A01, B6, B3, BF, BC, C2, M6); \
+		PERM_ELT(A03, A02, B7, B4, B0, BD, C1, M7); \
+		PERM_ELT(A04, A03, B8, B5, B1, BE, C0, M8); \
+		PERM_ELT(A05, A04, B9, B6, B2, BF, CF, M9); \
+		PERM_ELT(A06, A05, BA, B7, B3, B0, CE, MA); \
+		PERM_ELT(A07, A06, BB, B8, B4, B1, CD, MB); \
+		PERM_ELT(A08, A07, BC, B9, B5, B2, CC, MC); \
+		PERM_ELT(A09, A08, BD, BA, B6, B3, CB, MD); \
+		PERM_ELT(A0A, A09, BE, BB, B7, B4, CA, ME); \
+		PERM_ELT(A0B, A0A, BF, BC, B8, B5, C9, MF); \
+	} while (0)
+
+/*
+ * The full permutation over the A/B/C state for the current message words:
+ *   1. rotate every B word left by 17 bits,
+ *   2. run PERM_STEP_0, PERM_STEP_1 and PERM_STEP_2,
+ *   3. add C words into the A words (36 additions, three C words per A word).
+ * Expects A00..A0B, B0..BF, C0..CF and M0..MF to be in scope where it is
+ * expanded.
+ */
+#define APPLY_P   do { \
+		B0 = T32(B0 << 17) | (B0 >> 15); \
+		B1 = T32(B1 << 17) | (B1 >> 15); \
+		B2 = T32(B2 << 17) | (B2 >> 15); \
+		B3 = T32(B3 << 17) | (B3 >> 15); \
+		B4 = T32(B4 << 17) | (B4 >> 15); \
+		B5 = T32(B5 << 17) | (B5 >> 15); \
+		B6 = T32(B6 << 17) | (B6 >> 15); \
+		B7 = T32(B7 << 17) | (B7 >> 15); \
+		B8 = T32(B8 << 17) | (B8 >> 15); \
+		B9 = T32(B9 << 17) | (B9 >> 15); \
+		BA = T32(BA << 17) | (BA >> 15); \
+		BB = T32(BB << 17) | (BB >> 15); \
+		BC = T32(BC << 17) | (BC >> 15); \
+		BD = T32(BD << 17) | (BD >> 15); \
+		BE = T32(BE << 17) | (BE >> 15); \
+		BF = T32(BF << 17) | (BF >> 15); \
+		PERM_STEP_0; \
+		PERM_STEP_1; \
+		PERM_STEP_2; \
+		A0B = T32(A0B + C6); \
+		A0A = T32(A0A + C5); \
+		A09 = T32(A09 + C4); \
+		A08 = T32(A08 + C3); \
+		A07 = T32(A07 + C2); \
+		A06 = T32(A06 + C1); \
+		A05 = T32(A05 + C0); \
+		A04 = T32(A04 + CF); \
+		A03 = T32(A03 + CE); \
+		A02 = T32(A02 + CD); \
+		A01 = T32(A01 + CC); \
+		A00 = T32(A00 + CB); \
+		A0B = T32(A0B + CA); \
+		A0A = T32(A0A + C9); \
+		A09 = T32(A09 + C8); \
+		A08 = T32(A08 + C7); \
+		A07 = T32(A07 + C6); \
+		A06 = T32(A06 + C5); \
+		A05 = T32(A05 + C4); \
+		A04 = T32(A04 + C3); \
+		A03 = T32(A03 + C2); \
+		A02 = T32(A02 + C1); \
+		A01 = T32(A01 + C0); \
+		A00 = T32(A00 + CF); \
+		A0B = T32(A0B + CE); \
+		A0A = T32(A0A + CD); \
+		A09 = T32(A09 + CC); \
+		A08 = T32(A08 + CB); \
+		A07 = T32(A07 + CA); \
+		A06 = T32(A06 + C9); \
+		A05 = T32(A05 + C8); \
+		A04 = T32(A04 + C7); \
+		A03 = T32(A03 + C6); \
+		A02 = T32(A02 + C5); \
+		A01 = T32(A01 + C4); \
+		A00 = T32(A00 + C3); \
+	} while (0)
+
+/*
+ * Increments the counter held in Wlow/Whigh: Wlow is incremented, and Whigh
+ * receives the carry when Wlow wraps to zero.
+ */
+#define INCR_W   do { \
+		if ((Wlow = T32(Wlow + 1)) == 0) \
+			Whigh = T32(Whigh + 1); \
+	} while (0)
+
+
+#if 0 /* other hash sizes init */
+
+static const uint32_t A_init_192[] = {
+	C32(0xFD749ED4), C32(0xB798E530), C32(0x33904B6F), C32(0x46BDA85E),
+	C32(0x076934B4), C32(0x454B4058), C32(0x77F74527), C32(0xFB4CF465),
+	C32(0x62931DA9), C32(0xE778C8DB), C32(0x22B3998E), C32(0xAC15CFB9)
+};
+
+static const uint32_t B_init_192[] = {
+	C32(0x58BCBAC4), C32(0xEC47A08E), C32(0xAEE933B2), C32(0xDFCBC824),
+	C32(0xA7944804), C32(0xBF65BDB0), C32(0x5A9D4502), C32(0x59979AF7),
+	C32(0xC5CEA54E), C32(0x4B6B8150), C32(0x16E71909), C32(0x7D632319),
+	C32(0x930573A0), C32(0xF34C63D1), C32(0xCAF914B4), C32(0xFDD6612C)
+};
+
+static const uint32_t C_init_192[] = {
+	C32(0x61550878), C32(0x89EF2B75), C32(0xA1660C46), C32(0x7EF3855B),
+	C32(0x7297B58C), C32(0x1BC67793), C32(0x7FB1C723), C32(0xB66FC640),
+	C32(0x1A48B71C), C32(0xF0976D17), C32(0x088CE80A), C32(0xA454EDF3),
+	C32(0x1C096BF4), C32(0xAC76224B), C32(0x5215781C), C32(0xCD5D2669)
+};
+
+static const uint32_t A_init_224[] = {
+	C32(0xA5201467), C32(0xA9B8D94A), C32(0xD4CED997), C32(0x68379D7B),
+	C32(0xA7FC73BA), C32(0xF1A2546B), C32(0x606782BF), C32(0xE0BCFD0F),
+	C32(0x2F25374E), C32(0x069A149F), C32(0x5E2DFF25), C32(0xFAECF061)
+};
+
+static const uint32_t B_init_224[] = {
+	C32(0xEC9905D8), C32(0xF21850CF), C32(0xC0A746C8), C32(0x21DAD498),
+	C32(0x35156EEB), C32(0x088C97F2), C32(0x26303E40), C32(0x8A2D4FB5),
+	C32(0xFEEE44B6), C32(0x8A1E9573), C32(0x7B81111A), C32(0xCBC139F0),
+	C32(0xA3513861), C32(0x1D2C362E), C32(0x918C580E), C32(0xB58E1B9C)
+};
+
+static const uint32_t C_init_224[] = {
+	C32(0xE4B573A1), C32(0x4C1A0880), C32(0x1E907C51), C32(0x04807EFD),
+	C32(0x3AD8CDE5), C32(0x16B21302), C32(0x02512C53), C32(0x2204CB18),
+	C32(0x99405F2D), C32(0xE5B648A1), C32(0x70AB1D43), C32(0xA10C25C2),
+	C32(0x16F1AC05), C32(0x38BBEB56), C32(0x9B01DC60), C32(0xB1096D83)
+};
+
+static const uint32_t A_init_256[] = {
+	C32(0x52F84552), C32(0xE54B7999), C32(0x2D8EE3EC), C32(0xB9645191),
+	C32(0xE0078B86), C32(0xBB7C44C9), C32(0xD2B5C1CA), C32(0xB0D2EB8C),
+	C32(0x14CE5A45), C32(0x22AF50DC), C32(0xEFFDBC6B), C32(0xEB21B74A)
+};
+
+static const uint32_t B_init_256[] = {
+	C32(0xB555C6EE), C32(0x3E710596), C32(0xA72A652F), C32(0x9301515F),
+	C32(0xDA28C1FA), C32(0x696FD868), C32(0x9CB6BF72), C32(0x0AFE4002),
+	C32(0xA6E03615), C32(0x5138C1D4), C32(0xBE216306), C32(0xB38B8890),
+	C32(0x3EA8B96B), C32(0x3299ACE4), C32(0x30924DD4), C32(0x55CB34A5)
+};
+
+static const uint32_t C_init_256[] = {
+	C32(0xB405F031), C32(0xC4233EBA), C32(0xB3733979), C32(0xC0DD9D55),
+	C32(0xC51C28AE), C32(0xA327B8E1), C32(0x56C56167), C32(0xED614433),
+	C32(0x88B59D60), C32(0x60E2CEBA), C32(0x758B4B8B), C32(0x83E82A7F),
+	C32(0xBC968828), C32(0xE6E00BF7), C32(0xBA839E55), C32(0x9B491C60)
+};
+
+static const uint32_t A_init_384[] = {
+	C32(0xC8FCA331), C32(0xE55C504E), C32(0x003EBF26), C32(0xBB6B8D83),
+	C32(0x7B0448C1), C32(0x41B82789), C32(0x0A7C9601), C32(0x8D659CFF),
+	C32(0xB6E2673E), C32(0xCA54C77B), C32(0x1460FD7E), C32(0x3FCB8F2D)
+};
+
+static const uint32_t B_init_384[] = {
+	C32(0x527291FC), C32(0x2A16455F), C32(0x78E627E5), C32(0x944F169F),
+	C32(0x1CA6F016), C32(0xA854EA25), C32(0x8DB98ABE), C32(0xF2C62641),
+	C32(0x30117DCB), C32(0xCF5C4309), C32(0x93711A25), C32(0xF9F671B8),
+	C32(0xB01D2116), C32(0x333F4B89), C32(0xB285D165), C32(0x86829B36)
+};
+
+static const uint32_t C_init_384[] = {
+	C32(0xF764B11A), C32(0x76172146), C32(0xCEF6934D), C32(0xC6D28399),
+	C32(0xFE095F61), C32(0x5E6018B4), C32(0x5048ECF5), C32(0x51353261),
+	C32(0x6E6E36DC), C32(0x63130DAD), C32(0xA9C69BD6), C32(0x1E90EA0C),
+	C32(0x7C35073B), C32(0x28D95E6D), C32(0xAA340E0D), C32(0xCB3DEE70)
+};
+#endif
+
+/* Initial values of the twelve A state words (A00..A0B), copied into
+ * registers at the start of x14_shabal512_gpu_hash_64. */
+__device__
+static const uint32_t d_A512[] = {
+	C32(0x20728DFD), C32(0x46C0BD53), C32(0xE782B699), C32(0x55304632),
+	C32(0x71B4EF90), C32(0x0EA9E82C), C32(0xDBB930F1), C32(0xFAD06B8B),
+	C32(0xBE0CAE40), C32(0x8BD14410), C32(0x76D2ADAC), C32(0x28ACAB7F)
+};
+
+/* Initial values of the sixteen B state words (B0..BF). */
+__device__
+static const uint32_t d_B512[] = {
+	C32(0xC1099CB7), C32(0x07B385F3), C32(0xE7442C26), C32(0xCC8AD640),
+	C32(0xEB6F56C7), C32(0x1EA81AA9), C32(0x73B9D314), C32(0x1DE85D08),
+	C32(0x48910A5A), C32(0x893B22DB), C32(0xC5A0DF44), C32(0xBBC4324E),
+	C32(0x72D2F240), C32(0x75941D99), C32(0x6D8BDE82), C32(0xA1A7502B)
+};
+
+/* Initial values of the sixteen C state words (C0..CF). */
+__device__
+static const uint32_t d_C512[] = {
+	C32(0xD9BF68D1), C32(0x58BAD750), C32(0x56028CB2), C32(0x8134F359),
+	C32(0xB5D469D8), C32(0x941A8CC2), C32(0x418B2A6E), C32(0x04052780),
+	C32(0x7F07D787), C32(0x5194358F), C32(0x3C60D665), C32(0xBE97D79A),
+	C32(0x950C3434), C32(0xAED9A06D), C32(0x2537DC8D), C32(0x7CDB5969)
+};
+
+/***************************************************/
+// GPU Hash Function
+/*
+ * Shabal-512 of one 64-byte input per thread, computed in place.
+ *
+ * threads        number of hashes to process; surplus threads do nothing
+ * startNounce    nonce that corresponds to slot 0 of g_hash
+ * g_hash         hash buffer, 8 x uint64_t (64 bytes) per nonce; each slot is
+ *                read as the message and then overwritten with the digest
+ * g_nonceVector  optional list of nonces to process; when NULL, thread i
+ *                handles nonce startNounce + i
+ *
+ * Only the x dimension of the launch configuration is used for indexing and
+ * no shared memory is needed.
+ */
+__global__ void x14_shabal512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector)
+{
+	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+
+	if (thread < threads)
+	{
+		uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
+		int hashPosition = nounce - startNounce;
+		// 8 x uint64_t per slot; the offset is computed in size_t so large
+		// batches cannot overflow the index
+		uint32_t *Hash = (uint32_t*)&g_hash[(size_t)hashPosition * 8];
+
+		// working state, seeded from the initial-value tables
+		uint32_t A00 = d_A512[0], A01 = d_A512[1], A02 = d_A512[2], A03 = d_A512[3],
+			A04 = d_A512[4], A05 = d_A512[5], A06 = d_A512[6], A07 = d_A512[7],
+			A08 = d_A512[8], A09 = d_A512[9], A0A = d_A512[10], A0B = d_A512[11];
+		uint32_t B0 = d_B512[0], B1 = d_B512[1], B2 = d_B512[2], B3 = d_B512[3],
+			B4 = d_B512[4], B5 = d_B512[5], B6 = d_B512[6], B7 = d_B512[7],
+			B8 = d_B512[8], B9 = d_B512[9], BA = d_B512[10], BB = d_B512[11],
+			BC = d_B512[12], BD = d_B512[13], BE = d_B512[14], BF = d_B512[15];
+		uint32_t C0 = d_C512[0], C1 = d_C512[1], C2 = d_C512[2], C3 = d_C512[3],
+			C4 = d_C512[4], C5 = d_C512[5], C6 = d_C512[6], C7 = d_C512[7],
+			C8 = d_C512[8], C9 = d_C512[9], CA = d_C512[10], CB = d_C512[11],
+			CC = d_C512[12], CD = d_C512[13], CE = d_C512[14], CF = d_C512[15];
+		uint32_t M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, MA, MB, MC, MD, ME, MF;
+		uint32_t Wlow = 1, Whigh = 0;
+
+		// first block: the 64-byte input, read as sixteen 32-bit words
+		M0 = Hash[0];
+		M1 = Hash[1];
+		M2 = Hash[2];
+		M3 = Hash[3];
+		M4 = Hash[4];
+		M5 = Hash[5];
+		M6 = Hash[6];
+		M7 = Hash[7];
+		M8 = Hash[8];
+		M9 = Hash[9];
+		MA = Hash[10];
+		MB = Hash[11];
+		MC = Hash[12];
+		MD = Hash[13];
+		ME = Hash[14];
+		MF = Hash[15];
+
+		INPUT_BLOCK_ADD;
+		XOR_W;
+		APPLY_P;
+		INPUT_BLOCK_SUB;
+		SWAP_BC;
+		INCR_W;
+
+		// second block: a single 0x80 byte followed by zeros
+		M0 = 0x80;
+		M1 = M2 = M3 = M4 = M5 = M6 = M7 = M8 = M9 = MA = MB = MC = MD = ME = MF = 0;
+
+		INPUT_BLOCK_ADD;
+		XOR_W;
+		APPLY_P;
+
+		// three further permutations of the same block; the counter is not
+		// incremented between them
+		for (int i = 0; i < 3; i++)
+		{
+			SWAP_BC;
+			XOR_W;
+			APPLY_P;
+		}
+
+		// the digest is the sixteen B words, written back over the input
+		// slot (no separate output copy is needed: source and destination
+		// are the same memory)
+		Hash[0] = B0;
+		Hash[1] = B1;
+		Hash[2] = B2;
+		Hash[3] = B3;
+		Hash[4] = B4;
+		Hash[5] = B5;
+		Hash[6] = B6;
+		Hash[7] = B7;
+		Hash[8] = B8;
+		Hash[9] = B9;
+		Hash[10] = BA;
+		Hash[11] = BB;
+		Hash[12] = BC;
+		Hash[13] = BD;
+		Hash[14] = BE;
+		Hash[15] = BF;
+	}
+}
+
+/*
+ * Initialisation hook for the Shabal-512 stage. The body is intentionally
+ * empty in this file: the initial-value tables are statically initialised
+ * __device__ arrays and nothing is uploaded here. Both parameters are unused.
+ */
+__host__ void x14_shabal512_cpu_init(int thr_id, int threads)
+{
+}
+#include <stdio.h>
+/*
+ * Host wrapper that launches x14_shabal512_gpu_hash_64 over `threads` hashes.
+ *
+ * thr_id         miner thread index, forwarded to MyStreamSynchronize
+ * threads        number of hashes to process
+ * startNounce    nonce that corresponds to slot 0 of d_hash
+ * d_nonceVector  optional device list of nonces (NULL = consecutive nonces)
+ * d_hash         device hash buffer, 64 bytes per nonce, updated in place
+ * order          forwarded to MyStreamSynchronize as its `situation` argument
+ *
+ * A rejected launch is reported on stderr; the function itself returns
+ * nothing, as before.
+ */
+__host__ void x14_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
+{
+	const int threadsperblock = 192;
+
+	// number of thread blocks needed to cover every hash (rounded up)
+	dim3 grid((threads + threadsperblock-1)/threadsperblock);
+	dim3 block(threadsperblock);
+
+	// the kernel uses no dynamic shared memory
+	x14_shabal512_gpu_hash_64<<<grid, block>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
+
+	// a kernel launch has no return value, so a bad configuration only
+	// shows up through cudaGetLastError()
+	cudaError_t err = cudaGetLastError();
+	if (err != cudaSuccess)
+		fprintf(stderr, "GPU #%d: x14_shabal512 kernel launch failed: %s\n", thr_id, cudaGetErrorString(err));
+
+	MyStreamSynchronize(NULL, order, thr_id);
+}
--- a/x15/cuda_x15_whirlpool.cu
+++ b/x15/cuda_x15_whirlpool.cu
--- a/x15/x14.cu
+++ b/x15/x14.cu
@ -0,0 +1,259 @@
+/*
+ * X14 algorithm
+ * Added in ccminer by Tanguy Pruvot - 2014
+ */
+
+extern "C" {
+#include "sph/sph_blake.h"
+#include "sph/sph_bmw.h"
+#include "sph/sph_groestl.h"
+#include "sph/sph_skein.h"
+#include "sph/sph_jh.h"
+#include "sph/sph_keccak.h"
+
+#include "sph/sph_luffa.h"
+#include "sph/sph_cubehash.h"
+#include "sph/sph_shavite.h"
+#include "sph/sph_simd.h"
+#include "sph/sph_echo.h"
+
+#include "sph/sph_hamsi.h"
+#include "sph/sph_fugue.h"
+#include "sph/sph_shabal.h"
+
+#include "miner.h"
+}
+
+#include <stdint.h>
+#include <cuda_runtime.h>
+
+// from cpu-miner.c
+extern int device_map[8];
+
+// Memory for the hash functions
+static uint32_t *d_hash[8];
+
+extern void quark_blake512_cpu_init(int thr_id, int threads);
+extern void quark_blake512_cpu_setBlock_80(void *pdata);
+extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
+extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_bmw512_cpu_init(int thr_id, int threads);
+extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_groestl512_cpu_init(int thr_id, int threads);
+extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_skein512_cpu_init(int thr_id, int threads);
+extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_keccak512_cpu_init(int thr_id, int threads);
+extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_jh512_cpu_init(int thr_id, int threads);
+extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_luffa512_cpu_init(int thr_id, int threads);
+extern void x11_luffa512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_cubehash512_cpu_init(int thr_id, int threads);
+extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_shavite512_cpu_init(int thr_id, int threads);
+extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_simd512_cpu_init(int thr_id, int threads);
+extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_echo512_cpu_init(int thr_id, int threads);
+extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x13_hamsi512_cpu_init(int thr_id, int threads);
+extern void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x13_fugue512_cpu_init(int thr_id, int threads);
+extern void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x14_shabal512_cpu_init(int thr_id, int threads);
+extern void x14_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_check_cpu_init(int thr_id, int threads);
+extern void quark_check_cpu_setTarget(const void *ptarget);
+extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
+
+extern void quark_compactTest_cpu_init(int thr_id, int threads);
+extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
+											uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order);
+
+// X14 CPU Hash function
+/*
+ * CPU reference implementation of the X14 chain, used to confirm GPU results.
+ *
+ * output  receives the first 32 bytes of the final digest
+ * input   80 bytes of block header data
+ *
+ * The fourteen 512-bit hashes are chained in this order: blake, bmw,
+ * groestl, skein, jh, keccak, luffa, cubehash, shavite, simd, echo, hamsi,
+ * fugue, shabal. Each stage hashes the 64-byte digest of the previous one.
+ */
+extern "C" void x14hash(void *output, const void *input)
+{
+	// two 64-byte halves; stages alternate between them as source and
+	// destination
+	unsigned char hash[128];
+	// a real pointer instead of a "hash+64" macro: it is scoped to this
+	// function and cannot be mis-expanded inside a larger expression
+	unsigned char * const hashB = hash + 64;
+
+	memset(hash, 0, sizeof hash);
+
+	sph_blake512_context ctx_blake;
+	sph_bmw512_context ctx_bmw;
+	sph_groestl512_context ctx_groestl;
+	sph_jh512_context ctx_jh;
+	sph_keccak512_context ctx_keccak;
+	sph_skein512_context ctx_skein;
+	sph_luffa512_context ctx_luffa;
+	sph_cubehash512_context ctx_cubehash;
+	sph_shavite512_context ctx_shavite;
+	sph_simd512_context ctx_simd;
+	sph_echo512_context ctx_echo;
+	sph_hamsi512_context ctx_hamsi;
+	sph_fugue512_context ctx_fugue;
+	sph_shabal512_context ctx_shabal;
+
+	// stage 1 consumes the 80-byte header
+	sph_blake512_init(&ctx_blake);
+	sph_blake512(&ctx_blake, input, 80);
+	sph_blake512_close(&ctx_blake, hash);
+
+	sph_bmw512_init(&ctx_bmw);
+	sph_bmw512(&ctx_bmw, hash, 64);
+	sph_bmw512_close(&ctx_bmw, hashB);
+
+	sph_groestl512_init(&ctx_groestl);
+	sph_groestl512(&ctx_groestl, hashB, 64);
+	sph_groestl512_close(&ctx_groestl, hash);
+
+	sph_skein512_init(&ctx_skein);
+	sph_skein512(&ctx_skein, hash, 64);
+	sph_skein512_close(&ctx_skein, hashB);
+
+	sph_jh512_init(&ctx_jh);
+	sph_jh512(&ctx_jh, hashB, 64);
+	sph_jh512_close(&ctx_jh, hash);
+
+	sph_keccak512_init(&ctx_keccak);
+	sph_keccak512(&ctx_keccak, hash, 64);
+	sph_keccak512_close(&ctx_keccak, hashB);
+
+	sph_luffa512_init(&ctx_luffa);
+	sph_luffa512(&ctx_luffa, hashB, 64);
+	sph_luffa512_close(&ctx_luffa, hash);
+
+	sph_cubehash512_init(&ctx_cubehash);
+	sph_cubehash512(&ctx_cubehash, hash, 64);
+	sph_cubehash512_close(&ctx_cubehash, hashB);
+
+	sph_shavite512_init(&ctx_shavite);
+	sph_shavite512(&ctx_shavite, hashB, 64);
+	sph_shavite512_close(&ctx_shavite, hash);
+
+	sph_simd512_init(&ctx_simd);
+	sph_simd512(&ctx_simd, hash, 64);
+	sph_simd512_close(&ctx_simd, hashB);
+
+	sph_echo512_init(&ctx_echo);
+	sph_echo512(&ctx_echo, hashB, 64);
+	sph_echo512_close(&ctx_echo, hash);
+
+	sph_hamsi512_init(&ctx_hamsi);
+	sph_hamsi512(&ctx_hamsi, hash, 64);
+	sph_hamsi512_close(&ctx_hamsi, hashB);
+
+	sph_fugue512_init(&ctx_fugue);
+	sph_fugue512(&ctx_fugue, hashB, 64);
+	sph_fugue512_close(&ctx_fugue, hash);
+
+	// last stage reads and writes the same half; the input is fully
+	// absorbed before the digest is written by the close call
+	sph_shabal512_init(&ctx_shabal);
+	sph_shabal512(&ctx_shabal, hash, 64);
+	sph_shabal512_close(&ctx_shabal, hash);
+
+	memcpy(output, hash, 32);
+}
+
+
+extern bool opt_benchmark;
+
+/*
+ * Scans nonces for an X14 solution on the GPU, starting at pdata[19].
+ *
+ * thr_id       miner thread index; selects the device through device_map and
+ *              the per-thread device hash buffer
+ * pdata        20-word block header; pdata[19] is the nonce and is advanced
+ * ptarget      8-word target; word 7 is overwritten with 0xff when
+ *              opt_benchmark is set
+ * max_nonce    scanning stops once pdata[19] reaches this value
+ * hashes_done  out: number of nonces covered by this call
+ *
+ * Returns 1 with the winning nonce stored in pdata[19] when a GPU candidate
+ * is confirmed by x14hash() on the CPU, otherwise 0. Also returns 0 (with
+ * *hashes_done = 0) if the device hash buffer cannot be allocated.
+ */
+extern "C" int scanhash_x14(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done)
+{
+	const uint32_t first_nonce = pdata[19];
+	const int throughput = 256*256*8;
+	static bool init[8] = {0,0,0,0,0,0,0,0};
+	uint32_t endiandata[20];
+
+	if (opt_benchmark)
+		((uint32_t*)ptarget)[7] = 0xff;
+
+	// read only after the benchmark override, so the CPU range check and
+	// its log message use the target that is actually in effect
+	const uint32_t Htarg = ptarget[7];
+
+	if (!init[thr_id])
+	{
+		cudaSetDevice(device_map[thr_id]);
+
+		// 64 bytes of hash state per nonce in a batch
+		cudaError_t err = cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
+		if (err != cudaSuccess) {
+			// without the buffer every kernel below would run on an
+			// invalid pointer; init stays false so the next call retries
+			applog(LOG_ERR, "GPU #%d: unable to allocate the hash buffer: %s", thr_id, cudaGetErrorString(err));
+			*hashes_done = 0;
+			return 0;
+		}
+
+		quark_blake512_cpu_init(thr_id, throughput);
+		quark_groestl512_cpu_init(thr_id, throughput);
+		quark_skein512_cpu_init(thr_id, throughput);
+		quark_bmw512_cpu_init(thr_id, throughput);
+		quark_keccak512_cpu_init(thr_id, throughput);
+		quark_jh512_cpu_init(thr_id, throughput);
+		x11_luffa512_cpu_init(thr_id, throughput);
+		x11_cubehash512_cpu_init(thr_id, throughput);
+		x11_shavite512_cpu_init(thr_id, throughput);
+		x11_simd512_cpu_init(thr_id, throughput);
+		x11_echo512_cpu_init(thr_id, throughput);
+		x13_hamsi512_cpu_init(thr_id, throughput);
+		x13_fugue512_cpu_init(thr_id, throughput);
+		x14_shabal512_cpu_init(thr_id, throughput);
+
+		quark_check_cpu_init(thr_id, throughput);
+		init[thr_id] = true;
+	}
+
+	// the hash functions consume the header as big-endian words
+	for (int k = 0; k < 20; k++)
+		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
+
+	quark_blake512_cpu_setBlock_80((void*)endiandata);
+	quark_check_cpu_setTarget(ptarget);
+
+	do {
+		// one batch: the fourteen hash stages, same order as x14hash()
+		int order = 0;
+		quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+		quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+
+		// 0xffffffff is treated as "no candidate in this batch"
+		uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		if (foundNonce != 0xffffffff)
+		{
+			/* check now with the CPU to confirm */
+			uint32_t vhash64[8];
+			be32enc(&endiandata[19], foundNonce);
+			x14hash(vhash64, endiandata);
+			if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
+				pdata[19] = foundNonce;
+				*hashes_done = foundNonce - first_nonce + 1;
+				return 1;
+			}
+			else if (vhash64[7] > Htarg) {
+				applog(LOG_INFO, "GPU #%d: result for %08x is not in range: %x > %x", thr_id, foundNonce, vhash64[7], Htarg);
+			}
+			else {
+				applog(LOG_INFO, "GPU #%d: result for %08x does not validate on CPU!", thr_id, foundNonce);
+			}
+		}
+
+		// advance in 64 bits: a plain 32-bit add could wrap past zero and
+		// keep the loop running on nonces that were already scanned
+		uint64_t next_nonce = (uint64_t)pdata[19] + throughput;
+		if (next_nonce > 0xffffffffULL) {
+			pdata[19] = 0xffffffff;
+			break;
+		}
+		pdata[19] = (uint32_t)next_nonce;
+
+	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
+
+	*hashes_done = pdata[19] - first_nonce + 1;
+	return 0;
+}
--- a/x15/x15.cu
+++ b/x15/x15.cu
@ -0,0 +1,295 @@
+/*
+ * X15 algorithm (CHC, BBC, X15C)
+ * Added in ccminer by Tanguy Pruvot - 2014
+ */
+
+extern "C" {
+#include "sph/sph_blake.h"
+#include "sph/sph_bmw.h"
+#include "sph/sph_groestl.h"
+#include "sph/sph_skein.h"
+#include "sph/sph_jh.h"
+#include "sph/sph_keccak.h"
+
+#include "sph/sph_luffa.h"
+#include "sph/sph_cubehash.h"
+#include "sph/sph_shavite.h"
+#include "sph/sph_simd.h"
+#include "sph/sph_echo.h"
+
+#include "sph/sph_hamsi.h"
+#include "sph/sph_fugue.h"
+#include "sph/sph_shabal.h"
+#include "sph/sph_whirlpool.h"
+
+#include "miner.h"
+}
+
+#include <stdint.h>
+#include <cuda_runtime.h>
+
+/*
+ * Debug switch. When set to non-zero, scanhash_x15 fills the header with
+ * zeroes instead of encoding pdata, logs the first 16 words of the GPU hash
+ * buffer after one pass and returns.
+ */
+#define NULLTEST 0
+
+// from cpu-miner.c; scanhash_x15 indexes it by thr_id to choose the device given to cudaSetDevice
+extern int device_map[8];
+
+// Memory for the hash functions: one device buffer per thr_id, allocated on first use in scanhash_x15 (16 uint32_t per nonce)
+static uint32_t *d_hash[8];
+
+extern void quark_blake512_cpu_init(int thr_id, int threads); // like every *_cpu_init below: called once per thr_id from scanhash_x15
+extern void quark_blake512_cpu_setBlock_80(void *pdata); // scanhash_x15 passes its 20-word big-endian encoded header
+extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); // first call of every pass in scanhash_x15
+extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); // declared but not called in this file
+
+extern void quark_bmw512_cpu_init(int thr_id, int threads);
+extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); // scanhash_x15 passes NULL as d_nonceVector to this and to every other *_hash_64 stage it calls
+
+extern void quark_groestl512_cpu_init(int thr_id, int threads);
+extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_skein512_cpu_init(int thr_id, int threads);
+extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_keccak512_cpu_init(int thr_id, int threads);
+extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_jh512_cpu_init(int thr_id, int threads);
+extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_luffa512_cpu_init(int thr_id, int threads);
+extern void x11_luffa512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_cubehash512_cpu_init(int thr_id, int threads);
+extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_shavite512_cpu_init(int thr_id, int threads);
+extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_simd512_cpu_init(int thr_id, int threads);
+extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_echo512_cpu_init(int thr_id, int threads);
+extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x13_hamsi512_cpu_init(int thr_id, int threads);
+extern void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x13_fugue512_cpu_init(int thr_id, int threads);
+extern void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x14_shabal512_cpu_init(int thr_id, int threads);
+extern void x14_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x15_whirlpool_cpu_init(int thr_id, int threads);
+extern void x15_whirlpool_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); // last hashing stage of every pass in scanhash_x15
+
+extern void quark_check_cpu_init(int thr_id, int threads);
+extern void quark_check_cpu_setTarget(const void *ptarget); // called with ptarget on every scanhash_x15 invocation, before the scan loop
+extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order); // scanhash_x15 treats a return value of 0xffffffff as "no candidate nonce"
+
+extern void quark_compactTest_cpu_init(int thr_id, int threads); // the two quark_compactTest_* functions are declared but not called in this file
+extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
+											uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order);
+
+extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); // in this file only used by the NULLTEST debug path
+
+/*
+ * X15 CPU hash function (host-side reference used to confirm GPU results).
+ *
+ * Runs blake512 over the first 80 bytes of `input`, then feeds each 64-byte
+ * digest into the next stage, in this order: bmw, groestl, skein, jh, keccak,
+ * luffa, cubehash, shavite, simd, echo, hamsi, fugue, shabal, whirlpool.
+ *
+ * output: receives the first 32 bytes of the final (whirlpool) digest
+ * input:  80 bytes are read; scanhash_x15 passes its big-endian encoded header
+ */
+extern "C" void x15hash(void *output, const void *input)
+{
+	sph_blake512_context     ctx_blake;
+	sph_bmw512_context       ctx_bmw;
+	sph_groestl512_context   ctx_groestl;
+	sph_jh512_context        ctx_jh;
+	sph_keccak512_context    ctx_keccak;
+	sph_skein512_context     ctx_skein;
+	sph_luffa512_context     ctx_luffa;
+	sph_cubehash512_context  ctx_cubehash;
+	sph_shavite512_context   ctx_shavite;
+	sph_simd512_context      ctx_simd;
+	sph_echo512_context      ctx_echo;
+	sph_hamsi512_context     ctx_hamsi;
+	sph_fugue512_context     ctx_fugue;
+	sph_shabal512_context    ctx_shabal;
+	sph_whirlpool_context    ctx_whirlpool;
+
+	/*
+	 * One 128-byte scratch area split into two 64-byte digests. The stages
+	 * alternate between the halves, so a stage never reads and writes the
+	 * same half. Plain typed pointers (rather than a macro) keep both names
+	 * local to this function.
+	 */
+	unsigned char hash[128];
+	unsigned char * const hashA = hash;
+	unsigned char * const hashB = hash + 64;
+
+	memset(hash, 0, sizeof hash);
+
+	/* stage 1: the only stage that consumes the 80-byte header */
+	sph_blake512_init(&ctx_blake);
+	sph_blake512(&ctx_blake, input, 80);
+	sph_blake512_close(&ctx_blake, hashA);
+
+	/* stages 2..15: each hashes the 64-byte digest of the previous stage */
+	sph_bmw512_init(&ctx_bmw);
+	sph_bmw512(&ctx_bmw, hashA, 64);
+	sph_bmw512_close(&ctx_bmw, hashB);
+
+	sph_groestl512_init(&ctx_groestl);
+	sph_groestl512(&ctx_groestl, hashB, 64);
+	sph_groestl512_close(&ctx_groestl, hashA);
+
+	sph_skein512_init(&ctx_skein);
+	sph_skein512(&ctx_skein, hashA, 64);
+	sph_skein512_close(&ctx_skein, hashB);
+
+	sph_jh512_init(&ctx_jh);
+	sph_jh512(&ctx_jh, hashB, 64);
+	sph_jh512_close(&ctx_jh, hashA);
+
+	sph_keccak512_init(&ctx_keccak);
+	sph_keccak512(&ctx_keccak, hashA, 64);
+	sph_keccak512_close(&ctx_keccak, hashB);
+
+	sph_luffa512_init(&ctx_luffa);
+	sph_luffa512(&ctx_luffa, hashB, 64);
+	sph_luffa512_close(&ctx_luffa, hashA);
+
+	sph_cubehash512_init(&ctx_cubehash);
+	sph_cubehash512(&ctx_cubehash, hashA, 64);
+	sph_cubehash512_close(&ctx_cubehash, hashB);
+
+	sph_shavite512_init(&ctx_shavite);
+	sph_shavite512(&ctx_shavite, hashB, 64);
+	sph_shavite512_close(&ctx_shavite, hashA);
+
+	sph_simd512_init(&ctx_simd);
+	sph_simd512(&ctx_simd, hashA, 64);
+	sph_simd512_close(&ctx_simd, hashB);
+
+	sph_echo512_init(&ctx_echo);
+	sph_echo512(&ctx_echo, hashB, 64);
+	sph_echo512_close(&ctx_echo, hashA);
+
+	sph_hamsi512_init(&ctx_hamsi);
+	sph_hamsi512(&ctx_hamsi, hashA, 64);
+	sph_hamsi512_close(&ctx_hamsi, hashB);
+
+	sph_fugue512_init(&ctx_fugue);
+	sph_fugue512(&ctx_fugue, hashB, 64);
+	sph_fugue512_close(&ctx_fugue, hashA);
+
+	sph_shabal512_init(&ctx_shabal);
+	sph_shabal512(&ctx_shabal, hashA, 64);
+	sph_shabal512_close(&ctx_shabal, hashB);
+
+	sph_whirlpool_init(&ctx_whirlpool);
+	sph_whirlpool(&ctx_whirlpool, hashB, 64);
+	sph_whirlpool_close(&ctx_whirlpool, hashA);
+
+	/* only the first 32 bytes of the final digest are handed back */
+	memcpy(output, hashA, 32);
+}
+
+extern bool opt_benchmark;
+
+/*
+ * Scan a range of nonces for an X15 hash that meets the target.
+ *
+ * Each pass launches the fifteen GPU hashing stages over `throughput`
+ * consecutive nonces starting at pdata[19], asks the GPU check stage for a
+ * candidate and, if there is one, recomputes it on the CPU with x15hash()
+ * before accepting it.
+ *
+ * thr_id      : mining thread index; indexes init[], d_hash[] and
+ *               device_map[], which all have 8 entries
+ * pdata       : 20-word block header; pdata[19] is the start nonce on entry
+ *               and is updated as the scan advances (set to the winning
+ *               nonce on success)
+ * ptarget     : 8-word target; in benchmark mode word 7 is overwritten
+ *               with 0xff through a cast that drops the const qualifier
+ * max_nonce   : the scan stops once pdata[19] reaches this value
+ * hashes_done : receives the number of nonces accounted for by this call
+ *
+ * Returns 1 when a nonce was found and confirmed on the CPU, 0 otherwise.
+ */
+extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done)
+{
+	const uint32_t first_nonce = pdata[19];
+	const int throughput = 256*256*8;
+	static bool init[8] = {0,0,0,0,0,0,0,0};
+	uint32_t endiandata[20];
+	uint32_t Htarg = ptarget[7];
+
+	if (opt_benchmark)
+		((uint32_t*)ptarget)[7] = Htarg = 0x0000ff;
+
+	/* one-time setup per thread: select the device, allocate the hash
+	 * buffer (16 words per nonce) and initialise every stage */
+	if (!init[thr_id])
+	{
+		cudaSetDevice(device_map[thr_id]);
+
+		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
+
+		quark_blake512_cpu_init(thr_id, throughput);
+		quark_groestl512_cpu_init(thr_id, throughput);
+		quark_skein512_cpu_init(thr_id, throughput);
+		quark_bmw512_cpu_init(thr_id, throughput);
+		quark_keccak512_cpu_init(thr_id, throughput);
+		quark_jh512_cpu_init(thr_id, throughput);
+		x11_luffa512_cpu_init(thr_id, throughput);
+		x11_cubehash512_cpu_init(thr_id, throughput);
+		x11_shavite512_cpu_init(thr_id, throughput);
+		x11_simd512_cpu_init(thr_id, throughput);
+		x11_echo512_cpu_init(thr_id, throughput);
+		x13_hamsi512_cpu_init(thr_id, throughput);
+		x13_fugue512_cpu_init(thr_id, throughput);
+		x14_shabal512_cpu_init(thr_id, throughput);
+		x15_whirlpool_cpu_init(thr_id, throughput);
+
+		quark_check_cpu_init(thr_id, throughput);
+		init[thr_id] = true;
+	}
+
+	/* big-endian copy of the header (all zeroes in NULLTEST builds) */
+	for (int k=0; k < 20; k++)
+#if NULLTEST
+		endiandata[k] = 0;
+#else
+		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
+#endif
+
+	quark_blake512_cpu_setBlock_80((void*)endiandata);
+	quark_check_cpu_setTarget(ptarget);
+
+	do {
+		int order = 0;
+		quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+		quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x15_whirlpool_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+
+		/* Scan with GPU */
+		uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+
+#if NULLTEST
+		uint32_t buf[16]; memset(buf, 0, sizeof(buf));
+		cudaMemcpy(buf, d_hash[thr_id], 16 * sizeof(uint32_t), cudaMemcpyDeviceToHost);
+		MyStreamSynchronize(NULL, order, thr_id);
+		applog(LOG_NOTICE, "Hash  %08x %08x %08x %08x", buf[0], buf[1], buf[2], buf[3]);
+		applog(LOG_NOTICE, "Hash  %08x %08x %08x %08x", buf[4], buf[5], buf[6], buf[7]);
+		applog(LOG_NOTICE, "Hash  %08x %08x %08x %08x", buf[8], buf[9], buf[10], buf[11]);
+		applog(LOG_NOTICE, "Hash  %08x %08x %08x %08x", buf[12], buf[13], buf[14], buf[15]);
+		return 0;
+#endif
+		/* 0xffffffff is the "no candidate" value of the check stage */
+		if (foundNonce != 0xffffffff)
+		{
+			/* check now with the CPU to confirm */
+			uint32_t vhash64[8];
+			be32enc(&endiandata[19], foundNonce);
+			x15hash(vhash64, endiandata);
+			/* The top word alone is not enough when it equals Htarg:
+			 * fulltest() compares the whole hash against the target. */
+			if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
+				pdata[19] = foundNonce;
+				*hashes_done = foundNonce - first_nonce + 1;
+				applog(LOG_INFO, "GPU #%d: result for nonce $%08X is in wanted range, %x <= %x", thr_id, foundNonce, vhash64[7], Htarg);
+				return 1;
+			}
+			else if (vhash64[7] > Htarg) {
+				applog(LOG_NOTICE, "Hash0 %08x %08x %08x %08x", vhash64[0], vhash64[1], vhash64[2], vhash64[3]);
+				applog(LOG_NOTICE, "Hash1 %08x %08x %08x %08x", vhash64[4], vhash64[5], vhash64[6], vhash64[7]);
+				applog(LOG_INFO, "GPU #%d: result for %08x is not in range: %x > %x",
+					thr_id, foundNonce, vhash64[7], Htarg);
+			}
+			else {
+				applog(LOG_INFO, "GPU #%d: result for %08x does not validate on CPU!", thr_id, foundNonce);
+			}
+		}
+
+		/* Advance in 64 bits: a 32-bit add could wrap past 0xffffffff,
+		 * which would make the loop condition true again and restart the
+		 * scan from low nonces with a bogus hashes_done. */
+		const uint64_t next_nonce = (uint64_t)pdata[19] + (uint64_t)throughput;
+		if (next_nonce > 0xffffffffULL) {
+			pdata[19] = max_nonce;
+			break;
+		}
+		pdata[19] = (uint32_t)next_nonce;
+
+	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
+
+	*hashes_done = pdata[19] - first_nonce + 1;
+	return 0;
+}