diff --git a/Makefile.am b/Makefile.am
index 80207b7b..b7f32788 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -78,12 +78,10 @@ sgminer_SOURCES += algorithm/lyra2rev2.c algorithm/lyra2rev2.h
 sgminer_SOURCES += algorithm/pluck.c algorithm/pluck.h
 sgminer_SOURCES += algorithm/sia.c algorithm/sia.h
 sgminer_SOURCES += algorithm/credits.c algorithm/credits.h
-sgminer_SOURCES += algorithm/yescrypt.h algorithm/yescrypt.c algorithm/yescrypt_core.h algorithm/yescrypt-opt.c algorithm/yescryptcommon.c algorithm/sysendian.h 
 sgminer_SOURCES += algorithm/blake256.c algorithm/blake256.h
 sgminer_SOURCES += algorithm/blakecoin.c algorithm/blakecoin.h
 sgminer_SOURCES += algorithm/decred.c algorithm/decred.h
 sgminer_SOURCES += algorithm/pascal.c algorithm/pascal.h
-sgminer_SOURCES += algorithm/lbry.c algorithm/lbry.h
 sgminer_SOURCES += algorithm/gostcoin.c algorithm/gostcoin.h
 
 bin_SCRIPTS	= $(top_srcdir)/kernel/*.cl
diff --git a/algorithm.c b/algorithm.c
index 87e59b48..5c6282c8 100644
--- a/algorithm.c
+++ b/algorithm.c
@@ -36,14 +36,12 @@
 #include "algorithm/lyra2re.h"
 #include "algorithm/lyra2rev2.h"
 #include "algorithm/pluck.h"
-#include "algorithm/yescrypt.h"
 #include "algorithm/credits.h"
 #include "algorithm/blake256.h"
 #include "algorithm/blakecoin.h"
 #include "algorithm/sia.h"
 #include "algorithm/decred.h"
 #include "algorithm/pascal.h"
-#include "algorithm/lbry.h"
 #include "algorithm/sibcoin.h"
 #include "algorithm/gostcoin.h"
 
@@ -72,16 +70,13 @@ const char *algorithm_type_str[] = {
   "Neoscrypt",
   "WhirlpoolX",
   "Lyra2RE",
-  "Lyra2REV2"
-  "Pluck"
-  "Yescrypt",
-  "Yescrypt-multi",
+  "Lyra2REV2",
+  "Pluck",
   "Blakecoin",
   "Blake",
   "Sia",
   "Decred",
   "Vanilla",
-  "Lbry",
   "Sibcoin",
   "Gostcoin"
 };
@@ -284,100 +279,6 @@ static cl_int queue_credits_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_
   return status;
 }
 
-static cl_int queue_yescrypt_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads)
-{
-  cl_kernel *kernel = &clState->kernel;
-  unsigned int num = 0;
-  cl_uint le_target;
-  cl_int status = 0;
-
-
-//  le_target = (*(cl_uint *)(blk->work->device_target + 28));
-  le_target = (cl_uint)le32toh(((uint32_t *)blk->work->/*device_*/target)[7]);
-//  le_target = (cl_uint)((uint32_t *)blk->work->target)[7];
-
-
-//  memcpy(clState->cldata, blk->work->data, 80);
-  flip80(clState->cldata, blk->work->data);
-  status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL);
-
-  CL_SET_ARG(clState->CLbuffer0);
-  CL_SET_ARG(clState->outputBuffer);
-  CL_SET_ARG(clState->padbuffer8);
-  CL_SET_ARG(clState->buffer1);
-  CL_SET_ARG(clState->buffer2);
-  CL_SET_ARG(le_target);
-
-  return status;
-}
-
-static cl_int queue_yescrypt_multikernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads)
-{
-//  cl_kernel *kernel = &clState->kernel;
-  cl_kernel *kernel;
-  unsigned int num = 0;
-  cl_uint le_target;
-  cl_int status = 0;
-
-
-  //  le_target = (*(cl_uint *)(blk->work->device_target + 28));
-  le_target = (cl_uint)le32toh(((uint32_t *)blk->work->/*device_*/target)[7]);
-  memcpy(clState->cldata, blk->work->data, 80);
-//  flip80(clState->cldata, blk->work->data);
-  status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL);
-//pbkdf and initial sha
-  kernel = &clState->kernel;
-
-  CL_SET_ARG(clState->CLbuffer0);
-  CL_SET_ARG(clState->outputBuffer);
-  CL_SET_ARG(clState->padbuffer8);
-  CL_SET_ARG(clState->buffer1);
-  CL_SET_ARG(clState->buffer2);
-  CL_SET_ARG(clState->buffer3);
-  CL_SET_ARG(le_target);
-
-//inactive kernel
-  num = 0;
-  kernel = clState->extra_kernels;
-  CL_SET_ARG_N(0,clState->buffer1);
-  CL_SET_ARG_N(1,clState->buffer2);
-//  CL_SET_ARG_N(3, clState->buffer3);
-
-//mix2_2
-  num = 0;
-  CL_NEXTKERNEL_SET_ARG_N(0, clState->padbuffer8);
-  CL_SET_ARG_N(1,clState->buffer1);
-  CL_SET_ARG_N(2,clState->buffer2);
-  //mix2_2
-//inactive kernel
-  num = 0;
-  CL_NEXTKERNEL_SET_ARG_N(0, clState->buffer1);
-  CL_SET_ARG_N(1, clState->buffer2);
-  //mix2_2
-
-  num = 0;
-  CL_NEXTKERNEL_SET_ARG_N(0, clState->padbuffer8);
-  CL_SET_ARG_N(1, clState->buffer1);
-  CL_SET_ARG_N(2, clState->buffer2);
-
-  //inactive kernel
-  num = 0;
-  CL_NEXTKERNEL_SET_ARG_N(0, clState->buffer1);
-  CL_SET_ARG_N(1, clState->buffer2);
-  //mix2_2
-
-
-//pbkdf and finalization
-    num=0;
-  CL_NEXTKERNEL_SET_ARG(clState->CLbuffer0);
-  CL_SET_ARG(clState->outputBuffer);
-  CL_SET_ARG(clState->buffer2);
-  CL_SET_ARG(clState->buffer3);
-  CL_SET_ARG(le_target);
-
-  return status;
-}
-
 static cl_int queue_maxcoin_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
 {
   cl_kernel *kernel = &clState->kernel;
@@ -1127,31 +1028,6 @@ static cl_int queue_decred_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_u
   return status;
 }
 
-static cl_int queue_lbry_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
-{
-  cl_kernel *kernel = &clState->kernel;
-  unsigned int num = 0;
-  cl_ulong le_target;
-  cl_int status = 0;
-
-  le_target = *(cl_ulong *)(blk->work->target + 24);
-  flip112(clState->cldata, blk->work->data);
-  status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 112, clState->cldata, 0, NULL, NULL);
-
-  CL_SET_ARG(clState->CLbuffer0);
-  CL_SET_ARG(clState->padbuffer8);
-  num = 0;
-  kernel = clState->extra_kernels;
-  CL_SET_ARG_0(clState->padbuffer8);
-  num = 0;
-  
-  CL_NEXTKERNEL_SET_ARG(clState->padbuffer8);
-  CL_SET_ARG(clState->outputBuffer);
-  CL_SET_ARG(le_target);
-
-  return status;
-}
-
 static algorithm_settings_t algos[] = {
   // kernels starting from this will have difficulty calculated by using litecoin algorithm
 #define A_SCRYPT(a) \
@@ -1185,16 +1061,6 @@ static algorithm_settings_t algos[] = {
   A_DECRED("decred"),
 #undef A_DECRED
 
-#define A_YESCRYPT(a) \
-  { a, ALGO_YESCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, yescrypt_regenhash, NULL, NULL, queue_yescrypt_kernel, gen_hash, append_neoscrypt_compiler_options}
-  A_YESCRYPT("yescrypt"),
-#undef A_YESCRYPT
-
-#define A_YESCRYPT_MULTI(a) \
-  { a, ALGO_YESCRYPT_MULTI, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 6,-1,CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE , yescrypt_regenhash, NULL, NULL, queue_yescrypt_multikernel, gen_hash, append_neoscrypt_compiler_options}
-  A_YESCRYPT_MULTI("yescrypt-multi"),
-#undef A_YESCRYPT_MULTI
-
   // kernels starting from this will have difficulty calculated by using quarkcoin algorithm
 #define A_QUARK(a, b) \
   { a, ALGO_QUARK, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, NULL, NULL, queue_sph_kernel, gen_hash, append_x11_compiler_options }
@@ -1253,8 +1119,6 @@ static algorithm_settings_t algos[] = {
   { "sia",         ALGO_SIA,       "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000FFFFUL, 0, 128, 0, sia_regenhash, NULL, NULL, queue_sia_kernel, NULL, NULL },
   { "vanilla",     ALGO_VANILLA,   "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x000000ffUL, 0, 128, 0, blakecoin_regenhash, blakecoin_midstate, blakecoin_prepare_work, queue_blake_kernel, gen_hash, NULL },
 
-  { "lbry", ALGO_LBRY, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 2, 4 * 8 * 4194304, 0, lbry_regenhash, NULL, NULL, queue_lbry_kernel, gen_hash, NULL },
-
   { "pascal", ALGO_PASCAL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, pascal_regenhash, pascal_midstate, NULL, queue_pascal_kernel, NULL, NULL },
 
 
diff --git a/algorithm/lbry.c b/algorithm/lbry.c
deleted file mode 100644
index 163c2e73..00000000
--- a/algorithm/lbry.c
+++ /dev/null
@@ -1,60 +0,0 @@
-#include "config.h"
-#include "miner.h"
-
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-
-#include "sph/sph_sha2.h"
-#include "sph/sph_ripemd.h"
-
-typedef struct {
-  sph_sha256_context  sha256;
-  sph_sha512_context  sha512;
-  sph_ripemd160_context  ripemd;
-} lbryhash_context_holder;
-
-void lbryhash(void* output, const void* input)
-{
-  uint32_t hashA[16], hashB[16], hashC[16];
-  lbryhash_context_holder ctx;
-
-  sph_sha256_init(&ctx.sha256);
-  sph_sha512_init(&ctx.sha512);
-  sph_ripemd160_init(&ctx.ripemd);
-
-  sph_sha256 (&ctx.sha256, input, 112);
-  sph_sha256_close(&ctx.sha256, hashA);
-
-  sph_sha256 (&ctx.sha256, hashA, 32);
-  sph_sha256_close(&ctx.sha256, hashA);
-
-  sph_sha512 (&ctx.sha512, hashA, 32);
-  sph_sha512_close(&ctx.sha512, hashA);
-
-  sph_ripemd160 (&ctx.ripemd, hashA, 32);
-  sph_ripemd160_close(&ctx.ripemd, hashB);
-
-  sph_ripemd160 (&ctx.ripemd, hashA+8, 32);
-  sph_ripemd160_close(&ctx.ripemd, hashC);
-
-  sph_sha256 (&ctx.sha256, hashB, 20);
-  sph_sha256 (&ctx.sha256, hashC, 20);
-  sph_sha256_close(&ctx.sha256, hashA);
-
-  sph_sha256 (&ctx.sha256, hashA, 32);
-  sph_sha256_close(&ctx.sha256, hashA);
-
-  memcpy(output, hashA, 32);
-}
-
-void lbry_regenhash(struct work *work)
-{
-  uint32_t data[28];
-  uint32_t *nonce = (uint32_t *)(work->data + 108);
-  uint32_t *ohash = (uint32_t *)(work->hash);
-
-  be32enc_vect(data, (const uint32_t *)work->data, 27);
-  data[27] = htobe32(*nonce);
-  lbryhash(ohash, data);
-}
diff --git a/algorithm/lbry.h b/algorithm/lbry.h
deleted file mode 100644
index c9bbdbdf..00000000
--- a/algorithm/lbry.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef LBRY_H
-#define LBRY_H
-
-#include "miner.h"
-
-extern void lbry_regenhash(struct work *work);
-
-#endif
diff --git a/algorithm/yescrypt-opt.c b/algorithm/yescrypt-opt.c
deleted file mode 100644
index 6adef7e7..00000000
--- a/algorithm/yescrypt-opt.c
+++ /dev/null
@@ -1,1364 +0,0 @@
-/*-
- * Copyright 2009 Colin Percival
- * Copyright 2013,2014 Alexander Peslyak
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * This file was originally written by Colin Percival as part of the Tarsnap
- * online backup system.
- */
-
-#ifdef __i386__
-#warning "This implementation does not use SIMD, and thus it runs a lot slower than the SIMD-enabled implementation. Enable at least SSE2 in the C compiler and use yescrypt-best.c instead unless you're building this SIMD-less implementation on purpose (portability to older CPUs or testing)."
-#elif defined(__x86_64__)
-#warning "This implementation does not use SIMD, and thus it runs a lot slower than the SIMD-enabled implementation. Use yescrypt-best.c instead unless you're building this SIMD-less implementation on purpose (for testing only)."
-#endif
-
-#include <errno.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include "algorithm/yescrypt_core.h"
-#include "sph/sha256_Y.h"
-#include "algorithm/sysendian.h"
-
-// #include "sph/yescrypt-platform.c"
-#define HUGEPAGE_THRESHOLD		(12 * 1024 * 1024)
-
-#ifdef __x86_64__
-#define HUGEPAGE_SIZE			(2 * 1024 * 1024)
-#else
-#undef HUGEPAGE_SIZE
-#endif
-
-
-static void *
-alloc_region(yescrypt_region_t * region, size_t size)
-{
-	size_t base_size = size;
-	uint8_t * base, *aligned;
-#ifdef MAP_ANON
-	int flags =
-#ifdef MAP_NOCORE
-		MAP_NOCORE |
-#endif
-		MAP_ANON | MAP_PRIVATE;
-#if defined(MAP_HUGETLB) && defined(HUGEPAGE_SIZE)
-	size_t new_size = size;
-	const size_t hugepage_mask = (size_t)HUGEPAGE_SIZE - 1;
-	if (size >= HUGEPAGE_THRESHOLD && size + hugepage_mask >= size) {
-		flags |= MAP_HUGETLB;
-		/*
-		* Linux's munmap() fails on MAP_HUGETLB mappings if size is not a multiple of
-		* huge page size, so let's round up to huge page size here.
-		*/
-		new_size = size + hugepage_mask;
-		new_size &= ~hugepage_mask;
-	}
-	base = mmap(NULL, new_size, PROT_READ | PROT_WRITE, flags, -1, 0);
-	if (base != MAP_FAILED) {
-		base_size = new_size;
-	}
-	else
-		if (flags & MAP_HUGETLB) {
-		flags &= ~MAP_HUGETLB;
-		base = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0);
-		}
-
-#else
-	base = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0);
-#endif
-	if (base == MAP_FAILED)
-		base = NULL;
-	aligned = base;
-#elif defined(HAVE_POSIX_MEMALIGN)
-	if ((errno = posix_memalign((void **)&base, 64, size)) != 0)
-		base = NULL;
-	aligned = base;
-#else
-	base = aligned = NULL;
-	if (size + 63 < size) {
-		errno = ENOMEM;
-	}
-	else if ((base = (uint8_t *)malloc(size + 63)) != NULL) {
-		aligned = base + 63;
-		aligned -= (uintptr_t)aligned & 63;
-	}
-#endif
-	region->base = base;
-	region->aligned = aligned;
-	region->base_size = base ? base_size : 0;
-	region->aligned_size = base ? size : 0;
-	return aligned;
-}
-
-static  void init_region(yescrypt_region_t * region)
-{
-	region->base = region->aligned = NULL;
-	region->base_size = region->aligned_size = 0;
-}
-
-static int
-free_region(yescrypt_region_t * region)
-{
-	if (region->base) {
-#ifdef MAP_ANON
-		if (munmap(region->base, region->base_size))
-			return -1;
-#else
-		free(region->base);
-#endif
-	}
-	init_region(region);
-	return 0;
-}
-
-int
-yescrypt_init_shared(yescrypt_shared_t * shared,
-const uint8_t * param, size_t paramlen,
-uint64_t N, uint32_t r, uint32_t p,
-yescrypt_init_shared_flags_t flags, uint32_t mask,
-uint8_t * buf, size_t buflen)
-{
-	yescrypt_shared1_t * shared1 = &shared->shared1;
-	yescrypt_shared_t dummy, half1, half2;
-	//    yescrypt_shared_t * half2;
-	uint8_t salt[32];
-
-	if (flags & YESCRYPT_SHARED_PREALLOCATED) {
-		if (!shared1->aligned || !shared1->aligned_size)
-			return -1;
-	}
-	else {
-		init_region(shared1);
-	}
-	shared->mask1 = 1;
-	if (!param && !paramlen && !N && !r && !p && !buf && !buflen)
-		return 0;
-
-	init_region(&dummy.shared1);
-	dummy.mask1 = 1;
-	if (yescrypt_kdf(&dummy, shared1,
-		param, paramlen, NULL, 0, N, r, p, 0,
-		YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_1,
-		salt, sizeof(salt)))
-		goto out;
-
-	half1 = half2 = *shared;
-	half1.shared1.aligned_size /= 2;
-	half2.shared1.aligned_size = half1.shared1.aligned_size;
-	half2.shared1.aligned = (char*)half2.shared1.aligned + half1.shared1.aligned_size;
-
-	N /= 2;
-
-	if (p > 1 && yescrypt_kdf(&half1, &half2.shared1,
-		param, paramlen, salt, sizeof(salt), N, r, p, 0,
-		YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_2,
-		salt, sizeof(salt)))
-		goto out;
-
-	if (yescrypt_kdf(&half2, &half1.shared1,
-		param, paramlen, salt, sizeof(salt), N, r, p, 0,
-		YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_1,
-		salt, sizeof(salt)))
-		goto out;
-
-	if (yescrypt_kdf(&half1, &half2.shared1,
-		param, paramlen, salt, sizeof(salt), N, r, p, 0,
-		YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_1,
-		buf, buflen))
-		goto out;
-
-	shared->mask1 = mask;
-
-	return 0;
-
-out:
-	if (!(flags & YESCRYPT_SHARED_PREALLOCATED))
-		free_region(shared1);
-	return -1;
-}
-
-int
-yescrypt_free_shared(yescrypt_shared_t * shared)
-{
-	return free_region(&shared->shared1);
-}
-
-int
-yescrypt_init_local(yescrypt_local_t * local)
-{
-	init_region(local);
-	return 0;
-}
-
-int
-yescrypt_free_local(yescrypt_local_t * local)
-{
-	return free_region(local);
-}
-
-
-static void
-blkcpy(uint64_t * dest, const uint64_t * src, size_t count)
-{
-	do {
-		*dest++ = *src++; *dest++ = *src++;
-		*dest++ = *src++; *dest++ = *src++;
-	} while (count -= 4);
-};
-
-static void
-blkxor(uint64_t * dest, const uint64_t * src, size_t count)
-{
-	do {
-		*dest++ ^= *src++; *dest++ ^= *src++;
-		*dest++ ^= *src++; *dest++ ^= *src++;
-	} while (count -= 4);
-};
-
-typedef union {
-	uint32_t w[16];
-	uint64_t d[8];
-} salsa20_blk_t;
-
-static  void
-salsa20_simd_shuffle(const salsa20_blk_t * Bin, salsa20_blk_t * Bout)
-{
-#define COMBINE(out, in1, in2) \
-	Bout->d[out] = Bin->w[in1 * 2] | ((uint64_t)Bin->w[in2 * 2 + 1] << 32);
-	COMBINE(0, 0, 2)
-	COMBINE(1, 5, 7)
-	COMBINE(2, 2, 4)
-	COMBINE(3, 7, 1)
-	COMBINE(4, 4, 6)
-	COMBINE(5, 1, 3)
-	COMBINE(6, 6, 0)
-	COMBINE(7, 3, 5)
-#undef COMBINE
-}
-
-static void
-salsa20_simd_unshuffle(const salsa20_blk_t * Bin, salsa20_blk_t * Bout)
-{
-#define COMBINE(out, in1, in2) \
-	Bout->w[out * 2] = Bin->d[in1]; \
-	Bout->w[out * 2 + 1] = Bin->d[in2] >> 32;
-	COMBINE(0, 0, 6)
-	COMBINE(1, 5, 3)
-	COMBINE(2, 2, 0)
-	COMBINE(3, 7, 5)
-	COMBINE(4, 4, 2)
-	COMBINE(5, 1, 7)
-	COMBINE(6, 6, 4)
-	COMBINE(7, 3, 1)
-#undef COMBINE
-}
-
-/**
- * salsa20_8(B):
- * Apply the salsa20/8 core to the provided block.
- */
-
-static void
-salsa20_8(uint64_t B[8])
-{
-	size_t i;
-	salsa20_blk_t X;
-
-#define x X.w
-
-	salsa20_simd_unshuffle((const salsa20_blk_t *)B, &X);
-
-	for (i = 0; i < 8; i += 2) {
-#define R(a,b) (((a) << (b)) | ((a) >> (32 - (b))))
-		/* Operate on columns */
-		x[ 4] ^= R(x[ 0]+x[12], 7);  x[ 8] ^= R(x[ 4]+x[ 0], 9);
-		x[12] ^= R(x[ 8]+x[ 4],13);  x[ 0] ^= R(x[12]+x[ 8],18);
-
-		x[ 9] ^= R(x[ 5]+x[ 1], 7);  x[13] ^= R(x[ 9]+x[ 5], 9);
-		x[ 1] ^= R(x[13]+x[ 9],13);  x[ 5] ^= R(x[ 1]+x[13],18);
-
-		x[14] ^= R(x[10]+x[ 6], 7);  x[ 2] ^= R(x[14]+x[10], 9);
-		x[ 6] ^= R(x[ 2]+x[14],13);  x[10] ^= R(x[ 6]+x[ 2],18);
-
-		x[ 3] ^= R(x[15]+x[11], 7);  x[ 7] ^= R(x[ 3]+x[15], 9);
-		x[11] ^= R(x[ 7]+x[ 3],13);  x[15] ^= R(x[11]+x[ 7],18);
-
-		/* Operate on rows */
-		x[ 1] ^= R(x[ 0]+x[ 3], 7);  x[ 2] ^= R(x[ 1]+x[ 0], 9);
-		x[ 3] ^= R(x[ 2]+x[ 1],13);  x[ 0] ^= R(x[ 3]+x[ 2],18);
-
-		x[ 6] ^= R(x[ 5]+x[ 4], 7);  x[ 7] ^= R(x[ 6]+x[ 5], 9);
-		x[ 4] ^= R(x[ 7]+x[ 6],13);  x[ 5] ^= R(x[ 4]+x[ 7],18);
-
-		x[11] ^= R(x[10]+x[ 9], 7);  x[ 8] ^= R(x[11]+x[10], 9);
-		x[ 9] ^= R(x[ 8]+x[11],13);  x[10] ^= R(x[ 9]+x[ 8],18);
-
-		x[12] ^= R(x[15]+x[14], 7);  x[13] ^= R(x[12]+x[15], 9);
-		x[14] ^= R(x[13]+x[12],13);  x[15] ^= R(x[14]+x[13],18);
-#undef R
-	}
-#undef x
-
-	{
-		salsa20_blk_t Y;
-		salsa20_simd_shuffle(&X, &Y);
-		for (i = 0; i < 16; i += 4) {
-			((salsa20_blk_t *)B)->w[i] += Y.w[i];
-			((salsa20_blk_t *)B)->w[i + 1] += Y.w[i + 1];
-			((salsa20_blk_t *)B)->w[i + 2] += Y.w[i + 2];
-			((salsa20_blk_t *)B)->w[i + 3] += Y.w[i + 3];
-		}
-	}
-}
-
-/**
- * blockmix_salsa8(Bin, Bout, X, r):
- * Compute Bout = BlockMix_{salsa20/8, r}(Bin).  The input Bin must be 128r
- * bytes in length; the output Bout must also be the same size.  The
- * temporary space X must be 64 bytes.
- */
-static void
-blockmix_salsa8(const uint64_t * Bin, uint64_t * Bout, uint64_t * X, size_t r)
-{
-	size_t i;
-
-	/* 1: X <-- B_{2r - 1} */
-	blkcpy(X, &Bin[(2 * r - 1) * 8], 8);
-
-	/* 2: for i = 0 to 2r - 1 do */
-	for (i = 0; i < 2 * r; i += 2) {
-		/* 3: X <-- H(X \xor B_i) */
-		blkxor(X, &Bin[i * 8], 8);
-		salsa20_8(X);
-
-		/* 4: Y_i <-- X */
-		/* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
-		blkcpy(&Bout[i * 4], X, 8);
-
-		/* 3: X <-- H(X \xor B_i) */
-		blkxor(X, &Bin[i * 8 + 8], 8);
-		salsa20_8(X);
-
-		/* 4: Y_i <-- X */
-		/* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
-		blkcpy(&Bout[i * 4 + r * 8], X, 8);
-	}
-
-}
-
-/* These are tunable */
-#define S_BITS 8
-#define S_SIMD 2
-#define S_P 4
-#define S_ROUNDS 6
-
-/* Number of S-boxes.  Not tunable, hard-coded in a few places. */
-#define S_N 2
-
-/* Derived values.  Not tunable on their own. */
-#define S_SIZE1 (1 << S_BITS)
-#define S_MASK ((S_SIZE1 - 1) * S_SIMD * 8)
-#define S_MASK2 (((uint64_t)S_MASK << 32) | S_MASK)
-#define S_SIZE_ALL (S_N * S_SIZE1 * S_SIMD)
-#define S_P_SIZE (S_P * S_SIMD)
-#define S_MIN_R ((S_P * S_SIMD + 15) / 16)
-
-/**
- * pwxform(B):
- * Transform the provided block using the provided S-boxes.
- */
-
-static void
-block_pwxform(uint64_t * B, const uint64_t * S)
-{
-	uint64_t(*X)[S_SIMD] = (uint64_t(*)[S_SIMD])B;
-	const uint8_t *S0 = (const uint8_t *)S;
-	const uint8_t *S1 = (const uint8_t *)(S + S_SIZE1 * S_SIMD);
-	size_t i, j;
-
-	for (j = 0; j < S_P; j++) {
-
-		uint64_t *Xj = X[j];
-		uint64_t x0 = Xj[0];
-		uint64_t x1 = Xj[1];
-
-		for (i = 0; i < S_ROUNDS; i++) {
-			uint64_t x = x0 & S_MASK2;
-			const uint64_t *p0, *p1;
-
-			p0 = (const uint64_t *)(S0 + (uint32_t)x);
-			p1 = (const uint64_t *)(S1 + (x >> 32));
-	
-			x0 = (uint64_t)(x0 >> 32) * (uint32_t)x0;
-			x0 += p0[0];
-			x0 ^= p1[0];
-
-			x1 = (uint64_t)(x1 >> 32) * (uint32_t)x1;
-			x1 += p0[1];
-			x1 ^= p1[1];
-		}
-		Xj[0] = x0;
-		Xj[1] = x1;
-	}
-
-
-
-}
-
-
-/**
- * blockmix_pwxform(Bin, Bout, S, r):
- * Compute Bout = BlockMix_pwxform{salsa20/8, S, r}(Bin).  The input Bin must
- * be 128r bytes in length; the output Bout must also be the same size.
- *
- * S lacks const qualifier to match blockmix_salsa8()'s prototype, which we
- * need to refer to both functions via the same function pointers.
- */
-static void
-blockmix_pwxform(const uint64_t * Bin, uint64_t * Bout, uint64_t * S, size_t r)
-{
-	size_t r1, r2, i;
-   // S_P_SIZE = 8;
-	/* Convert 128-byte blocks to (S_P_SIZE * 64-bit) blocks */
-
-	r1 = r * 128 / (S_P_SIZE * 8);
-	/* X <-- B_{r1 - 1} */
-	blkcpy(Bout, &Bin[(r1 - 1) * S_P_SIZE], S_P_SIZE);
-
-	/* X <-- X \xor B_i */
-	blkxor(Bout, Bin, S_P_SIZE);
-
-	/* X <-- H'(X) */
-	/* B'_i <-- X */
-	block_pwxform(Bout, S);
-
-	/* for i = 0 to r1 - 1 do */
-	for (i = 1; i < r1; i++) {
-		/* X <-- X \xor B_i */
-		blkcpy(&Bout[i * S_P_SIZE], &Bout[(i - 1) * S_P_SIZE],S_P_SIZE);
-		blkxor(&Bout[i * S_P_SIZE], &Bin[i * S_P_SIZE], S_P_SIZE);
-
-		/* X <-- H'(X) */
-		/* B'_i <-- X */
-		block_pwxform(&Bout[i * S_P_SIZE], S);
-	}
-	
-	/* Handle partial blocks */
-	if (i * S_P_SIZE < r * 16) {
-		blkcpy(&Bout[i * S_P_SIZE], &Bin[i * S_P_SIZE],r * 16 - i * S_P_SIZE);
-}
-
-	i = (r1 - 1) * S_P_SIZE / 8;
-	/* Convert 128-byte blocks to 64-byte blocks */
-	r2 = r * 2;
-
-	/* B'_i <-- H(B'_i) */
-	salsa20_8(&Bout[i * 8]);
-
-
-	i++;
-/// not used yescrypt
-
-	for (; i < r2; i++) {
-		/* B'_i <-- H(B'_i \xor B'_{i-1}) */
-		blkxor(&Bout[i * 8], &Bout[(i - 1) * 8], 8);
-		salsa20_8(&Bout[i * 8]);
-	}
-}
-
-
-
-/**
- * integerify(B, r):
- * Return the result of parsing B_{2r-1} as a little-endian integer.
- */
-static  uint64_t
-integerify(const uint64_t * B, size_t r)
-{
-/*
- * Our 64-bit words are in host byte order, and word 6 holds the second 32-bit
- * word of B_{2r-1} due to SIMD shuffling.  The 64-bit value we return is also
- * in host byte order, as it should be.
- */
-	const uint64_t * X = &B[(2 * r - 1) * 8];
-	uint32_t lo = X[0];
-	uint32_t hi = X[6] >> 32;
-	return ((uint64_t)hi << 32) + lo;
-}
-
-/**
- * smix1(B, r, N, flags, V, NROM, shared, XY, S):
- * Compute first loop of B = SMix_r(B, N).  The input B must be 128r bytes in
- * length; the temporary storage V must be 128rN bytes in length; the temporary
- * storage XY must be 256r + 64 bytes in length.  The value N must be even and
- * no smaller than 2.
- */
-static void
-smix1(uint64_t * B, size_t r, uint64_t N, yescrypt_flags_t flags,
-	uint64_t * V, uint64_t NROM, const yescrypt_shared_t * shared,
-	uint64_t * XY, uint64_t * S)
-{
-	void (*blockmix)(const uint64_t *, uint64_t *, uint64_t *, size_t) = (S ? blockmix_pwxform : blockmix_salsa8);
-	const uint64_t * VROM = (uint64_t *)shared->shared1.aligned;
-	uint32_t VROM_mask = shared->mask1;
-	size_t s = 16 * r;
-	uint64_t * X = V;
-	uint64_t * Y = &XY[s];
-	uint64_t * Z = S ? S : &XY[2 * s];
-	uint64_t n, i, j;
-	size_t k;
-
-	/* 1: X <-- B */
-	/* 3: V_i <-- X */
-	for (i = 0; i < 2 * r; i++) {
-		const salsa20_blk_t *src = (const salsa20_blk_t *)&B[i * 8];
-		salsa20_blk_t *tmp = (salsa20_blk_t *)Y;
-		salsa20_blk_t *dst = (salsa20_blk_t *)&X[i * 8];
-		for (k = 0; k < 16; k++) 
-			tmp->w[k] = le32dec(&src->w[k]);
-                                 
-		salsa20_simd_shuffle(tmp, dst);
-	}
-
-	/* 4: X <-- H(X) */
-	/* 3: V_i <-- X */
-
-	blockmix(X, Y, Z, r);
-	blkcpy(&V[s], Y, s);
-	X = XY;
-
-	if (NROM && (VROM_mask & 1)) {
-		if ((1 & VROM_mask) == 1) {
-			/* j <-- Integerify(X) mod NROM */
-			j = integerify(Y, r) & (NROM - 1);
-
-			/* X <-- H(X \xor VROM_j) */
-			blkxor(Y, &VROM[j * s], s);
-		}
-	
-		blockmix(Y, X, Z, r);
-
-
-		/* 2: for i = 0 to N - 1 do */
-		for (n = 1, i = 2; i < N; i += 2) {
-			/* 3: V_i <-- X */
-			blkcpy(&V[i * s], X, s);
-
-			if ((i & (i - 1)) == 0)
-				n <<= 1;
-
-			/* j <-- Wrap(Integerify(X), i) */
-			j = integerify(X, r) & (n - 1);
-			j += i - n;
-
-			/* X <-- X \xor V_j */
-			blkxor(X, &V[j * s], s);
-
-			/* 4: X <-- H(X) */
-			blockmix(X, Y, Z, r);
-
-			/* 3: V_i <-- X */
-			blkcpy(&V[(i + 1) * s], Y, s);
-
-			j = integerify(Y, r);
-			if (((i + 1) & VROM_mask) == 1) {
-				/* j <-- Integerify(X) mod NROM */
-				j &= NROM - 1;
-
-				/* X <-- H(X \xor VROM_j) */
-				blkxor(Y, &VROM[j * s], s);
-			} else {
-				/* j <-- Wrap(Integerify(X), i) */
-				j &= n - 1;
-				j += i + 1 - n;
-
-				/* X <-- H(X \xor V_j) */
-				blkxor(Y, &V[j * s], s);
-			}
-
-			blockmix(Y, X, Z, r);
-		}
-	} else {
-		yescrypt_flags_t rw = flags & YESCRYPT_RW;
-		/* 4: X <-- H(X) */
-		blockmix(Y, X, Z, r);
-
-		/* 2: for i = 0 to N - 1 do */
-		for (n = 1, i = 2; i < N; i += 2) {
-			/* 3: V_i <-- X */
-			blkcpy(&V[i * s], X, s);
-
-			if (rw) {
-				if ((i & (i - 1)) == 0)
-					n <<= 1;
-
-				/* j <-- Wrap(Integerify(X), i) */
-				j = integerify(X, r) & (n - 1);
-				j += i - n;
-				
-				/* X <-- X \xor V_j */
-				blkxor(X, &V[j * s], s);
-			}
-
-			/* 4: X <-- H(X) */
-			blockmix(X, Y, Z, r);
-
-			/* 3: V_i <-- X */
-			blkcpy(&V[(i + 1) * s], Y, s);
-
-			if (rw) {
-				/* j <-- Wrap(Integerify(X), i) */
-				j = integerify(Y, r) & (n - 1);
-				j += (i + 1) - n;
-				
-
-				/* X <-- X \xor V_j */
-				blkxor(Y, &V[j * s], s);
-			}
-
-			/* 4: X <-- H(X) */
-			blockmix(Y, X, Z, r);
-		}
-	}
-
-	/* B' <-- X */
-	for (i = 0; i < 2 * r; i++) {
-		const salsa20_blk_t *src = (const salsa20_blk_t *)&X[i * 8];
-		salsa20_blk_t *tmp = (salsa20_blk_t *)Y;
-		salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 8];
-		for (k = 0; k < 16; k++)
-			le32enc(&tmp->w[k], src->w[k]);
-		salsa20_simd_unshuffle(tmp, dst);
-	}
-}
-
-
-
-/**
- * smix2(B, r, N, Nloop, flags, V, NROM, shared, XY, S):
- * Compute second loop of B = SMix_r(B, N).  The input B must be 128r bytes in
- * length; the temporary storage V must be 128rN bytes in length; the temporary
- * storage XY must be 256r + 64 bytes in length.  The value N must be a
- * power of 2 greater than 1.  The value Nloop must be even.
- */
-static void
-smix2(uint64_t * B, size_t r, uint64_t N, uint64_t Nloop,
-	yescrypt_flags_t flags,
-	uint64_t * V, uint64_t NROM, const yescrypt_shared_t * shared,
-	uint64_t * XY, uint64_t * S)
-{
-	
-	void (*blockmix)(const uint64_t *, uint64_t *, uint64_t *, size_t) =
-		(S ? blockmix_pwxform : blockmix_salsa8);
-	const uint64_t * VROM = (uint64_t *)shared->shared1.aligned;
-	uint32_t VROM_mask = shared->mask1 | 1;
-	size_t s = 16 * r;
-	yescrypt_flags_t rw = flags & YESCRYPT_RW;
-	uint64_t * X = XY;
-	uint64_t * Y = &XY[s];
-	uint64_t * Z = S ? S : &XY[2 * s];
-	uint64_t i, j;
-	size_t k;
-
-	if (Nloop == 0)
-		return;
-
-	/* X <-- B' */
-	for (i = 0; i < 2 * r; i++) {
-		const salsa20_blk_t *src = (const salsa20_blk_t *)&B[i * 8];
-		salsa20_blk_t *tmp = (salsa20_blk_t *)Y;
-		salsa20_blk_t *dst = (salsa20_blk_t *)&X[i * 8];
-		for (k = 0; k < 16; k++)
-			tmp->w[k] = le32dec(&src->w[k]);
-		salsa20_simd_shuffle(tmp, dst);
-	}
-	if (NROM) {
-
-		/* 6: for i = 0 to N - 1 do */
-		for (i = 0; i < Nloop; i += 2) {
-			/* 7: j <-- Integerify(X) mod N */
-			j = integerify(X, r) & (N - 1);
-
-			/* 8: X <-- H(X \xor V_j) */
-			blkxor(X, &V[j * s], s);
-			/* V_j <-- Xprev \xor V_j */
-			if (rw)
-				blkcpy(&V[j * s], X, s);
-			blockmix(X, Y, Z, r);
-
-			j = integerify(Y, r);
-			if (((i + 1) & VROM_mask) == 1) {
-				/* j <-- Integerify(X) mod NROM */
-				j &= NROM - 1;
-
-				/* X <-- H(X \xor VROM_j) */
-				blkxor(Y, &VROM[j * s], s);
-			} else {
-				/* 7: j <-- Integerify(X) mod N */
-				j &= N - 1;
-
-				/* 8: X <-- H(X \xor V_j) */
-				blkxor(Y, &V[j * s], s);
-				/* V_j <-- Xprev \xor V_j */
-				if (rw)
-					blkcpy(&V[j * s], Y, s);
-			}
-
-			blockmix(Y, X, Z, r);
-		}
-	} else {
-
-		/* 6: for i = 0 to N - 1 do */
-		i = Nloop / 2;
-		do {
-			/* 7: j <-- Integerify(X) mod N */
-			j = integerify(X, r) & (N - 1);
-
-			/* 8: X <-- H(X \xor V_j) */
-			blkxor(X, &V[j * s], s);
-			/* V_j <-- Xprev \xor V_j */
-			if (rw)
-				blkcpy(&V[j * s], X, s);
-			blockmix(X, Y, Z, r);
-
-			/* 7: j <-- Integerify(X) mod N */
-			j = integerify(Y, r) & (N - 1);
-
-			/* 8: X <-- H(X \xor V_j) */
-			blkxor(Y, &V[j * s], s);
-			/* V_j <-- Xprev \xor V_j */
-			if (rw)
-				blkcpy(&V[j * s], Y, s);
-			blockmix(Y, X, Z, r);
-		} while (--i);
-	}
-
-	/* 10: B' <-- X */
-	for (i = 0; i < 2 * r; i++) {
-		const salsa20_blk_t *src = (const salsa20_blk_t *)&X[i * 8];
-		salsa20_blk_t *tmp = (salsa20_blk_t *)Y;
-		salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 8];
-		for (k = 0; k < 16; k++)
-			le32enc(&tmp->w[k], src->w[k]);
-		salsa20_simd_unshuffle(tmp, dst);
-	}
-}
-
-
-
-
-/**
- * p2floor(x):
- * Largest power of 2 not greater than argument.
- */
-static uint64_t
-p2floor(uint64_t x)
-{
-	uint64_t y;
-	while ((y = x & (x - 1)))
-		x = y;
-	return x;
-}
-
-/**
- * smix(B, r, N, p, t, flags, V, NROM, shared, XY, S):
- * Compute B = SMix_r(B, N).  The input B must be 128rp bytes in length; the
- * temporary storage V must be 128rN bytes in length; the temporary storage
- * XY must be 256r+64 or (256r+64)*p bytes in length (the larger size is
- * required with OpenMP-enabled builds).  The value N must be a power of 2
- * greater than 1.
- */
-static void
-smix(uint64_t * B, size_t r, uint64_t N, uint32_t p, uint32_t t,
-	yescrypt_flags_t flags,
-	uint64_t * V, uint64_t NROM, const yescrypt_shared_t * shared,
-	uint64_t * XY, uint64_t * S)
-{
-	size_t s = 16 * r;
-	uint64_t Nchunk = N / p, Nloop_all, Nloop_rw;
-	uint32_t i;
-
-	Nloop_all = Nchunk;
-	if (flags & YESCRYPT_RW) {
-		if (t <= 1) {
-			if (t)
-				Nloop_all *= 2; /* 2/3 */
-			Nloop_all = (Nloop_all + 2) / 3; /* 1/3, round up */
-		} else {
-			Nloop_all *= t - 1;
-		}
-	} else if (t) {
-		if (t == 1)
-			Nloop_all += (Nloop_all + 1) / 2; /* 1.5, round up */
-		Nloop_all *= t;
-	}
-
-	Nloop_rw = 0;
-	if (flags & __YESCRYPT_INIT_SHARED)
-		Nloop_rw = Nloop_all;
-	else if (flags & YESCRYPT_RW)
-		Nloop_rw = Nloop_all / p;
-
-	Nchunk &= ~(uint64_t)1; /* round down to even */
-	Nloop_all++; Nloop_all &= ~(uint64_t)1; /* round up to even */
-	Nloop_rw &= ~(uint64_t)1; /* round down to even */
-
-
-	for (i = 0; i < p; i++) {
-		uint64_t Vchunk = i * Nchunk;
-		uint64_t * Bp = &B[i * s];
-		uint64_t * Vp = &V[Vchunk * s];
-		uint64_t * XYp = XY;
-
-	uint64_t Np = (i < p - 1) ? Nchunk : (N - Vchunk);
-		uint64_t * Sp = S ? &S[i * S_SIZE_ALL] : S;
-
-		if (Sp) 
-			smix1(Bp, 1, S_SIZE_ALL / 16, (yescrypt_flags_t)flags & ~YESCRYPT_PWXFORM,Sp, NROM, shared, XYp, NULL);
-
-	
-
-		if (!(flags & __YESCRYPT_INIT_SHARED_2)) 
-			smix1(Bp, r, Np, flags, Vp, NROM, shared, XYp, Sp);
-
-
-			smix2(Bp, r, p2floor(Np), Nloop_rw, flags, Vp, NROM, shared, XYp, Sp);
-
-
-
-	}
-	if (Nloop_all > Nloop_rw) {
-
-		for (i = 0; i < p; i++) {
-			uint64_t * Bp = &B[i * s];
-
-			uint64_t * XYp = XY;
-
-			uint64_t * Sp = S ? &S[i * S_SIZE_ALL] : S;
-			smix2(Bp, r, N, Nloop_all - Nloop_rw,flags & ~YESCRYPT_RW, V, NROM, shared, XYp, Sp);
-
-		}
-	}
-
-
-
-
-}
-
-static void
-smix_old(uint64_t * B, size_t r, uint64_t N, uint32_t p, uint32_t t,
-yescrypt_flags_t flags,
-uint64_t * V, uint64_t NROM, const yescrypt_shared_t * shared,
-uint64_t * XY, uint64_t * S)
-{
-	size_t s = 16 * r;
-	uint64_t Nchunk = N / p, Nloop_all, Nloop_rw;
-	uint32_t i;
-
-	Nloop_all = Nchunk;
-	if (flags & YESCRYPT_RW) {
-		if (t <= 1) {
-			if (t)
-				Nloop_all *= 2; /* 2/3 */
-			Nloop_all = (Nloop_all + 2) / 3; /* 1/3, round up */
-		}
-		else {
-			Nloop_all *= t - 1;
-		}
-	}
-	else if (t) {
-		if (t == 1)
-			Nloop_all += (Nloop_all + 1) / 2; /* 1.5, round up */
-		Nloop_all *= t;
-	}
-
-	Nloop_rw = 0;
-	if (flags & __YESCRYPT_INIT_SHARED)
-		Nloop_rw = Nloop_all;
-	else if (flags & YESCRYPT_RW)
-		Nloop_rw = Nloop_all / p;
-
-	Nchunk &= ~(uint64_t)1; /* round down to even */
-	Nloop_all++; Nloop_all &= ~(uint64_t)1; /* round up to even */
-	Nloop_rw &= ~(uint64_t)1; /* round down to even */
-
-
-	for (i = 0; i < p; i++) {
-		uint64_t Vchunk = i * Nchunk;
-		uint64_t * Bp = &B[i * s];
-		uint64_t * Vp = &V[Vchunk * s];
-		uint64_t * XYp = XY;
-
-		uint64_t Np = (i < p - 1) ? Nchunk : (N - Vchunk);
-		uint64_t * Sp = S ? &S[i * S_SIZE_ALL] : S;
-
-		if (Sp) {
-			smix1(Bp, 1, S_SIZE_ALL / 16, flags & ~YESCRYPT_PWXFORM, Sp, NROM, shared, XYp, NULL);
-
-
-		}
-		if (!(flags & __YESCRYPT_INIT_SHARED_2)) {
-			smix1(Bp, r, Np, flags, Vp, NROM, shared, XYp, Sp);
-		}
-
-
-		smix2(Bp, r, p2floor(Np), Nloop_rw, flags, Vp, NROM, shared, XYp, Sp);
-	}
-	
-	if (Nloop_all > Nloop_rw) {
-
-		for (i = 0; i < p; i++) {
-			uint64_t * Bp = &B[i * s];
-
-			uint64_t * XYp = XY;
-
-			uint64_t * Sp = S ? &S[i * S_SIZE_ALL] : S;
-			smix2(Bp, r, N, Nloop_all - Nloop_rw, flags & ~YESCRYPT_RW, V, NROM, shared, XYp, Sp);
-		}
-	}
-}
-
-/**
- * yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen,
- *     N, r, p, t, flags, buf, buflen):
- * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r,
- * p, buflen), or a revision of scrypt as requested by flags and shared, and
- * write the result into buf.  The parameters r, p, and buflen must satisfy
- * r * p < 2^30 and buflen <= (2^32 - 1) * 32.  The parameter N must be a power
- * of 2 greater than 1.
- *
- * t controls computation time while not affecting peak memory usage.  shared
- * and flags may request special modes as described in yescrypt.h.  local is
- * the thread-local data structure, allowing to preserve and reuse a memory
- * allocation across calls, thereby reducing its overhead.
- *
- * Return 0 on success; or -1 on error.
- */
-int
-yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local,
-	const uint8_t * passwd, size_t passwdlen,
-	const uint8_t * salt, size_t saltlen,
-	uint64_t N, uint32_t r, uint32_t p, uint32_t t, yescrypt_flags_t flags,
-	uint8_t * buf, size_t buflen)
-{
-	yescrypt_region_t tmp;
-	uint64_t NROM;
-	size_t B_size, V_size, XY_size, need;
-	uint64_t * B, * V, * XY, * S;
-	uint64_t sha256[4];
-
-	/*
-	 * YESCRYPT_PARALLEL_SMIX is a no-op at p = 1 for its intended purpose,
-	 * so don't let it have side-effects.  Without this adjustment, it'd
-	 * enable the SHA-256 password pre-hashing and output post-hashing,
-	 * because any deviation from classic scrypt implies those.
-	 */
-	if (p == 1)
-		flags &= ~YESCRYPT_PARALLEL_SMIX;
-
-	/* Sanity-check parameters */
-	if (flags & ~YESCRYPT_KNOWN_FLAGS) {
-		errno = EINVAL;
-		return -1;
-	}
-#if SIZE_MAX > UINT32_MAX
-	if (buflen > (((uint64_t)(1) << 32) - 1) * 32) {
-		errno = EFBIG;
-		return -1;
-	}
-#endif
-	if ((uint64_t)(r) * (uint64_t)(p) >= (1 << 30)) {
-		errno = EFBIG;
-		return -1;
-	}
-	if (((N & (N - 1)) != 0) || (N <= 1) || (r < 1) || (p < 1)) {
-		errno = EINVAL;
-		return -1;
-	}
-	if ((flags & YESCRYPT_PARALLEL_SMIX) && (N / p <= 1)) {
-		errno = EINVAL;
-		return -1;
-	}
-#if S_MIN_R > 1
-	if ((flags & YESCRYPT_PWXFORM) && (r < S_MIN_R)) {
-		errno = EINVAL;
-		return -1;
-	}
-#endif
-	if ((p > SIZE_MAX / ((size_t)256 * r + 64)) ||
-#if SIZE_MAX / 256 <= UINT32_MAX
-		(r > SIZE_MAX / 256) ||
-#endif
-		(N > SIZE_MAX / 128 / r)) {
-		errno = ENOMEM;
-		return -1;
-	}
-	if (N > UINT64_MAX / ((uint64_t)t + 1)) {
-		errno = EFBIG;
-		return -1;
-	}
-
-	if ((flags & YESCRYPT_PWXFORM) &&
-		p > SIZE_MAX / (S_SIZE_ALL * sizeof(*S))) {
-		errno = ENOMEM;
-		return -1;
-	}
-
-	NROM = 0;
-	if (shared->shared1.aligned) {
-		NROM = shared->shared1.aligned_size / ((size_t)128 * r);
-		if (((NROM & (NROM - 1)) != 0) || (NROM <= 1) ||
-			!(flags & YESCRYPT_RW)) {
-			errno = EINVAL;
-			return -1;
-		}
-	}
-
-	/* Allocate memory */
-	V = NULL;
-	V_size = (size_t)128 * r * N;
-
-	need = V_size;
-	if (flags & __YESCRYPT_INIT_SHARED) {
-		if (local->aligned_size < need) {
-			if (local->base || local->aligned ||
-				local->base_size || local->aligned_size) {
-				errno = EINVAL;
-				return -1;
-			}
-			if (!alloc_region(local, need))
-				return -1;
-		}
-		V = (uint64_t *)local->aligned;
-		need = 0;
-	}
-	B_size = (size_t)128 * r * p;
-	need += B_size;
-	if (need < B_size) {
-		errno = ENOMEM;
-		return -1;
-	}
-	XY_size = (size_t)256 * r + 64;
-
-	need += XY_size;
-	if (need < XY_size) {
-		errno = ENOMEM;
-		return -1;
-	}
-	if (flags & YESCRYPT_PWXFORM) {
-		size_t S_size = S_SIZE_ALL * sizeof(*S);
-
-		if (flags & YESCRYPT_PARALLEL_SMIX)
-			S_size *= p;
-
-		need += S_size;
-		if (need < S_size) {
-			errno = ENOMEM;
-			return -1;
-		}
-	}
-	if (flags & __YESCRYPT_INIT_SHARED) {
-		if (!alloc_region(&tmp, need))
-			return -1;
-		B = (uint64_t *)tmp.aligned;
-		XY = (uint64_t *)((uint8_t *)B + B_size);
-	} else {
-		init_region(&tmp);
-		if (local->aligned_size < need) {
-			if (free_region(local))
-				return -1;
-			if (!alloc_region(local, need))
-				return -1;
-		}
-		B = (uint64_t *)local->aligned;
-		V = (uint64_t *)((uint8_t *)B + B_size);
-		XY = (uint64_t *)((uint8_t *)V + V_size);
-	}
-	S = NULL;
-	if (flags & YESCRYPT_PWXFORM)
-		S = (uint64_t *)((uint8_t *)XY + XY_size);
-
-
-	if (t || flags) {
-		SHA256_CTX_Y ctx;
-		SHA256_Init_Y(&ctx);
-		SHA256_Update_Y(&ctx, passwd, passwdlen);
-		SHA256_Final_Y((uint8_t *)sha256, &ctx);
-		passwd = (uint8_t *)sha256;
-		passwdlen = sizeof(sha256);
-	}
-	/* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */
-	PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, 1,(uint8_t *)B, B_size);
-
-	if (t || flags) 
-	{
-		blkcpy(sha256, B, sizeof(sha256) / sizeof(sha256[0]));
-	}
-	if (p == 1 || (flags & YESCRYPT_PARALLEL_SMIX)) {
-		smix(B, r, N, p, t, flags, V, NROM, shared, XY, S);
-	} else {
-		uint32_t i;
-		/* 2: for i = 0 to p - 1 do */
-		for (i = 0; i < p; i++) {
-			/* 3: B_i <-- MF(B_i, N) */
-			smix(&B[(size_t)16 * r * i], r, N, 1, t, flags, V, NROM, shared, XY, S);
-		}
-	}
-
-	/* 5: DK <-- PBKDF2(P, B, 1, dkLen) */
-
-	PBKDF2_SHA256(passwd, passwdlen, (uint8_t *)B, B_size, 1, buf, buflen);
-	/*
-	 * Except when computing classic scrypt, allow all computation so far
-	 * to be performed on the client.  The final steps below match those of
-	 * SCRAM (RFC 5802), so that an extension of SCRAM (with the steps so
-	 * far in place of SCRAM's use of PBKDF2 and with SHA-256 in place of
-	 * SCRAM's use of SHA-1) would be usable with yescrypt hashes.
-	 */
-	if ((t || flags) && buflen == sizeof(sha256)) {
-		/* Compute ClientKey */
-
-		{
-			HMAC_SHA256_CTX_Y ctx;
-			HMAC_SHA256_Init_Y(&ctx, buf, buflen);
-			HMAC_SHA256_Update_Y(&ctx, salt, saltlen);
-			HMAC_SHA256_Final_Y((uint8_t *)sha256, &ctx);
-		}
-		/* Compute StoredKey */
-		{
-			SHA256_CTX_Y ctx;
-			SHA256_Init_Y(&ctx);
-			SHA256_Update_Y(&ctx, (uint8_t *)sha256, sizeof(sha256));
-			SHA256_Final_Y(buf, &ctx);
-		}
-	}
-
-	if (free_region(&tmp))
-		return -1;
-
-	/* Success! */
-	return 0;
-}
-
-int
-yescrypt_kdf_old(const yescrypt_shared_t * shared, yescrypt_local_t * local,
-const uint8_t * passwd, size_t passwdlen,
-const uint8_t * salt, size_t saltlen,
-uint64_t N, uint32_t r, uint32_t p, uint32_t t, yescrypt_flags_t flags,
-uint8_t * buf, size_t buflen)
-{
-	yescrypt_region_t tmp;
-	uint64_t NROM;
-	size_t B_size, V_size, XY_size, need;
-	uint64_t * B, *V, *XY, *S;
-	uint64_t sha256[4];
-
-	/*
-	* YESCRYPT_PARALLEL_SMIX is a no-op at p = 1 for its intended purpose,
-	* so don't let it have side-effects.  Without this adjustment, it'd
-	* enable the SHA-256 password pre-hashing and output post-hashing,
-	* because any deviation from classic scrypt implies those.
-	*/
-	if (p == 1)
-		flags &= ~YESCRYPT_PARALLEL_SMIX;
-
-	/* Sanity-check parameters */
-	if (flags & ~YESCRYPT_KNOWN_FLAGS) {
-		errno = EINVAL;
-		return -1;
-	}
-#if SIZE_MAX > UINT32_MAX
-	if (buflen > (((uint64_t)(1) << 32) - 1) * 32) {
-		errno = EFBIG;
-		return -1;
-	}
-#endif
-	if ((uint64_t)(r)* (uint64_t)(p) >= (1 << 30)) {
-		errno = EFBIG;
-		return -1;
-	}
-	if (((N & (N - 1)) != 0) || (N <= 1) || (r < 1) || (p < 1)) {
-		errno = EINVAL;
-		return -1;
-	}
-	if ((flags & YESCRYPT_PARALLEL_SMIX) && (N / p <= 1)) {
-		errno = EINVAL;
-		return -1;
-	}
-#if S_MIN_R > 1
-	if ((flags & YESCRYPT_PWXFORM) && (r < S_MIN_R)) {
-		errno = EINVAL;
-		return -1;
-	}
-#endif
-	if ((p > SIZE_MAX / ((size_t)256 * r + 64)) ||
-#if SIZE_MAX / 256 <= UINT32_MAX
-		(r > SIZE_MAX / 256) ||
-#endif
-		(N > SIZE_MAX / 128 / r)) {
-		errno = ENOMEM;
-		return -1;
-	}
-	if (N > UINT64_MAX / ((uint64_t)t + 1)) {
-		errno = EFBIG;
-		return -1;
-	}
-
-	if ((flags & YESCRYPT_PWXFORM) &&
-		p > SIZE_MAX / (S_SIZE_ALL * sizeof(*S))) {
-		errno = ENOMEM;
-		return -1;
-	}
-
-	NROM = 0;
-	if (shared->shared1.aligned) {
-		NROM = shared->shared1.aligned_size / ((size_t)128 * r);
-		if (((NROM & (NROM - 1)) != 0) || (NROM <= 1) ||
-			!(flags & YESCRYPT_RW)) {
-			errno = EINVAL;
-			return -1;
-		}
-	}
-
-	/* Allocate memory */
-	V = NULL;
-	V_size = (size_t)128 * r * N;
-
-	need = V_size;
-	if (flags & __YESCRYPT_INIT_SHARED) {
-		if (local->aligned_size < need) {
-			if (local->base || local->aligned ||
-				local->base_size || local->aligned_size) {
-				errno = EINVAL;
-				return -1;
-			}
-			if (!alloc_region(local, need))
-				return -1;
-		}
-		V = (uint64_t *)local->aligned;
-		need = 0;
-	}
-	B_size = (size_t)128 * r * p;
-	need += B_size;
-	if (need < B_size) {
-		errno = ENOMEM;
-		return -1;
-	}
-	XY_size = (size_t)256 * r + 64;
-
-	need += XY_size;
-	if (need < XY_size) {
-		errno = ENOMEM;
-		return -1;
-	}
-	if (flags & YESCRYPT_PWXFORM) {
-		size_t S_size = S_SIZE_ALL * sizeof(*S);
-
-		if (flags & YESCRYPT_PARALLEL_SMIX)
-			S_size *= p;
-
-		need += S_size;
-		if (need < S_size) {
-			errno = ENOMEM;
-			return -1;
-		}
-	}
-	if (flags & __YESCRYPT_INIT_SHARED) {
-		if (!alloc_region(&tmp, need))
-			return -1;
-		B = (uint64_t *)tmp.aligned;
-		XY = (uint64_t *)((uint8_t *)B + B_size);
-	}
-	else {
-		init_region(&tmp);
-		if (local->aligned_size < need) {
-			if (free_region(local))
-				return -1;
-			if (!alloc_region(local, need))
-				return -1;
-		}
-		B = (uint64_t *)local->aligned;
-		V = (uint64_t *)((uint8_t *)B + B_size);
-		XY = (uint64_t *)((uint8_t *)V + V_size);
-	}
-	S = NULL;
-	if (flags & YESCRYPT_PWXFORM)
-		S = (uint64_t *)((uint8_t *)XY + XY_size);
-
-
-	if (t || flags) {
-		SHA256_CTX_Y ctx;
-		SHA256_Init_Y(&ctx);
-		SHA256_Update_Y(&ctx, passwd, passwdlen);
-		SHA256_Final_Y((uint8_t *)sha256, &ctx);
-		passwd = (uint8_t *)sha256;
-		passwdlen = sizeof(sha256);
-	}
-
-	/* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */
-	PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, 1, (uint8_t *)B, B_size);
-
-
-	if (t || flags)
-	{
-		blkcpy(sha256, B, sizeof(sha256) / sizeof(sha256[0]));
-	}
-		smix(B, r, N, p, t, flags, V, NROM, shared, XY, S);
-
-
-	/* 5: DK <-- PBKDF2(P, B, 1, dkLen) */
-	PBKDF2_SHA256(passwd, passwdlen, (uint8_t *)B, B_size, 1, buf, buflen);
-
-	/*
-	* Except when computing classic scrypt, allow all computation so far
-	* to be performed on the client.  The final steps below match those of
-	* SCRAM (RFC 5802), so that an extension of SCRAM (with the steps so
-	* far in place of SCRAM's use of PBKDF2 and with SHA-256 in place of
-	* SCRAM's use of SHA-1) would be usable with yescrypt hashes.
-	*/
-	if ((t || flags) && buflen == sizeof(sha256)) {
-		/* Compute ClientKey */
-
-		{
-			HMAC_SHA256_CTX_Y ctx;
-			HMAC_SHA256_Init_Y(&ctx, buf, buflen);
-			HMAC_SHA256_Update_Y(&ctx, salt, saltlen);
-			HMAC_SHA256_Final_Y((uint8_t *)sha256, &ctx);
-		}
-		/* Compute StoredKey */
-		{
-			SHA256_CTX_Y ctx;
-			SHA256_Init_Y(&ctx);
-			SHA256_Update_Y(&ctx, (uint8_t *)sha256, sizeof(sha256));
-			SHA256_Final_Y(buf, &ctx);
-		}
-	}
-
-	if (free_region(&tmp))
-		return -1;
-
-	/* Success! */
-	return 0;
-}
-
diff --git a/algorithm/yescrypt.c b/algorithm/yescrypt.c
deleted file mode 100644
index 31e0c623..00000000
--- a/algorithm/yescrypt.c
+++ /dev/null
@@ -1,119 +0,0 @@
-/*-
- * Copyright 2015 djm34 
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "miner.h"
-
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-
-#include "algorithm/yescrypt_core.h"
-
-static const uint32_t diff1targ = 0x0000ffff;
-
-/* Used externally as confirmation of correct OCL code */
-int yescrypt_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
-{
-	uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]);
-	uint32_t data[20], ohash[8];
-
-	be32enc_vect(data, (const uint32_t *)pdata, 19);
-	data[19] = htobe32(nonce);
-	yescrypt_hash((unsigned char*)data,(unsigned char*)ohash);
-
-	tmp_hash7 = be32toh(ohash[7]);
-
-	applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx",
-		(long unsigned int)Htarg,
-		(long unsigned int)diff1targ,
-		(long unsigned int)tmp_hash7);
-
-	if (tmp_hash7 > diff1targ)
-		return -1;
-
-	if (tmp_hash7 > Htarg)
-		return 0;
-
-	return 1;
-}
-
-void yescrypt_regenhash(struct work *work)
-{
-        uint32_t data[20];
-        uint32_t *nonce = (uint32_t *)(work->data + 76);
-        uint32_t *ohash = (uint32_t *)(work->hash);
-
-        be32enc_vect(data, (const uint32_t *)work->data, 19);
-        data[19] = htobe32(*nonce);	
-
-		yescrypt_hash((unsigned char*)data, (unsigned char*)ohash);
-        
-}
-
-
-bool scanhash_yescrypt(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
-	unsigned char *pdata, unsigned char __maybe_unused *phash1,
-	unsigned char __maybe_unused *phash, const unsigned char *ptarget,
-	uint32_t max_nonce, uint32_t *last_nonce, uint32_t n)
-{
-	uint32_t *nonce = (uint32_t *)(pdata + 76);
-	uint32_t data[20];
-	uint32_t tmp_hash7;
-	uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]);
-	bool ret = false;
-
-	be32enc_vect(data, (const uint32_t *)pdata, 19);
-	
-	while (1)
-	{
-		uint32_t ostate[8];
-
-		*nonce = ++n;
-		data[19] = (n);
-
-		yescrypt_hash((unsigned char*)data, (unsigned char*)ostate);
-		tmp_hash7 = (ostate[7]);
-
-		applog(LOG_INFO, "data7 %08lx", (long unsigned int)data[7]);
-
-		if (unlikely(tmp_hash7 <= Htarg))
-		{
-			((uint32_t *)pdata)[19] = htobe32(n);
-			*last_nonce = n;
-			ret = true;
-			break;
-		}
-
-		if (unlikely((n >= max_nonce) || thr->work_restart))
-		{
-			*last_nonce = n;
-			break;
-		}
-	}
-
-	return ret;
-}
\ No newline at end of file
diff --git a/algorithm/yescrypt.h b/algorithm/yescrypt.h
deleted file mode 100644
index b51cb495..00000000
--- a/algorithm/yescrypt.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef YESCRYPT_H
-#define YESCRYPT_H
-
-#include "miner.h"
-#define YESCRYPT_SCRATCHBUF_SIZE (128 * 2048 * 8 ) //uchar
-#define YESCRYP_SECBUF_SIZE (128*64*8)
-extern int yescrypt_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce);
-extern void yescrypt_regenhash(struct work *work);
-
-#endif /* YESCRYPT_H */
diff --git a/algorithm/yescrypt_core.h b/algorithm/yescrypt_core.h
deleted file mode 100644
index 64b9a11f..00000000
--- a/algorithm/yescrypt_core.h
+++ /dev/null
@@ -1,376 +0,0 @@
-/*-
- * Copyright 2009 Colin Percival
- * Copyright 2013,2014 Alexander Peslyak
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * This file was originally written by Colin Percival as part of the Tarsnap
- * online backup system.
- */
-#ifndef _YESCRYPT_H_
-#define _YESCRYPT_H_
-
-#include <stdint.h>
-#include <stdlib.h> /* for size_t */
-#include <errno.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-//extern void yescrypt_hash_sp(const unsigned char *input, unsigned char *output);
-extern void yescrypt_hash(const unsigned char *input, unsigned char *output);
-
-
-
-/**
- * crypto_scrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen):
- * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r,
- * p, buflen) and write the result into buf.  The parameters r, p, and buflen
- * must satisfy r * p < 2^30 and buflen <= (2^32 - 1) * 32.  The parameter N
- * must be a power of 2 greater than 1.
- *
- * Return 0 on success; or -1 on error.
- *
- * MT-safe as long as buf is local to the thread.
- */
-extern int crypto_scrypt(const uint8_t * __passwd, size_t __passwdlen,
-	const uint8_t * __salt, size_t __saltlen,
-	uint64_t __N, uint32_t __r, uint32_t __p,
-	uint8_t * __buf, size_t __buflen);
-
-/**
- * Internal type used by the memory allocator.  Please do not use it directly.
- * Use yescrypt_shared_t and yescrypt_local_t as appropriate instead, since
- * they might differ from each other in a future version.
- */
-typedef struct {
-	void * base, * aligned;
-	size_t base_size, aligned_size;
-} yescrypt_region_t;
-
-/**
- * Types for shared (ROM) and thread-local (RAM) data structures.
- */
-typedef yescrypt_region_t yescrypt_shared1_t;
-typedef struct {
-	yescrypt_shared1_t shared1;
-	uint32_t mask1;
-} yescrypt_shared_t;
-typedef yescrypt_region_t yescrypt_local_t;
-
-/**
- * Possible values for yescrypt_init_shared()'s flags argument.
- */
-typedef enum {
-	YESCRYPT_SHARED_DEFAULTS = 0,
-	YESCRYPT_SHARED_PREALLOCATED = 0x100
-} yescrypt_init_shared_flags_t;
-
-/**
- * Possible values for the flags argument of yescrypt_kdf(),
- * yescrypt_gensalt_r(), yescrypt_gensalt().  These may be OR'ed together,
- * except that YESCRYPT_WORM and YESCRYPT_RW are mutually exclusive.
- * Please refer to the description of yescrypt_kdf() below for the meaning of
- * these flags.
- */
-typedef enum {
-/* public */
-	YESCRYPT_WORM = 0,
-	YESCRYPT_RW = 1,
-	YESCRYPT_PARALLEL_SMIX = 2,
-	YESCRYPT_PWXFORM = 4,
-/* private */
-	__YESCRYPT_INIT_SHARED_1 = 0x10000,
-	__YESCRYPT_INIT_SHARED_2 = 0x20000,
-	__YESCRYPT_INIT_SHARED = 0x30000
-} yescrypt_flags_t;
-
-#define YESCRYPT_KNOWN_FLAGS \
-	(YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | YESCRYPT_PWXFORM | \
-	__YESCRYPT_INIT_SHARED)
-
-/**
- * yescrypt_init_shared(shared, param, paramlen, N, r, p, flags, mask,
- *     buf, buflen):
- * Optionally allocate memory for and initialize the shared (ROM) data
- * structure.  The parameters N, r, and p must satisfy the same conditions as
- * with crypto_scrypt().  param and paramlen specify a local parameter with
- * which the ROM is seeded.  If buf is not NULL, then it is used to return
- * buflen bytes of message digest for the initialized ROM (the caller may use
- * this to verify that the ROM has been computed in the same way that it was on
- * a previous run).
- *
- * Return 0 on success; or -1 on error.
- *
- * If bit YESCRYPT_SHARED_PREALLOCATED in flags is set, then memory for the
- * ROM is assumed to have been preallocated by the caller, with
- * shared->shared1.aligned being the start address of the ROM and
- * shared->shared1.aligned_size being its size (which must be consistent with
- * N, r, and p).  This may be used e.g. when the ROM is to be placed in a SysV
- * shared memory segment allocated by the caller.
- *
- * mask controls the frequency of ROM accesses by yescrypt_kdf().  Normally it
- * should be set to 1, to interleave RAM and ROM accesses, which works well
- * when both regions reside in the machine's RAM anyway.  Other values may be
- * used e.g. when the ROM is memory-mapped from a disk file.  Recommended mask
- * values are powers of 2 minus 1 or minus 2.  Here's the effect of some mask
- * values:
- * mask	value	ROM accesses in SMix 1st loop	ROM accesses in SMix 2nd loop
- *	0		0				1/2
- *	1		1/2				1/2
- *	2		0				1/4
- *	3		1/4				1/4
- *	6		0				1/8
- *	7		1/8				1/8
- *	14		0				1/16
- *	15		1/16				1/16
- *	1022		0				1/1024
- *	1023		1/1024				1/1024
- *
- * Actual computation of the ROM contents may be avoided, if you don't intend
- * to use a ROM but need a dummy shared structure, by calling this function
- * with NULL, 0, 0, 0, 0, YESCRYPT_SHARED_DEFAULTS, 0, NULL, 0 for the
- * arguments starting with param and on.
- *
- * MT-safe as long as shared is local to the thread.
- */
-extern int yescrypt_init_shared(yescrypt_shared_t * __shared,
-	const uint8_t * __param, size_t __paramlen,
-	uint64_t __N, uint32_t __r, uint32_t __p,
-	yescrypt_init_shared_flags_t __flags, uint32_t __mask,
-	uint8_t * __buf, size_t __buflen);
-
-/**
- * yescrypt_free_shared(shared):
- * Free memory that had been allocated with yescrypt_init_shared().
- *
- * Return 0 on success; or -1 on error.
- *
- * MT-safe as long as shared is local to the thread.
- */
-extern int yescrypt_free_shared(yescrypt_shared_t * __shared);
-
-/**
- * yescrypt_init_local(local):
- * Initialize the thread-local (RAM) data structure.  Actual memory allocation
- * is currently fully postponed until a call to yescrypt_kdf() or yescrypt_r().
- *
- * Return 0 on success; or -1 on error.
- *
- * MT-safe as long as local is local to the thread.
- */
-extern int yescrypt_init_local(yescrypt_local_t * __local);
-
-/**
- * yescrypt_free_local(local):
- * Free memory that may have been allocated for an initialized thread-local
- * (RAM) data structure.
- *
- * Return 0 on success; or -1 on error.
- *
- * MT-safe as long as local is local to the thread.
- */
-extern int yescrypt_free_local(yescrypt_local_t * __local);
-
-/**
- * yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen,
- *     N, r, p, t, flags, buf, buflen):
- * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r,
- * p, buflen), or a revision of scrypt as requested by flags and shared, and
- * write the result into buf.  The parameters N, r, p, and buflen must satisfy
- * the same conditions as with crypto_scrypt().  t controls computation time
- * while not affecting peak memory usage.  shared and flags may request
- * special modes as described below.  local is the thread-local data
- * structure, allowing to preserve and reuse a memory allocation across calls,
- * thereby reducing its overhead.
- *
- * Return 0 on success; or -1 on error.
- *
- * t controls computation time.  t = 0 is optimal in terms of achieving the
- * highest area-time for ASIC attackers.  Thus, higher computation time, if
- * affordable, is best achieved by increasing N rather than by increasing t.
- * However, if the higher memory usage (which goes along with higher N) is not
- * affordable, or if fine-tuning of the time is needed (recall that N must be a
- * power of 2), then t = 1 or above may be used to increase time while staying
- * at the same peak memory usage.  t = 1 increases the time by 25% and
- * decreases the normalized area-time to 96% of optimal.  (Of course, in
- * absolute terms the area-time increases with higher t.  It's just that it
- * would increase slightly more with higher N*r rather than with higher t.)
- * t = 2 increases the time by another 20% and decreases the normalized
- * area-time to 89% of optimal.  Thus, these two values are reasonable to use
- * for fine-tuning.  Values of t higher than 2 result in further increase in
- * time while reducing the efficiency much further (e.g., down to around 50% of
- * optimal for t = 5, which runs 3 to 4 times slower than t = 0, with exact
- * numbers varying by the flags settings).
- *
- * Classic scrypt is available by setting t = 0 and flags to YESCRYPT_WORM and
- * passing a dummy shared structure (see the description of
- * yescrypt_init_shared() above for how to produce one).  In this mode, the
- * thread-local memory region (RAM) is first sequentially written to and then
- * randomly read from.  This algorithm is friendly towards time-memory
- * tradeoffs (TMTO), available both to defenders (albeit not in this
- * implementation) and to attackers.
- *
- * Setting YESCRYPT_RW adds extra random reads and writes to the thread-local
- * memory region (RAM), which makes TMTO a lot less efficient.  This may be
- * used to slow down the kinds of attackers who would otherwise benefit from
- * classic scrypt's efficient TMTO.  Since classic scrypt's TMTO allows not
- * only for the tradeoff, but also for a decrease of attacker's area-time (by
- * up to a constant factor), setting YESCRYPT_RW substantially increases the
- * cost of attacks in area-time terms as well.  Yet another benefit of it is
- * that optimal area-time is reached at an earlier time than with classic
- * scrypt, and t = 0 actually corresponds to this earlier completion time,
- * resulting in quicker hash computations (and thus in higher request rate
- * capacity).  Due to these properties, YESCRYPT_RW should almost always be
- * set, except when compatibility with classic scrypt or TMTO-friendliness are
- * desired.
- *
- * YESCRYPT_PARALLEL_SMIX moves parallelism that is present with p > 1 to a
- * lower level as compared to where it is in classic scrypt.  This reduces
- * flexibility for efficient computation (for both attackers and defenders) by
- * requiring that, short of resorting to TMTO, the full amount of memory be
- * allocated as needed for the specified p, regardless of whether that
- * parallelism is actually being fully made use of or not.  (For comparison, a
- * single instance of classic scrypt may be computed in less memory without any
- * CPU time overhead, but in more real time, by not making full use of the
- * parallelism.)  This may be desirable when the defender has enough memory
- * with sufficiently low latency and high bandwidth for efficient full parallel
- * execution, yet the required memory size is high enough that some likely
- * attackers might end up being forced to choose between using higher latency
- * memory than they could use otherwise (waiting for data longer) or using TMTO
- * (waiting for data more times per one hash computation).  The area-time cost
- * for other kinds of attackers (who would use the same memory type and TMTO
- * factor or no TMTO either way) remains roughly the same, given the same
- * running time for the defender.  In the TMTO-friendly YESCRYPT_WORM mode, as
- * long as the defender has enough memory that is just as fast as the smaller
- * per-thread regions would be, doesn't expect to ever need greater
- * flexibility (except possibly via TMTO), and doesn't need backwards
- * compatibility with classic scrypt, there are no other serious drawbacks to
- * this setting.  In the YESCRYPT_RW mode, which is meant to discourage TMTO,
- * this new approach to parallelization makes TMTO less inefficient.  (This is
- * an unfortunate side-effect of avoiding some random writes, as we have to in
- * order to allow for parallel threads to access a common memory region without
- * synchronization overhead.)  Thus, in this mode this setting poses an extra
- * tradeoff of its own (higher area-time cost for a subset of attackers vs.
- * better TMTO resistance).  Setting YESCRYPT_PARALLEL_SMIX also changes the
- * way the running time is to be controlled from N*r*p (for classic scrypt) to
- * N*r (in this modification).  All of this applies only when p > 1.  For
- * p = 1, this setting is a no-op.
- *
- * Passing a real shared structure, with ROM contents previously computed by
- * yescrypt_init_shared(), enables the use of ROM and requires YESCRYPT_RW for
- * the thread-local RAM region.  In order to allow for initialization of the
- * ROM to be split into a separate program, the shared->shared1.aligned and
- * shared->shared1.aligned_size fields may be set by the caller of
- * yescrypt_kdf() manually rather than with yescrypt_init_shared().
- *
- * local must be initialized with yescrypt_init_local().
- *
- * MT-safe as long as local and buf are local to the thread.
- */
-extern int yescrypt_kdf(const yescrypt_shared_t * __shared,
-	yescrypt_local_t * __local,
-	const uint8_t * __passwd, size_t __passwdlen,
-	const uint8_t * __salt, size_t __saltlen,
-	uint64_t __N, uint32_t __r, uint32_t __p, uint32_t __t,
-	yescrypt_flags_t __flags,
-	uint8_t * __buf, size_t __buflen);
-
-/**
- * yescrypt_r(shared, local, passwd, passwdlen, setting, buf, buflen):
- * Compute and encode an scrypt or enhanced scrypt hash of passwd given the
- * parameters and salt value encoded in setting.  If the shared structure is
- * not dummy, a ROM is used and YESCRYPT_RW is required.  Otherwise, whether to
- * use the YESCRYPT_WORM (classic scrypt) or YESCRYPT_RW (time-memory tradeoff
- * discouraging modification) is determined by the setting string.  shared and
- * local must be initialized as described above for yescrypt_kdf().  buf must
- * be large enough (as indicated by buflen) to hold the encoded hash string.
- *
- * Return the encoded hash string on success; or NULL on error.
- *
- * MT-safe as long as local and buf are local to the thread.
- */
-extern uint8_t * yescrypt_r(const yescrypt_shared_t * __shared,
-	yescrypt_local_t * __local,
-	const uint8_t * __passwd, size_t __passwdlen,
-	const uint8_t * __setting,
-	uint8_t * __buf, size_t __buflen);
-
-/**
- * yescrypt(passwd, setting):
- * Compute and encode an scrypt or enhanced scrypt hash of passwd given the
- * parameters and salt value encoded in setting.  Whether to use the
- * YESCRYPT_WORM (classic scrypt) or YESCRYPT_RW (time-memory tradeoff
- * discouraging modification) is determined by the setting string.
- *
- * Return the encoded hash string on success; or NULL on error.
- *
- * This is a crypt(3)-like interface, which is simpler to use than
- * yescrypt_r(), but it is not MT-safe, it does not allow for the use of a ROM,
- * and it is slower than yescrypt_r() for repeated calls because it allocates
- * and frees memory on each call.
- *
- * MT-unsafe.
- */
-extern uint8_t * yescrypt(const uint8_t * __passwd, const uint8_t * __setting);
-
-/**
- * yescrypt_gensalt_r(N_log2, r, p, flags, src, srclen, buf, buflen):
- * Generate a setting string for use with yescrypt_r() and yescrypt() by
- * encoding into it the parameters N_log2 (which is to be set to base 2
- * logarithm of the desired value for N), r, p, flags, and a salt given by src
- * (of srclen bytes).  buf must be large enough (as indicated by buflen) to
- * hold the setting string.
- *
- * Return the setting string on success; or NULL on error.
- *
- * MT-safe as long as buf is local to the thread.
- */
-extern uint8_t * yescrypt_gensalt_r(
-	uint32_t __N_log2, uint32_t __r, uint32_t __p,
-	yescrypt_flags_t __flags,
-	const uint8_t * __src, size_t __srclen,
-	uint8_t * __buf, size_t __buflen);
-
-/**
- * yescrypt_gensalt(N_log2, r, p, flags, src, srclen):
- * Generate a setting string for use with yescrypt_r() and yescrypt().  This
- * function is the same as yescrypt_gensalt_r() except that it uses a static
- * buffer and thus is not MT-safe.
- *
- * Return the setting string on success; or NULL on error.
- *
- * MT-unsafe.
- */
-extern uint8_t * yescrypt_gensalt(
-	uint32_t __N_log2, uint32_t __r, uint32_t __p,
-	yescrypt_flags_t __flags,
-	const uint8_t * __src, size_t __srclen);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* !_YESCRYPT_H_ */
diff --git a/algorithm/yescryptcommon.c b/algorithm/yescryptcommon.c
deleted file mode 100644
index cf7067d0..00000000
--- a/algorithm/yescryptcommon.c
+++ /dev/null
@@ -1,360 +0,0 @@
-/*-
- * Copyright 2013,2014 Alexander Peslyak
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <stdint.h>
-#include <string.h>
-#include <stdio.h>
-#include "algorithm/yescrypt_core.h"
-
-#define BYTES2CHARS(bytes) \
-	((((bytes) * 8) + 5) / 6)
-
-#define HASH_SIZE 32 /* bytes */
-#define HASH_LEN BYTES2CHARS(HASH_SIZE) /* base-64 chars */
-#define YESCRYPT_FLAGS (YESCRYPT_RW | YESCRYPT_PWXFORM)
-static const char * const itoa64 =
-	"./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
-
-static uint8_t * encode64_uint32(uint8_t * dst, size_t dstlen,
-    uint32_t src, uint32_t srcbits)
-{
-	uint32_t bit;
-
-	for (bit = 0; bit < srcbits; bit += 6) {
-		if (dstlen < 1)
-			return NULL;
-		*dst++ = itoa64[src & 0x3f];
-		dstlen--;
-		src >>= 6;
-	}
-
-	return dst;
-}
-
-static uint8_t * encode64(uint8_t * dst, size_t dstlen,
-    const uint8_t * src, size_t srclen)
-{
-	size_t i;
-
-	for (i = 0; i < srclen; ) {
-		uint8_t * dnext;
-		uint32_t value = 0, bits = 0;
-		do {
-			value |= (uint32_t)src[i++] << bits;
-			bits += 8;
-		} while (bits < 24 && i < srclen);
-		dnext = encode64_uint32(dst, dstlen, value, bits);
-		if (!dnext)
-			return NULL;
-		dstlen -= dnext - dst;
-		dst = dnext;
-	}
-
-	return dst;
-}
-
-static int decode64_one(uint32_t * dst, uint8_t src)
-{
-	const char * ptr = strchr(itoa64, src);
-	if (ptr) {
-		*dst = ptr - itoa64;
-		return 0;
-	}
-	*dst = 0;
-	return -1;
-}
-
-static const uint8_t * decode64_uint32(uint32_t * dst, uint32_t dstbits,
-    const uint8_t * src)
-{
-	uint32_t bit;
-	uint32_t value;
-
-	value = 0;
-	for (bit = 0; bit < dstbits; bit += 6) {
-		uint32_t one;
-		if (decode64_one(&one, *src)) {
-			*dst = 0;
-			return NULL;
-		}
-		src++;
-		value |= one << bit;
-	}
-
-	*dst = value;
-	return src;
-}
-
-uint8_t *
-yescrypt_r(const yescrypt_shared_t * shared, yescrypt_local_t * local,
-    const uint8_t * passwd, size_t passwdlen,
-    const uint8_t * setting,
-    uint8_t * buf, size_t buflen)
-{
-	uint8_t hash[HASH_SIZE];
-	const uint8_t * src, * salt;
-	uint8_t * dst;
-	size_t prefixlen, saltlen, need;
-	uint8_t version;
-	uint64_t N;
-	uint32_t r, p;
-	yescrypt_flags_t flags = YESCRYPT_WORM;
-          fflush(stdout);
-	if (setting[0] != '$' || setting[1] != '7')
-		{
-            fflush(stdout);
-		return NULL; 
-        }
-          fflush(stdout);  
-	src = setting + 2;
-          fflush(stdout); 
-	switch ((version = *src)) {
-	case '$':
-                fflush(stdout);
-		break;
-	case 'X':
-		src++;
-		flags = YESCRYPT_RW;
-                fflush(stdout);
-		break;
-	default:
-		{
-            fflush(stdout);
-		return NULL; 
-        }
-	}
-
-          fflush(stdout);
-	if (*src != '$') {
-		uint32_t decoded_flags;
-		if (decode64_one(&decoded_flags, *src))
-
-		{
-		fflush(stdout);
-		return NULL; 
-		}
-		flags = decoded_flags;
-		if (*++src != '$')
-	    {	
-        fflush(stdout);
-		return NULL; 
-        }
-	}
-	src++;
-
-	{
-		uint32_t N_log2;
-		if (decode64_one(&N_log2, *src))
-			{
-		         return NULL; 
-            }
-		src++;
-		N = (uint64_t)1 << N_log2;
-	}
-
-	src = decode64_uint32(&r, 30, src);
-	if (!src)
-          {
-		return NULL; 
-          }
-
-	src = decode64_uint32(&p, 30, src);
-	if (!src)
-         {
-		return NULL; 
-         }
-
-	prefixlen = src - setting;
-
-	salt = src;
-	src = (uint8_t *)strrchr((char *)salt, '$');
-	if (src)
-		saltlen = src - salt;
-	else
-		saltlen = strlen((char *)salt);
-
-	need = prefixlen + saltlen + 1 + HASH_LEN + 1;
-	if (need > buflen || need < saltlen)
-                            
-		 {
-        fflush(stdout);
-		return NULL; 
-         }
-
-fflush(stdout);
-	if (yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen,
-	    N, r, p, 0, flags, hash, sizeof(hash)))
-	    {
-        fflush(stdout);
-		return NULL; 
-        }
-
-	dst = buf;
-	memcpy(dst, setting, prefixlen + saltlen);
-	dst += prefixlen + saltlen;
-	*dst++ = '$';
-
-	dst = encode64(dst, buflen - (dst - buf), hash, sizeof(hash));
-	/* Could zeroize hash[] here, but yescrypt_kdf() doesn't zeroize its
-	 * memory allocations yet anyway. */
-	if (!dst || dst >= buf + buflen) /* Can't happen */
-		{
-		return NULL; 
-        }
-
-	*dst = 0; /* NUL termination */
-           fflush(stdout);
-	return buf;
-}
-
-uint8_t *
-yescrypt(const uint8_t * passwd, const uint8_t * setting)
-{
-	static uint8_t buf[4 + 1 + 5 + 5 + BYTES2CHARS(32) + 1 + HASH_LEN + 1];
-	yescrypt_shared_t shared;
-	yescrypt_local_t local;
-	uint8_t * retval;
-	if (yescrypt_init_shared(&shared, NULL, 0,
-	    0, 0, 0, YESCRYPT_SHARED_DEFAULTS, 0, NULL, 0))
-		return NULL;
-	if (yescrypt_init_local(&local)) {
-		yescrypt_free_shared(&shared);
-		return NULL;
-	}
-	retval = yescrypt_r(&shared, &local,
-	    passwd, 80, setting, buf, sizeof(buf));
-        // printf("hashse='%s'\n", (char *)retval);
-	if (yescrypt_free_local(&local)) {
-		yescrypt_free_shared(&shared);
-		return NULL;
-	}
-	if (yescrypt_free_shared(&shared))
-		return NULL;
-	return retval;
-      
-}
-
-uint8_t *
-yescrypt_gensalt_r(uint32_t N_log2, uint32_t r, uint32_t p,
-    yescrypt_flags_t flags,
-    const uint8_t * src, size_t srclen,
-    uint8_t * buf, size_t buflen)
-{
-	uint8_t * dst;
-	size_t prefixlen = 3 + 1 + 5 + 5;
-	size_t saltlen = BYTES2CHARS(srclen);
-	size_t need;
-
-	if (p == 1)
-		flags &= ~YESCRYPT_PARALLEL_SMIX;
-
-	if (flags) {
-		if (flags & ~0x3f)
-			return NULL;
-
-		prefixlen++;
-		if (flags != YESCRYPT_RW)
-			prefixlen++;
-	}
-
-	need = prefixlen + saltlen + 1;
-	if (need > buflen || need < saltlen || saltlen < srclen)
-		return NULL;
-
-	if (N_log2 > 63 || ((uint64_t)r * (uint64_t)p >= (1U << 30)))
-		return NULL;
-
-	dst = buf;
-	*dst++ = '$';
-	*dst++ = '7';
-	if (flags) {
-		*dst++ = 'X'; /* eXperimental, subject to change */
-		if (flags != YESCRYPT_RW)
-			*dst++ = itoa64[flags];
-	}
-	*dst++ = '$';
-
-	*dst++ = itoa64[N_log2];
-
-	dst = encode64_uint32(dst, buflen - (dst - buf), r, 30);
-	if (!dst) /* Can't happen */
-		return NULL;
-
-	dst = encode64_uint32(dst, buflen - (dst - buf), p, 30);
-	if (!dst) /* Can't happen */
-		return NULL;
-
-	dst = encode64(dst, buflen - (dst - buf), src, srclen);
-	if (!dst || dst >= buf + buflen) /* Can't happen */
-		return NULL;
-
-	*dst = 0; /* NUL termination */
-
-	return buf;
-}
-
-uint8_t *
-yescrypt_gensalt(uint32_t N_log2, uint32_t r, uint32_t p,
-    yescrypt_flags_t flags,
-    const uint8_t * src, size_t srclen)
-{
-	static uint8_t buf[4 + 1 + 5 + 5 + BYTES2CHARS(32) + 1];
-	return yescrypt_gensalt_r(N_log2, r, p, flags, src, srclen,
-	    buf, sizeof(buf));
-}
-
-static int
-yescrypt_bsty(const uint8_t * passwd, size_t passwdlen,
-    const uint8_t * salt, size_t saltlen, uint64_t N, uint32_t r, uint32_t p,
-    uint8_t * buf, size_t buflen)
-{
-	static __thread int initialized = 0;
-	static __thread yescrypt_shared_t shared;
-	static __thread yescrypt_local_t local;
-
-//		static __declspec(thread) int initialized = 0;
-//		static __declspec(thread) yescrypt_shared_t shared;
-//		static  __declspec(thread) yescrypt_local_t local;
-
-	int retval;
-	if (!initialized) {
-/* "shared" could in fact be shared, but it's simpler to keep it private
- * along with "local".  It's dummy and tiny anyway. */
-		if (yescrypt_init_shared(&shared, NULL, 0,
-		    0, 0, 0, YESCRYPT_SHARED_DEFAULTS, 0, NULL, 0))
-			return -1;
-		if (yescrypt_init_local(&local)) {
-			yescrypt_free_shared(&shared);
-			return -1;
-		}
-		initialized = 1;
- 	}
-	retval = yescrypt_kdf(&shared, &local,
-	    passwd, passwdlen, salt, saltlen, N, r, p, 0, YESCRYPT_FLAGS,
-	    buf, buflen);		
-
-	return retval;
-}
-
-void yescrypt_hash(const unsigned char *input, unsigned char *output)
-{
-
-   yescrypt_bsty((const uint8_t *)input, 80, (const uint8_t *) input, 80, 2048, 8, 1, (uint8_t *)output, 32);
-}
diff --git a/kernel/lbry.cl b/kernel/lbry.cl
deleted file mode 100644
index fef4f90e..00000000
--- a/kernel/lbry.cl
+++ /dev/null
@@ -1,179 +0,0 @@
-#include "sha256.cl"
-#include "wolf-sha512.cl"
-#include "ripemd160.cl"
-
-#define SWAP32(x)    as_uint(as_uchar4(x).s3210)
-#define SWAP64(x)    as_ulong(as_uchar8(x).s76543210)
-
-
-__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
-__kernel void search(__global const uint *input, __global uint8 *ctx)
-{
-  // SHA256 takes 16 uints of input per block - we have 112 bytes to process
-  // 8 * 16 == 64, meaning two block transforms.
-
-  uint SHA256Buf[16];
-  uint gid = get_global_id(0);
-
-  // Remember the last four is the nonce - so 108 bytes / 4 bytes per dword
-  #pragma unroll
-  for(int i = 0; i < 16; ++i) SHA256Buf[i] = SWAP32(input[i]);
-
-
-
-  // SHA256 initialization constants
-  uint8 outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19);
-
-  #pragma unroll
-  for(int i = 0; i < 3; ++i)
-  {
-    if(i == 1)
-    {
-      #pragma unroll
-      for(int i = 0; i < 11; ++i) SHA256Buf[i] = SWAP32(input[i + 16]);
-      SHA256Buf[11] = SWAP32(gid);
-      SHA256Buf[12] = 0x80000000;
-      SHA256Buf[13] = 0x00000000;
-      SHA256Buf[14] = 0x00000000;
-      SHA256Buf[15] = 0x00000380;
-    }
-    if(i == 2)
-    {
-      ((uint8 *)SHA256Buf)[0] = outbuf;
-      SHA256Buf[8] = 0x80000000;
-      #pragma unroll
-      for(int i = 9; i < 15; ++i) SHA256Buf[i] = 0x00000000;
-      SHA256Buf[15] = 0x00000100;
-      outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19);
-    }
-    outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf);
-  }
-
-  /*
-  outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf);
-  #pragma unroll
-  for(int i = 0; i < 11; ++i) SHA256Buf[i] = SWAP32(input[i + 16]);
-  SHA256Buf[11] = SWAP32(gid);
-  SHA256Buf[12] = 0x80000000;
-  SHA256Buf[13] = 0x00000000;
-  SHA256Buf[14] = 0x00000000;
-  SHA256Buf[15] = 0x00000380;
-
-  outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf);
-  ((uint8 *)SHA256Buf)[0] = outbuf;
-  SHA256Buf[8] = 0x80000000;
-  for(int i = 9; i < 15; ++i) SHA256Buf[i] = 0x00000000;
-  SHA256Buf[15] = 0x00000100;
-  outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19);
-  outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf);
-  */
-
-
-  /*
-
-  //outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf);
-  //outbuf = sha256_round(((uint16 *)SHA256Buf)[1], outbuf);
-
-  // outbuf would normall be SWAP32'd here, but it'll need it again
-  // once we use it as input to the next SHA256, so it negates.
-
-  ((uint8 *)SHA256Buf)[0] = outbuf;
-  SHA256Buf[8] = 0x80000000;
-  for(int i = 9; i < 15; ++i) SHA256Buf[i] = 0x00000000;
-  SHA256Buf[15] = 0x00000100;
-
-  outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19);
-  outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf);
-  */
-
-
-
-  outbuf.s0 = SWAP32(outbuf.s0);
-  outbuf.s1 = SWAP32(outbuf.s1);
-  outbuf.s2 = SWAP32(outbuf.s2);
-  outbuf.s3 = SWAP32(outbuf.s3);
-  outbuf.s4 = SWAP32(outbuf.s4);
-  outbuf.s5 = SWAP32(outbuf.s5);
-  outbuf.s6 = SWAP32(outbuf.s6);
-  outbuf.s7 = SWAP32(outbuf.s7);
-
-  ctx[get_global_id(0) - get_global_offset(0)] = outbuf;
-}
-
-__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
-__kernel void search1(__global uint8 *ctx)
-{
-  ulong W[16] = { 0UL }, SHA512Out[8];
-  uint SHA256Buf[16];
-  uint8 outbuf = ctx[get_global_id(0) - get_global_offset(0)];
-
-  ((uint8 *)W)[0] = outbuf;
-
-  for(int i = 0; i < 4; ++i) W[i] = SWAP64(W[i]);
-
-  W[4] = 0x8000000000000000UL;
-  W[15] = 0x0000000000000100UL;
-
-  for(int i = 0; i < 8; ++i) SHA512Out[i] = SHA512_INIT[i];
-
-  SHA512Block(W, SHA512Out);
-
-  for(int i = 0; i < 8; ++i) SHA512Out[i] = SWAP64(SHA512Out[i]);
-
-  uint RMD160_0[16] = { 0U };
-  uint RMD160_1[16] = { 0U };
-  uint RMD160_0_Out[5], RMD160_1_Out[5];
-
-  for(int i = 0; i < 4; ++i)
-  {
-    ((ulong *)RMD160_0)[i] = SHA512Out[i];
-    ((ulong *)RMD160_1)[i] = SHA512Out[i + 4];
-  }
-
-  RMD160_0[8] = RMD160_1[8] = 0x00000080;
-  RMD160_0[14] = RMD160_1[14] = 0x00000100;
-
-  for(int i = 0; i < 5; ++i)
-  {
-    RMD160_0_Out[i] = RMD160_IV[i];
-    RMD160_1_Out[i] = RMD160_IV[i];
-  }
-
-  RIPEMD160_ROUND_BODY(RMD160_0, RMD160_0_Out);
-  RIPEMD160_ROUND_BODY(RMD160_1, RMD160_1_Out);
-
-  for(int i = 0; i < 5; ++i) SHA256Buf[i] = SWAP32(RMD160_0_Out[i]);
-  for(int i = 5; i < 10; ++i) SHA256Buf[i] = SWAP32(RMD160_1_Out[i - 5]);
-  SHA256Buf[10] = 0x80000000;
-
-  for(int i = 11; i < 15; ++i) SHA256Buf[i] = 0x00000000U;
-
-  SHA256Buf[15] = 0x00000140;
-
-  outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19);
-  outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf);
-
-  ctx[get_global_id(0) - get_global_offset(0)] = outbuf;
-}
-
-__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
-__kernel void search2(__global uint8 *ctx, __global uint *output, ulong target)
-{
-  uint SHA256Buf[16] = { 0U };
-  uint gid = get_global_id(0);
-  uint8 outbuf = ctx[get_global_id(0) - get_global_offset(0)];
-
-  ((uint8 *)SHA256Buf)[0] = outbuf;
-  SHA256Buf[8] = 0x80000000;
-  for(int i = 9; i < 15; ++i) SHA256Buf[i] = 0x00000000;
-  SHA256Buf[15] = 0x00000100;
-
-  outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19);
-  outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf);
-
-  outbuf.s6 = SWAP32(outbuf.s6);
-  outbuf.s7 = SWAP32(outbuf.s7);
-
-  if(as_ulong(outbuf.s67) <= target)
-    output[atomic_inc(output+0xFF)] = SWAP32(gid);
-}
diff --git a/kernel/yescrypt-multi.cl b/kernel/yescrypt-multi.cl
deleted file mode 100644
index 3af7b28a..00000000
--- a/kernel/yescrypt-multi.cl
+++ /dev/null
@@ -1,314 +0,0 @@
-/*
-* "yescrypt" kernel implementation.
-*
-* ==========================(LICENSE BEGIN)============================
-*
-* Copyright (c) 2015  djm34
-*
-* Permission is hereby granted, free of charge, to any person obtaining
-* a copy of this software and associated documentation files (the
-* "Software"), to deal in the Software without restriction, including
-* without limitation the rights to use, copy, modify, merge, publish,
-* distribute, sublicense, and/or sell copies of the Software, and to
-* permit persons to whom the Software is furnished to do so, subject to
-* the following conditions:
-*
-* The above copyright notice and this permission notice shall be
-* included in all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*
-* ===========================(LICENSE END)=============================
-*
-* @author   djm34
-*/
-#if !defined(cl_khr_byte_addressable_store)
-#error "Device does not support unaligned stores"
-#endif
-
-#include "yescrypt_essential.cl"
-
-__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
-__kernel void search(__global const uchar* restrict input, __global uint* restrict output, __global uchar *padcache, __global uchar* buff1, __global uchar* buff2, __global uchar* buff3, const uint target)
-{
-
-	__global ulong16 *hashbuffer = (__global ulong16 *)(padcache + (2048 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
-	__global ulong16 *prevstate = (__global ulong16 *)(buff1 + (64 * 128 * sizeof(ulong)*(get_global_id(0) % MAX_GLOBAL_THREADS)));
-	__global uint8 *sha256tokeep = (__global uint8 *)(buff3 + (8 * sizeof(uint)*(get_global_id(0) % MAX_GLOBAL_THREADS)));
-	__global ulong16 *Bdev = (__global ulong16 *)(buff2 + (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
-
-
-
-	uint nonce = (get_global_id(0));
-	uint  data[20];
-	uint16 in;
-	uint8 state1, state2;
-//	uint8 sha256tokeep;
-
-//	ulong16 Bdev[8]; // will require an additional buffer
-	((uint16 *)data)[0] = ((__global const uint16 *)input)[0];
-	((uint4  *)data)[4] = ((__global const uint4  *)input)[4];
-//	for (int i = 0; i<20; i++) { data[i] = SWAP32(data[i]); }
-	//	if (nonce == 10) { printf("data %08x %08x\n", data[0], data[1]); }
-	uint8 passwd = sha256_80(data, nonce);
-	//pbkdf		
-	in.lo = pad1.lo ^ passwd;
-	in.hi = pad1.hi;
-	state1 = sha256_Transform(in, H256);
-
-	in.lo = pad2.lo ^ passwd;
-	in.hi = pad2.hi;
-	state2 = sha256_Transform(in, H256);
-
-	in = ((uint16*)data)[0];
-	state1 = sha256_Transform(in, state1);
-#pragma unroll 1
-	for (int i = 0; i<8; i++)
-	{
-		uint16 result;
-		in = pad3;
-		in.s0 = data[16];
-		in.s1 = data[17];
-		in.s2 = data[18];
-		in.s3 = nonce;
-		in.s4 = 4 * i + 1;
-		in.lo = sha256_Transform(in, state1);
-		in.hi = pad4;
-		result.lo = swapvec(sha256_Transform(in, state2));
-		if (i == 0) sha256tokeep[0] = result.lo;
-		in = pad3;
-		in.s0 = data[16];
-		in.s1 = data[17];
-		in.s2 = data[18];
-		in.s3 = nonce;
-		in.s4 = 4 * i + 2;
-		in.lo = sha256_Transform(in, state1);
-		in.hi = pad4;
-		result.hi = swapvec(sha256_Transform(in, state2));
-		Bdev[i].lo = as_ulong8(shuffle(result));
-//		Bdev[i].lo = as_ulong8(result);
-		in = pad3;
-		in.s0 = data[16];
-		in.s1 = data[17];
-		in.s2 = data[18];
-		in.s3 = nonce;
-		in.s4 = 4 * i + 3;
-		in.lo = sha256_Transform(in, state1);
-		in.hi = pad4;
-		result.lo = swapvec(sha256_Transform(in, state2));
-		in = pad3;
-		in.s0 = data[16];
-		in.s1 = data[17];
-		in.s2 = data[18];
-		in.s3 = nonce;
-		in.s4 = 4 * i + 4;
-		in.lo = sha256_Transform(in, state1);
-		in.hi = pad4;
-		result.hi = swapvec(sha256_Transform(in, state2));
-
-
-		Bdev[i].hi = as_ulong8(shuffle(result));
-//		Bdev[i].hi = as_ulong8(result);
-	}
-
-	//mixing1
-
-	prevstate[0] = Bdev[0];
-	Bdev[0] = blockmix_salsa8_small2(Bdev[0]);
-	prevstate[1] = Bdev[0];
-	Bdev[0] = blockmix_salsa8_small2(Bdev[0]);
-
-	uint n = 1;
-#pragma unroll 1
-	for (uint i = 2; i < 64; i++)
-	{
-
-		prevstate[i] = Bdev[0];
-
-		if ((i&(i - 1)) == 0) n = n << 1;
-
-		uint j = as_uint2(Bdev[0].hi.s0).x & (n - 1);
-
-		j += i - n;
-		Bdev[0] ^= prevstate[j];
-
-		Bdev[0] = blockmix_salsa8_small2(Bdev[0]);
-	}
-
-
-}
-
-
-
-__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
-__kernel void search1(__global uchar *buffer1, __global uchar *buffer2)
-{
-}
-
-
-
-
-__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
-__kernel void search2(__global uchar *padcache, __global uchar *buff1, __global uchar *buff2)
-{
-
-			__global ulong16 *hashbuffer = (__global ulong16 *)(padcache + (2048 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
-			__global ulong16* prevstate =  (__global ulong16 *)(buff1 +      (64 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
-			__global ulong16 *Bdev =       (__global ulong16 *)(buff2 +       (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
-
-
-			for (int i = 0; i<8; i++)
-				hashbuffer[i] = Bdev[i];
-
-			blockmix_pwxform((__global ulong8*)Bdev, prevstate);
-
-
-			for (int i = 0; i<8; i++)
-				hashbuffer[i + 8] = Bdev[i];
-
-			blockmix_pwxform((__global ulong8*)Bdev, prevstate);
-			int n = 1;
-#pragma unroll 1
-			for (int i = 2; i < 2048; i ++)
-			{
-
-				for (int k = 0; k<8; k++)
-					(hashbuffer + 8 * i)[k] = Bdev[k];
-
-
-				if ((i&(i - 1)) == 0) n = n << 1;
-
-				uint j = as_uint2(Bdev[7].hi.s0).x & (n - 1);
-				j += i - n;
-
-				for (int k = 0; k < 8; k++)
-					Bdev[k] ^= (hashbuffer + 8 * j)[k];
-
-
-				blockmix_pwxform((__global ulong8*)Bdev, prevstate);
-            }
-}
-
-/*
-__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
-__kernel void search3(__global uchar *buffer1, __global uchar *buffer2)
-{
-}
-*/
-
-
-__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
-__kernel void search3(__global uchar *padcache, __global uchar *buff1, __global uchar *buff2)
-{
-
-	__global ulong16 *hashbuffer = (__global ulong16 *)(padcache + (2048 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
-	__global ulong16* prevstate = (__global ulong16 *)(buff1 + (64 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
-	__global ulong16 *Bdev = (__global ulong16 *)(buff2 + (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
-
-
-#pragma unroll 1
-	for (int z = 0; z < 684; z++)
-	{
-
-		uint j = as_uint2(Bdev[7].hi.s0).x & 2047;
-
-
-		for (int k = 0; k < 8; k++)
-			Bdev[k] ^= (hashbuffer + 8 * j)[k];
-
-		if (z<682)
-			for (int k = 0; k<8; k++)
-				(hashbuffer + 8 * j)[k] = Bdev[k];
-
-		blockmix_pwxform((__global ulong8*)Bdev, prevstate);
-////
-	}
-
-}
-
-/*
-__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
-__kernel void search5(__global uchar *buffer1, __global uchar *buffer2)
-{
-}
-*/
-
-__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
-__kernel void search4(__global const uchar* restrict input, __global uint* restrict output, __global uchar *buff2,__global uchar* buff3, const uint target)
-{
-
-	__global ulong16 *Bdev =     (__global ulong16 *)(buff2 + (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
-	__global uint8 *sha256tokeep = (__global uint8 *)(buff3 + (8 * sizeof(uint)*(get_global_id(0) % MAX_GLOBAL_THREADS)));
-
-	uint nonce = (get_global_id(0));
-
-
-	uint  data[20];
-	((uint16 *)data)[0] = ((__global const uint16 *)input)[0];
-	((uint4  *)data)[4] = ((__global const uint4  *)input)[4];
-//	for (int i = 0; i<20; i++) { data[i] = SWAP32(data[i]); }
-	uint8 swpass = swapvec(sha256tokeep[0]);
-    uint16 in;
-    uint8 state1,state2;
-	in.lo = pad1.lo ^ swpass;
-	in.hi = pad1.hi;
-
-
-	state1 = sha256_Transform(in, H256);
-
-	in.lo = pad2.lo ^ swpass;
-	in.hi = pad2.hi;
-	state2 = sha256_Transform(in, H256);
-
-#pragma unroll 1
-	for (int i = 0; i<8; i++) {
-		in = unshuffle(Bdev[i].lo);
-		in = swapvec16(in);
-		state1 = sha256_Transform(in, state1);
-		in = unshuffle(Bdev[i].hi);
-		in = swapvec16(in);
-		state1 = sha256_Transform(in, state1);
-	}
-	in = pad5;
-	state1 = sha256_Transform(in, state1);
-	in.lo = state1;
-	in.hi = pad4;
-	uint8 res = sha256_Transform(in, state2);
-
-	//hmac and final sha
-
-	in.lo = pad1.lo ^ res;
-	in.hi = pad1.hi;
-	state1 = sha256_Transform(in, H256);
-	in.lo = pad2.lo ^ res;
-	in.hi = pad2.hi;
-	state2 = sha256_Transform(in, H256);
-	in = ((uint16*)data)[0];
-	state1 = sha256_Transform(in, state1);
-	in = padsha80;
-	in.s0 = data[16];
-	in.s1 = data[17];
-	in.s2 = data[18];
-	in.s3 = get_global_id(0);
-	in.sf = 0x480;
-	state1 = sha256_Transform(in, state1);
-	in.lo = state1;
-	in.hi = pad4;
-	state1 = sha256_Transform(in, state2);
-	//		state2 = H256;
-	in.lo = state1;
-	in.hi = pad4;
-	in.sf = 0x100;
-	res = sha256_Transform(in, H256);
-
-
-	if (SWAP32(res.s7) <= (target)) 
-		output[atomic_inc(output + 0xFF)] = (nonce);
-	
-}
diff --git a/kernel/yescrypt.cl b/kernel/yescrypt.cl
deleted file mode 100644
index 0a94ebca..00000000
--- a/kernel/yescrypt.cl
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
-* "yescrypt" kernel implementation.
-*
-* ==========================(LICENSE BEGIN)============================
-*
-* Copyright (c) 2015  djm34
-*
-* Permission is hereby granted, free of charge, to any person obtaining
-* a copy of this software and associated documentation files (the
-* "Software"), to deal in the Software without restriction, including
-* without limitation the rights to use, copy, modify, merge, publish,
-* distribute, sublicense, and/or sell copies of the Software, and to
-* permit persons to whom the Software is furnished to do so, subject to
-* the following conditions:
-*
-* The above copyright notice and this permission notice shall be
-* included in all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*
-* ===========================(LICENSE END)=============================
-*
-* @author   djm34
-*/
-#if !defined(cl_khr_byte_addressable_store)
-#error "Device does not support unaligned stores"
-#endif
-
-#include "yescrypt_essential.cl"
-
-
-__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
-__kernel void search(__global const uchar* restrict input, __global uint* restrict output, __global uchar *padcache, __global uchar* buff1, __global uchar* buff2, const uint target)
-{
-
-	__global ulong16 *hashbuffer = (__global ulong16 *)(padcache + (2048 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
-	__global ulong16 *prevstate = (__global ulong16 *)(buff1 + (64 * 128 * sizeof(ulong)*(get_global_id(0) % MAX_GLOBAL_THREADS)));
-	__global ulong16 *Bdev = (__global ulong16 *)(buff2 + (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
-
-
-
-	uint nonce = (get_global_id(0));
-	uint  data[20];
-	uint16 in;
-	uint8 state1, state2;
-    uint8 sha256tokeep;
-
-	((uint16 *)data)[0] = ((__global const uint16 *)input)[0];
-	((uint4  *)data)[4] = ((__global const uint4  *)input)[4];
-	for (int i = 0; i<20; i++) { data[i] = SWAP32(data[i]); }
-	//	if (nonce == 10) { printf("data %08x %08x\n", data[0], data[1]); }
-	uint8 passwd = sha256_80(data, nonce);
-	//pbkdf		
-	in.lo = pad1.lo ^ passwd;
-	in.hi = pad1.hi;
-	state1 = sha256_Transform(in, H256);
-
-	in.lo = pad2.lo ^ passwd;
-	in.hi = pad2.hi;
-	state2 = sha256_Transform(in, H256);
-
-	in = ((uint16*)data)[0];
-	state1 = sha256_Transform(in, state1);
-#pragma unroll 1
-	for (int i = 0; i<8; i++)
-	{
-		uint16 result;
-		in = pad3;
-		in.s0 = data[16];
-		in.s1 = data[17];
-		in.s2 = data[18];
-		in.s3 = nonce;
-		in.s4 = 4 * i + 1;
-		in.lo = sha256_Transform(in, state1);
-		in.hi = pad4;
-		result.lo = swapvec(sha256_Transform(in, state2));
-		if (i == 0) sha256tokeep = result.lo;
-		in = pad3;
-		in.s0 = data[16];
-		in.s1 = data[17];
-		in.s2 = data[18];
-		in.s3 = nonce;
-		in.s4 = 4 * i + 2;
-		in.lo = sha256_Transform(in, state1);
-		in.hi = pad4;
-		result.hi = swapvec(sha256_Transform(in, state2));
-		Bdev[i].lo = as_ulong8(shuffle(result));
-		in = pad3;
-		in.s0 = data[16];
-		in.s1 = data[17];
-		in.s2 = data[18];
-		in.s3 = nonce;
-		in.s4 = 4 * i + 3;
-		in.lo = sha256_Transform(in, state1);
-		in.hi = pad4;
-		result.lo = swapvec(sha256_Transform(in, state2));
-		in = pad3;
-		in.s0 = data[16];
-		in.s1 = data[17];
-		in.s2 = data[18];
-		in.s3 = nonce;
-		in.s4 = 4 * i + 4;
-		in.lo = sha256_Transform(in, state1);
-		in.hi = pad4;
-		result.hi = swapvec(sha256_Transform(in, state2));
-
-
-		Bdev[i].hi = as_ulong8(shuffle(result));
-	}
-
-	//mixing1
-
-	prevstate[0] = Bdev[0];
-	Bdev[0] = blockmix_salsa8_small2(Bdev[0]);
-	prevstate[1] = Bdev[0];
-	Bdev[0] = blockmix_salsa8_small2(Bdev[0]);
-
-	uint n = 1;
-#pragma unroll 1
-	for (uint i = 2; i < 64; i++)
-	{
-
-		prevstate[i] = Bdev[0];
-
-		if ((i&(i - 1)) == 0) n = n << 1;
-
-		uint j = as_uint2(Bdev[0].hi.s0).x & (n - 1);
-
-		j += i - n;
-		Bdev[0] ^= prevstate[j];
-
-		Bdev[0] = blockmix_salsa8_small2(Bdev[0]);
-	}
-
-
-	for (int i = 0; i<8; i++)
-		hashbuffer[i] = Bdev[i];
-
-	blockmix_pwxform((__global ulong8*)Bdev, prevstate);
-
-
-	for (int i = 0; i<8; i++)
-		hashbuffer[i + 8] = Bdev[i];
-
-	blockmix_pwxform((__global ulong8*)Bdev, prevstate);
-    n = 1;
-#pragma unroll 1
-	for (int i = 2; i < 2048; i++)
-	{
-
-		for (int k = 0; k<8; k++)
-			(hashbuffer + 8 * i)[k] = Bdev[k];
-
-
-		if ((i&(i - 1)) == 0) n = n << 1;
-
-		uint j = as_uint2(Bdev[7].hi.s0).x & (n - 1);
-		j += i - n;
-
-		for (int k = 0; k < 8; k++)
-			Bdev[k] ^= (hashbuffer + 8 * j)[k];
-
-
-		blockmix_pwxform((__global ulong8*)Bdev, prevstate);
-	}
-
-
-#pragma unroll 1
-	for (int z = 0; z < 684; z++)
-	{
-
-		uint j = as_uint2(Bdev[7].hi.s0).x & 2047;
-
-
-		for (int k = 0; k < 8; k++)
-			Bdev[k] ^= (hashbuffer + 8 * j)[k];
-
-		if (z<682)
-			for (int k = 0; k<8; k++)
-				(hashbuffer + 8 * j)[k] = Bdev[k];
-
-		blockmix_pwxform((__global ulong8*)Bdev, prevstate);
-		////
-	}
-
-
-
-	uint8 swpass = swapvec(sha256tokeep);
-//	uint16 in;
-//	uint8 state1, state2;
-	in.lo = pad1.lo ^ swpass;
-	in.hi = pad1.hi;
-
-
-	state1 = sha256_Transform(in, H256);
-
-	in.lo = pad2.lo ^ swpass;
-	in.hi = pad2.hi;
-	state2 = sha256_Transform(in, H256);
-
-#pragma unroll 1
-	for (int i = 0; i<8; i++) {
-		in = unshuffle(Bdev[i].lo);
-		in = swapvec16(in);
-		state1 = sha256_Transform(in, state1);
-		in = unshuffle(Bdev[i].hi);
-		in = swapvec16(in);
-		state1 = sha256_Transform(in, state1);
-	}
-	in = pad5;
-	state1 = sha256_Transform(in, state1);
-	in.lo = state1;
-	in.hi = pad4;
-	uint8 res = sha256_Transform(in, state2);
-
-	//hmac and final sha
-
-	in.lo = pad1.lo ^ res;
-	in.hi = pad1.hi;
-	state1 = sha256_Transform(in, H256);
-	in.lo = pad2.lo ^ res;
-	in.hi = pad2.hi;
-	state2 = sha256_Transform(in, H256);
-	in = ((uint16*)data)[0];
-	state1 = sha256_Transform(in, state1);
-	in = padsha80;
-	in.s0 = data[16];
-	in.s1 = data[17];
-	in.s2 = data[18];
-	in.s3 = get_global_id(0);
-	in.sf = 0x480;
-	state1 = sha256_Transform(in, state1);
-	in.lo = state1;
-	in.hi = pad4;
-	state1 = sha256_Transform(in, state2);
-	//		state2 = H256;
-	in.lo = state1;
-	in.hi = pad4;
-	in.sf = 0x100;
-	res = sha256_Transform(in, H256);
-
-
-	if (SWAP32(res.s7) <= (target))
-		output[atomic_inc(output + 0xFF)] = (nonce);
-
-}
-
diff --git a/kernel/yescrypt_essential.cl b/kernel/yescrypt_essential.cl
deleted file mode 100644
index ba1816a8..00000000
--- a/kernel/yescrypt_essential.cl
+++ /dev/null
@@ -1,760 +0,0 @@
-/*
-* "yescrypt" kernel implementation.
-*
-* ==========================(LICENSE BEGIN)============================
-*
-* Copyright (c) 2015  djm34
-*
-* Permission is hereby granted, free of charge, to any person obtaining
-* a copy of this software and associated documentation files (the
-* "Software"), to deal in the Software without restriction, including
-* without limitation the rights to use, copy, modify, merge, publish,
-* distribute, sublicense, and/or sell copies of the Software, and to
-* permit persons to whom the Software is furnished to do so, subject to
-* the following conditions:
-*
-* The above copyright notice and this permission notice shall be
-* included in all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*
-* ===========================(LICENSE END)=============================
-*
-* @author   djm34
-*/
-
-#define ROL32(x, n)  rotate(x, (uint) n)
-#define SWAP32(a)    (as_uint(as_uchar4(a).wzyx))
-//#define ROL32(x, n)   (((x) << (n)) | ((x) >> (32 - (n))))
-#define HASH_MEMORY 4096
-
-
-#define SALSA(a,b,c,d) do { \
-    t =a+d; b^=ROL32(t,  7U);    \
-    t =b+a; c^=ROL32(t,  9U);    \
-    t =c+b; d^=ROL32(t, 13U);    \
-    t =d+c; a^=ROL32(t, 18U);     \
-} while(0)
-
-
-#define SALSA_CORE(state) do { \
-\
-SALSA(state.s0,state.s4,state.s8,state.sc); \
-SALSA(state.s5,state.s9,state.sd,state.s1); \
-SALSA(state.sa,state.se,state.s2,state.s6); \
-SALSA(state.sf,state.s3,state.s7,state.sb); \
-SALSA(state.s0,state.s1,state.s2,state.s3); \
-SALSA(state.s5,state.s6,state.s7,state.s4); \
-SALSA(state.sa,state.sb,state.s8,state.s9); \
-SALSA(state.sf,state.sc,state.sd,state.se); \
-	} while(0)
-
-#define uSALSA_CORE(state) do { \
-\
-SALSA(state.s0,state.s4,state.s8,state.sc); \
-SALSA(state.s1,state.s5,state.s9,state.sd); \
-SALSA(state.s2,state.s6,state.sa,state.se); \
-SALSA(state.s3,state.s7,state.sb,state.sf); \
-SALSA(state.s0,state.sd,state.sa,state.s7); \
-SALSA(state.s1,state.se,state.sb,state.s4); \
-SALSA(state.s2,state.sf,state.s8,state.s5); \
-SALSA(state.s3,state.sc,state.s9,state.s6); \
-} while(0)
-
-
-#define unshuffle(state) (as_uint16(state).s0da741eb852fc963)
-
-#define   shuffle(state) (as_uint16(state).s05af49e38d27c16b)
-
-static __constant uint16 pad1 = 
-{
-	0x36363636, 0x36363636, 0x36363636, 0x36363636,
-	0x36363636, 0x36363636, 0x36363636, 0x36363636,
-	0x36363636, 0x36363636, 0x36363636, 0x36363636,
-	0x36363636, 0x36363636, 0x36363636, 0x36363636
-};
-
-static __constant uint16 pad2 = 
-{
-	0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c,
-	0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c,
-	0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c,
-	0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c
-};
-
-static __constant uint16 pad5 =
-{
-	0x00000001, 0x80000000, 0x00000000, 0x00000000,
-	0x00000000, 0x00000000, 0x00000000, 0x00000000,
-	0x00000000, 0x00000000, 0x00000000, 0x00000000,
-	0x00000000, 0x00000000, 0x00000000, 0x00002220
-};
-
-static __constant uint16 pad3 =
-{
-	0x00000000, 0x00000000, 0x00000000, 0x00000000,
-	0x00000000, 0x80000000, 0x00000000, 0x00000000,
-	0x00000000, 0x00000000, 0x00000000, 0x00000000,
-	0x00000000, 0x00000000, 0x00000000, 0x000004a0
-};
-
-static __constant uint16 padsha80 =
-{
-	0x00000000, 0x00000000, 0x00000000, 0x00000000,
-	0x80000000, 0x00000000, 0x00000000, 0x00000000,
-	0x00000000, 0x00000000, 0x00000000, 0x00000000,
-	0x00000000, 0x00000000, 0x00000000, 0x00000280
-};
-
-static __constant uint8 pad4 =
-{
-	0x80000000, 0x00000000, 0x00000000, 0x00000000,
-	0x00000000, 0x00000000, 0x00000000, 0x00000300
-};
-
-
-
-static __constant  uint8 H256 = {
-	0x6A09E667, 0xBB67AE85, 0x3C6EF372,
-	0xA54FF53A, 0x510E527F, 0x9B05688C,
-	0x1F83D9AB, 0x5BE0CD19
-};
-
-inline uint8 swapvec(uint8 buf)
-{
-	uint8 vec;
-	vec.s0 = SWAP32(buf.s0);
-	vec.s1 = SWAP32(buf.s1);
-	vec.s2 = SWAP32(buf.s2);
-	vec.s3 = SWAP32(buf.s3);
-	vec.s4 = SWAP32(buf.s4);
-	vec.s5 = SWAP32(buf.s5);
-	vec.s6 = SWAP32(buf.s6);
-	vec.s7 = SWAP32(buf.s7);
-	return vec;
-}
-
-
-
-inline uint16 swapvec16(uint16 buf)
-{
-	uint16 vec;
-	vec.s0 = SWAP32(buf.s0);
-	vec.s1 = SWAP32(buf.s1);
-	vec.s2 = SWAP32(buf.s2);
-	vec.s3 = SWAP32(buf.s3);
-	vec.s4 = SWAP32(buf.s4);
-	vec.s5 = SWAP32(buf.s5);
-	vec.s6 = SWAP32(buf.s6);
-	vec.s7 = SWAP32(buf.s7);
-	vec.s8 = SWAP32(buf.s8);
-	vec.s9 = SWAP32(buf.s9);
-	vec.sa = SWAP32(buf.sa);
-	vec.sb = SWAP32(buf.sb);
-	vec.sc = SWAP32(buf.sc);
-	vec.sd = SWAP32(buf.sd);
-	vec.se = SWAP32(buf.se);
-	vec.sf = SWAP32(buf.sf);
-	return vec;
-}
-
- ulong8 salsa20_8(uint16 Bx)
-{
-uint t;
-	uint16 st = Bx;
-	uSALSA_CORE(st);
-	uSALSA_CORE(st);
-	uSALSA_CORE(st);
-	uSALSA_CORE(st);
-	return(as_ulong8(st + Bx));
-}
-
- ulong8 salsa20_8n(uint16 Bx)
- {
-	 uint t;
-	 uint16 st = Bx;
-	 SALSA_CORE(st);
-	 SALSA_CORE(st);
-	 SALSA_CORE(st);
-	 SALSA_CORE(st);
-	 return(as_ulong8(st + Bx));
- }
-
-
- ulong16 blockmix_salsa8_small2(ulong16 Bin)
-{
-	ulong8 X = Bin.hi;
-	X ^= Bin.lo;
-	X = salsa20_8(as_uint16(X));
-	Bin.lo = X;
-	X ^= Bin.hi;
-	X = salsa20_8(as_uint16(X));
-	Bin.hi = X;
-	return(Bin);
-}
-/*
- uint16 salsa20_8_2(uint16 Bx)
- {
-	 uint t;
-	 uint16 st = Bx;
-	 uSALSA_CORE(st);
-	 uSALSA_CORE(st);
-	 uSALSA_CORE(st);
-	 uSALSA_CORE(st);
-	 return(st + Bx);
- }
-
- ulong16 blockmix_salsa8_small2(ulong16 Bin)
- {
-	 uint16 X = as_uint16(Bin.hi);
-	 X ^= as_uint16(Bin.lo);
-	 X = salsa20_8_2(as_uint16(X));
-	 Bin.lo = as_ulong8(X);
-	 X ^= as_uint16(Bin.hi);
-	 X = salsa20_8_2(as_uint16(X));
-	 Bin.hi = as_ulong8(X);
-	 return(Bin);
- }
-*/
-
-
-inline ulong2 madd4long2(uint4 a, uint4 b)
-{
-	uint4 result;
-	result.x = a.x*a.y + b.x;
-	result.y = b.y + mad_hi(a.x, a.y, b.x);
-	result.z = a.z*a.w + b.z;
-	result.w = b.w + mad_hi(a.z, a.w, b.z);
-	return as_ulong2(result);
-}
-
-inline ulong2 madd4long3(uint4 a, ulong2 b)
-{
-	ulong2 result;
-	result.x = (ulong)a.x*(ulong)a.y + b.x;
-	result.y = (ulong)a.z*(ulong)a.w + b.y;
-	return result;
-}
-
-
-inline ulong8 block_pwxform_long_old(ulong8 Bout, __global ulong16 *prevstate)
-{
-
-		ulong2 vec = Bout.lo.lo;
-
-		for (int i = 0; i < 6; i++)
-		{
-			ulong2 p0, p1;
-			uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF);
-			p0 = ((__global ulong2*)(prevstate ))[x.x];
-			vec = madd4long3(as_uint4(vec), p0);
-			p1 = ((__global  ulong2*)(prevstate + 32))[x.y];
-
-			vec ^= p1;
-		}
-		Bout.lo.lo = vec;
-        vec = Bout.lo.hi;
-		for (int i = 0; i < 6; i++)
-		{
-
-			ulong2 p0, p1;
-			uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF);
-			p0 = ((__global  ulong2*)(prevstate))[x.x];
-			vec = madd4long3(as_uint4(vec), p0);
-			p1 = ((__global ulong2*)(prevstate + 32))[x.y];
-
-			vec ^= p1;
-		}
-		Bout.lo.hi = vec;
-
-		vec = Bout.hi.lo;
-		for (int i = 0; i < 6; i++)
-		{
-			ulong2 p0, p1;
-			uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF);
-			p0 = ((__global  ulong2*)(prevstate))[x.x];
-			vec = madd4long3(as_uint4(vec), p0);
-			p1 = ((__global  ulong2*)(prevstate + 32))[x.y];
-			vec ^= p1;
-		}
-		Bout.hi.lo = vec;
-		vec = Bout.hi.hi;
-		for (int i = 0; i < 6; i++)
-		{
-			ulong2 p0, p1;
-			uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF);
-			p0 = ((__global  ulong2*)(prevstate))[x.x];
-			vec = madd4long3(as_uint4(vec), p0);
-			p1 = ((__global  ulong2*)(prevstate + 32))[x.y];
-
-			vec ^= p1;
-		}
-		Bout.hi.hi = vec;
-
-		return(Bout);
-}
-
-inline ulong8 block_pwxform_long(ulong8 Bout, __global ulong2 *prevstate)
-{
-
-	ulong2 vec = Bout.lo.lo;
-
-	for (int i = 0; i < 6; i++)
-	{
-		ulong2 p0, p1;
-		uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF);
-		p0 = prevstate[x.x];
-		vec = madd4long3(as_uint4(vec), p0);
-		p1 = (prevstate + 32*8)[x.y];
-
-		vec ^= p1;
-	}
-	Bout.lo.lo = vec;
-	vec = Bout.lo.hi;
-	for (int i = 0; i < 6; i++)
-	{
-
-		ulong2 p0, p1;
-		uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF);
-		p0 = prevstate[x.x];
-		vec = madd4long3(as_uint4(vec), p0);
-		p1 = (prevstate + 32 * 8)[x.y];
-
-		vec ^= p1;
-	}
-	Bout.lo.hi = vec;
-
-	vec = Bout.hi.lo;
-	for (int i = 0; i < 6; i++)
-	{
-		ulong2 p0, p1;
-		uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF);
-		p0 = prevstate[x.x];
-		vec = madd4long3(as_uint4(vec), p0);
-		p1 = (prevstate + 32 * 8)[x.y];
-		vec ^= p1;
-	}
-	Bout.hi.lo = vec;
-	vec = Bout.hi.hi;
-	for (int i = 0; i < 6; i++)
-	{
-		ulong2 p0, p1;
-		uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF);
-		p0 = prevstate[x.x];
-		vec = madd4long3(as_uint4(vec), p0);
-		p1 = (prevstate + 32 * 8)[x.y];
-
-		vec ^= p1;
-	}
-	Bout.hi.hi = vec;
-
-	return(Bout);
-}
-
-
-
-
-inline void blockmix_pwxform(__global ulong8 *Bin, __global  ulong16 *prevstate)
-{
-	Bin[0] ^= Bin[15];
-	Bin[0] = block_pwxform_long_old(Bin[0], prevstate);
-#pragma unroll 1
-	for (int i = 1; i < 16; i++)
-	{
-		Bin[i] ^= Bin[i - 1];
-		Bin[i] = block_pwxform_long_old(Bin[i], prevstate);
-	}
-	Bin[15] = salsa20_8(as_uint16(Bin[15]));
-}
-
-#define SHR(x, n)    ((x) >> n)
-
-
-#define S0(x) (ROL32(x, 25) ^ ROL32(x, 14) ^  SHR(x, 3))
-#define S1(x) (ROL32(x, 15) ^ ROL32(x, 13) ^  SHR(x, 10))
-
-#define S2(x) (ROL32(x, 30) ^ ROL32(x, 19) ^ ROL32(x, 10))
-#define S3(x) (ROL32(x, 26) ^ ROL32(x, 21) ^ ROL32(x, 7))
-
-#define P(a,b,c,d,e,f,g,h,x,K)                  \
-{                                               \
-    temp1 = h + S3(e) + F1(e,f,g) + (K + x);      \
-    d += temp1; h = temp1 + S2(a) + F0(a,b,c);  \
-}
-
-#define PLAST(a,b,c,d,e,f,g,h,x,K)                  \
-{                                               \
-    d += h + S3(e) + F1(e,f,g) + (x + K);              \
-}
-
-#define F0(y, x, z) bitselect(z, y, z ^ x)
-#define F1(x, y, z) bitselect(z, y, x)
-
-#define R0 (W0 = S1(W14) + W9 + S0(W1) + W0)
-#define R1 (W1 = S1(W15) + W10 + S0(W2) + W1)
-#define R2 (W2 = S1(W0) + W11 + S0(W3) + W2)
-#define R3 (W3 = S1(W1) + W12 + S0(W4) + W3)
-#define R4 (W4 = S1(W2) + W13 + S0(W5) + W4)
-#define R5 (W5 = S1(W3) + W14 + S0(W6) + W5)
-#define R6 (W6 = S1(W4) + W15 + S0(W7) + W6)
-#define R7 (W7 = S1(W5) + W0 + S0(W8) + W7)
-#define R8 (W8 = S1(W6) + W1 + S0(W9) + W8)
-#define R9 (W9 = S1(W7) + W2 + S0(W10) + W9)
-#define R10 (W10 = S1(W8) + W3 + S0(W11) + W10)
-#define R11 (W11 = S1(W9) + W4 + S0(W12) + W11)
-#define R12 (W12 = S1(W10) + W5 + S0(W13) + W12)
-#define R13 (W13 = S1(W11) + W6 + S0(W14) + W13)
-#define R14 (W14 = S1(W12) + W7 + S0(W15) + W14)
-#define R15 (W15 = S1(W13) + W8 + S0(W0) + W15)
-
-#define RD14 (S1(W12) + W7 + S0(W15) + W14)
-#define RD15 (S1(W13) + W8 + S0(W0) + W15)
-
-/// generic sha transform
-inline uint8 sha256_Transform(uint16 data, uint8 state)
-{
-uint temp1;
-    uint8 res = state;
-	uint W0 = data.s0;
-	uint W1 = data.s1;
-	uint W2 = data.s2;
-	uint W3 = data.s3;
-	uint W4 = data.s4;
-	uint W5 = data.s5;
-	uint W6 = data.s6;
-	uint W7 = data.s7;
-	uint W8 = data.s8;
-	uint W9 = data.s9;
-	uint W10 = data.sA;
-	uint W11 = data.sB;
-	uint W12 = data.sC;
-	uint W13 = data.sD;
-	uint W14 = data.sE;
-	uint W15 = data.sF;
-
-#define v0  res.s0
-#define v1  res.s1
-#define v2  res.s2
-#define v3  res.s3
-#define v4  res.s4
-#define v5  res.s5
-#define v6  res.s6
-#define v7  res.s7
-
-	P(v0, v1, v2, v3, v4, v5, v6, v7, W0, 0x428A2F98);
-	P(v7, v0, v1, v2, v3, v4, v5, v6, W1, 0x71374491);
-	P(v6, v7, v0, v1, v2, v3, v4, v5, W2, 0xB5C0FBCF);
-	P(v5, v6, v7, v0, v1, v2, v3, v4, W3, 0xE9B5DBA5);
-	P(v4, v5, v6, v7, v0, v1, v2, v3, W4, 0x3956C25B);
-	P(v3, v4, v5, v6, v7, v0, v1, v2, W5, 0x59F111F1);
-	P(v2, v3, v4, v5, v6, v7, v0, v1, W6, 0x923F82A4);
-	P(v1, v2, v3, v4, v5, v6, v7, v0, W7, 0xAB1C5ED5);
-	P(v0, v1, v2, v3, v4, v5, v6, v7, W8, 0xD807AA98);
-	P(v7, v0, v1, v2, v3, v4, v5, v6, W9, 0x12835B01);
-	P(v6, v7, v0, v1, v2, v3, v4, v5, W10, 0x243185BE);
-	P(v5, v6, v7, v0, v1, v2, v3, v4, W11, 0x550C7DC3);
-	P(v4, v5, v6, v7, v0, v1, v2, v3, W12, 0x72BE5D74);
-	P(v3, v4, v5, v6, v7, v0, v1, v2, W13, 0x80DEB1FE);
-	P(v2, v3, v4, v5, v6, v7, v0, v1, W14, 0x9BDC06A7);
-	P(v1, v2, v3, v4, v5, v6, v7, v0, W15, 0xC19BF174);
-
-	P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0xE49B69C1);
-	P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0xEFBE4786);
-	P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x0FC19DC6);
-	P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x240CA1CC);
-	P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x2DE92C6F);
-	P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4A7484AA);
-	P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5CB0A9DC);
-	P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x76F988DA);
-	P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x983E5152);
-	P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA831C66D);
-	P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xB00327C8);
-	P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xBF597FC7);
-	P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xC6E00BF3);
-	P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD5A79147);
-	P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0x06CA6351);
-	P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x14292967);
-
-	P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x27B70A85);
-	P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x2E1B2138);
-	P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x4D2C6DFC);
-	P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x53380D13);
-	P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x650A7354);
-	P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x766A0ABB);
-	P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x81C2C92E);
-	P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x92722C85);
-	P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0xA2BFE8A1);
-	P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA81A664B);
-	P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xC24B8B70);
-	P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xC76C51A3);
-	P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xD192E819);
-	P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD6990624);
-	P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0xF40E3585);
-	P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x106AA070);
-
-	P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x19A4C116);
-	P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x1E376C08);
-	P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x2748774C);
-	P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x34B0BCB5);
-	P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x391C0CB3);
-	P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4ED8AA4A);
-	P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5B9CCA4F);
-	P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x682E6FF3);
-	P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x748F82EE);
-	P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0x78A5636F);
-	P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0x84C87814);
-	P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0x8CC70208);
-	P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0x90BEFFFA);
-	P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xA4506CEB);
-	P(v2, v3, v4, v5, v6, v7, v0, v1, RD14, 0xBEF9A3F7);
-	P(v1, v2, v3, v4, v5, v6, v7, v0, RD15, 0xC67178F2);
-#undef v0
-#undef v1
-#undef v2
-#undef v3
-#undef v4
-#undef v5
-#undef v6
-#undef v7
-	return (res+state);
-}
-
-
-static inline uint8 sha256_round1(uint16 data)
-{
-	uint temp1;
-    uint8 res;
-	uint W0 = data.s0;
-	uint W1 = data.s1;
-	uint W2 = data.s2;
-	uint W3 = data.s3;
-	uint W4 = data.s4;
-	uint W5 = data.s5;
-	uint W6 = data.s6;
-	uint W7 = data.s7;
-	uint W8 = data.s8;
-	uint W9 = data.s9;
-	uint W10 = data.sA;
-	uint W11 = data.sB;
-	uint W12 = data.sC;
-	uint W13 = data.sD;
-	uint W14 = data.sE;
-	uint W15 = data.sF;
-
-	uint v0 = 0x6A09E667;
-	uint v1 = 0xBB67AE85;
-	uint v2 = 0x3C6EF372;
-	uint v3 = 0xA54FF53A;
-	uint v4 = 0x510E527F;
-	uint v5 = 0x9B05688C;
-	uint v6 = 0x1F83D9AB;
-	uint v7 = 0x5BE0CD19;
-
-	P(v0, v1, v2, v3, v4, v5, v6, v7, W0, 0x428A2F98);
-	P(v7, v0, v1, v2, v3, v4, v5, v6, W1, 0x71374491);
-	P(v6, v7, v0, v1, v2, v3, v4, v5, W2, 0xB5C0FBCF);
-	P(v5, v6, v7, v0, v1, v2, v3, v4, W3, 0xE9B5DBA5);
-	P(v4, v5, v6, v7, v0, v1, v2, v3, W4, 0x3956C25B);
-	P(v3, v4, v5, v6, v7, v0, v1, v2, W5, 0x59F111F1);
-	P(v2, v3, v4, v5, v6, v7, v0, v1, W6, 0x923F82A4);
-	P(v1, v2, v3, v4, v5, v6, v7, v0, W7, 0xAB1C5ED5);
-	P(v0, v1, v2, v3, v4, v5, v6, v7, W8, 0xD807AA98);
-	P(v7, v0, v1, v2, v3, v4, v5, v6, W9, 0x12835B01);
-	P(v6, v7, v0, v1, v2, v3, v4, v5, W10, 0x243185BE);
-	P(v5, v6, v7, v0, v1, v2, v3, v4, W11, 0x550C7DC3);
-	P(v4, v5, v6, v7, v0, v1, v2, v3, W12, 0x72BE5D74);
-	P(v3, v4, v5, v6, v7, v0, v1, v2, W13, 0x80DEB1FE);
-	P(v2, v3, v4, v5, v6, v7, v0, v1, W14, 0x9BDC06A7);
-	P(v1, v2, v3, v4, v5, v6, v7, v0, W15, 0xC19BF174);
-
-	P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0xE49B69C1);
-	P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0xEFBE4786);
-	P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x0FC19DC6);
-	P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x240CA1CC);
-	P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x2DE92C6F);
-	P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4A7484AA);
-	P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5CB0A9DC);
-	P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x76F988DA);
-	P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x983E5152);
-	P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA831C66D);
-	P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xB00327C8);
-	P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xBF597FC7);
-	P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xC6E00BF3);
-	P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD5A79147);
-	P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0x06CA6351);
-	P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x14292967);
-
-	P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x27B70A85);
-	P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x2E1B2138);
-	P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x4D2C6DFC);
-	P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x53380D13);
-	P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x650A7354);
-	P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x766A0ABB);
-	P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x81C2C92E);
-	P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x92722C85);
-	P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0xA2BFE8A1);
-	P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA81A664B);
-	P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xC24B8B70);
-	P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xC76C51A3);
-	P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xD192E819);
-	P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD6990624);
-	P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0xF40E3585);
-	P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x106AA070);
-
-	P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x19A4C116);
-	P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x1E376C08);
-	P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x2748774C);
-	P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x34B0BCB5);
-	P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x391C0CB3);
-	P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4ED8AA4A);
-	P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5B9CCA4F);
-	P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x682E6FF3);
-	P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x748F82EE);
-	P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0x78A5636F);
-	P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0x84C87814);
-	P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0x8CC70208);
-	P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0x90BEFFFA);
-	P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xA4506CEB);
-	P(v2, v3, v4, v5, v6, v7, v0, v1, RD14, 0xBEF9A3F7);
-	P(v1, v2, v3, v4, v5, v6, v7, v0, RD15, 0xC67178F2);
-
-	res.s0 = v0 + 0x6A09E667;
-	res.s1 = v1 + 0xBB67AE85;
-	res.s2 = v2 + 0x3C6EF372;
-	res.s3 = v3 + 0xA54FF53A;
-	res.s4 = v4 + 0x510E527F;
-	res.s5 = v5 + 0x9B05688C;
-	res.s6 = v6 + 0x1F83D9AB;
-	res.s7 = v7 + 0x5BE0CD19;
-	return (res);
-}
-
-
-static inline uint8 sha256_round2(uint16 data,uint8 buf)
-{
-	uint temp1;
-	uint8 res;
-	uint W0 = data.s0;
-	uint W1 = data.s1;
-	uint W2 = data.s2;
-	uint W3 = data.s3;
-	uint W4 = data.s4;
-	uint W5 = data.s5;
-	uint W6 = data.s6;
-	uint W7 = data.s7;
-	uint W8 = data.s8;
-	uint W9 = data.s9;
-	uint W10 = data.sA;
-	uint W11 = data.sB;
-	uint W12 = data.sC;
-	uint W13 = data.sD;
-	uint W14 = data.sE;
-	uint W15 = data.sF;
-
-	uint v0 = buf.s0;
-	uint v1 = buf.s1;
-	uint v2 = buf.s2;
-	uint v3 = buf.s3;
-	uint v4 = buf.s4;
-	uint v5 = buf.s5;
-	uint v6 = buf.s6;
-	uint v7 = buf.s7;
-
-	P(v0, v1, v2, v3, v4, v5, v6, v7, W0, 0x428A2F98);
-	P(v7, v0, v1, v2, v3, v4, v5, v6, W1, 0x71374491);
-	P(v6, v7, v0, v1, v2, v3, v4, v5, W2, 0xB5C0FBCF);
-	P(v5, v6, v7, v0, v1, v2, v3, v4, W3, 0xE9B5DBA5);
-	P(v4, v5, v6, v7, v0, v1, v2, v3, W4, 0x3956C25B);
-	P(v3, v4, v5, v6, v7, v0, v1, v2, W5, 0x59F111F1);
-	P(v2, v3, v4, v5, v6, v7, v0, v1, W6, 0x923F82A4);
-	P(v1, v2, v3, v4, v5, v6, v7, v0, W7, 0xAB1C5ED5);
-	P(v0, v1, v2, v3, v4, v5, v6, v7, W8, 0xD807AA98);
-	P(v7, v0, v1, v2, v3, v4, v5, v6, W9, 0x12835B01);
-	P(v6, v7, v0, v1, v2, v3, v4, v5, W10, 0x243185BE);
-	P(v5, v6, v7, v0, v1, v2, v3, v4, W11, 0x550C7DC3);
-	P(v4, v5, v6, v7, v0, v1, v2, v3, W12, 0x72BE5D74);
-	P(v3, v4, v5, v6, v7, v0, v1, v2, W13, 0x80DEB1FE);
-	P(v2, v3, v4, v5, v6, v7, v0, v1, W14, 0x9BDC06A7);
-	P(v1, v2, v3, v4, v5, v6, v7, v0, W15, 0xC19BF174);
-
-	P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0xE49B69C1);
-	P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0xEFBE4786);
-	P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x0FC19DC6);
-	P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x240CA1CC);
-	P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x2DE92C6F);
-	P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4A7484AA);
-	P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5CB0A9DC);
-	P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x76F988DA);
-	P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x983E5152);
-	P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA831C66D);
-	P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xB00327C8);
-	P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xBF597FC7);
-	P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xC6E00BF3);
-	P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD5A79147);
-	P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0x06CA6351);
-	P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x14292967);
-
-	P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x27B70A85);
-	P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x2E1B2138);
-	P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x4D2C6DFC);
-	P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x53380D13);
-	P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x650A7354);
-	P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x766A0ABB);
-	P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x81C2C92E);
-	P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x92722C85);
-	P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0xA2BFE8A1);
-	P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA81A664B);
-	P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xC24B8B70);
-	P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xC76C51A3);
-	P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xD192E819);
-	P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD6990624);
-	P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0xF40E3585);
-	P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x106AA070);
-
-	P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x19A4C116);
-	P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x1E376C08);
-	P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x2748774C);
-	P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x34B0BCB5);
-	P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x391C0CB3);
-	P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4ED8AA4A);
-	P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5B9CCA4F);
-	P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x682E6FF3);
-	P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x748F82EE);
-	P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0x78A5636F);
-	P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0x84C87814);
-	P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0x8CC70208);
-	P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0x90BEFFFA);
-	P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xA4506CEB);
-	P(v2, v3, v4, v5, v6, v7, v0, v1, RD14, 0xBEF9A3F7);
-	P(v1, v2, v3, v4, v5, v6, v7, v0, RD15, 0xC67178F2);
-
-	res.s0 = (v0 + buf.s0);
-	res.s1 = (v1 + buf.s1);
-	res.s2 = (v2 + buf.s2);
-	res.s3 = (v3 + buf.s3);
-	res.s4 = (v4 + buf.s4);
-	res.s5 = (v5 + buf.s5);
-	res.s6 = (v6 + buf.s6);
-	res.s7 = (v7 + buf.s7);
-	return (res);
-}
-
-static inline uint8 sha256_80(uint* data,uint nonce)
-{
-
-uint8 buf = sha256_round1( ((uint16*)data)[0]);
-uint16 in = padsha80;
-in.s0 = data[16];
-in.s1 = data[17];
-in.s2 = data[18];
-in.s3 = nonce;
-
-return(sha256_round2(in,buf));
-}
-
diff --git a/ocl.c b/ocl.c
index 44920d2e..b0a644a2 100644
--- a/ocl.c
+++ b/ocl.c
@@ -36,7 +36,6 @@
 #include "ocl/binary_kernel.h"
 #include "algorithm/neoscrypt.h"
 #include "algorithm/pluck.h"
-#include "algorithm/yescrypt.h"
 #include "algorithm/lyra2rev2.h"
 
 /* FIXME: only here for global config vars, replace with configuration.h
@@ -500,91 +499,6 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
     applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency));
   }
 
-  // Yescrypt TC
-  else if ((cgpu->algorithm.type == ALGO_YESCRYPT ||
-            algorithm->type == ALGO_YESCRYPT_MULTI) && !cgpu->opt_tc) {
-    size_t glob_thread_count;
-    long max_int;
-    unsigned char type = 0;
-
-      // determine which intensity type to use
-    if (cgpu->rawintensity > 0) {
-      glob_thread_count = cgpu->rawintensity;
-      max_int = glob_thread_count;
-      type = 2;
-    }
-    else if (cgpu->xintensity > 0) {
-      glob_thread_count = clState->compute_shaders * ((cgpu->algorithm.xintensity_shift) ? (1UL << (cgpu->algorithm.xintensity_shift + cgpu->xintensity)) : cgpu->xintensity);
-      max_int = cgpu->xintensity;
-      type = 1;
-    }
-    else {
-      glob_thread_count = 1UL << (cgpu->algorithm.intensity_shift + cgpu->intensity);
-      max_int = ((cgpu->dynamic) ? MAX_INTENSITY : cgpu->intensity);
-    }
-
-    glob_thread_count = ((glob_thread_count < cgpu->work_size) ? cgpu->work_size : glob_thread_count);
-
-    // if TC * scratchbuf size is too big for memory... reduce to max
-    if ((glob_thread_count * YESCRYPT_SCRATCHBUF_SIZE) >= (uint64_t)cgpu->max_alloc) {
-
-      /* Selected intensity will not run on this GPU. Not enough memory.
-      * Adapt the memory setting. */
-      // depending on intensity type used, reduce the intensity until it fits into the GPU max_alloc
-      switch (type) {
-        //raw intensity
-      case 2:
-        while ((glob_thread_count * YESCRYPT_SCRATCHBUF_SIZE) > (uint64_t)cgpu->max_alloc) {
-          --glob_thread_count;
-        }
-
-        max_int = glob_thread_count;
-        cgpu->rawintensity = glob_thread_count;
-        break;
-
-        //x intensity
-      case 1:
-        glob_thread_count = cgpu->max_alloc / YESCRYPT_SCRATCHBUF_SIZE;
-        max_int = glob_thread_count / clState->compute_shaders;
-
-        while (max_int && ((clState->compute_shaders * (1UL << max_int)) > glob_thread_count)) {
-          --max_int;
-        }
-
-        /* Check if max_intensity is >0. */
-        if (max_int < MIN_XINTENSITY) {
-          applog(LOG_ERR, "GPU %d: Max xintensity is below minimum.", gpu);
-          max_int = MIN_XINTENSITY;
-        }
-
-        cgpu->xintensity = max_int;
-        glob_thread_count = clState->compute_shaders * (1UL << max_int);
-        break;
-
-      default:
-        glob_thread_count = cgpu->max_alloc / YESCRYPT_SCRATCHBUF_SIZE;
-        while (max_int && ((1UL << max_int) & glob_thread_count) == 0) {
-          --max_int;
-        }
-
-        /* Check if max_intensity is >0. */
-        if (max_int < MIN_INTENSITY) {
-          applog(LOG_ERR, "GPU %d: Max intensity is below minimum.", gpu);
-          max_int = MIN_INTENSITY;
-        }
-
-        cgpu->intensity = max_int;
-        glob_thread_count = 1UL << max_int;
-        break;
-      }
-    }
-
-    // TC is glob thread count
-    cgpu->thread_concurrency = glob_thread_count;
-
-    applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency));
-  }
-
   // Lyra2re v2 TC
   else if (cgpu->algorithm.type == ALGO_LYRA2REV2 && !cgpu->opt_tc) {
     size_t glob_thread_count;
@@ -788,26 +702,12 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
       applog(LOG_DEBUG, "pluck buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize);
       // scrypt/n-scrypt
     }
-    else if (algorithm->type == ALGO_YESCRYPT || algorithm->type == ALGO_YESCRYPT_MULTI) {
-      /* The scratch/pad-buffer needs 32kBytes memory per thread. */
-      bufsize = YESCRYPT_SCRATCHBUF_SIZE * cgpu->thread_concurrency;
-      buf1size = PLUCK_SECBUF_SIZE * cgpu->thread_concurrency;
-      buf2size = 128 * 8 * 8 * cgpu->thread_concurrency;
-      buf3size= 8 * 8 * 4 * cgpu->thread_concurrency;
-      /* This is the input buffer. For yescrypt this is guaranteed to be
-      * 80 bytes only. */
-      readbufsize = 80;
-
-      applog(LOG_DEBUG, "yescrypt buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize);
-      // scrypt/n-scrypt
-    }
     else if (algorithm->type == ALGO_LYRA2REV2) {
       /* The scratch/pad-buffer needs 32kBytes memory per thread. */
       bufsize = LYRA_SCRATCHBUF_SIZE * cgpu->thread_concurrency;
       buf1size = 4* 8 * cgpu->thread_concurrency; //matrix
 
-      /* This is the input buffer. For yescrypt this is guaranteed to be
-      * 80 bytes only. */
+      /* This is the input buffer.*/
       readbufsize = 80;
 
       applog(LOG_DEBUG, "lyra2REv2 buffer sizes: %lu RW, %lu RW", (unsigned long)bufsize, (unsigned long)buf1size);
@@ -839,27 +739,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
       applog(LOG_WARNING, "Your settings come to %lu", (unsigned long)bufsize);
     }
 
-    if (algorithm->type == ALGO_YESCRYPT || algorithm->type == ALGO_YESCRYPT_MULTI) {
-      // need additionnal buffers
-      clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf1size, NULL, &status);
-      if (status != CL_SUCCESS && !clState->buffer1) {
-        applog(LOG_DEBUG, "Error %d: clCreateBuffer (buffer1), decrease TC or increase LG", status);
-        return NULL;
-      }
-
-      clState->buffer2 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf2size, NULL, &status);
-      if (status != CL_SUCCESS && !clState->buffer2) {
-        applog(LOG_DEBUG, "Error %d: clCreateBuffer (buffer2), decrease TC or increase LG", status);
-        return NULL;
-      }
-
-      clState->buffer3 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf3size, NULL, &status);
-      if (status != CL_SUCCESS && !clState->buffer3) {
-        applog(LOG_DEBUG, "Error %d: clCreateBuffer (buffer3), decrease TC or increase LG", status);
-        return NULL;
-      }
-    }
-    else if (algorithm->type == ALGO_LYRA2REV2) {
+    if (algorithm->type == ALGO_LYRA2REV2) {
       // need additionnal buffers
       clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf1size, NULL, &status);
       if (status != CL_SUCCESS && !clState->buffer1) {
diff --git a/sph/gost.c b/sph/gost.c
index f46aeb58..696cc6fc 100644
--- a/sph/gost.c
+++ b/sph/gost.c
@@ -667,8 +667,8 @@ const unsigned char C[12][64] = {
 
 void AddModulo512(const void *a,const void *b,void *c)
 {
-	const unsigned char *A=a, *B=b;
-	unsigned char *C=c;
+	const unsigned char *A = (unsigned char*)a, *B = (unsigned char*) b;
+	unsigned char *C = (unsigned char*) c;
 	int t = 0;
 #ifdef FULL_UNROLL
 #define ADDBYTE_8(i) t = A[i] + B[i] + (t >> 8); C[i] = t & 0xFF;
@@ -751,8 +751,8 @@ void AddModulo512(const void *a,const void *b,void *c)
 
 void AddXor512(const void *a,const void *b,void *c)
 {
-	const unsigned long long *A=a, *B=b;
-	unsigned long long *C=c;
+	const unsigned long long *A = (unsigned long long *) a, *B = (unsigned long long *) b;
+	unsigned long long *C = (unsigned long long *) c;
 #ifdef FULL_UNROLL
 	C[0] = A[0] ^ B[0];
 	C[1] = A[1] ^ B[1];
@@ -1051,7 +1051,7 @@ sph_gost256_init(void *cc)
 void
 sph_gost256(void *cc, const void *data, size_t len)
 {
-	hash_256(data, 8*len, cc);
+	hash_256((const unsigned char*)data, 8 * len, (unsigned char*)cc);
 }
 
 /* see sph_gost.h */
@@ -1080,7 +1080,7 @@ sph_gost512_init(void *cc)
 void
 sph_gost512(void *cc, const void *data, size_t len)
 {
-	hash_512(data, 8*len, cc);
+	hash_512((const unsigned char*)data, 8 * len, (unsigned char*)cc);
 }
 
 /* see sph_gost.h */
diff --git a/winbuild/dist/include/config.h b/winbuild/dist/include/config.h
index 86931708..b91d6d47 100644
--- a/winbuild/dist/include/config.h
+++ b/winbuild/dist/include/config.h
@@ -1,82 +1,86 @@
-#ifndef __CONFIG_H__
-#define __CONFIG_H__
-
-#define HAVE_STDINT_H
-
-#if defined(_MSC_VER)
-
-#define HAVE_LIBCURL 1
-#define CURL_HAS_KEEPALIVE 1
-#define HAVE_CURSES 1
-#define HAVE_ADL 1
-
-#define STDC_HEADERS 1
-#define EXECV_2ND_ARG_TYPE char* const*
-
-#define HAVE_ALLOCA 1
-#define HAVE_ATTRIBUTE_COLD 1
-#define HAVE_ATTRIBUTE_CONST 1
-#define HAVE_ATTRIBUTE_NORETURN 1
-#define HAVE_ATTRIBUTE_PRINTF 1
-#define HAVE_ATTRIBUTE_UNUSED 1
-#define HAVE_ATTRIBUTE_USED 1
-#define HAVE_BUILTIN_CONSTANT_P 1
-#define HAVE_BUILTIN_TYPES_COMPATIBLE_P 1
-#define HAVE_DECL_MEMMEM 0
-#define HAVE_INTTYPES_H 1
-#define HAVE_LONG_LONG_INT 1
-#define HAVE_MEMORY_H 1
-#define HAVE_MPROTECT 1
-#define HAVE_RAW_DECL_MEMPCPY 1
-#define HAVE_RAW_DECL_STRNCAT 1
-#define HAVE_RAW_DECL_STRNLEN 1
-#define HAVE_RAW_DECL_STRPBRK 1
-#define HAVE_STDLIB_H 1
-#define HAVE_STRINGS_H 1
-#define HAVE_STRING_H 1
-#define HAVE_SYS_STAT_H 1
-#define HAVE_SYS_TYPES_H 1
-#define HAVE_UNISTD_H 1
-#define HAVE_UNSIGNED_LONG_LONG_INT 1
-#define HAVE_WARN_UNUSED_RESULT 1
-#define HAVE_WCHAR_H 1
-#define HAVE_WCHAR_T 1
-
-#define PRIi64 "I64d"
-#define PRIi32 "I32d"
-#define PRIu32 "I32u"
-#define PRIu64 "I64u"
-
-#define PATH_MAX MAX_PATH
-
-// Libraries to include
-#pragma comment(lib, "winmm.lib")
-#pragma comment(lib, "wsock32.lib")
-#pragma comment(lib, "pthreadVC2.lib")
-#pragma comment(lib, "OpenCL.lib")
-#pragma comment(lib, "jansson.lib")
-
-#ifdef HAVE_LIBCURL
-#define CURL_STATICLIB 1
-#pragma comment(lib, "libcurl_a.lib")
-#endif
-
-#ifdef HAVE_CURSES
-#pragma comment(lib, "pdcurses.lib")
-#endif
-
-#endif
-
-#define VERSION "v5.2.0"
-#define PACKAGE_NAME "sgminer"
-#define PACKAGE_TARNAME "sgminer"
-#define PACKAGE_VERSION "5.2.0"
-#define PACKAGE_STRING "sgminer 5.2.0"
-#define PACKAGE "sgminer"
-
-#define SGMINER_PREFIX ""
-
-#include "gitversion.h"
-#include "winbuild.h"
-
-#endif
+#ifndef __CONFIG_H__
+#define __CONFIG_H__
+
+#define HAVE_STDINT_H
+
+#if defined(_MSC_VER)
+
+#define HAVE_LIBCURL 1
+#define CURL_HAS_KEEPALIVE 1
+#define HAVE_CURSES 1
+#define HAVE_ADL 1
+#define HAVE_NVML 1
+
+#define STDC_HEADERS 1
+#define EXECV_2ND_ARG_TYPE char* const*
+
+#define HAVE_ALLOCA 1
+#define HAVE_ATTRIBUTE_COLD 1
+#define HAVE_ATTRIBUTE_CONST 1
+#define HAVE_ATTRIBUTE_NORETURN 1
+#define HAVE_ATTRIBUTE_PRINTF 1
+#define HAVE_ATTRIBUTE_UNUSED 1
+#define HAVE_ATTRIBUTE_USED 1
+#define HAVE_BUILTIN_CONSTANT_P 1
+#define HAVE_BUILTIN_TYPES_COMPATIBLE_P 1
+#define HAVE_DECL_MEMMEM 0
+#define HAVE_INTTYPES_H 1
+#define HAVE_LONG_LONG_INT 1
+#define HAVE_MEMORY_H 1
+#define HAVE_MPROTECT 1
+#define HAVE_RAW_DECL_MEMPCPY 1
+#define HAVE_RAW_DECL_STRNCAT 1
+#define HAVE_RAW_DECL_STRNLEN 1
+#define HAVE_RAW_DECL_STRPBRK 1
+#define HAVE_STDLIB_H 1
+#define HAVE_STRINGS_H 1
+#define HAVE_STRING_H 1
+#define HAVE_SYS_STAT_H 1
+#define HAVE_SYS_TYPES_H 1
+#define HAVE_UNISTD_H 1
+#define HAVE_UNSIGNED_LONG_LONG_INT 1
+#define HAVE_WARN_UNUSED_RESULT 1
+#define HAVE_WCHAR_H 1
+#define HAVE_WCHAR_T 1
+
+#define PRIi64 "I64d"
+#define PRIi32 "I32d"
+#define PRIu32 "I32u"
+#define PRIu64 "I64u"
+
+#define PATH_MAX MAX_PATH
+
+// Libraries to include
+#pragma comment(lib, "winmm.lib")
+#pragma comment(lib, "wsock32.lib")
+#pragma comment(lib, "pthreadVC2.lib")
+#pragma comment(lib, "OpenCL.lib")
+#pragma comment(lib, "jansson.lib")
+
+#ifdef HAVE_LIBCURL
+#define CURL_STATICLIB 1
+#pragma comment(lib, "ws2_32.lib")
+#pragma comment(lib, "zlib.lib")
+#pragma comment(lib, "openssl.lib")
+#pragma comment(lib, "libcurl_a.lib")
+#endif
+
+#ifdef HAVE_CURSES
+#pragma comment(lib, "pdcurses.lib")
+#endif
+
+#endif
+
+#define VERSION "5.3.9"
+#define PACKAGE_NAME "sgminer"
+#define PACKAGE_TARNAME "sgminer"
+#define PACKAGE_VERSION "5.3.9"
+#define PACKAGE_STRING "sgminer 5.3.9"
+#define PACKAGE "sgminer"
+
+#define SGMINER_PREFIX ""
+
+//#include "gitversion.h"
+#include "winbuild.h"
+
+#endif
diff --git a/winbuild/sgminer.vcxproj b/winbuild/sgminer.vcxproj
index 67679bb1..16cbb253 100644
--- a/winbuild/sgminer.vcxproj
+++ b/winbuild/sgminer.vcxproj
@@ -263,51 +263,60 @@
     <ClCompile Include="..\algorithm.c" />
     <ClCompile Include="..\algorithm\animecoin.c" />
     <ClCompile Include="..\algorithm\bitblock.c" />
+    <ClCompile Include="..\algorithm\blake256.c" />
+    <ClCompile Include="..\algorithm\blakecoin.c" />
     <ClCompile Include="..\algorithm\credits.c" />
+    <ClCompile Include="..\algorithm\darkcoin.c" />
+    <ClCompile Include="..\algorithm\decred.c" />
+    <ClCompile Include="..\algorithm\fresh.c" />
+    <ClCompile Include="..\algorithm\fuguecoin.c" />
+    <ClCompile Include="..\algorithm\gostcoin.c" />
+    <ClCompile Include="..\algorithm\groestlcoin.c" />
+    <ClCompile Include="..\algorithm\inkcoin.c" />
     <ClCompile Include="..\algorithm\lyra2.c" />
     <ClCompile Include="..\algorithm\lyra2re.c" />
     <ClCompile Include="..\algorithm\lyra2rev2.c" />
+    <ClCompile Include="..\algorithm\marucoin.c" />
+    <ClCompile Include="..\algorithm\maxcoin.c" />
+    <ClCompile Include="..\algorithm\myriadcoin-groestl.c" />
     <ClCompile Include="..\algorithm\neoscrypt.c" />
+    <ClCompile Include="..\algorithm\pascal.c" />
     <ClCompile Include="..\algorithm\pluck.c" />
+    <ClCompile Include="..\algorithm\quarkcoin.c" />
+    <ClCompile Include="..\algorithm\qubitcoin.c" />
+    <ClCompile Include="..\algorithm\scrypt.c" />
+    <ClCompile Include="..\algorithm\sia.c" />
+    <ClCompile Include="..\algorithm\sibcoin.c" />
+    <ClCompile Include="..\algorithm\sifcoin.c" />
     <ClCompile Include="..\algorithm\sponge.c" />
     <ClCompile Include="..\algorithm\talkcoin.c" />
+    <ClCompile Include="..\algorithm\twecoin.c" />
+    <ClCompile Include="..\algorithm\whirlcoin.c" />
     <ClCompile Include="..\algorithm\whirlpoolx.c" />
     <ClCompile Include="..\algorithm\x14.c" />
-    <ClCompile Include="..\algorithm\fresh.c" />
-    <ClCompile Include="..\algorithm\whirlcoin.c" />
     <ClCompile Include="..\api.c" />
     <ClCompile Include="..\ccan\opt\helpers.c" />
     <ClCompile Include="..\ccan\opt\opt.c" />
     <ClCompile Include="..\ccan\opt\parse.c" />
     <ClCompile Include="..\ccan\opt\usage.c" />
-    <ClCompile Include="..\algorithm\darkcoin.c" />
     <ClCompile Include="..\config_parser.c" />
     <ClCompile Include="..\driver-opencl.c" />
     <ClCompile Include="..\events.c" />
     <ClCompile Include="..\findnonce.c" />
-    <ClCompile Include="..\algorithm\fuguecoin.c" />
-    <ClCompile Include="..\algorithm\groestlcoin.c" />
     <ClCompile Include="..\hexdump.c" />
-    <ClCompile Include="..\algorithm\inkcoin.c" />
     <ClCompile Include="..\logging.c" />
-    <ClCompile Include="..\algorithm\marucoin.c" />
-    <ClCompile Include="..\algorithm\maxcoin.c" />
-    <ClCompile Include="..\algorithm\myriadcoin-groestl.c" />
     <ClCompile Include="..\ocl.c" />
     <ClCompile Include="..\ocl\binary_kernel.c" />
     <ClCompile Include="..\ocl\build_kernel.c" />
     <ClCompile Include="..\pool.c" />
-    <ClCompile Include="..\algorithm\quarkcoin.c" />
-    <ClCompile Include="..\algorithm\qubitcoin.c" />
-    <ClCompile Include="..\algorithm\scrypt.c" />
     <ClCompile Include="..\sgminer.c" />
-    <ClCompile Include="..\algorithm\sifcoin.c" />
     <ClCompile Include="..\sph\aes_helper.c" />
     <ClCompile Include="..\sph\blake.c" />
     <ClCompile Include="..\sph\bmw.c" />
     <ClCompile Include="..\sph\cubehash.c" />
     <ClCompile Include="..\sph\echo.c" />
     <ClCompile Include="..\sph\fugue.c" />
+    <ClCompile Include="..\sph\gost.c" />
     <ClCompile Include="..\sph\groestl.c" />
     <ClCompile Include="..\sph\hamsi.c" />
     <ClCompile Include="..\sph\hamsi_helper.c" />
@@ -321,7 +330,6 @@
     <ClCompile Include="..\sph\shavite.c" />
     <ClCompile Include="..\sph\simd.c" />
     <ClCompile Include="..\sph\skein.c" />
-    <ClCompile Include="..\algorithm\twecoin.c" />
     <ClCompile Include="..\sph\whirlpool.c" />
     <ClCompile Include="..\util.c" />
   </ItemGroup>
@@ -330,52 +338,61 @@
     <ClInclude Include="..\algorithm.h" />
     <ClInclude Include="..\algorithm\animecoin.h" />
     <ClInclude Include="..\algorithm\bitblock.h" />
+    <ClInclude Include="..\algorithm\blake256.h" />
+    <ClInclude Include="..\algorithm\blakecoin.h" />
     <ClInclude Include="..\algorithm\credits.h" />
+    <ClInclude Include="..\algorithm\darkcoin.h" />
+    <ClInclude Include="..\algorithm\decred.h" />
+    <ClInclude Include="..\algorithm\fresh.h" />
+    <ClInclude Include="..\algorithm\fuguecoin.h" />
+    <ClInclude Include="..\algorithm\gostcoin.h" />
+    <ClInclude Include="..\algorithm\groestlcoin.h" />
+    <ClInclude Include="..\algorithm\inkcoin.h" />
     <ClInclude Include="..\algorithm\lyra2.h" />
     <ClInclude Include="..\algorithm\lyra2re.h" />
     <ClInclude Include="..\algorithm\lyra2rev2.h" />
+    <ClInclude Include="..\algorithm\marucoin.h" />
+    <ClInclude Include="..\algorithm\maxcoin.h" />
+    <ClInclude Include="..\algorithm\myriadcoin-groestl.h" />
     <ClInclude Include="..\algorithm\neoscrypt.h" />
+    <ClInclude Include="..\algorithm\pascal.h" />
     <ClInclude Include="..\algorithm\pluck.h" />
+    <ClInclude Include="..\algorithm\quarkcoin.h" />
+    <ClInclude Include="..\algorithm\qubitcoin.h" />
+    <ClInclude Include="..\algorithm\scrypt.h" />
+    <ClInclude Include="..\algorithm\sia.h" />
+    <ClInclude Include="..\algorithm\sibcoin.h" />
+    <ClInclude Include="..\algorithm\sifcoin.h" />
     <ClInclude Include="..\algorithm\sponge.h" />
     <ClInclude Include="..\algorithm\sysendian.h" />
     <ClInclude Include="..\algorithm\talkcoin.h" />
+    <ClInclude Include="..\algorithm\twecoin.h" />
+    <ClInclude Include="..\algorithm\whirlcoin.h" />
     <ClInclude Include="..\algorithm\whirlpoolx.h" />
     <ClInclude Include="..\algorithm\x14.h" />
-    <ClInclude Include="..\algorithm\fresh.h" />
-    <ClInclude Include="..\algorithm\whirlcoin.h" />
     <ClInclude Include="..\api.h" />
     <ClInclude Include="..\arg-nonnull.h" />
     <ClInclude Include="..\bench_block.h" />
     <ClInclude Include="..\c++defs.h" />
     <ClInclude Include="..\compat.h" />
-    <ClInclude Include="..\algorithm\darkcoin.h" />
     <ClInclude Include="..\config_parser.h" />
     <ClInclude Include="..\driver-opencl.h" />
     <ClInclude Include="..\elist.h" />
     <ClInclude Include="..\events.h" />
     <ClInclude Include="..\findnonce.h" />
-    <ClInclude Include="..\algorithm\fuguecoin.h" />
-    <ClInclude Include="..\algorithm\groestlcoin.h" />
-    <ClInclude Include="..\algorithm\inkcoin.h" />
     <ClInclude Include="..\logging.h" />
-    <ClInclude Include="..\algorithm\marucoin.h" />
-    <ClInclude Include="..\algorithm\maxcoin.h" />
     <ClInclude Include="..\miner.h" />
-    <ClInclude Include="..\algorithm\myriadcoin-groestl.h" />
     <ClInclude Include="..\ocl.h" />
     <ClInclude Include="..\ocl\binary_kernel.h" />
     <ClInclude Include="..\ocl\build_kernel.h" />
     <ClInclude Include="..\pool.h" />
-    <ClInclude Include="..\algorithm\quarkcoin.h" />
-    <ClInclude Include="..\algorithm\qubitcoin.h" />
-    <ClInclude Include="..\algorithm\scrypt.h" />
-    <ClInclude Include="..\algorithm\sifcoin.h" />
     <ClInclude Include="..\sph\sha256_Y.h" />
     <ClInclude Include="..\sph\sph_blake.h" />
     <ClInclude Include="..\sph\sph_bmw.h" />
     <ClInclude Include="..\sph\sph_cubehash.h" />
     <ClInclude Include="..\sph\sph_echo.h" />
     <ClInclude Include="..\sph\sph_fugue.h" />
+    <ClInclude Include="..\sph\sph_gost.h" />
     <ClInclude Include="..\sph\sph_groestl.h" />
     <ClInclude Include="..\sph\sph_hamsi.h" />
     <ClInclude Include="..\sph\sph_jh.h" />
@@ -388,7 +405,6 @@
     <ClInclude Include="..\sph\sph_simd.h" />
     <ClInclude Include="..\sph\sph_skein.h" />
     <ClInclude Include="..\sph\sph_types.h" />
-    <ClInclude Include="..\algorithm\twecoin.h" />
     <ClInclude Include="..\sph\sph_whirlpool.h" />
     <ClInclude Include="..\uthash.h" />
     <ClInclude Include="..\util.h" />
diff --git a/winbuild/sgminer.vcxproj.filters b/winbuild/sgminer.vcxproj.filters
index 7866e29d..6e120dcf 100644
--- a/winbuild/sgminer.vcxproj.filters
+++ b/winbuild/sgminer.vcxproj.filters
@@ -86,6 +86,9 @@
     <ClCompile Include="..\sph\fugue.c">
       <Filter>Source Files\sph</Filter>
     </ClCompile>
+    <ClCompile Include="..\sph\gost.c">
+      <Filter>Source Files\sph</Filter>
+    </ClCompile>
     <ClCompile Include="..\sph\groestl.c">
       <Filter>Source Files\sph</Filter>
     </ClCompile>
@@ -137,6 +140,9 @@
     <ClCompile Include="..\algorithm\fuguecoin.c">
       <Filter>Source Files\algorithm</Filter>
     </ClCompile>
+    <ClCompile Include="..\algorithm\gostcoin.c">
+      <Filter>Source Files\algorithm</Filter>
+    </ClCompile>
     <ClCompile Include="..\algorithm\groestlcoin.c">
       <Filter>Source Files\algorithm</Filter>
     </ClCompile>
@@ -310,6 +316,9 @@
     <ClInclude Include="..\sph\sph_hamsi.h">
       <Filter>Header Files\sph</Filter>
     </ClInclude>
+    <ClInclude Include="..\sph\sph_gost.h">
+      <Filter>Header Files\sph</Filter>
+    </ClInclude>
     <ClInclude Include="..\sph\sph_groestl.h">
       <Filter>Header Files\sph</Filter>
     </ClInclude>
@@ -343,6 +352,9 @@
     <ClInclude Include="..\algorithm\fuguecoin.h">
       <Filter>Header Files\algorithm</Filter>
     </ClInclude>
+    <ClInclude Include="..\algorithm\gostcoin.h">
+      <Filter>Header Files\algorithm</Filter>
+    </ClInclude>
     <ClInclude Include="..\algorithm\groestlcoin.h">
       <Filter>Header Files\algorithm</Filter>
     </ClInclude>