diff --git a/Makefile.am b/Makefile.am index 32a49ea..afd5390 100644 --- a/Makefile.am +++ b/Makefile.am @@ -39,6 +39,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \ Algo256/blake256.cu Algo256/decred.cu Algo256/vanilla.cu Algo256/keccak256.cu \ Algo256/blake2s.cu sph/blake2s.c \ Algo256/bmw.cu Algo256/cuda_bmw.cu \ + crypto/xmr-rpc.cpp crypto/wildkeccak-cpu.cpp crypto/wildkeccak.cu \ JHA/jackpotcoin.cu JHA/cuda_jha_keccak512.cu \ JHA/cuda_jha_compactionTest.cu cuda_checkhash.cu \ quark/cuda_jh512.cu quark/cuda_quark_blake512.cu quark/cuda_quark_groestl512.cu quark/cuda_skein512.cu \ diff --git a/README.txt b/README.txt index b6ca326..74557df 100644 --- a/README.txt +++ b/README.txt @@ -1,5 +1,5 @@ -ccminer 1.8.4 (Dec. 2016) "Winter boosts: sib, veltor, blake2s" +ccminer 2.0 (December 2016) "Boolberry's WildKeccak RPC 2.0" --------------------------------------------------------------- *************************************************************** @@ -51,6 +51,7 @@ Woodcoin (Double Skein) Vanilla (Blake256 8-rounds - double sha256) Vertcoin Lyra2RE Ziftrcoin (ZR5) +Boolberry (Wild Keccak) where some of these coins have a VERY NOTABLE nVidia advantage over competing AMD (OpenCL Only) implementations. @@ -110,6 +111,7 @@ its command line interface and options. vanilla use to mine Vanilla (Blake256) veltor use to mine VeltorCoin whirlpool use to mine Joincoin + wildkeccak use to mine Boolberry (Stratum only) zr5 use to mine ZiftrCoin -d, --devices gives a comma separated list of CUDA device IDs @@ -184,6 +186,12 @@ Scrypt specific options: --no-autotune disable auto-tuning of kernel launch parameters +Boolberry specific: + -l, --launch-config gives the launch configuration for each kernel + in a comma separated list, one per device. + -k, --scratchpad url Url used to download the scratchpad cache. 
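       For example, -l 64x360 asks for 64 CUDA blocks of 360 threads on each
       device (illustrative values only, as in the sample command below; tune
       them per GPU).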
+ + >>> Examples <<< @@ -206,6 +214,8 @@ Example for Fuguecoin pool mining on dwarfpool.com with all your GPUs Example for Groestlcoin solo mining ccminer -q -s 1 -a groestl -o http://127.0.0.1:1441/ -u USERNAME -p PASSWORD +Example for Boolberry + ccminer -a wildkeccak -o stratum+tcp://bbr.suprnova.cc:7777 -u tpruvot.donate -p x -k http://bbr.suprnova.cc/scratchpad.bin -l 64x360 Example for Scrypt-N (2048) on Nicehash ccminer -a scrypt:10 -o stratum+tcp://stratum.nicehash.com:3335 -u 3EujYFcoBzWvpUEvbe3obEG95mBuU88QBD -p x diff --git a/algos.h b/algos.h index c8107ee..f960b25 100644 --- a/algos.h +++ b/algos.h @@ -48,6 +48,7 @@ enum sha_algos { ALGO_WHIRLCOIN, ALGO_WHIRLPOOL, ALGO_WHIRLPOOLX, + ALGO_WILDKECCAK, ALGO_ZR5, ALGO_AUTO, ALGO_COUNT @@ -99,6 +100,7 @@ static const char *algo_names[] = { "whirlcoin", "whirlpool", "whirlpoolx", + "wildkeccak", "zr5", "auto", /* reserved for multi algo */ "" diff --git a/bench.cpp b/bench.cpp index b0b0c6d..1aa3611 100644 --- a/bench.cpp +++ b/bench.cpp @@ -75,6 +75,7 @@ void algo_free_all(int thr_id) free_veltor(thr_id); free_whirl(thr_id); //free_whirlx(thr_id); + free_wildkeccak(thr_id); free_x11evo(thr_id); free_x11(thr_id); free_x13(thr_id); diff --git a/ccminer.cpp b/ccminer.cpp index a9a484f..de22c51 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -43,6 +43,8 @@ #include "miner.h" #include "algos.h" #include "sia/sia-rpc.h" +#include "crypto/xmr-rpc.h" + #include #ifdef WIN32 @@ -95,7 +97,7 @@ bool allow_mininginfo = true; bool check_dups = false; bool check_stratum_jobs = false; -static bool submit_old = false; +bool submit_old = false; bool use_syslog = false; bool use_colors = true; int use_pok = 0; @@ -160,6 +162,8 @@ volatile bool pool_is_switching = false; volatile int pool_switch_count = 0; bool conditional_pool_rotate = false; +extern char* opt_scratchpad_url; + // current connection char *rpc_user = NULL; char *rpc_pass; @@ -257,6 +261,7 @@ Options:\n\ x14 X14\n\ x15 X15\n\ x17 X17\n\ + wildkeccak Boolberry\n\ zr5 ZR5 (ZiftrCoin)\n\ -d, --devices Comma separated list of CUDA devices to use.\n\ Device IDs start counting from 0! 
Alternatively takes\n\ @@ -335,7 +340,7 @@ static char const short_options[] = #ifdef HAVE_SYSLOG_H "S" #endif - "a:Bc:i:Dhp:Px:f:m:nqr:R:s:t:T:o:u:O:Vd:N:b:l:L:"; + "a:Bc:k:i:Dhp:Px:f:m:nqr:R:s:t:T:o:u:O:Vd:N:b:l:L:"; struct option options[] = { { "algo", 1, NULL, 'a' }, @@ -360,9 +365,10 @@ struct option options[] = { { "no-stratum", 0, NULL, 1007 }, { "no-autotune", 0, NULL, 1004 }, // scrypt { "interactive", 1, NULL, 1050 }, // scrypt - { "launch-config", 1, NULL, 'l' }, // scrypt { "lookup-gap", 1, NULL, 'L' }, // scrypt { "texture-cache", 1, NULL, 1051 },// scrypt + { "launch-config", 1, NULL, 'l' }, // scrypt & bbr + { "scratchpad", 1, NULL, 'k' }, // bbr { "max-temp", 1, NULL, 1060 }, { "max-diff", 1, NULL, 1061 }, { "max-rate", 1, NULL, 1062 }, @@ -431,6 +437,13 @@ Scrypt specific options:\n\ --no-autotune disable auto-tuning of kernel launch parameters\n\ "; +static char const xmr_usage[] = "\n\ +CryptoNote specific options:\n\ + -l, --launch-config gives the launch configuration for each kernel\n\ + in a comma separated list, one per device.\n\ + -k, --scratchpad url Url used to download the scratchpad cache.\n\ +"; + struct work _ALIGN(64) g_work; volatile time_t g_work_time; pthread_mutex_t g_work_lock; @@ -544,8 +557,7 @@ void proper_exit(int reason) exit(reason); } -static bool jobj_binary(const json_t *obj, const char *key, - void *buf, size_t buflen) +bool jobj_binary(const json_t *obj, const char *key, void *buf, size_t buflen) { const char *hexstr; json_t *tmp; @@ -608,6 +620,8 @@ static bool work_decode(const json_t *val, struct work *work) data_size = 80; adata_sz = data_size / 4; break; + case ALGO_WILDKECCAK: + return rpc2_job_decode(val, work); default: data_size = 128; adata_sz = data_size / 4; @@ -780,6 +794,18 @@ static bool submit_upstream_work(CURL *curl, struct work *work) bool stale_work = false; int idnonce = 0; + if (pool->type & POOL_STRATUM && stratum.rpc2) { + struct work submit_work; + memcpy(&submit_work, work, sizeof(struct work)); + bool sent = hashlog_already_submittted(submit_work.job_id, submit_work.nonces[0]); + if (sent) { + return true; + } + bool ret = rpc2_stratum_submit(pool, &submit_work); + hashlog_remember_submit(&submit_work, submit_work.nonces[0]); + return ret; + } + /* discard if a newer block was received */ stale_work = work->height && work->height < g_work.height; if (have_stratum && !stale_work && opt_algo != ALGO_ZR5 && opt_algo != ALGO_SCRYPT_JANE) { @@ -1405,6 +1431,9 @@ static bool stratum_gen_work(struct stratum_ctx *sctx, struct work *work) uchar merkle_root[64] = { 0 }; int i; + if (sctx->rpc2) + return rpc2_stratum_gen_work(sctx, work); + if (!sctx->job.job_id) { // applog(LOG_WARNING, "stratum_gen_work: job not yet retrieved"); return false; @@ -1733,10 +1762,16 @@ static void *miner_thread(void *userdata) uint32_t *nonceptr = (uint32_t*) (((char*)work.data) + wcmplen); + if (opt_algo == ALGO_WILDKECCAK) { + nonceptr = (uint32_t*) (((char*)work.data) + 1); + wcmpoft = 2; + wcmplen = 32; + } + if (have_stratum) { uint32_t sleeptime = 0; - if (opt_algo == ALGO_DECRED) + if (opt_algo == ALGO_DECRED || stratum.rpc2) work_done = true; // force "regen" hash while (!work_done && time(NULL) >= (g_work_time + opt_scantime)) { usleep(100*1000); @@ -1748,7 +1783,7 @@ static void *miner_thread(void *userdata) } if (sleeptime && opt_debug && !opt_quiet) applog(LOG_DEBUG, "sleeptime: %u ms", sleeptime*100); - nonceptr = (uint32_t*) (((char*)work.data) + wcmplen); + //nonceptr = (uint32_t*) (((char*)work.data) + wcmplen); 
pthread_mutex_lock(&g_work_lock); extrajob |= work_done; @@ -1835,6 +1870,8 @@ static void *miner_thread(void *userdata) nonceptr[1] += 1; nonceptr[2] |= thr_id; + } else if (opt_algo == ALGO_WILDKECCAK) { + //nonceptr[1] += 1; } else if (opt_algo == ALGO_SIA) { // suprnova job_id check without data/target/height change... if (have_stratum && strcmp(work.job_id, g_work.job_id)) { @@ -1879,6 +1916,11 @@ static void *miner_thread(void *userdata) gpulog(LOG_DEBUG, thr_id, "no data"); continue; } + if (stratum.rpc2 && !scratchpad_size) { + sleep(1); + if (!thr_id) pools[cur_pooln].wait_time += 1; + continue; + } /* conditional mining */ if (!wanna_mine(thr_id)) { @@ -2196,6 +2238,9 @@ static void *miner_thread(void *userdata) //case ALGO_WHIRLPOOLX: // rc = scanhash_whirlx(thr_id, &work, max_nonce, &hashes_done); // break; + case ALGO_WILDKECCAK: + rc = scanhash_wildkeccak(thr_id, &work, max_nonce, &hashes_done); + break; case ALGO_X11EVO: rc = scanhash_x11evo(thr_id, &work, max_nonce, &hashes_done); break; @@ -2242,6 +2287,7 @@ static void *miner_thread(void *userdata) case ALGO_LBRY: case ALGO_SIA: case ALGO_VELTOR: + case ALGO_WILDKECCAK: // migrated algos break; case ALGO_ZR5: @@ -2254,6 +2300,13 @@ static void *miner_thread(void *userdata) work.nonces[1] = nonceptr[2]; } + if (stratum.rpc2 && rc == -EBUSY || work_restart[thr_id].restart) { + // bbr scratchpad download or stale result + sleep(1); + if (!thr_id) pools[cur_pooln].wait_time += 1; + continue; + } + if (rc > 0 && opt_debug) applog(LOG_NOTICE, CL_CYN "found => %08x" CL_GRN " %08x", work.nonces[0], swab32(work.nonces[0])); if (rc > 1 && opt_debug) @@ -2548,7 +2601,7 @@ static bool stratum_handle_response(char *buf) err_val = json_object_get(val, "error"); id_val = json_object_get(val, "id"); - if (!id_val || json_is_null(id_val) || !res_val) + if (!id_val || json_is_null(id_val)) goto out; // ignore late login answers @@ -2564,8 +2617,24 @@ static bool stratum_handle_response(char *buf) // store time required to the pool to answer to a submit stratum.answer_msec = (1000 * diff.tv_sec) + (uint32_t) (0.001 * diff.tv_usec); - share_result(json_is_true(res_val), stratum.pooln, stratum.sharediff, - err_val ? json_string_value(json_array_get(err_val, 1)) : NULL); + if (stratum.rpc2) { + + const char* reject_reason = err_val ? json_string_value(json_object_get(err_val, "message")) : NULL; + // {"id":4,"jsonrpc":"2.0","error":null,"result":{"status":"OK"}} + share_result(json_is_null(err_val), stratum.pooln, stratum.sharediff, reject_reason); + if (reject_reason) { + g_work_time = 0; + restart_threads(); + } + + } else { + + if (!res_val) + goto out; + + share_result(json_is_true(res_val), stratum.pooln, stratum.sharediff, + err_val ? 
json_string_value(json_array_get(err_val, 1)) : NULL); + } ret = true; out: @@ -2641,6 +2710,10 @@ wait_stratum_url: } } + if (opt_algo == ALGO_WILDKECCAK) { + rpc2_stratum_thread_stuff(pool); + } + if (switchn != pool_switch_count) goto pool_switched; if (stratum.job.job_id && @@ -2733,6 +2806,9 @@ static void show_usage_and_exit(int status) if (opt_algo == ALGO_SCRYPT || opt_algo == ALGO_SCRYPT_JANE) { printf(scrypt_usage); } + if (opt_algo == ALGO_WILDKECCAK) { + printf(xmr_usage); + } proper_exit(status); } @@ -2814,6 +2890,9 @@ void parse_arg(int key, char *arg) } break; } + case 'k': + opt_scratchpad_url = strdup(arg); + break; case 'i': d = atof(arg); v = (uint32_t) d; @@ -3011,7 +3090,7 @@ void parse_arg(int key, char *arg) case 1004: opt_autotune = false; break; - case 'l': /* scrypt --launch-config */ + case 'l': /* --launch-config */ { char *last = NULL, *pch = strtok(arg,","); int n = 0; @@ -3582,6 +3661,12 @@ int main(int argc, char *argv[]) allow_mininginfo = false; } + if (opt_algo == ALGO_WILDKECCAK) { + rpc2_init(); + applog(LOG_INFO, "Using CryptoNote JSON-RPC 2.0"); + GetScratchpad(); + } + flags = !opt_benchmark && strncmp(rpc_url, "https:", 6) ? (CURL_GLOBAL_ALL & ~CURL_GLOBAL_SSL) : CURL_GLOBAL_ALL; diff --git a/ccminer.vcxproj b/ccminer.vcxproj index a7bfce1..9a27e9e 100644 --- a/ccminer.vcxproj +++ b/ccminer.vcxproj @@ -39,7 +39,7 @@ - + @@ -230,6 +230,9 @@ false Full + + + @@ -248,6 +251,9 @@ + + + @@ -258,6 +264,9 @@ 76 + + 128 + 160 @@ -531,7 +540,7 @@ - + diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters index ad96edd..6cfc1d4 100644 --- a/ccminer.vcxproj.filters +++ b/ccminer.vcxproj.filters @@ -88,6 +88,9 @@ {86a896c0-1688-4854-98e0-285d166069a3} + + {fea0fce3-c0fe-42f7-aa37-0cbba10b008a} + @@ -279,6 +282,15 @@ Source Files\sia + + Source Files\crypto + + + Source Files\crypto + + + Source Files\crypto + @@ -482,6 +494,15 @@ Source Files\sia + + Source Files\crypto + + + Source Files\crypto + + + Source Files\crypto + @@ -760,6 +781,9 @@ Source Files\sia + + Source Files\crypto + diff --git a/compat/ccminer-config.h b/compat/ccminer-config.h index c089a5c..26cfea4 100644 --- a/compat/ccminer-config.h +++ b/compat/ccminer-config.h @@ -164,7 +164,7 @@ #define PACKAGE_URL "http://github.com/tpruvot/ccminer" /* Define to the version of this package. 
*/ -#define PACKAGE_VERSION "1.8.4" +#define PACKAGE_VERSION "2.0" /* If using the C implementation of alloca, define if you know the direction of stack growth for your system; otherwise it will be diff --git a/configure.ac b/configure.ac index 79a5217..7842201 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([ccminer], [1.8.4], [], [ccminer], [http://github.com/tpruvot/ccminer]) +AC_INIT([ccminer], [2.0], [], [ccminer], [http://github.com/tpruvot/ccminer]) AC_PREREQ([2.59c]) AC_CANONICAL_SYSTEM diff --git a/configure.sh b/configure.sh index a4c4f46..edb3242 100755 --- a/configure.sh +++ b/configure.sh @@ -3,5 +3,5 @@ extracflags="-march=native -D_REENTRANT -falign-functions=16 -falign-jumps=16 -falign-labels=16" CUDA_CFLAGS="-O3 -lineno -Xcompiler -Wall -D_FORCE_INLINES" \ - ./configure CXXFLAGS="-O3 $extracflags" --with-cuda=/usr/local/cuda --with-nvml=libnvidia-ml.so + ./configure CXXFLAGS="-O3 $extracflags" --with-cuda=/usr/local/cuda-7.5 --with-nvml=libnvidia-ml.so diff --git a/crypto/int128_c.h b/crypto/int128_c.h new file mode 100644 index 0000000..3db164a --- /dev/null +++ b/crypto/int128_c.h @@ -0,0 +1,573 @@ +#ifndef UINT128_C_H +#define UINT128_C_H + +struct __uint128 { + uint64_t Hi; + uint64_t Lo; +}; +typedef struct __uint128 uint128; + +void Increment(uint128 * N) +{ + uint64_t T = (N->Lo + 1); + N->Hi += ((N->Lo ^T) & N->Lo) >> 63; + N->Lo = T; +} + +void Decrement(uint128 * N) +{ + uint64_t T = (N->Lo - 1); + N->Hi -= ((T ^ N->Lo) & T) >> 63; + N->Lo = T; +} + +void Add(uint128 * Ans, uint128 N, uint128 M) +{ + uint64_t C = (((N.Lo & M.Lo) & 1) + (N.Lo >> 1) + (M.Lo >> 1)) >> 63; + Ans->Hi = N.Hi + M.Hi + C; + Ans->Lo = N.Lo + M.Lo; +} + +void Subtract(uint128 * Ans, uint128 N, uint128 M) +{ + Ans->Lo = N.Lo - M.Lo; + uint64_t C = (((Ans->Lo & M.Lo) & 1) + (M.Lo >> 1) + (Ans->Lo >> 1)) >> 63; + Ans->Hi = N.Hi - (M.Hi + C); +} + +void inc128(uint128 N, uint128* A) +{ + A->Lo = (N.Lo + 1); + A->Hi = N.Hi + (((N.Lo ^ A->Lo) & N.Lo) >> 63); +} + +void dec128(uint128 N, uint128* A) +{ + A->Lo = N.Lo - 1; + A->Hi = N.Hi - (((A->Lo ^ N.Lo) & A->Lo) >> 63); +} + +void add128(uint128 N, uint128 M, uint128* A) +{ + uint64_t C = (((N.Lo & M.Lo) & 1) + (N.Lo >> 1) + (M.Lo >> 1)) >> 63; + A->Hi = N.Hi + M.Hi + C; + A->Lo = N.Lo + M.Lo; +} + +void sub128(uint128 N, uint128 M, uint128* A) +{ + A->Lo = N.Lo - M.Lo; + uint64_t C = (((A->Lo & M.Lo) & 1) + (M.Lo >> 1) + (A->Lo >> 1)) >> 63; + A->Hi = N.Hi - (M.Hi + C); +} + +void mult64to128(uint64_t u, uint64_t v, uint64_t * h, uint64_t *l) +{ + uint64_t u1 = (u & 0xffffffff); + uint64_t v1 = (v & 0xffffffff); + uint64_t t = (u1 * v1); + uint64_t w3 = (t & 0xffffffff); + uint64_t k = (t >> 32); + + u >>= 32; + t = (u * v1) + k; + k = (t & 0xffffffff); + uint64_t w1 = (t >> 32); + + v >>= 32; + t = (u1 * v) + k; + k = (t >> 32); + + *h = (u * v) + w1 + k; + *l = (t << 32) + w3; +} + +void mult128(uint128 N, uint128 M, uint128 * Ans) +{ + mult64to128(N.Lo, M.Lo, &Ans->Hi, &Ans->Lo); + Ans->Hi += (N.Hi * M.Lo) + (N.Lo * M.Hi); +} + +void mult128to256(uint128 N, uint128 M, uint128 * H, uint128 * L) +{ + mult64to128(N.Hi, M.Hi, &H->Hi, &H->Lo); + mult64to128(N.Lo, M.Lo, &L->Hi, &L->Lo); + + uint128 T; + mult64to128(N.Hi, M.Lo, &T.Hi, &T.Lo); + L->Hi += T.Lo; + if(L->Hi < T.Lo) // if L->Hi overflowed + { + Increment(H); + } + H->Lo += T.Hi; + if(H->Lo < T.Hi) // if H->Lo overflowed + { + ++H->Hi; + } + + mult64to128(N.Lo, M.Hi, &T.Hi, &T.Lo); + L->Hi += T.Lo; + if(L->Hi < T.Lo) // if L->Hi overflowed + { + Increment(H); + } + 
H->Lo += T.Hi; + if(H->Lo < T.Hi) // if H->Lo overflowed + { + ++H->Hi; + } +} + + +void sqr64to128(uint64_t r, uint64_t * h, uint64_t *l) +{ + uint64_t r1 = (r & 0xffffffff); + uint64_t t = (r1 * r1); + uint64_t w3 = (t & 0xffffffff); + uint64_t k = (t >> 32); + + r >>= 32; + uint64_t m = (r * r1); + t = m + k; + uint64_t w2 = (t & 0xffffffff); + uint64_t w1 = (t >> 32); + + t = m + w2; + k = (t >> 32); + *h = (r * r) + w1 + k; + *l = (t << 32) + w3; +} + +void sqr128(uint128 R, uint128 * Ans) +{ + sqr64to128(R.Lo, &Ans->Hi, &Ans->Lo); + Ans->Hi += (R.Hi * R.Lo) << 1; +} + +void sqr128to256(uint128 R, uint128 * H, uint128 * L) +{ + sqr64to128(R.Hi, &H->Hi, &H->Lo); + sqr64to128(R.Lo, &L->Hi, &L->Lo); + + uint128 T; + mult64to128(R.Hi, R.Lo, &T.Hi, &T.Lo); + + H->Hi += (T.Hi >> 63); + T.Hi = (T.Hi << 1) | (T.Lo >> 63); // Shift Left 1 bit + T.Lo <<= 1; + + L->Hi += T.Lo; + if(L->Hi < T.Lo) // if L->Hi overflowed + { + Increment(H); + } + + H->Lo += T.Hi; + if(H->Lo < T.Hi) // if H->Lo overflowed + { + ++H->Hi; + } +} + +void shiftleft128(uint128 N, size_t S, uint128 * A) +{ + uint64_t M1, M2; + S &= 127; + + M1 = ((((S + 127) | S) & 64) >> 6) - 1llu; + M2 = (S >> 6) - 1llu; + S &= 63; + A->Hi = (N.Lo << S) & (~M2); + A->Lo = (N.Lo << S) & M2; + A->Hi |= ((N.Hi << S) | ((N.Lo >> (64 - S)) & M1)) & M2; + +/* + S &= 127; + + if(S != 0) + { + if(S > 64) + { + A.Hi = N.Lo << (S - 64); + A.Lo = 0; + } + else if(S < 64) + { + A.Hi = (N.Hi << S) | (N.Lo >> (64 - S)); + A.Lo = N.Lo << S; + } + else + { + A.Hi = N.Lo; + A.Lo = 0; + } + } + else + { + A.Hi = N.Hi; + A.Lo = N.Lo; + } + //*/ +} + +void shiftright128(uint128 N, size_t S, uint128 * A) +{ + uint64_t M1, M2; + S &= 127; + + M1 = ((((S + 127) | S) & 64) >> 6) - 1llu; + M2 = (S >> 6) - 1llu; + S &= 63; + A->Lo = (N.Hi >> S) & (~M2); + A->Hi = (N.Hi >> S) & M2; + A->Lo |= ((N.Lo >> S) | ((N.Hi << (64 - S)) & M1)) & M2; + + /* + S &= 127; + + if(S != 0) + { + if(S > 64) + { + A.Hi = N.Hi >> (S - 64); + A.Lo = 0; + } + else if(S < 64) + { + A.Lo = (N.Lo >> S) | (N.Hi << (64 - S)); + A.Hi = N.Hi >> S; + } + else + { + A.Lo = N.Hi; + A.Hi = 0; + } + } + else + { + A.Hi = N.Hi; + A.Lo = N.Lo; + } + //*/ +} + + +void not128(uint128 N, uint128 * A) +{ + A->Hi = ~N.Hi; + A->Lo = ~N.Lo; +} + +void or128(uint128 N1, uint128 N2, uint128 * A) +{ + A->Hi = N1.Hi | N2.Hi; + A->Lo = N1.Lo | N2.Lo; +} + +void and128(uint128 N1, uint128 N2, uint128 * A) +{ + A->Hi = N1.Hi & N2.Hi; + A->Lo = N1.Lo & N2.Lo; +} + +void xor128(uint128 N1, uint128 N2, uint128 * A) +{ + A->Hi = N1.Hi ^ N2.Hi; + A->Lo = N1.Lo ^ N2.Lo; +} + +size_t nlz64(uint64_t N) +{ + uint64_t I; + size_t C; + + I = ~N; + C = ((I ^ (I + 1)) & I) >> 63; + + I = (N >> 32) + 0xffffffff; + I = ((I & 0x100000000) ^ 0x100000000) >> 27; + C += I; N <<= I; + + I = (N >> 48) + 0xffff; + I = ((I & 0x10000) ^ 0x10000) >> 12; + C += I; N <<= I; + + I = (N >> 56) + 0xff; + I = ((I & 0x100) ^ 0x100) >> 5; + C += I; N <<= I; + + I = (N >> 60) + 0xf; + I = ((I & 0x10) ^ 0x10) >> 2; + C += I; N <<= I; + + I = (N >> 62) + 3; + I = ((I & 4) ^ 4) >> 1; + C += I; N <<= I; + + C += (N >> 63) ^ 1; + + return C; +} + +size_t ntz64(uint64_t N) +{ + uint64_t I = ~N; + size_t C = ((I ^ (I + 1)) & I) >> 63; + + I = (N & 0xffffffff) + 0xffffffff; + I = ((I & 0x100000000) ^ 0x100000000) >> 27; + C += I; N >>= I; + + I = (N & 0xffff) + 0xffff; + I = ((I & 0x10000) ^ 0x10000) >> 12; + C += I; N >>= I; + + I = (N & 0xff) + 0xff; + I = ((I & 0x100) ^ 0x100) >> 5; + C += I; N >>= I; + + I = (N & 0xf) + 0xf; + I = ((I & 0x10) ^ 
0x10) >> 2; + C += I; N >>= I; + + I = (N & 3) + 3; + I = ((I & 4) ^ 4) >> 1; + C += I; N >>= I; + + C += ((N & 1) ^ 1); + + return C; +} + +size_t popcnt64(uint64_t V) +{ + // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel + V -= ((V >> 1) & 0x5555555555555555); + V = (V & 0x3333333333333333) + ((V >> 2) & 0x3333333333333333); + return ((V + (V >> 4) & 0xF0F0F0F0F0F0F0F) * 0x101010101010101) >> 56; +} + +size_t popcnt128(uint128 N) +{ + return popcnt64(N.Hi) + popcnt64(N.Lo); +} + + +size_t nlz128(uint128 N) +{ + return (N.Hi == 0) ? nlz64(N.Lo) + 64 : nlz64(N.Hi); +} + +size_t ntz128(uint128 N) +{ + return (N.Lo == 0) ? ntz64(N.Hi) + 64 : ntz64(N.Lo); +} +int compare128(uint128 N1, uint128 N2) +{ + return (((N1.Hi > N2.Hi) || ((N1.Hi == N2.Hi) && (N1.Lo > N2.Lo))) ? 1 : 0) + - (((N1.Hi < N2.Hi) || ((N1.Hi == N2.Hi) && (N1.Lo < N2.Lo))) ? 1 : 0); +} + +void bindivmod128(uint128 M, uint128 N, uint128 * Q, uint128 *R) +{ + Q->Hi = Q->Lo = 0; + size_t Shift = nlz128(N) - nlz128(M); + shiftleft128(N, Shift, &N); + + do + { + shiftleft128(*Q, (size_t)1, Q); + if(compare128(M, N) >= 0) + { + sub128(M, N, &M); + Q->Lo |= 1; + } + + shiftright128(N, 1, &N); + }while(Shift-- != 0); + + R->Hi = M.Hi; + R->Lo = M.Lo; +} + +void divmod128by64(const uint64_t u1, const uint64_t u0, uint64_t v, uint64_t * q, uint64_t * r) +{ + const uint64_t b = 1ll << 32; + uint64_t un1, un0, vn1, vn0, q1, q0, un32, un21, un10, rhat, left, right; + size_t s; + + s = nlz64(v); + v <<= s; + vn1 = v >> 32; + vn0 = v & 0xffffffff; + + if (s > 0) + { + un32 = (u1 << s) | (u0 >> (64 - s)); + un10 = u0 << s; + } + else + { + un32 = u1; + un10 = u0; + } + + un1 = un10 >> 32; + un0 = un10 & 0xffffffff; + + q1 = un32 / vn1; + rhat = un32 % vn1; + + left = q1 * vn0; + right = (rhat << 32) + un1; +again1: + if ((q1 >= b) || (left > right)) + { + --q1; + rhat += vn1; + if (rhat < b) + { + left -= vn0; + right = (rhat << 32) | un1; + goto again1; + } + } + + un21 = (un32 << 32) + (un1 - (q1 * v)); + + q0 = un21 / vn1; + rhat = un21 % vn1; + + left = q0 * vn0; + right = (rhat << 32) | un0; +again2: + if ((q0 >= b) || (left > right)) + { + --q0; + rhat += vn1; + if (rhat < b) + { + left -= vn0; + right = (rhat << 32) | un0; + goto again2; + } + } + + *r = ((un21 << 32) + (un0 - (q0 * v))) >> s; + *q = (q1 << 32) | q0; +} + +static void divmod128by128(uint128 M, uint128 N, uint128 * Q, uint128 * R) +{ + if (N.Hi == 0) + { + if (M.Hi < N.Lo) + { + divmod128by64(M.Hi, M.Lo, N.Lo, &Q->Lo, &R->Lo); + Q->Hi = 0; + R->Hi = 0; + return; + } + else + { + Q->Hi = M.Hi / N.Lo; + R->Hi = M.Hi % N.Lo; + divmod128by64(R->Hi, M.Lo, N.Lo, &Q->Lo, &R->Lo); + R->Hi = 0; + return; + } + } + else + { + size_t n = nlz64(N.Hi); + + uint128 v1; + shiftleft128(N, n, &v1); + + uint128 u1; + shiftright128(M, 1, &u1); + + uint128 q1; + divmod128by64(u1.Hi, u1.Lo, v1.Hi, &q1.Hi, &q1.Lo); + q1.Hi = 0; + shiftright128(q1, 63 - n, &q1); + + if ((q1.Hi | q1.Lo) != 0) + { + dec128(q1, &q1); + } + + Q->Hi = q1.Hi; + Q->Lo = q1.Lo; + mult128(q1, N, &q1); + sub128(M, q1, R); + + if (compare128(*R, N) >= 0) + { + inc128(*Q, Q); + sub128(*R, N, R); + } + + return; + } +} + +void divmod128(uint128 M, uint128 N, uint128 * Q, uint128 * R) +{ + size_t Nlz, Mlz, Ntz; + int C; + + Nlz = nlz128(N); + Mlz = nlz128(M); + Ntz = ntz128(N); + + if(Nlz == 128) + { + return; + } + else if((M.Hi | N.Hi) == 0) + { + Q->Hi = R->Hi = 0; + Q->Lo = M.Lo / N.Lo; + R->Lo = M.Lo % N.Lo; + return; + } + else if(Nlz == 127) + { + *Q = M; + R->Hi = R->Lo = 0; + return; + 
} + else if((Ntz + Nlz) == 127) + { + shiftright128(M, Ntz, Q); + dec128(N, &N); + and128(N, M, R); + return; + } + + C = compare128(M, N); + if(C < 0) + { + Q->Hi = Q->Lo = 0; + *R = M; + return; + } + else if(C == 0) + { + Q->Hi = R->Hi = R->Lo = 0; + Q->Lo = 1; + return; + } + + if((Nlz - Mlz) > 5) + { + divmod128by128(M, N, Q, R); + } + else + { + bindivmod128(M, N, Q, R); + } +} +#endif \ No newline at end of file diff --git a/crypto/mman.c b/crypto/mman.c new file mode 100644 index 0000000..3f015f2 --- /dev/null +++ b/crypto/mman.c @@ -0,0 +1,180 @@ + +#include +#include +#include + +#include "mman.h" + +#ifndef FILE_MAP_EXECUTE +#define FILE_MAP_EXECUTE 0x0020 +#endif /* FILE_MAP_EXECUTE */ + +static int __map_mman_error(const DWORD err, const int deferr) +{ + if (err == 0) + return 0; + //TODO: implement + return err; +} + +static DWORD __map_mmap_prot_page(const int prot) +{ + DWORD protect = 0; + + if (prot == PROT_NONE) + return protect; + + if ((prot & PROT_EXEC) != 0) + { + protect = ((prot & PROT_WRITE) != 0) ? + PAGE_EXECUTE_READWRITE : PAGE_EXECUTE_READ; + } + else + { + protect = ((prot & PROT_WRITE) != 0) ? + PAGE_READWRITE : PAGE_READONLY; + } + + return protect; +} + +static DWORD __map_mmap_prot_file(const int prot) +{ + DWORD desiredAccess = 0; + + if (prot == PROT_NONE) + return desiredAccess; + + if ((prot & PROT_READ) != 0) + desiredAccess |= FILE_MAP_READ; + if ((prot & PROT_WRITE) != 0) + desiredAccess |= FILE_MAP_WRITE; + if ((prot & PROT_EXEC) != 0) + desiredAccess |= FILE_MAP_EXECUTE; + + return desiredAccess; +} + +void* mmap(void *addr, size_t len, int prot, int flags, int fildes, OffsetType off) +{ + HANDLE fm, h; + + void * map = MAP_FAILED; + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4293) +#endif + + const DWORD dwFileOffsetLow = (sizeof(OffsetType) <= sizeof(DWORD)) ? + (DWORD)off : (DWORD)(off & 0xFFFFFFFFL); + const DWORD dwFileOffsetHigh = (sizeof(OffsetType) <= sizeof(DWORD)) ? + (DWORD)0 : (DWORD)((off >> 32) & 0xFFFFFFFFL); + const DWORD protect = __map_mmap_prot_page(prot); + const DWORD desiredAccess = __map_mmap_prot_file(prot); + + const OffsetType maxSize = off + (OffsetType)len; + + const DWORD dwMaxSizeLow = (sizeof(OffsetType) <= sizeof(DWORD)) ? + (DWORD)maxSize : (DWORD)(maxSize & 0xFFFFFFFFL); + const DWORD dwMaxSizeHigh = (sizeof(OffsetType) <= sizeof(DWORD)) ? + (DWORD)0 : (DWORD)((maxSize >> 32) & 0xFFFFFFFFL); + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + errno = 0; + + if (len == 0 + /* Unsupported flag combinations */ + || (flags & MAP_FIXED) != 0 + /* Usupported protection combinations */ + || prot == PROT_EXEC) + { + errno = EINVAL; + return MAP_FAILED; + } + + h = ((flags & MAP_ANONYMOUS) == 0) ? 
+ (HANDLE)_get_osfhandle(fildes) : INVALID_HANDLE_VALUE; + + if ((flags & MAP_ANONYMOUS) == 0 && h == INVALID_HANDLE_VALUE) + { + errno = EBADF; + return MAP_FAILED; + } + + fm = CreateFileMapping(h, NULL, protect, dwMaxSizeHigh, dwMaxSizeLow, NULL); + + if (fm == NULL) + { + errno = __map_mman_error(GetLastError(), EPERM); + return MAP_FAILED; + } + + map = MapViewOfFile(fm, desiredAccess, dwFileOffsetHigh, dwFileOffsetLow, len); + + CloseHandle(fm); + + if (map == NULL) + { + errno = __map_mman_error(GetLastError(), EPERM); + return MAP_FAILED; + } + + return map; +} + +int munmap(void *addr, size_t len) +{ + if (UnmapViewOfFile(addr)) + return 0; + + errno = __map_mman_error(GetLastError(), EPERM); + + return -1; +} + +int _mprotect(void *addr, size_t len, int prot) +{ + DWORD newProtect = __map_mmap_prot_page(prot); + DWORD oldProtect = 0; + + if (VirtualProtect(addr, len, newProtect, &oldProtect)) + return 0; + + errno = __map_mman_error(GetLastError(), EPERM); + + return -1; +} + +int msync(void *addr, size_t len, int flags) +{ + if (FlushViewOfFile(addr, len)) + return 0; + + errno = __map_mman_error(GetLastError(), EPERM); + + return -1; +} + +int mlock(const void *addr, size_t len) +{ + if (VirtualLock((LPVOID)addr, len)) + return 0; + + errno = __map_mman_error(GetLastError(), EPERM); + + return -1; +} + +int munlock(const void *addr, size_t len) +{ + if (VirtualUnlock((LPVOID)addr, len)) + return 0; + + errno = __map_mman_error(GetLastError(), EPERM); + + return -1; +} \ No newline at end of file diff --git a/crypto/mman.h b/crypto/mman.h new file mode 100644 index 0000000..12df7b5 --- /dev/null +++ b/crypto/mman.h @@ -0,0 +1,63 @@ +/* + * sys/mman.h + * mman-win32 + */ + +#ifndef _SYS_MMAN_H_ +#define _SYS_MMAN_H_ + +#ifndef _WIN32_WINNT // Allow use of features specific to Windows XP or later. +#define _WIN32_WINNT 0x0501 // Change this to the appropriate value to target other versions of Windows. +#endif + +/* All the headers include this file. */ +#ifndef _MSC_VER +#include <_mingw.h> +#endif + +/* Determine offset type */ +#include +#if defined(_WIN64) +typedef int64_t OffsetType; +#else +typedef uint32_t OffsetType; +#endif + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define PROT_NONE 0 +#define PROT_READ 1 +#define PROT_WRITE 2 +#define PROT_EXEC 4 + +#define MAP_FILE 0 +#define MAP_SHARED 1 +#define MAP_PRIVATE 2 +#define MAP_TYPE 0xf +#define MAP_FIXED 0x10 +#define MAP_ANONYMOUS 0x20 +#define MAP_ANON MAP_ANONYMOUS + +#define MAP_FAILED ((void *)-1) + +/* Flags for msync. */ +#define MS_ASYNC 1 +#define MS_SYNC 2 +#define MS_INVALIDATE 4 + +void* mmap(void *addr, size_t len, int prot, int flags, int fildes, OffsetType off); +int munmap(void *addr, size_t len); +int _mprotect(void *addr, size_t len, int prot); +int msync(void *addr, size_t len, int flags); +int mlock(const void *addr, size_t len); +int munlock(const void *addr, size_t len); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_MMAN_H_ */ diff --git a/crypto/wildkeccak-cpu.cpp b/crypto/wildkeccak-cpu.cpp new file mode 100644 index 0000000..fb24652 --- /dev/null +++ b/crypto/wildkeccak-cpu.cpp @@ -0,0 +1,371 @@ +// Memory-hard extension of keccak for PoW +// Copyright (c) 2012-2013 The Cryptonote developers +// Copyright (c) 2014 The Boolberry developers + +// Distributed under the MIT/X11 software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. 
+ +// Modified for CPUminer by Lucas Jones +// Adapted for ccminer by Tanguy Pruvot - 2016 + +#include +#include +#include +#include + +#ifdef _MSC_VER +#include +#include +#include "int128_c.h" +#else +#include +#endif + +#include + +#include "xmr-rpc.h" + +extern uint64_t* pscratchpad_buff; + +struct reciprocal_value64 { + uint64_t m; + uint8_t sh1, sh2; +}; + +static inline int fls64(uint64_t x) +{ +#if defined(_WIN64) + unsigned long bitpos = 0; + _BitScanReverse64(&bitpos, x); + return (int) (bitpos + 1); +#elif defined(WIN32) + unsigned long hipos = 0, bitpos = 0; + uint32_t hi = x >> 32; + _BitScanReverse(&hipos, hi); + if (!hipos) { + _BitScanReverse(&bitpos, (uint32_t) x); + } + return (int) hipos ? hipos + 33 : bitpos + 1; +#else + /* + * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the + * dest reg is undefined if x==0, but their CPU architect says its + * value is written to set it to the same as before. + */ + register long bitpos = -1; + asm("bsrq %1,%0" : "+r" (bitpos) : "rm" (x)); + return bitpos + 1; +#endif +} + +static inline struct reciprocal_value64 reciprocal_val64(uint64_t d) +{ + struct reciprocal_value64 R; + int l; + + l = fls64(d - 1); + +#ifdef _MSC_VER + uint128 v1; + v1.Lo = (1ULL << l) - d;v1.Hi=0; + uint128 v2; + v2.Hi = 1; v2.Lo = 0; + + uint128 v; + mult128(v1,v2,&v); + divmod128by64(v.Hi,v.Lo,d,&v.Hi,&v.Lo); + Increment(&v); + R.m = (uint64_t)v.Hi; +#else + __uint128_t m; + m = (((__uint128_t)1 << 64) * ((1ULL << l) - d)); + m /= d; + ++m; + R.m = (uint64_t)m; +#endif + + R.sh1 = min(l, 1); + R.sh2 = max(l - 1, 0); + + return R; +} + +static inline uint64_t reciprocal_divide64(uint64_t a, struct reciprocal_value64 R) +{ +#ifdef _MSC_VER + uint128 v; + mult64to128(a,R.m,&v.Hi,&v.Lo); + uint64_t t = v.Hi; +#else + uint64_t t = (uint64_t)(((__uint128_t)a * R.m) >> 64); +#endif + return (t + ((a - t) >> R.sh1)) >> R.sh2; +} + +static inline uint64_t reciprocal_remainder64(uint64_t A, uint64_t B, struct reciprocal_value64 R) +{ + uint64_t div, mod; + + div = reciprocal_divide64(A, R); + mod = A - (uint64_t) (div * B); + if (mod >= B) mod -= B; + return mod; +} + +//#define UNROLL_SCR_MIX + +static inline uint64_t rotl641(uint64_t x) { return((x << 1) | (x >> 63)); } +static inline uint64_t rotl64_1(uint64_t x, uint64_t y) { return((x << y) | (x >> (64 - y))); } +static inline uint64_t rotl64_2(uint64_t x, uint64_t y) { return(rotl64_1((x >> 32) | (x << 32), y)); } +static inline uint64_t bitselect(uint64_t a, uint64_t b, uint64_t c) { return(a ^ (c & (b ^ a))); } + +static inline void keccakf_mul(uint64_t *s) +{ + uint64_t bc[5], t[5]; + uint64_t tmp1, tmp2; + int i; + + for(i = 0; i < 5; i++) + t[i] = s[i + 0] ^ s[i + 5] ^ s[i + 10] * s[i + 15] * s[i + 20]; + + bc[0] = t[0] ^ rotl641(t[2]); + bc[1] = t[1] ^ rotl641(t[3]); + bc[2] = t[2] ^ rotl641(t[4]); + bc[3] = t[3] ^ rotl641(t[0]); + bc[4] = t[4] ^ rotl641(t[1]); + + tmp1 = s[1] ^ bc[0]; + + s[ 0] ^= bc[4]; + s[ 1] = rotl64_1(s[ 6] ^ bc[0], 44); + s[ 6] = rotl64_1(s[ 9] ^ bc[3], 20); + s[ 9] = rotl64_1(s[22] ^ bc[1], 61); + s[22] = rotl64_1(s[14] ^ bc[3], 39); + s[14] = rotl64_1(s[20] ^ bc[4], 18); + s[20] = rotl64_1(s[ 2] ^ bc[1], 62); + s[ 2] = rotl64_1(s[12] ^ bc[1], 43); + s[12] = rotl64_1(s[13] ^ bc[2], 25); + s[13] = rotl64_1(s[19] ^ bc[3], 8); + s[19] = rotl64_1(s[23] ^ bc[2], 56); + s[23] = rotl64_1(s[15] ^ bc[4], 41); + s[15] = rotl64_1(s[ 4] ^ bc[3], 27); + s[ 4] = rotl64_1(s[24] ^ bc[3], 14); + s[24] = rotl64_1(s[21] ^ bc[0], 2); + s[21] = rotl64_1(s[ 8] ^ bc[2], 55); + s[ 8] 
= rotl64_1(s[16] ^ bc[0], 45); + s[16] = rotl64_1(s[ 5] ^ bc[4], 36); + s[ 5] = rotl64_1(s[ 3] ^ bc[2], 28); + s[ 3] = rotl64_1(s[18] ^ bc[2], 21); + s[18] = rotl64_1(s[17] ^ bc[1], 15); + s[17] = rotl64_1(s[11] ^ bc[0], 10); + s[11] = rotl64_1(s[ 7] ^ bc[1], 6); + s[ 7] = rotl64_1(s[10] ^ bc[4], 3); + s[10] = rotl64_1(tmp1, 1); + + tmp1 = s[0]; tmp2 = s[1]; s[0] = bitselect(s[0] ^ s[2], s[0], s[1]); s[1] = bitselect(s[1] ^ s[3], s[1], s[2]); s[2] = bitselect(s[2] ^ s[4], s[2], s[3]); s[3] = bitselect(s[3] ^ tmp1, s[3], s[4]); s[4] = bitselect(s[4] ^ tmp2, s[4], tmp1); + tmp1 = s[5]; tmp2 = s[6]; s[5] = bitselect(s[5] ^ s[7], s[5], s[6]); s[6] = bitselect(s[6] ^ s[8], s[6], s[7]); s[7] = bitselect(s[7] ^ s[9], s[7], s[8]); s[8] = bitselect(s[8] ^ tmp1, s[8], s[9]); s[9] = bitselect(s[9] ^ tmp2, s[9], tmp1); + tmp1 = s[10]; tmp2 = s[11]; s[10] = bitselect(s[10] ^ s[12], s[10], s[11]); s[11] = bitselect(s[11] ^ s[13], s[11], s[12]); s[12] = bitselect(s[12] ^ s[14], s[12], s[13]); s[13] = bitselect(s[13] ^ tmp1, s[13], s[14]); s[14] = bitselect(s[14] ^ tmp2, s[14], tmp1); + tmp1 = s[15]; tmp2 = s[16]; s[15] = bitselect(s[15] ^ s[17], s[15], s[16]); s[16] = bitselect(s[16] ^ s[18], s[16], s[17]); s[17] = bitselect(s[17] ^ s[19], s[17], s[18]); s[18] = bitselect(s[18] ^ tmp1, s[18], s[19]); s[19] = bitselect(s[19] ^ tmp2, s[19], tmp1); + tmp1 = s[20]; tmp2 = s[21]; s[20] = bitselect(s[20] ^ s[22], s[20], s[21]); s[21] = bitselect(s[21] ^ s[23], s[21], s[22]); s[22] = bitselect(s[22] ^ s[24], s[22], s[23]); s[23] = bitselect(s[23] ^ tmp1, s[23], s[24]); s[24] = bitselect(s[24] ^ tmp2, s[24], tmp1); + + s[0] ^= 0x0000000000000001ULL; +} + +static inline void keccakf_mul_last(uint64_t *s) +{ + uint64_t bc[5], xormul[5]; + uint64_t tmp1, tmp2; + int i; + + for(i = 0; i < 5; i++) + xormul[i] = s[i + 0] ^ s[i + 5] ^ s[i + 10] * s[i + 15] * s[i + 20]; + + bc[0] = xormul[0] ^ rotl641(xormul[2]); + bc[1] = xormul[1] ^ rotl641(xormul[3]); + bc[2] = xormul[2] ^ rotl641(xormul[4]); + bc[3] = xormul[3] ^ rotl641(xormul[0]); + bc[4] = xormul[4] ^ rotl641(xormul[1]); + + s[0] ^= bc[4]; + s[1] = rotl64_2(s[6] ^ bc[0], 12); + s[2] = rotl64_2(s[12] ^ bc[1], 11); + s[4] = rotl64_1(s[24] ^ bc[3], 14); + s[3] = rotl64_1(s[18] ^ bc[2], 21); + + tmp1 = s[0]; tmp2 = s[1]; s[0] = bitselect(s[0] ^ s[2], s[0], s[1]); s[1] = bitselect(s[1] ^ s[3], s[1], s[2]); s[2] = bitselect(s[2] ^ s[4], s[2], s[3]); s[3] = bitselect(s[3] ^ tmp1, s[3], s[4]); + s[0] ^= 0x0000000000000001ULL; +} + +struct reciprocal_value64 cached_recip; +static uint64_t cached_scr_size = 0; + +static inline void scr_mix(uint64_t *st, uint64_t scr_size, struct reciprocal_value64 recip) +{ +#define KK_MIXIN_SIZE 24 + uint64_t _ALIGN(128) idx[KK_MIXIN_SIZE]; + +#ifdef _MSC_VER + #define pscr pscratchpad_buff + int x; + + // non-optimized 64bit operations + for (x = 0; x < KK_MIXIN_SIZE; x++) { + idx[x] = reciprocal_remainder64(st[x], scr_size, recip) << 2; + } + if (idx[7] > scr_size*4) { + applog(LOG_WARNING, "Wrong remainder64 returned by the cpu hash %016llx > %016llx", + (unsigned long long) idx[7], (unsigned long long) scr_size*4); + return; + } + for(x = 0; x < KK_MIXIN_SIZE; x += 4) { + st[x + 0] ^= pscr[idx[x] + 0] ^ pscr[idx[x + 1] + 0] ^ pscr[idx[x + 2] + 0] ^ pscr[idx[x + 3] + 0]; + st[x + 1] ^= pscr[idx[x] + 1] ^ pscr[idx[x + 1] + 1] ^ pscr[idx[x + 2] + 1] ^ pscr[idx[x + 3] + 1]; + st[x + 2] ^= pscr[idx[x] + 2] ^ pscr[idx[x + 1] + 2] ^ pscr[idx[x + 2] + 2] ^ pscr[idx[x + 3] + 2]; + st[x + 3] ^= pscr[idx[x] + 3] ^ pscr[idx[x + 1] + 3] ^ 
pscr[idx[x + 2] + 3] ^ pscr[idx[x + 3] + 3]; + } + return; + +#elif !defined(UNROLL_SCR_MIX) + + #pragma GCC ivdep + for(int x = 0; x < 3; ++x) + { + __m128i *st0, *st1, *st2, *st3; + + idx[0] = reciprocal_remainder64(st[(x << 3) + 0], scr_size, recip) << 2; + idx[1] = reciprocal_remainder64(st[(x << 3) + 1], scr_size, recip) << 2; + idx[2] = reciprocal_remainder64(st[(x << 3) + 2], scr_size, recip) << 2; + idx[3] = reciprocal_remainder64(st[(x << 3) + 3], scr_size, recip) << 2; + idx[4] = reciprocal_remainder64(st[(x << 3) + 4], scr_size, recip) << 2; + idx[5] = reciprocal_remainder64(st[(x << 3) + 5], scr_size, recip) << 2; + idx[6] = reciprocal_remainder64(st[(x << 3) + 6], scr_size, recip) << 2; + idx[7] = reciprocal_remainder64(st[(x << 3) + 7], scr_size, recip) << 2; + + for(int y = 0; y < 8; y++) _mm_prefetch((const char*) (&pscratchpad_buff[idx[y]]), _MM_HINT_T1); + + st0 = (__m128i *)&st[(x << 3) + 0]; + st1 = (__m128i *)&st[(x << 3) + 2]; + st2 = (__m128i *)&st[(x << 3) + 4]; + st3 = (__m128i *)&st[(x << 3) + 6]; + + *st0 = _mm_xor_si128(*st0, *((__m128i *)&pscratchpad_buff[idx[0]])); + *st0 = _mm_xor_si128(*st0, *((__m128i *)&pscratchpad_buff[idx[1]])); + *st0 = _mm_xor_si128(*st0, *((__m128i *)&pscratchpad_buff[idx[2]])); + *st0 = _mm_xor_si128(*st0, *((__m128i *)&pscratchpad_buff[idx[3]])); + + *st1 = _mm_xor_si128(*st1, *((__m128i *)&pscratchpad_buff[idx[0] + 2])); + *st1 = _mm_xor_si128(*st1, *((__m128i *)&pscratchpad_buff[idx[1] + 2])); + *st1 = _mm_xor_si128(*st1, *((__m128i *)&pscratchpad_buff[idx[2] + 2])); + *st1 = _mm_xor_si128(*st1, *((__m128i *)&pscratchpad_buff[idx[3] + 2])); + + *st2 = _mm_xor_si128(*st2, *((__m128i *)&pscratchpad_buff[idx[4]])); + *st2 = _mm_xor_si128(*st2, *((__m128i *)&pscratchpad_buff[idx[5]])); + *st2 = _mm_xor_si128(*st2, *((__m128i *)&pscratchpad_buff[idx[6]])); + *st2 = _mm_xor_si128(*st2, *((__m128i *)&pscratchpad_buff[idx[7]])); + + *st3 = _mm_xor_si128(*st3, *((__m128i *)&pscratchpad_buff[idx[4] + 2])); + *st3 = _mm_xor_si128(*st3, *((__m128i *)&pscratchpad_buff[idx[5] + 2])); + *st3 = _mm_xor_si128(*st3, *((__m128i *)&pscratchpad_buff[idx[6] + 2])); + *st3 = _mm_xor_si128(*st3, *((__m128i *)&pscratchpad_buff[idx[7] + 2])); + } + +#else + #warning using AVX2 optimizations + + idx[ 0] = reciprocal_remainder64(st[0], scr_size, recip) << 2; + idx[ 1] = reciprocal_remainder64(st[1], scr_size, recip) << 2; + idx[ 2] = reciprocal_remainder64(st[2], scr_size, recip) << 2; + idx[ 3] = reciprocal_remainder64(st[3], scr_size, recip) << 2; + idx[ 4] = reciprocal_remainder64(st[4], scr_size, recip) << 2; + idx[ 5] = reciprocal_remainder64(st[5], scr_size, recip) << 2; + idx[ 6] = reciprocal_remainder64(st[6], scr_size, recip) << 2; + idx[ 7] = reciprocal_remainder64(st[7], scr_size, recip) << 2; + + for(int y = 0; y < 8; y++) _mm_prefetch(&pscratchpad_buff[idx[y]], _MM_HINT_T1); + + idx[ 8] = reciprocal_remainder64(st[8], scr_size, recip) << 2; + idx[ 9] = reciprocal_remainder64(st[9], scr_size, recip) << 2; + idx[10] = reciprocal_remainder64(st[10], scr_size, recip) << 2; + idx[11] = reciprocal_remainder64(st[11], scr_size, recip) << 2; + idx[12] = reciprocal_remainder64(st[12], scr_size, recip) << 2; + idx[13] = reciprocal_remainder64(st[13], scr_size, recip) << 2; + idx[14] = reciprocal_remainder64(st[14], scr_size, recip) << 2; + idx[15] = reciprocal_remainder64(st[15], scr_size, recip) << 2; + + for(int y = 8; y < 16; ++y) _mm_prefetch(&pscratchpad_buff[idx[y]], _MM_HINT_T1); + + idx[16] = reciprocal_remainder64(st[16], scr_size, recip) << 2; + 
idx[17] = reciprocal_remainder64(st[17], scr_size, recip) << 2; + idx[18] = reciprocal_remainder64(st[18], scr_size, recip) << 2; + idx[19] = reciprocal_remainder64(st[19], scr_size, recip) << 2; + idx[20] = reciprocal_remainder64(st[20], scr_size, recip) << 2; + idx[21] = reciprocal_remainder64(st[21], scr_size, recip) << 2; + idx[22] = reciprocal_remainder64(st[22], scr_size, recip) << 2; + idx[23] = reciprocal_remainder64(st[23], scr_size, recip) << 2; + + for(int y = 16; y < 24; ++y) _mm_prefetch(&pscratchpad_buff[idx[y]], _MM_HINT_T1); + + __m256i *st0 = (__m256i *)&st[0]; + + for(int x = 0; x < 6; ++x) + { + *st0 = _mm256_xor_si256(*st0, *((__m256i *)&pscratchpad_buff[idx[(x << 2) + 0]])); + *st0 = _mm256_xor_si256(*st0, *((__m256i *)&pscratchpad_buff[idx[(x << 2) + 1]])); + *st0 = _mm256_xor_si256(*st0, *((__m256i *)&pscratchpad_buff[idx[(x << 2) + 2]])); + *st0 = _mm256_xor_si256(*st0, *((__m256i *)&pscratchpad_buff[idx[(x << 2) + 3]])); + ++st0; + } + +#endif + return; +} + +static void wild_keccak_hash_dbl(uint8_t * __restrict md, const uint8_t * __restrict in) +{ + uint64_t _ALIGN(32) st[25]; + uint64_t scr_size, i; + struct reciprocal_value64 recip; + + scr_size = scratchpad_size >> 2; + if (scr_size == cached_scr_size) + recip = cached_recip; + else { + cached_recip = recip = reciprocal_val64(scr_size); + cached_scr_size = scr_size; + } + + // Wild Keccak #1 + memcpy(st, in, 88); + st[10] = (st[10] & 0x00000000000000FFULL) | 0x0000000000000100ULL; + memset(&st[11], 0, 112); + st[16] |= 0x8000000000000000ULL; + + for(i = 0; i < 23; i++) { + keccakf_mul(st); + scr_mix(st, scr_size, recip); + } + + keccakf_mul_last(st); + + // Wild Keccak #2 + memset(&st[4], 0x00, 168); + st[ 4] = 0x0000000000000001ULL; + st[16] = 0x8000000000000000ULL; + + for(i = 0; i < 23; i++) { + keccakf_mul(st); + scr_mix(st, scr_size, recip); + } + + keccakf_mul_last(st); + + memcpy(md, st, 32); + return; +} + +void wildkeccak_hash(void* output, const void* input, uint64_t* scratchpad, uint64_t ssize) +{ + if (scratchpad) pscratchpad_buff = scratchpad; + if (!scratchpad_size) scratchpad_size = ssize; + wild_keccak_hash_dbl((uint8_t*)output, (uint8_t*)input); +} diff --git a/crypto/wildkeccak.cu b/crypto/wildkeccak.cu new file mode 100644 index 0000000..fae66ae --- /dev/null +++ b/crypto/wildkeccak.cu @@ -0,0 +1,369 @@ +extern "C" { +#include +#include +#include +} + +#include +#include +#include // todo + +#include "wildkeccak.h" + +extern char *device_config[MAX_GPUS]; // -l +extern uint64_t* pscratchpad_buff; + +static uint64_t* d_input[MAX_GPUS]; +static uint32_t* d_retnonce[MAX_GPUS]; +static ulonglong4* d_scratchpad[MAX_GPUS]; + +static uint64_t* h_scratchpad[MAX_GPUS] = { 0 }; +static cudaStream_t bufpad_stream[MAX_GPUS] = { 0 }; +static cudaStream_t kernel_stream[MAX_GPUS] = { 0 }; + +uint64_t scratchpad_size = 0; + +uint32_t WK_CUDABlocks = 64; +uint32_t WK_CUDAThreads = 256; + +#define st0 vst0.x +#define st1 vst0.y +#define st2 vst0.z +#define st3 vst0.w + +#define st4 vst4.x +#define st5 vst4.y +#define st6 vst4.z +#define st7 vst4.w + +#define st8 vst8.x +#define st9 vst8.y +#define st10 vst8.z +#define st11 vst8.w + +#define st12 vst12.x +#define st13 vst12.y +#define st14 vst12.z +#define st15 vst12.w + +#define st16 vst16.x +#define st17 vst16.y +#define st18 vst16.z +#define st19 vst16.w + +#define st20 vst20.x +#define st21 vst20.y +#define st22 vst20.z +#define st23 vst20.w + +#if __CUDA_ARCH__ >= 320 + +__device__ __forceinline__ uint64_t cuda_rotl641(const uint64_t value) +{ + uint2 
result; + asm("shf.l.wrap.b32 %0, %1, %2, 1U;" : "=r"(result.x) + : "r"(__double2hiint(__longlong_as_double(value))), "r"(__double2loint(__longlong_as_double(value)))); + asm("shf.l.wrap.b32 %0, %1, %2, 1U;" : "=r"(result.y) + : "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value)))); + return __double_as_longlong(__hiloint2double(result.y, result.x)); +} + +#else +__noinline__ __device__ uint64_t cuda_rotl641(const uint64_t x) { return((x << 1) | (x >> 63)); } +#endif + +__noinline__ __device__ uint64_t bitselect(const uint64_t a, const uint64_t b, const uint64_t c) { return(a ^ (c & (b ^ a))); } + +#define ROTL641(x) (cuda_rotl641(x)) + +#define RND() \ + bc[0] = st0 ^ st5 ^ st10 * st15 * st20 ^ ROTL641(st2 ^ st7 ^ st12 * st17 * st22); \ + bc[1] = st1 ^ st6 ^ st11 * st16 * st21 ^ ROTL641(st3 ^ st8 ^ st13 * st18 * st23); \ + bc[2] = st2 ^ st7 ^ st12 * st17 * st22 ^ ROTL641(st4 ^ st9 ^ st14 * st19 * st24); \ + bc[3] = st3 ^ st8 ^ st13 * st18 * st23 ^ ROTL641(st0 ^ st5 ^ st10 * st15 * st20); \ + bc[4] = st4 ^ st9 ^ st14 * st19 * st24 ^ ROTL641(st1 ^ st6 ^ st11 * st16 * st21); \ + tmp1 = st1 ^ bc[0]; \ + \ + st0 ^= bc[4]; \ + st1 = ROTL64(st6 ^ bc[0], 44); \ + st6 = ROTL64(st9 ^ bc[3], 20); \ + st9 = ROTL64(st22 ^ bc[1], 61); \ + st22 = ROTL64(st14 ^ bc[3], 39); \ + st14 = ROTL64(st20 ^ bc[4], 18); \ + st20 = ROTL64(st2 ^ bc[1], 62); \ + st2 = ROTL64(st12 ^ bc[1], 43); \ + st12 = ROTL64(st13 ^ bc[2], 25); \ + st13 = ROTL64(st19 ^ bc[3], 8); \ + st19 = ROTL64(st23 ^ bc[2], 56); \ + st23 = ROTL64(st15 ^ bc[4], 41); \ + st15 = ROTL64(st4 ^ bc[3], 27); \ + st4 = ROTL64(st24 ^ bc[3], 14); \ + st24 = ROTL64(st21 ^ bc[0], 2); \ + st21 = ROTL64(st8 ^ bc[2], 55); \ + st8 = ROTL64(st16 ^ bc[0], 45); \ + st16 = ROTL64(st5 ^ bc[4], 36); \ + st5 = ROTL64(st3 ^ bc[2], 28); \ + st3 = ROTL64(st18 ^ bc[2], 21); \ + st18 = ROTL64(st17 ^ bc[1], 15); \ + st17 = ROTL64(st11 ^ bc[0], 10); \ + st11 = ROTL64(st7 ^ bc[1], 6); \ + st7 = ROTL64(st10 ^ bc[4], 3); \ + st10 = ROTL641(tmp1); \ + \ + tmp1 = st0; tmp2 = st1; st0 = bitselect(st0 ^ st2, st0, st1); st1 = bitselect(st1 ^ st3, st1, st2); st2 = bitselect(st2 ^ st4, st2, st3); st3 = bitselect(st3 ^ tmp1, st3, st4); st4 = bitselect(st4 ^ tmp2, st4, tmp1); \ + tmp1 = st5; tmp2 = st6; st5 = bitselect(st5 ^ st7, st5, st6); st6 = bitselect(st6 ^ st8, st6, st7); st7 = bitselect(st7 ^ st9, st7, st8); st8 = bitselect(st8 ^ tmp1, st8, st9); st9 = bitselect(st9 ^ tmp2, st9, tmp1); \ + tmp1 = st10; tmp2 = st11; st10 = bitselect(st10 ^ st12, st10, st11); st11 = bitselect(st11 ^ st13, st11, st12); st12 = bitselect(st12 ^ st14, st12, st13); st13 = bitselect(st13 ^ tmp1, st13, st14); st14 = bitselect(st14 ^ tmp2, st14, tmp1); \ + tmp1 = st15; tmp2 = st16; st15 = bitselect(st15 ^ st17, st15, st16); st16 = bitselect(st16 ^ st18, st16, st17); st17 = bitselect(st17 ^ st19, st17, st18); st18 = bitselect(st18 ^ tmp1, st18, st19); st19 = bitselect(st19 ^ tmp2, st19, tmp1); \ + tmp1 = st20; tmp2 = st21; st20 = bitselect(st20 ^ st22, st20, st21); st21 = bitselect(st21 ^ st23, st21, st22); st22 = bitselect(st22 ^ st24, st22, st23); st23 = bitselect(st23 ^ tmp1, st23, st24); st24 = bitselect(st24 ^ tmp2, st24, tmp1); \ + st0 ^= 1; + +#define LASTRND1() \ + bc[0] = st0 ^ st5 ^ st10 * st15 * st20 ^ ROTL64(st2 ^ st7 ^ st12 * st17 * st22, 1); \ + bc[1] = st1 ^ st6 ^ st11 * st16 * st21 ^ ROTL64(st3 ^ st8 ^ st13 * st18 * st23, 1); \ + bc[2] = st2 ^ st7 ^ st12 * st17 * st22 ^ ROTL64(st4 ^ st9 ^ st14 * st19 * st24, 1); \ + bc[3] = st3 ^ st8 ^ st13 * st18 
* st23 ^ ROTL64(st0 ^ st5 ^ st10 * st15 * st20, 1); \ + bc[4] = st4 ^ st9 ^ st14 * st19 * st24 ^ ROTL64(st1 ^ st6 ^ st11 * st16 * st21, 1); \ + \ + st0 ^= bc[4]; \ + st1 = ROTL64(st6 ^ bc[0], 44); \ + st2 = ROTL64(st12 ^ bc[1], 43); \ + st4 = ROTL64(st24 ^ bc[3], 14); \ + st3 = ROTL64(st18 ^ bc[2], 21); \ + \ + tmp1 = st0; st0 = bitselect(st0 ^ st2, st0, st1); st1 = bitselect(st1 ^ st3, st1, st2); st2 = bitselect(st2 ^ st4, st2, st3); st3 = bitselect(st3 ^ tmp1, st3, st4); \ + st0 ^= 1; + +#define LASTRND2() \ + bc[2] = st2 ^ st7 ^ st12 * st17 * st22 ^ ROTL64(st4 ^ st9 ^ st14 * st19 * st24, 1); \ + bc[3] = st3 ^ st8 ^ st13 * st18 * st23 ^ ROTL64(st0 ^ st5 ^ st10 * st15 * st20, 1); \ + bc[4] = st4 ^ st9 ^ st14 * st19 * st24 ^ ROTL64(st1 ^ st6 ^ st11 * st16 * st21, 1); \ + \ + st0 ^= bc[4]; \ + st4 = ROTL64(st24 ^ bc[3], 14); \ + st3 = ROTL64(st18 ^ bc[2], 21); \ + st3 = bitselect(st3 ^ st0, st3, st4); + +__device__ ulonglong4 operator^(const ulonglong4 &a, const ulonglong4 &b) +{ + return(make_ulonglong4(a.x ^ b.x, a.y ^ b.y, a.z ^ b.z, a.w ^ b.w)); +} + +#define MIX(vst) vst = vst ^ scratchpad[vst.x % scr_size] ^ scratchpad[vst.y % scr_size] ^ scratchpad[vst.z % scr_size] ^ scratchpad[vst.w % scr_size]; + +#define MIX_ALL MIX(vst0); MIX(vst4); MIX(vst8); MIX(vst12); MIX(vst16); MIX(vst20); + +__global__ +void wk(uint32_t* __restrict__ retnonce, const uint64_t* __restrict__ input, const ulonglong4* __restrict__ scratchpad, + const uint32_t scr_size, const uint32_t target, uint64_t startNonce) +{ + ulonglong4 vst0, vst4, vst8, vst12, vst16, vst20; + uint64_t bc[5]; + uint64_t st24, tmp1, tmp2; + + const uint64_t nonce = startNonce + (blockDim.x * blockIdx.x) + threadIdx.x; + vst0 = make_ulonglong4((nonce << 8) + (input[0] & 0xFF), input[1] & 0xFFFFFFFFFFFFFF00ULL, input[2], input[3]); + vst4 = make_ulonglong4(input[4], input[5], input[6], input[7]); + vst8 = make_ulonglong4(input[8], input[9], (input[10] & 0xFF) | 0x100, 0); + vst12 = make_ulonglong4(0, 0, 0, 0); + vst16 = make_ulonglong4(0x8000000000000000ULL, 0, 0, 0); + vst20 = make_ulonglong4(0, 0, 0, 0); + st24 = 0; + + RND(); + MIX_ALL; + + for(int i = 0; i < 22; i++) { + RND(); + MIX_ALL; + } + + LASTRND1(); + + vst4 = make_ulonglong4(1, 0, 0, 0); + vst8 = make_ulonglong4(0, 0, 0, 0); + vst12 = make_ulonglong4(0, 0, 0, 0); + vst16 = make_ulonglong4(0x8000000000000000ULL, 0, 0, 0); + vst20 = make_ulonglong4(0, 0, 0, 0); + st24 = 0; + + RND(); + MIX_ALL; + + #pragma unroll + for(int i = 0; i < 22; i++) { + RND(); + MIX_ALL; + } + + LASTRND2(); + + if((st3 >> 32) <= target) { + retnonce[0] = (uint32_t) nonce; + retnonce[1] = retnonce[0]; + } +} + +__host__ +void wildkeccak_kernel(const int thr_id, const uint32_t threads, const uint32_t startNounce, const uint2 target, uint32_t *resNonces) +{ + CUDA_SAFE_CALL(cudaMemsetAsync(d_retnonce[thr_id], 0xff, 2 * sizeof(uint32_t), kernel_stream[thr_id])); + + const uint32_t threadsperblock = WK_CUDAThreads; + dim3 grid((threads + threadsperblock - 1) / threadsperblock); + dim3 block(threadsperblock); + + wk <<>> (d_retnonce[thr_id], d_input[thr_id], d_scratchpad[thr_id], + (uint32_t)(scratchpad_size >> 2), target.y, startNounce); + + cudaMemcpyAsync(resNonces, d_retnonce[thr_id], 2*sizeof(uint32_t), cudaMemcpyDeviceToHost, kernel_stream[thr_id]); +} + +static bool init[MAX_GPUS] = { 0 }; + +extern "C" int scanhash_wildkeccak(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done) +{ + //uint32_t _ALIGN(64) endiandata[20]; + uint32_t *ptarget = work->target; + uint32_t 
throughput = 0; + uint64_t n, nonce, first; + uint8_t *pdata = (uint8_t*) work->data; + memcpy(&first, &pdata[1], 8); + //memcpy(&n, &pdata[1], 4);; + n = nonce = first; +// pdata[5] = thr_id; +// memcpy(&nonce, &pdata[1], 8); + + if (!scratchpad_size || !h_scratchpad[thr_id]) { + if (h_scratchpad[thr_id]) + applog(LOG_ERR, "Scratchpad size is not set!"); + work->data[0] = 0; // invalidate + sleep(1); + return -EBUSY; + } + + if (!init[thr_id]) { + + if (device_config[thr_id]) { + sscanf(device_config[thr_id], "%ux%u", &WK_CUDABlocks, &WK_CUDAThreads); + gpulog(LOG_INFO, thr_id, "Using %u x %u threads kernel launch config", WK_CUDABlocks, WK_CUDAThreads); + } else { + throughput = cuda_default_throughput(thr_id, WK_CUDABlocks*WK_CUDAThreads); + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); + } + + cudaSetDevice(device_map[thr_id]); + if (opt_cudaschedule == -1 && gpu_threads == 1) { + cudaDeviceReset(); + // reduce cpu usage (linux) + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + cudaDeviceSetCacheConfig(cudaFuncCachePreferL1); + CUDA_LOG_ERROR(); + } + + CUDA_SAFE_CALL(cudaMalloc(&d_input[thr_id], 88)); + CUDA_SAFE_CALL(cudaMalloc(&d_retnonce[thr_id], 2*sizeof(uint32_t))); + + int status = (int) cudaMalloc(&d_scratchpad[thr_id], WILD_KECCAK_SCRATCHPAD_BUFFSIZE); + if (status != cudaSuccess) { + gpulog(LOG_ERR, thr_id, "Unable to allocate device memory, %u MB, err %d", + (uint32_t) (WILD_KECCAK_SCRATCHPAD_BUFFSIZE/(1024*1024)), status); + exit(-ENOMEM); + } + + cudaStreamCreate(&bufpad_stream[thr_id]); + cudaStreamCreate(&kernel_stream[thr_id]); + + CUDA_SAFE_CALL(cudaMemcpyAsync(d_scratchpad[thr_id], h_scratchpad[thr_id], scratchpad_size << 3, cudaMemcpyHostToDevice, bufpad_stream[thr_id])); + + init[thr_id] = true; + } + + throughput = cuda_default_throughput(thr_id, WK_CUDABlocks*WK_CUDAThreads); + + cudaMemcpy(d_input[thr_id], pdata, 88, cudaMemcpyHostToDevice); +// cudaMemset(d_retnonce[thr_id], 0xFF, 2*sizeof(uint32_t)); + + if (h_scratchpad[thr_id]) { + cudaStreamSynchronize(bufpad_stream[thr_id]); + } + + do { +// const uint32_t blocks = WK_CUDABlocks, threads = WK_CUDAThreads; +// const dim3 block(blocks); +// const dim3 thread(threads); + uint32_t h_retnonce[2] = { UINT32_MAX, UINT32_MAX }; + uint2 target = make_uint2(ptarget[6], ptarget[7]); + + wildkeccak_kernel(thr_id, throughput, nonce, target, h_retnonce); + /* + wk <<>> (d_retnonce[thr_id], d_input[thr_id], d_scratchpad[thr_id], + (uint32_t)(scratchpad_size >> 2), nonce, ptarget[7]); + */ + + *hashes_done = (unsigned long) (n - first + throughput); + + cudaStreamSynchronize(kernel_stream[thr_id]); + if(h_retnonce[0] != UINT32_MAX) { + uint8_t _ALIGN(64) cpuhash[32]; + uint32_t* vhash = (uint32_t*) cpuhash; + uint64_t nonce64; + memcpy(&pdata[1], &h_retnonce[0], sizeof(uint32_t)); + memcpy(&nonce64, &pdata[1], 8); + wildkeccak_hash(cpuhash, pdata, pscratchpad_buff, scratchpad_size); + if (!cpuhash[31] && vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) { + work_set_target_ratio(work, vhash); + //applog_hex(pdata, 84); + //applog_hex(cpuhash, 32); + //applog_hex(ptarget, 32); + memcpy(work->nonces, &nonce64, 8); + if (n + throughput > max_nonce) { + *hashes_done = (unsigned long) (max_nonce - first); + } + return 1; + } + } + + if (n + throughput >= max_nonce) { + n = max_nonce; + break; + } + + n += throughput; + nonce += throughput; + + } while(!work_restart[thr_id].restart); + + *hashes_done = (unsigned long) (n - first + 1); + return 0; +} + 
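/*
 * Illustrative sketch only (not part of this commit's code): how a "-l 64x360"
 * launch config, parsed above into WK_CUDABlocks x WK_CUDAThreads, maps onto
 * the grid/block shape passed to the wk kernel by wildkeccak_kernel().
 * The 64x360 figures are just an example setting, not a recommendation.
 */
static void example_launch_shape(void)
{
	uint32_t blocks = 64, tpb = 360;         // from sscanf(device_config, "%ux%u", ...)
	uint32_t throughput = blocks * tpb;      // nonces tried per kernel launch
	dim3 grid((throughput + tpb - 1) / tpb); // == 64 blocks here
	dim3 block(tpb);                         // 360 threads per block
	(void) grid; (void) block;               // shape only, nothing is launched
}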
+void wildkeccak_scratchpad_need_update(uint64_t* pscratchpad_buff) +{ + for(int i = 0; i < opt_n_threads; i++) { + h_scratchpad[i] = pscratchpad_buff; + if (init[i]) { + gpulog(LOG_DEBUG, i, "Starting scratchpad update..."); + cudaMemcpyAsync(d_scratchpad[i], h_scratchpad[i], scratchpad_size << 3, cudaMemcpyHostToDevice, bufpad_stream[i]); + work_restart[i].restart = true; + } + } +} + +void free_wildkeccak(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaThreadSynchronize(); + + cudaFree(d_scratchpad[thr_id]); + cudaFree(d_input[thr_id]); + cudaFree(d_retnonce[thr_id]); + + cudaStreamDestroy(bufpad_stream[thr_id]); + cudaStreamDestroy(kernel_stream[thr_id]); + + cudaDeviceSynchronize(); + + init[thr_id] = false; +} diff --git a/crypto/wildkeccak.h b/crypto/wildkeccak.h new file mode 100644 index 0000000..3cfe9d3 --- /dev/null +++ b/crypto/wildkeccak.h @@ -0,0 +1,10 @@ + +#define WILD_KECCAK_SCRATCHPAD_BUFFSIZE 1ULL << 28 +#define WILD_KECCAK_ADDENDUMS_ARRAY_SIZE 10 + +extern uint64_t scratchpad_size; + +extern uint32_t WK_CUDABlocks, WK_CUDAThreads; + +void wildkeccak_scratchpad_need_update(uint64_t* pscratchpad_buff); + diff --git a/crypto/xmr-rpc.cpp b/crypto/xmr-rpc.cpp new file mode 100644 index 0000000..bae2001 --- /dev/null +++ b/crypto/xmr-rpc.cpp @@ -0,0 +1,1257 @@ +/** + * XMR RPC 2.0 Specific Stuff + * tpruvot@github - October 2016 + */ + +#include +#include +#include +#include +#include // mkdir + +#include + +#ifdef _MSC_VER +#include "mman.h" // mmap +#include // _mkdir +#define chdir(x) _chdir(x) +#define mkdir(x) _mkdir(x) +#define getcwd(d,sz) _getcwd(d,sz) +#define unlink(x) _unlink(x) +#define PATH_MAX MAX_PATH +#else +#include // mmap +#endif + +#ifndef PRIu64 +#define PRIu64 "I64u" +#endif + +#include "xmr-rpc.h" +#include "wildkeccak.h" + +//#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) + +extern struct stratum_ctx stratum; + +bool jobj_binary(const json_t *obj, const char *key, void *buf, size_t buflen); + +pthread_mutex_t rpc2_job_lock; +pthread_mutex_t rpc2_work_lock; +pthread_mutex_t rpc2_login_lock; +pthread_mutex_t rpc2_getscratchpad_lock; + +char* opt_scratchpad_url = NULL; +uint64_t* pscratchpad_buff = NULL; + +// hide addendums flood on start +static bool opt_quiet_start = true; + +static const char * pscratchpad_local_cache = NULL; +static const char cachedir_suffix[] = "boolberry"; /* scratchpad cache saved as ~/.cache/boolberry/scratchpad.bin */ +static char scratchpad_file[PATH_MAX]; +static time_t prev_save = 0; +static struct scratchpad_hi current_scratchpad_hi; +static struct addendums_array_entry add_arr[WILD_KECCAK_ADDENDUMS_ARRAY_SIZE]; + +static char *rpc2_job_id = NULL; +static char *rpc2_blob = NULL; +static uint32_t rpc2_target = 0; +static size_t rpc2_bloblen = 0; +static struct work rpc2_work; + +static char rpc2_id[64] = { 0 }; +static uint64_t last_found_nonce = 0; + +static const char* get_json_string_param(const json_t *val, const char* param_name) +{ + json_t *tmp; + tmp = json_object_get(val, param_name); + if(!tmp) { + return NULL; + } + return json_string_value(tmp); +} + +static size_t hex2bin_len(unsigned char *p, const char *hexstr, size_t len) +{ + char hex_byte[3]; + char *ep; + size_t count = 0; + + hex_byte[2] = '\0'; + + while (*hexstr && len) { + if (!hexstr[1]) { + applog(LOG_ERR, "hex2bin str truncated"); + return 0; + } + hex_byte[0] = hexstr[0]; + hex_byte[1] = hexstr[1]; + *p = (unsigned char) strtol(hex_byte, &ep, 16); + if (*ep) { + applog(LOG_ERR, "hex2bin failed on '%s'", 
hex_byte); + return 0; + } + count++; + p++; + hexstr += 2; + len--; + } + + return (/*len == 0 &&*/ *hexstr == 0) ? count : 0; +} + +static bool parse_height_info(const json_t *hi_section, struct scratchpad_hi* phi) +{ + unsigned char prevhash[32] = { 0 }; + const char* block_id; + uint64_t hi_h; + size_t len; + + if(!phi || !hi_section) { + applog(LOG_ERR, "parse_height_info: wrong params"); + return false; + } + + json_t *height = json_object_get(hi_section, "height"); + if(!height) { + applog(LOG_ERR, "JSON inval hi, no height param"); + goto err_out; + } + + if(!json_is_integer(height)) { + applog(LOG_ERR, "JSON inval hi: height is not integer "); + goto err_out; + } + + hi_h = (uint64_t)json_integer_value(height); + if(!hi_h) { + applog(LOG_ERR, "JSON inval hi: height is 0"); + goto err_out; + } + + block_id = get_json_string_param(hi_section, "block_id"); + if(!block_id) { + applog(LOG_ERR, "JSON inval hi: block_id not found "); + goto err_out; + } + + len = hex2bin_len(prevhash, block_id, 32); + if(len != 32) { + applog(LOG_ERR, "JSON inval hi: block_id wrong len %d", len); + goto err_out; + } + + phi->height = hi_h; + memcpy(phi->prevhash, prevhash, 32); + + return true; +err_out: + return false; +} + +static void reset_scratchpad(void) +{ + current_scratchpad_hi.height = 0; + scratchpad_size = 0; + //unlink(scratchpad_file); +} + +static bool patch_scratchpad_with_addendum(uint64_t global_add_startpoint, uint64_t* padd_buff, size_t count/*uint64 units*/) +{ + for(size_t i = 0; i < count; i += 4) { + uint64_t global_offset = (padd_buff[i]%(global_add_startpoint/4))*4; + for(size_t j = 0; j != 4; j++) + pscratchpad_buff[global_offset + j] ^= padd_buff[i + j]; + } + return true; +} + +static bool apply_addendum(uint64_t* padd_buff, size_t count/*uint64 units*/) +{ + if(WILD_KECCAK_SCRATCHPAD_BUFFSIZE <= (scratchpad_size + count)*8 ) { + applog(LOG_ERR, "!!!!!!! WILD_KECCAK_SCRATCHPAD_BUFFSIZE overflowed !!!!!!!! please increase this constant! 
"); + return false; + } + + if(!patch_scratchpad_with_addendum(scratchpad_size, padd_buff, count)) { + applog(LOG_ERR, "patch_scratchpad_with_addendum is broken, resetting scratchpad"); + reset_scratchpad(); + return false; + } + for(int k = 0; k != count; k++) + pscratchpad_buff[scratchpad_size+k] = padd_buff[k]; + + scratchpad_size += count; + + return true; +} + +static bool pop_addendum(struct addendums_array_entry* entry) +{ + if(!entry) + return false; + + if(!entry->add_size || !entry->prev_hi.height) { + applog(LOG_ERR, "wrong parameters"); + return false; + } + patch_scratchpad_with_addendum(scratchpad_size - entry->add_size, &pscratchpad_buff[scratchpad_size - entry->add_size], (size_t) entry->add_size); + scratchpad_size = scratchpad_size - entry->add_size; + memcpy(¤t_scratchpad_hi, &entry->prev_hi, sizeof(entry->prev_hi)); + + memset(entry, 0, sizeof(struct addendums_array_entry)); + return true; +} + +// playback scratchpad addendums for whole add_arr +static bool revert_scratchpad() +{ + size_t p = 0; + size_t i = 0; + size_t arr_size = ARRAY_SIZE(add_arr); + + for(p=0; p != arr_size; p++) { + i = arr_size-(p+1); + if(!add_arr[i].prev_hi.height) + continue; + pop_addendum(&add_arr[i]); + } + return true; +} + +static bool push_addendum_info(struct scratchpad_hi* pprev_hi, uint64_t size /* uint64 units count*/) +{ + size_t i = 0; + size_t arr_size = ARRAY_SIZE(add_arr); + + // Find last free entry + for(i=0; i != arr_size; i++) { + if(!add_arr[i].prev_hi.height) + break; + } + + if(i >= arr_size) { + // Shift array + memmove(&add_arr[0], &add_arr[1], (arr_size-1)*sizeof(add_arr[0])); + i = arr_size - 1; + } + add_arr[i].prev_hi = *pprev_hi; + add_arr[i].add_size = size; + + return true; +} + +static bool addendum_decode(const json_t *addm) +{ + struct scratchpad_hi hi; + unsigned char prevhash[32]; + uint64_t* padd_buff; + uint64_t old_height; + + json_t* hi_section = json_object_get(addm, "hi"); + if (!hi_section) { + //applog(LOG_ERR, "JSON addms field not found"); + //return false; + return true; + } + + if(!parse_height_info(hi_section, &hi)) { + return false; + } + + const char* prev_id_str = get_json_string_param(addm, "prev_id"); + if(!prev_id_str) { + applog(LOG_ERR, "JSON prev_id is not a string"); + return false; + } + if(!hex2bin(prevhash, prev_id_str, 32)) { + applog(LOG_ERR, "JSON prev_id is not valid hex string"); + return false; + } + + if(current_scratchpad_hi.height != hi.height -1) + { + if(current_scratchpad_hi.height > hi.height -1) { + //skip low scratchpad + applog(LOG_ERR, "addendum with hi.height=%lld skiped since current_scratchpad_hi.height=%lld", hi.height, current_scratchpad_hi.height); + return true; + } + + //TODO: ADD SPLIT HANDLING HERE + applog(LOG_ERR, "JSON height in addendum-1 (%lld-1) missmatched with current_scratchpad_hi.height(%lld), reverting scratchpad and re-login", + hi.height, current_scratchpad_hi.height); + revert_scratchpad(); + //init re-login + strcpy(rpc2_id, ""); + return false; + } + + if(memcmp(prevhash, current_scratchpad_hi.prevhash, 32)) { + //TODO: ADD SPLIT HANDLING HERE + applog(LOG_ERR, "JSON prev_id in addendum missmatched with current_scratchpad_hi.prevhash"); + return false; + } + + const char* addm_hexstr = get_json_string_param(addm, "addm"); + if(!addm_hexstr) { + applog(LOG_ERR, "JSON prev_id in addendum missmatched with current_scratchpad_hi.prevhash"); + return false; + } + size_t add_len = strlen(addm_hexstr); + if(add_len%64) { + applog(LOG_ERR, "JSON wrong addm hex str len"); + return false; + } + 
padd_buff = (uint64_t*) calloc(1, add_len/2); + if (!padd_buff) { + applog(LOG_ERR, "out of memory, wanted %zu", add_len/2); + return false; + } + + if(!hex2bin((unsigned char*)padd_buff, addm_hexstr, add_len/2)) { + applog(LOG_ERR, "JSON addm is not a valid hex string"); + goto err_out; + } + + if(!apply_addendum(padd_buff, add_len/16)) { + applog(LOG_ERR, "JSON Failed to apply_addendum!"); + goto err_out; + } + free(padd_buff); + + push_addendum_info(&current_scratchpad_hi, add_len/16); + old_height = current_scratchpad_hi.height; + current_scratchpad_hi = hi; + + if (!opt_quiet && !opt_quiet_start) + applog(LOG_BLUE, "ADDENDUM APPLIED: %lld --> %lld", old_height, current_scratchpad_hi.height); + + return true; +err_out: + free(padd_buff); + return false; +} + +static bool addendums_decode(const json_t *job) +{ + json_t* paddms = json_object_get(job, "addms"); + if (!paddms) { + //applog(LOG_ERR, "JSON addms field not found"); + //return false; + return true; + } + + if(!json_is_array(paddms)) { + applog(LOG_ERR, "JSON addms field is not array"); + return false; + } + + size_t add_sz = json_array_size(paddms); + for (size_t i = 0; i < add_sz; i++) + { + json_t *addm = json_array_get(paddms, i); + if (!addm) { + applog(LOG_ERR, "Internal error: failed to get addm"); + return false; + } + if(!addendum_decode(addm)) + return false; + } + + return true; +} + +bool rpc2_job_decode(const json_t *job, struct work *work) +{ + json_t *tmp; + size_t blobLen; + const char *job_id; + const char *hexblob; + + tmp = json_object_get(job, "job_id"); + if (!tmp) { + applog(LOG_ERR, "JSON inval job id"); + goto err_out; + } + + if(!addendums_decode(job)) { + applog(LOG_ERR, "JSON failed to process addendums"); + goto err_out; + } + // now allow ADDENDUM notices (after the init) + opt_quiet_start = false; + + job_id = json_string_value(tmp); + tmp = json_object_get(job, "blob"); + if (!tmp) { + applog(LOG_ERR, "JSON inval blob"); + goto err_out; + } + hexblob = json_string_value(tmp); + blobLen = strlen(hexblob); + if (blobLen % 2 != 0 || ((blobLen / 2) < 40 && blobLen != 0) || (blobLen / 2) > 128) + { + applog(LOG_ERR, "JSON invalid blob length"); + goto err_out; + } + if (blobLen != 0) + { + pthread_mutex_lock(&rpc2_job_lock); + char *blob = (char*) calloc(1, blobLen / 2); + if (!hex2bin(blob, hexblob, blobLen / 2)) + { + applog(LOG_ERR, "JSON inval blob"); + pthread_mutex_unlock(&rpc2_job_lock); + goto err_out; + } + if (rpc2_blob) { + free(rpc2_blob); + } + rpc2_bloblen = blobLen / 2; + rpc2_blob = (char*) malloc(rpc2_bloblen); + memcpy(rpc2_blob, blob, blobLen / 2); + + free(blob); + + uint32_t target; + jobj_binary(job, "target", &target, 4); + if(rpc2_target != target) { + double difficulty = (((double) UINT32_MAX) / target); + stratum.job.diff = difficulty; + //applog(LOG_WARNING, "Stratum difficulty set to %.1f M", difficulty/1e6); + rpc2_target = target; + } + + if (rpc2_job_id) { + free(rpc2_job_id); + } + rpc2_job_id = strdup(job_id); + pthread_mutex_unlock(&rpc2_job_lock); + } + if(work) + { + if (!rpc2_blob) { + applog(LOG_ERR, "Requested work before work was received"); + goto err_out; + } + memcpy(work->data, rpc2_blob, rpc2_bloblen); + memset(work->target, 0xff, sizeof(work->target)); + + // hmmpff ?
seems wrong + //*((uint64_t*)&work->target[6]) = rpc2_target; + work->target[7] = rpc2_target; + + work->targetdiff = target_to_diff(work->target); + + snprintf(work->job_id, sizeof(work->job_id), "%s", rpc2_job_id); + } + + wildkeccak_scratchpad_need_update(pscratchpad_buff); + return true; + +err_out: + return false; +} + +extern struct work _ALIGN(64) g_work; +extern volatile time_t g_work_time; +extern bool submit_old; + +bool rpc2_stratum_job(struct stratum_ctx *sctx, json_t *id, json_t *params) +{ + bool ret = false; + pthread_mutex_lock(&rpc2_work_lock); + ret = rpc2_job_decode(params, &rpc2_work); + // update miner threads work + rpc2_stratum_gen_work(sctx, &g_work); + //memcpy(&g_work, &rpc2_work, sizeof(struct work)); + pthread_mutex_unlock(&rpc2_work_lock); + return ret; +} + +bool rpc2_stratum_gen_work(struct stratum_ctx *sctx, struct work *work) +{ +// pthread_mutex_lock(&rpc2_work_lock); + memcpy(work, &rpc2_work, sizeof(struct work)); + if (stratum_diff != sctx->job.diff) { + char sdiff[32] = { 0 }; + stratum_diff = sctx->job.diff; + if (opt_showdiff && work->targetdiff != stratum_diff) + snprintf(sdiff, 32, " (%.5f)", work->targetdiff); + applog(LOG_WARNING, "Stratum difficulty set to %.1f M%s", stratum_diff/1e6, sdiff); + } + if (work->target[7] != rpc2_target) { + work->target[7] = rpc2_target; + work->targetdiff = target_to_diff(work->target); + g_work_time = 0; + restart_threads(); + } +// pthread_mutex_unlock(&rpc2_work_lock); + return (work->data[0] != 0); +} + +#define JSON_SUBMIT_BUF_LEN 512 +bool rpc2_stratum_submit(struct pool_infos *pool, struct work *work) +{ + char _ALIGN(64) s[JSON_SUBMIT_BUF_LEN]; + uint8_t _ALIGN(64) hash[32]; + uint8_t _ALIGN(64) data[88]; + char *noncestr, *hashhex; + + memcpy(&data[0], work->data, 88); + memcpy(&data[1], work->nonces, 8); + + // pass if the previous hash is not the current previous hash + if(!submit_old && memcmp(&work->data[3], &g_work.data[3], 28)) { + if (opt_debug) applog(LOG_DEBUG, "stale work detected", work->sharediff, work->targetdiff); + pool->stales_count++; + return true; + } + + noncestr = bin2hex((unsigned char*) &data[1], 8); + memcpy(&last_found_nonce, work->nonces, 8); // "nonce":"5794ec8000000000" => 0x0000000080ec9457 + + wildkeccak_hash(hash, data, NULL, 0); + //applog(LOG_DEBUG, "submit diff %g > %g", work->sharediff, work->targetdiff); + //applog_hex(data, 81); + //applog_hex(hash, 32); + if (hash[31] != 0) + return true; // prevent bad hashes + hashhex = bin2hex((unsigned char*)hash, 32); + + snprintf(s, sizeof(s), "{\"method\":\"submit\",\"params\":" + "{\"id\":\"%s\",\"job_id\":\"%s\",\"nonce\":\"%s\",\"result\":\"%s\"}, \"id\":4}", + rpc2_id, work->job_id, noncestr, hashhex); + + free(hashhex); + free(noncestr); + + if(!stratum_send_line(&stratum, s)) { + applog(LOG_ERR, "submit_upstream_work stratum_send_line failed"); + return false; + } + + stratum.sharediff = target_to_diff((uint32_t*)hash); + + return true; +} + +bool rpc2_login_decode(const json_t *val) +{ + const char *id; + const char *s; + json_t *res = json_object_get(val, "result"); + if(!res) { + applog(LOG_ERR, "JSON invalid result"); + goto err_out; + } + + json_t *tmp; + tmp = json_object_get(res, "id"); + if(!tmp) { + applog(LOG_ERR, "JSON inval id"); + goto err_out; + } + id = json_string_value(tmp); + if(!id) { + applog(LOG_ERR, "JSON id is not a string"); + goto err_out; + } + + strncpy(rpc2_id, id, sizeof(rpc2_id)-1); + + if(opt_debug) + applog(LOG_DEBUG, "Auth id: %s", id); + + tmp = json_object_get(res, "status"); + if(!tmp) 
{ + applog(LOG_ERR, "JSON inval status"); + goto err_out; + } + s = json_string_value(tmp); + if(!s) { + applog(LOG_ERR, "JSON status is not a string"); + goto err_out; + } + if(strcmp(s, "OK")) { + applog(LOG_ERR, "JSON returned status \"%s\"", s); + goto err_out; + } + + return true; + +err_out: + return false; +} + +bool store_scratchpad_to_file(bool do_fsync) +{ + char file_name_buff[PATH_MAX] = { 0 }; + FILE *fp; + int ret; + + if(!scratchpad_size || !pscratchpad_buff) return true; // opt_algo != ALGO_WILDKECCAK || + + snprintf(file_name_buff, sizeof(file_name_buff), "%s.tmp", pscratchpad_local_cache); + unlink(file_name_buff); + fp = fopen(file_name_buff, "wbx"); + if (!fp) { + applog(LOG_ERR, "failed to create file %s: %s", file_name_buff, strerror(errno)); + return false; + } + + struct scratchpad_file_header sf = { 0 }; + memcpy(sf.add_arr, add_arr, sizeof(sf.add_arr)); + sf.current_hi = current_scratchpad_hi; + sf.scratchpad_size = scratchpad_size; + + if ((fwrite(&sf, sizeof(sf), 1, fp) != 1) || + (fwrite(pscratchpad_buff, 8, (size_t) scratchpad_size, fp) != scratchpad_size)) { + applog(LOG_ERR, "failed to write file %s: %s", file_name_buff, strerror(errno)); + fclose(fp); + unlink(file_name_buff); + return false; + } + fflush(fp); + /*if (do_fsync) { + if (fsync(fileno(fp)) == -1) { + applog(LOG_ERR, "failed to fsync file %s: %s", file_name_buff, strerror(errno)); + fclose(fp); + unlink(file_name_buff); + return false; + } + }*/ + if (fclose(fp) == EOF) { + applog(LOG_ERR, "failed to write file %s: %s", file_name_buff, strerror(errno)); + unlink(file_name_buff); + return false; + } + ret = rename(file_name_buff, pscratchpad_local_cache); + if (ret == -1) { + applog(LOG_ERR, "failed to rename %s to %s: %s", + file_name_buff, pscratchpad_local_cache, strerror(errno)); + unlink(file_name_buff); + return false; + } + applog(LOG_DEBUG, "saved scratchpad to %s (%zu+%zu bytes)", pscratchpad_local_cache, + sizeof(struct scratchpad_file_header), (size_t)scratchpad_size * 8); + return true; +} + +/* TODO: repetitive error+log spam handling */ +bool load_scratchpad_from_file(const char *fname) +{ + FILE *fp; + long flen; + + fp = fopen(fname, "rb"); + if (!fp) { + if (errno != ENOENT) { + applog(LOG_ERR, "failed to load %s: %s", fname, strerror(errno)); + } + return false; + } + + struct scratchpad_file_header fh = { 0 }; + if ((fread(&fh, sizeof(fh), 1, fp) != 1)) { + applog(LOG_ERR, "read error from %s: %s", fname, strerror(errno)); + fclose(fp); + return false; + } + + if ((fh.scratchpad_size*8 > (WILD_KECCAK_SCRATCHPAD_BUFFSIZE)) ||(fh.scratchpad_size%4)) { + applog(LOG_ERR, "file %s size invalid (%" PRIu64 "), max=%zu", + fname, fh.scratchpad_size*8, WILD_KECCAK_SCRATCHPAD_BUFFSIZE); + fclose(fp); + return false; + } + + if (fread(pscratchpad_buff, 8, (size_t) fh.scratchpad_size, fp) != fh.scratchpad_size) { + applog(LOG_ERR, "read error from %s: %s", fname, strerror(errno)); + fclose(fp); + return false; + } + + scratchpad_size = fh.scratchpad_size; + current_scratchpad_hi = fh.current_hi; + memcpy(&add_arr[0], &fh.add_arr[0], sizeof(fh.add_arr)); + flen = (long)scratchpad_size*8; + + if (!opt_quiet) { + applog(LOG_INFO, "Scratchpad size %ld kB at block %" PRIu64, flen/1024, current_scratchpad_hi.height); + } + + fclose(fp); + prev_save = time(NULL); + + return true; +} + +bool dump_scratchpad_to_file_debug() +{ + char file_name_buff[1024] = { 0 }; + snprintf(file_name_buff, sizeof(file_name_buff), "scratchpad_%" PRIu64 "_%llx.scr", + current_scratchpad_hi.height, (long long) 
last_found_nonce); + + /* do not bother rewriting if it exists already */ + + FILE *fp = fopen(file_name_buff, "w"); + if(!fp) { + applog(LOG_WARNING, "failed to open file %s: %s", file_name_buff, strerror(errno)); + return false; + } + if (fwrite(pscratchpad_buff, 8, (size_t) scratchpad_size, fp) != scratchpad_size) { + applog(LOG_ERR, "failed to write file %s: %s", file_name_buff, strerror(errno)); + fclose(fp); + return false; + } + if (fclose(fp) == EOF) { + applog(LOG_ERR, "failed to write file %s: %s", file_name_buff, strerror(errno)); + return false; + } + + fclose(fp); + return true; +} + +static bool try_mkdir_chdir(const char *dirn) +{ + if (chdir(dirn) == -1) { + if (errno == ENOENT) { +#ifdef WIN32 + if (mkdir(dirn) == -1) { +#else + if (mkdir(dirn, 0700) == -1) { +#endif + applog(LOG_ERR, "mkdir failed: %s", strerror(errno)); + return false; + } + if (chdir(dirn) == -1) { + applog(LOG_ERR, "chdir failed: %s", strerror(errno)); + return false; + } + } else { + applog(LOG_ERR, "chdir failed: %s", strerror(errno)); + return false; + } + } + return true; +} + +static size_t curl_write_data(void *ptr, size_t size, size_t nmemb, FILE *stream) +{ + size_t written = fwrite(ptr, size, nmemb, stream); + return written; +} + +static bool download_inital_scratchpad(const char* path_to, const char* url) +{ + CURL *curl; + CURLcode res; + char curl_error_buff[CURL_ERROR_SIZE] = { 0 }; + FILE *fp = fopen(path_to,"wb"); + if (!fp) { + applog(LOG_ERR, "Failed to create file %s error %d", path_to, errno); + return false; + } + + applog(LOG_INFO, "Downloading scratchpad...."); + + curl_global_cleanup(); + res = curl_global_init(CURL_GLOBAL_ALL); + if (res != CURLE_OK) { + applog(LOG_WARNING, "curl curl_global_init error: %d", (int) res); + } + + curl = curl_easy_init(); + if (!curl) { + applog(LOG_INFO, "Failed to curl_easy_init."); + fclose(fp); + unlink(path_to); + return false; + } + + if (opt_protocol && opt_debug) { + curl_easy_setopt(curl, CURLOPT_VERBOSE, 1); + } + + curl_easy_setopt(curl, CURLOPT_URL, url); + curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 30); + curl_easy_setopt(curl, CURLOPT_TIMEOUT, 300); + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1); + curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_error_buff); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, curl_write_data); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp); + //curl_easy_setopt(curl, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_2); + curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0); + if (opt_cert) { + curl_easy_setopt(curl, CURLOPT_CAINFO, opt_cert); + } else { + curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0); + } + + res = curl_easy_perform(curl); + if (res != CURLE_OK) { + if (res == CURLE_OUT_OF_MEMORY) { + applog(LOG_ERR, "Failed to download file, not enough memory!"); + applog(LOG_ERR, "curl error: %s", curl_error_buff); + } else { + applog(LOG_ERR, "Failed to download file, error: %s", curl_error_buff); + } + } else { + applog(LOG_INFO, "Scratchpad downloaded."); + } + /* always cleanup */ + curl_easy_cleanup(curl); + + fflush(fp); + fclose(fp); + + if (res != CURLE_OK) { + unlink(path_to); + return false; + } + return true; +} + +#ifndef WIN32 + +void GetScratchpad() +{ + const char *phome_var_name = "HOME"; + size_t sz = WILD_KECCAK_SCRATCHPAD_BUFFSIZE; + char cachedir[PATH_MAX]; + + if(!getenv(phome_var_name)) { + applog(LOG_ERR, "$%s not set", phome_var_name); + exit(1); + } + else if(!try_mkdir_chdir(getenv(phome_var_name))) { + exit(1); + } + + if(!try_mkdir_chdir(".cache")) exit(1); + + 
if(!try_mkdir_chdir(cachedir_suffix)) exit(1); + + if(getcwd(cachedir, sizeof(cachedir) - 22) == NULL) { + applog(LOG_ERR, "getcwd failed: %s", strerror(errno)); + exit(1); + } + + snprintf(scratchpad_file, sizeof(scratchpad_file), "%s/scratchpad.bin", cachedir); + pscratchpad_local_cache = scratchpad_file; + + if (!opt_quiet) + applog(LOG_INFO, "Scratchpad file %s", pscratchpad_local_cache); + + pscratchpad_buff = (uint64_t*) mmap(0, sz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, 0, 0); + if(pscratchpad_buff == MAP_FAILED) + { + applog(LOG_INFO, "hugetlb not available"); + pscratchpad_buff = (uint64_t*) malloc(sz); + if(!pscratchpad_buff) { + applog(LOG_ERR, "Scratchpad allocation failed"); + exit(1); + } + } else { + applog(LOG_INFO, "using hugetlb"); + } + madvise(pscratchpad_buff, sz, MADV_RANDOM | MADV_WILLNEED | MADV_HUGEPAGE); + mlock(pscratchpad_buff, sz); + + if(!load_scratchpad_from_file(pscratchpad_local_cache)) + { + if(!opt_scratchpad_url) { + applog(LOG_ERR, "Scratchpad URL not set. Please specify correct scratchpad url by -k or --scratchpad option"); + exit(1); + } + if(!download_inital_scratchpad(pscratchpad_local_cache, opt_scratchpad_url)) { + applog(LOG_ERR, "Scratchpad not found and not downloaded. Please specify correct scratchpad url by -k or --scratchpad option"); + exit(1); + } + if(!load_scratchpad_from_file(pscratchpad_local_cache)) { + applog(LOG_ERR, "Failed to load scratchpad data after downloading, probably broken scratchpad link, please restart miner with correct initial scratchpad link (-k or --scratchpad)"); + unlink(pscratchpad_local_cache); + exit(1); + } + } +} + +#else /* Windows */ + +void GetScratchpad() +{ + bool scratchpad_need_update = false; + size_t sz = WILD_KECCAK_SCRATCHPAD_BUFFSIZE; + const char* phome_var_name = "LOCALAPPDATA"; + char cachedir[PATH_MAX]; + + if(!getenv(phome_var_name)) { + applog(LOG_ERR, "%s env var is not set", phome_var_name); + exit(1); + } + else if(!try_mkdir_chdir(getenv(phome_var_name))) { + exit(1); + } + + if(!try_mkdir_chdir(".cache")) + exit(1); + + if(!try_mkdir_chdir(cachedir_suffix)) + exit(1); + + if(getcwd(cachedir, sizeof(cachedir) - 22) == NULL) { + applog(LOG_ERR, "getcwd failed: %s", strerror(errno)); + exit(1); + } + + snprintf(scratchpad_file, sizeof(scratchpad_file), "%s\\scratchpad.bin", cachedir); + pscratchpad_local_cache = scratchpad_file; + + if (!opt_quiet) + applog(LOG_INFO, "Scratchpad file %s", pscratchpad_local_cache); + + if (pscratchpad_buff) { + reset_scratchpad(); + wildkeccak_scratchpad_need_update(NULL); + scratchpad_need_update = true; + free(pscratchpad_buff); + pscratchpad_buff = NULL; + } + + pscratchpad_buff = (uint64_t*) malloc(sz); + if(!pscratchpad_buff) { + applog(LOG_ERR, "Scratchpad allocation failed"); + exit(1); + } + + if(!load_scratchpad_from_file(pscratchpad_local_cache)) + { + if(!opt_scratchpad_url) { + applog(LOG_ERR, "Scratchpad URL not set. Please specify correct scratchpad url by -k or --scratchpad option"); + exit(1); + } + free(pscratchpad_buff); + pscratchpad_buff = NULL; + if(!download_inital_scratchpad(pscratchpad_local_cache, opt_scratchpad_url)) { + applog(LOG_ERR, "Scratchpad not found and not downloaded.
Please specify correct scratchpad url by -k or --scratchpad option"); + exit(1); + } + pscratchpad_buff = (uint64_t*) malloc(sz); + if(!pscratchpad_buff) { + applog(LOG_ERR, "Scratchpad allocation failed"); + exit(1); + } + if(!load_scratchpad_from_file(pscratchpad_local_cache)) { + applog(LOG_ERR, "Failed to load scratchpad data after downloading, probably broken scratchpad link, please restart miner with correct initial scratchpad link (-k or --scratchpad)"); + unlink(pscratchpad_local_cache); + exit(1); + } + } + + if (scratchpad_need_update) + wildkeccak_scratchpad_need_update(pscratchpad_buff); +} + +#endif /* GetScratchpad() linux */ + +static bool rpc2_getfullscratchpad_decode(const json_t *val) +{ + const char* status; + const char* scratch_hex; + size_t len; + json_t *hi; + json_t *res = json_object_get(val, "result"); + if(!res) { + applog(LOG_ERR, "JSON invalid result in rpc2_getfullscratchpad_decode"); + goto err_out; + } + + //check status + status = get_json_string_param(res, "status"); + if (!status ) { + applog(LOG_ERR, "JSON status is not a string"); + goto err_out; + } + + if(strcmp(status, "OK")) { + applog(LOG_ERR, "JSON returned status \"%s\"", status); + goto err_out; + } + + //parse scratchpad + scratch_hex = get_json_string_param(res, "scratchpad_hex"); + if (!scratch_hex) { + applog(LOG_ERR, "JSON scratch_hex is not a string"); + goto err_out; + } + + len = hex2bin_len((unsigned char*)pscratchpad_buff, scratch_hex, WILD_KECCAK_SCRATCHPAD_BUFFSIZE); + if (!len) { + applog(LOG_ERR, "JSON scratch_hex is not valid hex"); + goto err_out; + } + + if (len%8 || len%32) { + applog(LOG_ERR, "JSON scratch_hex is not valid size=%d bytes", len); + goto err_out; + } + + //parse hi + hi = json_object_get(res, "hi"); + if(!hi) { + applog(LOG_ERR, "JSON inval hi"); + goto err_out; + } + + if(!parse_height_info(hi, &current_scratchpad_hi)) + { + applog(LOG_ERR, "JSON inval hi, failed to parse"); + goto err_out; + } + + applog(LOG_INFO, "Fetched scratchpad size %d bytes", len); + scratchpad_size = len/8; + + return true; + +err_out: return false; +} + +static bool rpc2_stratum_getscratchpad(struct stratum_ctx *sctx) +{ + bool ret = false; + json_t *val = NULL; + json_error_t err; + char *s, *sret; + + s = (char*) calloc(1, 1024); + if (!s) + goto out; + sprintf(s, "{\"method\": \"getfullscratchpad\", \"params\": {\"id\": \"%s\", \"agent\": \"" USER_AGENT "\"}, \"id\": 1}", rpc2_id); + + applog(LOG_INFO, "Getting full scratchpad...."); + if (!stratum_send_line(sctx, s)) + goto out; + + //sret = stratum_recv_line_timeout(sctx, 920); + sret = stratum_recv_line(sctx); + if (!sret) + goto out; + applog(LOG_DEBUG, "Getting full scratchpad received line"); + + val = JSON_LOADS(sret, &err); + free(sret); + if (!val) { + applog(LOG_ERR, "JSON decode rpc2_getscratchpad response failed(%d): %s", err.line, err.text); + goto out; + } + + applog(LOG_DEBUG, "Getting full scratchpad parsed line"); + + ret = rpc2_getfullscratchpad_decode(val); + +out: + free(s); + if (val) + json_decref(val); + + return ret; +} + +bool rpc2_stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *pass) +{ + bool ret = false; + json_t *val = NULL, *res_val, *err_val, *job_val = NULL; + json_error_t err; + char *s, *sret; + char *prevhash = bin2hex((const unsigned char*)current_scratchpad_hi.prevhash, 32); + s = (char*) calloc(1, 320 + strlen(user) + strlen(pass)); + sprintf(s, "{\"method\":\"login\",\"params\":{\"login\":\"%s\",\"pass\":\"%s\"," + "\"hi\":{\"height\":%" PRIu64 ",\"block_id\":\"%s\"}," +
"\"agent\":\"" USER_AGENT "\"},\"id\":2}", + user, pass, current_scratchpad_hi.height, prevhash); + free(prevhash); + + if (!stratum_send_line(sctx, s)) + goto out; + + while (1) { + sret = stratum_recv_line(sctx); + if (!sret) + goto out; + if (!stratum_handle_method(sctx, sret)) + break; + free(sret); + } + + val = JSON_LOADS(sret, &err); + free(sret); + if (!val) { + applog(LOG_ERR, "JSON decode failed(%d): %s", err.line, err.text); + goto out; + } + + res_val = json_object_get(val, "result"); + err_val = json_object_get(val, "error"); + + if (!res_val || json_is_false(res_val) || + (err_val && !json_is_null(err_val))) { + applog(LOG_ERR, "Stratum authentication failed"); + if (err_val) { + const char *msg = json_string_value(json_object_get(err_val,"message")); + if (msg && strlen(msg)) { + if (strstr(msg, "scratchpad too old") && pscratchpad_local_cache) { + if (unlink(pscratchpad_local_cache) == 0) { + applog(LOG_INFO, "Outdated scratchpad, deleted...", pscratchpad_local_cache); + GetScratchpad(); + goto out; + } + } + applog(LOG_NOTICE, "%s", msg); + } + } + goto out; + } + + rpc2_login_decode(val); + job_val = json_object_get(res_val, "job"); + + pthread_mutex_lock(&rpc2_work_lock); + if(job_val) rpc2_job_decode(job_val, &rpc2_work); + pthread_mutex_unlock(&rpc2_work_lock); + + ret = true; + +out: + free(s); + if (val) + json_decref(val); + + return ret; +} + +bool rpc2_stratum_request_job(struct stratum_ctx *sctx) +{ + json_t *val = NULL, *res_val, *err_val; + char *sret; + json_error_t err; + bool ret = false; + char *s = (char*) calloc(1, 10*2048); + if (!s) { + applog(LOG_ERR, "Stratum job OOM!"); + return ret; + } + + char* prevhash = bin2hex((const unsigned char*)current_scratchpad_hi.prevhash, 32); + sprintf(s, "{\"method\":\"getjob\",\"params\": {" + "\"id\":\"%s\", \"hi\": {\"height\": %" PRIu64 ",\"block_id\":\"%s\" }, \"agent\": \"" USER_AGENT "\"}," + "\"id\":1}", + rpc2_id, current_scratchpad_hi.height, prevhash); + free(prevhash); + + if(!stratum_send_line(sctx, s)) { + applog(LOG_ERR, "Stratum failed to send getjob line"); + goto out; + } + + sret = stratum_recv_line(sctx); + if (!sret) { + applog(LOG_ERR, "Stratum failed to recv getjob line"); + goto out; + } + + val = JSON_LOADS(sret, &err); + free(sret); + if (!val) { + applog(LOG_ERR, "JSON getwork decode failed(%d): %s", err.line, err.text); + goto out; + } + + res_val = json_object_get(val, "result"); + err_val = json_object_get(val, "error"); + + if (!res_val || json_is_false(res_val) || + (err_val && !json_is_null(err_val))) { + applog(LOG_ERR, "Stratum getjob failed"); + goto out; + } + + pthread_mutex_lock(&rpc2_work_lock); + rpc2_job_decode(res_val, &rpc2_work); + pthread_mutex_unlock(&rpc2_work_lock); + + ret = true; +out: + if (val) + json_decref(val); + + return ret; +} + +int rpc2_stratum_thread_stuff(struct pool_infos* pool) +{ + int opt_fail_pause = 10; + + if(!strcmp(rpc2_id, "")) { + if (!opt_quiet) + applog(LOG_DEBUG, "disconnecting..."); + stratum_disconnect(&stratum); + //not logged in, try to relogin + if (!opt_quiet) + applog(LOG_DEBUG, "Re-connect and relogin..."); + if(!stratum_connect(&stratum, stratum.url) || !stratum_authorize(&stratum, pool->user, pool->pass)) { + stratum_disconnect(&stratum); + applog(LOG_ERR, "Failed...retry after %d seconds", opt_fail_pause); + sleep(opt_fail_pause); + } + } + + if(!scratchpad_size) { + if(!rpc2_stratum_getscratchpad(&stratum)) { + stratum_disconnect(&stratum); + applog(LOG_ERR, "...retry after %d seconds", opt_fail_pause); + sleep(opt_fail_pause); + 
} + store_scratchpad_to_file(false); + prev_save = time(NULL); + + if(!rpc2_stratum_request_job(&stratum)) { + stratum_disconnect(&stratum); + applog(LOG_ERR, "...retry after %d seconds", opt_fail_pause); + sleep(opt_fail_pause); + } + } + + /* save every 12 hours */ + if ((time(NULL) - prev_save) > 12*3600) { + store_scratchpad_to_file(false); + prev_save = time(NULL); + } + + if (rpc2_work.job_id && (!g_work_time || strcmp(rpc2_work.job_id, g_work.job_id))) { + pthread_mutex_lock(&rpc2_work_lock); + rpc2_stratum_gen_work(&stratum, &g_work); + g_work_time = time(NULL); + pthread_mutex_unlock(&rpc2_work_lock); + + if (opt_debug) applog(LOG_DEBUG, "Stratum detected new block"); + restart_threads(); + } + + return 0; +} + +void rpc2_init() +{ + memset(&current_scratchpad_hi, 0, sizeof(struct scratchpad_hi)); + memset(&rpc2_work, 0, sizeof(struct work)); + + pthread_mutex_init(&rpc2_job_lock, NULL); + pthread_mutex_init(&rpc2_work_lock, NULL); + pthread_mutex_init(&rpc2_login_lock, NULL); + pthread_mutex_init(&rpc2_getscratchpad_lock, NULL); +} diff --git a/crypto/xmr-rpc.h b/crypto/xmr-rpc.h new file mode 100644 index 0000000..6e6002f --- /dev/null +++ b/crypto/xmr-rpc.h @@ -0,0 +1,41 @@ + +#include + +#include "wildkeccak.h" + +#ifdef WIN32 +#define _PACKED _ALIGN(4) +#else +#define _PACKED __attribute__((__packed__)) +#endif + +struct _PACKED scratchpad_hi { + unsigned char prevhash[32]; + uint64_t height; +}; + +struct _PACKED addendums_array_entry { + struct scratchpad_hi prev_hi; + uint64_t add_size; +}; + + +struct _PACKED scratchpad_file_header { + struct scratchpad_hi current_hi; + struct addendums_array_entry add_arr[WILD_KECCAK_ADDENDUMS_ARRAY_SIZE]; + uint64_t scratchpad_size; +}; + + +bool rpc2_job_decode(const json_t *job, struct work *work); +bool rpc2_stratum_job(struct stratum_ctx *sctx, json_t *id, json_t *params); +bool rpc2_stratum_gen_work(struct stratum_ctx *sctx, struct work *work); +bool rpc2_stratum_submit(struct pool_infos *pool, struct work *work); + +int rpc2_stratum_thread_stuff(struct pool_infos* pool); + +bool rpc2_login_decode(const json_t *val); + +void rpc2_init(); + +void GetScratchpad(); diff --git a/miner.h b/miner.h index 8f48e1f..cb2e174 100644 --- a/miner.h +++ b/miner.h @@ -293,6 +293,7 @@ extern int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, unsign extern int scanhash_vanilla(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done, int8_t blake_rounds); extern int scanhash_veltor(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done); extern int scanhash_whirl(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done); +extern int scanhash_wildkeccak(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done); extern int scanhash_x11evo(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done); extern int scanhash_x11(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done); extern int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done); @@ -340,6 +341,7 @@ extern void free_s3(int thr_id); extern void free_vanilla(int thr_id); extern void free_veltor(int thr_id); extern void free_whirl(int thr_id); +extern void free_wildkeccak(int thr_id); extern void free_x11evo(int thr_id); extern void free_x11(int thr_id); extern void free_x13(int thr_id); @@ -622,6 +624,7 @@ struct stratum_ctx { int pooln; time_t tm_connected; + int rpc2; int srvtime_diff; }; @@ -743,6 +746,8 @@ bool
stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *p bool stratum_handle_method(struct stratum_ctx *sctx, const char *s); void stratum_free_job(struct stratum_ctx *sctx); +bool rpc2_stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *pass); + void hashlog_remember_submit(struct work* work, uint32_t nonce); void hashlog_remember_scan_range(struct work* work); uint32_t hashlog_already_submittted(char* jobid, uint32_t nounce); @@ -831,6 +836,7 @@ void x13hash(void *output, const void *input); void x14hash(void *output, const void *input); void x15hash(void *output, const void *input); void x17hash(void *output, const void *input); +void wildkeccak_hash(void *output, const void *input, uint64_t* scratchpad, uint64_t ssize); void zr5hash(void *output, const void *input); void zr5hash_pok(void *output, uint32_t *pdata); diff --git a/pools.cpp b/pools.cpp index ef7cc4e..c58c25d 100644 --- a/pools.cpp +++ b/pools.cpp @@ -254,6 +254,7 @@ bool pool_switch(int thr_id, int pooln) // temporary... until stratum code cleanup stratum = p->stratum; stratum.pooln = cur_pooln; + stratum.rpc2 = (p->algo == ALGO_WILDKECCAK); // unlock the stratum thread tq_push(thr_info[stratum_thr_id].q, strdup(rpc_url)); @@ -275,6 +276,9 @@ bool pool_switch(int thr_id, int pooln) } } + + stratum.rpc2 = (p->algo == ALGO_WILDKECCAK); + return true; } diff --git a/res/ccminer.rc b/res/ccminer.rc index 746e6a3..e07c39c 100644 --- a/res/ccminer.rc +++ b/res/ccminer.rc @@ -60,8 +60,8 @@ IDI_ICON1 ICON "ccminer.ico" // VS_VERSION_INFO VERSIONINFO - FILEVERSION 1,8,4,0 - PRODUCTVERSION 1,8,4,0 + FILEVERSION 2,0,0,0 + PRODUCTVERSION 2,0,0,0 FILEFLAGSMASK 0x3fL #ifdef _DEBUG FILEFLAGS 0x21L @@ -76,10 +76,10 @@ BEGIN BEGIN BLOCK "040904e4" BEGIN - VALUE "FileVersion", "1.8.4" + VALUE "FileVersion", "2.0" VALUE "LegalCopyright", "Copyright (C) 2016" VALUE "ProductName", "ccminer" - VALUE "ProductVersion", "1.8.4" + VALUE "ProductVersion", "2.0" END END BLOCK "VarFileInfo" diff --git a/util.cpp b/util.cpp index 50da89a..223b871 100644 --- a/util.cpp +++ b/util.cpp @@ -36,6 +36,8 @@ #include "miner.h" #include "elist.h" +#include "crypto/xmr-rpc.h" + extern pthread_mutex_t stratum_sock_lock; extern pthread_mutex_t stratum_work_lock; extern bool opt_debug_diff; @@ -1216,6 +1218,8 @@ bool stratum_subscribe(struct stratum_ctx *sctx) json_error_t err; bool ret = false, retry = false; + if (sctx->rpc2) return true; + start: s = (char*)malloc(128 + (sctx->session_id ? strlen(sctx->session_id) : 0)); if (retry) @@ -1307,6 +1311,9 @@ bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *p json_error_t err; bool ret = false; + if (sctx->rpc2) + return rpc2_stratum_authorize(sctx, user, pass); + s = (char*)malloc(80 + strlen(user) + strlen(pass)); sprintf(s, "{\"id\": 2, \"method\": \"mining.authorize\", \"params\": [\"%s\", \"%s\"]}", user, pass); @@ -1849,6 +1856,10 @@ bool stratum_handle_method(struct stratum_ctx *sctx, const char *s) ret = stratum_show_message(sctx, id, params); goto out; } + if (sctx->rpc2 && !strcasecmp(method, "job")) { // cryptonote + ret = rpc2_stratum_job(sctx, id, params); + goto out; + } if (!ret) { // don't fail = disconnect stratum on unknown (and optional?) methods
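Since stratum_handle_method() now routes cryptonote-style "job" notifications to rpc2_stratum_job() whenever sctx->rpc2 is set, it may help to see the JSON shape that handler ultimately feeds to rpc2_job_decode(): a "job_id" string, a hex "blob", a 32-bit "target", and an optional "addms" array of scratchpad addendums. Below is a rough, self-contained sketch using jansson; the helper name decode_job_fields and the sample values are illustrative, not ccminer API.

#include <jansson.h>
#include <cstdio>
#include <cstring>

// Illustrative sketch only: pull out the fields a wildkeccak/rpc2 "job" notification
// carries, mirroring what rpc2_job_decode() expects (job_id, blob, target, addms[]).
static bool decode_job_fields(const json_t *job)
{
    const char *job_id = json_string_value(json_object_get(job, "job_id"));
    const char *blob   = json_string_value(json_object_get(job, "blob"));
    const char *target = json_string_value(json_object_get(job, "target"));
    if (!job_id || !blob || !target)
        return false;                 // rpc2_job_decode() also rejects such jobs

    json_t *addms = json_object_get(job, "addms");
    size_t n_addms = (addms && json_is_array(addms)) ? json_array_size(addms) : 0;

    printf("job %s: %zu hex chars of blob, target %s, %zu addendum(s)\n",
           job_id, strlen(blob), target, n_addms);
    return true;
}

int main()
{
    // Hypothetical sample notification, for demonstration only.
    json_t *job = json_pack("{s:s, s:s, s:s, s:[]}",
                            "job_id", "1", "blob", "0101", "target", "e4a63d00", "addms");
    bool ok = decode_job_fields(job);
    json_decref(job);
    return ok ? 0 : 1;
}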