diff --git a/Makefile.am b/Makefile.am
index a8d19b5..8878096 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -42,7 +42,7 @@ ccminer_SOURCES	= elist.h miner.h compat.h \
 			  quark/quarkcoin.cu quark/animecoin.cu \
 			  quark/cuda_quark_compactionTest.cu  \
 			  neoscrypt/neoscrypt.cpp neoscrypt/neoscrypt-cpu.c neoscrypt/cuda_neoscrypt.cu \
-			  cuda_nist5.cu pentablake.cu skein.cu skein2.cu zr5.cu \
+			  cuda_nist5.cu pentablake.cu skein.cu skein2.cpp zr5.cu \
 			  sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c \
 			  sph/cubehash.c sph/echo.c sph/luffa.c sph/sha2.c sph/shavite.c sph/simd.c \
 			  sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \
diff --git a/ccminer.vcxproj b/ccminer.vcxproj
index 485da3d..0a8d629 100644
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@@ -270,6 +270,7 @@
     <ClCompile Include="neoscrypt\neoscrypt-cpu.c" />
     <ClInclude Include="neoscrypt\cuda_vectors.h" />
     <CudaCompile Include="neoscrypt\cuda_neoscrypt.cu" />
+    <ClCompile Include="skein2.cpp" />
     <ClCompile Include="sph\aes_helper.c" />
     <ClCompile Include="sph\blake.c" />
     <ClCompile Include="sph\bmw.c" />
@@ -464,9 +465,6 @@
     <CudaCompile Include="skein.cu">
       <MaxRegCount>64</MaxRegCount>
     </CudaCompile>
-    <CudaCompile Include="skein2.cu">
-      <MaxRegCount>64</MaxRegCount>
-    </CudaCompile>
     <CudaCompile Include="x11\cuda_x11_aes.cu">
       <ExcludedFromBuild>true</ExcludedFromBuild>
     </CudaCompile>
@@ -538,4 +536,4 @@
   <Target Name="AfterClean">
     <Delete Files="@(FilesToCopy->'$(OutDir)%(Filename)%(Extension)')" TreatErrorsAsWarnings="true" />
   </Target>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters
index 9cc242c..c7042db 100644
--- a/ccminer.vcxproj.filters
+++ b/ccminer.vcxproj.filters
@@ -240,6 +240,9 @@
     <ClCompile Include="neoscrypt\neoscrypt-cpu.c">
       <Filter>Source Files\neoscrypt</Filter>
     </ClCompile>
+    <ClCompile Include="skein2.cpp">
+      <Filter>Source Files\CUDA</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="compat.h">
@@ -601,9 +604,6 @@
     <CudaCompile Include="skein.cu">
       <Filter>Source Files\CUDA</Filter>
     </CudaCompile>
-    <CudaCompile Include="skein2.cu">
-      <Filter>Source Files\CUDA</Filter>
-    </CudaCompile>
     <CudaCompile Include="scrypt\blake.cu">
       <Filter>Source Files\CUDA\scrypt</Filter>
     </CudaCompile>
@@ -653,4 +653,4 @@
       <Filter>Ressources</Filter>
     </Text>
   </ItemGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/cuda_checkhash.cu b/cuda_checkhash.cu
index 4bbef59..a40f75e 100644
--- a/cuda_checkhash.cu
+++ b/cuda_checkhash.cu
@@ -17,15 +17,15 @@ static uint32_t* d_resNonces[MAX_GPUS];
 __host__
 void cuda_check_cpu_init(int thr_id, uint32_t threads)
 {
-    CUDA_CALL_OR_RET(cudaMallocHost(&h_resNonces[thr_id], 8*sizeof(uint32_t)));
-    CUDA_CALL_OR_RET(cudaMalloc(&d_resNonces[thr_id], 8*sizeof(uint32_t)));
+    CUDA_CALL_OR_RET(cudaMallocHost(&h_resNonces[thr_id], 32));
+    CUDA_CALL_OR_RET(cudaMalloc(&d_resNonces[thr_id], 32));
 }
 
 // Target Difficulty
 __host__
 void cuda_check_cpu_setTarget(const void *ptarget)
 {
-	CUDA_SAFE_CALL(cudaMemcpyToSymbol(pTarget, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice));
+	CUDA_SAFE_CALL(cudaMemcpyToSymbol(pTarget, ptarget, 32, 0, cudaMemcpyHostToDevice));
 }
 
 /* --------------------------------------------------------------------------------------------- */
@@ -177,11 +177,11 @@ void cuda_check_hash_branch_64(uint32_t threads, uint32_t startNounce, uint32_t
 __host__
 uint32_t cuda_check_hash_branch(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order)
 {
-	uint32_t result = 0xffffffff;
-	cudaMemset(d_resNonces[thr_id], 0xff, sizeof(uint32_t));
-
 	const uint32_t threadsperblock = 256;
 
+	uint32_t result = UINT32_MAX;
+	cudaMemset(d_resNonces[thr_id], 0xff, sizeof(uint32_t));
+
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
 	dim3 block(threadsperblock);
 
@@ -199,9 +199,9 @@ uint32_t cuda_check_hash_branch(int thr_id, uint32_t threads, uint32_t startNoun
 
 /* Function to get the compiled Shader Model version */
 int cuda_arch[MAX_GPUS] = { 0 };
-__global__
-void nvcc_get_arch(int *d_version)
+__global__ void nvcc_get_arch(int *d_version)
 {
+	*d_version = 0;
 #ifdef __CUDA_ARCH__
 	*d_version = __CUDA_ARCH__;
 #endif
diff --git a/groestlcoin.cpp b/groestlcoin.cpp
index 400b8ef..6956102 100644
--- a/groestlcoin.cpp
+++ b/groestlcoin.cpp
@@ -78,8 +78,8 @@ int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
 		}
 
 		if ((uint64_t) pdata[19] + throughput > max_nonce) {
+			*hashes_done = pdata[19] - start_nonce + 1;
 			pdata[19] = max_nonce;
-			*hashes_done = max_nonce - start_nonce + 1;
 			break;
 		}
 		pdata[19] += throughput;
diff --git a/myriadgroestl.cpp b/myriadgroestl.cpp
index 5464819..4186334 100644
--- a/myriadgroestl.cpp
+++ b/myriadgroestl.cpp
@@ -35,7 +35,7 @@ int scanhash_myriad(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
 	uint32_t max_nonce, unsigned long *hashes_done)
 {
 	uint32_t _ALIGN(64) endiandata[32];
-	uint32_t start_nonce = pdata[19]++;
+	uint32_t start_nonce = pdata[19];
 	uint32_t throughput = device_intensity(thr_id, __func__, 1 << 17);
 	throughput = min(throughput, max_nonce - start_nonce);
 
@@ -82,8 +82,8 @@ int scanhash_myriad(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
 		}
 
 		if ((uint64_t) pdata[19] + throughput > max_nonce) {
+			*hashes_done = pdata[19] - start_nonce;
 			pdata[19] = max_nonce;
-			*hashes_done = max_nonce - start_nonce + 1;
 			break;
 		}
 		pdata[19] += throughput;
diff --git a/skein.cu b/skein.cu
index 131812b..38b4bcc 100644
--- a/skein.cu
+++ b/skein.cu
@@ -396,12 +396,13 @@ extern "C" int scanhash_skeincoin(int thr_id, uint32_t *pdata, const uint32_t *p
 				int res = 1;
 				uint8_t num = res;
 				uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], num);
-				while (secNonce != 0 && res < 6)
+				while (secNonce != 0 && res < 2) /* todo: up to 6 */
 				{
 					endiandata[19] = swab32_if(secNonce, swap);
 					skeincoinhash(vhash64, endiandata);
 					if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) {
-						pdata[19+res] = swab32_if(secNonce, !swap);
+						// todo: store extra nonces in consecutive slots (pdata[19], [20], [21]...); zr5 pok handling must be adapted first
+						pdata[19+res*2] = swab32_if(secNonce, !swap);
 						res++;
 					}
 					num++;
@@ -416,10 +417,16 @@ extern "C" int scanhash_skeincoin(int thr_id, uint32_t *pdata, const uint32_t *p
 				applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", device_map[thr_id], foundNonce);
 			}
 		}
+
+		if ((uint64_t) pdata[19] + throughput > max_nonce) {
+			*hashes_done = pdata[19] - first_nonce;
+			pdata[19] = max_nonce;
+			break;
+		}
+
 		pdata[19] += throughput;
 
-	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
+	} while (!work_restart[thr_id].restart);
 
-	*hashes_done = pdata[19] - first_nonce + 1;
 	return 0;
 }
diff --git a/skein2.cu b/skein2.cpp
similarity index 90%
rename from skein2.cu
rename to skein2.cpp
index b264597..bdb170e 100644
--- a/skein2.cu
+++ b/skein2.cpp
@@ -2,6 +2,7 @@
  * SKEIN512 80 + SKEIN512 64 (Woodcoin)
  * by tpruvot@github - 2015
  */
+#include <string.h>
 
 #include "sph/sph_skein.h"
 
@@ -17,7 +18,7 @@ extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNou
 
 extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 
-extern "C" void skein2hash(void *output, const void *input)
+void skein2hash(void *output, const void *input)
 {
 	uint32_t _ALIGN(64) hash[16];
 	sph_skein512_context ctx_skein;
@@ -30,12 +31,12 @@ extern "C" void skein2hash(void *output, const void *input)
 	sph_skein512(&ctx_skein, hash, 64);
 	sph_skein512_close(&ctx_skein, hash);
 
-	memcpy(output, hash, 32);
+	memcpy(output, (void*) hash, 32);
 }
 
 static bool init[MAX_GPUS] = { 0 };
 
-extern "C" int scanhash_skein2(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
+int scanhash_skein2(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
 	uint32_t max_nonce, unsigned long *hashes_done)
 {
 	const uint32_t first_nonce = pdata[19];
@@ -100,7 +101,9 @@ extern "C" int scanhash_skein2(int thr_id, uint32_t *pdata, const uint32_t *ptar
 			}
 		}
 
-		if (((uint64_t) throughput + pdata[19]) > max_nonce) {
+		if ((uint64_t) pdata[19] + throughput > max_nonce) {
+			*hashes_done = pdata[19] - first_nonce;
+			pdata[19] = max_nonce;
 			break;
 		}