diff --git a/lyra2/allium.cu b/lyra2/allium.cu
index 931e6bc..6492c92 100644
--- a/lyra2/allium.cu
+++ b/lyra2/allium.cu
@@ -30,7 +30,7 @@ extern void cubehash256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t start
 extern void skein256_cpu_init(int thr_id, uint32_t threads);
 
 extern void lyra2_cpu_init(int thr_id, uint32_t threads, uint64_t *d_matrix);
-extern void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, bool gtx750ti);
+extern void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint64_t *d_outputHash, bool gtx750ti);
 
 extern void groestl256_cpu_init(int thr_id, uint32_t threads);
 extern void groestl256_cpu_free(int thr_id);
@@ -141,9 +141,9 @@ extern "C" int scanhash_allium(int thr_id, struct work* work, uint32_t max_nonce
 		//blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
 		//keccak256_sm3_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
 		blakeKeccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
-		lyra2_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], gtx750ti);
+		lyra2_cpu_hash_32(thr_id, throughput, d_hash[thr_id], gtx750ti);
 		cubehash256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
-		lyra2_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], gtx750ti);
+		lyra2_cpu_hash_32(thr_id, throughput, d_hash[thr_id], gtx750ti);
 		skein256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
 
 		*hashes_done = pdata[19] - first_nonce + throughput;
diff --git a/lyra2/cuda_lyra2.cu b/lyra2/cuda_lyra2.cu
index 7905d23..a280200 100644
--- a/lyra2/cuda_lyra2.cu
+++ b/lyra2/cuda_lyra2.cu
@@ -409,7 +409,7 @@ __constant__ uint2x4 blake2b_IV[2] = {
 };
 
 __global__ __launch_bounds__(64, 1)
-void lyra2_gpu_hash_32_1(uint32_t threads, uint32_t startNounce, uint2 *g_hash)
+void lyra2_gpu_hash_32_1(uint32_t threads, uint2 *g_hash)
 {
 	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 
@@ -436,7 +436,7 @@ void lyra2_gpu_hash_32_1(uint32_t threads, uint32_t startNounce, uint2 *g_hash)
 
 __global__
 __launch_bounds__(TPB52, 1)
-void lyra2_gpu_hash_32_2(uint32_t threads, uint32_t startNounce, uint64_t *g_hash)
+void lyra2_gpu_hash_32_2(uint32_t threads, uint64_t *g_hash)
 {
 	const uint32_t thread = blockDim.y * blockIdx.x + threadIdx.y;
 
@@ -481,7 +481,7 @@ void lyra2_gpu_hash_32_2(uint32_t threads, uint32_t startNounce, uint64_t *g_has
 }
 
 __global__ __launch_bounds__(64, 1)
-void lyra2_gpu_hash_32_3(uint32_t threads, uint32_t startNounce, uint2 *g_hash)
+void lyra2_gpu_hash_32_3(uint32_t threads, uint2 *g_hash)
 {
 	const uint32_t thread = blockDim.x * blockIdx.x + threadIdx.x;
 
@@ -502,7 +502,7 @@ void lyra2_gpu_hash_32_3(uint32_t threads, uint32_t startNounce, uint2 *g_hash)
 		g_hash[thread + threads * 2] = state[0].z;
 		g_hash[thread + threads * 3] = state[0].w;
 
-	} //thread
+	}
 }
 #else
 #if __CUDA_ARCH__ < 500
@@ -510,9 +510,9 @@ void lyra2_gpu_hash_32_3(uint32_t threads, uint32_t startNounce, uint2 *g_hash)
 /* for unsupported SM arch */
 __device__ void* DMatrix;
 #endif
-__global__ void lyra2_gpu_hash_32_1(uint32_t threads, uint32_t startNounce, uint2 *g_hash) {}
-__global__ void lyra2_gpu_hash_32_2(uint32_t threads, uint32_t startNounce, uint64_t *g_hash) {}
-__global__ void lyra2_gpu_hash_32_3(uint32_t threads, uint32_t startNounce, uint2 *g_hash) {}
+__global__ void lyra2_gpu_hash_32_1(uint32_t threads, uint2 *g_hash) {}
+__global__ void lyra2_gpu_hash_32_2(uint32_t threads, uint64_t *g_hash) {}
+__global__ void lyra2_gpu_hash_32_3(uint32_t threads, uint2 *g_hash) {}
 #endif
 
 __host__
@@ -523,7 +523,7 @@ void lyra2_cpu_init(int thr_id, uint32_t threads, uint64_t *d_matrix)
 }
 
 __host__
-void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint64_t *d_hash, bool gtx750ti)
+void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint64_t *d_hash, bool gtx750ti)
 {
 	int dev_id = device_map[thr_id % MAX_GPUS];
 
@@ -544,11 +544,11 @@ void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint6
 
 	if (cuda_arch[dev_id] >= 520)
 	{
-		lyra2_gpu_hash_32_1 <<< grid2, block2 >>> (threads, startNounce, (uint2*)d_hash);
+		lyra2_gpu_hash_32_1 <<< grid2, block2 >>> (threads, (uint2*)d_hash);
 
-		lyra2_gpu_hash_32_2 <<< grid1, block1, 24 * (8 - 0) * sizeof(uint2) * tpb >>> (threads, startNounce, d_hash);
+		lyra2_gpu_hash_32_2 <<< grid1, block1, 24 * (8 - 0) * sizeof(uint2) * tpb >>> (threads, d_hash);
 
-		lyra2_gpu_hash_32_3 <<< grid2, block2 >>> (threads, startNounce, (uint2*)d_hash);
+		lyra2_gpu_hash_32_3 <<< grid2, block2 >>> (threads, (uint2*)d_hash);
 	}
 	else if (cuda_arch[dev_id] >= 500)
 	{
@@ -561,12 +561,12 @@ void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint6
 		// suitable amount to adjust for 10warp
 		shared_mem = 6144;
 
-		lyra2_gpu_hash_32_1_sm5 <<< grid2, block2 >>> (threads, startNounce, (uint2*)d_hash);
+		lyra2_gpu_hash_32_1_sm5 <<< grid2, block2 >>> (threads, (uint2*)d_hash);
 
-		lyra2_gpu_hash_32_2_sm5 <<< grid1, block1, shared_mem >>> (threads, startNounce, (uint2*)d_hash);
+		lyra2_gpu_hash_32_2_sm5 <<< grid1, block1, shared_mem >>> (threads, (uint2*)d_hash);
 
-		lyra2_gpu_hash_32_3_sm5 <<< grid2, block2 >>> (threads, startNounce, (uint2*)d_hash);
+		lyra2_gpu_hash_32_3_sm5 <<< grid2, block2 >>> (threads, (uint2*)d_hash);
 	}
 	else
-		lyra2_gpu_hash_32_sm2 <<< grid3, block3 >>> (threads, startNounce, d_hash);
+		lyra2_gpu_hash_32_sm2 <<< grid3, block3 >>> (threads, d_hash);
 }
diff --git a/lyra2/cuda_lyra2_sm2.cuh b/lyra2/cuda_lyra2_sm2.cuh
index 18263b2..da621d0 100644
--- a/lyra2/cuda_lyra2_sm2.cuh
+++ b/lyra2/cuda_lyra2_sm2.cuh
@@ -131,7 +131,7 @@ void reduceDuplexRowSetup(const int rowIn, const int rowInOut, const int rowOut,
 }
 
 __global__ __launch_bounds__(TPB30, 1)
-void lyra2_gpu_hash_32_sm2(uint32_t threads, uint32_t startNounce, uint64_t *g_hash)
+void lyra2_gpu_hash_32_sm2(uint32_t threads, uint64_t *g_hash)
 {
 	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
@@ -224,5 +224,5 @@ void lyra2_gpu_hash_32_sm2(uint32_t threads, uint32_t startNounce, uint64_t *g_h
 
 #else
 /* if __CUDA_ARCH__ < 200 .. host */
-__global__ void lyra2_gpu_hash_32_sm2(uint32_t threads, uint32_t startNounce, uint64_t *g_hash) {}
+__global__ void lyra2_gpu_hash_32_sm2(uint32_t threads, uint64_t *g_hash) {}
 #endif
diff --git a/lyra2/cuda_lyra2_sm5.cuh b/lyra2/cuda_lyra2_sm5.cuh
index fc13172..4a3caeb 100644
--- a/lyra2/cuda_lyra2_sm5.cuh
+++ b/lyra2/cuda_lyra2_sm5.cuh
@@ -589,7 +589,7 @@ void reduceDuplexRowV50_8(const int rowInOut, uint2 state[4], const uint32_t thr
 }
 
 __global__ __launch_bounds__(64, 1)
-void lyra2_gpu_hash_32_1_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_hash)
+void lyra2_gpu_hash_32_1_sm5(uint32_t threads, uint2 *g_hash)
 {
 	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 
@@ -622,7 +622,7 @@ void lyra2_gpu_hash_32_1_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_ha
 }
 
 __global__ __launch_bounds__(TPB50, 1)
-void lyra2_gpu_hash_32_2_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_hash)
+void lyra2_gpu_hash_32_2_sm5(uint32_t threads, uint2 *g_hash)
 {
 	const uint32_t thread = (blockDim.y * blockIdx.x + threadIdx.y);
 
@@ -662,7 +662,7 @@ void lyra2_gpu_hash_32_2_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_ha
 }
 
 __global__ __launch_bounds__(64, 1)
-void lyra2_gpu_hash_32_3_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_hash)
+void lyra2_gpu_hash_32_3_sm5(uint32_t threads, uint2 *g_hash)
 {
 	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 
@@ -687,7 +687,7 @@ void lyra2_gpu_hash_32_3_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_ha
 
 #else
 /* if __CUDA_ARCH__ != 500 .. host */
-__global__ void lyra2_gpu_hash_32_1_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_hash) {}
-__global__ void lyra2_gpu_hash_32_2_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_hash) {}
-__global__ void lyra2_gpu_hash_32_3_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_hash) {}
+__global__ void lyra2_gpu_hash_32_1_sm5(uint32_t threads, uint2 *g_hash) {}
+__global__ void lyra2_gpu_hash_32_2_sm5(uint32_t threads, uint2 *g_hash) {}
+__global__ void lyra2_gpu_hash_32_3_sm5(uint32_t threads, uint2 *g_hash) {}
 #endif
diff --git a/lyra2/lyra2RE.cu b/lyra2/lyra2RE.cu
index b3ad49f..b435371 100644
--- a/lyra2/lyra2RE.cu
+++ b/lyra2/lyra2RE.cu
@@ -26,7 +26,7 @@ extern void skein256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNon
 extern void skein256_cpu_init(int thr_id, uint32_t threads);
 
 extern void lyra2_cpu_init(int thr_id, uint32_t threads, uint64_t *d_matrix);
-extern void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, bool gtx750ti);
+extern void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint64_t *d_outputHash, bool gtx750ti);
 
 extern void groestl256_cpu_init(int thr_id, uint32_t threads);
 extern void groestl256_cpu_free(int thr_id);
@@ -130,7 +130,7 @@ extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce,
 		//blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
 		//keccak256_sm3_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
 		blakeKeccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
-		lyra2_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], gtx750ti);
+		lyra2_cpu_hash_32(thr_id, throughput, d_hash[thr_id], gtx750ti);
 		skein256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
 
 		*hashes_done = pdata[19] - first_nonce + throughput;
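
Note (not part of the patch): a condensed sketch of the host-side dispatch in lyra2_cpu_hash_32() after these hunks apply, gathered from the + lines above so the per-architecture call shape can be read in one place. The Lyra2 kernels never used the start nonce (each thread indexes g_hash by its own thread id), so the parameter is dropped everywhere; grid1/grid2/grid3, block1/block2/block3, shared_mem and tpb are the function's existing locals in this tree.

	// Illustrative post-patch dispatch; no startNounce is threaded through.
	if (cuda_arch[dev_id] >= 520)
	{
		// SM 5.2+: three-pass pipeline; pass 2 sizes shared memory from tpb
		lyra2_gpu_hash_32_1 <<< grid2, block2 >>> (threads, (uint2*)d_hash);
		lyra2_gpu_hash_32_2 <<< grid1, block1, 24 * (8 - 0) * sizeof(uint2) * tpb >>> (threads, d_hash);
		lyra2_gpu_hash_32_3 <<< grid2, block2 >>> (threads, (uint2*)d_hash);
	}
	else if (cuda_arch[dev_id] >= 500)
	{
		// SM 5.0: _sm5 variants with the fixed shared_mem budget set above
		lyra2_gpu_hash_32_1_sm5 <<< grid2, block2 >>> (threads, (uint2*)d_hash);
		lyra2_gpu_hash_32_2_sm5 <<< grid1, block1, shared_mem >>> (threads, (uint2*)d_hash);
		lyra2_gpu_hash_32_3_sm5 <<< grid2, block2 >>> (threads, (uint2*)d_hash);
	}
	else
		// older architectures: single-kernel _sm2 fallback
		lyra2_gpu_hash_32_sm2 <<< grid3, block3 >>> (threads, d_hash);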