remove pluck algo

Supcoin seems.... dead and the algo was not supported on all devices
2015-06-28 20:45:38 +02:00 · 2015-06-28 20:45:38 +02:00 · 15293d063f
commit 15293d063f
parent aa271104c1
10 changed files with 4 additions and 879 deletions
--- a/Makefile.am
+++ b/Makefile.am
@ -47,7 +47,6 @@ ccminer_SOURCES	= elist.h miner.h compat.h \
 			  sph/cubehash.c sph/echo.c sph/luffa.c sph/sha2.c sph/shavite.c sph/simd.c \
 			  sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \
 			  sph/shabal.c sph/whirlpool.c sph/sha2big.c sph/haval.c \
-			  pluck/pluck.cu pluck/cuda_pluck.cu \
 			  qubit/qubit.cu qubit/qubit_luffa512.cu qubit/deep.cu qubit/luffa.cu \
 			  x11/x11.cu x11/fresh.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
 			  x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \
--- a/README.txt
+++ b/README.txt
@ -81,7 +81,6 @@ its command line interface and options.
                          neoscrypt   use to mine FeatherCoin
                          nist5       use to mine TalkCoin
                          penta       use to mine Joincoin / Pentablake
-                          pluck       use to mine Supcoin
                          quark       use to mine Quarkcoin
                          qubit       use to mine Qubit
                          scrypt      use to mine Scrypt coins
@ -221,6 +220,10 @@ features.

 >>> RELEASE HISTORY <<<

+  July 2015...
+                  Nvml api power limits
+                  Remove pluck algo
+
  June 23th 2015  v1.6.5
                  Handle Ziftrcoin PoK solo mining
                  Basic compatibility with CUDA 7.0 (generally slower hashrate)
--- a/ccminer.cpp
+++ b/ccminer.cpp
@ -102,7 +102,6 @@ enum sha_algos {
 	ALGO_NEOSCRYPT,
 	ALGO_NIST5,
 	ALGO_PENTABLAKE,
-	ALGO_PLUCK,
 	ALGO_QUARK,
 	ALGO_QUBIT,
 	ALGO_SCRYPT,
@ -137,7 +136,6 @@ static const char *algo_names[] = {
 	"neoscrypt",
 	"nist5",
 	"penta",
-	"pluck",
 	"quark",
 	"qubit",
 	"scrypt",
@ -292,7 +290,6 @@ Options:\n\
 			neoscrypt   FeatherCoin, Phoenix, UFO...\n\
 			nist5       NIST5 (TalkCoin)\n\
 			penta       Pentablake hash (5x Blake 512)\n\
-			pluck       SupCoin\n\
 			quark       Quark\n\
 			qubit       Qubit\n\
 			scrypt      Scrypt\n\
@ -595,7 +592,6 @@ static void calc_network_diff(struct work *work)
 		case ALGO_QUARK:
 			diffone = 0xFFFFFF0000000000ull;
 			break;
-		case ALGO_PLUCK:
 		case ALGO_SCRYPT:
 		case ALGO_SCRYPT_JANE:
 			// cant get the right value on these 3 algos...
@ -1429,7 +1425,6 @@ static bool stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
 	switch (opt_algo) {
 		case ALGO_JACKPOT:
 		case ALGO_NEOSCRYPT:
-		case ALGO_PLUCK:
 		case ALGO_SCRYPT:
 		case ALGO_SCRYPT_JANE:
 			diff_to_target(work->target, sctx->job.diff / (65536.0 * opt_difficulty));
@ -1760,9 +1755,6 @@ static void *miner_thread(void *userdata)
 			case ALGO_SCRYPT_JANE:
 				minmax = 0x100000;
 				break;
-			case ALGO_PLUCK:
-				minmax = 0x2000;
-				break;
 			}
 			max64 = max(minmax-1, max64);
 		}
@ -1897,11 +1889,6 @@ static void *miner_thread(void *userdata)
 			                      max_nonce, &hashes_done);
 			break;

-		case ALGO_PLUCK:
-			rc = scanhash_pluck(thr_id, work.data, work.target,
-			                      max_nonce, &hashes_done);
-			break;
-
 		case ALGO_SCRYPT:
 			rc = scanhash_scrypt(thr_id, work.data, work.target, NULL,
 			                      max_nonce, &hashes_done, &tv_start, &tv_end);
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@ -421,8 +421,6 @@
      <AdditionalOptions Condition="'$(Configuration)'=='Release'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions>
      <AdditionalOptions Condition="'$(Configuration)'=='Debug'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions>
    </CudaCompile>
-    <CudaCompile Include="pluck\pluck.cu" />
-    <CudaCompile Include="pluck\cuda_pluck.cu" />
    <CudaCompile Include="quark\cuda_quark_groestl512.cu">
    </CudaCompile>
    <CudaCompile Include="quark\cuda_quark_keccak512.cu">
--- a/ccminer.vcxproj.filters
+++ b/ccminer.vcxproj.filters
@ -433,12 +433,6 @@
    <CudaCompile Include="JHA\cuda_jha_compactionTest.cu">
      <Filter>Source Files\CUDA\JHA</Filter>
    </CudaCompile>
-    <CudaCompile Include="pluck\pluck.cu">
-      <Filter>Source Files\CUDA</Filter>
-    </CudaCompile>
-    <CudaCompile Include="pluck\cuda_pluck.cu">
-      <Filter>Source Files\CUDA</Filter>
-    </CudaCompile>
    <CudaCompile Include="quark\cuda_jh512.cu">
      <Filter>Source Files\CUDA\quark</Filter>
    </CudaCompile>
--- a/cuda_helper.h
+++ b/cuda_helper.h
@ -309,7 +309,6 @@ uint64_t shr_t64(uint64_t x, uint32_t n)
 #endif
 }

-// device asm for ?
 __device__ __forceinline__
 uint64_t shl_t64(uint64_t x, uint32_t n)
 {
@ -324,40 +323,6 @@ uint64_t shl_t64(uint64_t x, uint32_t n)
 #endif
 }

-// device asm 32 for pluck
-__device__ __forceinline__
-uint32_t andor32(uint32_t a, uint32_t b, uint32_t c) {
-#ifdef __CUDA_ARCH__
-	uint32_t result;
-	asm("{ .reg .u32 m,n,o;\n\t"
-		"and.b32 m,  %1, %2;\n\t"
-		" or.b32 n,  %1, %2;\n\t"
-		"and.b32 o,   n, %3;\n\t"
-		" or.b32 %0,  m, o ;\n\t"
-		"}\n\t"
-		: "=r"(result) : "r"(a), "r"(b), "r"(c));
-	return result;
-#else
-	// unused on host...
-	return 0;
-#endif
-}
-
-__device__ __forceinline__
-uint32_t xor3b(uint32_t a, uint32_t b, uint32_t c) {
-#ifdef __CUDA_ARCH__
-	uint32_t result;
-	asm("{ .reg .u32 t1;\n\t"
-		"xor.b32 t1, %2, %3;\n\t"
-		"xor.b32 %0, %1, t1;\n\t"
-		"}"
-		: "=r"(result) : "r"(a) ,"r"(b),"r"(c));
-	return result;
-#else
-	return a^b^c;
-#endif
-}
-
 __device__ __forceinline__
 uint32_t shr_t32(uint32_t x,uint32_t n) {
 #ifdef __CUDA_ARCH__
--- a/miner.h
+++ b/miner.h
@ -326,10 +326,6 @@ extern int scanhash_pentablake(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done);

-extern int scanhash_pluck(int thr_id, uint32_t *pdata,
-	const uint32_t *ptarget, uint32_t max_nonce,
-	unsigned long *hashes_done);
-
 extern int scanhash_qubit(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done);
@ -781,7 +777,6 @@ void myriadhash(void *state, const void *input);
 void neoscrypt(uchar *output, const uchar *input, uint32_t profile);
 void nist5hash(void *state, const void *input);
 void pentablakehash(void *output, const void *input);
-void pluckhash(uint32_t *hash, const uint32_t *data, uchar *hashbuffer, const int N);
 void quarkhash(void *state, const void *input);
 void qubithash(void *state, const void *input);
 void scrypthash(void* output, const void* input);
--- a/pluck/cuda_pluck.cu
+++ b/pluck/cuda_pluck.cu
@ -1,573 +0,0 @@
-/*
- * "pluck" kernel implementation.
- *
- * ==========================(LICENSE BEGIN)============================
- *
- * Copyright (c) 2015  djm34
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * ===========================(LICENSE END)=============================
- *
- * @author djm34
- * @author tpruvot
- */
-
-#include <stdio.h>
-#include <stdint.h>
-#include <memory.h>
-
-#include "cuda_vector.h"
-
-#include "miner.h"
-
-uint32_t *d_PlNonce[MAX_GPUS];
-
-__device__  uint8_t *  hashbuffer;
-__constant__  uint32_t pTarget[8];
-__constant__  uint32_t c_data[20];
-
-#define HASH_MEMORY_8bit 131072
-#define HASH_MEMORY_32bit 32768
-#define HASH_MEMORY 4096
-
-static __constant__  uint32_t H256[8] = {
-	0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
-	0x510E527F, 0x9B05688C,	0x1F83D9AB, 0x5BE0CD19
-};
-
-static  __constant__  uint32_t Ksha[64] = {
-	0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5,
-	0x3956C25B, 0x59F111F1,	0x923F82A4, 0xAB1C5ED5,
-	0xD807AA98, 0x12835B01,	0x243185BE, 0x550C7DC3,
-	0x72BE5D74, 0x80DEB1FE,	0x9BDC06A7, 0xC19BF174,
-	0xE49B69C1, 0xEFBE4786,	0x0FC19DC6, 0x240CA1CC,
-	0x2DE92C6F, 0x4A7484AA,	0x5CB0A9DC, 0x76F988DA,
-	0x983E5152, 0xA831C66D,	0xB00327C8, 0xBF597FC7,
-	0xC6E00BF3, 0xD5A79147,	0x06CA6351, 0x14292967,
-	0x27B70A85, 0x2E1B2138,	0x4D2C6DFC, 0x53380D13,
-	0x650A7354, 0x766A0ABB,	0x81C2C92E, 0x92722C85,
-	0xA2BFE8A1, 0xA81A664B,	0xC24B8B70, 0xC76C51A3,
-	0xD192E819, 0xD6990624,	0xF40E3585, 0x106AA070,
-	0x19A4C116, 0x1E376C08,	0x2748774C, 0x34B0BCB5,
-	0x391C0CB3, 0x4ED8AA4A,	0x5B9CCA4F, 0x682E6FF3,
-	0x748F82EE, 0x78A5636F,	0x84C87814, 0x8CC70208,
-	0x90BEFFFA, 0xA4506CEB,	0xBEF9A3F7, 0xC67178F2
-};
-
-
-#define SALSA(a,b,c,d) { \
-    t = a+d; b^=rotateL(t,  7); \
-    t = b+a; c^=rotateL(t,  9); \
-    t = c+b; d^=rotateL(t, 13); \
-    t = d+c; a^=rotateL(t, 18); \
-}
-
-#define SALSA_CORE(state) { \
-	SALSA(state.s0,state.s4,state.s8,state.sc); \
-	SALSA(state.s5,state.s9,state.sd,state.s1); \
-	SALSA(state.sa,state.se,state.s2,state.s6); \
-	SALSA(state.sf,state.s3,state.s7,state.sb); \
-	SALSA(state.s0,state.s1,state.s2,state.s3); \
-	SALSA(state.s5,state.s6,state.s7,state.s4); \
-	SALSA(state.sa,state.sb,state.s8,state.s9); \
-	SALSA(state.sf,state.sc,state.sd,state.se); \
-}
-
-#if __CUDA_ARCH__ >= 320
-static __device__ __forceinline__ uint16 xor_salsa8(const uint16 &Bx)
-{
-	uint32_t t;
-	uint16 state = Bx;
-	SALSA_CORE(state);
-	SALSA_CORE(state);
-	SALSA_CORE(state);
-	SALSA_CORE(state);
-	return(state+Bx);
-}
-#endif
-
-// sha256
-
-static __device__ __forceinline__ uint32_t bsg2_0(const uint32_t x)
-{
-	uint32_t r1 = ROTR32(x, 2);
-	uint32_t r2 = ROTR32(x, 13);
-	uint32_t r3 = ROTR32(x, 22);
-	return xor3b(r1, r2, r3);
-}
-
-static __device__ __forceinline__ uint32_t bsg2_1(const uint32_t x)
-{
-	uint32_t r1 = ROTR32(x, 6);
-	uint32_t r2 = ROTR32(x, 11);
-	uint32_t r3 = ROTR32(x, 25);
-	return xor3b(r1, r2, r3);
-}
-
-static __device__ __forceinline__ uint32_t ssg2_0(const uint32_t x)
-{
-	uint64_t r1 = ROTR32(x, 7);
-	uint64_t r2 = ROTR32(x, 18);
-	uint64_t r3 = shr_t32(x, 3);
-	return xor3b(r1, r2, r3);
-}
-
-static __device__ __forceinline__ uint32_t ssg2_1(const uint32_t x)
-{
-	uint64_t r1 = ROTR32(x, 17);
-	uint64_t r2 = ROTR32(x, 19);
-	uint64_t r3 = shr_t32(x, 10);
-	return xor3b(r1, r2, r3);
-}
-
-static __device__ __forceinline__ void sha2_step1(const uint32_t a, const uint32_t b, const uint32_t c, uint32_t &d, const uint32_t e,
-	const uint32_t f, const uint32_t g, uint32_t &h, const uint32_t in, const uint32_t Kshared)
-{
-	uint32_t t1, t2;
-	uint32_t vxandx = xandx(e, f, g);
-	uint32_t bsg21 = bsg2_1(e);
-	uint32_t bsg20 = bsg2_0(a);
-	uint32_t andorv = andor32(a, b, c);
-
-	t1 = h + bsg21 + vxandx + Kshared + in;
-	t2 = bsg20 + andorv;
-	d = d + t1;
-	h = t1 + t2;
-}
-
-static __device__ __forceinline__ void sha2_step2(const uint32_t a, const uint32_t b, const uint32_t c, uint32_t &d, const uint32_t e,
-	const uint32_t f, const uint32_t g, uint32_t &h, uint32_t* in, const uint32_t pc, const uint32_t Kshared)
-{
-	uint32_t t1, t2;
-
-	int pcidx1 = (pc - 2) & 0xF;
-	int pcidx2 = (pc - 7) & 0xF;
-	int pcidx3 = (pc - 15) & 0xF;
-	uint32_t inx0 = in[pc];
-	uint32_t inx1 = in[pcidx1];
-	uint32_t inx2 = in[pcidx2];
-	uint32_t inx3 = in[pcidx3];
-
-	uint32_t ssg21 = ssg2_1(inx1);
-	uint32_t ssg20 = ssg2_0(inx3);
-	uint32_t vxandx = xandx(e, f, g);
-	uint32_t bsg21 = bsg2_1(e);
-	uint32_t bsg20 = bsg2_0(a);
-	uint32_t andorv = andor32(a, b, c);
-
-	in[pc] = ssg21 + inx2 + ssg20 + inx0;
-
-	t1 = h + bsg21 + vxandx + Kshared + in[pc];
-	t2 = bsg20 + andorv;
-	d = d + t1;
-	h = t1 + t2;
-}
-
-static __device__ __forceinline__
-void sha2_round_body(uint32_t* in, uint32_t* r)
-{
-	uint32_t a = r[0];
-	uint32_t b = r[1];
-	uint32_t c = r[2];
-	uint32_t d = r[3];
-	uint32_t e = r[4];
-	uint32_t f = r[5];
-	uint32_t g = r[6];
-	uint32_t h = r[7];
-
-	sha2_step1(a, b, c, d, e, f, g, h, in[0], Ksha[0]);
-	sha2_step1(h, a, b, c, d, e, f, g, in[1], Ksha[1]);
-	sha2_step1(g, h, a, b, c, d, e, f, in[2], Ksha[2]);
-	sha2_step1(f, g, h, a, b, c, d, e, in[3], Ksha[3]);
-	sha2_step1(e, f, g, h, a, b, c, d, in[4], Ksha[4]);
-	sha2_step1(d, e, f, g, h, a, b, c, in[5], Ksha[5]);
-	sha2_step1(c, d, e, f, g, h, a, b, in[6], Ksha[6]);
-	sha2_step1(b, c, d, e, f, g, h, a, in[7], Ksha[7]);
-	sha2_step1(a, b, c, d, e, f, g, h, in[8], Ksha[8]);
-	sha2_step1(h, a, b, c, d, e, f, g, in[9], Ksha[9]);
-	sha2_step1(g, h, a, b, c, d, e, f, in[10], Ksha[10]);
-	sha2_step1(f, g, h, a, b, c, d, e, in[11], Ksha[11]);
-	sha2_step1(e, f, g, h, a, b, c, d, in[12], Ksha[12]);
-	sha2_step1(d, e, f, g, h, a, b, c, in[13], Ksha[13]);
-	sha2_step1(c, d, e, f, g, h, a, b, in[14], Ksha[14]);
-	sha2_step1(b, c, d, e, f, g, h, a, in[15], Ksha[15]);
-
-	#pragma unroll 3
-	for (int i = 0; i<3; i++) {
-
-		sha2_step2(a, b, c, d, e, f, g, h, in, 0, Ksha[16 + 16 * i]);
-		sha2_step2(h, a, b, c, d, e, f, g, in, 1, Ksha[17 + 16 * i]);
-		sha2_step2(g, h, a, b, c, d, e, f, in, 2, Ksha[18 + 16 * i]);
-		sha2_step2(f, g, h, a, b, c, d, e, in, 3, Ksha[19 + 16 * i]);
-		sha2_step2(e, f, g, h, a, b, c, d, in, 4, Ksha[20 + 16 * i]);
-		sha2_step2(d, e, f, g, h, a, b, c, in, 5, Ksha[21 + 16 * i]);
-		sha2_step2(c, d, e, f, g, h, a, b, in, 6, Ksha[22 + 16 * i]);
-		sha2_step2(b, c, d, e, f, g, h, a, in, 7, Ksha[23 + 16 * i]);
-		sha2_step2(a, b, c, d, e, f, g, h, in, 8, Ksha[24 + 16 * i]);
-		sha2_step2(h, a, b, c, d, e, f, g, in, 9, Ksha[25 + 16 * i]);
-		sha2_step2(g, h, a, b, c, d, e, f, in, 10, Ksha[26 + 16 * i]);
-		sha2_step2(f, g, h, a, b, c, d, e, in, 11, Ksha[27 + 16 * i]);
-		sha2_step2(e, f, g, h, a, b, c, d, in, 12, Ksha[28 + 16 * i]);
-		sha2_step2(d, e, f, g, h, a, b, c, in, 13, Ksha[29 + 16 * i]);
-		sha2_step2(c, d, e, f, g, h, a, b, in, 14, Ksha[30 + 16 * i]);
-		sha2_step2(b, c, d, e, f, g, h, a, in, 15, Ksha[31 + 16 * i]);
-
-	}
-
-	r[0] += a;
-	r[1] += b;
-	r[2] += c;
-	r[3] += d;
-	r[4] += e;
-	r[5] += f;
-	r[6] += g;
-	r[7] += h;
-}
-
-static __device__ __forceinline__ uint8 sha256_64(uint32_t *data)
-{
-	uint32_t __align__(64) in[16];
-	uint32_t __align__(32) buf[8];
-
-	((uint16 *)in)[0] = swapvec((uint16*)data);
-
-	((uint8*)buf)[0] = ((uint8*)H256)[0];
-
-	sha2_round_body(in, buf);
-
-	#pragma unroll 14
-	for (int i = 0; i<14; i++) { in[i + 1] = 0; }
-
-	in[0] = 0x80000000;
-	in[15] = 0x200;
-
-	sha2_round_body(in, buf);
-	return swapvec((uint8*)buf);
-}
-
-static __device__ __forceinline__ uint8 sha256_80(uint32_t nonce)
-{
-	uint32_t __align__(64) in[16];
-	uint32_t __align__(32) buf[8];
-
-	((uint16 *)in)[0] = swapvec((uint16*)c_data);
-	((uint8*)buf)[0] = ((uint8*)H256)[0];
-
-	sha2_round_body(in, buf);
-
-	#pragma unroll 3
-	for (int i = 0; i<3; i++) { in[i] = cuda_swab32(c_data[i + 16]); }
-
-//	in[3] = cuda_swab32(nonce);
-	in[3] = nonce;
-	in[4] = 0x80000000;
-	in[15] = 0x280;
-
-	#pragma unroll
-	for (int i = 5; i<15; i++) { in[i] = 0; }
-
-	sha2_round_body(in, buf);
-	return swapvec((uint8*)buf);
-}
-
-// Pluck Factor 128
-#define SHIFT (1024 * 128)
-
-__global__ __launch_bounds__(256, 1)
-void pluck_gpu_hash0_v50(uint32_t threads, uint32_t startNonce)
-{
-	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
-	if (thread < threads)
-	{
-#if __CUDA_ARCH__ >= 320
-		const uint32_t nonce = startNonce + thread;
-		uint32_t shift = SHIFT * thread;
-		((uint8*)(hashbuffer + shift))[0] = sha256_80(nonce);
-		((uint8*)(hashbuffer + shift))[1] = make_uint8(0, 0, 0, 0, 0, 0, 0, 0);
-		for (int i = 2; i < 5; i++)
-		{
-			uint32_t randmax = i * 32 - 4;
-			uint32_t randseed[16];
-			uint32_t randbuffer[16];
-			uint32_t joint[16];
-
-			((uint8*)randseed)[0] = __ldg8(&(hashbuffer + shift)[32 * i - 64]);
-			((uint8*)randseed)[1] = __ldg8(&(hashbuffer + shift)[32 * i - 32]);
-
-			((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]);
-
-			((uint8*)joint)[0] = ((uint8*)randseed)[1];
-
-			#pragma unroll
-			for (int j = 0; j < 8; j++) {
-				uint32_t rand = randbuffer[j] % (randmax - 32);
-				joint[j + 8] = __ldgtoint_unaligned(&(hashbuffer + shift)[rand]);
-			}
-
-			uint8 truc = sha256_64(joint);
-			((uint8*)(hashbuffer + shift))[i] = truc;
-			((uint8*)randseed)[0] = ((uint8*)joint)[0];
-			((uint8*)randseed)[1] = truc;
-
-			((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]);
-
-			for (int j = 0; j < 32; j += 2)
-			{
-				uint32_t rand = randbuffer[j / 2] % randmax;
-				(hashbuffer + shift)[rand] = __ldg(&(hashbuffer + shift)[randmax + j]);
-				(hashbuffer + shift)[rand + 1] = __ldg(&(hashbuffer + shift)[randmax + j + 1]);
-				(hashbuffer + shift)[rand + 2] = __ldg(&(hashbuffer + shift)[randmax + j + 2]);
-				(hashbuffer + shift)[rand + 3] = __ldg(&(hashbuffer + shift)[randmax + j + 3]);
-			}
-		} // main loop
-#endif
-	}
-}
-
-__global__ __launch_bounds__(256, 1)
-void pluck_gpu_hash_v50(uint32_t threads, uint32_t startNonce, uint32_t *nonceVector)
-{
-	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
-	if (thread < threads)
-	{
-#if __CUDA_ARCH__ >= 320
-		const uint32_t nonce = startNonce + thread;
-		uint32_t shift = SHIFT * thread;
-
-		for (int i = 5; i < HASH_MEMORY - 1; i++)
-		{
-			uint32_t randmax = i*32-4;
-			uint32_t randseed[16];
-			uint32_t randbuffer[16];
-			uint32_t joint[16];
-			uint8 Buffbuffer[2];
-
-			((uint8*)randseed)[0] = __ldg8(&(hashbuffer + shift)[32*i-64]);
-			((uint8*)randseed)[1] = __ldg8(&(hashbuffer + shift)[32*i-32]);
-
-
-			Buffbuffer[0] = __ldg8(&(hashbuffer + shift)[32*i - 128]);
-			Buffbuffer[1] = __ldg8(&(hashbuffer + shift)[32*i - 96]);
-
-			((uint16*)randseed)[0] ^= ((uint16*)Buffbuffer)[0];
-			((uint16*)randbuffer)[0]= xor_salsa8(((uint16*)randseed)[0]);
-			((uint8*)joint)[0] = __ldg8(&(hashbuffer + shift)[(i-1)<<5]);
-
-			#pragma unroll
-			for (int j = 0; j < 8; j++) {
-				uint32_t rand = randbuffer[j] % (randmax - 32);
-				joint[j+8] = __ldgtoint_unaligned(&(hashbuffer + shift)[rand]);
-			}
-
-			uint8 truc =  sha256_64(joint);
-			((uint8*)(hashbuffer + shift))[i] = truc;
-			((uint8*)randseed)[0] = ((uint8*)joint)[0];
-			((uint8*)randseed)[1] = truc;
-
-			((uint16*)randseed)[0] ^= ((uint16*)Buffbuffer)[0];
-			((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]);
-
-			for (int j = 0; j < 32; j += 2)
-			{
-				uint32_t rand = randbuffer[j / 2] % randmax;
-
-				(hashbuffer+shift)[rand] =       __ldg(&(hashbuffer+shift)[randmax+j]);
-				(hashbuffer + shift)[rand + 1] = __ldg(&(hashbuffer + shift)[randmax + j + 1]);
-				(hashbuffer + shift)[rand + 2] = __ldg(&(hashbuffer + shift)[randmax + j + 2]);
-				(hashbuffer + shift)[rand + 3] = __ldg(&(hashbuffer + shift)[randmax + j + 3]);
-			}
-
-		} // main loop
-
-		uint32_t outbuf =  __ldgtoint(&(hashbuffer + shift)[28]);
-
-		if (outbuf <= pTarget[7]) {
-			nonceVector[0] = nonce;
-		}
-#endif
-	}
-}
-
-__global__ __launch_bounds__(128, 3)
-void pluck_gpu_hash0(uint32_t threads, uint32_t startNonce)
-{
-	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
-	if (thread < threads)
-	{
-#if __CUDA_ARCH__ >= 320
-		const uint32_t nonce = startNonce + thread;
-
-		uint32_t shift = SHIFT * thread;
-		((uint8*)(hashbuffer + shift))[0] = sha256_80(nonce);
-		((uint8*)(hashbuffer + shift))[1] = make_uint8(0, 0, 0, 0, 0, 0, 0, 0);
-		for (int i = 2; i < 5; i++)
-		{
-			uint32_t randmax = i * 32 - 4;
-			uint32_t randseed[16];
-			uint32_t randbuffer[16];
-			uint32_t joint[16];
-
-			((uint8*)randseed)[0] = __ldg8(&(hashbuffer + shift)[32 * i - 64]);
-			((uint8*)randseed)[1] = __ldg8(&(hashbuffer + shift)[32 * i - 32]);
-
-			((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]);
-
-			((uint8*)joint)[0] = ((uint8*)randseed)[1];
-
-			#pragma unroll
-			for (int j = 0; j < 8; j++) {
-				uint32_t rand = randbuffer[j] % (randmax - 32);
-				joint[j + 8] = __ldgtoint_unaligned(&(hashbuffer + shift)[rand]);
-			}
-
-			uint8 truc = sha256_64(joint);
-			((uint8*)(hashbuffer + shift))[i] = truc;
-			((uint8*)randseed)[0] = ((uint8*)joint)[0];
-			((uint8*)randseed)[1] = truc;
-
-			((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]);
-
-			for (int j = 0; j < 32; j += 2)
-			{
-				uint32_t rand = randbuffer[j / 2] % randmax;
-				(hashbuffer + shift)[rand] = __ldg(&(hashbuffer + shift)[randmax + j]);
-				(hashbuffer + shift)[rand + 1] = __ldg(&(hashbuffer + shift)[randmax + j + 1]);
-				(hashbuffer + shift)[rand + 2] = __ldg(&(hashbuffer + shift)[randmax + j + 2]);
-				(hashbuffer + shift)[rand + 3] = __ldg(&(hashbuffer + shift)[randmax + j + 3]);
-			}
-		} // main loop
-#endif
-	}
-}
-
-__global__ __launch_bounds__(128, 3)
-void pluck_gpu_hash(uint32_t threads, uint32_t startNonce, uint32_t *nonceVector)
-{
-	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
-	if (thread < threads)
-	{
-#if __CUDA_ARCH__ >= 320
-		const uint32_t nonce = startNonce + thread;
-
-		uint32_t shift = SHIFT * thread;
-
-		for (int i = 5; i < HASH_MEMORY - 1; i++)
-		{
-			uint32_t randmax = i * 32 - 4;
-			uint32_t randseed[16];
-			uint32_t randbuffer[16];
-			uint32_t joint[16];
-			uint8 Buffbuffer[2];
-
-			((uint8*)randseed)[0] = __ldg8(&(hashbuffer + shift)[32 * i - 64]);
-			((uint8*)randseed)[1] = __ldg8(&(hashbuffer + shift)[32 * i - 32]);
-
-
-			Buffbuffer[0] = __ldg8(&(hashbuffer + shift)[32 * i - 128]);
-			Buffbuffer[1] = __ldg8(&(hashbuffer + shift)[32 * i - 96]);
-			((uint16*)randseed)[0] ^= ((uint16*)Buffbuffer)[0];
-
-			((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]);
-
-			((uint8*)joint)[0] = __ldg8(&(hashbuffer + shift)[(i - 1) << 5]);
-
-			#pragma unroll
-			for (int j = 0; j < 8; j++)
-			{
-				uint32_t rand = randbuffer[j] % (randmax - 32);
-				joint[j + 8] = __ldgtoint_unaligned(&(hashbuffer + shift)[rand]);
-			}
-
-			uint8 truc = sha256_64(joint);
-			((uint8*)(hashbuffer + shift))[i] = truc;
-			((uint8*)randseed)[0] = ((uint8*)joint)[0];
-			((uint8*)randseed)[1] = truc;
-
-
-			((uint16*)randseed)[0] ^= ((uint16*)Buffbuffer)[0];
-			((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]);
-			for (int j = 0; j < 32; j += 2)
-			{
-				uint32_t rand = randbuffer[j / 2] % randmax;
-
-				(hashbuffer + shift)[rand] = __ldg(&(hashbuffer + shift)[randmax + j]);
-				(hashbuffer + shift)[rand + 1] = __ldg(&(hashbuffer + shift)[randmax + j + 1]);
-				(hashbuffer + shift)[rand + 2] = __ldg(&(hashbuffer + shift)[randmax + j + 2]);
-				(hashbuffer + shift)[rand + 3] = __ldg(&(hashbuffer + shift)[randmax + j + 3]);
-			}
-		} // main loop
-
-		uint32_t outbuf = __ldgtoint(&(hashbuffer + shift)[28]);
-
-		if (outbuf <= pTarget[7]) {
-			nonceVector[0] = nonce;
-		}
-#endif
-	}
-}
-
-void pluck_cpu_init(int thr_id, uint32_t threads, uint32_t* hash)
-{
-	cuda_get_arch(thr_id);
-	cudaMemcpyToSymbol(hashbuffer, &hash, sizeof(hash), 0, cudaMemcpyHostToDevice);
-	cudaMalloc(&d_PlNonce[thr_id], sizeof(uint32_t));
-}
-
-__host__
-uint32_t pluck_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce,  int order)
-{
-	uint32_t result[8] = { 0xffffffff };
-	cudaMemset(d_PlNonce[thr_id], 0xff, sizeof(uint32_t));
-
-	const uint32_t threadsperblock = 128;
-
-	dim3 grid((threads + threadsperblock - 1) / threadsperblock);
-	dim3 block(threadsperblock);
-	dim3 grid50((threads + 256 - 1) / 256);
-	dim3 block50(256);
-
-	if (device_sm[device_map[thr_id]] <= 300) {
-		applog(LOG_ERR,"Sorry pluck not supported on SM 3.0 devices");
-		return 0;
-	} else if (device_sm[device_map[thr_id]] >= 500) {
-		pluck_gpu_hash0_v50 <<< grid50, block50 >>>(threads, startNounce);
-		pluck_gpu_hash_v50  <<< grid50, block50 >>>(threads, startNounce, d_PlNonce[thr_id]);
-	} else {
-		pluck_gpu_hash0 <<< grid, block >>>(threads, startNounce);
-		pluck_gpu_hash <<< grid, block >>>(threads, startNounce, d_PlNonce[thr_id]);
-	}
-
-	//MyStreamSynchronize(NULL, order, thr_id);
-	CUDA_SAFE_CALL(cudaThreadSynchronize());
-	cudaMemcpy(&result[thr_id], d_PlNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
-
-	return result[thr_id];
-}
-
-__host__
-void pluck_setBlockTarget(const void *pdata, const void *ptarget)
-{
-	unsigned char PaddedMessage[80];
-	memcpy(PaddedMessage, pdata, 80);
-
-	cudaMemcpyToSymbol(c_data, PaddedMessage, 80, 0, cudaMemcpyHostToDevice);
-	cudaMemcpyToSymbol(pTarget, ptarget, 32, 0, cudaMemcpyHostToDevice);
-}
--- a/pluck/pluck.cu
+++ b/pluck/pluck.cu
@ -1,240 +0,0 @@
-/* Based on djm code */
-
-#include <stdint.h>
-
-#include "miner.h"
-#include "cuda_helper.h"
-
-#include <openssl/sha.h>
-
-static uint32_t *d_hash[MAX_GPUS] ;
-
-extern void pluck_setBlockTarget(const void* data, const void *ptarget);
-extern void pluck_cpu_init(int thr_id, uint32_t threads, uint32_t *d_outputHash);
-extern uint32_t pluck_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, int order);
-
-extern float tp_coef[MAX_GPUS];
-
-#define ROTL(a, b) (((a) << (b)) | ((a) >> (32 - (b))))
-//note, this is 64 bytes
-static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
-{
-#define ROTL(a, b) (((a) << (b)) | ((a) >> (32 - (b))))
-	uint32_t x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, x11, x12, x13, x14, x15;
-	int i;
-
-	x00 = (B[0] ^= Bx[0]);
-	x01 = (B[1] ^= Bx[1]);
-	x02 = (B[2] ^= Bx[2]);
-	x03 = (B[3] ^= Bx[3]);
-	x04 = (B[4] ^= Bx[4]);
-	x05 = (B[5] ^= Bx[5]);
-	x06 = (B[6] ^= Bx[6]);
-	x07 = (B[7] ^= Bx[7]);
-	x08 = (B[8] ^= Bx[8]);
-	x09 = (B[9] ^= Bx[9]);
-	x10 = (B[10] ^= Bx[10]);
-	x11 = (B[11] ^= Bx[11]);
-	x12 = (B[12] ^= Bx[12]);
-	x13 = (B[13] ^= Bx[13]);
-	x14 = (B[14] ^= Bx[14]);
-	x15 = (B[15] ^= Bx[15]);
-	for (i = 0; i < 8; i += 2) {
-		/* Operate on columns. */
-		x04 ^= ROTL(x00 + x12, 7);  x09 ^= ROTL(x05 + x01, 7);
-		x14 ^= ROTL(x10 + x06, 7);  x03 ^= ROTL(x15 + x11, 7);
-
-		x08 ^= ROTL(x04 + x00, 9);  x13 ^= ROTL(x09 + x05, 9);
-		x02 ^= ROTL(x14 + x10, 9);  x07 ^= ROTL(x03 + x15, 9);
-
-		x12 ^= ROTL(x08 + x04, 13);  x01 ^= ROTL(x13 + x09, 13);
-		x06 ^= ROTL(x02 + x14, 13);  x11 ^= ROTL(x07 + x03, 13);
-
-		x00 ^= ROTL(x12 + x08, 18);  x05 ^= ROTL(x01 + x13, 18);
-		x10 ^= ROTL(x06 + x02, 18);  x15 ^= ROTL(x11 + x07, 18);
-
-		/* Operate on rows. */
-		x01 ^= ROTL(x00 + x03, 7);  x06 ^= ROTL(x05 + x04, 7);
-		x11 ^= ROTL(x10 + x09, 7);  x12 ^= ROTL(x15 + x14, 7);
-
-		x02 ^= ROTL(x01 + x00, 9);  x07 ^= ROTL(x06 + x05, 9);
-		x08 ^= ROTL(x11 + x10, 9);  x13 ^= ROTL(x12 + x15, 9);
-
-		x03 ^= ROTL(x02 + x01, 13);  x04 ^= ROTL(x07 + x06, 13);
-		x09 ^= ROTL(x08 + x11, 13);  x14 ^= ROTL(x13 + x12, 13);
-
-		x00 ^= ROTL(x03 + x02, 18);  x05 ^= ROTL(x04 + x07, 18);
-		x10 ^= ROTL(x09 + x08, 18);  x15 ^= ROTL(x14 + x13, 18);
-	}
-	B[0] += x00;
-	B[1] += x01;
-	B[2] += x02;
-	B[3] += x03;
-	B[4] += x04;
-	B[5] += x05;
-	B[6] += x06;
-	B[7] += x07;
-	B[8] += x08;
-	B[9] += x09;
-	B[10] += x10;
-	B[11] += x11;
-	B[12] += x12;
-	B[13] += x13;
-	B[14] += x14;
-	B[15] += x15;
-#undef ROTL
-}
-
-static void sha256_hash(uchar *hash, const uchar *data, int len)
-{
-	SHA256_CTX ctx;
-	SHA256_Init(&ctx);
-	SHA256_Update(&ctx, data, len);
-	SHA256_Final(hash, &ctx);
-}
-
-// hash exactly 64 bytes (ie, sha256 block size)
-static void sha256_hash512(uint32_t *hash, const uint32_t *data)
-{
-	uint32_t _ALIGN(64) S[16];
-	uint32_t _ALIGN(64) T[16];
-	uchar _ALIGN(64) E[64] = { 0 };
-	int i;
-
-	sha256_init(S);
-
-	for (i = 0; i < 16; i++)
-		T[i] = be32dec(&data[i]);
-	sha256_transform(S, T, 0);
-
-	E[3] = 0x80;
-	E[61] = 0x02; // T[15] = 8 * 64 => 0x200;
-	sha256_transform(S, (uint32_t*)E, 0);
-
-	for (i = 0; i < 8; i++)
-		be32enc(&hash[i], S[i]);
-}
-
-#define BLOCK_HEADER_SIZE 80
-void pluckhash(uint32_t *hash, const uint32_t *data, uchar *hashbuffer, const int N)
-{
-	int size = N * 1024;
-	sha256_hash(hashbuffer, (uchar*)data, BLOCK_HEADER_SIZE);
-	memset(&hashbuffer[32], 0, 32);
-
-	for (int i = 64; i < size - 32; i += 32)
-	{
-		uint32_t _ALIGN(64) randseed[16];
-		uint32_t _ALIGN(64) randbuffer[16];
-		uint32_t _ALIGN(64) joint[16];
-		//i-4 because we use integers for all references against this, and we don't want to go 3 bytes over the defined area
-		//we could use size here, but then it's probable to use 0 as the value in most cases
-		int randmax = i - 4;
-
-		//setup randbuffer to be an array of random indexes
-		memcpy(randseed, &hashbuffer[i - 64], 64);
-
-		if (i > 128) memcpy(randbuffer, &hashbuffer[i - 128], 64);
-		else memset(randbuffer, 0, 64);
-
-		xor_salsa8((uint32_t*)randbuffer, (uint32_t*)randseed);
-		memcpy(joint, &hashbuffer[i - 32], 32);
-
-		//use the last hash value as the seed
-		for (int j = 32; j < 64; j += 4)
-		{
-			//every other time, change to next random index
-			//randmax - 32 as otherwise we go beyond memory that's already been written to
-			uint32_t rand = randbuffer[(j - 32) >> 2] % (randmax - 32);
-			joint[j >> 2] = *((uint32_t *)&hashbuffer[rand]);
-		}
-
-		sha256_hash512((uint32_t*)&hashbuffer[i], joint);
-
-		//setup randbuffer to be an array of random indexes
-		//use last hash value and previous hash value(post-mixing)
-		memcpy(randseed, &hashbuffer[i - 32], 64);
-
-		if (i > 128) memcpy(randbuffer, &hashbuffer[i - 128], 64);
-		else memset(randbuffer, 0, 64);
-
-		xor_salsa8((uint32_t*)randbuffer, (uint32_t*)randseed);
-
-		//use the last hash value as the seed
-		for (int j = 0; j < 32; j += 2)
-		{
-			uint32_t rand = randbuffer[j >> 1] % randmax;
-			*((uint32_t *)(hashbuffer + rand)) = *((uint32_t *)(hashbuffer + j + randmax));
-		}
-	}
-
-	memcpy(hash, hashbuffer, 32);
-}
-
-static bool init[MAX_GPUS] = { 0 };
-
-static __thread uchar* scratchbuf = NULL;
-
-extern "C" int scanhash_pluck(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
-	uint32_t max_nonce, unsigned long *hashes_done)
-{
-	const uint32_t first_nonce = pdata[19];
-	uint32_t endiandata[20];
-	int opt_pluck_n = 128;
-
-	int intensity = is_windows() ? 17 : 19; /* beware > 20 could work and create diff problems later */
-	uint32_t throughput = device_intensity(thr_id, __func__, 1U << intensity);
-	// divide by 128 for this algo which require a lot of memory
-	throughput = throughput / 128 - 256;
-	throughput = min(throughput, max_nonce - first_nonce + 1);
-
-	if (opt_benchmark)
-		((uint32_t*)ptarget)[7] = 0x0000ff;
-
-	if (!init[thr_id])
-	{
-		cudaSetDevice(device_map[thr_id]);
-		//cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
-		//cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
-		cudaMalloc(&d_hash[thr_id], opt_pluck_n * 1024 * throughput);
-
-		if (!scratchbuf)
-			scratchbuf = (uchar*) calloc(opt_pluck_n, 1024);
-
-		pluck_cpu_init(thr_id, throughput, d_hash[thr_id]);
-
-		CUDA_SAFE_CALL(cudaGetLastError());
-		applog(LOG_INFO, "Using %d cuda threads", throughput);
-
-		init[thr_id] = true;
-	}
-
-	for (int k = 0; k < 20; k++)
-		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
-
-	pluck_setBlockTarget(endiandata,ptarget);
-
-	do {
-		uint32_t foundNonce = pluck_cpu_hash(thr_id, throughput, pdata[19], 0);
-		if (foundNonce != UINT32_MAX)
-		{
-			const uint32_t Htarg = ptarget[7];
-			uint32_t vhash64[8];
-			be32enc(&endiandata[19], foundNonce);
-			pluckhash(vhash64, endiandata, scratchbuf, opt_pluck_n);
-			if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
-				*hashes_done = pdata[19] - first_nonce + throughput;
-				pdata[19] = foundNonce;
-				return 1;
-			} else {
-				applog(LOG_WARNING, "GPU #%d: result for %08x does not validate on CPU!", device_map[thr_id], foundNonce);
-			}
-		}
-
-		pdata[19] += throughput;
-
-	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
-
-	*hashes_done = pdata[19] - first_nonce;
-	return 0;
-}
--- a/util.cpp
+++ b/util.cpp
@ -1853,9 +1853,6 @@ void print_hash_tests(void)
 	pentablakehash(&hash[0], &buf[0]);
 	printpfx("pentablake", hash);

-	pluckhash((uint32_t*)&hash[0], (uint32_t*)&buf[0], scratchbuf, 128);
-	printpfx("pluck", hash);
-
 	quarkhash(&hash[0], &buf[0]);
 	printpfx("quark", hash);