Added Pluck algo

10 years ago · 8cb4fc9d6e
11 changed files with 1299 additions and 204 deletions
--- a/AUTHORS.md
+++ b/AUTHORS.md
@ -53,6 +53,7 @@ updated by many others.
				@@ -53,6 +53,7 @@ updated by many others.
 * Perry Huang <perryh>
 * Joseph Bruggeman <jbruggeman>
 * Badman74 <badman74>
+* djm34 <djm4>

 ...and many others. See:

--- a/Makefile.am
+++ b/Makefile.am
@ -70,6 +70,7 @@ sgminer_SOURCES += algorithm/whirlcoin.c algorithm/whirlcoin.h
				@@ -70,6 +70,7 @@ sgminer_SOURCES += algorithm/whirlcoin.c algorithm/whirlcoin.h
 sgminer_SOURCES += algorithm/neoscrypt.c algorithm/neoscrypt.h
 sgminer_SOURCES += algorithm/whirlpoolx.c algorithm/whirlpoolx.h
 sgminer_SOURCES += algorithm/lyra2re.c algorithm/lyra2re.h algorithm/lyra2.c algorithm/lyra2.h algorithm/sponge.c algorithm/sponge.h
+sgminer_SOURCES += algorithm/pluck.c algorithm/pluck.h

 bin_SCRIPTS	= $(top_srcdir)/kernel/*.cl

--- a/algorithm.c
+++ b/algorithm.c
@ -33,6 +33,7 @@
				@@ -33,6 +33,7 @@
 #include "algorithm/neoscrypt.h"
 #include "algorithm/whirlpoolx.h"
 #include "algorithm/lyra2re.h"
+#include "algorithm/pluck.h"

 #include "compat.h"

@ -56,7 +57,8 @@ const char *algorithm_type_str[] = {
				@@ -56,7 +57,8 @@ const char *algorithm_type_str[] = {
  "Whirlcoin",
  "Neoscrypt",
  "WhirlpoolX",
-  "Lyra2RE"
+  "Lyra2RE",
+  "Pluck"
 };

 void sha256(const unsigned char *message, unsigned int len, unsigned char *digest)
@ -699,6 +701,25 @@ static cl_int queue_lyra2RE_kernel(struct __clState *clState, struct _dev_blk_ct
				@@ -699,6 +701,25 @@ static cl_int queue_lyra2RE_kernel(struct __clState *clState, struct _dev_blk_ct
  return status;
 }

+static cl_int queue_pluck_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads)
+{
+  cl_kernel *kernel = &clState->kernel;
+  unsigned int num = 0;
+  cl_uint le_target;
+  cl_int status = 0;
+
+  le_target = (cl_uint)le32toh(((uint32_t *)blk->work->/*device_*/target)[7]);
+  flip80(clState->cldata, blk->work->data);
+  status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL);
+
+  CL_SET_ARG(clState->CLbuffer0);
+  CL_SET_ARG(clState->outputBuffer);
+  CL_SET_ARG(clState->padbuffer8);
+  CL_SET_ARG(le_target);
+
+  return status;
+}
+
 typedef struct _algorithm_settings_t {
  const char *name; /* Human-readable identifier */
  algorithm_type_t type; //common algorithm type
@ -738,6 +759,10 @@ static algorithm_settings_t algos[] = {
				@@ -738,6 +759,10 @@ static algorithm_settings_t algos[] = {
  A_NEOSCRYPT("neoscrypt"),
 #undef A_NEOSCRYPT

+#define A_PLUCK(a) \
+  { a, ALGO_PLUCK, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, pluck_regenhash, queue_pluck_kernel, gen_hash, append_neoscrypt_compiler_options }
+  A_PLUCK("pluck"),
+#undef A_PLUCK
  // kernels starting from this will have difficulty calculated by using quarkcoin algorithm
 #define A_QUARK(a, b) \
  { a, ALGO_QUARK, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, gen_hash, append_x11_compiler_options }
--- a/algorithm.h
+++ b/algorithm.h
@ -26,7 +26,8 @@ typedef enum {
				@@ -26,7 +26,8 @@ typedef enum {
  ALGO_FRESH,
  ALGO_WHIRL,
  ALGO_NEOSCRYPT,
-  ALGO_LYRA2RE
+  ALGO_LYRA2RE,
+  ALGO_PLUCK
 } algorithm_type_t;

 extern const char *algorithm_type_str[];
--- a/algorithm/pluck.c
+++ b/algorithm/pluck.c
@ -0,0 +1,482 @@
				@@ -0,0 +1,482 @@
+/*-
+ * Copyright 2014 James Lovejoy
+ * Copyright 2014 phm
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "miner.h"
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+
+
+
+static const uint32_t sha256_h[8] = {
+	0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
+	0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
+};
+
+static const uint32_t sha256_k[64] = {
+	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+};
+
+void sha256_init(uint32_t *state)
+{
+	memcpy(state, sha256_h, 32);
+}
+
+/* Elementary functions used by SHA256 */
+#define Ch(x, y, z)     ((x & (y ^ z)) ^ z)
+#define Maj(x, y, z)    ((x & (y | z)) | (y & z))
+#define ROTR(x, n)      ((x >> n) | (x << (32 - n)))
+#define S0(x)           (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
+#define S1(x)           (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
+#define s0(x)           (ROTR(x, 7) ^ ROTR(x, 18) ^ (x >> 3))
+#define s1(x)           (ROTR(x, 17) ^ ROTR(x, 19) ^ (x >> 10))
+
+/* SHA256 round function */
+#define RND(a, b, c, d, e, f, g, h, k) \
+	do { \
+		t0 = h + S1(e) + Ch(e, f, g) + k; \
+		t1 = S0(a) + Maj(a, b, c); \
+		d += t0; \
+		h  = t0 + t1; \
+		} while (0)
+
+/* Adjusted round function for rotating state */
+#define RNDr(S, W, i) \
+	RND(S[(64 - i) % 8], S[(65 - i) % 8], \
+	    S[(66 - i) % 8], S[(67 - i) % 8], \
+	    S[(68 - i) % 8], S[(69 - i) % 8], \
+	    S[(70 - i) % 8], S[(71 - i) % 8], \
+	    W[i] + sha256_k[i])
+
+
+/*
+* SHA256 block compression function.  The 256-bit state is transformed via
+* the 512-bit input block to produce a new state.
+*/
+void sha256_transform(uint32_t *state, const uint32_t *block, int swap)
+{
+	uint32_t W[64];
+	uint32_t S[8];
+	uint32_t t0, t1;
+	int i;
+
+	/* 1. Prepare message schedule W. */
+	if (swap) {
+		for (i = 0; i < 16; i++)
+			W[i] = swab32(block[i]);
+	}
+	else
+		memcpy(W, block, 64);
+	for (i = 16; i < 64; i += 2) {
+		W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];
+		W[i + 1] = s1(W[i - 1]) + W[i - 6] + s0(W[i - 14]) + W[i - 15];
+	}
+
+	/* 2. Initialize working variables. */
+	memcpy(S, state, 32);
+
+	/* 3. Mix. */
+	RNDr(S, W, 0);
+	RNDr(S, W, 1);
+	RNDr(S, W, 2);
+	RNDr(S, W, 3);
+	RNDr(S, W, 4);
+	RNDr(S, W, 5);
+	RNDr(S, W, 6);
+	RNDr(S, W, 7);
+	RNDr(S, W, 8);
+	RNDr(S, W, 9);
+	RNDr(S, W, 10);
+	RNDr(S, W, 11);
+	RNDr(S, W, 12);
+	RNDr(S, W, 13);
+	RNDr(S, W, 14);
+	RNDr(S, W, 15);
+	RNDr(S, W, 16);
+	RNDr(S, W, 17);
+	RNDr(S, W, 18);
+	RNDr(S, W, 19);
+	RNDr(S, W, 20);
+	RNDr(S, W, 21);
+	RNDr(S, W, 22);
+	RNDr(S, W, 23);
+	RNDr(S, W, 24);
+	RNDr(S, W, 25);
+	RNDr(S, W, 26);
+	RNDr(S, W, 27);
+	RNDr(S, W, 28);
+	RNDr(S, W, 29);
+	RNDr(S, W, 30);
+	RNDr(S, W, 31);
+	RNDr(S, W, 32);
+	RNDr(S, W, 33);
+	RNDr(S, W, 34);
+	RNDr(S, W, 35);
+	RNDr(S, W, 36);
+	RNDr(S, W, 37);
+	RNDr(S, W, 38);
+	RNDr(S, W, 39);
+	RNDr(S, W, 40);
+	RNDr(S, W, 41);
+	RNDr(S, W, 42);
+	RNDr(S, W, 43);
+	RNDr(S, W, 44);
+	RNDr(S, W, 45);
+	RNDr(S, W, 46);
+	RNDr(S, W, 47);
+	RNDr(S, W, 48);
+	RNDr(S, W, 49);
+	RNDr(S, W, 50);
+	RNDr(S, W, 51);
+	RNDr(S, W, 52);
+	RNDr(S, W, 53);
+	RNDr(S, W, 54);
+	RNDr(S, W, 55);
+	RNDr(S, W, 56);
+	RNDr(S, W, 57);
+	RNDr(S, W, 58);
+	RNDr(S, W, 59);
+	RNDr(S, W, 60);
+	RNDr(S, W, 61);
+	RNDr(S, W, 62);
+	RNDr(S, W, 63);
+
+	/* 4. Mix local working variables into global state */
+	for (i = 0; i < 8; i++)
+		state[i] += S[i];
+}
+
+/*
+ * Encode a length len/4 vector of (uint32_t) into a length len vector of
+ * (unsigned char) in big-endian form.  Assumes len is a multiple of 4.
+ */
+static inline void
+be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
+{
+	uint32_t i;
+
+	for (i = 0; i < len; i++)
+		dst[i] = htobe32(src[i]);
+}
+static inline void be32enc(void *pp, uint32_t x)
+{
+	uint8_t *p = (uint8_t *)pp;
+	p[3] = x & 0xff;
+	p[2] = (x >> 8) & 0xff;
+	p[1] = (x >> 16) & 0xff;
+	p[0] = (x >> 24) & 0xff;
+}
+static inline uint32_t be32dec(const void *pp)
+{
+	const uint8_t *p = (uint8_t const *)pp;
+	return ((uint32_t)(p[3]) + ((uint32_t)(p[2]) << 8) +
+		((uint32_t)(p[1]) << 16) + ((uint32_t)(p[0]) << 24));
+}
+#define ROTL(a, b) (((a) << (b)) | ((a) >> (32 - (b))))
+//note, this is 64 bytes
+static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
+{
+#define ROTL(a, b) (((a) << (b)) | ((a) >> (32 - (b))))
+	uint32_t x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, x11, x12, x13, x14, x15;
+	int i;
+
+	x00 = (B[0] ^= Bx[0]);
+	x01 = (B[1] ^= Bx[1]);
+	x02 = (B[2] ^= Bx[2]);
+	x03 = (B[3] ^= Bx[3]);
+	x04 = (B[4] ^= Bx[4]);
+	x05 = (B[5] ^= Bx[5]);
+	x06 = (B[6] ^= Bx[6]);
+	x07 = (B[7] ^= Bx[7]);
+	x08 = (B[8] ^= Bx[8]);
+	x09 = (B[9] ^= Bx[9]);
+	x10 = (B[10] ^= Bx[10]);
+	x11 = (B[11] ^= Bx[11]);
+	x12 = (B[12] ^= Bx[12]);
+	x13 = (B[13] ^= Bx[13]);
+	x14 = (B[14] ^= Bx[14]);
+	x15 = (B[15] ^= Bx[15]);
+	for (i = 0; i < 8; i += 2) {
+		/* Operate on columns. */
+		x04 ^= ROTL(x00 + x12, 7);  x09 ^= ROTL(x05 + x01, 7);
+		x14 ^= ROTL(x10 + x06, 7);  x03 ^= ROTL(x15 + x11, 7);
+
+		x08 ^= ROTL(x04 + x00, 9);  x13 ^= ROTL(x09 + x05, 9);
+		x02 ^= ROTL(x14 + x10, 9);  x07 ^= ROTL(x03 + x15, 9);
+
+		x12 ^= ROTL(x08 + x04, 13);  x01 ^= ROTL(x13 + x09, 13);
+		x06 ^= ROTL(x02 + x14, 13);  x11 ^= ROTL(x07 + x03, 13);
+
+		x00 ^= ROTL(x12 + x08, 18);  x05 ^= ROTL(x01 + x13, 18);
+		x10 ^= ROTL(x06 + x02, 18);  x15 ^= ROTL(x11 + x07, 18);
+
+		/* Operate on rows. */
+		x01 ^= ROTL(x00 + x03, 7);  x06 ^= ROTL(x05 + x04, 7);
+		x11 ^= ROTL(x10 + x09, 7);  x12 ^= ROTL(x15 + x14, 7);
+
+		x02 ^= ROTL(x01 + x00, 9);  x07 ^= ROTL(x06 + x05, 9);
+		x08 ^= ROTL(x11 + x10, 9);  x13 ^= ROTL(x12 + x15, 9);
+
+		x03 ^= ROTL(x02 + x01, 13);  x04 ^= ROTL(x07 + x06, 13);
+		x09 ^= ROTL(x08 + x11, 13);  x14 ^= ROTL(x13 + x12, 13);
+
+		x00 ^= ROTL(x03 + x02, 18);  x05 ^= ROTL(x04 + x07, 18);
+		x10 ^= ROTL(x09 + x08, 18);  x15 ^= ROTL(x14 + x13, 18);
+	}
+	B[0] += x00;
+	B[1] += x01;
+	B[2] += x02;
+	B[3] += x03;
+	B[4] += x04;
+	B[5] += x05;
+	B[6] += x06;
+	B[7] += x07;
+	B[8] += x08;
+	B[9] += x09;
+	B[10] += x10;
+	B[11] += x11;
+	B[12] += x12;
+	B[13] += x13;
+	B[14] += x14;
+	B[15] += x15;
+#undef ROTL
+}
+
+void sha256_hash(unsigned char *hash, const unsigned char *data, int len)
+{
+	uint32_t S[16], T[16];
+	int i, r;
+
+	sha256_init(S);
+	for (r = len; r > -9; r -= 64) {
+		if (r < 64)
+			memset(T, 0, 64);
+		memcpy(T, data + len - r, r > 64 ? 64 : (r < 0 ? 0 : r));
+		if (r >= 0 && r < 64)
+			((unsigned char *)T)[r] = 0x80;
+		for (i = 0; i < 16; i++)
+			T[i] = be32dec(T + i);
+
+		if (r < 56)
+			T[15] = 8 * len;
+		sha256_transform(S, T, 0);
+	}
+	for (i = 0; i < 8; i++)
+		be32enc((uint32_t *)hash + i, S[i]);
+}
+
+void sha256_hash512(unsigned char *hash, const unsigned char *data)
+{
+	uint32_t S[16], T[16];
+	int i;
+
+	sha256_init(S);
+
+	memcpy(T, data, 64);
+	
+	for (i = 0; i < 16; i++)
+		T[i] = be32dec(T + i);
+	sha256_transform(S, T, 0);
+
+	memset(T, 0, 64);
+	//memcpy(T, data + 64, 0);
+	((unsigned char *)T)[0] = 0x80;
+	for (i = 0; i < 16; i++)
+		T[i] = be32dec(T + i);
+	T[15] = 8 * 64;
+	sha256_transform(S, T, 0);
+
+	for (i = 0; i < 8; i++)
+		be32enc((uint32_t *)hash + i, S[i]);
+}
+
+inline void pluckrehash(void *state, const void *input)
+{
+
+	int i,j;
+	uint32_t data[20];
+	
+	const int HASH_MEMORY = 128 * 1024;
+	uint8_t * scratchbuf = (uint8_t*)malloc(HASH_MEMORY);
+	memcpy(data,input,80);
+
+	uint8_t hashbuffer[128*1024]; //don't allocate this on stack, since it's huge.. 
+	int size = HASH_MEMORY;
+	memset(hashbuffer, 0, 64);
+	sha256_hash(&hashbuffer[0], (uint8_t*)data, 80);
+	for (i = 64; i < size - 32; i += 32)
+	{
+		int randmax = i - 4; //we could use size here, but then it's probable to use 0 as the value in most cases
+		uint32_t joint[16];
+		uint32_t randbuffer[16];
+
+		uint32_t randseed[16];
+		memcpy(randseed, &hashbuffer[i - 64], 64);
+		if (i>128)
+		{
+			memcpy(randbuffer, &hashbuffer[i - 128], 64);
+		}
+		else
+		{
+			memset(&randbuffer, 0, 64);
+		}
+
+		xor_salsa8(randbuffer, randseed);
+
+		memcpy(joint, &hashbuffer[i - 32], 32);
+		//use the last hash value as the seed
+		for (j = 32; j < 64; j += 4)
+		{
+			uint32_t rand = randbuffer[(j - 32) / 4] % (randmax - 32); 
+			joint[j / 4] = *((uint32_t*)&hashbuffer[rand]);
+			
+		}
+		sha256_hash512(&hashbuffer[i], (uint8_t*)joint);
+		
+		memcpy(randseed, &hashbuffer[i - 32], 64); 
+		if (i>128)
+		{
+			memcpy(randbuffer, &hashbuffer[i - 128], 64);
+		}
+		else
+		{
+			memset(randbuffer, 0, 64);
+		}
+		xor_salsa8(randbuffer, randseed);
+		for (j = 0; j < 32; j += 2)
+		{
+			uint32_t rand = randbuffer[j / 2] % randmax;
+			*((uint32_t*)&hashbuffer[rand]) = *((uint32_t*)&hashbuffer[j + i - 4]);
+		}
+	}
+
+	
+	//printf("cpu hashbuffer %08x nonce %08x\n", ((uint32_t*)hashbuffer)[7],data[19]);
+		
+	memcpy(state, hashbuffer, 32);
+}
+
+static const uint32_t diff1targ = 0x0000ffff;
+
+
+/* Used externally as confirmation of correct OCL code */
+int pluck_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
+{
+	uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]);
+	uint32_t data[20], ohash[8];
+
+	be32enc_vect(data, (const uint32_t *)pdata, 19);
+	data[19] = htobe32(nonce);
+	pluckrehash(ohash, data);
+
+	tmp_hash7 = be32toh(ohash[7]);
+
+	applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx",
+		(long unsigned int)Htarg,
+		(long unsigned int)diff1targ,
+		(long unsigned int)tmp_hash7);
+
+	if (tmp_hash7 > diff1targ)
+		return -1;
+
+	if (tmp_hash7 > Htarg)
+		return 0;
+
+	return 1;
+}
+
+void pluck_regenhash(struct work *work)
+{
+        uint32_t data[20];
+        uint32_t *nonce = (uint32_t *)(work->data + 76);
+        uint32_t *ohash = (uint32_t *)(work->hash);
+
+        be32enc_vect(data, (const uint32_t *)work->data, 19);
+        data[19] = htobe32(*nonce);	
+
+        pluckrehash(ohash, data);
+}
+
+
+bool scanhash_pluck(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
+	unsigned char *pdata, unsigned char __maybe_unused *phash1,
+	unsigned char __maybe_unused *phash, const unsigned char *ptarget,
+	uint32_t max_nonce, uint32_t *last_nonce, uint32_t n)
+{
+	uint32_t *nonce = (uint32_t *)(pdata + 76);
+	uint32_t data[20];
+	uint32_t tmp_hash7;
+	uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]);
+	bool ret = false;
+
+	be32enc_vect(data, (const uint32_t *)pdata, 19);
+
+	while (1)
+	{
+		uint32_t ostate[8];
+
+		*nonce = ++n;
+		data[19] = (n);
+		pluckrehash(ostate, data);
+		tmp_hash7 = (ostate[7]);
+
+		applog(LOG_INFO, "data7 %08lx", (long unsigned int)data[7]);
+
+		if (unlikely(tmp_hash7 <= Htarg))
+		{
+			((uint32_t *)pdata)[19] = htobe32(n);
+			*last_nonce = n;
+			ret = true;
+			break;
+		}
+
+		if (unlikely((n >= max_nonce) || thr->work_restart))
+		{
+			*last_nonce = n;
+			break;
+		}
+	}
+
+	return ret;
+}
--- a/algorithm/pluck.h
+++ b/algorithm/pluck.h
@ -0,0 +1,10 @@
				@@ -0,0 +1,10 @@
+#ifndef PLUCK_H
+#define PLUCK_H
+
+#include "miner.h"
+#define PLUCK_SCRATCHBUF_SIZE (128 * 1024)
+extern int pluck_test(unsigned char *pdata, const unsigned char *ptarget,
+			uint32_t nonce);
+extern void pluck_regenhash(struct work *work);
+
+#endif /* PLUCK_H */
--- a/kernel/pluck.cl
+++ b/kernel/pluck.cl
@ -0,0 +1,463 @@
				@@ -0,0 +1,463 @@
+/*
+* "pluck" kernel implementation.
+*
+* ==========================(LICENSE BEGIN)============================
+*
+* Copyright (c) 2015  djm34
+*
+* Permission is hereby granted, free of charge, to any person obtaining
+* a copy of this software and associated documentation files (the
+* "Software"), to deal in the Software without restriction, including
+* without limitation the rights to use, copy, modify, merge, publish,
+* distribute, sublicense, and/or sell copies of the Software, and to
+* permit persons to whom the Software is furnished to do so, subject to
+* the following conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*
+* ===========================(LICENSE END)=============================
+*
+* @author   djm34
+*/
+#if !defined(cl_khr_byte_addressable_store)
+#error "Device does not support unaligned stores"
+#endif
+#define ROL32(x, n)  rotate(x, (uint) n)
+//#define ROL32(x, n)   (((x) << (n)) | ((x) >> (32 - (n))))
+#define HASH_MEMORY 4096
+
+
+#define SALSA(a,b,c,d) do { \
+    t =a+d; b^=rotate(t,  7U);    \
+    t =b+a; c^=rotate(t,  9U);    \
+    t =c+b; d^=rotate(t, 13U);    \
+    t =d+c; a^=rotate(t, 18U);     \
+} while(0)
+
+
+#define SALSA_CORE(state) do { \
+\
+SALSA(state.s0,state.s4,state.s8,state.sc); \
+SALSA(state.s5,state.s9,state.sd,state.s1); \
+SALSA(state.sa,state.se,state.s2,state.s6); \
+SALSA(state.sf,state.s3,state.s7,state.sb); \
+SALSA(state.s0,state.s1,state.s2,state.s3); \
+SALSA(state.s5,state.s6,state.s7,state.s4); \
+SALSA(state.sa,state.sb,state.s8,state.s9); \
+SALSA(state.sf,state.sc,state.sd,state.se); \
+	} while(0)
+
+/*
+#define SALSA_CORE(state)	do { \
+	state.s4 ^= rotate(state.s0 + state.sc, 7U); state.s8 ^= rotate(state.s4 + state.s0, 9U); state.sc ^= rotate(state.s8 + state.s4, 13U); state.s0 ^= rotate(state.sc + state.s8, 18U); \
+	state.s9 ^= rotate(state.s5 + state.s1, 7U); state.sd ^= rotate(state.s9 + state.s5, 9U); state.s1 ^= rotate(state.sd + state.s9, 13U); state.s5 ^= rotate(state.s1 + state.sd, 18U); \
+	state.se ^= rotate(state.sa + state.s6, 7U); state.s2 ^= rotate(state.se + state.sa, 9U); state.s6 ^= rotate(state.s2 + state.se, 13U); state.sa ^= rotate(state.s6 + state.s2, 18U); \
+	state.s3 ^= rotate(state.sf + state.sb, 7U); state.s7 ^= rotate(state.s3 + state.sf, 9U); state.sb ^= rotate(state.s7 + state.s3, 13U); state.sf ^= rotate(state.sb + state.s7, 18U); \
+	state.s1 ^= rotate(state.s0 + state.s3, 7U); state.s2 ^= rotate(state.s1 + state.s0, 9U); state.s3 ^= rotate(state.s2 + state.s1, 13U); state.s0 ^= rotate(state.s3 + state.s2, 18U); \
+	state.s6 ^= rotate(state.s5 + state.s4, 7U); state.s7 ^= rotate(state.s6 + state.s5, 9U); state.s4 ^= rotate(state.s7 + state.s6, 13U); state.s5 ^= rotate(state.s4 + state.s7, 18U); \
+	state.sb ^= rotate(state.sa + state.s9, 7U); state.s8 ^= rotate(state.sb + state.sa, 9U); state.s9 ^= rotate(state.s8 + state.sb, 13U); state.sa ^= rotate(state.s9 + state.s8, 18U); \
+	state.sc ^= rotate(state.sf + state.se, 7U); state.sd ^= rotate(state.sc + state.sf, 9U); state.se ^= rotate(state.sd + state.sc, 13U); state.sf ^= rotate(state.se + state.sd, 18U); \
+} while(0)
+*/
+uint16 xor_salsa8(uint16 Bx)
+{
+uint t;
+	uint16 st = Bx;
+	SALSA_CORE(st);
+	SALSA_CORE(st);
+	SALSA_CORE(st);
+	SALSA_CORE(st);
+	return(st + Bx);
+}
+
+
+
+#define SHR(x, n)    ((x) >> n)
+#define SWAP32(a)    (as_uint(as_uchar4(a).wzyx))
+
+#define S0(x) (ROL32(x, 25) ^ ROL32(x, 14) ^  SHR(x, 3))
+#define S1(x) (ROL32(x, 15) ^ ROL32(x, 13) ^  SHR(x, 10))
+
+#define S2(x) (ROL32(x, 30) ^ ROL32(x, 19) ^ ROL32(x, 10))
+#define S3(x) (ROL32(x, 26) ^ ROL32(x, 21) ^ ROL32(x, 7))
+
+#define P(a,b,c,d,e,f,g,h,x,K)                  \
+{                                               \
+    temp1 = h + S3(e) + F1(e,f,g) + (K + x);      \
+    d += temp1; h = temp1 + S2(a) + F0(a,b,c);  \
+}
+
+#define PLAST(a,b,c,d,e,f,g,h,x,K)                  \
+{                                               \
+    d += h + S3(e) + F1(e,f,g) + (x + K);              \
+}
+
+#define F0(y, x, z) bitselect(z, y, z ^ x)
+#define F1(x, y, z) bitselect(z, y, x)
+
+#define R0 (W0 = S1(W14) + W9 + S0(W1) + W0)
+#define R1 (W1 = S1(W15) + W10 + S0(W2) + W1)
+#define R2 (W2 = S1(W0) + W11 + S0(W3) + W2)
+#define R3 (W3 = S1(W1) + W12 + S0(W4) + W3)
+#define R4 (W4 = S1(W2) + W13 + S0(W5) + W4)
+#define R5 (W5 = S1(W3) + W14 + S0(W6) + W5)
+#define R6 (W6 = S1(W4) + W15 + S0(W7) + W6)
+#define R7 (W7 = S1(W5) + W0 + S0(W8) + W7)
+#define R8 (W8 = S1(W6) + W1 + S0(W9) + W8)
+#define R9 (W9 = S1(W7) + W2 + S0(W10) + W9)
+#define R10 (W10 = S1(W8) + W3 + S0(W11) + W10)
+#define R11 (W11 = S1(W9) + W4 + S0(W12) + W11)
+#define R12 (W12 = S1(W10) + W5 + S0(W13) + W12)
+#define R13 (W13 = S1(W11) + W6 + S0(W14) + W13)
+#define R14 (W14 = S1(W12) + W7 + S0(W15) + W14)
+#define R15 (W15 = S1(W13) + W8 + S0(W0) + W15)
+
+#define RD14 (S1(W12) + W7 + S0(W15) + W14)
+#define RD15 (S1(W13) + W8 + S0(W0) + W15)
+
+inline uint8 sha256_round1(uint16 data)
+{
+	uint temp1;
+    uint8 res;
+	uint W0 = SWAP32(data.s0);
+	uint W1 = SWAP32(data.s1);
+	uint W2 = SWAP32(data.s2);
+	uint W3 = SWAP32(data.s3);
+	uint W4 = SWAP32(data.s4);
+	uint W5 = SWAP32(data.s5);
+	uint W6 = SWAP32(data.s6);
+	uint W7 = SWAP32(data.s7);
+	uint W8 = SWAP32(data.s8);
+	uint W9 = SWAP32(data.s9);
+	uint W10 = SWAP32(data.sA);
+	uint W11 = SWAP32(data.sB);
+	uint W12 = SWAP32(data.sC);
+	uint W13 = SWAP32(data.sD);
+	uint W14 = SWAP32(data.sE);
+	uint W15 = SWAP32(data.sF);
+
+	uint v0 = 0x6A09E667;
+	uint v1 = 0xBB67AE85;
+	uint v2 = 0x3C6EF372;
+	uint v3 = 0xA54FF53A;
+	uint v4 = 0x510E527F;
+	uint v5 = 0x9B05688C;
+	uint v6 = 0x1F83D9AB;
+	uint v7 = 0x5BE0CD19;
+
+	P(v0, v1, v2, v3, v4, v5, v6, v7, W0, 0x428A2F98);
+	P(v7, v0, v1, v2, v3, v4, v5, v6, W1, 0x71374491);
+	P(v6, v7, v0, v1, v2, v3, v4, v5, W2, 0xB5C0FBCF);
+	P(v5, v6, v7, v0, v1, v2, v3, v4, W3, 0xE9B5DBA5);
+	P(v4, v5, v6, v7, v0, v1, v2, v3, W4, 0x3956C25B);
+	P(v3, v4, v5, v6, v7, v0, v1, v2, W5, 0x59F111F1);
+	P(v2, v3, v4, v5, v6, v7, v0, v1, W6, 0x923F82A4);
+	P(v1, v2, v3, v4, v5, v6, v7, v0, W7, 0xAB1C5ED5);
+	P(v0, v1, v2, v3, v4, v5, v6, v7, W8, 0xD807AA98);
+	P(v7, v0, v1, v2, v3, v4, v5, v6, W9, 0x12835B01);
+	P(v6, v7, v0, v1, v2, v3, v4, v5, W10, 0x243185BE);
+	P(v5, v6, v7, v0, v1, v2, v3, v4, W11, 0x550C7DC3);
+	P(v4, v5, v6, v7, v0, v1, v2, v3, W12, 0x72BE5D74);
+	P(v3, v4, v5, v6, v7, v0, v1, v2, W13, 0x80DEB1FE);
+	P(v2, v3, v4, v5, v6, v7, v0, v1, W14, 0x9BDC06A7);
+	P(v1, v2, v3, v4, v5, v6, v7, v0, W15, 0xC19BF174);
+
+	P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0xE49B69C1);
+	P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0xEFBE4786);
+	P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x0FC19DC6);
+	P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x240CA1CC);
+	P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x2DE92C6F);
+	P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4A7484AA);
+	P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5CB0A9DC);
+	P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x76F988DA);
+	P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x983E5152);
+	P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA831C66D);
+	P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xB00327C8);
+	P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xBF597FC7);
+	P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xC6E00BF3);
+	P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD5A79147);
+	P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0x06CA6351);
+	P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x14292967);
+
+	P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x27B70A85);
+	P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x2E1B2138);
+	P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x4D2C6DFC);
+	P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x53380D13);
+	P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x650A7354);
+	P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x766A0ABB);
+	P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x81C2C92E);
+	P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x92722C85);
+	P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0xA2BFE8A1);
+	P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA81A664B);
+	P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xC24B8B70);
+	P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xC76C51A3);
+	P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xD192E819);
+	P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD6990624);
+	P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0xF40E3585);
+	P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x106AA070);
+
+	P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x19A4C116);
+	P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x1E376C08);
+	P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x2748774C);
+	P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x34B0BCB5);
+	P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x391C0CB3);
+	P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4ED8AA4A);
+	P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5B9CCA4F);
+	P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x682E6FF3);
+	P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x748F82EE);
+	P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0x78A5636F);
+	P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0x84C87814);
+	P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0x8CC70208);
+	P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0x90BEFFFA);
+	P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xA4506CEB);
+	P(v2, v3, v4, v5, v6, v7, v0, v1, RD14, 0xBEF9A3F7);
+	P(v1, v2, v3, v4, v5, v6, v7, v0, RD15, 0xC67178F2);
+
+	res.s0 = v0 + 0x6A09E667;
+	res.s1 = v1 + 0xBB67AE85;
+	res.s2 = v2 + 0x3C6EF372;
+	res.s3 = v3 + 0xA54FF53A;
+	res.s4 = v4 + 0x510E527F;
+	res.s5 = v5 + 0x9B05688C;
+	res.s6 = v6 + 0x1F83D9AB;
+	res.s7 = v7 + 0x5BE0CD19;
+	return (res);
+}
+
+
+inline uint8 sha256_round2(uint16 data,uint8 buf)
+{
+	uint temp1;
+	uint8 res;
+	uint W0 = data.s0;
+	uint W1 = data.s1;
+	uint W2 = data.s2;
+	uint W3 = data.s3;
+	uint W4 = data.s4;
+	uint W5 = data.s5;
+	uint W6 = data.s6;
+	uint W7 = data.s7;
+	uint W8 = data.s8;
+	uint W9 = data.s9;
+	uint W10 = data.sA;
+	uint W11 = data.sB;
+	uint W12 = data.sC;
+	uint W13 = data.sD;
+	uint W14 = data.sE;
+	uint W15 = data.sF;
+
+	uint v0 = buf.s0;
+	uint v1 = buf.s1;
+	uint v2 = buf.s2;
+	uint v3 = buf.s3;
+	uint v4 = buf.s4;
+	uint v5 = buf.s5;
+	uint v6 = buf.s6;
+	uint v7 = buf.s7;
+
+	P(v0, v1, v2, v3, v4, v5, v6, v7, W0, 0x428A2F98);
+	P(v7, v0, v1, v2, v3, v4, v5, v6, W1, 0x71374491);
+	P(v6, v7, v0, v1, v2, v3, v4, v5, W2, 0xB5C0FBCF);
+	P(v5, v6, v7, v0, v1, v2, v3, v4, W3, 0xE9B5DBA5);
+	P(v4, v5, v6, v7, v0, v1, v2, v3, W4, 0x3956C25B);
+	P(v3, v4, v5, v6, v7, v0, v1, v2, W5, 0x59F111F1);
+	P(v2, v3, v4, v5, v6, v7, v0, v1, W6, 0x923F82A4);
+	P(v1, v2, v3, v4, v5, v6, v7, v0, W7, 0xAB1C5ED5);
+	P(v0, v1, v2, v3, v4, v5, v6, v7, W8, 0xD807AA98);
+	P(v7, v0, v1, v2, v3, v4, v5, v6, W9, 0x12835B01);
+	P(v6, v7, v0, v1, v2, v3, v4, v5, W10, 0x243185BE);
+	P(v5, v6, v7, v0, v1, v2, v3, v4, W11, 0x550C7DC3);
+	P(v4, v5, v6, v7, v0, v1, v2, v3, W12, 0x72BE5D74);
+	P(v3, v4, v5, v6, v7, v0, v1, v2, W13, 0x80DEB1FE);
+	P(v2, v3, v4, v5, v6, v7, v0, v1, W14, 0x9BDC06A7);
+	P(v1, v2, v3, v4, v5, v6, v7, v0, W15, 0xC19BF174);
+
+	P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0xE49B69C1);
+	P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0xEFBE4786);
+	P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x0FC19DC6);
+	P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x240CA1CC);
+	P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x2DE92C6F);
+	P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4A7484AA);
+	P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5CB0A9DC);
+	P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x76F988DA);
+	P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x983E5152);
+	P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA831C66D);
+	P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xB00327C8);
+	P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xBF597FC7);
+	P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xC6E00BF3);
+	P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD5A79147);
+	P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0x06CA6351);
+	P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x14292967);
+
+	P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x27B70A85);
+	P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x2E1B2138);
+	P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x4D2C6DFC);
+	P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x53380D13);
+	P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x650A7354);
+	P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x766A0ABB);
+	P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x81C2C92E);
+	P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x92722C85);
+	P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0xA2BFE8A1);
+	P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA81A664B);
+	P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xC24B8B70);
+	P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xC76C51A3);
+	P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xD192E819);
+	P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD6990624);
+	P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0xF40E3585);
+	P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x106AA070);
+
+	P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x19A4C116);
+	P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x1E376C08);
+	P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x2748774C);
+	P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x34B0BCB5);
+	P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x391C0CB3);
+	P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4ED8AA4A);
+	P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5B9CCA4F);
+	P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x682E6FF3);
+	P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x748F82EE);
+	P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0x78A5636F);
+	P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0x84C87814);
+	P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0x8CC70208);
+	P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0x90BEFFFA);
+	P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xA4506CEB);
+	P(v2, v3, v4, v5, v6, v7, v0, v1, RD14, 0xBEF9A3F7);
+	P(v1, v2, v3, v4, v5, v6, v7, v0, RD15, 0xC67178F2);
+
+	res.s0 = SWAP32(v0 + buf.s0);
+	res.s1 = SWAP32(v1 + buf.s1);
+	res.s2 = SWAP32(v2 + buf.s2);
+	res.s3 = SWAP32(v3 + buf.s3);
+	res.s4 = SWAP32(v4 + buf.s4);
+	res.s5 = SWAP32(v5 + buf.s5);
+	res.s6 = SWAP32(v6 + buf.s6);
+	res.s7 = SWAP32(v7 + buf.s7);
+	return (res);
+}
+
+inline uint8 sha256_80(uint* data,uint nonce)
+{
+
+uint8 buf = sha256_round1( ((uint16*)data)[0]);
+uint in[16];
+for (int i = 0; i<3; i++) { in[i] = SWAP32(data[i + 16]); }
+in[3] = SWAP32(nonce);
+in[4] = 0x80000000;
+in[15] = 0x280;
+for (int i = 5; i<15; i++) { in[i] = 0; }
+
+return(sha256_round2(((uint16*)in)[0], buf));
+}
+
+inline uint8 sha256_64(uint* data)
+{
+	
+uint8 buf=sha256_round1(((uint16*)data)[0]);
+uint in[16];
+for (int i = 1; i<15; i++) { in[i] = 0; }
+in[0] = 0x80000000;
+in[15] = 0x200;
+
+	return(sha256_round2(((uint16*)in)[0],buf));
+}
+
+
+__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
+__kernel void search(__global const uchar* restrict input, __global uint* restrict output, __global uchar *padcache, const uint target)
+{
+
+	__global uchar *hashbuffer = (__global uchar *)(padcache + (1024*128 * (get_global_id(0) % MAX_GLOBAL_THREADS)));
+
+    uint  data[20];
+
+	((uint16 *)data)[0] = ((__global const uint16 *)input)[0];
+	((uint4 *)data)[4] = ((__global const uint4 *)input)[4];
+     
+        ((__global uint8*)hashbuffer)[0] = sha256_80(data,get_global_id(0));
+		((__global uint8*)hashbuffer)[1] = 0;
+     		
+	for (int i = 2; i < 4096 - 1; i++)
+	{
+		uint randmax = i * 32 - 4;
+		uint randseed[16];
+		uint randbuffer[16];
+		uint joint[16];
+        
+		((uint8*)randseed)[0] = ((__global uint8*)hashbuffer)[i - 2];
+		((uint8*)randseed)[1] = ((__global uint8*)hashbuffer)[i - 1];
+        
+		if (i>4)
+		{
+
+			((uint8*)randseed)[0] ^= ((__global uint8*)hashbuffer)[i - 4];
+			((uint8*)randseed)[1] ^= ((__global uint8*)hashbuffer)[i - 3];
+		}
+
+		((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]);
+
+		
+
+       ((uint8*)joint)[0] = ((__global uint8*)hashbuffer)[i - 1];
+		for (int j = 0; j < 8; j++)
+		{
+			uint rand = randbuffer[j] % (randmax - 32);
+
+	     ((uchar4*)joint)[(j + 8)].x =((__global uchar*)(hashbuffer))[0+rand];
+	     ((uchar4*)joint)[(j + 8)].y =((__global uchar*)(hashbuffer))[1+rand];
+	     ((uchar4*)joint)[(j + 8)].z =((__global uchar*)(hashbuffer))[2+rand];
+	     ((uchar4*)joint)[(j + 8)].w =((__global uchar*)(hashbuffer))[3+rand];
+}
+		((__global uint8*)(hashbuffer))[i] = sha256_64(joint);
+
+
+
+		(( uint8*)randseed)[0] = ((__global uint8*)(hashbuffer))[i - 1];
+		(( uint8*)randseed)[1] = ((__global uint8*)(hashbuffer))[i];
+
+
+		if (i>4)
+		{
+
+			((uint8*)randseed)[0] ^= ((__global uint8*)(hashbuffer))[i - 4];
+			((uint8*)randseed)[1] ^= ((__global uint8*)(hashbuffer))[i - 3];
+
+		}
+
+		((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]);
+
+		for (int j = 0; j < 32; j += 2)
+		{
+			uint rand = randbuffer[j / 2] % randmax;
+            uchar4 Tohere;
+
+			Tohere.x =  ((__global uchar*)(hashbuffer))[randmax + j];
+			Tohere.y =  ((__global uchar*)(hashbuffer))[randmax + j + 1];
+			Tohere.z =  ((__global uchar*)(hashbuffer))[randmax + j + 2];
+			Tohere.w =  ((__global uchar*)(hashbuffer))[randmax + j + 3];
+			((__global uchar*)(hashbuffer))[rand] =  Tohere.x;
+			((__global uchar*)(hashbuffer))[rand+1] =  Tohere.y;
+			((__global uchar*)(hashbuffer))[rand+2] =  Tohere.z;
+			((__global uchar*)(hashbuffer))[rand+3] =  Tohere.w;
+
+		}
+
+	} // main loop
+
+	
+	if( ((__global uint *)hashbuffer)[7] <= (target)) {output[atomic_inc(output + 0xFF)] = SWAP32(get_global_id(0));
+//printf("gpu hashbuffer %08x nonce %08x\n",((__global uint *)hashbuffer)[7] ,SWAP32(get_global_id(0)));
+}
+
+
+
+/////////////////////////////////////////////////////////////////
+
+}
--- a/ocl.c
+++ b/ocl.c
@ -35,6 +35,7 @@
				@@ -35,6 +35,7 @@
 #include "ocl/build_kernel.h"
 #include "ocl/binary_kernel.h"
 #include "algorithm/neoscrypt.h"
+#include "algorithm/pluck.h"

 /* FIXME: only here for global config vars, replace with configuration.h
 * or similar as soon as config is in a struct instead of littered all
@ -333,7 +334,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
				@@ -333,7 +334,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
  if (!cgpu->opt_lg) {
    applog(LOG_DEBUG, "GPU %d: selecting lookup gap of 2", gpu);
    cgpu->lookup_gap = 2;
-  } else
+  }
+  else
    cgpu->lookup_gap = cgpu->opt_lg;

  if ((strcmp(cgpu->algorithm.name, "zuikkis") == 0) && (cgpu->lookup_gap != 2)) {
@ -429,7 +431,94 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
				@@ -429,7 +431,94 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg

    applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency));

-  } else if (!cgpu->opt_tc) {
+  }
+
+  /////////////////////////////////// pluck 
+  // neoscrypt TC
+  else if (!safe_cmp(cgpu->algorithm.name, "pluck") && !cgpu->opt_tc) {
+    size_t glob_thread_count;
+    long max_int;
+    unsigned char type = 0;
+
+    // determine which intensity type to use
+    if (cgpu->rawintensity > 0) {
+      glob_thread_count = cgpu->rawintensity;
+      max_int = glob_thread_count;
+      type = 2;
+    }
+    else if (cgpu->xintensity > 0) {
+      glob_thread_count = clState->compute_shaders * ((cgpu->algorithm.xintensity_shift) ? (1UL << (cgpu->algorithm.xintensity_shift + cgpu->xintensity)) : cgpu->xintensity);
+      max_int = cgpu->xintensity;
+      type = 1;
+    }
+    else {
+      glob_thread_count = 1UL << (cgpu->algorithm.intensity_shift + cgpu->intensity);
+      max_int = ((cgpu->dynamic) ? MAX_INTENSITY : cgpu->intensity);
+    }
+
+    glob_thread_count = ((glob_thread_count < cgpu->work_size) ? cgpu->work_size : glob_thread_count);
+
+    // if TC * scratchbuf size is too big for memory... reduce to max
+    if ((glob_thread_count * PLUCK_SCRATCHBUF_SIZE) >= (uint64_t)cgpu->max_alloc) {
+
+      /* Selected intensity will not run on this GPU. Not enough memory.
+      * Adapt the memory setting. */
+      // depending on intensity type used, reduce the intensity until it fits into the GPU max_alloc
+      switch (type) {
+        //raw intensity
+      case 2:
+        while ((glob_thread_count * PLUCK_SCRATCHBUF_SIZE) > (uint64_t)cgpu->max_alloc) {
+          --glob_thread_count;
+        }
+
+        max_int = glob_thread_count;
+        cgpu->rawintensity = glob_thread_count;
+        break;
+
+        //x intensity
+      case 1:
+        glob_thread_count = cgpu->max_alloc / PLUCK_SCRATCHBUF_SIZE;
+        max_int = glob_thread_count / clState->compute_shaders;
+
+        while (max_int && ((clState->compute_shaders * (1UL << max_int)) > glob_thread_count)) {
+          --max_int;
+        }
+
+        /* Check if max_intensity is >0. */
+        if (max_int < MIN_XINTENSITY) {
+          applog(LOG_ERR, "GPU %d: Max xintensity is below minimum.", gpu);
+          max_int = MIN_XINTENSITY;
+        }
+
+        cgpu->xintensity = max_int;
+        glob_thread_count = clState->compute_shaders * (1UL << max_int);
+        break;
+
+      default:
+        glob_thread_count = cgpu->max_alloc / PLUCK_SCRATCHBUF_SIZE;
+        while (max_int && ((1UL << max_int) & glob_thread_count) == 0) {
+          --max_int;
+        }
+
+        /* Check if max_intensity is >0. */
+        if (max_int < MIN_INTENSITY) {
+          applog(LOG_ERR, "GPU %d: Max intensity is below minimum.", gpu);
+          max_int = MIN_INTENSITY;
+        }
+
+        cgpu->intensity = max_int;
+        glob_thread_count = 1UL << max_int;
+        break;
+      }
+    }
+
+    // TC is glob thread count
+    cgpu->thread_concurrency = glob_thread_count;
+
+    applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency));
+
+  }
+  else if (!cgpu->opt_tc) {
    unsigned int sixtyfours;

    sixtyfours = cgpu->max_alloc / 131072 / 64 / (algorithm->n / 1024) - 1;
@ -442,7 +531,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
				@@ -442,7 +531,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
      }
    }
    applog(LOG_DEBUG, "GPU %d: selecting thread concurrency of %d", gpu, (int)(cgpu->thread_concurrency));
-  } else {
+  }
+  else {
    cgpu->thread_concurrency = cgpu->opt_tc;
  }

@ -495,7 +585,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
				@@ -495,7 +585,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
        clReleaseProgram(clState->program);
        clState->program = load_opencl_binary_kernel(build_data);
      }
-    } else {
+    }
+    else {
      if (build_data->patch_bfi)
        quit(1, "Could not save kernel to file, but it is necessary to apply BFI patch");
    }
@ -546,12 +637,25 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
				@@ -546,12 +637,25 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg

      applog(LOG_DEBUG, "Neoscrypt buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize);
      // scrypt/n-scrypt
-    } else {
+    }
+    else if (!safe_cmp(algorithm->name, "pluck")) {
+      /* The scratch/pad-buffer needs 32kBytes memory per thread. */
+      bufsize = PLUCK_SCRATCHBUF_SIZE * cgpu->thread_concurrency;
+
+      /* This is the input buffer. For pluck this is guaranteed to be
+      * 80 bytes only. */
+      readbufsize = 80;
+
+      applog(LOG_DEBUG, "pluck buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize);
+      // scrypt/n-scrypt
+    }
+    else {
      size_t ipt = (algorithm->n / cgpu->lookup_gap + (algorithm->n % cgpu->lookup_gap > 0));
      bufsize = 128 * ipt * cgpu->thread_concurrency;
      applog(LOG_DEBUG, "Scrypt buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize);
    }
-  } else {
+  }
+  else {
    bufsize = (size_t)algorithm->rw_buffer_size;
    applog(LOG_DEBUG, "Buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize);
  }
--- a/sgminer.c
+++ b/sgminer.c
@ -7080,7 +7080,7 @@ bool test_nonce(struct work *work, uint32_t nonce)
				@@ -7080,7 +7080,7 @@ bool test_nonce(struct work *work, uint32_t nonce)
  rebuild_nonce(work, nonce);

  // for Neoscrypt, the diff1targ value is in work->target
-  if (!safe_cmp(work->pool->algorithm.name, "neoscrypt")) {
+  if (!safe_cmp(work->pool->algorithm.name, "neoscrypt") || !safe_cmp(work->pool->algorithm.name, "pluck")) {
    diff1targ = ((uint32_t *)work->target)[7];
  }
  else {
--- a/winbuild/sgminer.vcxproj
+++ b/winbuild/sgminer.vcxproj
@ -266,6 +266,7 @@
				@@ -266,6 +266,7 @@
    <ClCompile Include="..\algorithm\lyra2.c" />
    <ClCompile Include="..\algorithm\lyra2re.c" />
    <ClCompile Include="..\algorithm\neoscrypt.c" />
+    <ClCompile Include="..\algorithm\pluck.c" />
    <ClCompile Include="..\algorithm\sponge.c" />
    <ClCompile Include="..\algorithm\talkcoin.c" />
    <ClCompile Include="..\algorithm\whirlpoolx.c" />
@ -331,6 +332,7 @@
				@@ -331,6 +332,7 @@
    <ClInclude Include="..\algorithm\lyra2.h" />
    <ClInclude Include="..\algorithm\lyra2re.h" />
    <ClInclude Include="..\algorithm\neoscrypt.h" />
+    <ClInclude Include="..\algorithm\pluck.h" />
    <ClInclude Include="..\algorithm\sponge.h" />
    <ClInclude Include="..\algorithm\talkcoin.h" />
    <ClInclude Include="..\algorithm\whirlpoolx.h" />
--- a/winbuild/sgminer.vcxproj.filters
+++ b/winbuild/sgminer.vcxproj.filters
@ -218,6 +218,9 @@
				@@ -218,6 +218,9 @@
    <ClCompile Include="..\algorithm\sponge.c">
      <Filter>Source Files\algorithm</Filter>
    </ClCompile>
+    <ClCompile Include="..\algorithm\pluck.c">
+      <Filter>Source Files\algorithm</Filter>
+    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="..\adl.h">
@ -415,6 +418,9 @@
				@@ -415,6 +418,9 @@
    <ClInclude Include="..\algorithm\sponge.h">
      <Filter>Header Files\algorithm</Filter>
    </ClInclude>
+    <ClInclude Include="..\algorithm\pluck.h">
+      <Filter>Header Files\algorithm</Filter>
+    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <None Include="README.txt" />