pluck algo

10 years ago · cdae391248
8 changed files with 1092 additions and 6 deletions
--- a/Makefile.am
+++ b/Makefile.am
@ -68,6 +68,7 @@ sgminer_SOURCES += algorithm/x14.c algorithm/x14.h
 sgminer_SOURCES += algorithm/fresh.c algorithm/fresh.h
 sgminer_SOURCES += algorithm/whirlcoin.c algorithm/whirlcoin.h
 sgminer_SOURCES += algorithm/neoscrypt.c algorithm/neoscrypt.h
 sgminer_SOURCES += algorithm/pluck.c algorithm/pluck.h
 sgminer_SOURCES += algorithm/Lyra2RE.c algorithm/Lyra2RE.h algorithm/Lyra2.c algorithm/Lyra2.h algorithm/Sponge.c algorithm/Sponge.h
 bin_SCRIPTS	= $(top_srcdir)/kernel/*.cl
--- a/algorithm.c
+++ b/algorithm.c
@ -32,6 +32,7 @@
 #include "algorithm/whirlcoin.h"
 #include "algorithm/neoscrypt.h"
 #include "algorithm/Lyra2RE.h"
 #include "algorithm/pluck.h"
 #include "compat.h"
@ -54,7 +55,8 @@ const char *algorithm_type_str[] = {
  "Fresh",
  "Whirlcoin",
  "Neoscrypt",
-  "Lyra2RE"
+  "Lyra2RE",
  "pluck"
 };
 void sha256(const unsigned char *message, unsigned int len, unsigned char *digest)
@ -180,6 +182,29 @@ static cl_int queue_neoscrypt_kernel(_clState *clState, dev_blk_ctx *blk, __mayb
  return status;
 }
 static cl_int queue_pluck_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads)
 {
 	cl_kernel *kernel = &clState->kernel;
 	unsigned int num = 0;
 	cl_uint le_target;
 	cl_int status = 0;
 //	le_target = (*(cl_uint *)(blk->work->device_target + 28));
 	le_target = (cl_uint)le32toh(((uint32_t *)blk->work->/*device_*/target)[7]);
 //	memcpy(clState->cldata, blk->work->data, 80);
 	flip80(clState->cldata, blk->work->data);
 	status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL);
 	CL_SET_ARG(clState->CLbuffer0);
 	CL_SET_ARG(clState->outputBuffer);
 	CL_SET_ARG(clState->padbuffer8);
 	CL_SET_ARG(le_target);
 	return status;
 }
 static cl_int queue_maxcoin_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
 {
  cl_kernel *kernel = &clState->kernel;
@ -717,6 +742,12 @@ static algorithm_settings_t algos[] = {
  A_NEOSCRYPT("neoscrypt"),
 #undef A_NEOSCRYPT
 #define A_PLUCK(a) \
            { a, ALGO_PLUCK, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, pluck_regenhash, queue_pluck_kernel, gen_hash, append_neoscrypt_compiler_options}
  A_PLUCK("pluck"),
 #undef A_PLUCK
  // kernels starting from this will have difficulty calculated by using quarkcoin algorithm
 #define A_QUARK(a, b) \
    { a, ALGO_QUARK, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, gen_hash, append_x11_compiler_options}
@ -753,7 +784,7 @@ static algorithm_settings_t algos[] = {
  { "fresh", ALGO_FRESH, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 4 * 16 * 4194304, 0, fresh_regenhash, queue_fresh_kernel, gen_hash, NULL},
-  { "Lyra2RE", ALGO_LYRA2RE, "", 1, 128, 128, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4,2 * 8 * 4194304 , 0, lyra2re_regenhash, queue_lyra2RE_kernel, gen_hash, NULL},
+  { "Lyra2RE", ALGO_LYRA2RE, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4,2 * 8 * 4194304 , 0, lyra2re_regenhash, queue_lyra2RE_kernel, gen_hash, NULL},
  // kernels starting from this will have difficulty calculated by using fuguecoin algorithm
 #define A_FUGUE(a, b, c) \
--- a/algorithm.h
+++ b/algorithm.h
@ -26,7 +26,8 @@ typedef enum {
  ALGO_FRESH,
  ALGO_WHIRL,
  ALGO_NEOSCRYPT,
-  ALGO_LYRA2RE
+  ALGO_LYRA2RE,
  ALGO_PLUCK
 } algorithm_type_t;
 extern const char *algorithm_type_str[];
--- a/algorithm/pluck.c
+++ b/algorithm/pluck.c
@ -0,0 +1,482 @@
 /*-
 * Copyright 2014 James Lovejoy
 * Copyright 2014 phm
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
 #include "config.h"
 #include "miner.h"
 #include <stdlib.h>
 #include <stdint.h>
 #include <string.h>
 static const uint32_t sha256_h[8] = {
 	0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
 	0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
 };
 static const uint32_t sha256_k[64] = {
 	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
 	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
 	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
 	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
 	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
 	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
 	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
 	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
 	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
 	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
 	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
 	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
 	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
 	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
 	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
 	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
 };
 void sha256_init(uint32_t *state)
 {
 	memcpy(state, sha256_h, 32);
 }
 /* Elementary functions used by SHA256 */
 #define Ch(x, y, z)     ((x & (y ^ z)) ^ z)
 #define Maj(x, y, z)    ((x & (y | z)) | (y & z))
 #define ROTR(x, n)      ((x >> n) | (x << (32 - n)))
 #define S0(x)           (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
 #define S1(x)           (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
 #define s0(x)           (ROTR(x, 7) ^ ROTR(x, 18) ^ (x >> 3))
 #define s1(x)           (ROTR(x, 17) ^ ROTR(x, 19) ^ (x >> 10))
 /* SHA256 round function */
 #define RND(a, b, c, d, e, f, g, h, k) \
 	do { \
 		t0 = h + S1(e) + Ch(e, f, g) + k; \
 		t1 = S0(a) + Maj(a, b, c); \
 		d += t0; \
 		h  = t0 + t1; \
 		} while (0)
 /* Adjusted round function for rotating state */
 #define RNDr(S, W, i) \
 	RND(S[(64 - i) % 8], S[(65 - i) % 8], \
 	    S[(66 - i) % 8], S[(67 - i) % 8], \
 	    S[(68 - i) % 8], S[(69 - i) % 8], \
 	    S[(70 - i) % 8], S[(71 - i) % 8], \
 	    W[i] + sha256_k[i])
 /*
 * SHA256 block compression function.  The 256-bit state is transformed via
 * the 512-bit input block to produce a new state.
 */
 void sha256_transform(uint32_t *state, const uint32_t *block, int swap)
 {
 	uint32_t W[64];
 	uint32_t S[8];
 	uint32_t t0, t1;
 	int i;
 	/* 1. Prepare message schedule W. */
 	if (swap) {
 		for (i = 0; i < 16; i++)
 			W[i] = swab32(block[i]);
 	}
 	else
 		memcpy(W, block, 64);
 	for (i = 16; i < 64; i += 2) {
 		W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];
 		W[i + 1] = s1(W[i - 1]) + W[i - 6] + s0(W[i - 14]) + W[i - 15];
 	}
 	/* 2. Initialize working variables. */
 	memcpy(S, state, 32);
 	/* 3. Mix. */
 	RNDr(S, W, 0);
 	RNDr(S, W, 1);
 	RNDr(S, W, 2);
 	RNDr(S, W, 3);
 	RNDr(S, W, 4);
 	RNDr(S, W, 5);
 	RNDr(S, W, 6);
 	RNDr(S, W, 7);
 	RNDr(S, W, 8);
 	RNDr(S, W, 9);
 	RNDr(S, W, 10);
 	RNDr(S, W, 11);
 	RNDr(S, W, 12);
 	RNDr(S, W, 13);
 	RNDr(S, W, 14);
 	RNDr(S, W, 15);
 	RNDr(S, W, 16);
 	RNDr(S, W, 17);
 	RNDr(S, W, 18);
 	RNDr(S, W, 19);
 	RNDr(S, W, 20);
 	RNDr(S, W, 21);
 	RNDr(S, W, 22);
 	RNDr(S, W, 23);
 	RNDr(S, W, 24);
 	RNDr(S, W, 25);
 	RNDr(S, W, 26);
 	RNDr(S, W, 27);
 	RNDr(S, W, 28);
 	RNDr(S, W, 29);
 	RNDr(S, W, 30);
 	RNDr(S, W, 31);
 	RNDr(S, W, 32);
 	RNDr(S, W, 33);
 	RNDr(S, W, 34);
 	RNDr(S, W, 35);
 	RNDr(S, W, 36);
 	RNDr(S, W, 37);
 	RNDr(S, W, 38);
 	RNDr(S, W, 39);
 	RNDr(S, W, 40);
 	RNDr(S, W, 41);
 	RNDr(S, W, 42);
 	RNDr(S, W, 43);
 	RNDr(S, W, 44);
 	RNDr(S, W, 45);
 	RNDr(S, W, 46);
 	RNDr(S, W, 47);
 	RNDr(S, W, 48);
 	RNDr(S, W, 49);
 	RNDr(S, W, 50);
 	RNDr(S, W, 51);
 	RNDr(S, W, 52);
 	RNDr(S, W, 53);
 	RNDr(S, W, 54);
 	RNDr(S, W, 55);
 	RNDr(S, W, 56);
 	RNDr(S, W, 57);
 	RNDr(S, W, 58);
 	RNDr(S, W, 59);
 	RNDr(S, W, 60);
 	RNDr(S, W, 61);
 	RNDr(S, W, 62);
 	RNDr(S, W, 63);
 	/* 4. Mix local working variables into global state */
 	for (i = 0; i < 8; i++)
 		state[i] += S[i];
 }
 /*
 * Encode a length len/4 vector of (uint32_t) into a length len vector of
 * (unsigned char) in big-endian form.  Assumes len is a multiple of 4.
 */
 static inline void
 be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
 {
 	uint32_t i;
 	for (i = 0; i < len; i++)
 		dst[i] = htobe32(src[i]);
 }
 static inline void be32enc(void *pp, uint32_t x)
 {
 	uint8_t *p = (uint8_t *)pp;
 	p[3] = x & 0xff;
 	p[2] = (x >> 8) & 0xff;
 	p[1] = (x >> 16) & 0xff;
 	p[0] = (x >> 24) & 0xff;
 }
 static inline uint32_t be32dec(const void *pp)
 {
 	const uint8_t *p = (uint8_t const *)pp;
 	return ((uint32_t)(p[3]) + ((uint32_t)(p[2]) << 8) +
 		((uint32_t)(p[1]) << 16) + ((uint32_t)(p[0]) << 24));
 }
 #define ROTL(a, b) (((a) << (b)) | ((a) >> (32 - (b))))
 //note, this is 64 bytes
 static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
 {
 #define ROTL(a, b) (((a) << (b)) | ((a) >> (32 - (b))))
 	uint32_t x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, x11, x12, x13, x14, x15;
 	int i;
 	x00 = (B[0] ^= Bx[0]);
 	x01 = (B[1] ^= Bx[1]);
 	x02 = (B[2] ^= Bx[2]);
 	x03 = (B[3] ^= Bx[3]);
 	x04 = (B[4] ^= Bx[4]);
 	x05 = (B[5] ^= Bx[5]);
 	x06 = (B[6] ^= Bx[6]);
 	x07 = (B[7] ^= Bx[7]);
 	x08 = (B[8] ^= Bx[8]);
 	x09 = (B[9] ^= Bx[9]);
 	x10 = (B[10] ^= Bx[10]);
 	x11 = (B[11] ^= Bx[11]);
 	x12 = (B[12] ^= Bx[12]);
 	x13 = (B[13] ^= Bx[13]);
 	x14 = (B[14] ^= Bx[14]);
 	x15 = (B[15] ^= Bx[15]);
 	for (i = 0; i < 8; i += 2) {
 		/* Operate on columns. */
 		x04 ^= ROTL(x00 + x12, 7);  x09 ^= ROTL(x05 + x01, 7);
 		x14 ^= ROTL(x10 + x06, 7);  x03 ^= ROTL(x15 + x11, 7);
 		x08 ^= ROTL(x04 + x00, 9);  x13 ^= ROTL(x09 + x05, 9);
 		x02 ^= ROTL(x14 + x10, 9);  x07 ^= ROTL(x03 + x15, 9);
 		x12 ^= ROTL(x08 + x04, 13);  x01 ^= ROTL(x13 + x09, 13);
 		x06 ^= ROTL(x02 + x14, 13);  x11 ^= ROTL(x07 + x03, 13);
 		x00 ^= ROTL(x12 + x08, 18);  x05 ^= ROTL(x01 + x13, 18);
 		x10 ^= ROTL(x06 + x02, 18);  x15 ^= ROTL(x11 + x07, 18);
 		/* Operate on rows. */
 		x01 ^= ROTL(x00 + x03, 7);  x06 ^= ROTL(x05 + x04, 7);
 		x11 ^= ROTL(x10 + x09, 7);  x12 ^= ROTL(x15 + x14, 7);
 		x02 ^= ROTL(x01 + x00, 9);  x07 ^= ROTL(x06 + x05, 9);
 		x08 ^= ROTL(x11 + x10, 9);  x13 ^= ROTL(x12 + x15, 9);
 		x03 ^= ROTL(x02 + x01, 13);  x04 ^= ROTL(x07 + x06, 13);
 		x09 ^= ROTL(x08 + x11, 13);  x14 ^= ROTL(x13 + x12, 13);
 		x00 ^= ROTL(x03 + x02, 18);  x05 ^= ROTL(x04 + x07, 18);
 		x10 ^= ROTL(x09 + x08, 18);  x15 ^= ROTL(x14 + x13, 18);
 	}
 	B[0] += x00;
 	B[1] += x01;
 	B[2] += x02;
 	B[3] += x03;
 	B[4] += x04;
 	B[5] += x05;
 	B[6] += x06;
 	B[7] += x07;
 	B[8] += x08;
 	B[9] += x09;
 	B[10] += x10;
 	B[11] += x11;
 	B[12] += x12;
 	B[13] += x13;
 	B[14] += x14;
 	B[15] += x15;
 #undef ROTL
 }
 void sha256_hash(unsigned char *hash, const unsigned char *data, int len)
 {
 	uint32_t S[16], T[16];
 	int i, r;
 	sha256_init(S);
 	for (r = len; r > -9; r -= 64) {
 		if (r < 64)
 			memset(T, 0, 64);
 		memcpy(T, data + len - r, r > 64 ? 64 : (r < 0 ? 0 : r));
 		if (r >= 0 && r < 64)
 			((unsigned char *)T)[r] = 0x80;
 		for (i = 0; i < 16; i++)
 			T[i] = be32dec(T + i);
 		if (r < 56)
 			T[15] = 8 * len;
 		sha256_transform(S, T, 0);
 	}
 	for (i = 0; i < 8; i++)
 		be32enc((uint32_t *)hash + i, S[i]);
 }
 void sha256_hash512(unsigned char *hash, const unsigned char *data)
 {
 	uint32_t S[16], T[16];
 	int i;
 	sha256_init(S);
 	memcpy(T, data, 64);
 	for (i = 0; i < 16; i++)
 		T[i] = be32dec(T + i);
 	sha256_transform(S, T, 0);
 	memset(T, 0, 64);
 	//memcpy(T, data + 64, 0);
 	((unsigned char *)T)[0] = 0x80;
 	for (i = 0; i < 16; i++)
 		T[i] = be32dec(T + i);
 	T[15] = 8 * 64;
 	sha256_transform(S, T, 0);
 	for (i = 0; i < 8; i++)
 		be32enc((uint32_t *)hash + i, S[i]);
 }
 inline void pluckrehash(void *state, const void *input)
 {
 	int i,j;
 	uint32_t data[20];
 	const int HASH_MEMORY = 128 * 1024;
 	uint8_t * scratchbuf = (uint8_t*)malloc(HASH_MEMORY);
 	memcpy(data,input,80);
 	uint8_t hashbuffer[128*1024]; //don't allocate this on stack, since it's huge.. 
 	int size = HASH_MEMORY;
 	memset(hashbuffer, 0, 64);
 	sha256_hash(&hashbuffer[0], (uint8_t*)data, 80);
 	for (i = 64; i < size - 32; i += 32)
 	{
 		int randmax = i - 4; //we could use size here, but then it's probable to use 0 as the value in most cases
 		uint32_t joint[16];
 		uint32_t randbuffer[16];
 		uint32_t randseed[16];
 		memcpy(randseed, &hashbuffer[i - 64], 64);
 		if (i>128)
 		{
 			memcpy(randbuffer, &hashbuffer[i - 128], 64);
 		}
 		else
 		{
 			memset(&randbuffer, 0, 64);
 		}
 		xor_salsa8(randbuffer, randseed);
 		memcpy(joint, &hashbuffer[i - 32], 32);
 		//use the last hash value as the seed
 		for (j = 32; j < 64; j += 4)
 		{
 			uint32_t rand = randbuffer[(j - 32) / 4] % (randmax - 32); 
 			joint[j / 4] = *((uint32_t*)&hashbuffer[rand]);
 		}
 		sha256_hash512(&hashbuffer[i], (uint8_t*)joint);
 		memcpy(randseed, &hashbuffer[i - 32], 64); 
 		if (i>128)
 		{
 			memcpy(randbuffer, &hashbuffer[i - 128], 64);
 		}
 		else
 		{
 			memset(randbuffer, 0, 64);
 		}
 		xor_salsa8(randbuffer, randseed);
 		for (j = 0; j < 32; j += 2)
 		{
 			uint32_t rand = randbuffer[j / 2] % randmax;
 			*((uint32_t*)&hashbuffer[rand]) = *((uint32_t*)&hashbuffer[j + i - 4]);
 		}
 	}
 	//printf("cpu hashbuffer %08x nonce %08x\n", ((uint32_t*)hashbuffer)[7],data[19]);
 	memcpy(state, hashbuffer, 32);
 }
 static const uint32_t diff1targ = 0x0000ffff;
 /* Used externally as confirmation of correct OCL code */
 int pluck_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
 {
 	uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]);
 	uint32_t data[20], ohash[8];
 	be32enc_vect(data, (const uint32_t *)pdata, 19);
 	data[19] = htobe32(nonce);
 	pluckrehash(ohash, data);
 	tmp_hash7 = be32toh(ohash[7]);
 	applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx",
 		(long unsigned int)Htarg,
 		(long unsigned int)diff1targ,
 		(long unsigned int)tmp_hash7);
 	if (tmp_hash7 > diff1targ)
 		return -1;
 	if (tmp_hash7 > Htarg)
 		return 0;
 	return 1;
 }
 void pluck_regenhash(struct work *work)
 {
        uint32_t data[20];
        uint32_t *nonce = (uint32_t *)(work->data + 76);
        uint32_t *ohash = (uint32_t *)(work->hash);
        be32enc_vect(data, (const uint32_t *)work->data, 19);
        data[19] = htobe32(*nonce);	
        pluckrehash(ohash, data);
 }
 bool scanhash_pluck(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
 	unsigned char *pdata, unsigned char __maybe_unused *phash1,
 	unsigned char __maybe_unused *phash, const unsigned char *ptarget,
 	uint32_t max_nonce, uint32_t *last_nonce, uint32_t n)
 {
 	uint32_t *nonce = (uint32_t *)(pdata + 76);
 	uint32_t data[20];
 	uint32_t tmp_hash7;
 	uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]);
 	bool ret = false;
 	be32enc_vect(data, (const uint32_t *)pdata, 19);
 	while (1)
 	{
 		uint32_t ostate[8];
 		*nonce = ++n;
 		data[19] = (n);
 		pluckrehash(ostate, data);
 		tmp_hash7 = (ostate[7]);
 		applog(LOG_INFO, "data7 %08lx", (long unsigned int)data[7]);
 		if (unlikely(tmp_hash7 <= Htarg))
 		{
 			((uint32_t *)pdata)[19] = htobe32(n);
 			*last_nonce = n;
 			ret = true;
 			break;
 		}
 		if (unlikely((n >= max_nonce) || thr->work_restart))
 		{
 			*last_nonce = n;
 			break;
 		}
 	}
 	return ret;
 }
--- a/algorithm/pluck.h
+++ b/algorithm/pluck.h
@ -0,0 +1,10 @@
 #ifndef PLUCK_H
 #define PLUCK_H
 #include "miner.h"
 #define PLUCK_SCRATCHBUF_SIZE (128 * 1024)
 extern int pluck_test(unsigned char *pdata, const unsigned char *ptarget,
 			uint32_t nonce);
 extern void pluck_regenhash(struct work *work);
 #endif /* PLUCK_H */
--- a/kernel/pluck.cl
+++ b/kernel/pluck.cl
@ -0,0 +1,463 @@
 /*
 * "pluck" kernel implementation.
 *
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2015  djm34
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * ===========================(LICENSE END)=============================
 *
 * @author   djm34
 */
 #if !defined(cl_khr_byte_addressable_store)
 #error "Device does not support unaligned stores"
 #endif
 #define ROL32(x, n)  rotate(x, (uint) n)
 //#define ROL32(x, n)   (((x) << (n)) | ((x) >> (32 - (n))))
 #define HASH_MEMORY 4096
 #define SALSA(a,b,c,d) do { \
    t =a+d; b^=rotate(t,  7U);    \
    t =b+a; c^=rotate(t,  9U);    \
    t =c+b; d^=rotate(t, 13U);    \
    t =d+c; a^=rotate(t, 18U);     \
 } while(0)
 #define SALSA_CORE(state) do { \
 \
 SALSA(state.s0,state.s4,state.s8,state.sc); \
 SALSA(state.s5,state.s9,state.sd,state.s1); \
 SALSA(state.sa,state.se,state.s2,state.s6); \
 SALSA(state.sf,state.s3,state.s7,state.sb); \
 SALSA(state.s0,state.s1,state.s2,state.s3); \
 SALSA(state.s5,state.s6,state.s7,state.s4); \
 SALSA(state.sa,state.sb,state.s8,state.s9); \
 SALSA(state.sf,state.sc,state.sd,state.se); \
 	} while(0)
 /*
 #define SALSA_CORE(state)	do { \
 	state.s4 ^= rotate(state.s0 + state.sc, 7U); state.s8 ^= rotate(state.s4 + state.s0, 9U); state.sc ^= rotate(state.s8 + state.s4, 13U); state.s0 ^= rotate(state.sc + state.s8, 18U); \
 	state.s9 ^= rotate(state.s5 + state.s1, 7U); state.sd ^= rotate(state.s9 + state.s5, 9U); state.s1 ^= rotate(state.sd + state.s9, 13U); state.s5 ^= rotate(state.s1 + state.sd, 18U); \
 	state.se ^= rotate(state.sa + state.s6, 7U); state.s2 ^= rotate(state.se + state.sa, 9U); state.s6 ^= rotate(state.s2 + state.se, 13U); state.sa ^= rotate(state.s6 + state.s2, 18U); \
 	state.s3 ^= rotate(state.sf + state.sb, 7U); state.s7 ^= rotate(state.s3 + state.sf, 9U); state.sb ^= rotate(state.s7 + state.s3, 13U); state.sf ^= rotate(state.sb + state.s7, 18U); \
 	state.s1 ^= rotate(state.s0 + state.s3, 7U); state.s2 ^= rotate(state.s1 + state.s0, 9U); state.s3 ^= rotate(state.s2 + state.s1, 13U); state.s0 ^= rotate(state.s3 + state.s2, 18U); \
 	state.s6 ^= rotate(state.s5 + state.s4, 7U); state.s7 ^= rotate(state.s6 + state.s5, 9U); state.s4 ^= rotate(state.s7 + state.s6, 13U); state.s5 ^= rotate(state.s4 + state.s7, 18U); \
 	state.sb ^= rotate(state.sa + state.s9, 7U); state.s8 ^= rotate(state.sb + state.sa, 9U); state.s9 ^= rotate(state.s8 + state.sb, 13U); state.sa ^= rotate(state.s9 + state.s8, 18U); \
 	state.sc ^= rotate(state.sf + state.se, 7U); state.sd ^= rotate(state.sc + state.sf, 9U); state.se ^= rotate(state.sd + state.sc, 13U); state.sf ^= rotate(state.se + state.sd, 18U); \
 } while(0)
 */
 uint16 xor_salsa8(uint16 Bx)
 {
 uint t;
 	uint16 st = Bx;
 	SALSA_CORE(st);
 	SALSA_CORE(st);
 	SALSA_CORE(st);
 	SALSA_CORE(st);
 	return(st + Bx);
 }
 #define SHR(x, n)    ((x) >> n)
 #define SWAP32(a)    (as_uint(as_uchar4(a).wzyx))
 #define S0(x) (ROL32(x, 25) ^ ROL32(x, 14) ^  SHR(x, 3))
 #define S1(x) (ROL32(x, 15) ^ ROL32(x, 13) ^  SHR(x, 10))
 #define S2(x) (ROL32(x, 30) ^ ROL32(x, 19) ^ ROL32(x, 10))
 #define S3(x) (ROL32(x, 26) ^ ROL32(x, 21) ^ ROL32(x, 7))
 #define P(a,b,c,d,e,f,g,h,x,K)                  \
 {                                               \
    temp1 = h + S3(e) + F1(e,f,g) + (K + x);      \
    d += temp1; h = temp1 + S2(a) + F0(a,b,c);  \
 }
 #define PLAST(a,b,c,d,e,f,g,h,x,K)                  \
 {                                               \
    d += h + S3(e) + F1(e,f,g) + (x + K);              \
 }
 #define F0(y, x, z) bitselect(z, y, z ^ x)
 #define F1(x, y, z) bitselect(z, y, x)
 #define R0 (W0 = S1(W14) + W9 + S0(W1) + W0)
 #define R1 (W1 = S1(W15) + W10 + S0(W2) + W1)
 #define R2 (W2 = S1(W0) + W11 + S0(W3) + W2)
 #define R3 (W3 = S1(W1) + W12 + S0(W4) + W3)
 #define R4 (W4 = S1(W2) + W13 + S0(W5) + W4)
 #define R5 (W5 = S1(W3) + W14 + S0(W6) + W5)
 #define R6 (W6 = S1(W4) + W15 + S0(W7) + W6)
 #define R7 (W7 = S1(W5) + W0 + S0(W8) + W7)
 #define R8 (W8 = S1(W6) + W1 + S0(W9) + W8)
 #define R9 (W9 = S1(W7) + W2 + S0(W10) + W9)
 #define R10 (W10 = S1(W8) + W3 + S0(W11) + W10)
 #define R11 (W11 = S1(W9) + W4 + S0(W12) + W11)
 #define R12 (W12 = S1(W10) + W5 + S0(W13) + W12)
 #define R13 (W13 = S1(W11) + W6 + S0(W14) + W13)
 #define R14 (W14 = S1(W12) + W7 + S0(W15) + W14)
 #define R15 (W15 = S1(W13) + W8 + S0(W0) + W15)
 #define RD14 (S1(W12) + W7 + S0(W15) + W14)
 #define RD15 (S1(W13) + W8 + S0(W0) + W15)
 inline uint8 sha256_round1(uint16 data)
 {
 	uint temp1;
    uint8 res;
 	uint W0 = SWAP32(data.s0);
 	uint W1 = SWAP32(data.s1);
 	uint W2 = SWAP32(data.s2);
 	uint W3 = SWAP32(data.s3);
 	uint W4 = SWAP32(data.s4);
 	uint W5 = SWAP32(data.s5);
 	uint W6 = SWAP32(data.s6);
 	uint W7 = SWAP32(data.s7);
 	uint W8 = SWAP32(data.s8);
 	uint W9 = SWAP32(data.s9);
 	uint W10 = SWAP32(data.sA);
 	uint W11 = SWAP32(data.sB);
 	uint W12 = SWAP32(data.sC);
 	uint W13 = SWAP32(data.sD);
 	uint W14 = SWAP32(data.sE);
 	uint W15 = SWAP32(data.sF);
 	uint v0 = 0x6A09E667;
 	uint v1 = 0xBB67AE85;
 	uint v2 = 0x3C6EF372;
 	uint v3 = 0xA54FF53A;
 	uint v4 = 0x510E527F;
 	uint v5 = 0x9B05688C;
 	uint v6 = 0x1F83D9AB;
 	uint v7 = 0x5BE0CD19;
 	P(v0, v1, v2, v3, v4, v5, v6, v7, W0, 0x428A2F98);
 	P(v7, v0, v1, v2, v3, v4, v5, v6, W1, 0x71374491);
 	P(v6, v7, v0, v1, v2, v3, v4, v5, W2, 0xB5C0FBCF);
 	P(v5, v6, v7, v0, v1, v2, v3, v4, W3, 0xE9B5DBA5);
 	P(v4, v5, v6, v7, v0, v1, v2, v3, W4, 0x3956C25B);
 	P(v3, v4, v5, v6, v7, v0, v1, v2, W5, 0x59F111F1);
 	P(v2, v3, v4, v5, v6, v7, v0, v1, W6, 0x923F82A4);
 	P(v1, v2, v3, v4, v5, v6, v7, v0, W7, 0xAB1C5ED5);
 	P(v0, v1, v2, v3, v4, v5, v6, v7, W8, 0xD807AA98);
 	P(v7, v0, v1, v2, v3, v4, v5, v6, W9, 0x12835B01);
 	P(v6, v7, v0, v1, v2, v3, v4, v5, W10, 0x243185BE);
 	P(v5, v6, v7, v0, v1, v2, v3, v4, W11, 0x550C7DC3);
 	P(v4, v5, v6, v7, v0, v1, v2, v3, W12, 0x72BE5D74);
 	P(v3, v4, v5, v6, v7, v0, v1, v2, W13, 0x80DEB1FE);
 	P(v2, v3, v4, v5, v6, v7, v0, v1, W14, 0x9BDC06A7);
 	P(v1, v2, v3, v4, v5, v6, v7, v0, W15, 0xC19BF174);
 	P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0xE49B69C1);
 	P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0xEFBE4786);
 	P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x0FC19DC6);
 	P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x240CA1CC);
 	P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x2DE92C6F);
 	P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4A7484AA);
 	P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5CB0A9DC);
 	P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x76F988DA);
 	P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x983E5152);
 	P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA831C66D);
 	P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xB00327C8);
 	P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xBF597FC7);
 	P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xC6E00BF3);
 	P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD5A79147);
 	P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0x06CA6351);
 	P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x14292967);
 	P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x27B70A85);
 	P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x2E1B2138);
 	P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x4D2C6DFC);
 	P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x53380D13);
 	P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x650A7354);
 	P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x766A0ABB);
 	P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x81C2C92E);
 	P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x92722C85);
 	P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0xA2BFE8A1);
 	P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA81A664B);
 	P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xC24B8B70);
 	P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xC76C51A3);
 	P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xD192E819);
 	P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD6990624);
 	P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0xF40E3585);
 	P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x106AA070);
 	P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x19A4C116);
 	P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x1E376C08);
 	P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x2748774C);
 	P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x34B0BCB5);
 	P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x391C0CB3);
 	P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4ED8AA4A);
 	P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5B9CCA4F);
 	P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x682E6FF3);
 	P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x748F82EE);
 	P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0x78A5636F);
 	P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0x84C87814);
 	P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0x8CC70208);
 	P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0x90BEFFFA);
 	P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xA4506CEB);
 	P(v2, v3, v4, v5, v6, v7, v0, v1, RD14, 0xBEF9A3F7);
 	P(v1, v2, v3, v4, v5, v6, v7, v0, RD15, 0xC67178F2);
 	res.s0 = v0 + 0x6A09E667;
 	res.s1 = v1 + 0xBB67AE85;
 	res.s2 = v2 + 0x3C6EF372;
 	res.s3 = v3 + 0xA54FF53A;
 	res.s4 = v4 + 0x510E527F;
 	res.s5 = v5 + 0x9B05688C;
 	res.s6 = v6 + 0x1F83D9AB;
 	res.s7 = v7 + 0x5BE0CD19;
 	return (res);
 }
 inline uint8 sha256_round2(uint16 data,uint8 buf)
 {
 	uint temp1;
 	uint8 res;
 	uint W0 = data.s0;
 	uint W1 = data.s1;
 	uint W2 = data.s2;
 	uint W3 = data.s3;
 	uint W4 = data.s4;
 	uint W5 = data.s5;
 	uint W6 = data.s6;
 	uint W7 = data.s7;
 	uint W8 = data.s8;
 	uint W9 = data.s9;
 	uint W10 = data.sA;
 	uint W11 = data.sB;
 	uint W12 = data.sC;
 	uint W13 = data.sD;
 	uint W14 = data.sE;
 	uint W15 = data.sF;
 	uint v0 = buf.s0;
 	uint v1 = buf.s1;
 	uint v2 = buf.s2;
 	uint v3 = buf.s3;
 	uint v4 = buf.s4;
 	uint v5 = buf.s5;
 	uint v6 = buf.s6;
 	uint v7 = buf.s7;
 	P(v0, v1, v2, v3, v4, v5, v6, v7, W0, 0x428A2F98);
 	P(v7, v0, v1, v2, v3, v4, v5, v6, W1, 0x71374491);
 	P(v6, v7, v0, v1, v2, v3, v4, v5, W2, 0xB5C0FBCF);
 	P(v5, v6, v7, v0, v1, v2, v3, v4, W3, 0xE9B5DBA5);
 	P(v4, v5, v6, v7, v0, v1, v2, v3, W4, 0x3956C25B);
 	P(v3, v4, v5, v6, v7, v0, v1, v2, W5, 0x59F111F1);
 	P(v2, v3, v4, v5, v6, v7, v0, v1, W6, 0x923F82A4);
 	P(v1, v2, v3, v4, v5, v6, v7, v0, W7, 0xAB1C5ED5);
 	P(v0, v1, v2, v3, v4, v5, v6, v7, W8, 0xD807AA98);
 	P(v7, v0, v1, v2, v3, v4, v5, v6, W9, 0x12835B01);
 	P(v6, v7, v0, v1, v2, v3, v4, v5, W10, 0x243185BE);
 	P(v5, v6, v7, v0, v1, v2, v3, v4, W11, 0x550C7DC3);
 	P(v4, v5, v6, v7, v0, v1, v2, v3, W12, 0x72BE5D74);
 	P(v3, v4, v5, v6, v7, v0, v1, v2, W13, 0x80DEB1FE);
 	P(v2, v3, v4, v5, v6, v7, v0, v1, W14, 0x9BDC06A7);
 	P(v1, v2, v3, v4, v5, v6, v7, v0, W15, 0xC19BF174);
 	P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0xE49B69C1);
 	P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0xEFBE4786);
 	P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x0FC19DC6);
 	P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x240CA1CC);
 	P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x2DE92C6F);
 	P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4A7484AA);
 	P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5CB0A9DC);
 	P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x76F988DA);
 	P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x983E5152);
 	P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA831C66D);
 	P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xB00327C8);
 	P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xBF597FC7);
 	P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xC6E00BF3);
 	P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD5A79147);
 	P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0x06CA6351);
 	P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x14292967);
 	P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x27B70A85);
 	P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x2E1B2138);
 	P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x4D2C6DFC);
 	P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x53380D13);
 	P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x650A7354);
 	P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x766A0ABB);
 	P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x81C2C92E);
 	P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x92722C85);
 	P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0xA2BFE8A1);
 	P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA81A664B);
 	P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xC24B8B70);
 	P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xC76C51A3);
 	P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xD192E819);
 	P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD6990624);
 	P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0xF40E3585);
 	P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x106AA070);
 	P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x19A4C116);
 	P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x1E376C08);
 	P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x2748774C);
 	P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x34B0BCB5);
 	P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x391C0CB3);
 	P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4ED8AA4A);
 	P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5B9CCA4F);
 	P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x682E6FF3);
 	P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x748F82EE);
 	P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0x78A5636F);
 	P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0x84C87814);
 	P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0x8CC70208);
 	P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0x90BEFFFA);
 	P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xA4506CEB);
 	P(v2, v3, v4, v5, v6, v7, v0, v1, RD14, 0xBEF9A3F7);
 	P(v1, v2, v3, v4, v5, v6, v7, v0, RD15, 0xC67178F2);
 	res.s0 = SWAP32(v0 + buf.s0);
 	res.s1 = SWAP32(v1 + buf.s1);
 	res.s2 = SWAP32(v2 + buf.s2);
 	res.s3 = SWAP32(v3 + buf.s3);
 	res.s4 = SWAP32(v4 + buf.s4);
 	res.s5 = SWAP32(v5 + buf.s5);
 	res.s6 = SWAP32(v6 + buf.s6);
 	res.s7 = SWAP32(v7 + buf.s7);
 	return (res);
 }
 inline uint8 sha256_80(uint* data,uint nonce)
 {
 uint8 buf = sha256_round1( ((uint16*)data)[0]);
 uint in[16];
 for (int i = 0; i<3; i++) { in[i] = SWAP32(data[i + 16]); }
 in[3] = SWAP32(nonce);
 in[4] = 0x80000000;
 in[15] = 0x280;
 for (int i = 5; i<15; i++) { in[i] = 0; }
 return(sha256_round2(((uint16*)in)[0], buf));
 }
 inline uint8 sha256_64(uint* data)
 {
 uint8 buf=sha256_round1(((uint16*)data)[0]);
 uint in[16];
 for (int i = 1; i<15; i++) { in[i] = 0; }
 in[0] = 0x80000000;
 in[15] = 0x200;
 	return(sha256_round2(((uint16*)in)[0],buf));
 }
 __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
 __kernel void search(__global const uchar* restrict input, __global uint* restrict output, __global uchar *padcache, const uint target)
 {
 	__global uchar *hashbuffer = (__global uchar *)(padcache + (1024*128 * (get_global_id(0) % MAX_GLOBAL_THREADS)));
    uint  data[20];
 	((uint16 *)data)[0] = ((__global const uint16 *)input)[0];
 	((uint4 *)data)[4] = ((__global const uint4 *)input)[4];
        ((__global uint8*)hashbuffer)[0] = sha256_80(data,get_global_id(0));
 		((__global uint8*)hashbuffer)[1] = 0;
 	for (int i = 2; i < 4096 - 1; i++)
 	{
 		uint randmax = i * 32 - 4;
 		uint randseed[16];
 		uint randbuffer[16];
 		uint joint[16];
 		((uint8*)randseed)[0] = ((__global uint8*)hashbuffer)[i - 2];
 		((uint8*)randseed)[1] = ((__global uint8*)hashbuffer)[i - 1];
 		if (i>4)
 		{
 			((uint8*)randseed)[0] ^= ((__global uint8*)hashbuffer)[i - 4];
 			((uint8*)randseed)[1] ^= ((__global uint8*)hashbuffer)[i - 3];
 		}
 		((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]);
       ((uint8*)joint)[0] = ((__global uint8*)hashbuffer)[i - 1];
 		for (int j = 0; j < 8; j++)
 		{
 			uint rand = randbuffer[j] % (randmax - 32);
 	     ((uchar4*)joint)[(j + 8)].x =((__global uchar*)(hashbuffer))[0+rand];
 	     ((uchar4*)joint)[(j + 8)].y =((__global uchar*)(hashbuffer))[1+rand];
 	     ((uchar4*)joint)[(j + 8)].z =((__global uchar*)(hashbuffer))[2+rand];
 	     ((uchar4*)joint)[(j + 8)].w =((__global uchar*)(hashbuffer))[3+rand];
 }
 		((__global uint8*)(hashbuffer))[i] = sha256_64(joint);
 		(( uint8*)randseed)[0] = ((__global uint8*)(hashbuffer))[i - 1];
 		(( uint8*)randseed)[1] = ((__global uint8*)(hashbuffer))[i];
 		if (i>4)
 		{
 			((uint8*)randseed)[0] ^= ((__global uint8*)(hashbuffer))[i - 4];
 			((uint8*)randseed)[1] ^= ((__global uint8*)(hashbuffer))[i - 3];
 		}
 		((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]);
 		for (int j = 0; j < 32; j += 2)
 		{
 			uint rand = randbuffer[j / 2] % randmax;
            uchar4 Tohere;
 			Tohere.x =  ((__global uchar*)(hashbuffer))[randmax + j];
 			Tohere.y =  ((__global uchar*)(hashbuffer))[randmax + j + 1];
 			Tohere.z =  ((__global uchar*)(hashbuffer))[randmax + j + 2];
 			Tohere.w =  ((__global uchar*)(hashbuffer))[randmax + j + 3];
 			((__global uchar*)(hashbuffer))[rand] =  Tohere.x;
 			((__global uchar*)(hashbuffer))[rand+1] =  Tohere.y;
 			((__global uchar*)(hashbuffer))[rand+2] =  Tohere.z;
 			((__global uchar*)(hashbuffer))[rand+3] =  Tohere.w;
 		}
 	} // main loop
 	if( ((__global uint *)hashbuffer)[7] <= (target)) {output[atomic_inc(output + 0xFF)] = SWAP32(get_global_id(0));
 //printf("gpu hashbuffer %08x nonce %08x\n",((__global uint *)hashbuffer)[7] ,SWAP32(get_global_id(0)));
 }
 /////////////////////////////////////////////////////////////////
 }
--- a/ocl.c
+++ b/ocl.c
@ -35,6 +35,7 @@
 #include "ocl/build_kernel.h"
 #include "ocl/binary_kernel.h"
 #include "algorithm/neoscrypt.h"
 #include "algorithm/pluck.h"
 /* FIXME: only here for global config vars, replace with configuration.h
 * or similar as soon as config is in a struct instead of littered all
@ -429,7 +430,92 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
    applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency));
-  } else if (!cgpu->opt_tc) {
+  } 
 /////////////////////////////////// pluck 
  // neoscrypt TC
  else if (!safe_cmp(cgpu->algorithm.name, "pluck") && !cgpu->opt_tc) {
 	  size_t glob_thread_count;
 	  long max_int;
 	  unsigned char type = 0;
 	  // determine which intensity type to use
 	  if (cgpu->rawintensity > 0) {
 		  glob_thread_count = cgpu->rawintensity;
 		  max_int = glob_thread_count;
 		  type = 2;
 	  }
 	  else if (cgpu->xintensity > 0) {
 		  glob_thread_count = clState->compute_shaders * ((cgpu->algorithm.xintensity_shift) ? (1UL << (cgpu->algorithm.xintensity_shift + cgpu->xintensity)) : cgpu->xintensity);
 		  max_int = cgpu->xintensity;
 		  type = 1;
 	  }
 	  else {
 		  glob_thread_count = 1UL << (cgpu->algorithm.intensity_shift + cgpu->intensity);
 		  max_int = ((cgpu->dynamic) ? MAX_INTENSITY : cgpu->intensity);
 	  }
 	  glob_thread_count = ((glob_thread_count < cgpu->work_size) ? cgpu->work_size : glob_thread_count);
 	  // if TC * scratchbuf size is too big for memory... reduce to max
 	  if ((glob_thread_count * PLUCK_SCRATCHBUF_SIZE) >= (uint64_t)cgpu->max_alloc) {
 		  /* Selected intensity will not run on this GPU. Not enough memory.
 		  * Adapt the memory setting. */
 		  // depending on intensity type used, reduce the intensity until it fits into the GPU max_alloc
 		  switch (type) {
 			  //raw intensity
 		  case 2:
 			  while ((glob_thread_count * PLUCK_SCRATCHBUF_SIZE) > (uint64_t)cgpu->max_alloc) {
 				  --glob_thread_count;
 			  }
 			  max_int = glob_thread_count;
 			  cgpu->rawintensity = glob_thread_count;
 			  break;
 			  //x intensity
 		  case 1:
 			  glob_thread_count = cgpu->max_alloc / PLUCK_SCRATCHBUF_SIZE;
 			  max_int = glob_thread_count / clState->compute_shaders;
 			  while (max_int && ((clState->compute_shaders * (1UL << max_int)) > glob_thread_count)) {
 				  --max_int;
 			  }
 			  /* Check if max_intensity is >0. */
 			  if (max_int < MIN_XINTENSITY) {
 				  applog(LOG_ERR, "GPU %d: Max xintensity is below minimum.", gpu);
 				  max_int = MIN_XINTENSITY;
 			  }
 			  cgpu->xintensity = max_int;
 			  glob_thread_count = clState->compute_shaders * (1UL << max_int);
 			  break;
 		  default:
 			  glob_thread_count = cgpu->max_alloc / PLUCK_SCRATCHBUF_SIZE;
 			  while (max_int && ((1UL << max_int) & glob_thread_count) == 0) {
 				  --max_int;
 			  }
 			  /* Check if max_intensity is >0. */
 			  if (max_int < MIN_INTENSITY) {
 				  applog(LOG_ERR, "GPU %d: Max intensity is below minimum.", gpu);
 				  max_int = MIN_INTENSITY;
 			  }
 			  cgpu->intensity = max_int;
 			  glob_thread_count = 1UL << max_int;
 			  break;
 		  }
 	  }
 	  // TC is glob thread count
 	  cgpu->thread_concurrency = glob_thread_count;
 	  applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency));
 } else if (!cgpu->opt_tc) {
    unsigned int sixtyfours;
    sixtyfours =  cgpu->max_alloc / 131072 / 64 / (algorithm->n/1024) - 1;
@ -546,7 +632,19 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
      applog(LOG_DEBUG, "Neoscrypt buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize);
    // scrypt/n-scrypt
-    } else {
+    } 
 	else if (!safe_cmp(algorithm->name, "pluck")) {
 		/* The scratch/pad-buffer needs 32kBytes memory per thread. */
 		bufsize = PLUCK_SCRATCHBUF_SIZE * cgpu->thread_concurrency;
 		/* This is the input buffer. For pluck this is guaranteed to be
 		* 80 bytes only. */
 		readbufsize = 80;
 		applog(LOG_DEBUG, "pluck buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize);
 		// scrypt/n-scrypt
 	}
     else {
      size_t ipt = (algorithm->n / cgpu->lookup_gap + (algorithm->n % cgpu->lookup_gap > 0));
      bufsize = 128 * ipt * cgpu->thread_concurrency;
      applog(LOG_DEBUG, "Scrypt buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize);
--- a/sgminer.c
+++ b/sgminer.c
@ -7081,7 +7081,7 @@ bool test_nonce(struct work *work, uint32_t nonce)
  rebuild_nonce(work, nonce);
  // for Neoscrypt, the diff1targ value is in work->target
-  if (!safe_cmp(work->pool->algorithm.name, "neoscrypt")) {
+  if (!safe_cmp(work->pool->algorithm.name, "neoscrypt") || !safe_cmp(work->pool->algorithm.name, "pluck")) {
    diff1targ = ((uint32_t *)work->target)[7];
  }
  else {