// ccminer-gostd-lite/pluck/pluck.cu

/* Based on djm code */

#include <stdint.h>

#include "miner.h"
#include "cuda_helper.h"

#include <openssl/sha.h>

static uint32_t *d_hash[MAX_GPUS] ;

extern void pluck_setBlockTarget(const void* data, const void *ptarget);
extern void pluck_cpu_init(int thr_id, uint32_t threads, uint32_t *d_outputHash);
extern uint32_t pluck_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, int order);

extern float tp_coef[MAX_GPUS];

#define ROTL(a, b) (((a) << (b)) | ((a) >> (32 - (b))))
// Mixes two 64-byte blocks (16 x 32-bit words each).
//
// B is first XORed in place with Bx, a copy of that XORed block is run
// through four double-rounds (column pass then row pass) built from
// add / rotate / xor steps, and the mixed copy is then added word by word
// back into B. Bx is only read.
static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
{
#define ROTL(a, b) (((a) << (b)) | ((a) >> (32 - (b))))
// One quarter-round over the working words w[a], w[b], w[c], w[d].
#define SALSA_QR(a, b, c, d) \
	do { \
		w[b] ^= ROTL(w[a] + w[d], 7); \
		w[c] ^= ROTL(w[b] + w[a], 9); \
		w[d] ^= ROTL(w[c] + w[b], 13); \
		w[a] ^= ROTL(w[d] + w[c], 18); \
	} while (0)

	uint32_t w[16];

	// fold Bx into B and keep a working copy of the result
	for (int k = 0; k < 16; k++) {
		B[k] ^= Bx[k];
		w[k] = B[k];
	}

	for (int round = 0; round < 4; round++) {
		/* Operate on columns. */
		SALSA_QR( 0,  4,  8, 12);
		SALSA_QR( 5,  9, 13,  1);
		SALSA_QR(10, 14,  2,  6);
		SALSA_QR(15,  3,  7, 11);

		/* Operate on rows. */
		SALSA_QR( 0,  1,  2,  3);
		SALSA_QR( 5,  6,  7,  4);
		SALSA_QR(10, 11,  8,  9);
		SALSA_QR(15, 12, 13, 14);
	}

	// feed-forward: add the mixed words onto the XORed input
	for (int k = 0; k < 16; k++)
		B[k] += w[k];

#undef SALSA_QR
#undef ROTL
}

// SHA-256 of an arbitrary-length buffer.
//   hash: output, receives the digest (32 bytes for SHA-256)
//   data: input bytes
//   len:  number of input bytes to hash
static void sha256_hash(uchar *hash, const uchar *data, int len)
{
	// one-shot OpenSSL helper: same result as the Init/Update/Final sequence
	SHA256(data, len, hash);
}

// hash exactly 64 bytes (ie, sha256 block size)
//
//   hash: output, 8 words; each state word is stored with be32enc (32 bytes)
//   data: input, 16 words (64 bytes); each word is read with be32dec
//
// Two compression calls are made: one over the decoded input block and one
// over the fixed padding block that follows a 64-byte message.
static void sha256_hash512(uint32_t *hash, const uint32_t *data)
{
	uint32_t _ALIGN(64) S[16];
	uint32_t _ALIGN(64) T[16];
	// Padding block, built directly as 32-bit word values. The previous code
	// filled a byte array and reinterpreted it as uint32_t*, which only
	// produced these word values on a little-endian host (and aliased a
	// uchar array through a uint32_t pointer).
	uint32_t _ALIGN(64) pad[16] = { 0 };
	int i;

	sha256_init(S);

	for (i = 0; i < 16; i++)
		T[i] = be32dec(&data[i]);
	sha256_transform(S, T, 0);

	pad[0] = 0x80000000U;  // 0x80 terminator byte right after the message
	pad[15] = 0x00000200U; // message length in bits: 8 * 64 = 0x200
	// NOTE(review): the last argument is passed as 0 exactly like for the
	// already-decoded block T above - presumably "no byte swap"; confirm
	// against sha256_transform's declaration.
	sha256_transform(S, pad, 0);

	for (i = 0; i < 8; i++)
		be32enc(&hash[i], S[i]);
}

#define BLOCK_HEADER_SIZE 80
// CPU implementation of the pluck hash (used below to validate GPU results).
//
//   hash:       output, receives the first 32 bytes of the scratchpad
//   data:       input, BLOCK_HEADER_SIZE (80) bytes are hashed
//   hashbuffer: scratchpad of at least N * 1024 bytes, overwritten
//   N:          scratchpad size in KiB
//
// The scratchpad starts as SHA-256(header) followed by 32 zero bytes. It is
// then extended 32 bytes at a time; each step derives pseudo-random byte
// offsets with xor_salsa8, hashes the previous 32 bytes together with eight
// words gathered from those offsets, and finally scatters 16 words back
// into the already-written area.
void pluckhash(uint32_t *hash, const uint32_t *data, uchar *hashbuffer, const int N)
{
	const int size = N * 1024;

	sha256_hash(hashbuffer, (const uchar*)data, BLOCK_HEADER_SIZE);
	memset(&hashbuffer[32], 0, 32);

	for (int i = 64; i < size - 32; i += 32)
	{
		uint32_t _ALIGN(64) randseed[16];
		uint32_t _ALIGN(64) randbuffer[16];
		uint32_t _ALIGN(64) joint[16];
		//i-4 because we use integers for all references against this, and we don't want to go 3 bytes over the defined area
		//we could use size here, but then it's probable to use 0 as the value in most cases
		const int randmax = i - 4;

		//setup randbuffer to be an array of random indexes
		memcpy(randseed, &hashbuffer[i - 64], 64);

		if (i > 128) memcpy(randbuffer, &hashbuffer[i - 128], 64);
		else memset(randbuffer, 0, 64);

		xor_salsa8(randbuffer, randseed);

		// first half of the block to hash: the previous 32 scratchpad bytes
		memcpy(joint, &hashbuffer[i - 32], 32);

		// second half: eight words gathered from pseudo-random byte offsets
		for (int j = 32; j < 64; j += 4)
		{
			//randmax - 32 as otherwise we go beyond memory that's already been written to
			const uint32_t offset = randbuffer[(j - 32) >> 2] % (randmax - 32);
			// offset is an arbitrary byte position, so the word is fetched
			// with memcpy instead of dereferencing a misaligned uint32_t*
			memcpy(&joint[j >> 2], &hashbuffer[offset], sizeof(uint32_t));
		}

		sha256_hash512((uint32_t*)&hashbuffer[i], joint);

		//setup randbuffer to be an array of random indexes
		//use last hash value and previous hash value(post-mixing)
		memcpy(randseed, &hashbuffer[i - 32], 64);

		if (i > 128) memcpy(randbuffer, &hashbuffer[i - 128], 64);
		else memset(randbuffer, 0, 64);

		xor_salsa8(randbuffer, randseed);

		// scatter 16 words taken from around the new hash back into the
		// area below randmax
		for (int j = 0; j < 32; j += 2)
		{
			const uint32_t offset = randbuffer[j >> 1] % randmax;
			uint32_t word;
			// source and destination may overlap and neither is guaranteed
			// to be 4-byte aligned: read the word fully, then store it
			memcpy(&word, hashbuffer + j + randmax, sizeof(word));
			memcpy(hashbuffer + offset, &word, sizeof(word));
		}
	}

	memcpy(hash, hashbuffer, 32);
}

static bool init[MAX_GPUS] = { 0 };

static __thread uchar* scratchbuf = NULL;

// Scans nonces for the pluck algorithm on the GPU mapped to thr_id.
//
//   thr_id:      index into init[], d_hash[], device_map[] and work_restart[]
//   pdata:       block header words; the first 20 are used and pdata[19] is
//                the nonce (start value on entry, updated on return)
//   ptarget:     target words; ptarget[7] is compared first, then fulltest()
//                checks the whole hash
//   max_nonce:   scanning stops once pdata[19] reaches this value
//   hashes_done: output, number of nonces covered by this call
//
// Returns 1 when a nonce reported by the GPU also validates on the CPU
// (pdata[19] is then set to that nonce), otherwise 0.
extern "C" int scanhash_pluck(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
	uint32_t max_nonce, unsigned long *hashes_done)
{
	const uint32_t first_nonce = pdata[19];
	uint32_t endiandata[20];
	int opt_pluck_n = 128;

	int intensity = is_windows() ? 17 : 19; /* beware > 20 could work and create diff problems later */
	uint32_t throughput = device_intensity(thr_id, __func__, 1U << intensity);
	// divide by 128 for this algo which require a lot of memory
	throughput /= 128;
	// the unconditional "- 256" wrapped around (unsigned) for small intensities
	if (throughput > 256)
		throughput -= 256;
	if (throughput == 0)
		throughput = 1;

	// per-thread scratchpad for the CPU validation in pluckhash()
	if (!scratchbuf) {
		scratchbuf = (uchar*) calloc(opt_pluck_n, 1024);
		if (!scratchbuf) {
			applog(LOG_WARNING, "GPU #%d: unable to allocate the pluck scratch buffer", device_map[thr_id]);
			*hashes_done = 0;
			return 0;
		}
	}

	if (opt_benchmark)
		((uint32_t*)ptarget)[7] = 0x0000ff;

	if (!init[thr_id])
	{
		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
		//cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
		//cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);

		// The buffer is sized (and the kernel side initialised) with the
		// full, unclamped throughput so that later calls with a larger
		// nonce range never exceed it. size_t math avoids 32-bit overflow.
		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t)opt_pluck_n * 1024 * throughput));

		pluck_cpu_init(thr_id, throughput, d_hash[thr_id]);

		CUDA_SAFE_CALL(cudaGetLastError());
		applog(LOG_INFO, "Using %u cuda threads", throughput);

		init[thr_id] = true;
	}

	// Clamp the batch to the requested nonce range. Done in 64 bits so a
	// range covering all 2^32 nonces does not wrap to a zero-sized batch.
	const uint64_t nonce_range = (uint64_t)(uint32_t)(max_nonce - first_nonce) + 1;
	if (nonce_range < throughput)
		throughput = (uint32_t)nonce_range;

	for (int k = 0; k < 20; k++)
		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);

	pluck_setBlockTarget(endiandata,ptarget);

	do {
		// UINT32_MAX is treated as "no candidate in this batch"
		uint32_t foundNonce = pluck_cpu_hash(thr_id, throughput, pdata[19], 0);
		if (foundNonce != UINT32_MAX)
		{
			const uint32_t Htarg = ptarget[7];
			uint32_t vhash64[8];
			be32enc(&endiandata[19], foundNonce);
			pluckhash(vhash64, endiandata, scratchbuf, opt_pluck_n);
			if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
				*hashes_done = pdata[19] - first_nonce + throughput;
				pdata[19] = foundNonce;
				return 1;
			} else {
				applog(LOG_WARNING, "GPU #%d: result for %08x does not validate on CPU!", device_map[thr_id], foundNonce);
			}
		}

		// stop instead of letting the 32-bit nonce counter wrap around,
		// which would make the loop condition below true again
		if (throughput > UINT32_MAX - pdata[19]) {
			pdata[19] = UINT32_MAX;
			break;
		}
		pdata[19] += throughput;

	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);

	*hashes_done = pdata[19] - first_nonce;
	return 0;
}
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago			`/* Based on djm code */`

pluck: finish the algo, add cpu validation 10 years ago			`#include <stdint.h>`

pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago			`#include "miner.h"`
pluck: finish the algo, add cpu validation 10 years ago			`#include "cuda_helper.h"`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago
pluck: finish the algo, add cpu validation 10 years ago			`#include <openssl/sha.h>`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago
			`static uint32_t *d_hash[MAX_GPUS] ;`

			`extern void pluck_setBlockTarget(const void* data, const void *ptarget);`
			`extern void pluck_cpu_init(int thr_id, uint32_t threads, uint32_t *d_outputHash);`
			`extern uint32_t pluck_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, int order);`

			`extern float tp_coef[MAX_GPUS];`

			`#define ROTL(a, b) (((a) << (b)) \| ((a) >> (32 - (b))))`
			`//note, this is 64 bytes`
			`static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])`
			`{`
			`#define ROTL(a, b) (((a) << (b)) \| ((a) >> (32 - (b))))`
			`uint32_t x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, x11, x12, x13, x14, x15;`
			`int i;`

			`x00 = (B[0] ^= Bx[0]);`
			`x01 = (B[1] ^= Bx[1]);`
			`x02 = (B[2] ^= Bx[2]);`
			`x03 = (B[3] ^= Bx[3]);`
			`x04 = (B[4] ^= Bx[4]);`
			`x05 = (B[5] ^= Bx[5]);`
			`x06 = (B[6] ^= Bx[6]);`
			`x07 = (B[7] ^= Bx[7]);`
			`x08 = (B[8] ^= Bx[8]);`
			`x09 = (B[9] ^= Bx[9]);`
			`x10 = (B[10] ^= Bx[10]);`
			`x11 = (B[11] ^= Bx[11]);`
			`x12 = (B[12] ^= Bx[12]);`
			`x13 = (B[13] ^= Bx[13]);`
			`x14 = (B[14] ^= Bx[14]);`
			`x15 = (B[15] ^= Bx[15]);`
			`for (i = 0; i < 8; i += 2) {`
			`/* Operate on columns. */`
			`x04 ^= ROTL(x00 + x12, 7); x09 ^= ROTL(x05 + x01, 7);`
			`x14 ^= ROTL(x10 + x06, 7); x03 ^= ROTL(x15 + x11, 7);`

			`x08 ^= ROTL(x04 + x00, 9); x13 ^= ROTL(x09 + x05, 9);`
			`x02 ^= ROTL(x14 + x10, 9); x07 ^= ROTL(x03 + x15, 9);`

			`x12 ^= ROTL(x08 + x04, 13); x01 ^= ROTL(x13 + x09, 13);`
			`x06 ^= ROTL(x02 + x14, 13); x11 ^= ROTL(x07 + x03, 13);`

			`x00 ^= ROTL(x12 + x08, 18); x05 ^= ROTL(x01 + x13, 18);`
			`x10 ^= ROTL(x06 + x02, 18); x15 ^= ROTL(x11 + x07, 18);`

			`/* Operate on rows. */`
			`x01 ^= ROTL(x00 + x03, 7); x06 ^= ROTL(x05 + x04, 7);`
			`x11 ^= ROTL(x10 + x09, 7); x12 ^= ROTL(x15 + x14, 7);`

			`x02 ^= ROTL(x01 + x00, 9); x07 ^= ROTL(x06 + x05, 9);`
			`x08 ^= ROTL(x11 + x10, 9); x13 ^= ROTL(x12 + x15, 9);`

			`x03 ^= ROTL(x02 + x01, 13); x04 ^= ROTL(x07 + x06, 13);`
			`x09 ^= ROTL(x08 + x11, 13); x14 ^= ROTL(x13 + x12, 13);`

			`x00 ^= ROTL(x03 + x02, 18); x05 ^= ROTL(x04 + x07, 18);`
			`x10 ^= ROTL(x09 + x08, 18); x15 ^= ROTL(x14 + x13, 18);`
			`}`
			`B[0] += x00;`
			`B[1] += x01;`
			`B[2] += x02;`
			`B[3] += x03;`
			`B[4] += x04;`
			`B[5] += x05;`
			`B[6] += x06;`
			`B[7] += x07;`
			`B[8] += x08;`
			`B[9] += x09;`
			`B[10] += x10;`
			`B[11] += x11;`
			`B[12] += x12;`
			`B[13] += x13;`
			`B[14] += x14;`
			`B[15] += x15;`
			`#undef ROTL`
			`}`

pluck: finish the algo, add cpu validation 10 years ago			`static void sha256_hash(uchar hash, const uchar data, int len)`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago			`{`
pluck: finish the algo, add cpu validation 10 years ago			`SHA256_CTX ctx;`
			`SHA256_Init(&ctx);`
			`SHA256_Update(&ctx, data, len);`
			`SHA256_Final(hash, &ctx);`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago			`}`

pluck: finish the algo, add cpu validation 10 years ago			`// hash exactly 64 bytes (ie, sha256 block size)`
			`static void sha256_hash512(uint32_t hash, const uint32_t data)`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago			`{`
pluck: finish the algo, add cpu validation 10 years ago			`uint32_t _ALIGN(64) S[16];`
			`uint32_t _ALIGN(64) T[16];`
			`uchar _ALIGN(64) E[64] = { 0 };`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago			`int i;`

			`sha256_init(S);`

			`for (i = 0; i < 16; i++)`
pluck: finish the algo, add cpu validation 10 years ago			`T[i] = be32dec(&data[i]);`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago			`sha256_transform(S, T, 0);`

pluck: finish the algo, add cpu validation 10 years ago			`E[3] = 0x80;`
			`E[61] = 0x02; // T[15] = 8 * 64 => 0x200;`
			`sha256_transform(S, (uint32_t*)E, 0);`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago
			`for (i = 0; i < 8; i++)`
pluck: finish the algo, add cpu validation 10 years ago			`be32enc(&hash[i], S[i]);`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago			`}`

pluck: finish the algo, add cpu validation 10 years ago			`#define BLOCK_HEADER_SIZE 80`
			`void pluckhash(uint32_t hash, const uint32_t data, uchar *hashbuffer, const int N)`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago			`{`
pluck: finish the algo, add cpu validation 10 years ago			`int size = N * 1024;`
			`sha256_hash(hashbuffer, (uchar*)data, BLOCK_HEADER_SIZE);`
			`memset(&hashbuffer[32], 0, 32);`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago
pluck: finish the algo, add cpu validation 10 years ago			`for (int i = 64; i < size - 32; i += 32)`
			`{`
			`uint32_t _ALIGN(64) randseed[16];`
			`uint32_t _ALIGN(64) randbuffer[16];`
			`uint32_t _ALIGN(64) joint[16];`
			`//i-4 because we use integers for all references against this, and we don't want to go 3 bytes over the defined area`
			`//we could use size here, but then it's probable to use 0 as the value in most cases`
			`int randmax = i - 4;`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago
pluck: finish the algo, add cpu validation 10 years ago			`//setup randbuffer to be an array of random indexes`
			`memcpy(randseed, &hashbuffer[i - 64], 64);`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago
pluck: finish the algo, add cpu validation 10 years ago			`if (i > 128) memcpy(randbuffer, &hashbuffer[i - 128], 64);`
			`else memset(randbuffer, 0, 64);`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago
pluck: finish the algo, add cpu validation 10 years ago			`xor_salsa8((uint32_t)randbuffer, (uint32_t)randseed);`
			`memcpy(joint, &hashbuffer[i - 32], 32);`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago
pluck: finish the algo, add cpu validation 10 years ago			`//use the last hash value as the seed`
			`for (int j = 32; j < 64; j += 4)`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago			`{`
pluck: finish the algo, add cpu validation 10 years ago			`//every other time, change to next random index`
			`//randmax - 32 as otherwise we go beyond memory that's already been written to`
			`uint32_t rand = randbuffer[(j - 32) >> 2] % (randmax - 32);`
			`joint[j >> 2] = ((uint32_t )&hashbuffer[rand]);`
			`}`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago
pluck: finish the algo, add cpu validation 10 years ago			`sha256_hash512((uint32_t*)&hashbuffer[i], joint);`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago
pluck: finish the algo, add cpu validation 10 years ago			`//setup randbuffer to be an array of random indexes`
			`//use last hash value and previous hash value(post-mixing)`
			`memcpy(randseed, &hashbuffer[i - 32], 64);`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago
pluck: finish the algo, add cpu validation 10 years ago			`if (i > 128) memcpy(randbuffer, &hashbuffer[i - 128], 64);`
			`else memset(randbuffer, 0, 64);`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago
pluck: finish the algo, add cpu validation 10 years ago			`xor_salsa8((uint32_t)randbuffer, (uint32_t)randseed);`

			`//use the last hash value as the seed`
			`for (int j = 0; j < 32; j += 2)`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago			`{`
pluck: finish the algo, add cpu validation 10 years ago			`uint32_t rand = randbuffer[j >> 1] % randmax;`
			`((uint32_t )(hashbuffer + rand)) = ((uint32_t )(hashbuffer + j + randmax));`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago			`}`
pluck: finish the algo, add cpu validation 10 years ago			`}`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago
pluck: finish the algo, add cpu validation 10 years ago			`memcpy(hash, hashbuffer, 32);`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago			`}`

			`static bool init[MAX_GPUS] = { 0 };`

pluck: some cleanup 10 years ago			`static __thread uchar* scratchbuf = NULL;`
pluck: finish the algo, add cpu validation 10 years ago
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago			`extern "C" int scanhash_pluck(int thr_id, uint32_t pdata, const uint32_t ptarget,`
			`uint32_t max_nonce, unsigned long *hashes_done)`
			`{`
			`const uint32_t first_nonce = pdata[19];`
			`uint32_t endiandata[20];`
pluck: finish the algo, add cpu validation 10 years ago			`int opt_pluck_n = 128;`
cputest: cleanup + calloc 10 years ago
			`int intensity = is_windows() ? 17 : 19; /* beware > 20 could work and create diff problems later */`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago			`uint32_t throughput = device_intensity(thr_id, __func__, 1U << intensity);`
			`// divide by 128 for this algo which require a lot of memory`
			`throughput = throughput / 128 - 256;`
			`throughput = min(throughput, max_nonce - first_nonce + 1);`

			`if (opt_benchmark)`
			`((uint32_t*)ptarget)[7] = 0x0000ff;`

			`if (!init[thr_id])`
			`{`
			`cudaSetDevice(device_map[thr_id]);`
			`//cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);`
			`//cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);`
pluck: finish the algo, add cpu validation 10 years ago			`cudaMalloc(&d_hash[thr_id], opt_pluck_n * 1024 * throughput);`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago
pluck: finish the algo, add cpu validation 10 years ago			`if (!scratchbuf)`
			`scratchbuf = (uchar*) calloc(opt_pluck_n, 1024);`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago
			`pluck_cpu_init(thr_id, throughput, d_hash[thr_id]);`
neoscrypt: fix bad source file ext (cu/cpp) 10 years ago
			`CUDA_SAFE_CALL(cudaGetLastError());`
			`applog(LOG_INFO, "Using %d cuda threads", throughput);`

pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago			`init[thr_id] = true;`
			`}`

			`for (int k = 0; k < 20; k++)`
			`be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);`

			`pluck_setBlockTarget(endiandata,ptarget);`

			`do {`
			`uint32_t foundNonce = pluck_cpu_hash(thr_id, throughput, pdata[19], 0);`
			`if (foundNonce != UINT32_MAX)`
			`{`
pluck: finish the algo, add cpu validation 10 years ago			`const uint32_t Htarg = ptarget[7];`
			`uint32_t vhash64[8];`
			`be32enc(&endiandata[19], foundNonce);`
			`pluckhash(vhash64, endiandata, scratchbuf, opt_pluck_n);`
			`if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago			`*hashes_done = pdata[19] - first_nonce + throughput;`
			`pdata[19] = foundNonce;`
			`return 1;`
pluck: finish the algo, add cpu validation 10 years ago			`} else {`
warnings: use the right device id (device_map[thr_id]) 10 years ago			`applog(LOG_WARNING, "GPU #%d: result for %08x does not validate on CPU!", device_map[thr_id], foundNonce);`
pluck: finish the algo, add cpu validation 10 years ago			`}`
pluck: adaptation from djm repo remains the cpu validation check to do... throughput for this algo is divided by 128 to keep same kind of intensity values (default 18.0) 10 years ago			`}`

			`pdata[19] += throughput;`

			`} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);`

			`*hashes_done = pdata[19] - first_nonce;`
			`return 0;`
			`}`