// ccminer/lbry/lbry.cu

/**
 * Lbry Algo (sha-256 / sha-512 / ripemd)
 *
 * tpruvot and Provos Alexis - Jul / Sep 2016
 *
 * Sponsored by LBRY.IO team
 */

#include <string.h>
#include <stdint.h>

extern "C" {
#include <sph/sph_sha2.h>
#include <sph/sph_ripemd.h>
}

#include <cuda_helper.h>
#include <miner.h>

// Alignment in bytes handed to _ALIGN() for the host-side hash buffers in this file.
#define A 64
// Set to non-zero to hex-dump intermediate digests from lbry_hash() via applog_hex().
#define debug_cpu 0

extern "C" void lbry_hash(void* output, const void* input)
{
	uint32_t _ALIGN(A) hashA[16];
	uint32_t _ALIGN(A) hashB[8];
	uint32_t _ALIGN(A) hashC[8];

	sph_sha256_context ctx_sha256;
	sph_sha512_context ctx_sha512;
	sph_ripemd160_context ctx_ripemd;

	sph_sha256_init(&ctx_sha256);
	sph_sha256(&ctx_sha256, input, 112);
	sph_sha256_close(&ctx_sha256, hashA);

	sph_sha256(&ctx_sha256, hashA, 32);
	sph_sha256_close(&ctx_sha256, hashA);

	sph_sha512_init(&ctx_sha512);
	sph_sha512(&ctx_sha512, hashA, 32);
	sph_sha512_close(&ctx_sha512, hashA);

	sph_ripemd160_init(&ctx_ripemd);
	sph_ripemd160(&ctx_ripemd, hashA, 32);  // sha512 low
	sph_ripemd160_close(&ctx_ripemd, hashB);
	if (debug_cpu) applog_hex(hashB, 20);

	sph_ripemd160(&ctx_ripemd, &hashA[8], 32); // sha512 high
	sph_ripemd160_close(&ctx_ripemd, hashC);
	if (debug_cpu) applog_hex(hashC, 20);

	sph_sha256(&ctx_sha256, hashB, 20);
	sph_sha256(&ctx_sha256, hashC, 20);
	sph_sha256_close(&ctx_sha256, hashA);
	if (debug_cpu) applog_hex(hashA,32);

	sph_sha256(&ctx_sha256, hashA, 32);
	sph_sha256_close(&ctx_sha256, hashA);

	memcpy(output, hashA, 32);
}

/* ############################################################################################################################### */

// Host-side entry points defined outside this file.
// lbry_sha256_init, lbry_sha256_free and lbry_sha512_init are declared here
// but not called anywhere in this file.
// The setBlock_112 / hash_112 / hash_32 / hash_final functions form the
// multi-pass path that scanhash_lbry takes when device_sm <= 500.
extern void lbry_sha256_init(int thr_id);
extern void lbry_sha256_free(int thr_id);
extern void lbry_sha256_setBlock_112(uint32_t *pdata);
extern void lbry_sha256d_hash_112(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_outputHash);
extern void lbry_sha512_init(int thr_id);
extern void lbry_sha512_hash_32(int thr_id, uint32_t threads, uint32_t *d_hash);
extern void lbry_sha256d_hash_final(int thr_id, uint32_t threads, uint32_t *d_inputHash, uint32_t *d_resNonce, const uint64_t target64);

// Single-call path that scanhash_lbry takes when device_sm > 500.
// Note the argument order: startNonce comes BEFORE threads here, the
// opposite of lbry_sha256d_hash_112 above.
extern void lbry_sha256_setBlock_112_merged(uint32_t *pdata);
extern void lbry_merged(int thr_id,uint32_t startNonce, uint32_t threads, uint32_t *d_resNonce, const uint64_t target64);

// Return val passed through swab32() when iftrue is set, otherwise val unchanged.
static __inline uint32_t swab32_if(uint32_t val, bool iftrue) {
	if (!iftrue)
		return val;
	return swab32(val);
}

// Per-thread flag: set by scanhash_lbry once the device buffers below are
// allocated, cleared again by free_lbry.
static bool init[MAX_GPUS] = { 0 };

// Per-thread device buffer for intermediate hashes; only allocated (and freed)
// when device_sm <= 500, with 8 * sizeof(uint64_t) bytes per cuda thread.
static uint32_t *d_hash[MAX_GPUS];
// Per-thread device buffer of two uint32_t result slots; filled with 0xFF
// bytes (UINT32_MAX) to mean "no nonce found".
static uint32_t *d_resNonce[MAX_GPUS];
// nonce position is different
// (index, in 32-bit words, of the nonce inside work->data)
#define LBC_NONCE_OFT32 27

/**
 * Scan a nonce range for LBRY shares on the GPU mapped to thr_id.
 *
 * The first LBC_NONCE_OFT32 words of work->data are big-endian encoded and
 * uploaded, then batches of `throughput` nonces are hashed on the device.
 * A candidate reported by the device is re-hashed on the CPU with lbry_hash()
 * and checked against the target before it is accepted. The loop ends when a
 * candidate is accepted, max_nonce is reached, or a work restart is flagged.
 *
 * @param thr_id      miner thread index; selects the device and the
 *                    per-thread buffers
 * @param work        current work; data[LBC_NONCE_OFT32] is the start nonce
 *                    and is updated in place, nonces[] receives the results
 * @param max_nonce   upper bound of the nonce range to scan
 * @param hashes_done receives the number of nonces covered by this call
 * @return number of nonces stored in work->nonces (1 or 2), 0 if none
 */
extern "C" int scanhash_lbry(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done)
{
	uint32_t _ALIGN(A) vhash[8];
	uint32_t _ALIGN(A) endiandata[28];
	uint32_t *pdata = work->data;
	uint32_t *ptarget = work->target;

	const uint32_t first_nonce = pdata[LBC_NONCE_OFT32];
	const int swap = 0; // to toggle nonce endian (need kernel change)

	const int dev_id = device_map[thr_id];
	// default intensity is picked from the device_sm value of the device
	int intensity = (device_sm[dev_id] > 500 && !is_windows()) ? 22 : 20;
	if (device_sm[dev_id] >= 600) intensity = 23;
	if (device_sm[dev_id] < 350) intensity = 18;

	uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity);
	// The first call sizes d_hash from the unclamped throughput; later calls
	// may shrink the batch so it does not run past max_nonce.
	if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);

	if (opt_benchmark) {
		ptarget[7] = 0xf;
	}

	if (!init[thr_id]){
		cudaSetDevice(dev_id);
		if (opt_cudaschedule == -1 && gpu_threads == 1) {
			cudaDeviceReset();
			// reduce cpu usage
			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
			cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
			CUDA_LOG_ERROR();
		}
		gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);

		// the intermediate hash buffer is only needed by the multi-pass path
		if(device_sm[dev_id] <= 500)
			CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 8 * sizeof(uint64_t) * throughput));

		CUDA_SAFE_CALL(cudaMalloc(&d_resNonce[thr_id], 2 * sizeof(uint32_t)));
		CUDA_LOG_ERROR();

		init[thr_id] = true;
	}

	for (int i=0; i < LBC_NONCE_OFT32; i++) {
		be32enc(&endiandata[i], pdata[i]);
	}
	// Give the nonce word a defined value so the upload below never reads
	// indeterminate stack memory. NOTE(review): the kernels presumably
	// substitute their own per-thread nonce for this word - confirm.
	endiandata[LBC_NONCE_OFT32] = 0;

	if(device_sm[dev_id] <= 500)
		lbry_sha256_setBlock_112(endiandata);
	else
		lbry_sha256_setBlock_112_merged(endiandata);

	// 0xFF bytes == UINT32_MAX in both slots: "no nonce found"
	cudaMemset(d_resNonce[thr_id], 0xFF, 2 * sizeof(uint32_t));

	do {
		// Hash with CUDA
		if(device_sm[dev_id] <= 500){
			lbry_sha256d_hash_112(thr_id, throughput, pdata[LBC_NONCE_OFT32], d_hash[thr_id]);
			lbry_sha512_hash_32(thr_id, throughput, d_hash[thr_id]);
			lbry_sha256d_hash_final(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id], *(uint64_t*)&ptarget[6]);
		}else{
			lbry_merged(thr_id,pdata[LBC_NONCE_OFT32], throughput, d_resNonce[thr_id], *(uint64_t*)&ptarget[6]);
		}
		uint32_t resNonces[2] = { UINT32_MAX, UINT32_MAX };
		cudaMemcpy(resNonces, d_resNonce[thr_id], 2 * sizeof(uint32_t), cudaMemcpyDeviceToHost);

		if (resNonces[0] != UINT32_MAX)
		{
			const uint32_t startNonce = pdata[LBC_NONCE_OFT32];

			// the device reports offsets relative to the batch start
			resNonces[0] += startNonce;

			// re-hash the candidate on the CPU before trusting it
			endiandata[LBC_NONCE_OFT32] = swab32_if(resNonces[0], !swap);
			lbry_hash(vhash, endiandata);

			if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
				int res = 1;
				*hashes_done = pdata[LBC_NONCE_OFT32] - first_nonce + throughput;
				work->nonces[0] = swab32_if(resNonces[0], swap);
				work_set_target_ratio(work, vhash);
				if (resNonces[1] != UINT32_MAX) {
					resNonces[1] += startNonce;
					if (opt_debug)
						gpulog(LOG_BLUE, thr_id, "Found second nonce %08x", swab32(resNonces[1]));
					endiandata[LBC_NONCE_OFT32] = swab32_if(resNonces[1], !swap);
					lbry_hash(vhash, endiandata);
					work->nonces[1] = swab32_if(resNonces[1], swap);
					// keep the nonce with the higher ratio in slot 0
					if (bn_hash_target_ratio(vhash, ptarget) > work->shareratio[0]) {
						work_set_target_ratio(work, vhash);
						xchg(work->nonces[0], work->nonces[1]);
					}
					res++;
				}
				pdata[LBC_NONCE_OFT32] = work->nonces[0];
				return res;
			} else {
				// Candidate rejected. The warning is kept for the case where
				// the most significant word already misses the target.
				if (vhash[7] > ptarget[7]) {
					gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU %08x > %08x!", resNonces[0], vhash[7], ptarget[7]);
				}
				// Always clear the result slots after a rejected candidate
				// (including one that only fails fulltest), otherwise the
				// stale offsets are read back on the next batch and added
				// to a different start nonce.
				cudaMemset(d_resNonce[thr_id], 0xFF, 2 * sizeof(uint32_t));
			}
		}

		// 64-bit sum so the range check cannot wrap around
		if ((uint64_t) throughput + pdata[LBC_NONCE_OFT32] >= max_nonce) {
			pdata[LBC_NONCE_OFT32] = max_nonce;
			break;
		}

		pdata[LBC_NONCE_OFT32] += throughput;

	} while (!work_restart[thr_id].restart);

	*hashes_done = pdata[LBC_NONCE_OFT32] - first_nonce;

	return 0;
}

// cleanup
/**
 * Release the device buffers that scanhash_lbry allocated for thr_id.
 * Does nothing if the thread was never initialised (or was already freed).
 */
void free_lbry(int thr_id)
{
	if (!init[thr_id])
		return;

	// wait for outstanding device work before releasing its buffers
	// (cudaThreadSynchronize() is deprecated in favour of this call)
	cudaDeviceSynchronize();

	// d_hash is only allocated for devices taking the multi-pass path
	if(device_sm[device_map[thr_id]]<=500)
		cudaFree(d_hash[thr_id]);

	cudaFree(d_resNonce[thr_id]);

	init[thr_id] = false;

	cudaDeviceSynchronize();
}
lbry algo (stratum only) Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 8 years ago			`/**`
lbry maxwell and pascal update (up to 10% on pascal) Based on alexis78 work and sponsored by LBRY.IO team (thanks) Release 1.8.2, use cuda 8 for x86 8 years ago			`* Lbry Algo (sha-256 / sha-512 / ripemd)`
lbry algo (stratum only) Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 8 years ago			`*`
lbry maxwell and pascal update (up to 10% on pascal) Based on alexis78 work and sponsored by LBRY.IO team (thanks) Release 1.8.2, use cuda 8 for x86 8 years ago			`* tpruvot and Provos Alexis - Jul / Sep 2016`
lbry algo (stratum only) Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 8 years ago			`*`
lbry maxwell and pascal update (up to 10% on pascal) Based on alexis78 work and sponsored by LBRY.IO team (thanks) Release 1.8.2, use cuda 8 for x86 8 years ago			`* Sponsored by LBRY.IO team`
lbry algo (stratum only) Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 8 years ago			`*/`

			`#include <string.h>`
			`#include <stdint.h>`

			`extern "C" {`
			`#include <sph/sph_sha2.h>`
			`#include <sph/sph_ripemd.h>`
			`}`

			`#include <cuda_helper.h>`
			`#include <miner.h>`

			`#define A 64`
			`#define debug_cpu 0`

			`extern "C" void lbry_hash(void* output, const void* input)`
			`{`
			`uint32_t _ALIGN(A) hashA[16];`
			`uint32_t _ALIGN(A) hashB[8];`
			`uint32_t _ALIGN(A) hashC[8];`

			`sph_sha256_context ctx_sha256;`
			`sph_sha512_context ctx_sha512;`
			`sph_ripemd160_context ctx_ripemd;`

			`sph_sha256_init(&ctx_sha256);`
			`sph_sha256(&ctx_sha256, input, 112);`
			`sph_sha256_close(&ctx_sha256, hashA);`

			`sph_sha256(&ctx_sha256, hashA, 32);`
			`sph_sha256_close(&ctx_sha256, hashA);`

			`sph_sha512_init(&ctx_sha512);`
			`sph_sha512(&ctx_sha512, hashA, 32);`
			`sph_sha512_close(&ctx_sha512, hashA);`

			`sph_ripemd160_init(&ctx_ripemd);`
			`sph_ripemd160(&ctx_ripemd, hashA, 32); // sha512 low`
			`sph_ripemd160_close(&ctx_ripemd, hashB);`
			`if (debug_cpu) applog_hex(hashB, 20);`

			`sph_ripemd160(&ctx_ripemd, &hashA[8], 32); // sha512 high`
			`sph_ripemd160_close(&ctx_ripemd, hashC);`
			`if (debug_cpu) applog_hex(hashC, 20);`

			`sph_sha256(&ctx_sha256, hashB, 20);`
			`sph_sha256(&ctx_sha256, hashC, 20);`
			`sph_sha256_close(&ctx_sha256, hashA);`
			`if (debug_cpu) applog_hex(hashA,32);`

			`sph_sha256(&ctx_sha256, hashA, 32);`
			`sph_sha256_close(&ctx_sha256, hashA);`

			`memcpy(output, hashA, 32);`
			`}`

			`/* ############################################################################################################################### */`

			`extern void lbry_sha256_init(int thr_id);`
			`extern void lbry_sha256_free(int thr_id);`
lbry maxwell and pascal update (up to 10% on pascal) Based on alexis78 work and sponsored by LBRY.IO team (thanks) Release 1.8.2, use cuda 8 for x86 8 years ago			`extern void lbry_sha256_setBlock_112(uint32_t *pdata);`
preview 3, with alexis78 touch 8 years ago			`extern void lbry_sha256d_hash_112(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_outputHash);`
lbry algo (stratum only) Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 8 years ago			`extern void lbry_sha512_init(int thr_id);`
preview 3, with alexis78 touch 8 years ago			`extern void lbry_sha512_hash_32(int thr_id, uint32_t threads, uint32_t *d_hash);`
lbry maxwell and pascal update (up to 10% on pascal) Based on alexis78 work and sponsored by LBRY.IO team (thanks) Release 1.8.2, use cuda 8 for x86 8 years ago			`extern void lbry_sha256d_hash_final(int thr_id, uint32_t threads, uint32_t d_inputHash, uint32_t d_resNonce, const uint64_t target64);`

			`extern void lbry_sha256_setBlock_112_merged(uint32_t *pdata);`
			`extern void lbry_merged(int thr_id,uint32_t startNonce, uint32_t threads, uint32_t *d_resNonce, const uint64_t target64);`
lbry algo (stratum only) Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 8 years ago
			`static __inline uint32_t swab32_if(uint32_t val, bool iftrue) {`
			`return iftrue ? swab32(val) : val;`
			`}`

			`static bool init[MAX_GPUS] = { 0 };`

			`static uint32_t *d_hash[MAX_GPUS];`
lbry: some changes from alexis, remove shared mem 105 LBC tipped ;) 8 years ago			`static uint32_t *d_resNonce[MAX_GPUS];`
lbry algo (stratum only) Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 8 years ago			`// nonce position is different`
			`#define LBC_NONCE_OFT32 27`

			`extern "C" int scanhash_lbry(int thr_id, struct work work, uint32_t max_nonce, unsigned long hashes_done)`
			`{`
			`uint32_t _ALIGN(A) vhash[8];`
			`uint32_t _ALIGN(A) endiandata[28];`
			`uint32_t *pdata = work->data;`
			`uint32_t *ptarget = work->target;`

			`const uint32_t first_nonce = pdata[LBC_NONCE_OFT32];`
preview 3, with alexis78 touch 8 years ago			`const int swap = 0; // to toggle nonce endian (need kernel change)`
lbry algo (stratum only) Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 8 years ago
			`const int dev_id = device_map[thr_id];`
			`int intensity = (device_sm[dev_id] > 500 && !is_windows()) ? 22 : 20;`
			`if (device_sm[dev_id] >= 600) intensity = 23;`
			`if (device_sm[dev_id] < 350) intensity = 18;`

			`uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity);`
lbry maxwell and pascal update (up to 10% on pascal) Based on alexis78 work and sponsored by LBRY.IO team (thanks) Release 1.8.2, use cuda 8 for x86 8 years ago			`if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);`
lbry algo (stratum only) Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 8 years ago
			`if (opt_benchmark) {`
lbry: small changes for second build sha512/ripemd swab this was preview 2 8 years ago			`ptarget[7] = 0xf;`
lbry algo (stratum only) Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 8 years ago			`}`

			`if (!init[thr_id]){`
			`cudaSetDevice(dev_id);`
			`if (opt_cudaschedule == -1 && gpu_threads == 1) {`
			`cudaDeviceReset();`
lbry maxwell and pascal update (up to 10% on pascal) Based on alexis78 work and sponsored by LBRY.IO team (thanks) Release 1.8.2, use cuda 8 for x86 8 years ago			`// reduce cpu usage`
lbry algo (stratum only) Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 8 years ago			`cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);`
lbry: some changes from alexis, remove shared mem 105 LBC tipped ;) 8 years ago			`cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);`
lbry algo (stratum only) Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 8 years ago			`CUDA_LOG_ERROR();`
			`}`
cuda: throughput2intensity function to show default 8 years ago			`gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);`
lbry algo (stratum only) Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 8 years ago
lbry maxwell and pascal update (up to 10% on pascal) Based on alexis78 work and sponsored by LBRY.IO team (thanks) Release 1.8.2, use cuda 8 for x86 8 years ago			`if(device_sm[dev_id] <= 500)`
			`CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 8 * sizeof(uint64_t) * throughput));`
lbry algo (stratum only) Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 8 years ago
lbry maxwell and pascal update (up to 10% on pascal) Based on alexis78 work and sponsored by LBRY.IO team (thanks) Release 1.8.2, use cuda 8 for x86 8 years ago			`CUDA_SAFE_CALL(cudaMalloc(&d_resNonce[thr_id], 2 * sizeof(uint32_t)));`
lbry algo (stratum only) Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 8 years ago			`CUDA_LOG_ERROR();`

			`init[thr_id] = true;`
			`}`

			`for (int i=0; i < LBC_NONCE_OFT32; i++) {`
			`be32enc(&endiandata[i], pdata[i]);`
			`}`

lbry maxwell and pascal update (up to 10% on pascal) Based on alexis78 work and sponsored by LBRY.IO team (thanks) Release 1.8.2, use cuda 8 for x86 8 years ago			`if(device_sm[dev_id] <= 500)`
			`lbry_sha256_setBlock_112(endiandata);`
			`else`
			`lbry_sha256_setBlock_112_merged(endiandata);`

lbry: some changes from alexis, remove shared mem 105 LBC tipped ;) 8 years ago			`cudaMemset(d_resNonce[thr_id], 0xFF, 2 * sizeof(uint32_t));`
lbry algo (stratum only) Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 8 years ago
			`do {`
			`// Hash with CUDA`
lbry maxwell and pascal update (up to 10% on pascal) Based on alexis78 work and sponsored by LBRY.IO team (thanks) Release 1.8.2, use cuda 8 for x86 8 years ago			`if(device_sm[dev_id] <= 500){`
			`lbry_sha256d_hash_112(thr_id, throughput, pdata[LBC_NONCE_OFT32], d_hash[thr_id]);`
			`lbry_sha512_hash_32(thr_id, throughput, d_hash[thr_id]);`
			`lbry_sha256d_hash_final(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id], (uint64_t)&ptarget[6]);`
			`}else{`
			`lbry_merged(thr_id,pdata[LBC_NONCE_OFT32], throughput, d_resNonce[thr_id], (uint64_t)&ptarget[6]);`
lbry: exit on card failure, dont loop 8 years ago			`}`
lbry maxwell and pascal update (up to 10% on pascal) Based on alexis78 work and sponsored by LBRY.IO team (thanks) Release 1.8.2, use cuda 8 for x86 8 years ago			`uint32_t resNonces[2] = { UINT32_MAX, UINT32_MAX };`
lbry: some changes from alexis, remove shared mem 105 LBC tipped ;) 8 years ago			`cudaMemcpy(resNonces, d_resNonce[thr_id], 2 * sizeof(uint32_t), cudaMemcpyDeviceToHost);`
lbry algo (stratum only) Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 8 years ago
lbry: some changes from alexis, remove shared mem 105 LBC tipped ;) 8 years ago			`if (resNonces[0] != UINT32_MAX)`
lbry algo (stratum only) Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 8 years ago			`{`
lbry: some changes from alexis, remove shared mem 105 LBC tipped ;) 8 years ago			`const uint32_t startNonce = pdata[LBC_NONCE_OFT32];`
lbry maxwell and pascal update (up to 10% on pascal) Based on alexis78 work and sponsored by LBRY.IO team (thanks) Release 1.8.2, use cuda 8 for x86 8 years ago
lbry: some changes from alexis, remove shared mem 105 LBC tipped ;) 8 years ago			`resNonces[0] += startNonce;`

			`endiandata[LBC_NONCE_OFT32] = swab32_if(resNonces[0], !swap);`
lbry algo (stratum only) Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 8 years ago			`lbry_hash(vhash, endiandata);`

			`if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {`
			`int res = 1;`
lbry maxwell and pascal update (up to 10% on pascal) Based on alexis78 work and sponsored by LBRY.IO team (thanks) Release 1.8.2, use cuda 8 for x86 8 years ago			`*hashes_done = pdata[LBC_NONCE_OFT32] - first_nonce + throughput;`
lbry: some changes from alexis, remove shared mem 105 LBC tipped ;) 8 years ago			`work->nonces[0] = swab32_if(resNonces[0], swap);`
lbry algo (stratum only) Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 8 years ago			`work_set_target_ratio(work, vhash);`
lbry: some changes from alexis, remove shared mem 105 LBC tipped ;) 8 years ago			`if (resNonces[1] != UINT32_MAX) {`
			`resNonces[1] += startNonce;`
lbry algo (stratum only) Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 8 years ago			`if (opt_debug)`
lbry maxwell and pascal update (up to 10% on pascal) Based on alexis78 work and sponsored by LBRY.IO team (thanks) Release 1.8.2, use cuda 8 for x86 8 years ago			`gpulog(LOG_BLUE, thr_id, "Found second nonce %08x", swab32(resNonces[1]));`
lbry: some changes from alexis, remove shared mem 105 LBC tipped ;) 8 years ago			`endiandata[LBC_NONCE_OFT32] = swab32_if(resNonces[1], !swap);`
lbry algo (stratum only) Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 8 years ago			`lbry_hash(vhash, endiandata);`
lbry: some changes from alexis, remove shared mem 105 LBC tipped ;) 8 years ago			`work->nonces[1] = swab32_if(resNonces[1], swap);`
diff: show by default, rework shares diff storage This will allow later more gpu candidates. Note: This is an unfinished work, we keep the previous behavior for now To finish this, all algos solutions should be migrated and submitted nonces attributes stored. Its required to handle the different share diff per nonce and fix the possible solved count error (if 1/2 nonces is solved). 8 years ago			`if (bn_hash_target_ratio(vhash, ptarget) > work->shareratio[0]) {`
lbry algo (stratum only) Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 8 years ago			`work_set_target_ratio(work, vhash);`
			`xchg(work->nonces[0], work->nonces[1]);`
			`}`
			`res++;`
			`}`
			`pdata[LBC_NONCE_OFT32] = work->nonces[0];`
			`return res;`
lbry: some changes from alexis, remove shared mem 105 LBC tipped ;) 8 years ago			`} else if (vhash[7] > ptarget[7]) {`
			`gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU %08x > %08x!", resNonces[0], vhash[7], ptarget[7]);`
			`cudaMemset(d_resNonce[thr_id], 0xFF, 2 * sizeof(uint32_t));`
lbry algo (stratum only) Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 8 years ago			`}`
			`}`

			`if ((uint64_t) throughput + pdata[LBC_NONCE_OFT32] >= max_nonce) {`
			`pdata[LBC_NONCE_OFT32] = max_nonce;`
			`break;`
			`}`

			`pdata[LBC_NONCE_OFT32] += throughput;`

			`} while (!work_restart[thr_id].restart);`

lbry maxwell and pascal update (up to 10% on pascal) Based on alexis78 work and sponsored by LBRY.IO team (thanks) Release 1.8.2, use cuda 8 for x86 8 years ago			`*hashes_done = pdata[LBC_NONCE_OFT32] - first_nonce;`
lbry algo (stratum only) Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 8 years ago
			`return 0;`
			`}`

			`// cleanup`
lbry maxwell and pascal update (up to 10% on pascal) Based on alexis78 work and sponsored by LBRY.IO team (thanks) Release 1.8.2, use cuda 8 for x86 8 years ago			`void free_lbry(int thr_id)`
lbry algo (stratum only) Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 8 years ago			`{`
			`if (!init[thr_id])`
			`return;`

			`cudaThreadSynchronize();`

lbry maxwell and pascal update (up to 10% on pascal) Based on alexis78 work and sponsored by LBRY.IO team (thanks) Release 1.8.2, use cuda 8 for x86 8 years ago			`if(device_sm[device_map[thr_id]]<=500)`
			`cudaFree(d_hash[thr_id]);`

lbry: some changes from alexis, remove shared mem 105 LBC tipped ;) 8 years ago			`cudaFree(d_resNonce[thr_id]);`
lbry algo (stratum only) Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 8 years ago
			`init[thr_id] = false;`

			`cudaDeviceSynchronize();`
			`}`