// ccminer/x11/c11.cu

extern "C"
{
#include "sph/sph_blake.h"
#include "sph/sph_bmw.h"
#include "sph/sph_groestl.h"
#include "sph/sph_skein.h"
#include "sph/sph_jh.h"
#include "sph/sph_keccak.h"

#include "sph/sph_luffa.h"
#include "sph/sph_cubehash.h"
#include "sph/sph_shavite.h"
#include "sph/sph_simd.h"
#include "sph/sph_echo.h"
}

#include "miner.h"
#include "cuda_helper.h"
#include "cuda_x11.h"

// Forward declaration; the definition is not in this file (presumably it lives
// with the tribus kernels - verify). scanhash_c11 calls it as the last stage on
// the non-compat path, passing the hash buffer, the two-slot result buffer and
// the 64-bit value read from ptarget[6..7]; results are then copied back from
// d_resNonce.
void tribus_echo512_final(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t *d_resNonce, const uint64_t target);

#include <stdio.h>
#include <memory.h>

// Per-thread device buffers, indexed by thr_id and allocated in scanhash_c11 on
// first use (released in free_c11).
// d_hash: 64 bytes per scanned nonce (allocated as 64 * throughput).
static uint32_t *d_hash[MAX_GPUS];
// d_resNonce: two uint32_t result slots; filled with 0xFF before scanning and
// copied back into work->nonces[0..1] after the final echo stage.
static uint32_t *d_resNonce[MAX_GPUS];

// Flax/Chaincoin C11 CPU Hash
/**
 * CPU reference implementation of the C11 hash chain.
 *
 * Runs, in order: blake512 -> bmw512 -> groestl512 -> jh512 -> keccak512 ->
 * skein512 -> luffa512 -> cubehash512 -> shavite512 -> simd512 -> echo512.
 * The first stage consumes 80 bytes from `input`; every later stage consumes
 * the first 64 bytes of the scratch buffer and writes its digest back into it.
 *
 * @param output receives the first 32 bytes of the final digest
 * @param input  80 bytes of data to hash
 */
extern "C" void c11hash(void *output, const void *input)
{
	// Shared scratch buffer, reused in place by every stage.
	unsigned char state[128] = { 0 };

	{
		sph_blake512_context blake;
		sph_blake512_init(&blake);
		sph_blake512(&blake, input, 80);
		sph_blake512_close(&blake, state);
	}
	{
		sph_bmw512_context bmw;
		sph_bmw512_init(&bmw);
		sph_bmw512(&bmw, state, 64);
		sph_bmw512_close(&bmw, state);
	}
	{
		sph_groestl512_context groestl;
		sph_groestl512_init(&groestl);
		sph_groestl512(&groestl, state, 64);
		sph_groestl512_close(&groestl, state);
	}
	{
		sph_jh512_context jh;
		sph_jh512_init(&jh);
		sph_jh512(&jh, state, 64);
		sph_jh512_close(&jh, state);
	}
	{
		sph_keccak512_context keccak;
		sph_keccak512_init(&keccak);
		sph_keccak512(&keccak, state, 64);
		sph_keccak512_close(&keccak, state);
	}
	{
		sph_skein512_context skein;
		sph_skein512_init(&skein);
		sph_skein512(&skein, state, 64);
		sph_skein512_close(&skein, state);
	}
	{
		sph_luffa512_context luffa;
		sph_luffa512_init(&luffa);
		sph_luffa512(&luffa, state, 64);
		sph_luffa512_close(&luffa, state);
	}
	{
		sph_cubehash512_context cubehash;
		sph_cubehash512_init(&cubehash);
		sph_cubehash512(&cubehash, state, 64);
		sph_cubehash512_close(&cubehash, state);
	}
	{
		sph_shavite512_context shavite;
		sph_shavite512_init(&shavite);
		sph_shavite512(&shavite, state, 64);
		sph_shavite512_close(&shavite, state);
	}
	{
		sph_simd512_context simd;
		sph_simd512_init(&simd);
		sph_simd512(&simd, state, 64);
		sph_simd512_close(&simd, state);
	}
	{
		sph_echo512_context echo;
		sph_echo512_init(&echo);
		sph_echo512(&echo, state, 64);
		sph_echo512_close(&echo, state);
	}

	// Only the leading 256 bits of the final digest are returned.
	memcpy(output, state, 32);
}

// TRACE(algo): debug-only dump of the first four 32-bit words (byte-swapped)
// of d_hash[thr_id], labelled with `algo`. It is only active when
// max_nonce == 1 and pdata[19] <= 1, and expands to nothing outside _DEBUG
// builds. Expects max_nonce, pdata and thr_id to be in scope at the call site.
//
// The readback uses a small stack buffer instead of a pinned allocation, so a
// failed cudaMallocHost can no longer lead to a NULL dereference, and nothing
// is printed when the copy itself fails. The do/while(0) wrapper keeps the
// macro safe inside unbraced if/else statements.
#ifdef _DEBUG
#define TRACE(algo) do { \
	if (max_nonce == 1 && pdata[19] <= 1) { \
		uint32_t debugbuf[8] = { 0 }; \
		if (cudaMemcpy(debugbuf, d_hash[thr_id], sizeof(debugbuf), cudaMemcpyDeviceToHost) == cudaSuccess) { \
			printf("X11 %s %08x %08x %08x %08x...\n", algo, swab32(debugbuf[0]), swab32(debugbuf[1]), \
				swab32(debugbuf[2]), swab32(debugbuf[3])); \
		} \
	} \
} while (0)
#else
#define TRACE(algo) do {} while (0)
#endif

// Per-thread state, indexed by thr_id.
// init: set once scanhash_c11 has finished its one-time setup, cleared again
// by free_c11.
static bool init[MAX_GPUS] = { 0 };
// use_compat_kernels: set during setup when cuda_arch[dev_id] < 500; selects
// the x11_echo512 + cuda_check_hash path instead of tribus_echo512_final.
static bool use_compat_kernels[MAX_GPUS] = { 0 };

/**
 * Scan a nonce range for C11 hashes on the GPU mapped to `thr_id`.
 *
 * The first call for a thread selects the device, initialises the per-algorithm
 * helpers and allocates the device buffers. Each loop iteration then hashes
 * `throughput` nonces starting at pdata[19], fetches up to two candidate
 * nonces and re-checks them on the CPU with c11hash() before reporting them.
 *
 * @param thr_id      miner thread index (indexes device_map and the per-GPU arrays)
 * @param work        work item; work->data[19] is the nonce cursor and is advanced,
 *                    work->nonces[] / work->valid_nonces receive the results
 * @param max_nonce   scanning stops once the cursor would reach this value
 * @param hashes_done out: number of nonces covered by this call
 * @return number of validated nonces stored in work->nonces (1 or 2), or 0 when
 *         none was found, initialisation failed, or the scan was interrupted
 *
 * NOTE(review): when initialisation fails part-way, the helpers that were
 * already initialised are not released here - confirm whether callers handle it.
 */
extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
{
	uint32_t *pdata = work->data;
	uint32_t *ptarget = work->target;
	const uint32_t first_nonce = pdata[19];
	int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 20 : 19;
	uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); // 19=256*256*8;
	//if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);

	if (opt_benchmark)
		ptarget[7] = 0x5;

	if (!init[thr_id])
	{
		int dev_id = device_map[thr_id];
		cudaSetDevice(dev_id);
		if (opt_cudaschedule == -1 && gpu_threads == 1) {
			cudaDeviceReset();
			// reduce cpu usage
			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
			CUDA_LOG_ERROR();
		}
		gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);

		cuda_get_arch(thr_id);
		use_compat_kernels[thr_id] = (cuda_arch[dev_id] < 500);

		quark_blake512_cpu_init(thr_id, throughput);
		quark_bmw512_cpu_init(thr_id, throughput);
		quark_groestl512_cpu_init(thr_id, throughput);
		quark_skein512_cpu_init(thr_id, throughput);
		quark_keccak512_cpu_init(thr_id, throughput);
		quark_jh512_cpu_init(thr_id, throughput);
		x11_luffaCubehash512_cpu_init(thr_id, throughput);
		x11_shavite512_cpu_init(thr_id, throughput);
		if (use_compat_kernels[thr_id])
			x11_echo512_cpu_init(thr_id, throughput);
		if (x11_simd512_cpu_init(thr_id, throughput) != 0) {
			return 0;
		}
		// Compute the byte count in size_t: 64 * throughput evaluated in 32-bit
		// unsigned arithmetic wraps around once throughput reaches 2^26.
		CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), 0);
		CUDA_SAFE_CALL(cudaMalloc(&d_resNonce[thr_id], 2 * sizeof(uint32_t)));

		cuda_check_cpu_init(thr_id, throughput);

		init[thr_id] = true;
	}

	// Byte-swapped copy of the 20 header words; word 19 (the nonce) is rewritten
	// below before every CPU verification.
	uint32_t endiandata[20];
	for (int k=0; k < 20; k++)
		be32enc(&endiandata[k], pdata[k]);

	quark_blake512_cpu_setBlock_80(thr_id, endiandata);
	if (use_compat_kernels[thr_id])
		cuda_check_cpu_setTarget(ptarget);
	else
		cudaMemset(d_resNonce[thr_id], 0xFF, 2 * sizeof(uint32_t)); // 0xFFFFFFFF == "no result"

	do {
		int order = 0;

		// Hash with CUDA
		quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
		TRACE("blake  :");
		quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		TRACE("bmw    :");
		quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		TRACE("groestl:");
		quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		TRACE("jh512  :");
		quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		TRACE("keccak :");
		quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		TRACE("skein  :");
		x11_luffaCubehash512_cpu_hash_64(thr_id, throughput, d_hash[thr_id], order++);
		TRACE("luffa+c:");
		x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		TRACE("shavite:");
		x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		TRACE("simd   :");

		if (use_compat_kernels[thr_id]) {
			x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
			work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
			work->nonces[1] = UINT32_MAX;
		} else {
			tribus_echo512_final(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id], AS_U64(&ptarget[6]));
			if (cudaMemcpy(&work->nonces[0], d_resNonce[thr_id], 2 * sizeof(uint32_t), cudaMemcpyDeviceToHost) != cudaSuccess) {
				// A failed readback would leave whatever was in work->nonces[] to be
				// treated as a candidate; stop scanning instead.
				gpulog(LOG_WARNING, thr_id, "unable to read back result nonces, scan aborted");
				break;
			}
		}

		*hashes_done = pdata[19] - first_nonce + throughput;

		if (work->nonces[0] != UINT32_MAX)
		{
			uint32_t _ALIGN(64) vhash[8];
			const uint32_t Htarg = ptarget[7];
			const uint32_t startNounce = pdata[19];
			// On the non-compat path the value read back is rebased on the batch start.
			if (!use_compat_kernels[thr_id]) work->nonces[0] += startNounce;
			be32enc(&endiandata[19], work->nonces[0]);
			c11hash(vhash, endiandata);

			if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
				work->valid_nonces = 1;
				work_set_target_ratio(work, vhash);
				if (work->nonces[1] != UINT32_MAX) {
					work->nonces[1] += startNounce;
					be32enc(&endiandata[19], work->nonces[1]);
					c11hash(vhash, endiandata);
					bn_set_target_ratio(work, vhash, 1);
					work->valid_nonces++;
					pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
				} else {
					pdata[19] = work->nonces[0] + 1; // cursor
				}
				return work->valid_nonces;
			}
			else if (vhash[7] > Htarg) {
				gpu_increment_reject(thr_id);
				if (!opt_quiet)
					gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
				cudaMemset(d_resNonce[thr_id], 0xFF, 2 * sizeof(uint32_t));
				pdata[19] = work->nonces[0] + 1;
				continue;
			}
			else if (!use_compat_kernels[thr_id]) {
				// Top word passed but the full target test failed: the candidate has
				// been consumed, so clear the result slots. Otherwise the same stale
				// offsets would be read back and rebased on the next batch.
				cudaMemset(d_resNonce[thr_id], 0xFF, 2 * sizeof(uint32_t));
			}
		}

		// 64-bit sum so the end-of-range test cannot wrap.
		if ((uint64_t) throughput + pdata[19] >= max_nonce) {
			pdata[19] = max_nonce;
			break;
		}

		pdata[19] += throughput;

	} while (!work_restart[thr_id].restart);

	*hashes_done = pdata[19] - first_nonce;
	return 0;
}

// cleanup
/**
 * Release everything scanhash_c11 set up for `thr_id`.
 *
 * Does nothing when the thread was never initialised. Otherwise it waits for
 * the device, frees the two device buffers, calls the blake/groestl/simd and
 * hash-check free helpers, and clears the init flag so the next
 * scanhash_c11 call sets everything up again.
 *
 * @param thr_id miner thread index
 */
extern "C" void free_c11(int thr_id)
{
	if (!init[thr_id])
		return;

	// cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize().
	cudaDeviceSynchronize();

	// Reset the slots after freeing so no dangling device pointer is kept.
	cudaFree(d_hash[thr_id]);
	d_hash[thr_id] = NULL;
	cudaFree(d_resNonce[thr_id]);
	d_resNonce[thr_id] = NULL;

	quark_blake512_cpu_free(thr_id);
	quark_groestl512_cpu_free(thr_id);
	x11_simd512_cpu_free(thr_id);

	cuda_check_cpu_free(thr_id);
	init[thr_id] = false;

	cudaDeviceSynchronize();
}
Add c11 algo (x11 variant) Used by Chaincoin and Flaxscript 9 years ago			`extern "C"`
			`{`
			`#include "sph/sph_blake.h"`
			`#include "sph/sph_bmw.h"`
			`#include "sph/sph_groestl.h"`
			`#include "sph/sph_skein.h"`
			`#include "sph/sph_jh.h"`
			`#include "sph/sph_keccak.h"`

			`#include "sph/sph_luffa.h"`
			`#include "sph/sph_cubehash.h"`
			`#include "sph/sph_shavite.h"`
			`#include "sph/sph_simd.h"`
			`#include "sph/sph_echo.h"`
			`}`

			`#include "miner.h"`
			`#include "cuda_helper.h"`
cuda: header for common kernel functions (quark/x11) Was thinking about doing that since months ;) lets go 9 years ago			`#include "cuda_x11.h"`
Add c11 algo (x11 variant) Used by Chaincoin and Flaxscript 9 years ago
update c11 like tribus + 2.2.1 readme 7 years ago			`void tribus_echo512_final(int thr_id, uint32_t threads, uint32_t d_hash, uint32_t d_resNonce, const uint64_t target);`

Add c11 algo (x11 variant) Used by Chaincoin and Flaxscript 9 years ago			`#include <stdio.h>`
			`#include <memory.h>`

			`static uint32_t *d_hash[MAX_GPUS];`
update c11 like tribus + 2.2.1 readme 7 years ago			`static uint32_t *d_resNonce[MAX_GPUS];`
Add c11 algo (x11 variant) Used by Chaincoin and Flaxscript 9 years ago
cuda: header for common kernel functions (quark/x11) Was thinking about doing that since months ;) lets go 9 years ago			`// Flax/Chaincoin C11 CPU Hash`
Add c11 algo (x11 variant) Used by Chaincoin and Flaxscript 9 years ago			`extern "C" void c11hash(void output, const void input)`
			`{`
cuda: header for common kernel functions (quark/x11) Was thinking about doing that since months ;) lets go 9 years ago			`unsigned char hash[128] = { 0 };`
Add c11 algo (x11 variant) Used by Chaincoin and Flaxscript 9 years ago
			`sph_blake512_context ctx_blake;`
			`sph_bmw512_context ctx_bmw;`
			`sph_groestl512_context ctx_groestl;`
			`sph_jh512_context ctx_jh;`
			`sph_keccak512_context ctx_keccak;`
			`sph_skein512_context ctx_skein;`
			`sph_luffa512_context ctx_luffa;`
			`sph_cubehash512_context ctx_cubehash;`
			`sph_shavite512_context ctx_shavite;`
			`sph_simd512_context ctx_simd;`
			`sph_echo512_context ctx_echo;`

			`sph_blake512_init(&ctx_blake);`
			`sph_blake512 (&ctx_blake, input, 80);`
			`sph_blake512_close(&ctx_blake, (void*) hash);`

			`sph_bmw512_init(&ctx_bmw);`
			`sph_bmw512 (&ctx_bmw, (const void*) hash, 64);`
			`sph_bmw512_close(&ctx_bmw, (void*) hash);`

			`sph_groestl512_init(&ctx_groestl);`
			`sph_groestl512 (&ctx_groestl, (const void*) hash, 64);`
			`sph_groestl512_close(&ctx_groestl, (void*) hash);`

			`sph_jh512_init(&ctx_jh);`
			`sph_jh512 (&ctx_jh, (const void*) hash, 64);`
			`sph_jh512_close(&ctx_jh, (void*) hash);`

			`sph_keccak512_init(&ctx_keccak);`
			`sph_keccak512 (&ctx_keccak, (const void*) hash, 64);`
			`sph_keccak512_close(&ctx_keccak, (void*) hash);`

			`sph_skein512_init(&ctx_skein);`
			`sph_skein512 (&ctx_skein, (const void*) hash, 64);`
			`sph_skein512_close(&ctx_skein, (void*) hash);`

			`sph_luffa512_init(&ctx_luffa);`
			`sph_luffa512 (&ctx_luffa, (const void*) hash, 64);`
			`sph_luffa512_close (&ctx_luffa, (void*) hash);`

			`sph_cubehash512_init(&ctx_cubehash);`
			`sph_cubehash512 (&ctx_cubehash, (const void*) hash, 64);`
			`sph_cubehash512_close(&ctx_cubehash, (void*) hash);`

			`sph_shavite512_init(&ctx_shavite);`
			`sph_shavite512 (&ctx_shavite, (const void*) hash, 64);`
			`sph_shavite512_close(&ctx_shavite, (void*) hash);`

			`sph_simd512_init(&ctx_simd);`
			`sph_simd512 (&ctx_simd, (const void*) hash, 64);`
			`sph_simd512_close(&ctx_simd, (void*) hash);`

			`sph_echo512_init(&ctx_echo);`
			`sph_echo512 (&ctx_echo, (const void*) hash, 64);`
			`sph_echo512_close(&ctx_echo, (void*) hash);`

			`memcpy(output, hash, 32);`
			`}`

			`#ifdef _DEBUG`
			`#define TRACE(algo) { \`
			`if (max_nonce == 1 && pdata[19] <= 1) { \`
			`uint32_t* debugbuf = NULL; \`
			`cudaMallocHost(&debugbuf, 8*sizeof(uint32_t)); \`
			`cudaMemcpy(debugbuf, d_hash[thr_id], 8*sizeof(uint32_t), cudaMemcpyDeviceToHost); \`
			`printf("X11 %s %08x %08x %08x %08x...\n", algo, swab32(debugbuf[0]), swab32(debugbuf[1]), \`
			`swab32(debugbuf[2]), swab32(debugbuf[3])); \`
			`cudaFreeHost(debugbuf); \`
			`} \`
			`}`
			`#else`
			`#define TRACE(algo) {}`
			`#endif`

			`static bool init[MAX_GPUS] = { 0 };`
update c11 like tribus + 2.2.1 readme 7 years ago			`static bool use_compat_kernels[MAX_GPUS] = { 0 };`
Add c11 algo (x11 variant) Used by Chaincoin and Flaxscript 9 years ago
start v1.7, apply new prototypes to all algos 9 years ago			`extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)`
Add c11 algo (x11 variant) Used by Chaincoin and Flaxscript 9 years ago			`{`
start v1.7, apply new prototypes to all algos 9 years ago			`uint32_t *pdata = work->data;`
			`uint32_t *ptarget = work->target;`
Add c11 algo (x11 variant) Used by Chaincoin and Flaxscript 9 years ago			`const uint32_t first_nonce = pdata[19];`
			`int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 20 : 19;`
intensity: do not reduce throughput before init Else the memory allocated could be less than required later btw, use the new "cuda" function to apply intensity/throughput 9 years ago			`uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); // 19=2562568;`
attempt to reduce shared mem errors 8 years ago			`//if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);`
Add c11 algo (x11 variant) Used by Chaincoin and Flaxscript 9 years ago
			`if (opt_benchmark)`
			`((uint32_t*)ptarget)[7] = 0x5;`

			`if (!init[thr_id])`
			`{`
update c11 like tribus + 2.2.1 readme 7 years ago			`int dev_id = device_map[thr_id];`
			`cudaSetDevice(dev_id);`
1.7.1 release set schedule flags to reduce linux cpu usage without MyStreamSynchronize() 9 years ago			`if (opt_cudaschedule == -1 && gpu_threads == 1) {`
			`cudaDeviceReset();`
			`// reduce cpu usage`
			`cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);`
			`CUDA_LOG_ERROR();`
			`}`
Show intensity on init for all algos 8 years ago			`gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);`
Add c11 algo (x11 variant) Used by Chaincoin and Flaxscript 9 years ago
update c11 like tribus + 2.2.1 readme 7 years ago			`cuda_get_arch(thr_id);`
			`use_compat_kernels[thr_id] = (cuda_arch[dev_id] < 500);`

Add c11 algo (x11 variant) Used by Chaincoin and Flaxscript 9 years ago			`quark_blake512_cpu_init(thr_id, throughput);`
			`quark_bmw512_cpu_init(thr_id, throughput);`
			`quark_groestl512_cpu_init(thr_id, throughput);`
			`quark_skein512_cpu_init(thr_id, throughput);`
			`quark_keccak512_cpu_init(thr_id, throughput);`
			`quark_jh512_cpu_init(thr_id, throughput);`
			`x11_luffaCubehash512_cpu_init(thr_id, throughput);`
			`x11_shavite512_cpu_init(thr_id, throughput);`
update c11 like tribus + 2.2.1 readme 7 years ago			`if (use_compat_kernels[thr_id])`
			`x11_echo512_cpu_init(thr_id, throughput);`
Add c11 algo (x11 variant) Used by Chaincoin and Flaxscript 9 years ago			`if (x11_simd512_cpu_init(thr_id, throughput) != 0) {`
			`return 0;`
			`}`
update c11 like tribus + 2.2.1 readme 7 years ago			`CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 64 * throughput), 0);`
			`CUDA_SAFE_CALL(cudaMalloc(&d_resNonce[thr_id], 2 * sizeof(uint32_t)));`
Add c11 algo (x11 variant) Used by Chaincoin and Flaxscript 9 years ago
			`cuda_check_cpu_init(thr_id, throughput);`

			`init[thr_id] = true;`
			`}`

			`uint32_t endiandata[20];`
			`for (int k=0; k < 20; k++)`
			`be32enc(&endiandata[k], pdata[k]);`

			`quark_blake512_cpu_setBlock_80(thr_id, endiandata);`
update c11 like tribus + 2.2.1 readme 7 years ago			`if (use_compat_kernels[thr_id])`
			`cuda_check_cpu_setTarget(ptarget);`
			`else`
			`cudaMemset(d_resNonce[thr_id], 0xFF, 2 * sizeof(uint32_t));`
Add c11 algo (x11 variant) Used by Chaincoin and Flaxscript 9 years ago
			`do {`
			`int order = 0;`

			`// Hash with CUDA`
			`quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;`
			`TRACE("blake :");`
			`quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);`
			`TRACE("bmw :");`
			`quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);`
			`TRACE("groestl:");`
			`quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);`
			`TRACE("jh512 :");`
			`quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);`
			`TRACE("keccak :");`
			`quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);`
			`TRACE("skein :");`
			`x11_luffaCubehash512_cpu_hash_64(thr_id, throughput, d_hash[thr_id], order++);`
			`TRACE("luffa+c:");`
			`x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);`
			`TRACE("shavite:");`
			`x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);`
			`TRACE("simd :");`
update c11 like tribus + 2.2.1 readme 7 years ago
			`if (use_compat_kernels[thr_id]) {`
			`x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);`
			`work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);`
			`work->nonces[1] = UINT32_MAX;`
			`} else {`
			`tribus_echo512_final(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id], AS_U64(&ptarget[6]));`
			`cudaMemcpy(&work->nonces[0], d_resNonce[thr_id], 2 * sizeof(uint32_t), cudaMemcpyDeviceToHost);`
			`}`
Add c11 algo (x11 variant) Used by Chaincoin and Flaxscript 9 years ago
start v1.7, apply new prototypes to all algos 9 years ago			`*hashes_done = pdata[19] - first_nonce + throughput;`

migrate 2nd nonce storage of most algos This allow to keep pdata[19] as cursor between scans, and later, to sort them.. remains... heavy, scrypt, sia... 8 years ago			`if (work->nonces[0] != UINT32_MAX)`
Add c11 algo (x11 variant) Used by Chaincoin and Flaxscript 9 years ago			`{`
migrate 2nd nonce storage of most algos This allow to keep pdata[19] as cursor between scans, and later, to sort them.. remains... heavy, scrypt, sia... 8 years ago			`uint32_t _ALIGN(64) vhash[8];`
update c11 like tribus + 2.2.1 readme 7 years ago			`const uint32_t Htarg = ptarget[7];`
			`const uint32_t startNounce = pdata[19];`
			`if (!use_compat_kernels[thr_id]) work->nonces[0] += startNounce;`
migrate 2nd nonce storage of most algos This allow to keep pdata[19] as cursor between scans, and later, to sort them.. remains... heavy, scrypt, sia... 8 years ago			`be32enc(&endiandata[19], work->nonces[0]);`
			`c11hash(vhash, endiandata);`

			`if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {`
			`work->valid_nonces = 1;`
			`work_set_target_ratio(work, vhash);`
update c11 like tribus + 2.2.1 readme 7 years ago			`if (work->nonces[1] != UINT32_MAX) {`
			`work->nonces[1] += startNounce;`
migrate 2nd nonce storage of most algos This allow to keep pdata[19] as cursor between scans, and later, to sort them.. remains... heavy, scrypt, sia... 8 years ago			`be32enc(&endiandata[19], work->nonces[1]);`
			`c11hash(vhash, endiandata);`
			`bn_set_target_ratio(work, vhash, 1);`
			`work->valid_nonces++;`
			`pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;`
			`} else {`
			`pdata[19] = work->nonces[0] + 1; // cursor`
Add c11 algo (x11 variant) Used by Chaincoin and Flaxscript 9 years ago			`}`
migrate 2nd nonce storage of most algos This allow to keep pdata[19] as cursor between scans, and later, to sort them.. remains... heavy, scrypt, sia... 8 years ago			`return work->valid_nonces;`
			`}`
			`else if (vhash[7] > Htarg) {`
api: report per thread cpu hash checks (ACC/REJ) + update all algos for that... 8 years ago			`gpu_increment_reject(thr_id);`
			`if (!opt_quiet)`
update c11 like tribus + 2.2.1 readme 7 years ago			`gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);`
			`cudaMemset(d_resNonce[thr_id], 0xFF, 2 * sizeof(uint32_t));`
migrate 2nd nonce storage of most algos This allow to keep pdata[19] as cursor between scans, and later, to sort them.. remains... heavy, scrypt, sia... 8 years ago			`pdata[19] = work->nonces[0] + 1;`
never interrupt global benchmark with found nonces fix some algo weird hashrates (like blake) and reset device between algos, for better accuracy but this reset doesnt seems enough to bench all algos correctly... to test on linux, could be a driver issue... heavy: fix first alloc and indent with tabs... 9 years ago			`continue;`
Add c11 algo (x11 variant) Used by Chaincoin and Flaxscript 9 years ago			`}`
			`}`

never interrupt global benchmark with found nonces fix some algo weird hashrates (like blake) and reset device between algos, for better accuracy but this reset doesnt seems enough to bench all algos correctly... to test on linux, could be a driver issue... heavy: fix first alloc and indent with tabs... 9 years ago			`if ((uint64_t) throughput + pdata[19] >= max_nonce) {`
			`pdata[19] = max_nonce;`
			`break;`
			`}`

Add c11 algo (x11 variant) Used by Chaincoin and Flaxscript 9 years ago			`pdata[19] += throughput;`

never interrupt global benchmark with found nonces fix some algo weird hashrates (like blake) and reset device between algos, for better accuracy but this reset doesnt seems enough to bench all algos correctly... to test on linux, could be a driver issue... heavy: fix first alloc and indent with tabs... 9 years ago			`} while (!work_restart[thr_id].restart);`
Add c11 algo (x11 variant) Used by Chaincoin and Flaxscript 9 years ago
never interrupt global benchmark with found nonces fix some algo weird hashrates (like blake) and reset device between algos, for better accuracy but this reset doesnt seems enough to bench all algos correctly... to test on linux, could be a driver issue... heavy: fix first alloc and indent with tabs... 9 years ago			`*hashes_done = pdata[19] - first_nonce;`
Add c11 algo (x11 variant) Used by Chaincoin and Flaxscript 9 years ago			`return 0;`
			`}`
algos: add functions to free allocated resources Will be used later for algo switching not really tested yet... 9 years ago
			`// cleanup`
			`extern "C" void free_c11(int thr_id)`
			`{`
			`if (!init[thr_id])`
			`return;`

benchmark: enhance the mem leak detection reduce "false" warnings, and ignore unrelated/small ones <= 1 MB On windows the gpu memory can be allocated by other processes + some cleanup in algos... (free/gpulog) 9 years ago			`cudaThreadSynchronize();`
algos: add functions to free allocated resources Will be used later for algo switching not really tested yet... 9 years ago
			`cudaFree(d_hash[thr_id]);`
update c11 like tribus + 2.2.1 readme 7 years ago			`cudaFree(d_resNonce[thr_id]);`

use blake512 sp kernels on SM 5+ (80+64) import and keep my code for older archs, like skein 64 reduce the gap between our versions... +150kH x11 GTX 960 / +30kH 750Ti +900kH quark GTX 960 / +230kH 750Ti 9 years ago			`quark_blake512_cpu_free(thr_id);`
algos: free allocated mem for algo switch All can be freed propertly now, except script (reset) and lyra2 (leak) 9 years ago			`quark_groestl512_cpu_free(thr_id);`
			`x11_simd512_cpu_free(thr_id);`
algos: add functions to free allocated resources Will be used later for algo switching not really tested yet... 9 years ago
			`cuda_check_cpu_free(thr_id);`
			`init[thr_id] = false;`

			`cudaDeviceSynchronize();`
never interrupt global benchmark with found nonces fix some algo weird hashrates (like blake) and reset device between algos, for better accuracy but this reset doesnt seems enough to bench all algos correctly... to test on linux, could be a driver issue... heavy: fix first alloc and indent with tabs... 9 years ago			`}`