ccminer-gostd-lite/x13/x13.cu

/*
 * X13 algorithm
 */
extern "C"
{
#include "sph/sph_blake.h"
#include "sph/sph_bmw.h"
#include "sph/sph_groestl.h"
#include "sph/sph_skein.h"
#include "sph/sph_jh.h"
#include "sph/sph_keccak.h"

#include "sph/sph_luffa.h"
#include "sph/sph_cubehash.h"
#include "sph/sph_shavite.h"
#include "sph/sph_simd.h"
#include "sph/sph_echo.h"

#include "sph/sph_hamsi.h"
#include "sph/sph_fugue.h"
}
#include "miner.h"

#include "cuda_helper.h"
#include "x11/cuda_x11.h"

// Device hash buffer, one pointer per mining thread (indexed by thr_id).
// scanhash_x13() allocates 16 * sizeof(uint32_t) bytes per unit of throughput;
// free_x13() releases it.
static uint32_t *d_hash[MAX_GPUS];

// Hamsi-512 stage: only declared here, the definitions live outside this file.
extern void x13_hamsi512_cpu_init(int thr_id, uint32_t threads);
extern void x13_hamsi512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

// Fugue-512 stage: only declared here, the definitions live outside this file.
// This stage also has a free function, which free_x13() calls.
extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads);
extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x13_fugue512_cpu_free(int thr_id);

// X13 CPU Hash
extern "C" void x13hash(void *output, const void *input)
{
	// blake1-bmw2-grs3-skein4-jh5-keccak6-luffa7-cubehash8-shavite9-simd10-echo11-hamsi12-fugue13

	sph_blake512_context ctx_blake;
	sph_bmw512_context ctx_bmw;
	sph_groestl512_context ctx_groestl;
	sph_jh512_context ctx_jh;
	sph_keccak512_context ctx_keccak;
	sph_skein512_context ctx_skein;
	sph_luffa512_context ctx_luffa;
	sph_cubehash512_context ctx_cubehash;
	sph_shavite512_context ctx_shavite;
	sph_simd512_context ctx_simd;
	sph_echo512_context ctx_echo;
	sph_hamsi512_context ctx_hamsi;
	sph_fugue512_context ctx_fugue;

	uint32_t hash[32];
	memset(hash, 0, sizeof hash);

	sph_blake512_init(&ctx_blake);
	sph_blake512 (&ctx_blake, input, 80);
	sph_blake512_close(&ctx_blake, (void*) hash);

	sph_bmw512_init(&ctx_bmw);
	sph_bmw512 (&ctx_bmw, (const void*) hash, 64);
	sph_bmw512_close(&ctx_bmw, (void*) hash);

	sph_groestl512_init(&ctx_groestl);
	sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
	sph_groestl512_close(&ctx_groestl, (void*) hash);

	sph_skein512_init(&ctx_skein);
	sph_skein512 (&ctx_skein, (const void*) hash, 64);
	sph_skein512_close(&ctx_skein, (void*) hash);

	sph_jh512_init(&ctx_jh);
	sph_jh512 (&ctx_jh, (const void*) hash, 64);
	sph_jh512_close(&ctx_jh, (void*) hash);

	sph_keccak512_init(&ctx_keccak);
	sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
	sph_keccak512_close(&ctx_keccak, (void*) hash);

	sph_luffa512_init(&ctx_luffa);
	sph_luffa512 (&ctx_luffa, (const void*) hash, 64);
	sph_luffa512_close (&ctx_luffa, (void*) hash);

	sph_cubehash512_init(&ctx_cubehash);
	sph_cubehash512 (&ctx_cubehash, (const void*) hash, 64);
	sph_cubehash512_close(&ctx_cubehash, (void*) hash);

	sph_shavite512_init(&ctx_shavite);
	sph_shavite512 (&ctx_shavite, (const void*) hash, 64);
	sph_shavite512_close(&ctx_shavite, (void*) hash);

	sph_simd512_init(&ctx_simd);
	sph_simd512 (&ctx_simd, (const void*) hash, 64);
	sph_simd512_close(&ctx_simd, (void*) hash);

	sph_echo512_init(&ctx_echo);
	sph_echo512 (&ctx_echo, (const void*) hash, 64);
	sph_echo512_close(&ctx_echo, (void*) hash);

	sph_hamsi512_init(&ctx_hamsi);
	sph_hamsi512 (&ctx_hamsi, (const void*) hash, 64);
	sph_hamsi512_close(&ctx_hamsi, (void*) hash);

	sph_fugue512_init(&ctx_fugue);
	sph_fugue512 (&ctx_fugue, (const void*) hash, 64);
	sph_fugue512_close(&ctx_fugue, (void*) hash);

	memcpy(output, hash, 32);
}

// Per-thread setup flag (indexed by thr_id): set to true at the end of the
// one-time setup in scanhash_x13(), reset to false by free_x13().
static bool init[MAX_GPUS] = { 0 };

/**
 * Scan a nonce range for X13 solutions on the GPU mapped to thr_id.
 *
 * On the first call for a given thr_id this selects the CUDA device, runs the
 * init function of every hashing stage and allocates d_hash[thr_id]. Each loop
 * iteration then runs the stage chain over `throughput` nonces starting at
 * pdata[19], asks cuda_check_hash() for a candidate nonce and re-checks that
 * candidate on the CPU with x13hash().
 *
 * @param thr_id      index into init[], d_hash[], device_map[] and work_restart[]
 * @param work        work item; data[19] is read and updated as the nonce cursor,
 *                    nonces[] and valid_nonces receive the results
 * @param max_nonce   upper bound of the scan; the cursor is clamped to it
 * @param hashes_done receives the number of nonces covered by this call
 * @return work->valid_nonces (1 or 2) when a candidate validates on the CPU,
 *         otherwise 0. 0 is also returned when the simd512 init reports failure;
 *         the cudaMalloc wrapper is passed 0, presumably as its error return value.
 */
extern "C" int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
{
	uint32_t *pdata = work->data;
	uint32_t *ptarget = work->target;
	const uint32_t first_nonce = pdata[19];
	int intensity = 19; // (device_sm[device_map[thr_id]] > 500 && !is_windows()) ? 20 : 19;
	uint32_t throughput =  cuda_default_throughput(thr_id, 1 << intensity); // 19=256*256*8;
	//if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);

	// Benchmark mode overrides the last target word with a fixed value.
	if (opt_benchmark)
		((uint32_t*)ptarget)[7] = 0x000f;

	// One-time setup per thread; skipped once init[thr_id] is set.
	if (!init[thr_id])
	{
		cudaSetDevice(device_map[thr_id]);
		if (opt_cudaschedule == -1 && gpu_threads == 1) {
			cudaDeviceReset();
			// reduce cpu usage
			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
			CUDA_LOG_ERROR();
		}
		gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);

		quark_blake512_cpu_init(thr_id, throughput);
		quark_groestl512_cpu_init(thr_id, throughput);
		quark_skein512_cpu_init(thr_id, throughput);
		quark_bmw512_cpu_init(thr_id, throughput);
		quark_keccak512_cpu_init(thr_id, throughput);
		quark_jh512_cpu_init(thr_id, throughput);
		x11_luffaCubehash512_cpu_init(thr_id, throughput);
		x11_shavite512_cpu_init(thr_id, throughput);
		// simd512 is the only stage init whose result is checked; on failure the
		// function returns with init[thr_id] still false.
		if (x11_simd512_cpu_init(thr_id, throughput) != 0) {
			return 0;
		}
		x11_echo512_cpu_init(thr_id, throughput);
		x13_hamsi512_cpu_init(thr_id, throughput);
		x13_fugue512_cpu_init(thr_id, throughput);

		// 16 uint32_t words (64 bytes) of hash state per unit of throughput.
		CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0);

		cuda_check_cpu_init(thr_id, throughput);

		init[thr_id] = true;
	}

	// Big-endian encoded copy of the 20 header words; also reused below as the
	// CPU validation input with word 19 replaced by the candidate nonce.
	uint32_t endiandata[20];
	for (int k=0; k < 20; k++)
		be32enc(&endiandata[k], pdata[k]);

	quark_blake512_cpu_setBlock_80(thr_id, endiandata);
	cuda_check_cpu_setTarget(ptarget);

	do {
		int order = 0;

		// Stage chain over d_hash[thr_id], in the same order as the CPU reference
		// x13hash(); luffa and cubehash are issued through a single call.
		quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
		quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_luffaCubehash512_cpu_hash_64(thr_id, throughput, d_hash[thr_id], order++);
		x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);

		// Count the batch that was just issued; this is the value left in place
		// when the function returns from inside the loop.
		*hashes_done = pdata[19] - first_nonce + throughput;

		CUDA_LOG_ERROR();

		// UINT32_MAX is treated as "no candidate in this batch".
		work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
		if (work->nonces[0] != UINT32_MAX)
		{
			const uint32_t Htarg = ptarget[7];
			uint32_t _ALIGN(64) vhash[8];
			// Recompute the candidate on the CPU before accepting it.
			be32enc(&endiandata[19], work->nonces[0]);
			x13hash(vhash, endiandata);

			if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
				work->valid_nonces = 1;
				work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
				work_set_target_ratio(work, vhash);
				// A non-zero value is treated as a second candidate (presumably 0
				// means "none" - confirm against cuda_check_hash_suppl). Its hash is
				// recomputed for the ratio only; it is not re-tested against the target.
				if (work->nonces[1] != 0) {
					be32enc(&endiandata[19], work->nonces[1]);
					x13hash(vhash, endiandata);
					bn_set_target_ratio(work, vhash, 1);
					work->valid_nonces++;
					pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
				} else {
					pdata[19] = work->nonces[0] + 1; // cursor
				}
				return work->valid_nonces;
			}
			else if (vhash[7] > Htarg) {
				// GPU candidate rejected by the CPU check: move the cursor just past
				// it. `continue` in a do/while jumps to the restart test below.
				gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
				pdata[19] = work->nonces[0] + 1;
				continue;
			}
		}

		// The 64-bit sum keeps cursor + throughput from wrapping around in 32 bits.
		if ((uint64_t)throughput + pdata[19] >= max_nonce) {
			pdata[19] = max_nonce;
			break;
		}
		pdata[19] += throughput;

	} while (!work_restart[thr_id].restart);

	// No valid nonce found: report the distance the cursor actually moved.
	*hashes_done = pdata[19] - first_nonce;

	CUDA_LOG_ERROR();

	return 0;
}

/**
 * Cleanup: release what scanhash_x13() set up for this thread.
 *
 * Does nothing when init[thr_id] is not set. Otherwise frees the device hash
 * buffer, calls the free functions of the stages that provide one (blake,
 * groestl, simd, fugue) plus the shared check helper, and clears init[thr_id]
 * so that the next scanhash_x13() call runs its setup again.
 *
 * @param thr_id index into init[] and d_hash[]
 */
extern "C" void free_x13(int thr_id)
{
	if (!init[thr_id])
		return;

	// cudaThreadSynchronize() is deprecated in the CUDA runtime API;
	// cudaDeviceSynchronize() is its documented replacement.
	cudaDeviceSynchronize();

	cudaFree(d_hash[thr_id]);
	// Do not leave a dangling device pointer behind after the free.
	d_hash[thr_id] = NULL;

	quark_blake512_cpu_free(thr_id);
	quark_groestl512_cpu_free(thr_id);
	x11_simd512_cpu_free(thr_id);
	x13_fugue512_cpu_free(thr_id);

	cuda_check_cpu_free(thr_id);
	CUDA_LOG_ERROR();

	cudaDeviceSynchronize();
	init[thr_id] = false;
}
adding third party X13 and Diamond Groestl code contributions. 11 years ago			`/*`
Implement x14 (cuda + cpu functions) Project was updated for VS2013 and CUDA SDK 6.5 add also a --cputest function to dump cpu hash results TODO: x15 is not fully functional, but first loop seems ok Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 10 years ago			`* X13 algorithm`
adding third party X13 and Diamond Groestl code contributions. 11 years ago			`*/`
			`extern "C"`
			`{`
			`#include "sph/sph_blake.h"`
			`#include "sph/sph_bmw.h"`
			`#include "sph/sph_groestl.h"`
			`#include "sph/sph_skein.h"`
			`#include "sph/sph_jh.h"`
			`#include "sph/sph_keccak.h"`

			`#include "sph/sph_luffa.h"`
			`#include "sph/sph_cubehash.h"`
			`#include "sph/sph_shavite.h"`
			`#include "sph/sph_simd.h"`
			`#include "sph/sph_echo.h"`

			`#include "sph/sph_hamsi.h"`
			`#include "sph/sph_fugue.h"`
Move common check_cpu functions to root 10 years ago			`}`
adding third party X13 and Diamond Groestl code contributions. 11 years ago			`#include "miner.h"`

Remove duplicated defines present in cuda_helper.h also add cudaDeviceReset() on Ctrl+C for nvprof 10 years ago			`#include "cuda_helper.h"`
cuda: header for common kernel functions (quark/x11) Was thinking about doing that since months ;) lets go 9 years ago			`#include "x11/cuda_x11.h"`
Implement x14 (cuda + cpu functions) Project was updated for VS2013 and CUDA SDK 6.5 add also a --cputest function to dump cpu hash results TODO: x15 is not fully functional, but first loop seems ok Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 10 years ago
Handle a maximum of 16 gpus (vs 8 before) Some cards have 2 gpus on board... 10 years ago			`static uint32_t *d_hash[MAX_GPUS];`
adding third party X13 and Diamond Groestl code contributions. 11 years ago
cleanup: use unsigned throughput parameters Yes, its a big commit, was waiting 1.6 to do that... Sorry for your possible merge issues ;) 10 years ago			`extern void x13_hamsi512_cpu_init(int thr_id, uint32_t threads);`
			`extern void x13_hamsi512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t d_nonceVector, uint32_t d_hash, int order);`
adding third party X13 and Diamond Groestl code contributions. 11 years ago
cleanup: use unsigned throughput parameters Yes, its a big commit, was waiting 1.6 to do that... Sorry for your possible merge issues ;) 10 years ago			`extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads);`
			`extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t d_nonceVector, uint32_t d_hash, int order);`
algos: free allocated mem for algo switch All can be freed propertly now, except script (reset) and lyra2 (leak) 9 years ago			`extern void x13_fugue512_cpu_free(int thr_id);`
adding third party X13 and Diamond Groestl code contributions. 11 years ago
simd512: restore SM3/3.5 perfs Simple change which affect all algos based on SIMD512 fresh, qubit, s3, x11 to x17... 10 years ago			`// X13 CPU Hash`
Implement x14 (cuda + cpu functions) Project was updated for VS2013 and CUDA SDK 6.5 add also a --cputest function to dump cpu hash results TODO: x15 is not fully functional, but first loop seems ok Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 10 years ago			`extern "C" void x13hash(void output, const void input)`
adding third party X13 and Diamond Groestl code contributions. 11 years ago			`{`
Implement x14 (cuda + cpu functions) Project was updated for VS2013 and CUDA SDK 6.5 add also a --cputest function to dump cpu hash results TODO: x15 is not fully functional, but first loop seems ok Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 10 years ago			`// blake1-bmw2-grs3-skein4-jh5-keccak6-luffa7-cubehash8-shavite9-simd10-echo11-hamsi12-fugue13`

			`sph_blake512_context ctx_blake;`
			`sph_bmw512_context ctx_bmw;`
			`sph_groestl512_context ctx_groestl;`
			`sph_jh512_context ctx_jh;`
			`sph_keccak512_context ctx_keccak;`
			`sph_skein512_context ctx_skein;`
			`sph_luffa512_context ctx_luffa;`
			`sph_cubehash512_context ctx_cubehash;`
			`sph_shavite512_context ctx_shavite;`
			`sph_simd512_context ctx_simd;`
			`sph_echo512_context ctx_echo;`
			`sph_hamsi512_context ctx_hamsi;`
			`sph_fugue512_context ctx_fugue;`

			`uint32_t hash[32];`
			`memset(hash, 0, sizeof hash);`

			`sph_blake512_init(&ctx_blake);`
			`sph_blake512 (&ctx_blake, input, 80);`
			`sph_blake512_close(&ctx_blake, (void*) hash);`

			`sph_bmw512_init(&ctx_bmw);`
			`sph_bmw512 (&ctx_bmw, (const void*) hash, 64);`
			`sph_bmw512_close(&ctx_bmw, (void*) hash);`

			`sph_groestl512_init(&ctx_groestl);`
			`sph_groestl512 (&ctx_groestl, (const void*) hash, 64);`
			`sph_groestl512_close(&ctx_groestl, (void*) hash);`

			`sph_skein512_init(&ctx_skein);`
			`sph_skein512 (&ctx_skein, (const void*) hash, 64);`
			`sph_skein512_close(&ctx_skein, (void*) hash);`

			`sph_jh512_init(&ctx_jh);`
			`sph_jh512 (&ctx_jh, (const void*) hash, 64);`
			`sph_jh512_close(&ctx_jh, (void*) hash);`

			`sph_keccak512_init(&ctx_keccak);`
			`sph_keccak512 (&ctx_keccak, (const void*) hash, 64);`
			`sph_keccak512_close(&ctx_keccak, (void*) hash);`

			`sph_luffa512_init(&ctx_luffa);`
			`sph_luffa512 (&ctx_luffa, (const void*) hash, 64);`
			`sph_luffa512_close (&ctx_luffa, (void*) hash);`

			`sph_cubehash512_init(&ctx_cubehash);`
			`sph_cubehash512 (&ctx_cubehash, (const void*) hash, 64);`
			`sph_cubehash512_close(&ctx_cubehash, (void*) hash);`

			`sph_shavite512_init(&ctx_shavite);`
			`sph_shavite512 (&ctx_shavite, (const void*) hash, 64);`
			`sph_shavite512_close(&ctx_shavite, (void*) hash);`

			`sph_simd512_init(&ctx_simd);`
			`sph_simd512 (&ctx_simd, (const void*) hash, 64);`
			`sph_simd512_close(&ctx_simd, (void*) hash);`

			`sph_echo512_init(&ctx_echo);`
			`sph_echo512 (&ctx_echo, (const void*) hash, 64);`
			`sph_echo512_close(&ctx_echo, (void*) hash);`

			`sph_hamsi512_init(&ctx_hamsi);`
			`sph_hamsi512 (&ctx_hamsi, (const void*) hash, 64);`
			`sph_hamsi512_close(&ctx_hamsi, (void*) hash);`

			`sph_fugue512_init(&ctx_fugue);`
			`sph_fugue512 (&ctx_fugue, (const void*) hash, 64);`
			`sph_fugue512_close(&ctx_fugue, (void*) hash);`

			`memcpy(output, hash, 32);`
adding third party X13 and Diamond Groestl code contributions. 11 years ago			`}`

Handle a maximum of 16 gpus (vs 8 before) Some cards have 2 gpus on board... 10 years ago			`static bool init[MAX_GPUS] = { 0 };`
adding third party X13 and Diamond Groestl code contributions. 11 years ago
start v1.7, apply new prototypes to all algos 9 years ago			`extern "C" int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)`
adding third party X13 and Diamond Groestl code contributions. 11 years ago			`{`
start v1.7, apply new prototypes to all algos 9 years ago			`uint32_t *pdata = work->data;`
			`uint32_t *ptarget = work->target;`
adding third party X13 and Diamond Groestl code contributions. 11 years ago			`const uint32_t first_nonce = pdata[19];`
Allow intermediate intensity (decimals) Sample with -i 18.5 Adding 131072 threads to intensity 18, 393216 cuda threads And with -i 19.5 Adding 262144 threads to intensity 19, 786432 cuda threads 10 years ago			`int intensity = 19; // (device_sm[device_map[thr_id]] > 500 && !is_windows()) ? 20 : 19;`
intensity: do not reduce throughput before init Else the memory allocated could be less than required later btw, use the new "cuda" function to apply intensity/throughput 9 years ago			`uint32_t throughput = cuda_default_throughput(thr_id, 1 << intensity); // 19=2562568;`
attempt to reduce shared mem errors 8 years ago			`//if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);`
adding third party X13 and Diamond Groestl code contributions. 11 years ago
Rework benchmark mode and min/max range Was maybe my fault, but the benchmark mode was always recomputing from nonce 0. Also fix blake if -d 1 is used (one thread but second gpu) stats: do not use thread id as key, prefer gpu id... 10 years ago			`if (opt_benchmark)`
			`((uint32_t*)ptarget)[7] = 0x000f;`

adding third party X13 and Diamond Groestl code contributions. 11 years ago			`if (!init[thr_id])`
			`{`
Prepare trap of hardware/mem failures 10 years ago			`cudaSetDevice(device_map[thr_id]);`
1.7.1 release set schedule flags to reduce linux cpu usage without MyStreamSynchronize() 9 years ago			`if (opt_cudaschedule == -1 && gpu_threads == 1) {`
			`cudaDeviceReset();`
			`// reduce cpu usage`
			`cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);`
			`CUDA_LOG_ERROR();`
			`}`
Show intensity on init for all algos 8 years ago			`gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);`
adding third party X13 and Diamond Groestl code contributions. 11 years ago
			`quark_blake512_cpu_init(thr_id, throughput);`
			`quark_groestl512_cpu_init(thr_id, throughput);`
			`quark_skein512_cpu_init(thr_id, throughput);`
			`quark_bmw512_cpu_init(thr_id, throughput);`
			`quark_keccak512_cpu_init(thr_id, throughput);`
			`quark_jh512_cpu_init(thr_id, throughput);`
Luffa and simd merged to one kernal. Small echo rewrite. +10KHASH on the 650(compute 3.0) tpruvot: add Linux Makefile - Force to 80 registers (else -30KH/s) Note : the hashrate seems more constant with this change 10 years ago			`x11_luffaCubehash512_cpu_init(thr_id, throughput);`
adding third party X13 and Diamond Groestl code contributions. 11 years ago			`x11_shavite512_cpu_init(thr_id, throughput);`
Prepare trap of hardware/mem failures 10 years ago			`if (x11_simd512_cpu_init(thr_id, throughput) != 0) {`
			`return 0;`
			`}`
adding third party X13 and Diamond Groestl code contributions. 11 years ago			`x11_echo512_cpu_init(thr_id, throughput);`
			`x13_hamsi512_cpu_init(thr_id, throughput);`
			`x13_fugue512_cpu_init(thr_id, throughput);`
Prepare trap of hardware/mem failures 10 years ago
			`CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0);`

Remove duplicated defines present in cuda_helper.h also add cudaDeviceReset() on Ctrl+C for nvprof 10 years ago			`cuda_check_cpu_init(thr_id, throughput);`
cuda: check for errors on cuda mem alloc 10 years ago
adding third party X13 and Diamond Groestl code contributions. 11 years ago			`init[thr_id] = true;`
			`}`

cuda: check for errors on cuda mem alloc 10 years ago			`uint32_t endiandata[20];`
adding third party X13 and Diamond Groestl code contributions. 11 years ago			`for (int k=0; k < 20; k++)`
remove uint32_t cast 10 years ago			`be32enc(&endiandata[k], pdata[k]);`
adding third party X13 and Diamond Groestl code contributions. 11 years ago
blake80: some changes and launch bounds, no perf changes 10 years ago			`quark_blake512_cpu_setBlock_80(thr_id, endiandata);`
Remove duplicated defines present in cuda_helper.h also add cudaDeviceReset() on Ctrl+C for nvprof 10 years ago			`cuda_check_cpu_setTarget(ptarget);`
adding third party X13 and Diamond Groestl code contributions. 11 years ago
			`do {`
			`int order = 0;`

blake80: some changes and launch bounds, no perf changes 10 years ago			`quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;`
adding third party X13 and Diamond Groestl code contributions. 11 years ago			`quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);`
			`quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);`
			`quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);`
			`quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);`
			`quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);`
x11: update sp luffa/cube to get closer x11 speeds.. i had to clean it... lot of unused defines... 10 years ago			`x11_luffaCubehash512_cpu_hash_64(thr_id, throughput, d_hash[thr_id], order++);`
adding third party X13 and Diamond Groestl code contributions. 11 years ago			`x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);`
			`x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);`
			`x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);`
			`x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);`
cuda: check for errors on cuda mem alloc 10 years ago			`x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);`
adding third party X13 and Diamond Groestl code contributions. 11 years ago
start v1.7, apply new prototypes to all algos 9 years ago			`*hashes_done = pdata[19] - first_nonce + throughput;`

warn on cuda errors + various small changes The full benchmark can now be launched with "ccminer --benchmark" add a new helper function which log a warning with last cuda error (not shown with the quiet option) : CUDA_LOG_ERROR(); it can be used where miner.h is included (.c/.cpp/.cu) fix x14 (in ccminer.cpp), a break was missing in switch..case 9 years ago			`CUDA_LOG_ERROR();`

migrate 2nd nonce storage of most algos This allow to keep pdata[19] as cursor between scans, and later, to sort them.. remains... heavy, scrypt, sia... 8 years ago			`work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);`
			`if (work->nonces[0] != UINT32_MAX)`
adding third party X13 and Diamond Groestl code contributions. 11 years ago			`{`
migrate 2nd nonce storage of most algos This allow to keep pdata[19] as cursor between scans, and later, to sort them.. remains... heavy, scrypt, sia... 8 years ago			`const uint32_t Htarg = ptarget[7];`
			`uint32_t _ALIGN(64) vhash[8];`
			`be32enc(&endiandata[19], work->nonces[0]);`
start v1.7, apply new prototypes to all algos 9 years ago			`x13hash(vhash, endiandata);`
adding third party X13 and Diamond Groestl code contributions. 11 years ago
start v1.7, apply new prototypes to all algos 9 years ago			`if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {`
migrate 2nd nonce storage of most algos This allow to keep pdata[19] as cursor between scans, and later, to sort them.. remains... heavy, scrypt, sia... 8 years ago			`work->valid_nonces = 1;`
			`work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);`
diff: use the new function in all algos 9 years ago			`work_set_target_ratio(work, vhash);`
migrate 2nd nonce storage of most algos This allow to keep pdata[19] as cursor between scans, and later, to sort them.. remains... heavy, scrypt, sia... 8 years ago			`if (work->nonces[1] != 0) {`
			`be32enc(&endiandata[19], work->nonces[1]);`
start v1.7, apply new prototypes to all algos 9 years ago			`x13hash(vhash, endiandata);`
migrate 2nd nonce storage of most algos This allow to keep pdata[19] as cursor between scans, and later, to sort them.. remains... heavy, scrypt, sia... 8 years ago			`bn_set_target_ratio(work, vhash, 1);`
			`work->valid_nonces++;`
			`pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;`
			`} else {`
			`pdata[19] = work->nonces[0] + 1; // cursor`
Check and submit multiple nonces in one loop Added to most algos, checkhash function scans a big range and can find multiple nonces at once if the difficulty is low. Stop ignoring them, submit second one if found... Clean the draft code for rc=2 implemented for blake and pentablake btw... fix the reduced displayed hashrate when a nonce is found... Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 10 years ago			`}`
migrate 2nd nonce storage of most algos This allow to keep pdata[19] as cursor between scans, and later, to sort them.. remains... heavy, scrypt, sia... 8 years ago			`return work->valid_nonces;`
			`}`
			`else if (vhash[7] > Htarg) {`
			`gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);`
			`pdata[19] = work->nonces[0] + 1;`
			`continue;`
adding third party X13 and Diamond Groestl code contributions. 11 years ago			`}`
			`}`

never interrupt global benchmark with found nonces fix some algo weird hashrates (like blake) and reset device between algos, for better accuracy but this reset doesnt seems enough to bench all algos correctly... to test on linux, could be a driver issue... heavy: fix first alloc and indent with tabs... 9 years ago			`if ((uint64_t)throughput + pdata[19] >= max_nonce) {`
Rework benchmark mode and min/max range Was maybe my fault, but the benchmark mode was always recomputing from nonce 0. Also fix blake if -d 1 is used (one thread but second gpu) stats: do not use thread id as key, prefer gpu id... 10 years ago			`pdata[19] = max_nonce;`
			`break;`
			`}`
adding third party X13 and Diamond Groestl code contributions. 11 years ago			`pdata[19] += throughput;`

Rework benchmark mode and min/max range Was maybe my fault, but the benchmark mode was always recomputing from nonce 0. Also fix blake if -d 1 is used (one thread but second gpu) stats: do not use thread id as key, prefer gpu id... 10 years ago			`} while (!work_restart[thr_id].restart);`
adding third party X13 and Diamond Groestl code contributions. 11 years ago
never interrupt global benchmark with found nonces fix some algo weird hashrates (like blake) and reset device between algos, for better accuracy but this reset doesnt seems enough to bench all algos correctly... to test on linux, could be a driver issue... heavy: fix first alloc and indent with tabs... 9 years ago			`*hashes_done = pdata[19] - first_nonce;`
warn on cuda errors + various small changes The full benchmark can now be launched with "ccminer --benchmark" add a new helper function which log a warning with last cuda error (not shown with the quiet option) : CUDA_LOG_ERROR(); it can be used where miner.h is included (.c/.cpp/.cu) fix x14 (in ccminer.cpp), a break was missing in switch..case 9 years ago
			`CUDA_LOG_ERROR();`

adding third party X13 and Diamond Groestl code contributions. 11 years ago			`return 0;`
			`}`
algos: add functions to free allocated resources Will be used later for algo switching not really tested yet... 9 years ago
			`// cleanup`
			`extern "C" void free_x13(int thr_id)`
			`{`
			`if (!init[thr_id])`
			`return;`

warn on cuda errors + various small changes The full benchmark can now be launched with "ccminer --benchmark" add a new helper function which log a warning with last cuda error (not shown with the quiet option) : CUDA_LOG_ERROR(); it can be used where miner.h is included (.c/.cpp/.cu) fix x14 (in ccminer.cpp), a break was missing in switch..case 9 years ago			`cudaThreadSynchronize();`
algos: add functions to free allocated resources Will be used later for algo switching not really tested yet... 9 years ago
			`cudaFree(d_hash[thr_id]);`

use blake512 sp kernels on SM 5+ (80+64) import and keep my code for older archs, like skein 64 reduce the gap between our versions... +150kH x11 GTX 960 / +30kH 750Ti +900kH quark GTX 960 / +230kH 750Ti 9 years ago			`quark_blake512_cpu_free(thr_id);`
algos: free allocated mem for algo switch All can be freed propertly now, except script (reset) and lyra2 (leak) 9 years ago			`quark_groestl512_cpu_free(thr_id);`
			`x11_simd512_cpu_free(thr_id);`
			`x13_fugue512_cpu_free(thr_id);`

algos: add functions to free allocated resources Will be used later for algo switching not really tested yet... 9 years ago			`cuda_check_cpu_free(thr_id);`
warn on cuda errors + various small changes The full benchmark can now be launched with "ccminer --benchmark" add a new helper function which log a warning with last cuda error (not shown with the quiet option) : CUDA_LOG_ERROR(); it can be used where miner.h is included (.c/.cpp/.cu) fix x14 (in ccminer.cpp), a break was missing in switch..case 9 years ago			`CUDA_LOG_ERROR();`
algos: add functions to free allocated resources Will be used later for algo switching not really tested yet... 9 years ago
			`cudaDeviceSynchronize();`
warn on cuda errors + various small changes The full benchmark can now be launched with "ccminer --benchmark" add a new helper function which log a warning with last cuda error (not shown with the quiet option) : CUDA_LOG_ERROR(); it can be used where miner.h is included (.c/.cpp/.cu) fix x14 (in ccminer.cpp), a break was missing in switch..case 9 years ago			`init[thr_id] = false;`
benchmark: enhance the mem leak detection reduce "false" warnings, and ignore unrelated/small ones <= 1 MB On windows the gpu memory can be allocated by other processes + some cleanup in algos... (free/gpulog) 9 years ago			`}`