ccminer-gostd-lite/qubit/luffa.cu

/*
 * luffa 80 algo (Introduced by Doomcoin)
 */
extern "C" {
#include "sph/sph_luffa.h"
}

#include "miner.h"

#include "cuda_helper.h"

// Device hash buffer pointer for each mining thread, indexed by thr_id.
// Allocated with cudaMalloc in scanhash_luffa() and released in free_luffa().
static uint32_t *d_hash[MAX_GPUS];

// GPU-side luffa-512 routines, defined in another translation unit.
// NOTE(review): their exact semantics are not visible from this file; the
// descriptions below are inferred from how scanhash_luffa() calls them.
// Called once per thread at init time with the thread id and the thread count.
extern void qubit_luffa512_cpu_init(int thr_id, uint32_t threads);
// Called with the big-endian encoded block header before scanning.
extern void qubit_luffa512_cpu_setBlock_80(void *pdata);
// Called once per scan iteration with the starting nonce and the output buffer.
extern void qubit_luffa512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order);

extern "C" void luffa_hash(void *state, const void *input)
{
	uint8_t _ALIGN(64) hash[64];

	sph_luffa512_context ctx_luffa;

	sph_luffa512_init(&ctx_luffa);
	sph_luffa512 (&ctx_luffa, input, 80);
	sph_luffa512_close(&ctx_luffa, (void*) hash);

	memcpy(state, hash, 32);
}

// Per-thread flag, indexed by thr_id: set to true by scanhash_luffa() once the
// GPU resources for that thread are allocated, reset to false by free_luffa().
static bool init[MAX_GPUS] = { 0 };

/*
 * Scan a nonce range on the GPU for a luffa hash that meets the work target.
 *
 * thr_id:      mining thread index (also indexes d_hash[], init[], device_map[]).
 * work:        work item; work->data[0..18] is the header, work->data[19] the
 *              starting nonce (updated as the scan advances), work->target the
 *              target to beat.
 * max_nonce:   upper bound of the nonce range to scan.
 * hashes_done: out, number of hashes accounted for by this call.
 *
 * Returns 1 when a nonce reported by the GPU also validates on the CPU (the
 * nonce is then stored in work->data[19]), 0 when the range is exhausted or a
 * work restart was requested.
 */
extern "C" int scanhash_luffa(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
{
	uint32_t _ALIGN(64) endiandata[20];
	uint32_t *pdata = work->data;
	uint32_t *ptarget = work->target;
	const uint32_t first_nonce = pdata[19];

	// Per-launch capacity requested for this device. GPU buffers are sized from
	// this value, NOT from the range-clamped one below: sizing them from a short
	// first range would leave them too small for later, larger launches.
	// NOTE(review): assumes device_intensity() returns the same value on every
	// call for a given thread — confirm.
	const uint32_t max_throughput = device_intensity(thr_id, __func__, 1U << 22); // 256*256*8*8

	// Hashes per launch for this call, limited to what is left of the nonce range.
	uint32_t throughput = min(max_throughput, max_nonce - first_nonce);

	if (opt_benchmark)
		((uint32_t*)ptarget)[7] = 0x0000f;

	if (!init[thr_id])
	{
		cudaSetDevice(device_map[thr_id]);
		if (opt_cudaschedule == -1) // to reduce cpu usage...
			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);

		// 64 bytes of output per hash; computed in size_t so that high
		// intensities cannot wrap a 32-bit multiplication.
		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * max_throughput));

		qubit_luffa512_cpu_init(thr_id, max_throughput);
		cuda_check_cpu_init(thr_id, max_throughput);

		init[thr_id] = true;
	}

	// Only the 19 header words are encoded here.
	// NOTE(review): endiandata[19] is left unset until a nonce is found;
	// presumably the GPU code supplies the nonce itself — confirm.
	for (int k=0; k < 19; k++)
		be32enc(&endiandata[k], pdata[k]);

	qubit_luffa512_cpu_setBlock_80((void*)endiandata);
	cuda_check_cpu_setTarget(ptarget);

	do {
		int order = 0;
		// account for the batch about to be launched
		*hashes_done = pdata[19] - first_nonce + throughput;

		qubit_luffa512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);

		uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
		if (foundNonce != UINT32_MAX)
		{
			// re-hash the candidate on the CPU before reporting it
			uint32_t _ALIGN(64) vhash64[8];
			be32enc(&endiandata[19], foundNonce);
			luffa_hash(vhash64, endiandata);

			if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) {
				bn_store_hash_target_ratio(vhash64, ptarget, work);
				pdata[19] = foundNonce;
				return 1;
			} else {
				applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", device_map[thr_id], foundNonce);
			}
		}

		// 64-bit sum so the end-of-range test cannot wrap
		if ((uint64_t) throughput + pdata[19] > max_nonce) {
			// pdata[19] = max_nonce;
			break;
		}

		pdata[19] += throughput;

	} while (!work_restart[thr_id].restart);

	*hashes_done = pdata[19] - first_nonce + 1;
	return 0;
}

/*
 * Cleanup: releases the GPU resources held for thread `thr_id`.
 * Does nothing when the thread was never initialised by scanhash_luffa().
 */
extern "C" void free_luffa(int thr_id)
{
	if (init[thr_id])
	{
		// select the device that owns this thread's buffers
		cudaSetDevice(device_map[thr_id]);

		cudaFree(d_hash[thr_id]);
		cuda_check_cpu_free(thr_id);

		// mark the thread as needing a fresh init on its next scan
		init[thr_id] = false;

		cudaDeviceSynchronize();
	}
}
Import djm34 qubit, deep and doom algos Indent, and put commonly used functions proto. in cuda_helper.h And add them to --cputest function Also change the color option to --nocolor, -C is no more needed Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> (Which is tired to remove these german copy/pasted comments) 10 years ago			`/*`
remove double reference to luffa algo doomcoin is dead but the luffa algo is still used (Joincoin).. keep doom as alias for compat... rename functions... 10 years ago			`* luffa 80 algo (Introduced by Doomcoin)`
Import djm34 qubit, deep and doom algos Indent, and put commonly used functions proto. in cuda_helper.h And add them to --cputest function Also change the color option to --nocolor, -C is no more needed Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> (Which is tired to remove these german copy/pasted comments) 10 years ago			`*/`
			`extern "C" {`
			`#include "sph/sph_luffa.h"`
			`}`

			`#include "miner.h"`

			`#include "cuda_helper.h"`

Handle a maximum of 16 gpus (vs 8 before) Some cards have 2 gpus on board... 10 years ago			`static uint32_t *d_hash[MAX_GPUS];`
Import djm34 qubit, deep and doom algos Indent, and put commonly used functions proto. in cuda_helper.h And add them to --cputest function Also change the color option to --nocolor, -C is no more needed Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> (Which is tired to remove these german copy/pasted comments) 10 years ago
cleanup: use unsigned throughput parameters Yes, its a big commit, was waiting 1.6 to do that... Sorry for your possible merge issues ;) 10 years ago			`extern void qubit_luffa512_cpu_init(int thr_id, uint32_t threads);`
Import djm34 qubit, deep and doom algos Indent, and put commonly used functions proto. in cuda_helper.h And add them to --cputest function Also change the color option to --nocolor, -C is no more needed Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> (Which is tired to remove these german copy/pasted comments) 10 years ago			`extern void qubit_luffa512_cpu_setBlock_80(void *pdata);`
cleanup: use unsigned throughput parameters Yes, its a big commit, was waiting 1.6 to do that... Sorry for your possible merge issues ;) 10 years ago			`extern void qubit_luffa512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order);`
Import djm34 qubit, deep and doom algos Indent, and put commonly used functions proto. in cuda_helper.h And add them to --cputest function Also change the color option to --nocolor, -C is no more needed Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> (Which is tired to remove these german copy/pasted comments) 10 years ago
remove double reference to luffa algo doomcoin is dead but the luffa algo is still used (Joincoin).. keep doom as alias for compat... rename functions... 10 years ago			`extern "C" void luffa_hash(void *state, const void *input)`
Import djm34 qubit, deep and doom algos Indent, and put commonly used functions proto. in cuda_helper.h And add them to --cputest function Also change the color option to --nocolor, -C is no more needed Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> (Which is tired to remove these german copy/pasted comments) 10 years ago			`{`
qubit: implement cpu precalc (klaust) improve qubit (+5%) deep and doom (+10%) hashrate based on klausT code, simplified... 10 years ago			`uint8_t _ALIGN(64) hash[64];`
Import djm34 qubit, deep and doom algos Indent, and put commonly used functions proto. in cuda_helper.h And add them to --cputest function Also change the color option to --nocolor, -C is no more needed Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> (Which is tired to remove these german copy/pasted comments) 10 years ago
qubit: implement cpu precalc (klaust) improve qubit (+5%) deep and doom (+10%) hashrate based on klausT code, simplified... 10 years ago			`sph_luffa512_context ctx_luffa;`
Import djm34 qubit, deep and doom algos Indent, and put commonly used functions proto. in cuda_helper.h And add them to --cputest function Also change the color option to --nocolor, -C is no more needed Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> (Which is tired to remove these german copy/pasted comments) 10 years ago
			`sph_luffa512_init(&ctx_luffa);`
			`sph_luffa512 (&ctx_luffa, input, 80);`
			`sph_luffa512_close(&ctx_luffa, (void*) hash);`

			`memcpy(state, hash, 32);`
			`}`

Handle a maximum of 16 gpus (vs 8 before) Some cards have 2 gpus on board... 10 years ago			`static bool init[MAX_GPUS] = { 0 };`
Import djm34 qubit, deep and doom algos Indent, and put commonly used functions proto. in cuda_helper.h And add them to --cputest function Also change the color option to --nocolor, -C is no more needed Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> (Which is tired to remove these german copy/pasted comments) 10 years ago
start v1.7, apply new prototypes to all algos 9 years ago			`extern "C" int scanhash_luffa(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)`
Import djm34 qubit, deep and doom algos Indent, and put commonly used functions proto. in cuda_helper.h And add them to --cputest function Also change the color option to --nocolor, -C is no more needed Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> (Which is tired to remove these german copy/pasted comments) 10 years ago			`{`
qubit: implement cpu precalc (klaust) improve qubit (+5%) deep and doom (+10%) hashrate based on klausT code, simplified... 10 years ago			`uint32_t _ALIGN(64) endiandata[20];`
start v1.7, apply new prototypes to all algos 9 years ago			`uint32_t *pdata = work->data;`
			`uint32_t *ptarget = work->target;`
Import djm34 qubit, deep and doom algos Indent, and put commonly used functions proto. in cuda_helper.h And add them to --cputest function Also change the color option to --nocolor, -C is no more needed Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> (Which is tired to remove these german copy/pasted comments) 10 years ago			`const uint32_t first_nonce = pdata[19];`
Allow different intensity per device and clean the old variables, no more required 10 years ago			`uint32_t throughput = device_intensity(thr_id, __func__, 1U << 22); // 256*256*8*8`
cleanup: use unsigned throughput parameters Yes, its a big commit, was waiting 1.6 to do that... Sorry for your possible merge issues ;) 10 years ago			`throughput = min(throughput, max_nonce - first_nonce);`
Import djm34 qubit, deep and doom algos Indent, and put commonly used functions proto. in cuda_helper.h And add them to --cputest function Also change the color option to --nocolor, -C is no more needed Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> (Which is tired to remove these german copy/pasted comments) 10 years ago
			`if (opt_benchmark)`
Reduce keccak, deep & anime intensity + handle groestl -i param default intensity was the max supported by the card, and perf is not really better. I prefer to let it one under for cards with lower memory (1GB) 10 years ago			`((uint32_t*)ptarget)[7] = 0x0000f;`
Import djm34 qubit, deep and doom algos Indent, and put commonly used functions proto. in cuda_helper.h And add them to --cputest function Also change the color option to --nocolor, -C is no more needed Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> (Which is tired to remove these german copy/pasted comments) 10 years ago
			`if (!init[thr_id])`
			`{`
			`cudaSetDevice(device_map[thr_id]);`
Add a new cuda-schedule parameter 0: cudaDeviceScheduleAuto 1: cudaDeviceScheduleSpin 2: cudaDeviceScheduleYield 4: cudaDeviceScheduleBlockingSync Also set the best one (4) for luffa algo by default... 9 years ago			`if (opt_cudaschedule == -1) // to reduce cpu usage...`
			`cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);`
Import djm34 qubit, deep and doom algos Indent, and put commonly used functions proto. in cuda_helper.h And add them to --cputest function Also change the color option to --nocolor, -C is no more needed Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> (Which is tired to remove these german copy/pasted comments) 10 years ago
qubit: implement cpu precalc (klaust) improve qubit (+5%) deep and doom (+10%) hashrate based on klausT code, simplified... 10 years ago			`CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], throughput * 64));`
Import djm34 qubit, deep and doom algos Indent, and put commonly used functions proto. in cuda_helper.h And add them to --cputest function Also change the color option to --nocolor, -C is no more needed Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> (Which is tired to remove these german copy/pasted comments) 10 years ago
qubit: implement cpu precalc (klaust) improve qubit (+5%) deep and doom (+10%) hashrate based on klausT code, simplified... 10 years ago			`qubit_luffa512_cpu_init(thr_id, throughput);`
			`cuda_check_cpu_init(thr_id, throughput);`
Import djm34 qubit, deep and doom algos Indent, and put commonly used functions proto. in cuda_helper.h And add them to --cputest function Also change the color option to --nocolor, -C is no more needed Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> (Which is tired to remove these german copy/pasted comments) 10 years ago
			`init[thr_id] = true;`
			`}`

qubit: implement cpu precalc (klaust) improve qubit (+5%) deep and doom (+10%) hashrate based on klausT code, simplified... 10 years ago			`for (int k=0; k < 19; k++)`
remove uint32_t cast 10 years ago			`be32enc(&endiandata[k], pdata[k]);`
Import djm34 qubit, deep and doom algos Indent, and put commonly used functions proto. in cuda_helper.h And add them to --cputest function Also change the color option to --nocolor, -C is no more needed Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> (Which is tired to remove these german copy/pasted comments) 10 years ago
qubit: implement cpu precalc (klaust) improve qubit (+5%) deep and doom (+10%) hashrate based on klausT code, simplified... 10 years ago			`qubit_luffa512_cpu_setBlock_80((void*)endiandata);`
			`cuda_check_cpu_setTarget(ptarget);`
Import djm34 qubit, deep and doom algos Indent, and put commonly used functions proto. in cuda_helper.h And add them to --cputest function Also change the color option to --nocolor, -C is no more needed Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> (Which is tired to remove these german copy/pasted comments) 10 years ago
			`do {`
			`int order = 0;`
qubit: implement cpu precalc (klaust) improve qubit (+5%) deep and doom (+10%) hashrate based on klausT code, simplified... 10 years ago			`*hashes_done = pdata[19] - first_nonce + throughput;`

			`qubit_luffa512_cpu_hash_80(thr_id, (int) throughput, pdata[19], d_hash[thr_id], order++);`
Import djm34 qubit, deep and doom algos Indent, and put commonly used functions proto. in cuda_helper.h And add them to --cputest function Also change the color option to --nocolor, -C is no more needed Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> (Which is tired to remove these german copy/pasted comments) 10 years ago
qubit: implement cpu precalc (klaust) improve qubit (+5%) deep and doom (+10%) hashrate based on klausT code, simplified... 10 years ago			`uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);`
Check and submit multiple nonces in one loop Added to most algos, checkhash function scans a big range and can find multiple nonces at once if the difficulty is low. Stop ignoring them, submit second one if found... Clean the draft code for rc=2 implemented for blake and pentablake btw... fix the reduced displayed hashrate when a nonce is found... Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> 10 years ago			`if (foundNonce != UINT32_MAX)`
Import djm34 qubit, deep and doom algos Indent, and put commonly used functions proto. in cuda_helper.h And add them to --cputest function Also change the color option to --nocolor, -C is no more needed Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> (Which is tired to remove these german copy/pasted comments) 10 years ago			`{`
qubit: implement cpu precalc (klaust) improve qubit (+5%) deep and doom (+10%) hashrate based on klausT code, simplified... 10 years ago			`uint32_t _ALIGN(64) vhash64[8];`
Import djm34 qubit, deep and doom algos Indent, and put commonly used functions proto. in cuda_helper.h And add them to --cputest function Also change the color option to --nocolor, -C is no more needed Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> (Which is tired to remove these german copy/pasted comments) 10 years ago			`be32enc(&endiandata[19], foundNonce);`
remove double reference to luffa algo doomcoin is dead but the luffa algo is still used (Joincoin).. keep doom as alias for compat... rename functions... 10 years ago			`luffa_hash(vhash64, endiandata);`
Import djm34 qubit, deep and doom algos Indent, and put commonly used functions proto. in cuda_helper.h And add them to --cputest function Also change the color option to --nocolor, -C is no more needed Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> (Which is tired to remove these german copy/pasted comments) 10 years ago
qubit: implement cpu precalc (klaust) improve qubit (+5%) deep and doom (+10%) hashrate based on klausT code, simplified... 10 years ago			`if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) {`
start v1.7, apply new prototypes to all algos 9 years ago			`bn_store_hash_target_ratio(vhash64, ptarget, work);`
Import djm34 qubit, deep and doom algos Indent, and put commonly used functions proto. in cuda_helper.h And add them to --cputest function Also change the color option to --nocolor, -C is no more needed Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> (Which is tired to remove these german copy/pasted comments) 10 years ago			`pdata[19] = foundNonce;`
Enhance stale work detection + throughput fixes seems to resolve solo mining lock on share. export also computed solo work diff in api (not perfect) In high rate algos, throughput should be unsigned... This fixes keccak, blake and doom problems And change terminal color of debug lines, to be selectable in putty, color code is not supported in windows but selection is ok there. 10 years ago			`return 1;`
warnings: use the right device id (device_map[thr_id]) 10 years ago			`} else {`
			`applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", device_map[thr_id], foundNonce);`
Import djm34 qubit, deep and doom algos Indent, and put commonly used functions proto. in cuda_helper.h And add them to --cputest function Also change the color option to --nocolor, -C is no more needed Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> (Which is tired to remove these german copy/pasted comments) 10 years ago			`}`
			`}`

qubit: implement cpu precalc (klaust) improve qubit (+5%) deep and doom (+10%) hashrate based on klausT code, simplified... 10 years ago			`if ((uint64_t) throughput + pdata[19] > max_nonce) {`
Enhance stale work detection + throughput fixes seems to resolve solo mining lock on share. export also computed solo work diff in api (not perfect) In high rate algos, throughput should be unsigned... This fixes keccak, blake and doom problems And change terminal color of debug lines, to be selectable in putty, color code is not supported in windows but selection is ok there. 10 years ago			`// pdata[19] = max_nonce;`
Add cuda error checks on qubit algos And rename doom to luffa, like djm34 10 years ago			`break;`
			`}`

Enhance stale work detection + throughput fixes seems to resolve solo mining lock on share. export also computed solo work diff in api (not perfect) In high rate algos, throughput should be unsigned... This fixes keccak, blake and doom problems And change terminal color of debug lines, to be selectable in putty, color code is not supported in windows but selection is ok there. 10 years ago			`pdata[19] += throughput;`

Add cuda error checks on qubit algos And rename doom to luffa, like djm34 10 years ago			`} while (!work_restart[thr_id].restart);`
Import djm34 qubit, deep and doom algos Indent, and put commonly used functions proto. in cuda_helper.h And add them to --cputest function Also change the color option to --nocolor, -C is no more needed Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> (Which is tired to remove these german copy/pasted comments) 10 years ago
			`*hashes_done = pdata[19] - first_nonce + 1;`
			`return 0;`
			`}`
algos: add functions to free allocated resources Will be used later for algo switching not really tested yet... 9 years ago
			`// cleanup`
			`extern "C" void free_luffa(int thr_id)`
			`{`
			`if (!init[thr_id])`
			`return;`

			`cudaSetDevice(device_map[thr_id]);`

			`cudaFree(d_hash[thr_id]);`

			`cuda_check_cpu_free(thr_id);`
			`init[thr_id] = false;`

			`cudaDeviceSynchronize();`
			`}`