ccminer-gostd-lite/fuguecoin.cpp

#include <string.h>
#include <stdint.h>
#include <cuda_runtime.h>

#include "sph/sph_fugue.h"

#include "miner.h"

#include "cuda_fugue256.h"

// Fugue-256 entry points with C linkage, declared here and defined outside this file.
// They follow an init / update / close shape; none of them is called by the code
// visible in this file.
extern "C" void my_fugue256_init(void *cc);
extern "C" void my_fugue256(void *cc, const void *data, size_t len);
extern "C" void my_fugue256_close(void *cc, void *dst);
extern "C" void my_fugue256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);

// prepared contexts after the first 80 bytes
// sph_fugue256_context  ctx_fugue_const[MAX_GPUS];

// Reverse the byte order of a 32-bit value.
// Note: the argument is expanded four times, so it must be free of side effects.
#define SWAP32(x) \
    ( (((x) & 0x000000ffu) << 24) | (((x) & 0x0000ff00u) << 8) | \
      (((x) & 0x00ff0000u) >> 8)  | (((x) & 0xff000000u) >> 24) )

void fugue256_hash(unsigned char* output, const unsigned char* input, int len)
{
	sph_fugue256_context ctx;

	sph_fugue256_init(&ctx);
	sph_fugue256(&ctx, input, len);
	sph_fugue256_close(&ctx, (void *)output);
}

// Per-thread flag, indexed by thr_id: set by scanhash_fugue256 after it has run
// fugue256_cpu_init for that thread, cleared again by free_fugue256.
static bool init[MAX_GPUS] = { 0 };

/**
 * Scan a range of nonces for a Fugue-256 hash that satisfies the work target.
 *
 * The 20 header words in work->data are encoded with be32enc and handed to
 * fugue256_cpu_setBlock, then fugue256_cpu_hash is run in batches of
 * `throughput` nonces. A candidate it reports is re-hashed on the CPU with
 * the sph_fugue256 code and checked against the target before it is accepted.
 *
 * thr_id       index of the mining thread; selects the device through device_map
 * work         work item; data[19] is the nonce cursor and is updated in place
 * max_nonce    scanning stops once the cursor would reach this value
 * hashes_done  receives the number of nonces covered, counted from the value
 *              data[19] had on entry
 *
 * Returns 1 when a candidate passed CPU validation (data[19] then holds it),
 * 0 when the range was exhausted or a work restart was flagged.
 */
int scanhash_fugue256(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
{
	uint32_t _ALIGN(64) endiandata[20];
	uint32_t *pdata = work->data;
	uint32_t *ptarget = work->target;
	// start_nonce keeps the incoming value; the post-increment makes scanning begin one past it
	uint32_t start_nonce = pdata[19]++;
	int intensity = (device_sm[device_map[thr_id]] > 500) ? 22 : 19;
	uint32_t throughput =  cuda_default_throughput(thr_id, 1U << intensity); // 1U << 19 == 256*256*8
	// clamp only after init, so fugue256_cpu_init always receives the unreduced throughput
	if (init[thr_id]) throughput = min(throughput, max_nonce - start_nonce);

	if (opt_benchmark)
		ptarget[7] = 0xf;

	// one-time setup for this thread
	if(!init[thr_id])
	{
		// report a failed device selection instead of silently ignoring it;
		// control flow is unchanged so existing callers behave as before
		cudaError_t err = cudaSetDevice(device_map[thr_id]);
		if (err != cudaSuccess)
			gpulog(LOG_WARNING, thr_id, "cudaSetDevice failed: %s", cudaGetErrorString(err));

		fugue256_cpu_init(thr_id, throughput);
		init[thr_id] = true;
	}

	// Endian
	for (int kk=0; kk < 20; kk++)
		be32enc(&endiandata[kk], pdata[kk]);

	// Prepare the context with the endian-swapped block header (the nonce is replaced later)
	fugue256_cpu_setBlock(thr_id, endiandata, (void*)ptarget);

	do {
		// GPU
		uint32_t foundNounce = UINT32_MAX;	// UINT32_MAX means "nothing found"
		fugue256_cpu_hash(thr_id, throughput, pdata[19], NULL, &foundNounce);

		*hashes_done = pdata[19] - start_nonce + throughput;

		// candidates are ignored while bench_algo >= 0
		if (foundNounce < UINT32_MAX && bench_algo < 0)
		{
			uint32_t vhash[8];
			sph_fugue256_context ctx_fugue;
			endiandata[19] = SWAP32(foundNounce);

			// recompute the hash of the full 80-byte header on the CPU
			sph_fugue256_init(&ctx_fugue);
			sph_fugue256 (&ctx_fugue, endiandata, 80);
			sph_fugue256_close(&ctx_fugue, &vhash);

			if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget))
			{
				work_set_target_ratio(work, vhash);
				pdata[19] = foundNounce;
				return 1;
			} else {
				gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNounce);
			}
		}

		// 64-bit sum so the end-of-range test cannot wrap around
		if ((uint64_t) throughput + pdata[19] >= max_nonce) {
			pdata[19] = max_nonce;
			break;
		}

		pdata[19] += throughput;

	} while (!work_restart[thr_id].restart);

	*hashes_done = pdata[19] - start_nonce;
	return 0;
}

/**
 * Cleanup: release the per-thread Fugue-256 resources via fugue256_cpu_free.
 *
 * Does nothing when init[thr_id] is not set, i.e. scanhash_fugue256 never
 * initialised this thread or it has already been freed.
 */
void free_fugue256(int thr_id)
{
	if (!init[thr_id])
		return;

	// cudaThreadSynchronize() is deprecated; cudaDeviceSynchronize() is its
	// documented replacement and is already what the end of this function uses
	cudaDeviceSynchronize();

	fugue256_cpu_free(thr_id);

	init[thr_id] = false;

	cudaDeviceSynchronize();
}
min() and max(a,b) are not defined on linux, in fact max exists in jansson includes (in tree only) Add them to miner.h 10 years ago			`#include <string.h>`
			`#include <stdint.h>`
Various algos cleanup + lyra2 sec nonce fix 10 years ago			`#include <cuda_runtime.h>`
min() and max(a,b) are not defined on linux, in fact max exists in jansson includes (in tree only) Add them to miner.h 10 years ago
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`#include "sph/sph_fugue.h"`

			`#include "miner.h"`

min() and max(a,b) are not defined on linux, in fact max exists in jansson includes (in tree only) Add them to miner.h 10 years ago			`#include "cuda_fugue256.h"`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago
			`extern "C" void my_fugue256_init(void *cc);`
			`extern "C" void my_fugue256(void *cc, const void *data, size_t len);`
			`extern "C" void my_fugue256_close(void *cc, void *dst);`
			`extern "C" void my_fugue256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);`

			`// vorbereitete Kontexte nach den ersten 80 Bytes`
Handle a maximum of 16 gpus (vs 8 before) Some cards have 2 gpus on board... 10 years ago			`// sph_fugue256_context ctx_fugue_const[MAX_GPUS];`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago
			`#define SWAP32(x) \`
			`((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) | \`
			`(((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu))`

algos: add functions to free allocated resources Will be used later for algo switching not really tested yet... 9 years ago			`void fugue256_hash(unsigned char* output, const unsigned char* input, int len)`
			`{`
			`sph_fugue256_context ctx;`

			`sph_fugue256_init(&ctx);`
			`sph_fugue256(&ctx, input, len);`
			`sph_fugue256_close(&ctx, (void *)output);`
			`}`

Handle a maximum of 16 gpus (vs 8 before) Some cards have 2 gpus on board... 10 years ago			`static bool init[MAX_GPUS] = { 0 };`
various small changes heavy: reduce by 256 threads default intensity to all -i 20 cuda: put static thread init bools outside the code (made once) api: fix nvml header to build without 10 years ago
start v1.7, apply new prototypes to all algos 9 years ago			`int scanhash_fugue256(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)`
min() and max(a,b) are not defined on linux, in fact max exists in jansson includes (in tree only) Add them to miner.h 10 years ago			`{`
start v1.7, apply new prototypes to all algos 9 years ago			`uint32_t _ALIGN(64) endiandata[20];`
			`uint32_t *pdata = work->data;`
			`uint32_t *ptarget = work->target;`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`uint32_t start_nonce = pdata[19]++;`
cuda: store device SM in a global var sample usage made for blake and fugue (higher intensity for SM5.2) add these to cuda_helper and clean unused code 10 years ago			`int intensity = (device_sm[device_map[thr_id]] > 500) ? 22 : 19;`
intensity: do not reduce throughput before init Else the memory allocated could be less than required later btw, use the new "cuda" function to apply intensity/throughput 9 years ago			`uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); // 256*256*8`
			`if (init[thr_id]) throughput = min(throughput, max_nonce - start_nonce);`
Add intensity to last algos and fix quark speed 10 years ago
			`if (opt_benchmark)`
			`((uint32_t*)ptarget)[7] = 0xf;`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago
			`// init`
			`if(!init[thr_id])`
			`{`
Various algos cleanup + lyra2 sec nonce fix 10 years ago			`cudaSetDevice(device_map[thr_id]);`

api: report throughput when default 10 years ago			`fugue256_cpu_init(thr_id, throughput);`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`init[thr_id] = true;`
			`}`
min() and max(a,b) are not defined on linux, in fact max exists in jansson includes (in tree only) Add them to miner.h 10 years ago
start v1.7, apply new prototypes to all algos 9 years ago			`// Endian`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`for (int kk=0; kk < 20; kk++)`
			`be32enc(&endiandata[kk], pdata[kk]);`

			`// Context mit dem Endian gedrehten Blockheader vorbereiten (Nonce wird später ersetzt)`
			`fugue256_cpu_setBlock(thr_id, endiandata, (void*)ptarget);`

			`do {`
			`// GPU`
Various algos cleanup + lyra2 sec nonce fix 10 years ago			`uint32_t foundNounce = UINT32_MAX;`
api: report throughput when default 10 years ago			`fugue256_cpu_hash(thr_id, throughput, pdata[19], NULL, &foundNounce);`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago
never interrupt global benchmark with found nonces fix some algo weird hashrates (like blake) and reset device between algos, for better accuracy but this reset doesnt seems enough to bench all algos correctly... to test on linux, could be a driver issue... heavy: fix first alloc and indent with tabs... 9 years ago			`*hashes_done = pdata[19] - start_nonce + throughput;`

			`if (foundNounce < UINT32_MAX && bench_algo < 0)`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`{`
start v1.7, apply new prototypes to all algos 9 years ago			`uint32_t vhash[8];`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`sph_fugue256_context ctx_fugue;`
start v1.7, apply new prototypes to all algos 9 years ago			`endiandata[19] = SWAP32(foundNounce);`

Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`sph_fugue256_init(&ctx_fugue);`
			`sph_fugue256 (&ctx_fugue, endiandata, 80);`
start v1.7, apply new prototypes to all algos 9 years ago			`sph_fugue256_close(&ctx_fugue, &vhash);`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago
start v1.7, apply new prototypes to all algos 9 years ago			`if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget))`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`{`
diff: use the new function in all algos 9 years ago			`work_set_target_ratio(work, vhash);`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`pdata[19] = foundNounce;`
			`return 1;`
			`} else {`
benchmark: enhance the mem leak detection reduce "false" warnings, and ignore unrelated/small ones <= 1 MB On windows the gpu memory can be allocated by other processes + some cleanup in algos... (free/gpulog) 9 years ago			`gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNounce);`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`}`
			`}`

never interrupt global benchmark with found nonces fix some algo weird hashrates (like blake) and reset device between algos, for better accuracy but this reset doesnt seems enough to bench all algos correctly... to test on linux, could be a driver issue... heavy: fix first alloc and indent with tabs... 9 years ago			`if ((uint64_t) throughput + pdata[19] >= max_nonce) {`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`pdata[19] = max_nonce;`
Add intensity to last algos and fix quark speed 10 years ago			`break;`
			`}`

api: report throughput when default 10 years ago			`pdata[19] += throughput;`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago
Add intensity to last algos and fix quark speed 10 years ago			`} while (!work_restart[thr_id].restart);`
min() and max(a,b) are not defined on linux, in fact max exists in jansson includes (in tree only) Add them to miner.h 10 years ago
never interrupt global benchmark with found nonces fix some algo weird hashrates (like blake) and reset device between algos, for better accuracy but this reset doesnt seems enough to bench all algos correctly... to test on linux, could be a driver issue... heavy: fix first alloc and indent with tabs... 9 years ago			`*hashes_done = pdata[19] - start_nonce;`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`return 0;`
			`}`

algos: add functions to free allocated resources Will be used later for algo switching not really tested yet... 9 years ago			`// cleanup`
			`void free_fugue256(int thr_id)`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`{`
algos: add functions to free allocated resources Will be used later for algo switching not really tested yet... 9 years ago			`if (!init[thr_id])`
			`return;`
min() and max(a,b) are not defined on linux, in fact max exists in jansson includes (in tree only) Add them to miner.h 10 years ago
benchmark: enhance the mem leak detection reduce "false" warnings, and ignore unrelated/small ones <= 1 MB On windows the gpu memory can be allocated by other processes + some cleanup in algos... (free/gpulog) 9 years ago			`cudaThreadSynchronize();`
algos: add functions to free allocated resources Will be used later for algo switching not really tested yet... 9 years ago
			`fugue256_cpu_free(thr_id);`

			`init[thr_id] = false;`

			`cudaDeviceSynchronize();`
intensity: do not reduce throughput before init Else the memory allocated could be less than required later btw, use the new "cuda" function to apply intensity/throughput 9 years ago			`}`