ccminer/fuguecoin.cpp

#include <string.h>
#include <stdint.h>
#include <cuda_runtime.h>

#include "sph/sph_fugue.h"

#include "miner.h"

#include "cuda_fugue256.h"

// Reverse the byte order of a 32-bit value: each byte is isolated with a mask
// and then moved to its mirrored position. The argument is expanded four
// times, so it must not have side effects.
#define SWAP32(x) \
    ((((x) & 0x000000ffu) << 24) | (((x) & 0x0000ff00u) << 8)   | \
      (((x) & 0x00ff0000u) >> 8) | (((x) & 0xff000000u) >> 24))

void fugue256_hash(unsigned char* output, const unsigned char* input, int len)
{
	sph_fugue256_context ctx;

	sph_fugue256_init(&ctx);
	sph_fugue256(&ctx, input, len);
	sph_fugue256_close(&ctx, (void *)output);
}

// One flag per miner thread id: set by scanhash_fugue256() once
// fugue256_cpu_init() has run for that thread, cleared by free_fugue256().
static bool init[MAX_GPUS] = { false };

int scanhash_fugue256(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
{
	uint32_t _ALIGN(64) endiandata[20];
	uint32_t *pdata = work->data;
	uint32_t *ptarget = work->target;
	uint32_t start_nonce = pdata[19]++;
	int intensity = (device_sm[device_map[thr_id]] > 500) ? 22 : 19;
	uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity);
	if (init[thr_id]) throughput = min(throughput, max_nonce - start_nonce);

	if (opt_benchmark)
		ptarget[7] = 0xf;

	// init
	if(!init[thr_id])
	{
		cudaSetDevice(device_map[thr_id]);
		if (opt_cudaschedule == -1 && gpu_threads == 1) {
			cudaDeviceReset();
			// reduce cpu usage
			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
			CUDA_LOG_ERROR();
		}
		gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);

		fugue256_cpu_init(thr_id, throughput);
		init[thr_id] = true;
	}

	// Endian
	for (int kk=0; kk < 20; kk++)
		be32enc(&endiandata[kk], pdata[kk]);

	fugue256_cpu_setBlock(thr_id, endiandata, (void*)ptarget);

	do {
		// GPU
		uint32_t foundNounce = UINT32_MAX;
		fugue256_cpu_hash(thr_id, throughput, pdata[19], NULL, &foundNounce);

		*hashes_done = pdata[19] - start_nonce + throughput;

		if (foundNounce < UINT32_MAX && bench_algo < 0)
		{
			uint32_t vhash[8];
			sph_fugue256_context ctx_fugue;
			endiandata[19] = SWAP32(foundNounce);

			sph_fugue256_init(&ctx_fugue);
			sph_fugue256 (&ctx_fugue, endiandata, 80);
			sph_fugue256_close(&ctx_fugue, &vhash);

			if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget))
			{
				work_set_target_ratio(work, vhash);
				pdata[19] = foundNounce;
				return 1;
			} else {
				gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNounce);
			}
		}

		if ((uint64_t) throughput + pdata[19] >= max_nonce) {
			pdata[19] = max_nonce;
			break;
		}

		pdata[19] += throughput;

	} while (!work_restart[thr_id].restart);

	*hashes_done = pdata[19] - start_nonce;
	return 0;
}

/**
 * Cleanup: tear down the Fugue-256 state of one miner thread.
 *
 * Does nothing when scanhash_fugue256() never initialised this thread.
 * Otherwise it waits for the device, calls fugue256_cpu_free() (presumably the
 * counterpart of fugue256_cpu_init() — confirm in cuda_fugue256), clears the
 * init flag so a later scan re-initialises, and synchronizes once more.
 *
 * @param thr_id miner thread index whose state is released
 */
void free_fugue256(int thr_id)
{
	if (!init[thr_id])
		return;

	// cudaThreadSynchronize() is deprecated in the CUDA runtime;
	// cudaDeviceSynchronize() is its documented replacement.
	cudaDeviceSynchronize();

	fugue256_cpu_free(thr_id);

	init[thr_id] = false;

	cudaDeviceSynchronize();
}
min() and max(a,b) are not defined on linux, in fact max exists in jansson includes (in tree only) Add them to miner.h 10 years ago			`#include <string.h>`
			`#include <stdint.h>`
Various algos cleanup + lyra2 sec nonce fix 10 years ago			`#include <cuda_runtime.h>`
min() and max(a,b) are not defined on linux, in fact max exists in jansson includes (in tree only) Add them to miner.h 10 years ago
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`#include "sph/sph_fugue.h"`

			`#include "miner.h"`

min() and max(a,b) are not defined on linux, in fact max exists in jansson includes (in tree only) Add them to miner.h 10 years ago			`#include "cuda_fugue256.h"`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago
			`#define SWAP32(x) \`
			`((((x) << 24) & 0xff000000u) \| (((x) << 8) & 0x00ff0000u) \| \`
			`(((x) >> 8) & 0x0000ff00u) \| (((x) >> 24) & 0x000000ffu))`

algos: add functions to free allocated resources Will be used later for algo switching not really tested yet... 9 years ago			`void fugue256_hash(unsigned char* output, const unsigned char* input, int len)`
			`{`
			`sph_fugue256_context ctx;`

			`sph_fugue256_init(&ctx);`
			`sph_fugue256(&ctx, input, len);`
			`sph_fugue256_close(&ctx, (void *)output);`
			`}`

Handle a maximum of 16 gpus (vs 8 before) Some cards have 2 gpus on board... 10 years ago			`static bool init[MAX_GPUS] = { 0 };`
various small changes heavy: reduce by 256 threads default intensity to all -i 20 cuda: put static thread init bools outside the code (made once) api: fix nvml header to build without 10 years ago
start v1.7, apply new prototypes to all algos 9 years ago			`int scanhash_fugue256(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)`
min() and max(a,b) are not defined on linux, in fact max exists in jansson includes (in tree only) Add them to miner.h 10 years ago			`{`
start v1.7, apply new prototypes to all algos 9 years ago			`uint32_t _ALIGN(64) endiandata[20];`
			`uint32_t *pdata = work->data;`
			`uint32_t *ptarget = work->target;`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`uint32_t start_nonce = pdata[19]++;`
cuda: store device SM in a global var sample usage made for blake and fugue (higher intensity for SM5.2) add these to cuda_helper and clean unused code 10 years ago			`int intensity = (device_sm[device_map[thr_id]] > 500) ? 22 : 19;`
various changes, cleanup for the release; small fixes to better handle multiple threads per gpu; explicitly report that quark is not compatible with SM 2.1 (compact shuffle) 9 years ago			`uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity);`
intensity: do not reduce throughput before init Else the memory allocated could be less than required later btw, use the new "cuda" function to apply intensity/throughput 9 years ago			`if (init[thr_id]) throughput = min(throughput, max_nonce - start_nonce);`
Add intensity to last algos and fix quark speed 10 years ago
			`if (opt_benchmark)`
various changes, cleanup for the release; small fixes to better handle multiple threads per gpu; explicitly report that quark is not compatible with SM 2.1 (compact shuffle) 9 years ago			`ptarget[7] = 0xf;`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago
			`// init`
			`if(!init[thr_id])`
			`{`
Various algos cleanup + lyra2 sec nonce fix 10 years ago			`cudaSetDevice(device_map[thr_id]);`
Show intensity on init for all algos 8 years ago			`if (opt_cudaschedule == -1 && gpu_threads == 1) {`
			`cudaDeviceReset();`
			`// reduce cpu usage`
			`cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);`
			`CUDA_LOG_ERROR();`
			`}`
			`gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);`
Various algos cleanup + lyra2 sec nonce fix 10 years ago
api: report throughput when default 10 years ago			`fugue256_cpu_init(thr_id, throughput);`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`init[thr_id] = true;`
			`}`
min() and max(a,b) are not defined on linux, in fact max exists in jansson includes (in tree only) Add them to miner.h 10 years ago
start v1.7, apply new prototypes to all algos 9 years ago			`// Endian`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`for (int kk=0; kk < 20; kk++)`
			`be32enc(&endiandata[kk], pdata[kk]);`

			`fugue256_cpu_setBlock(thr_id, endiandata, (void*)ptarget);`

			`do {`
			`// GPU`
Various algos cleanup + lyra2 sec nonce fix 10 years ago			`uint32_t foundNounce = UINT32_MAX;`
api: report throughput when default 10 years ago			`fugue256_cpu_hash(thr_id, throughput, pdata[19], NULL, &foundNounce);`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago
never interrupt global benchmark with found nonces; fix some algos' weird hashrates (like blake) and reset device between algos, for better accuracy, but this reset doesn't seem to be enough to bench all algos correctly... to test on linux, could be a driver issue... heavy: fix first alloc and indent with tabs... 9 years ago			`*hashes_done = pdata[19] - start_nonce + throughput;`

			`if (foundNounce < UINT32_MAX && bench_algo < 0)`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`{`
start v1.7, apply new prototypes to all algos 9 years ago			`uint32_t vhash[8];`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`sph_fugue256_context ctx_fugue;`
start v1.7, apply new prototypes to all algos 9 years ago			`endiandata[19] = SWAP32(foundNounce);`

Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`sph_fugue256_init(&ctx_fugue);`
			`sph_fugue256 (&ctx_fugue, endiandata, 80);`
start v1.7, apply new prototypes to all algos 9 years ago			`sph_fugue256_close(&ctx_fugue, &vhash);`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago
start v1.7, apply new prototypes to all algos 9 years ago			`if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget))`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`{`
diff: use the new function in all algos 9 years ago			`work_set_target_ratio(work, vhash);`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`pdata[19] = foundNounce;`
			`return 1;`
			`} else {`
benchmark: enhance the mem leak detection reduce "false" warnings, and ignore unrelated/small ones <= 1 MB On windows the gpu memory can be allocated by other processes + some cleanup in algos... (free/gpulog) 9 years ago			`gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNounce);`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`}`
			`}`

never interrupt global benchmark with found nonces; fix some algos' weird hashrates (like blake) and reset device between algos, for better accuracy, but this reset doesn't seem to be enough to bench all algos correctly... to test on linux, could be a driver issue... heavy: fix first alloc and indent with tabs... 9 years ago			`if ((uint64_t) throughput + pdata[19] >= max_nonce) {`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`pdata[19] = max_nonce;`
Add intensity to last algos and fix quark speed 10 years ago			`break;`
			`}`

api: report throughput when default 10 years ago			`pdata[19] += throughput;`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago
Add intensity to last algos and fix quark speed 10 years ago			`} while (!work_restart[thr_id].restart);`
min() and max(a,b) are not defined on linux, in fact max exists in jansson includes (in tree only) Add them to miner.h 10 years ago
never interrupt global benchmark with found nonces; fix some algos' weird hashrates (like blake) and reset device between algos, for better accuracy, but this reset doesn't seem to be enough to bench all algos correctly... to test on linux, could be a driver issue... heavy: fix first alloc and indent with tabs... 9 years ago			`*hashes_done = pdata[19] - start_nonce;`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`return 0;`
			`}`

algos: add functions to free allocated resources Will be used later for algo switching not really tested yet... 9 years ago			`// cleanup`
			`void free_fugue256(int thr_id)`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`{`
algos: add functions to free allocated resources Will be used later for algo switching not really tested yet... 9 years ago			`if (!init[thr_id])`
			`return;`
min() and max(a,b) are not defined on linux, in fact max exists in jansson includes (in tree only) Add them to miner.h 10 years ago
benchmark: enhance the mem leak detection reduce "false" warnings, and ignore unrelated/small ones <= 1 MB On windows the gpu memory can be allocated by other processes + some cleanup in algos... (free/gpulog) 9 years ago			`cudaThreadSynchronize();`
algos: add functions to free allocated resources Will be used later for algo switching not really tested yet... 9 years ago
			`fugue256_cpu_free(thr_id);`

			`init[thr_id] = false;`

			`cudaDeviceSynchronize();`
intensity: do not reduce throughput before init Else the memory allocated could be less than required later btw, use the new "cuda" function to apply intensity/throughput 9 years ago			`}`