// ccminer/groestlcoin.cpp

#include <string.h>
#include <stdint.h>
#include <cuda_runtime.h>
#include <openssl/sha.h>

#include "sph/sph_groestl.h"
#include "cuda_groestlcoin.h"

#include "miner.h"

// CPU hash
void groestlhash(void *state, const void *input)
{
	uint32_t _ALIGN(64) hash[16];
	sph_groestl512_context ctx_groestl;

	sph_groestl512_init(&ctx_groestl);
	sph_groestl512(&ctx_groestl, input, 80);
	sph_groestl512_close(&ctx_groestl, hash);

	sph_groestl512_init(&ctx_groestl);
	sph_groestl512(&ctx_groestl, hash, 64);
	sph_groestl512_close(&ctx_groestl, hash);

	memcpy(state, hash, 32);
}

// Per-thread init flag, indexed by thr_id: set by scanhash_groestlcoin() once
// groestlcoin_cpu_init() has been called, cleared again by free_groestlcoin().
static bool init[MAX_GPUS] = { 0 };

/**
 * Scan a nonce range for the groestlcoin algorithm.
 *
 * Starting at work->data[19], batches of `throughput` nonces are handed to
 * groestlcoin_cpu_hash(). A candidate nonce reported by it is re-hashed on the
 * CPU with groestlhash() and checked against work->target before it is
 * accepted.
 *
 * @param thr_id       thread index (indexes init[], device_map[] and work_restart[])
 * @param work         work item; data[19] receives the found nonce, or the
 *                     position reached when nothing was found
 * @param max_nonce    upper bound of the nonce range to scan
 * @param hashes_done  out: number of nonces processed by this call
 * @return 1 when a nonce validated on the CPU, 0 otherwise (also returned,
 *         with *hashes_done set to 0, when the host buffer cannot be allocated)
 */
int scanhash_groestlcoin(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done)
{
	uint32_t _ALIGN(64) endiandata[32];
	uint32_t *pdata = work->data;
	uint32_t *ptarget = work->target;
	const uint32_t start_nonce = pdata[19];
	uint32_t throughput = cuda_default_throughput(thr_id, 1 << 19); // 256*256*8
	// Only clamp once initialised: groestlcoin_cpu_init() below must be given
	// the unclamped throughput.
	if (init[thr_id]) throughput = min(throughput, max_nonce - start_nonce);

	uint32_t *outputHash = (uint32_t*)malloc((size_t) 64 * throughput);
	// malloc(0) may legitimately return NULL, so only a non-empty request
	// that yields NULL is treated as a failure.
	if (outputHash == NULL && throughput > 0) {
		gpulog(LOG_ERR, thr_id, "unable to allocate host memory for %u hashes", throughput);
		*hashes_done = 0;
		return 0;
	}

	if (opt_benchmark)
		ptarget[7] = 0x001f;

	if (!init[thr_id])
	{
		cudaSetDevice(device_map[thr_id]);
		if (opt_cudaschedule == -1 && gpu_threads == 1) {
			cudaDeviceReset();
			// reduce cpu usage
			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
			CUDA_LOG_ERROR();
		}
		CUDA_LOG_ERROR();
		groestlcoin_cpu_init(thr_id, throughput);
		init[thr_id] = true;
	}

	// big-endian copy of the first 20 words of the work data
	for (int k = 0; k < 20; k++)
		be32enc(&endiandata[k], pdata[k]);

	groestlcoin_cpu_setBlock(thr_id, endiandata, (void*)ptarget);

	do {
		// UINT32_MAX is the "nothing found" sentinel
		uint32_t foundNounce = UINT32_MAX;

		*hashes_done = pdata[19] - start_nonce + throughput;

		// GPU hash
		groestlcoin_cpu_hash(thr_id, throughput, pdata[19], outputHash, &foundNounce);

		if (foundNounce < UINT32_MAX && bench_algo < 0)
		{
			// re-check the candidate on the CPU before reporting it
			uint32_t _ALIGN(64) vhash[8];
			endiandata[19] = swab32(foundNounce);
			groestlhash(vhash, endiandata);

			if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
				work_set_target_ratio(work, vhash);
				pdata[19] = foundNounce;
				free(outputHash);
				return 1;
			} else {
				gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNounce);
			}
		}

		// 64-bit sum so the range-end test cannot wrap around
		if ((uint64_t) throughput + pdata[19] >= max_nonce) {
			pdata[19] = max_nonce;
			break;
		}
		pdata[19] += throughput;

	} while (!work_restart[thr_id].restart);

	*hashes_done = pdata[19] - start_nonce;

	free(outputHash);
	return 0;
}

/**
 * Cleanup: release the groestlcoin resources of a thread.
 *
 * Does nothing when the thread was never initialised. Otherwise waits for the
 * device, calls groestlcoin_cpu_free(), clears the init flag so a later
 * scanhash_groestlcoin() call initialises again, and waits once more.
 *
 * @param thr_id  thread index (indexes init[])
 */
void free_groestlcoin(int thr_id)
{
	if (!init[thr_id])
		return;

	// cudaThreadSynchronize() is deprecated; cudaDeviceSynchronize() is its replacement
	cudaDeviceSynchronize();

	groestlcoin_cpu_free(thr_id);
	init[thr_id] = false;

	cudaDeviceSynchronize();
}
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`#include <string.h>`
			`#include <stdint.h>`
Various algos cleanup + lyra2 sec nonce fix 10 years ago			`#include <cuda_runtime.h>`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`#include <openssl/sha.h>`

min() and max(a,b) are not defined on linux, in fact max exists in jansson includes (in tree only) Add them to miner.h 10 years ago			`#include "sph/sph_groestl.h"`
Reduce keccak, deep & anime intensity + handle groestl -i param default intensity was the max supported by the card, and perf is not really better. I prefer to let it one under for cards with lower memory (1GB) 10 years ago			`#include "cuda_groestlcoin.h"`

min() and max(a,b) are not defined on linux, in fact max exists in jansson includes (in tree only) Add them to miner.h 10 years ago			`#include "miner.h"`

myriad/groestl: some more cleanup + tabs... 10 years ago			`// CPU hash`
			`void groestlhash(void state, const void input)`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`{`
myriad/groestl: some more cleanup + tabs... 10 years ago			`uint32_t _ALIGN(64) hash[16];`
			`sph_groestl512_context ctx_groestl;`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago
myriad/groestl: some more cleanup + tabs... 10 years ago			`sph_groestl512_init(&ctx_groestl);`
			`sph_groestl512(&ctx_groestl, input, 80);`
			`sph_groestl512_close(&ctx_groestl, hash);`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago
myriad/groestl: some more cleanup + tabs... 10 years ago			`sph_groestl512_init(&ctx_groestl);`
			`sph_groestl512(&ctx_groestl, hash, 64);`
			`sph_groestl512_close(&ctx_groestl, hash);`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago
myriad/groestl: some more cleanup + tabs... 10 years ago			`memcpy(state, hash, 32);`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`}`

Handle a maximum of 16 gpus (vs 8 before) Some cards have 2 gpus on board... 10 years ago			`static bool init[MAX_GPUS] = { 0 };`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago
start v1.7, apply new prototypes to all algos 9 years ago			`int scanhash_groestlcoin(int thr_id, struct work work, uint32_t max_nonce, unsigned long hashes_done)`
Allow different intensity per device and clean the old variables, no more required 10 years ago			`{`
start v1.7, apply new prototypes to all algos 9 years ago			`uint32_t _ALIGN(64) endiandata[32];`
			`uint32_t *pdata = work->data;`
			`uint32_t *ptarget = work->target;`
myriad/groestl: some more cleanup + tabs... 10 years ago			`uint32_t start_nonce = pdata[19];`
intensity: do not reduce throughput before init Else the memory allocated could be less than required later btw, use the new "cuda" function to apply intensity/throughput 9 years ago			`uint32_t throughput = cuda_default_throughput(thr_id, 1 << 19); // 2562568`
			`if (init[thr_id]) throughput = min(throughput, max_nonce - start_nonce);`
myriad/groestl: some more cleanup + tabs... 10 years ago
benchmark: enhance the mem leak detection reduce "false" warnings, and ignore unrelated/small ones <= 1 MB On windows the gpu memory can be allocated by other processes + some cleanup in algos... (free/gpulog) 9 years ago			`uint32_t outputHash = (uint32_t)malloc((size_t) 64* throughput);`
myriad/groestl: some more cleanup + tabs... 10 years ago
			`if (opt_benchmark)`
never interrupt global benchmark with found nonces fix some algo weird hashrates (like blake) and reset device between algos, for better accuracy but this reset doesnt seems enough to bench all algos correctly... to test on linux, could be a driver issue... heavy: fix first alloc and indent with tabs... 9 years ago			`ptarget[7] = 0x001f;`
myriad/groestl: some more cleanup + tabs... 10 years ago
			`if (!init[thr_id])`
			`{`
			`cudaSetDevice(device_map[thr_id]);`
1.7.1 release set schedule flags to reduce linux cpu usage without MyStreamSynchronize() 9 years ago			`if (opt_cudaschedule == -1 && gpu_threads == 1) {`
			`cudaDeviceReset();`
			`// reduce cpu usage`
			`cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);`
			`CUDA_LOG_ERROR();`
			`}`
benchmark: enhance the mem leak detection reduce "false" warnings, and ignore unrelated/small ones <= 1 MB On windows the gpu memory can be allocated by other processes + some cleanup in algos... (free/gpulog) 9 years ago			`CUDA_LOG_ERROR();`
myriad/groestl: some more cleanup + tabs... 10 years ago			`groestlcoin_cpu_init(thr_id, throughput);`
			`init[thr_id] = true;`
			`}`

			`for (int k=0; k < 20; k++)`
			`be32enc(&endiandata[k], pdata[k]);`

			`groestlcoin_cpu_setBlock(thr_id, endiandata, (void*)ptarget);`

			`do {`
			`uint32_t foundNounce = UINT32_MAX;`

			`*hashes_done = pdata[19] - start_nonce + throughput;`

			`// GPU hash`
			`groestlcoin_cpu_hash(thr_id, throughput, pdata[19], outputHash, &foundNounce);`

never interrupt global benchmark with found nonces fix some algo weird hashrates (like blake) and reset device between algos, for better accuracy but this reset doesnt seems enough to bench all algos correctly... to test on linux, could be a driver issue... heavy: fix first alloc and indent with tabs... 9 years ago			`if (foundNounce < UINT32_MAX && bench_algo < 0)`
myriad/groestl: some more cleanup + tabs... 10 years ago			`{`
start v1.7, apply new prototypes to all algos 9 years ago			`uint32_t _ALIGN(64) vhash[8];`
myriad/groestl: some more cleanup + tabs... 10 years ago			`endiandata[19] = swab32(foundNounce);`
start v1.7, apply new prototypes to all algos 9 years ago			`groestlhash(vhash, endiandata);`
myriad/groestl: some more cleanup + tabs... 10 years ago
start v1.7, apply new prototypes to all algos 9 years ago			`if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {`
diff: use the new function in all algos 9 years ago			`work_set_target_ratio(work, vhash);`
myriad/groestl: some more cleanup + tabs... 10 years ago			`pdata[19] = foundNounce;`
			`free(outputHash);`
			`return true;`
			`} else {`
benchmark: enhance the mem leak detection reduce "false" warnings, and ignore unrelated/small ones <= 1 MB On windows the gpu memory can be allocated by other processes + some cleanup in algos... (free/gpulog) 9 years ago			`gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNounce);`
myriad/groestl: some more cleanup + tabs... 10 years ago			`}`
			`}`

never interrupt global benchmark with found nonces fix some algo weird hashrates (like blake) and reset device between algos, for better accuracy but this reset doesnt seems enough to bench all algos correctly... to test on linux, could be a driver issue... heavy: fix first alloc and indent with tabs... 9 years ago			`if ((uint64_t) throughput + pdata[19] >= max_nonce) {`
myriad/groestl: some more cleanup + tabs... 10 years ago			`pdata[19] = max_nonce;`
			`break;`
			`}`
			`pdata[19] += throughput;`

never interrupt global benchmark with found nonces fix some algo weird hashrates (like blake) and reset device between algos, for better accuracy but this reset doesnt seems enough to bench all algos correctly... to test on linux, could be a driver issue... heavy: fix first alloc and indent with tabs... 9 years ago			`} while (!work_restart[thr_id].restart);`

			`*hashes_done = pdata[19] - start_nonce;`
myriad/groestl: some more cleanup + tabs... 10 years ago
			`free(outputHash);`
			`return 0;`
Revision 0.6 with myriad-groestl and jackpot coin 11 years ago			`}`

algos: add functions to free allocated resources Will be used later for algo switching not really tested yet... 9 years ago			`// cleanup`
			`void free_groestlcoin(int thr_id)`
			`{`
			`if (!init[thr_id])`
			`return;`

benchmark: enhance the mem leak detection reduce "false" warnings, and ignore unrelated/small ones <= 1 MB On windows the gpu memory can be allocated by other processes + some cleanup in algos... (free/gpulog) 9 years ago			`cudaThreadSynchronize();`
algos: add functions to free allocated resources Will be used later for algo switching not really tested yet... 9 years ago
			`groestlcoin_cpu_free(thr_id);`
			`init[thr_id] = false;`

			`cudaDeviceSynchronize();`
intensity: do not reduce throughput before init Else the memory allocated could be less than required later btw, use the new "cuda" function to apply intensity/throughput 9 years ago			`}`