// ccminer/groestlcoin.cpp

#include <string.h>
#include <stdint.h>
#include <cuda_runtime.h>
#include <openssl/sha.h>

#include "sph/sph_groestl.h"
#include "cuda_groestlcoin.h"

#include "miner.h"

// CPU hash
void groestlhash(void *state, const void *input)
{
	uint32_t _ALIGN(64) hash[16];
	sph_groestl512_context ctx_groestl;

	sph_groestl512_init(&ctx_groestl);
	sph_groestl512(&ctx_groestl, input, 80);
	sph_groestl512_close(&ctx_groestl, hash);

	sph_groestl512_init(&ctx_groestl);
	sph_groestl512(&ctx_groestl, hash, 64);
	sph_groestl512_close(&ctx_groestl, hash);

	memcpy(state, hash, 32);
}

// Per-thread initialization flags, indexed by thr_id: set by
// scanhash_groestlcoin() once groestlcoin_cpu_init() has been called,
// cleared again by free_groestlcoin().
static bool init[MAX_GPUS] = { 0 };

/**
 * Scan a nonce range on the GPU and verify any candidate on the CPU.
 *
 * @param thr_id       miner thread index (also indexes init[] and device_map[])
 * @param work         work item; work->data[19] is the start nonce on entry and
 *                     is updated to the found nonce or to the scan position
 * @param max_nonce    upper bound of the nonce range to scan
 * @param hashes_done  out: number of hashes accounted for by this call
 * @return non-zero when a nonce validated on the CPU against work->target,
 *         0 otherwise (range exhausted, restart requested, or host
 *         allocation failure)
 */
int scanhash_groestlcoin(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done)
{
	uint32_t _ALIGN(64) endiandata[32];
	uint32_t *pdata = work->data;
	uint32_t *ptarget = work->target;
	uint32_t start_nonce = pdata[19];
	uint32_t throughput = cuda_default_throughput(thr_id, 1 << 19); // 256*256*8
	// Only clamp once initialized: groestlcoin_cpu_init() receives the
	// throughput of the first call, so it must not be reduced before then.
	if (init[thr_id]) throughput = min(throughput, max_nonce - start_nonce);

	if (throughput == 0) {
		// Empty nonce range: nothing to hash. Avoids malloc(0) (which may
		// legitimately return NULL) and calling the GPU hash with zero threads.
		*hashes_done = 0;
		return 0;
	}

	// Widen before multiplying: throughput * 64 wraps in 32-bit arithmetic
	// for large throughputs, which would under-allocate the buffer.
	uint32_t *outputHash = (uint32_t*)malloc((size_t)throughput * 64);
	if (outputHash == NULL) {
		applog(LOG_WARNING, "GPU #%d: unable to allocate host memory for %u hashes",
			device_map[thr_id], throughput);
		*hashes_done = 0;
		return 0;
	}

	if (opt_benchmark)
		((uint32_t*)ptarget)[7] = 0x000000ff;

	if (!init[thr_id])
	{
		cudaSetDevice(device_map[thr_id]);
		groestlcoin_cpu_init(thr_id, throughput);
		init[thr_id] = true;
	}

	// the first 20 words of the work data, big-endian encoded
	for (int k=0; k < 20; k++)
		be32enc(&endiandata[k], pdata[k]);

	groestlcoin_cpu_setBlock(thr_id, endiandata, (void*)ptarget);

	do {
		// UINT32_MAX is the "nothing found" sentinel
		uint32_t foundNounce = UINT32_MAX;

		*hashes_done = pdata[19] - start_nonce + throughput;

		// GPU hash
		groestlcoin_cpu_hash(thr_id, throughput, pdata[19], outputHash, &foundNounce);

		if (foundNounce < UINT32_MAX)
		{
			// re-hash the candidate on the CPU before accepting it
			uint32_t _ALIGN(64) vhash[8];
			endiandata[19] = swab32(foundNounce);
			groestlhash(vhash, endiandata);

			if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
				work_set_target_ratio(work, vhash);
				pdata[19] = foundNounce;
				free(outputHash);
				return 1;
			} else {
				applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!",
					device_map[thr_id], foundNounce);
			}
		}

		// 64-bit compare so that pdata[19] + throughput cannot wrap around
		if ((uint64_t) pdata[19] + throughput > max_nonce) {
			*hashes_done = pdata[19] - start_nonce + 1;
			pdata[19] = max_nonce;
			break;
		}
		pdata[19] += throughput;

	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);

	free(outputHash);
	return 0;
}

// Cleanup: release what groestlcoin_cpu_free() handles for this thread and
// clear its init flag. Does nothing when the thread was never initialized.
void free_groestlcoin(int thr_id)
{
	if (init[thr_id])
	{
		cudaSetDevice(device_map[thr_id]);

		groestlcoin_cpu_free(thr_id);
		init[thr_id] = false;

		cudaDeviceSynchronize();
	}
}
Revision 0.6 with myriad-groestl and jackpot coin 10 years ago			`#include <string.h>`
			`#include <stdint.h>`
Various algos cleanup + lyra2 sec nonce fix 9 years ago			`#include <cuda_runtime.h>`
Revision 0.6 with myriad-groestl and jackpot coin 10 years ago			`#include <openssl/sha.h>`

min() and max(a,b) are not defined on linux, in fact max exists in jansson includes (in tree only) Add them to miner.h 10 years ago			`#include "sph/sph_groestl.h"`
Reduce keccak, deep & anime intensity + handle groestl -i param default intensity was the max supported by the card, and perf is not really better. I prefer to let it one under for cards with lower memory (1GB) 10 years ago			`#include "cuda_groestlcoin.h"`

min() and max(a,b) are not defined on linux, in fact max exists in jansson includes (in tree only) Add them to miner.h 10 years ago			`#include "miner.h"`

myriad/groestl: some more cleanup + tabs... 9 years ago			`// CPU hash`
			`void groestlhash(void *state, const void *input)`
Revision 0.6 with myriad-groestl and jackpot coin 10 years ago			`{`
myriad/groestl: some more cleanup + tabs... 9 years ago			`uint32_t _ALIGN(64) hash[16];`
			`sph_groestl512_context ctx_groestl;`
Revision 0.6 with myriad-groestl and jackpot coin 10 years ago
myriad/groestl: some more cleanup + tabs... 9 years ago			`sph_groestl512_init(&ctx_groestl);`
			`sph_groestl512(&ctx_groestl, input, 80);`
			`sph_groestl512_close(&ctx_groestl, hash);`
Revision 0.6 with myriad-groestl and jackpot coin 10 years ago
myriad/groestl: some more cleanup + tabs... 9 years ago			`sph_groestl512_init(&ctx_groestl);`
			`sph_groestl512(&ctx_groestl, hash, 64);`
			`sph_groestl512_close(&ctx_groestl, hash);`
Revision 0.6 with myriad-groestl and jackpot coin 10 years ago
myriad/groestl: some more cleanup + tabs... 9 years ago			`memcpy(state, hash, 32);`
Revision 0.6 with myriad-groestl and jackpot coin 10 years ago			`}`

Handle a maximum of 16 gpus (vs 8 before) Some cards have 2 gpus on board... 9 years ago			`static bool init[MAX_GPUS] = { 0 };`
Revision 0.6 with myriad-groestl and jackpot coin 10 years ago
start v1.7, apply new prototypes to all algos 9 years ago			`int scanhash_groestlcoin(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done)`
Allow different intensity per device and clean the old variables, no more required 9 years ago			`{`
start v1.7, apply new prototypes to all algos 9 years ago			`uint32_t _ALIGN(64) endiandata[32];`
			`uint32_t *pdata = work->data;`
			`uint32_t *ptarget = work->target;`
myriad/groestl: some more cleanup + tabs... 9 years ago			`uint32_t start_nonce = pdata[19];`
intensity: do not reduce throughput before init Else the memory allocated could be less than required later btw, use the new "cuda" function to apply intensity/throughput 9 years ago			`uint32_t throughput = cuda_default_throughput(thr_id, 1 << 19); // 256*256*8`
			`if (init[thr_id]) throughput = min(throughput, max_nonce - start_nonce);`
myriad/groestl: some more cleanup + tabs... 9 years ago
			`uint32_t *outputHash = (uint32_t*)malloc(throughput * 64);`

			`if (opt_benchmark)`
			`((uint32_t*)ptarget)[7] = 0x000000ff;`

			`if (!init[thr_id])`
			`{`
			`cudaSetDevice(device_map[thr_id]);`
			`groestlcoin_cpu_init(thr_id, throughput);`
			`init[thr_id] = true;`
			`}`

			`for (int k=0; k < 20; k++)`
			`be32enc(&endiandata[k], pdata[k]);`

			`groestlcoin_cpu_setBlock(thr_id, endiandata, (void*)ptarget);`

			`do {`
			`uint32_t foundNounce = UINT32_MAX;`

			`*hashes_done = pdata[19] - start_nonce + throughput;`

			`// GPU hash`
			`groestlcoin_cpu_hash(thr_id, throughput, pdata[19], outputHash, &foundNounce);`

			`if (foundNounce < UINT32_MAX)`
			`{`
start v1.7, apply new prototypes to all algos 9 years ago			`uint32_t _ALIGN(64) vhash[8];`
myriad/groestl: some more cleanup + tabs... 9 years ago			`endiandata[19] = swab32(foundNounce);`
start v1.7, apply new prototypes to all algos 9 years ago			`groestlhash(vhash, endiandata);`
myriad/groestl: some more cleanup + tabs... 9 years ago
start v1.7, apply new prototypes to all algos 9 years ago			`if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {`
diff: use the new function in all algos 9 years ago			`work_set_target_ratio(work, vhash);`
myriad/groestl: some more cleanup + tabs... 9 years ago			`pdata[19] = foundNounce;`
			`free(outputHash);`
			`return true;`
			`} else {`
			`applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!",`
			`device_map[thr_id], foundNounce);`
			`}`
			`}`

			`if ((uint64_t) pdata[19] + throughput > max_nonce) {`
rename skein2 to c++, no cuda kernel code and some other changes... 9 years ago			`*hashes_done = pdata[19] - start_nonce + 1;`
myriad/groestl: some more cleanup + tabs... 9 years ago			`pdata[19] = max_nonce;`
			`break;`
			`}`
			`pdata[19] += throughput;`

			`} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);`

			`free(outputHash);`
			`return 0;`
Revision 0.6 with myriad-groestl and jackpot coin 10 years ago			`}`

algos: add functions to free allocated resources Will be used later for algo switching not really tested yet... 9 years ago			`// cleanup`
			`void free_groestlcoin(int thr_id)`
			`{`
			`if (!init[thr_id])`
			`return;`

			`cudaSetDevice(device_map[thr_id]);`

			`groestlcoin_cpu_free(thr_id);`
			`init[thr_id] = false;`

			`cudaDeviceSynchronize();`
intensity: do not reduce throughput before init Else the memory allocated could be less than required later btw, use the new "cuda" function to apply intensity/throughput 9 years ago			`}`