ccminer/quark/animecoin.cu

extern "C"
{
#include "sph/sph_blake.h"
#include "sph/sph_bmw.h"
#include "sph/sph_groestl.h"
#include "sph/sph_skein.h"
#include "sph/sph_jh.h"
#include "sph/sph_keccak.h"
}
#include "miner.h"
#include "cuda_helper.h"

static uint32_t *d_hash[MAX_GPUS];

// Memory for generating the nonce vectors for the conditional hashes
static uint32_t *d_branch1Nonces[MAX_GPUS];
static uint32_t *d_branch2Nonces[MAX_GPUS];
static uint32_t *d_branch3Nonces[MAX_GPUS];

extern void quark_blake512_cpu_init(int thr_id, uint32_t threads);
extern void quark_blake512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

extern void quark_bmw512_cpu_init(int thr_id, uint32_t threads);
extern void quark_bmw512_cpu_setBlock_80(void *pdata);
extern void quark_bmw512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_outputHash, int order);
extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_outputHash, int order);

extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_doublegroestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

extern void quark_keccak512_cpu_init(int thr_id, uint32_t threads);
extern void quark_keccak512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

extern void quark_jh512_cpu_init(int thr_id, uint32_t threads);
extern void quark_jh512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads);
extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
											uint32_t *d_nonces1, size_t *nrm1,
											uint32_t *d_nonces2, size_t *nrm2,
											int order);
extern void quark_compactTest_single_false_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
											uint32_t *d_nonces1, size_t *nrm1,
											int order);

extern uint32_t cuda_check_hash_branch(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);

/* CPU Hash */
/**
 * CPU reference implementation of the anime hash chain.
 *
 * The first 80 bytes of `input` are hashed with BMW-512; the resulting
 * 64-byte digest is then fed through a sequence of 512-bit hash functions,
 * each stage overwriting the digest in place. At three points the next
 * function is selected by testing bit 0x8 of the first digest byte.
 *
 * @param state  output buffer; receives the first 32 bytes of the final digest
 * @param input  input buffer; 80 bytes are read from it
 */
extern "C" void animehash(void *state, const void *input)
{
    unsigned char digest[64];

    // Stage 1: BMW-512 over the 80-byte input.
    {
        sph_bmw512_context bmw;
        sph_bmw512_init(&bmw);
        sph_bmw512(&bmw, input, 80);
        sph_bmw512_close(&bmw, digest);
    }

    // Stage 2: Blake-512.
    {
        sph_blake512_context blake;
        sph_blake512_init(&blake);
        sph_blake512(&blake, digest, 64);
        sph_blake512_close(&blake, digest);
    }

    // Stage 3 (conditional): Groestl-512 when bit 0x8 is set, Skein-512 otherwise.
    const bool firstPick = (digest[0] & 0x8) != 0;
    if (!firstPick)
    {
        sph_skein512_context skein;
        sph_skein512_init(&skein);
        sph_skein512(&skein, digest, 64);
        sph_skein512_close(&skein, digest);
    }
    else
    {
        sph_groestl512_context groestl;
        sph_groestl512_init(&groestl);
        sph_groestl512(&groestl, digest, 64);
        sph_groestl512_close(&groestl, digest);
    }

    // Stage 4: Groestl-512.
    {
        sph_groestl512_context groestl;
        sph_groestl512_init(&groestl);
        sph_groestl512(&groestl, digest, 64);
        sph_groestl512_close(&groestl, digest);
    }

    // Stage 5: JH-512.
    {
        sph_jh512_context jh;
        sph_jh512_init(&jh);
        sph_jh512(&jh, digest, 64);
        sph_jh512_close(&jh, digest);
    }

    // Stage 6 (conditional): Blake-512 when bit 0x8 is set, BMW-512 otherwise.
    const bool secondPick = (digest[0] & 0x8) != 0;
    if (!secondPick)
    {
        sph_bmw512_context bmw;
        sph_bmw512_init(&bmw);
        sph_bmw512(&bmw, digest, 64);
        sph_bmw512_close(&bmw, digest);
    }
    else
    {
        sph_blake512_context blake;
        sph_blake512_init(&blake);
        sph_blake512(&blake, digest, 64);
        sph_blake512_close(&blake, digest);
    }

    // Stage 7: Keccak-512.
    {
        sph_keccak512_context keccak;
        sph_keccak512_init(&keccak);
        sph_keccak512(&keccak, digest, 64);
        sph_keccak512_close(&keccak, digest);
    }

    // Stage 8: Skein-512.
    {
        sph_skein512_context skein;
        sph_skein512_init(&skein);
        sph_skein512(&skein, digest, 64);
        sph_skein512_close(&skein, digest);
    }

    // Stage 9 (conditional): Keccak-512 when bit 0x8 is set, JH-512 otherwise.
    const bool thirdPick = (digest[0] & 0x8) != 0;
    if (!thirdPick)
    {
        sph_jh512_context jh;
        sph_jh512_init(&jh);
        sph_jh512(&jh, digest, 64);
        sph_jh512_close(&jh, digest);
    }
    else
    {
        sph_keccak512_context keccak;
        sph_keccak512_init(&keccak);
        sph_keccak512(&keccak, digest, 64);
        sph_keccak512_close(&keccak, digest);
    }

    // Only the first half of the 64-byte digest is handed back to the caller.
    memcpy(state, digest, 32);
}

/*
struct HashPredicate
{    
    HashPredicate(uint32_t *hashes, uint32_t startNonce) :
        m_hashes(hashes),
        m_startNonce(startNonce)
        { }

    __device__
    bool operator()(const uint32_t x)
    {
        uint32_t *hash = &m_hashes[(x - m_startNonce)*16];
        return hash[0] & 0x8;
    }

    uint32_t *m_hashes;
    uint32_t  m_startNonce;
};
*/

static bool init[MAX_GPUS] = { 0 };

/**
 * Scan a nonce range for the anime hash, running the hash chain on the GPU
 * and re-checking any candidate on the CPU with animehash().
 *
 * @param thr_id       miner thread index; selects the device through
 *                     device_map[] and the per-thread buffers in this file
 * @param pdata        block header words; pdata[0..19] are hashed and
 *                     pdata[19] is the nonce. On return pdata[19] holds the
 *                     found nonce or the position where scanning stopped.
 *                     pdata[21] receives a second nonce when one is reported.
 * @param ptarget      target words; ptarget[7] is used for the quick check
 *                     before fulltest(). Overwritten at index 7 when
 *                     opt_benchmark is set.
 * @param max_nonce    upper bound of the nonce range to scan
 * @param hashes_done  receives the number of nonces covered by this call
 * @return 0 when nothing was found, 1 when one nonce validated on the CPU,
 *         2 when a second nonce was additionally reported.
 */
extern "C" int scanhash_anime(int thr_id, uint32_t *pdata,
    const uint32_t *ptarget, uint32_t max_nonce,
    unsigned long *hashes_done)
{
	const uint32_t first_nonce = pdata[19];

	// Full batch size for this device. Device buffers are sized from this
	// value, NOT from the per-call clamped value below: the init block runs
	// only once, so sizing it from a first call with a short nonce range
	// would leave the buffers too small for later, larger batches.
	uint32_t max_throughput = device_intensity(thr_id, __func__, 1 << 19); // 256*256*8

	// Batch size actually hashed per iteration in this call, limited to the
	// remaining nonce range.
	uint32_t throughput = min(max_throughput, max_nonce - first_nonce);

	if (opt_benchmark)
		((uint32_t*)ptarget)[7] = 0x00000f;

	// One-time per-thread setup: select the device and allocate all buffers.
	if (!init[thr_id])
	{
		// Checked so that a failed device selection is reported instead of
		// silently continuing on whatever device is current.
		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));

		// 16 words (64 bytes) of hash state per nonce.
		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * max_throughput));

		quark_blake512_cpu_init(thr_id, max_throughput);
		quark_groestl512_cpu_init(thr_id, max_throughput);
		quark_skein512_cpu_init(thr_id, max_throughput);
		quark_bmw512_cpu_init(thr_id, max_throughput);
		quark_keccak512_cpu_init(thr_id, max_throughput);
		quark_jh512_cpu_init(thr_id, max_throughput);
		cuda_check_cpu_init(thr_id, max_throughput);
		quark_compactTest_cpu_init(thr_id, max_throughput);

		// One nonce slot per thread for each of the three nonce vectors.
		CUDA_SAFE_CALL(cudaMalloc(&d_branch1Nonces[thr_id], sizeof(uint32_t)*max_throughput));
		CUDA_SAFE_CALL(cudaMalloc(&d_branch2Nonces[thr_id], sizeof(uint32_t)*max_throughput));
		CUDA_SAFE_CALL(cudaMalloc(&d_branch3Nonces[thr_id], sizeof(uint32_t)*max_throughput));

		init[thr_id] = true;
	}

	// Header words re-encoded with be32enc; also reused for the CPU re-check.
	uint32_t endiandata[20];
	for (int k=0; k < 20; k++)
		be32enc(&endiandata[k], pdata[k]);

	quark_bmw512_cpu_setBlock_80((void*)endiandata);
	cuda_check_cpu_setTarget(ptarget);

	do {
		int order = 0;
		size_t nrm1=0, nrm2=0, nrm3=0;

		// first BMW512 hash with CUDA
		quark_bmw512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);

		// this is the unconditional branch for Blake512
		quark_blake512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);

		// Compact the nonces into d_branch3Nonces; nrm3 receives their count.
		quark_compactTest_single_false_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id], NULL,
				d_branch3Nonces[thr_id], &nrm3,
				order++);

		// only follow the Skein branch
		quark_skein512_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);

		// this is the unconditional branch for Groestl512
		quark_groestl512_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);

		// this is the unconditional branch for JH512
		quark_jh512_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);

		// split the nonces into branch1 and branch2 according to if (hash[0] & 0x8)
		quark_compactTest_cpu_hash_64(thr_id, nrm3, pdata[19], d_hash[thr_id], d_branch3Nonces[thr_id],
			d_branch1Nonces[thr_id], &nrm1,
			d_branch2Nonces[thr_id], &nrm2,
			order++);

		// this is the conditional branch for Blake512
		quark_blake512_cpu_hash_64(thr_id, nrm1, pdata[19], d_branch1Nonces[thr_id], d_hash[thr_id], order++);

		// this is the conditional branch for Bmw512
		quark_bmw512_cpu_hash_64(thr_id, nrm2, pdata[19], d_branch2Nonces[thr_id], d_hash[thr_id], order++);

		// this is the unconditional branch for Keccak512
		quark_keccak512_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);

		// this is the unconditional branch for Skein512
		quark_skein512_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);

		// split the nonces into branch1 and branch2 according to if (hash[0] & 0x8)
		quark_compactTest_cpu_hash_64(thr_id, nrm3, pdata[19], d_hash[thr_id], d_branch3Nonces[thr_id],
			d_branch1Nonces[thr_id], &nrm1,
			d_branch2Nonces[thr_id], &nrm2,
			order++);

		// this is the conditional branch for Keccak512
		quark_keccak512_cpu_hash_64(thr_id, nrm1, pdata[19], d_branch1Nonces[thr_id], d_hash[thr_id], order++);

		// this is the conditional branch for JH512
		quark_jh512_cpu_hash_64(thr_id, nrm2, pdata[19], d_branch2Nonces[thr_id], d_hash[thr_id], order++);

		// scan for winning hashes on the GPU
		uint32_t foundNonce = cuda_check_hash_branch(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
		if (foundNonce != UINT32_MAX)
		{
			const uint32_t Htarg = ptarget[7];
			uint32_t vhash64[8];

			// Recompute the hash on the CPU for the candidate nonce.
			be32enc(&endiandata[19], foundNonce);
			animehash(vhash64, endiandata);

			if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
				int res = 1;
				uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
				// The whole current batch counts as done.
				*hashes_done = pdata[19] - first_nonce + throughput;
				if (secNonce != 0) {
					pdata[21] = secNonce;
					res++;
				}
				pdata[19] = foundNonce;
				return res;
			} else {
				applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", device_map[thr_id], foundNonce);
			}
		}

		// 64-bit comparison so that the addition cannot wrap around.
		if ((uint64_t)pdata[19] + throughput > (uint64_t)max_nonce) {
			pdata[19] = max_nonce;
			break;
		}

		pdata[19] += throughput;

	} while (!work_restart[thr_id].restart);

	*hashes_done = pdata[19] - first_nonce + 1;
	return 0;
}
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
+								extern "C"
 								{
 								#include "sph/sph_blake.h"
 								#include "sph/sph_bmw.h"
 								#include "sph/sph_groestl.h"
 								#include "sph/sph_skein.h"
 								#include "sph/sph_jh.h"
 								#include "sph/sph_keccak.h"
-												Move common check_cpu functions to root

											
										
										
											10 years ago
+								}
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
+								#include "miner.h"
 								#include "cuda_helper.h"
-												Handle a maximum of 16 gpus (vs 8 before)

Some cards have 2 gpus on board...

											
										
										
											10 years ago
+								static uint32_t *d_hash[MAX_GPUS];
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
 								// Speicher zur Generierung der Noncevektoren f<EFBFBD>r die bedingten Hashes
-												Handle a maximum of 16 gpus (vs 8 before)

Some cards have 2 gpus on board...

											
										
										
											10 years ago
+								static uint32_t *d_branch1Nonces[MAX_GPUS];
 								static uint32_t *d_branch2Nonces[MAX_GPUS];
 								static uint32_t *d_branch3Nonces[MAX_GPUS];
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
-												cleanup: use unsigned throughput parameters

Yes, its a big commit, was waiting 1.6 to do that...
Sorry for your possible merge issues ;)

											
										
										
											10 years ago
+								extern void quark_blake512_cpu_init(int thr_id, uint32_t threads);
 								extern void quark_blake512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
-												cleanup: use unsigned throughput parameters

Yes, its a big commit, was waiting 1.6 to do that...
Sorry for your possible merge issues ;)

											
										
										
											10 years ago
+								extern void quark_bmw512_cpu_init(int thr_id, uint32_t threads);
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
+								extern void quark_bmw512_cpu_setBlock_80(void *pdata);
-												cleanup: use unsigned throughput parameters

Yes, its a big commit, was waiting 1.6 to do that...
Sorry for your possible merge issues ;)

											
										
										
											10 years ago
+								extern void quark_bmw512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_outputHash, int order);
 								extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_outputHash, int order);
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
-												cleanup: use unsigned throughput parameters

Yes, its a big commit, was waiting 1.6 to do that...
Sorry for your possible merge issues ;)

											
										
										
											10 years ago
+								extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
 								extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 								extern void quark_doublegroestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
-												cleanup: use unsigned throughput parameters

Yes, its a big commit, was waiting 1.6 to do that...
Sorry for your possible merge issues ;)

											
										
										
											10 years ago
+								extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
 								extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
-												cleanup: use unsigned throughput parameters

Yes, its a big commit, was waiting 1.6 to do that...
Sorry for your possible merge issues ;)

											
										
										
											10 years ago
+								extern void quark_keccak512_cpu_init(int thr_id, uint32_t threads);
 								extern void quark_keccak512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
-												cleanup: use unsigned throughput parameters

Yes, its a big commit, was waiting 1.6 to do that...
Sorry for your possible merge issues ;)

											
										
										
											10 years ago
+								extern void quark_jh512_cpu_init(int thr_id, uint32_t threads);
 								extern void quark_jh512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
-												cleanup: use unsigned throughput parameters

Yes, its a big commit, was waiting 1.6 to do that...
Sorry for your possible merge issues ;)

											
										
										
											10 years ago
+								extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads);
 								extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
+																			uint32_t *d_nonces1, size_t *nrm1,
 																			uint32_t *d_nonces2, size_t *nrm2,
 																			int order);
-												cleanup: use unsigned throughput parameters

Yes, its a big commit, was waiting 1.6 to do that...
Sorry for your possible merge issues ;)

											
										
										
											10 years ago
+								extern void quark_compactTest_single_false_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
+																			uint32_t *d_nonces1, size_t *nrm1,
 																			int order);
-												cleanup: use unsigned throughput parameters

Yes, its a big commit, was waiting 1.6 to do that...
Sorry for your possible merge issues ;)

											
										
										
											10 years ago
+								extern uint32_t cuda_check_hash_branch(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
-												checkhash: simplify the common function

use klaus trivial function, the old code has always been a bit weird..

split cuda_check_cpu_hash_64 in two functions, keep old for branched stuff

											
										
										
											10 years ago
 								/* CPU Hash */
-												cpu-miner: sort algos by name, show reject reason

											
										
										
											10 years ago
+								extern "C" void animehash(void *state, const void *input)
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
+								{
 								    sph_blake512_context ctx_blake;
 								    sph_bmw512_context ctx_bmw;
 								    sph_groestl512_context ctx_groestl;
 								    sph_jh512_context ctx_jh;
 								    sph_keccak512_context ctx_keccak;
 								    sph_skein512_context ctx_skein;
 								    unsigned char hash[64];
 								    sph_bmw512_init(&ctx_bmw);
 								    // ZBMW;
 								    sph_bmw512 (&ctx_bmw, (const void*) input, 80);
 								    sph_bmw512_close(&ctx_bmw, (void*) hash);
 								    sph_blake512_init(&ctx_blake);
 								    // ZBLAKE;
 								    sph_blake512 (&ctx_blake, hash, 64);
 								    sph_blake512_close(&ctx_blake, (void*) hash);
 								    if (hash[0] & 0x8)
 								    {
 								        sph_groestl512_init(&ctx_groestl);
 								        // ZGROESTL;
 								        sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
 								        sph_groestl512_close(&ctx_groestl, (void*) hash);
 								    }
 								    else
 								    {
 								        sph_skein512_init(&ctx_skein);
 								        // ZSKEIN;
 								        sph_skein512 (&ctx_skein, (const void*) hash, 64);
 								        sph_skein512_close(&ctx_skein, (void*) hash);
 								    }
 								    sph_groestl512_init(&ctx_groestl);
 								    // ZGROESTL;
 								    sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
 								    sph_groestl512_close(&ctx_groestl, (void*) hash);
 								    sph_jh512_init(&ctx_jh);
 								    // ZJH;
 								    sph_jh512 (&ctx_jh, (const void*) hash, 64);
 								    sph_jh512_close(&ctx_jh, (void*) hash);
 								    if (hash[0] & 0x8)
 								    {
 								        sph_blake512_init(&ctx_blake);
 								        // ZBLAKE;
 								        sph_blake512 (&ctx_blake, (const void*) hash, 64);
 								        sph_blake512_close(&ctx_blake, (void*) hash);
 								    }
 								    else
 								    {
 								        sph_bmw512_init(&ctx_bmw);
 								        // ZBMW;
 								        sph_bmw512 (&ctx_bmw, (const void*) hash, 64);
 								        sph_bmw512_close(&ctx_bmw, (void*) hash);
 								    }
 								    sph_keccak512_init(&ctx_keccak);
 								    // ZKECCAK;
 								    sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
 								    sph_keccak512_close(&ctx_keccak, (void*) hash);
 								    sph_skein512_init(&ctx_skein);
 								    // SKEIN;
 								    sph_skein512 (&ctx_skein, (const void*) hash, 64);
 								    sph_skein512_close(&ctx_skein, (void*) hash);
 								    if (hash[0] & 0x8)
 								    {
 								        sph_keccak512_init(&ctx_keccak);
 								        // ZKECCAK;
 								        sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
 								        sph_keccak512_close(&ctx_keccak, (void*) hash);
 								    }
 								    else
 								    {
 								        sph_jh512_init(&ctx_jh);
 								        // ZJH;
 								        sph_jh512 (&ctx_jh, (const void*) hash, 64);
 								        sph_jh512_close(&ctx_jh, (void*) hash);
 								    }
 								    memcpy(state, hash, 32);
 								}
-												enhance solo mining, update http headers

and prepare next version...

											
										
										
											10 years ago
+								/*
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
+								struct HashPredicate
 								{
 								    HashPredicate(uint32_t *hashes, uint32_t startNonce) :
 								        m_hashes(hashes),
 								        m_startNonce(startNonce)
 								        { }
 								    __device__
 								    bool operator()(const uint32_t x)
 								    {
 								        uint32_t *hash = &m_hashes[(x - m_startNonce)*16];
 								        return hash[0] & 0x8;
 								    }
 								    uint32_t *m_hashes;
 								    uint32_t  m_startNonce;
 								};
-												enhance solo mining, update http headers

and prepare next version...

											
										
										
											10 years ago
+								*/
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
-												Handle a maximum of 16 gpus (vs 8 before)

Some cards have 2 gpus on board...

											
										
										
											10 years ago
+								static bool init[MAX_GPUS] = { 0 };
-												various small changes

heavy: reduce by 256 threads default intensity to all -i 20
cuda: put static thread init bools outside the code (made once)
api: fix nvml header to build without

											
										
										
											10 years ago
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
+								extern "C" int scanhash_anime(int thr_id, uint32_t *pdata,
 								    const uint32_t *ptarget, uint32_t max_nonce,
 								    unsigned long *hashes_done)
 								{
 									const uint32_t first_nonce = pdata[19];
-												cleanup: use unsigned throughput parameters

Yes, its a big commit, was waiting 1.6 to do that...
Sorry for your possible merge issues ;)

											
										
										
											10 years ago
+									uint32_t throughput =  device_intensity(thr_id, __func__, 1 << 19); // 256*256*8
 									throughput = min(throughput, max_nonce - first_nonce);
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
 									if (opt_benchmark)
 										((uint32_t*)ptarget)[7] = 0x00000f;
 									if (!init[thr_id])
 									{
 										cudaSetDevice(device_map[thr_id]);
-												enhance solo mining, update http headers

and prepare next version...

											
										
										
											10 years ago
+										CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
 										quark_blake512_cpu_init(thr_id, throughput);
 										quark_groestl512_cpu_init(thr_id, throughput);
 										quark_skein512_cpu_init(thr_id, throughput);
 										quark_bmw512_cpu_init(thr_id, throughput);
 										quark_keccak512_cpu_init(thr_id, throughput);
 										quark_jh512_cpu_init(thr_id, throughput);
 										cuda_check_cpu_init(thr_id, throughput);
 										quark_compactTest_cpu_init(thr_id, throughput);
-												enhance solo mining, update http headers

and prepare next version...

											
										
										
											10 years ago
+										CUDA_SAFE_CALL(cudaMalloc(&d_branch1Nonces[thr_id], sizeof(uint32_t)*throughput));
 										CUDA_SAFE_CALL(cudaMalloc(&d_branch2Nonces[thr_id], sizeof(uint32_t)*throughput));
 										CUDA_SAFE_CALL(cudaMalloc(&d_branch3Nonces[thr_id], sizeof(uint32_t)*throughput));
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
 										init[thr_id] = true;
 									}
 									uint32_t endiandata[20];
 									for (int k=0; k < 20; k++)
-												remove uint32_t cast

											
										
										
											10 years ago
+										be32enc(&endiandata[k], pdata[k]);
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
 									quark_bmw512_cpu_setBlock_80((void*)endiandata);
 									cuda_check_cpu_setTarget(ptarget);
 									do {
 										int order = 0;
 										size_t nrm1=0, nrm2=0, nrm3=0;
 										// erstes BMW512 Hash mit CUDA
 										quark_bmw512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
 										// das ist der unbedingte Branch f<EFBFBD>r Blake512
 										quark_blake512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 										quark_compactTest_single_false_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id], NULL,
 												d_branch3Nonces[thr_id], &nrm3,
 												order++);
 										// nur den Skein Branch weiterverfolgen
 										quark_skein512_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
 										// das ist der unbedingte Branch f<EFBFBD>r Groestl512
 										quark_groestl512_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
 										// das ist der unbedingte Branch f<EFBFBD>r JH512
 										quark_jh512_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
 										// quarkNonces in branch1 und branch2 aufsplitten gem<EFBFBD>ss if (hash[0] & 0x8)
 										quark_compactTest_cpu_hash_64(thr_id, nrm3, pdata[19], d_hash[thr_id], d_branch3Nonces[thr_id],
 											d_branch1Nonces[thr_id], &nrm1,
 											d_branch2Nonces[thr_id], &nrm2,
 											order++);
 										// das ist der bedingte Branch f<EFBFBD>r Blake512
 										quark_blake512_cpu_hash_64(thr_id, nrm1, pdata[19], d_branch1Nonces[thr_id], d_hash[thr_id], order++);
 										// das ist der bedingte Branch f<EFBFBD>r Bmw512
 										quark_bmw512_cpu_hash_64(thr_id, nrm2, pdata[19], d_branch2Nonces[thr_id], d_hash[thr_id], order++);
 										// das ist der unbedingte Branch f<EFBFBD>r Keccak512
 										quark_keccak512_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
 										// das ist der unbedingte Branch f<EFBFBD>r Skein512
 										quark_skein512_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
 										// quarkNonces in branch1 und branch2 aufsplitten gem<EFBFBD>ss if (hash[0] & 0x8)
 										quark_compactTest_cpu_hash_64(thr_id, nrm3, pdata[19], d_hash[thr_id], d_branch3Nonces[thr_id],
 											d_branch1Nonces[thr_id], &nrm1,
 											d_branch2Nonces[thr_id], &nrm2,
 											order++);
 										// das ist der bedingte Branch f<EFBFBD>r Keccak512
 										quark_keccak512_cpu_hash_64(thr_id, nrm1, pdata[19], d_branch1Nonces[thr_id], d_hash[thr_id], order++);
 										// das ist der bedingte Branch f<EFBFBD>r JH512
 										quark_jh512_cpu_hash_64(thr_id, nrm2, pdata[19], d_branch2Nonces[thr_id], d_hash[thr_id], order++);
 										// Scan nach Gewinner Hashes auf der GPU
-												checkhash: simplify the common function

use klaus trivial function, the old code has always been a bit weird..

split cuda_check_cpu_hash_64 in two functions, keep old for branched stuff

											
										
										
											10 years ago
+										uint32_t foundNonce = cuda_check_hash_branch(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
-												Check and submit multiple nonces in one loop

Added to most algos, checkhash function scans a big range
and can find multiple nonces at once if the difficulty is low.

Stop ignoring them, submit second one if found...

Clean the draft code for rc=2 implemented for blake and pentablake

btw... fix the reduced displayed hashrate when a nonce is found...

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+										if (foundNonce != UINT32_MAX)
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
+										{
-												intensity: sign warnings fixes min(i,u)

											
										
										
											10 years ago
+											const uint32_t Htarg = ptarget[7];
-												Check and submit multiple nonces in one loop

Added to most algos, checkhash function scans a big range
and can find multiple nonces at once if the difficulty is low.

Stop ignoring them, submit second one if found...

Clean the draft code for rc=2 implemented for blake and pentablake

btw... fix the reduced displayed hashrate when a nonce is found...

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+											uint32_t vhash64[8];
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
+											be32enc(&endiandata[19], foundNonce);
 											animehash(vhash64, endiandata);
-												Check and submit multiple nonces in one loop

Added to most algos, checkhash function scans a big range
and can find multiple nonces at once if the difficulty is low.

Stop ignoring them, submit second one if found...

Clean the draft code for rc=2 implemented for blake and pentablake

btw... fix the reduced displayed hashrate when a nonce is found...

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+											if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
 												int res = 1;
 												uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
 												*hashes_done = pdata[19] - first_nonce + throughput;
 												if (secNonce != 0) {
 													pdata[21] = secNonce;
 													res++;
 												}
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
+												pdata[19] = foundNonce;
-												Check and submit multiple nonces in one loop

Added to most algos, checkhash function scans a big range
and can find multiple nonces at once if the difficulty is low.

Stop ignoring them, submit second one if found...

Clean the draft code for rc=2 implemented for blake and pentablake

btw... fix the reduced displayed hashrate when a nonce is found...

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>

											
										
										
											10 years ago
+												return res;
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
+											} else {
-												warnings: use the right device id (device_map[thr_id])

											
										
										
											10 years ago
+												applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", device_map[thr_id], foundNonce);
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
+											}
 										}
-												enhance solo mining, update http headers

and prepare next version...

											
										
										
											10 years ago
+										if ((uint64_t)pdata[19] + throughput > (uint64_t)max_nonce) {
 											pdata[19] = max_nonce;
 											break;
 										}
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
+										pdata[19] += throughput;
-												enhance solo mining, update http headers

and prepare next version...

											
										
										
											10 years ago
+									} while (!work_restart[thr_id].restart);
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
-												enhance solo mining, update http headers

and prepare next version...

											
										
										
											10 years ago
+									*hashes_done = pdata[19] - first_nonce + 1;
-												Remove duplicated defines present in cuda_helper.h

also add cudaDeviceReset() on Ctrl+C for nvprof

											
										
										
											10 years ago
+									return 0;
 								}