61ff92b5b4
fix some algo weird hashrates (like blake) and reset device between algos, for better accuracy but this reset doesnt seems enough to bench all algos correctly... to test on linux, could be a driver issue... heavy: fix first alloc and indent with tabs...
264 lines
8.3 KiB
Plaintext
264 lines
8.3 KiB
Plaintext
/*
|
|
* X15 algorithm (CHC, BBC, X15C)
|
|
* Added in ccminer by Tanguy Pruvot - 2014
|
|
*/
|
|
|
|
extern "C" {
|
|
#include "sph/sph_blake.h"
|
|
#include "sph/sph_bmw.h"
|
|
#include "sph/sph_groestl.h"
|
|
#include "sph/sph_skein.h"
|
|
#include "sph/sph_jh.h"
|
|
#include "sph/sph_keccak.h"
|
|
|
|
#include "sph/sph_luffa.h"
|
|
#include "sph/sph_cubehash.h"
|
|
#include "sph/sph_shavite.h"
|
|
#include "sph/sph_simd.h"
|
|
#include "sph/sph_echo.h"
|
|
|
|
#include "sph/sph_hamsi.h"
|
|
#include "sph/sph_fugue.h"
|
|
#include "sph/sph_shabal.h"
|
|
#include "sph/sph_whirlpool.h"
|
|
}
|
|
|
|
#include "miner.h"
|
|
|
|
#include "cuda_helper.h"
|
|
#include "x11/cuda_x11.h"
|
|
|
|
// Memory for the hash functions
|
|
static uint32_t *d_hash[MAX_GPUS] = { 0 };
|
|
|
|
extern void x13_hamsi512_cpu_init(int thr_id, uint32_t threads);
|
|
extern void x13_hamsi512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
|
|
|
extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads);
|
|
extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
|
extern void x13_fugue512_cpu_free(int thr_id);
|
|
|
|
extern void x14_shabal512_cpu_init(int thr_id, uint32_t threads);
|
|
extern void x14_shabal512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
|
|
|
extern void x15_whirlpool_cpu_init(int thr_id, uint32_t threads, int mode);
|
|
extern void x15_whirlpool_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
|
extern void x15_whirlpool_cpu_free(int thr_id);
|
|
|
|
|
|
// X15 CPU Hash function
|
|
extern "C" void x15hash(void *output, const void *input)
|
|
{
|
|
sph_blake512_context ctx_blake;
|
|
sph_bmw512_context ctx_bmw;
|
|
sph_groestl512_context ctx_groestl;
|
|
sph_jh512_context ctx_jh;
|
|
sph_keccak512_context ctx_keccak;
|
|
sph_skein512_context ctx_skein;
|
|
sph_luffa512_context ctx_luffa;
|
|
sph_cubehash512_context ctx_cubehash;
|
|
sph_shavite512_context ctx_shavite;
|
|
sph_simd512_context ctx_simd;
|
|
sph_echo512_context ctx_echo;
|
|
sph_hamsi512_context ctx_hamsi;
|
|
sph_fugue512_context ctx_fugue;
|
|
sph_shabal512_context ctx_shabal;
|
|
sph_whirlpool_context ctx_whirlpool;
|
|
|
|
unsigned char hash[128]; // uint32_t hashA[16], hashB[16];
|
|
#define hashB hash+64
|
|
|
|
memset(hash, 0, sizeof hash);
|
|
|
|
sph_blake512_init(&ctx_blake);
|
|
sph_blake512(&ctx_blake, input, 80);
|
|
sph_blake512_close(&ctx_blake, hash);
|
|
|
|
sph_bmw512_init(&ctx_bmw);
|
|
sph_bmw512(&ctx_bmw, hash, 64);
|
|
sph_bmw512_close(&ctx_bmw, hashB);
|
|
|
|
sph_groestl512_init(&ctx_groestl);
|
|
sph_groestl512(&ctx_groestl, hashB, 64);
|
|
sph_groestl512_close(&ctx_groestl, hash);
|
|
|
|
sph_skein512_init(&ctx_skein);
|
|
sph_skein512(&ctx_skein, hash, 64);
|
|
sph_skein512_close(&ctx_skein, hashB);
|
|
|
|
sph_jh512_init(&ctx_jh);
|
|
sph_jh512(&ctx_jh, hashB, 64);
|
|
sph_jh512_close(&ctx_jh, hash);
|
|
|
|
sph_keccak512_init(&ctx_keccak);
|
|
sph_keccak512(&ctx_keccak, hash, 64);
|
|
sph_keccak512_close(&ctx_keccak, hashB);
|
|
|
|
sph_luffa512_init(&ctx_luffa);
|
|
sph_luffa512(&ctx_luffa, hashB, 64);
|
|
sph_luffa512_close(&ctx_luffa, hash);
|
|
|
|
sph_cubehash512_init(&ctx_cubehash);
|
|
sph_cubehash512(&ctx_cubehash, hash, 64);
|
|
sph_cubehash512_close(&ctx_cubehash, hashB);
|
|
|
|
sph_shavite512_init(&ctx_shavite);
|
|
sph_shavite512(&ctx_shavite, hashB, 64);
|
|
sph_shavite512_close(&ctx_shavite, hash);
|
|
|
|
sph_simd512_init(&ctx_simd);
|
|
sph_simd512(&ctx_simd, hash, 64);
|
|
sph_simd512_close(&ctx_simd, hashB);
|
|
|
|
sph_echo512_init(&ctx_echo);
|
|
sph_echo512(&ctx_echo, hashB, 64);
|
|
sph_echo512_close(&ctx_echo, hash);
|
|
|
|
sph_hamsi512_init(&ctx_hamsi);
|
|
sph_hamsi512(&ctx_hamsi, hash, 64);
|
|
sph_hamsi512_close(&ctx_hamsi, hashB);
|
|
|
|
sph_fugue512_init(&ctx_fugue);
|
|
sph_fugue512(&ctx_fugue, hashB, 64);
|
|
sph_fugue512_close(&ctx_fugue, hash);
|
|
|
|
sph_shabal512_init(&ctx_shabal);
|
|
sph_shabal512(&ctx_shabal, hash, 64);
|
|
sph_shabal512_close(&ctx_shabal, hashB);
|
|
|
|
sph_whirlpool_init(&ctx_whirlpool);
|
|
sph_whirlpool(&ctx_whirlpool, hashB, 64);
|
|
sph_whirlpool_close(&ctx_whirlpool, hash);
|
|
|
|
memcpy(output, hash, 32);
|
|
}
|
|
|
|
static bool init[MAX_GPUS] = { 0 };
|
|
|
|
extern "C" int scanhash_x15(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
|
|
{
|
|
uint32_t *pdata = work->data;
|
|
uint32_t *ptarget = work->target;
|
|
const uint32_t first_nonce = pdata[19];
|
|
uint32_t endiandata[20];
|
|
|
|
uint32_t throughput = cuda_default_throughput(thr_id, 1U << 19); // 19=256*256*8;
|
|
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
|
|
|
|
if (opt_benchmark)
|
|
ptarget[7] = 0x00FF;
|
|
|
|
if (!init[thr_id])
|
|
{
|
|
cudaSetDevice(device_map[thr_id]);
|
|
|
|
quark_blake512_cpu_init(thr_id, throughput);
|
|
quark_groestl512_cpu_init(thr_id, throughput);
|
|
quark_skein512_cpu_init(thr_id, throughput);
|
|
quark_bmw512_cpu_init(thr_id, throughput);
|
|
quark_keccak512_cpu_init(thr_id, throughput);
|
|
quark_jh512_cpu_init(thr_id, throughput);
|
|
x11_luffaCubehash512_cpu_init(thr_id, throughput);
|
|
x11_shavite512_cpu_init(thr_id, throughput);
|
|
x11_simd512_cpu_init(thr_id, throughput);
|
|
x11_echo512_cpu_init(thr_id, throughput);
|
|
x13_hamsi512_cpu_init(thr_id, throughput);
|
|
x13_fugue512_cpu_init(thr_id, throughput);
|
|
x14_shabal512_cpu_init(thr_id, throughput);
|
|
x15_whirlpool_cpu_init(thr_id, throughput, 0);
|
|
|
|
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), 0);
|
|
|
|
cuda_check_cpu_init(thr_id, throughput);
|
|
init[thr_id] = true;
|
|
}
|
|
|
|
for (int k=0; k < 20; k++)
|
|
be32enc(&endiandata[k], pdata[k]);
|
|
|
|
quark_blake512_cpu_setBlock_80(thr_id, endiandata);
|
|
cuda_check_cpu_setTarget(ptarget);
|
|
|
|
do {
|
|
int order = 0;
|
|
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
|
|
quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
|
quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
|
quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
|
quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
|
quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
|
x11_luffaCubehash512_cpu_hash_64(thr_id, throughput, d_hash[thr_id], order++);
|
|
x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
|
x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
|
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
|
x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
|
x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
|
x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
|
x15_whirlpool_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
|
|
|
*hashes_done = pdata[19] - first_nonce + throughput;
|
|
|
|
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
|
|
if (foundNonce != UINT32_MAX)
|
|
{
|
|
const uint32_t Htarg = ptarget[7];
|
|
uint32_t vhash64[8];
|
|
/* check now with the CPU to confirm */
|
|
be32enc(&endiandata[19], foundNonce);
|
|
x15hash(vhash64, endiandata);
|
|
|
|
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
|
|
int res = 1;
|
|
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
|
|
work_set_target_ratio(work, vhash64);
|
|
if (secNonce != 0) {
|
|
be32enc(&endiandata[19], secNonce);
|
|
x15hash(vhash64, endiandata);
|
|
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio)
|
|
work_set_target_ratio(work, vhash64);
|
|
pdata[21] = secNonce;
|
|
res++;
|
|
}
|
|
pdata[19] = foundNonce;
|
|
return res;
|
|
} else {
|
|
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
|
|
}
|
|
}
|
|
|
|
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
|
|
pdata[19] = max_nonce;
|
|
break;
|
|
}
|
|
|
|
pdata[19] += throughput;
|
|
|
|
} while (!work_restart[thr_id].restart);
|
|
|
|
*hashes_done = pdata[19] - first_nonce;
|
|
|
|
return 0;
|
|
}
|
|
|
|
// cleanup
|
|
extern "C" void free_x15(int thr_id)
|
|
{
|
|
if (!init[thr_id])
|
|
return;
|
|
|
|
cudaThreadSynchronize();
|
|
|
|
cudaFree(d_hash[thr_id]);
|
|
|
|
quark_blake512_cpu_free(thr_id);
|
|
quark_groestl512_cpu_free(thr_id);
|
|
x11_simd512_cpu_free(thr_id);
|
|
x13_fugue512_cpu_free(thr_id);
|
|
x15_whirlpool_cpu_free(thr_id);
|
|
|
|
cuda_check_cpu_free(thr_id);
|
|
|
|
cudaDeviceSynchronize();
|
|
init[thr_id] = false;
|
|
}
|