9eead77027
This will allow later more gpu candidates. Note: This is an unfinished work, we keep the previous behavior for now To finish this, all algos solutions should be migrated and submitted nonces attributes stored. Its required to handle the different share diff per nonce and fix the possible solved count error (if 1/2 nonces is solved).
414 lines
10 KiB
Plaintext
414 lines
10 KiB
Plaintext
/**
|
|
* X11EVO algo implementation
|
|
* Cuda implementation by tpruvot@github - May 2016
|
|
*/
|
|
#include <stdio.h>
|
|
#include <memory.h>
|
|
|
|
extern "C" {
|
|
#include "sph/sph_blake.h"
|
|
#include "sph/sph_bmw.h"
|
|
#include "sph/sph_groestl.h"
|
|
#include "sph/sph_skein.h"
|
|
#include "sph/sph_jh.h"
|
|
#include "sph/sph_keccak.h"
|
|
#include "sph/sph_luffa.h"
|
|
#include "sph/sph_cubehash.h"
|
|
#include "sph/sph_shavite.h"
|
|
#include "sph/sph_simd.h"
|
|
#include "sph/sph_echo.h"
|
|
}
|
|
|
|
#include "miner.h"
|
|
#include "cuda_helper.h"
|
|
#include "cuda_x11.h"
|
|
|
|
static uint32_t *d_hash[MAX_GPUS];
|
|
|
|
enum Algo {
|
|
BLAKE = 0,
|
|
BMW,
|
|
GROESTL,
|
|
SKEIN,
|
|
JH,
|
|
KECCAK,
|
|
LUFFA,
|
|
CUBEHASH,
|
|
SHAVITE,
|
|
SIMD,
|
|
ECHO,
|
|
HASH_FUNC_COUNT
|
|
};
|
|
|
|
static void swap8(uint8_t *a, uint8_t *b)
|
|
{
|
|
uint8_t t = *a;
|
|
*a = *b;
|
|
*b = t;
|
|
}
|
|
|
|
static void initPerm(uint8_t n[], int count)
|
|
{
|
|
for (int i = 0; i < count; i++)
|
|
n[i] = i;
|
|
}
|
|
|
|
static int nextPerm(uint8_t n[], int count)
|
|
{
|
|
int tail, i, j;
|
|
|
|
if (count <= 1)
|
|
return 0;
|
|
|
|
for (i = count - 1; i>0 && n[i - 1] >= n[i]; i--);
|
|
tail = i;
|
|
|
|
if (tail > 0) {
|
|
for (j = count - 1; j>tail && n[j] <= n[tail - 1]; j--);
|
|
swap8(&n[tail - 1], &n[j]);
|
|
}
|
|
|
|
for (i = tail, j = count - 1; i<j; i++, j--)
|
|
swap8(&n[i], &n[j]);
|
|
|
|
return (tail != 0);
|
|
}
|
|
|
|
static void getAlgoString(char *str, int seq)
|
|
{
|
|
uint8_t algoList[HASH_FUNC_COUNT];
|
|
char *sptr;
|
|
|
|
initPerm(algoList, HASH_FUNC_COUNT);
|
|
|
|
for (int k = 0; k < seq; k++) {
|
|
nextPerm(algoList, HASH_FUNC_COUNT);
|
|
}
|
|
|
|
sptr = str;
|
|
for (int j = 0; j < HASH_FUNC_COUNT; j++) {
|
|
if (algoList[j] >= 10)
|
|
sprintf(sptr, "%c", 'A' + (algoList[j] - 10));
|
|
else
|
|
sprintf(sptr, "%u", (uint32_t) algoList[j]);
|
|
sptr++;
|
|
}
|
|
*sptr = '\0';
|
|
//applog(LOG_DEBUG, "nextPerm %s", str);
|
|
}
|
|
|
|
static __thread uint32_t s_ntime = 0;
|
|
static char hashOrder[HASH_FUNC_COUNT + 1] = { 0 };
|
|
static int s_sequence = -1;
|
|
|
|
#define INITIAL_DATE 0x57254700
|
|
static inline int getCurrentAlgoSeq(uint32_t current_time)
|
|
{
|
|
// change once per day
|
|
return (int) (current_time - INITIAL_DATE) / (60 * 60 * 24);
|
|
}
|
|
|
|
static void evo_twisted_code(uint32_t ntime, char *permstr)
|
|
{
|
|
int seq = getCurrentAlgoSeq(ntime);
|
|
if (s_sequence != seq) {
|
|
getAlgoString(permstr, seq);
|
|
s_sequence = seq;
|
|
}
|
|
}
|
|
|
|
// X11evo CPU Hash
|
|
extern "C" void x11evo_hash(void *output, const void *input)
|
|
{
|
|
uint32_t _ALIGN(64) hash[64/4] = { 0 };
|
|
|
|
sph_blake512_context ctx_blake;
|
|
sph_bmw512_context ctx_bmw;
|
|
sph_groestl512_context ctx_groestl;
|
|
sph_skein512_context ctx_skein;
|
|
sph_jh512_context ctx_jh;
|
|
sph_keccak512_context ctx_keccak;
|
|
sph_luffa512_context ctx_luffa1;
|
|
sph_cubehash512_context ctx_cubehash1;
|
|
sph_shavite512_context ctx_shavite1;
|
|
sph_simd512_context ctx_simd1;
|
|
sph_echo512_context ctx_echo1;
|
|
|
|
if (s_sequence == -1) {
|
|
uint32_t *data = (uint32_t*) input;
|
|
const uint32_t ntime = data[17];
|
|
evo_twisted_code(ntime, hashOrder);
|
|
}
|
|
|
|
void *in = (void*) input;
|
|
int size = 80;
|
|
|
|
const int hashes = (int) strlen(hashOrder);
|
|
|
|
for (int i = 0; i < hashes; i++)
|
|
{
|
|
const char elem = hashOrder[i];
|
|
uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
|
|
|
if (i > 0) {
|
|
in = (void*) hash;
|
|
size = 64;
|
|
}
|
|
|
|
switch (algo) {
|
|
case BLAKE:
|
|
sph_blake512_init(&ctx_blake);
|
|
sph_blake512(&ctx_blake, in, size);
|
|
sph_blake512_close(&ctx_blake, hash);
|
|
break;
|
|
case BMW:
|
|
sph_bmw512_init(&ctx_bmw);
|
|
sph_bmw512(&ctx_bmw, in, size);
|
|
sph_bmw512_close(&ctx_bmw, hash);
|
|
break;
|
|
case GROESTL:
|
|
sph_groestl512_init(&ctx_groestl);
|
|
sph_groestl512(&ctx_groestl, in, size);
|
|
sph_groestl512_close(&ctx_groestl, hash);
|
|
break;
|
|
case SKEIN:
|
|
sph_skein512_init(&ctx_skein);
|
|
sph_skein512(&ctx_skein, in, size);
|
|
sph_skein512_close(&ctx_skein, hash);
|
|
break;
|
|
case JH:
|
|
sph_jh512_init(&ctx_jh);
|
|
sph_jh512(&ctx_jh, in, size);
|
|
sph_jh512_close(&ctx_jh, hash);
|
|
break;
|
|
case KECCAK:
|
|
sph_keccak512_init(&ctx_keccak);
|
|
sph_keccak512(&ctx_keccak, in, size);
|
|
sph_keccak512_close(&ctx_keccak, hash);
|
|
break;
|
|
case LUFFA:
|
|
sph_luffa512_init(&ctx_luffa1);
|
|
sph_luffa512(&ctx_luffa1, in, size);
|
|
sph_luffa512_close(&ctx_luffa1, hash);
|
|
break;
|
|
case CUBEHASH:
|
|
sph_cubehash512_init(&ctx_cubehash1);
|
|
sph_cubehash512(&ctx_cubehash1, in, size);
|
|
sph_cubehash512_close(&ctx_cubehash1, hash);
|
|
break;
|
|
case SHAVITE:
|
|
sph_shavite512_init(&ctx_shavite1);
|
|
sph_shavite512(&ctx_shavite1, in, size);
|
|
sph_shavite512_close(&ctx_shavite1, hash);
|
|
break;
|
|
case SIMD:
|
|
sph_simd512_init(&ctx_simd1);
|
|
sph_simd512(&ctx_simd1, in, size);
|
|
sph_simd512_close(&ctx_simd1, hash);
|
|
break;
|
|
case ECHO:
|
|
sph_echo512_init(&ctx_echo1);
|
|
sph_echo512(&ctx_echo1, in, size);
|
|
sph_echo512_close(&ctx_echo1, hash);
|
|
break;
|
|
}
|
|
}
|
|
|
|
memcpy(output, hash, 32);
|
|
}
|
|
|
|
//#define _DEBUG
|
|
#define _DEBUG_PREFIX "evo"
|
|
#include "cuda_debug.cuh"
|
|
|
|
static bool init[MAX_GPUS] = { 0 };
|
|
|
|
extern void quark_blake512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_outputHash, int order);
|
|
|
|
extern "C" int scanhash_x11evo(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
|
|
{
|
|
uint32_t *pdata = work->data;
|
|
uint32_t *ptarget = work->target;
|
|
const uint32_t first_nonce = pdata[19];
|
|
int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 20 : 19;
|
|
uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); // 19=256*256*8;
|
|
//if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
|
|
|
|
if (opt_debug || s_ntime != pdata[17] || s_sequence == -1) {
|
|
uint32_t ntime = swab32(work->data[17]);
|
|
evo_twisted_code(ntime, hashOrder);
|
|
s_ntime = pdata[17];
|
|
if (opt_debug) {
|
|
int secs = (int) (ntime - INITIAL_DATE) % (60 * 60 * 24);
|
|
secs = (60 * 60 * 24) - secs;
|
|
applog(LOG_DEBUG, "evo hash order %s, next in %d mn", hashOrder, secs/60);
|
|
}
|
|
}
|
|
|
|
if (opt_benchmark)
|
|
ptarget[7] = 0x5;
|
|
|
|
if (!init[thr_id])
|
|
{
|
|
cudaSetDevice(device_map[thr_id]);
|
|
if (opt_cudaschedule == -1 && gpu_threads == 1) {
|
|
cudaDeviceReset();
|
|
// reduce cpu usage
|
|
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
|
|
CUDA_LOG_ERROR();
|
|
}
|
|
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);
|
|
|
|
quark_blake512_cpu_init(thr_id, throughput);
|
|
quark_bmw512_cpu_init(thr_id, throughput);
|
|
quark_groestl512_cpu_init(thr_id, throughput);
|
|
quark_skein512_cpu_init(thr_id, throughput);
|
|
quark_keccak512_cpu_init(thr_id, throughput);
|
|
quark_jh512_cpu_init(thr_id, throughput);
|
|
x11_luffa512_cpu_init(thr_id, throughput);
|
|
x11_cubehash512_cpu_init(thr_id, throughput);
|
|
x11_shavite512_cpu_init(thr_id, throughput);
|
|
x11_echo512_cpu_init(thr_id, throughput);
|
|
if (x11_simd512_cpu_init(thr_id, throughput) != 0) {
|
|
return 0;
|
|
}
|
|
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), 0);
|
|
|
|
cuda_check_cpu_init(thr_id, throughput);
|
|
|
|
init[thr_id] = true;
|
|
}
|
|
|
|
uint32_t endiandata[20];
|
|
for (int k=0; k < 19; k++)
|
|
be32enc(&endiandata[k], pdata[k]);
|
|
|
|
cuda_check_cpu_setTarget(ptarget);
|
|
quark_blake512_cpu_setBlock_80(thr_id, endiandata);
|
|
|
|
const int hashes = (int) strlen(hashOrder);
|
|
|
|
do {
|
|
int order = 1;
|
|
uint32_t foundNonce;
|
|
|
|
// Hash with CUDA
|
|
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]);
|
|
TRACE("blake80:");
|
|
|
|
for (int i = 1; i < hashes; i++)
|
|
{
|
|
const char elem = hashOrder[i];
|
|
const uint8_t algo64 = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
|
|
|
switch (algo64) {
|
|
case BLAKE:
|
|
quark_blake512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
|
TRACE("blake :");
|
|
break;
|
|
case BMW:
|
|
quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
|
TRACE("bmw :");
|
|
break;
|
|
case GROESTL:
|
|
quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
|
TRACE("groestl:");
|
|
break;
|
|
case SKEIN:
|
|
quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
|
TRACE("skein :");
|
|
break;
|
|
case JH:
|
|
quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
|
TRACE("jh512 :");
|
|
break;
|
|
case KECCAK:
|
|
quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
|
TRACE("keccak :");
|
|
break;
|
|
case LUFFA:
|
|
x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
|
TRACE("luffa :");
|
|
break;
|
|
case CUBEHASH:
|
|
x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
|
TRACE("cube :");
|
|
break;
|
|
case SHAVITE:
|
|
x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
|
TRACE("shavite:");
|
|
break;
|
|
case SIMD:
|
|
x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
|
TRACE("simd :");
|
|
break;
|
|
case ECHO:
|
|
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
|
TRACE("echo => ");
|
|
break;
|
|
}
|
|
}
|
|
|
|
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
|
|
if (foundNonce != UINT32_MAX)
|
|
{
|
|
uint32_t _ALIGN(64) vhash64[8];
|
|
const uint32_t Htarg = ptarget[7];
|
|
be32enc(&endiandata[19], foundNonce);
|
|
x11evo_hash(vhash64, endiandata);
|
|
|
|
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
|
|
int res = 1;
|
|
// check if there was some other ones...
|
|
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
|
|
work_set_target_ratio(work, vhash64);
|
|
*hashes_done = pdata[19] - first_nonce + throughput;
|
|
if (secNonce != 0) {
|
|
be32enc(&endiandata[19], secNonce);
|
|
x11evo_hash(vhash64, endiandata);
|
|
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0])
|
|
work_set_target_ratio(work, vhash64);
|
|
pdata[21] = secNonce;
|
|
res++;
|
|
}
|
|
pdata[19] = foundNonce;
|
|
return res;
|
|
} else if (vhash64[7] > Htarg) {
|
|
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
|
|
pdata[19] = foundNonce + 1;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
|
|
pdata[19] = max_nonce;
|
|
break;
|
|
}
|
|
pdata[19] += throughput;
|
|
|
|
} while (!work_restart[thr_id].restart);
|
|
|
|
*hashes_done = pdata[19] - first_nonce;
|
|
return 0;
|
|
}
|
|
|
|
// cleanup
|
|
extern "C" void free_x11evo(int thr_id)
|
|
{
|
|
if (!init[thr_id])
|
|
return;
|
|
|
|
cudaThreadSynchronize();
|
|
|
|
cudaFree(d_hash[thr_id]);
|
|
|
|
quark_blake512_cpu_free(thr_id);
|
|
quark_groestl512_cpu_free(thr_id);
|
|
x11_simd512_cpu_free(thr_id);
|
|
|
|
cuda_check_cpu_free(thr_id);
|
|
init[thr_id] = false;
|
|
|
|
cudaDeviceSynchronize();
|
|
}
|