Browse Source

implement x11evo algo

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>
2upstream
Tanguy Pruvot 9 years ago
parent
commit
85c212eaad
  1. 2
      Makefile.am
  2. 7
      README.txt
  3. 2
      algos.h
  4. 1
      bench.cpp
  5. 5
      ccminer.cpp
  6. 1
      ccminer.vcxproj
  7. 3
      ccminer.vcxproj.filters
  8. 3
      miner.h
  9. 2
      skein2.cpp
  10. 3
      util.cpp
  11. 3
      x11/cuda_x11.h
  12. 412
      x11/x11evo.cu

2
Makefile.am

@ -53,7 +53,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \ @@ -53,7 +53,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \
qubit/qubit.cu qubit/qubit_luffa512.cu qubit/deep.cu qubit/luffa.cu \
x11/x11.cu x11/fresh.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \
x11/cuda_x11_luffa512_Cubehash.cu \
x11/cuda_x11_luffa512_Cubehash.cu x11/x11evo.cu \
x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu \
x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu \
x15/whirlpool.cu \

7
README.txt

@ -1,5 +1,5 @@ @@ -1,5 +1,5 @@
ccMiner release 1.7.6 (May 2016) "DCR vote & pool device stats"
ccMiner preview 1.8-dev (May 2016) "Pascal and x11evo algo"
---------------------------------------------------------------
***************************************************************
@ -98,6 +98,7 @@ its command line interface and options. @@ -98,6 +98,7 @@ its command line interface and options.
sib use to mine Sibcoin
skein use to mine Skeincoin
skein2 use to mine Woodcoin
x11evo use to mine Revolver
x11 use to mine DarkCoin
x14 use to mine X14Coin
x15 use to mine Halcyon
@ -239,6 +240,10 @@ features. @@ -239,6 +240,10 @@ features.
>>> RELEASE HISTORY <<<
June 2016 v1.8.0
Pascal support with cuda 8
x11evo algo (XRE)
May 18th 2016 v1.7.6
Decred vote support
X17 cleanup and improvement

2
algos.h

@ -35,6 +35,7 @@ enum sha_algos { @@ -35,6 +35,7 @@ enum sha_algos {
ALGO_SKEIN,
ALGO_SKEIN2,
ALGO_S3,
ALGO_X11EVO,
ALGO_X11,
ALGO_X13,
ALGO_X14,
@ -82,6 +83,7 @@ static const char *algo_names[] = { @@ -82,6 +83,7 @@ static const char *algo_names[] = {
"skein",
"skein2",
"s3",
"x11evo",
"x11",
"x13",
"x14",

1
bench.cpp

@ -71,6 +71,7 @@ void algo_free_all(int thr_id) @@ -71,6 +71,7 @@ void algo_free_all(int thr_id)
free_s3(thr_id);
free_vanilla(thr_id);
free_whirl(thr_id);
free_x11evo(thr_id);
free_x11(thr_id);
free_x13(thr_id);
free_x14(thr_id);

5
ccminer.cpp

@ -241,6 +241,7 @@ Options:\n\ @@ -241,6 +241,7 @@ Options:\n\
skein Skein SHA2 (Skeincoin)\n\
skein2 Double Skein (Woodcoin)\n\
s3 S3 (1Coin)\n\
x11evo Permuted x11 (Revolver)\n\
x11 X11 (DarkCoin)\n\
x13 X13 (MaruCoin)\n\
x14 X14\n\
@ -1901,6 +1902,7 @@ static void *miner_thread(void *userdata) @@ -1901,6 +1902,7 @@ static void *miner_thread(void *userdata)
case ALGO_HEAVY:
case ALGO_LYRA2v2:
case ALGO_S3:
case ALGO_X11EVO:
case ALGO_X11:
case ALGO_X13:
case ALGO_WHIRLCOIN:
@ -2066,6 +2068,9 @@ static void *miner_thread(void *userdata) @@ -2066,6 +2068,9 @@ static void *miner_thread(void *userdata)
//case ALGO_WHIRLPOOLX:
// rc = scanhash_whirlx(thr_id, &work, max_nonce, &hashes_done);
// break;
case ALGO_X11EVO:
rc = scanhash_x11evo(thr_id, &work, max_nonce, &hashes_done);
break;
case ALGO_X11:
rc = scanhash_x11(thr_id, &work, max_nonce, &hashes_done);
break;

1
ccminer.vcxproj

@ -494,6 +494,7 @@ @@ -494,6 +494,7 @@
<CudaCompile Include="x11\sib.cu" />
<CudaCompile Include="x11\s3.cu" />
<CudaCompile Include="x11\x11.cu" />
<CudaCompile Include="x11\x11evo.cu" />
<CudaCompile Include="x13\cuda_x13_hamsi512.cu">
<MaxRegCount>72</MaxRegCount>
</CudaCompile>

3
ccminer.vcxproj.filters

@ -571,6 +571,9 @@ @@ -571,6 +571,9 @@
<CudaCompile Include="x11\x11.cu">
<Filter>Source Files\CUDA\x11</Filter>
</CudaCompile>
<CudaCompile Include="x11\x11evo.cu">
<Filter>Source Files\CUDA\x11</Filter>
</CudaCompile>
<CudaCompile Include="x13\cuda_x13_fugue512.cu">
<Filter>Source Files\CUDA\x13</Filter>
</CudaCompile>

3
miner.h

@ -288,6 +288,7 @@ extern int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, un @@ -288,6 +288,7 @@ extern int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, un
extern int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_vanilla(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done, int8_t blake_rounds);
extern int scanhash_whirl(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_x11evo(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_x11(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_x14(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
@ -331,6 +332,7 @@ extern void free_skein2(int thr_id); @@ -331,6 +332,7 @@ extern void free_skein2(int thr_id);
extern void free_s3(int thr_id);
extern void free_vanilla(int thr_id);
extern void free_whirl(int thr_id);
extern void free_x11evo(int thr_id);
extern void free_x11(int thr_id);
extern void free_x13(int thr_id);
extern void free_x14(int thr_id);
@ -807,6 +809,7 @@ void skein2hash(void *output, const void *input); @@ -807,6 +809,7 @@ void skein2hash(void *output, const void *input);
void s3hash(void *output, const void *input);
void wcoinhash(void *state, const void *input);
void whirlxHash(void *state, const void *input);
void x11evo_hash(void *output, const void *input);
void x11hash(void *output, const void *input);
void x13hash(void *output, const void *input);
void x14hash(void *output, const void *input);

2
skein2.cpp

@ -11,8 +11,6 @@ @@ -11,8 +11,6 @@
static uint32_t *d_hash[MAX_GPUS];
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
extern void skein512_cpu_setBlock_80(void *pdata);
extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int swap);

3
util.cpp

@ -2192,6 +2192,9 @@ void print_hash_tests(void) @@ -2192,6 +2192,9 @@ void print_hash_tests(void)
//whirlxHash(&hash[0], &buf[0]);
//printpfx("whirlpoolx", hash);
x11evo_hash(&hash[0], &buf[0]);
printpfx("x11evo", hash);
x11hash(&hash[0], &buf[0]);
printpfx("X11", hash);

3
x11/cuda_x11.h

@ -3,6 +3,9 @@ @@ -3,6 +3,9 @@
extern void x11_luffaCubehash512_cpu_init(int thr_id, uint32_t threads);
extern void x11_luffaCubehash512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t *d_hash, int order);
extern void x11_luffa512_cpu_init(int thr_id, uint32_t threads);
extern void x11_luffa512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_cubehash512_cpu_init(int thr_id, uint32_t threads);
extern void x11_cubehash512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

412
x11/x11evo.cu

@ -0,0 +1,412 @@ @@ -0,0 +1,412 @@
/**
* X11EVO algo implementation
* Cuda implementation by tpruvot@github - May 2016
*/
#include <stdio.h>
#include <memory.h>
extern "C" {
#include "sph/sph_blake.h"
#include "sph/sph_bmw.h"
#include "sph/sph_groestl.h"
#include "sph/sph_skein.h"
#include "sph/sph_jh.h"
#include "sph/sph_keccak.h"
#include "sph/sph_luffa.h"
#include "sph/sph_cubehash.h"
#include "sph/sph_shavite.h"
#include "sph/sph_simd.h"
#include "sph/sph_echo.h"
}
#include "miner.h"
#include "cuda_helper.h"
#include "cuda_x11.h"
static uint32_t *d_hash[MAX_GPUS];
enum Algo {
BLAKE = 0,
BMW,
GROESTL,
SKEIN,
JH,
KECCAK,
LUFFA,
CUBEHASH,
SHAVITE,
SIMD,
ECHO,
HASH_FUNC_COUNT
};
static void swap8(uint8_t *a, uint8_t *b)
{
uint8_t t = *a;
*a = *b;
*b = t;
}
static void initPerm(uint8_t n[], int count)
{
for (int i = 0; i < count; i++)
n[i] = i;
}
static int nextPerm(uint8_t n[], int count)
{
int tail, i, j;
if (count <= 1)
return 0;
for (i = count - 1; i>0 && n[i - 1] >= n[i]; i--);
tail = i;
if (tail > 0) {
for (j = count - 1; j>tail && n[j] <= n[tail - 1]; j--);
swap8(&n[tail - 1], &n[j]);
}
for (i = tail, j = count - 1; i<j; i++, j--)
swap8(&n[i], &n[j]);
return (tail != 0);
}
static void getAlgoString(char *str, int seq)
{
uint8_t algoList[HASH_FUNC_COUNT];
char *sptr;
initPerm(algoList, HASH_FUNC_COUNT);
for (int k = 0; k < seq; k++) {
nextPerm(algoList, HASH_FUNC_COUNT);
}
sptr = str;
for (int j = 0; j < HASH_FUNC_COUNT; j++) {
if (algoList[j] >= 10)
sprintf(sptr, "%c", 'A' + (algoList[j] - 10));
else
sprintf(sptr, "%u", (uint32_t) algoList[j]);
sptr++;
}
*sptr = '\0';
//applog(LOG_DEBUG, "nextPerm %s", str);
}
static __thread uint32_t s_ntime = 0;
static char hashOrder[HASH_FUNC_COUNT + 1] = { 0 };
static int s_sequence = -1;
#define INITIAL_DATE 0x57254700
static inline int getCurrentAlgoSeq(uint32_t current_time)
{
// change once per day
return (int) (current_time - INITIAL_DATE) / (60 * 60 * 24);
}
static void evo_twisted_code(uint32_t ntime, char *permstr)
{
int seq = getCurrentAlgoSeq(ntime);
if (s_sequence != seq) {
getAlgoString(permstr, seq);
s_sequence = seq;
}
}
// X11evo CPU Hash
extern "C" void x11evo_hash(void *output, const void *input)
{
uint32_t _ALIGN(64) hash[64/4] = { 0 };
sph_blake512_context ctx_blake;
sph_bmw512_context ctx_bmw;
sph_groestl512_context ctx_groestl;
sph_skein512_context ctx_skein;
sph_jh512_context ctx_jh;
sph_keccak512_context ctx_keccak;
sph_luffa512_context ctx_luffa1;
sph_cubehash512_context ctx_cubehash1;
sph_shavite512_context ctx_shavite1;
sph_simd512_context ctx_simd1;
sph_echo512_context ctx_echo1;
if (s_sequence == -1) {
uint32_t *data = (uint32_t*) input;
const uint32_t ntime = data[17];
evo_twisted_code(ntime, hashOrder);
}
void *in = (void*) input;
int size = 80;
const int hashes = (int) strlen(hashOrder);
for (int i = 0; i < hashes; i++)
{
const char elem = hashOrder[i];
uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
if (i > 0) {
in = (void*) hash;
size = 64;
}
switch (algo) {
case BLAKE:
sph_blake512_init(&ctx_blake);
sph_blake512(&ctx_blake, in, size);
sph_blake512_close(&ctx_blake, hash);
break;
case BMW:
sph_bmw512_init(&ctx_bmw);
sph_bmw512(&ctx_bmw, in, size);
sph_bmw512_close(&ctx_bmw, hash);
break;
case GROESTL:
sph_groestl512_init(&ctx_groestl);
sph_groestl512(&ctx_groestl, in, size);
sph_groestl512_close(&ctx_groestl, hash);
break;
case SKEIN:
sph_skein512_init(&ctx_skein);
sph_skein512(&ctx_skein, in, size);
sph_skein512_close(&ctx_skein, hash);
break;
case JH:
sph_jh512_init(&ctx_jh);
sph_jh512(&ctx_jh, in, size);
sph_jh512_close(&ctx_jh, hash);
break;
case KECCAK:
sph_keccak512_init(&ctx_keccak);
sph_keccak512(&ctx_keccak, in, size);
sph_keccak512_close(&ctx_keccak, hash);
break;
case LUFFA:
sph_luffa512_init(&ctx_luffa1);
sph_luffa512(&ctx_luffa1, in, size);
sph_luffa512_close(&ctx_luffa1, hash);
break;
case CUBEHASH:
sph_cubehash512_init(&ctx_cubehash1);
sph_cubehash512(&ctx_cubehash1, in, size);
sph_cubehash512_close(&ctx_cubehash1, hash);
break;
case SHAVITE:
sph_shavite512_init(&ctx_shavite1);
sph_shavite512(&ctx_shavite1, in, size);
sph_shavite512_close(&ctx_shavite1, hash);
break;
case SIMD:
sph_simd512_init(&ctx_simd1);
sph_simd512(&ctx_simd1, in, size);
sph_simd512_close(&ctx_simd1, hash);
break;
case ECHO:
sph_echo512_init(&ctx_echo1);
sph_echo512(&ctx_echo1, in, size);
sph_echo512_close(&ctx_echo1, hash);
break;
}
}
memcpy(output, hash, 32);
}
//#define _DEBUG
#define _DEBUG_PREFIX "evo"
#include "cuda_debug.cuh"
static bool init[MAX_GPUS] = { 0 };
extern void quark_blake512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_outputHash, int order);
extern "C" int scanhash_x11evo(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
{
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 20 : 19;
uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); // 19=256*256*8;
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
if (opt_debug || s_ntime != pdata[17] || s_sequence == -1) {
uint32_t ntime = swab32(work->data[17]);
evo_twisted_code(ntime, hashOrder);
s_ntime = pdata[17];
if (opt_debug) {
int secs = (int) (ntime - INITIAL_DATE) % (60 * 60 * 24);
secs = (60 * 60 * 24) - secs;
applog(LOG_DEBUG, "evo hash order %s, next in %d mn", hashOrder, secs/60);
}
}
if (opt_benchmark)
ptarget[7] = 0x5;
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
quark_blake512_cpu_init(thr_id, throughput);
quark_bmw512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput);
quark_skein512_cpu_init(thr_id, throughput);
quark_keccak512_cpu_init(thr_id, throughput);
quark_jh512_cpu_init(thr_id, throughput);
x11_luffa512_cpu_init(thr_id, throughput);
x11_cubehash512_cpu_init(thr_id, throughput);
x11_shavite512_cpu_init(thr_id, throughput);
x11_echo512_cpu_init(thr_id, throughput);
if (x11_simd512_cpu_init(thr_id, throughput) != 0) {
return 0;
}
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), 0);
cuda_check_cpu_init(thr_id, throughput);
init[thr_id] = true;
}
uint32_t endiandata[20];
for (int k=0; k < 19; k++)
be32enc(&endiandata[k], pdata[k]);
cuda_check_cpu_setTarget(ptarget);
quark_blake512_cpu_setBlock_80(thr_id, endiandata);
const int hashes = (int) strlen(hashOrder);
do {
int order = 1;
uint32_t foundNonce;
// Hash with CUDA
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]);
TRACE("blake80:");
for (int i = 1; i < hashes; i++)
{
const char elem = hashOrder[i];
const uint8_t algo64 = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
switch (algo64) {
case BLAKE:
quark_blake512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("blake :");
break;
case BMW:
quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("bmw :");
break;
case GROESTL:
quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("groestl:");
break;
case SKEIN:
quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("skein :");
break;
case JH:
quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("jh512 :");
break;
case KECCAK:
quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("keccak :");
break;
case LUFFA:
x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("luffa :");
break;
case CUBEHASH:
x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("cube :");
break;
case SHAVITE:
x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("shavite:");
break;
case SIMD:
x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("simd :");
break;
case ECHO:
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("echo => ");
break;
}
}
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX)
{
uint32_t _ALIGN(64) vhash64[8];
const uint32_t Htarg = ptarget[7];
be32enc(&endiandata[19], foundNonce);
x11evo_hash(vhash64, endiandata);
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
int res = 1;
// check if there was some other ones...
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash64);
*hashes_done = pdata[19] - first_nonce + throughput;
if (secNonce != 0) {
be32enc(&endiandata[19], secNonce);
x11evo_hash(vhash64, endiandata);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio)
work_set_target_ratio(work, vhash64);
pdata[21] = secNonce;
res++;
}
pdata[19] = foundNonce;
return res;
} else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
pdata[19] = foundNonce + 1;
continue;
}
}
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
pdata[19] = max_nonce;
break;
}
pdata[19] += throughput;
} while (!work_restart[thr_id].restart);
*hashes_done = pdata[19] - first_nonce;
return 0;
}
// cleanup
extern "C" void free_x11evo(int thr_id)
{
if (!init[thr_id])
return;
cudaThreadSynchronize();
cudaFree(d_hash[thr_id]);
quark_blake512_cpu_free(thr_id);
quark_groestl512_cpu_free(thr_id);
x11_simd512_cpu_free(thr_id);
cuda_check_cpu_free(thr_id);
init[thr_id] = false;
cudaDeviceSynchronize();
}
Loading…
Cancel
Save