Add skunk algo, initial version

This commit is contained in:
Tanguy Pruvot 2017-07-19 13:52:45 +02:00
parent 5aa50a4215
commit cbede12c47
13 changed files with 228 additions and 12 deletions

View File

@ -55,7 +55,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \
quark/nist5.cu \
quark/quarkcoin.cu quark/cuda_quark_compactionTest.cu \
neoscrypt/neoscrypt.cpp neoscrypt/neoscrypt-cpu.c neoscrypt/cuda_neoscrypt.cu \
pentablake.cu skein.cu cuda_skeincoin.cu skein2.cpp zr5.cu \
pentablake.cu skein.cu cuda_skeincoin.cu skein2.cpp skunk.cu zr5.cu \
sha256/sha256d.cu sha256/cuda_sha256d.cu sha256/sha256t.cu sha256/cuda_sha256t.cu \
sia/sia.cu sia/sia-rpc.cpp sph/blake2b.c \
sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c \

View File

@ -1,5 +1,5 @@
ccminer 2.1-dev (June 2017) "Equihash"
ccminer 2.2-dev (July 2017) "Equihash, tribus and skunk"
---------------------------------------------------------------
***************************************************************
@ -9,7 +9,6 @@ If you find this tool useful and like to support its continuous
tpruvot@github:
BTC : 1AJdfCpLWPNoAMDfHF1wD5y8VgKSSTHxPo
DCR : DsUCcACGcyP8McNMRXQwbtpDxaVUYLDQDeU
LBC : bKe6pLqELL3HHSbpJXxSdn5RrY2bfrkRhF
Alexis:
BTC : 14EgXD7fPYD4sHBXWUi46VeiTVXNq765B8
@ -51,6 +50,8 @@ Scrypt and Scrypt:N
Scrypt-Jane (Chacha)
Sibcoin (sib)
Skein (Skein + SHA)
Signatum (Skein cubehash fugue Streebog)
Tribus (JH, keccak, simd)
Woodcoin (Double Skein)
Vanilla (Blake256 8-rounds - double sha256)
Vertcoin Lyra2RE
@ -116,6 +117,7 @@ its command line interface and options.
sib use to mine Sibcoin
skein use to mine Skeincoin
skein2 use to mine Woodcoin
skunk use to mine Signatum
timetravel use to mine MachineCoin
tribus use to mine Denarius
x11evo use to mine Revolver
@ -280,7 +282,12 @@ features.
>>> RELEASE HISTORY <<<
v2.1 (unfinished)
v2.2 (under dev)
New skunk algo, using the heavy streebog algorithm
Enhance tribus algo (+10%)
equihash protocol enhancement on yiimp.ccminer.org and zpool.ca
June 16th 2017 v2.1-tribus
Interface equihash algo with djeZo solver (from nheqminer 0.5c)
New api parameters (and multicast announces for local networks)
New tribus algo

View File

@ -45,6 +45,7 @@ enum sha_algos {
ALGO_SIB,
ALGO_SKEIN,
ALGO_SKEIN2,
ALGO_SKUNK,
ALGO_S3,
ALGO_TIMETRAVEL,
ALGO_TRIBUS,
@ -109,6 +110,7 @@ static const char *algo_names[] = {
"sib",
"skein",
"skein2",
"skunk",
"s3",
"timetravel",
"tribus",

View File

@ -80,6 +80,7 @@ void algo_free_all(int thr_id)
free_qubit(thr_id);
free_skeincoin(thr_id);
free_skein2(thr_id);
free_skunk(thr_id);
free_sha256d(thr_id);
free_sha256t(thr_id);
free_sia(thr_id);

View File

@ -275,6 +275,7 @@ Options:\n\
scrypt-jane Scrypt-jane Chacha\n\
skein Skein SHA2 (Skeincoin)\n\
skein2 Double Skein (Woodcoin)\n\
skunk Skein Cube Fugue Streebog\n\
s3 S3 (1Coin)\n\
timetravel Machinecoin permuted x8\n\
tribus Denerius\n\
@ -2401,6 +2402,9 @@ static void *miner_thread(void *userdata)
case ALGO_SKEIN2:
rc = scanhash_skein2(thr_id, &work, max_nonce, &hashes_done);
break;
case ALGO_SKUNK:
rc = scanhash_skunk(thr_id, &work, max_nonce, &hashes_done);
break;
case ALGO_SHA256D:
rc = scanhash_sha256d(thr_id, &work, max_nonce, &hashes_done);
break;

View File

@ -532,6 +532,7 @@
<CudaCompile Include="cuda_skeincoin.cu">
<MaxRegCount>48</MaxRegCount>
</CudaCompile>
<CudaCompile Include="skunk.cu" />
<CudaCompile Include="tribus.cu" />
<ClInclude Include="x11\cuda_x11_aes.cuh" />
<CudaCompile Include="x11\cuda_x11_cubehash512.cu" />
@ -563,10 +564,8 @@
</CudaCompile>
<CudaCompile Include="x13\cuda_x13_fugue512.cu">
</CudaCompile>
<CudaCompile Include="x13\x13.cu">
</CudaCompile>
<CudaCompile Include="x15\x14.cu">
</CudaCompile>
<CudaCompile Include="x13\x13.cu" />
<CudaCompile Include="x15\x14.cu" />
<CudaCompile Include="x15\cuda_x14_shabal512.cu" />
<CudaCompile Include="x15\cuda_x15_whirlpool.cu" />
<CudaCompile Include="x17\hmq17.cu" />
@ -600,4 +599,4 @@
<Target Name="AfterClean">
<Delete Files="@(FilesToCopy->'$(OutDir)%(Filename)%(Extension)')" TreatErrorsAsWarnings="true" />
</Target>
</Project>
</Project>

View File

@ -754,6 +754,9 @@
<CudaCompile Include="pentablake.cu">
<Filter>Source Files\CUDA</Filter>
</CudaCompile>
<CudaCompile Include="skunk.cu">
<Filter>Source Files\CUDA</Filter>
</CudaCompile>
<CudaCompile Include="tribus.cu">
<Filter>Source Files\CUDA</Filter>
</CudaCompile>

View File

@ -164,7 +164,7 @@
#define PACKAGE_URL "http://github.com/tpruvot/ccminer"
/* Define to the version of this package. */
#define PACKAGE_VERSION "2.1"
#define PACKAGE_VERSION "2.2"
/* If using the C implementation of alloca, define if you know the
direction of stack growth for your system; otherwise it will be

View File

@ -1,4 +1,4 @@
AC_INIT([ccminer], [2.1], [], [ccminer], [http://github.com/tpruvot/ccminer])
AC_INIT([ccminer], [2.2], [], [ccminer], [http://github.com/tpruvot/ccminer])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM

View File

@ -308,6 +308,7 @@ extern int scanhash_sia(int thr_id, struct work *work, uint32_t max_nonce, unsig
extern int scanhash_sib(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_skunk(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_timetravel(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_tribus(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
@ -368,6 +369,7 @@ extern void free_sia(int thr_id);
extern void free_sib(int thr_id);
extern void free_skeincoin(int thr_id);
extern void free_skein2(int thr_id);
extern void free_skunk(int thr_id);
extern void free_s3(int thr_id);
extern void free_timetravel(int thr_id);
extern void free_tribus(int thr_id);
@ -909,6 +911,7 @@ void sha256t_hash(void *output, const void *input);
void sibhash(void *output, const void *input);
void skeincoinhash(void *output, const void *input);
void skein2hash(void *output, const void *input);
void skunk_hash(void *state, const void *input);
void s3hash(void *output, const void *input);
void timetravel_hash(void *output, const void *input);
void bitcore_hash(void *output, const void *input);

194
skunk.cu Normal file
View File

@ -0,0 +1,194 @@
/**
* Skunk Algo for Signatum
* (skein, cube, fugue, gost streebog)
*
* tpruvot@github 06 2017 - GPLv3
*/
extern "C" {
#include "sph/sph_skein.h"
#include "sph/sph_cubehash.h"
#include "sph/sph_fugue.h"
#include "sph/sph_streebog.h"
}
#include "miner.h"
#include "cuda_helper.h"
// GPU stage launchers defined in other compilation units; they are declared
// here directly instead of through a shared header.

// Skein-512 stage: setBlock_80 receives the endian-swapped work data once per
// scan, hash_80 is the first step of each pass in scanhash_skunk.
extern void skein512_cpu_setBlock_80(void *pdata);
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int swap);
// Cubehash-512 stage (second step), operating on d_hash.
extern void x11_cubehash512_cpu_init(int thr_id, uint32_t threads);
extern void x11_cubehash512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
// Fugue-512 stage (third step); has its own free routine, called from free_skunk.
extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads);
extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x13_fugue512_cpu_free(int thr_id);
// Streebog final stage (fourth step): takes d_hash and a result buffer that
// scanhash_skunk reads back as candidate nonce offsets. The target is passed
// separately through streebog_set_target before the scan loop.
extern void streebog_cpu_hash_64_final(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t* d_resNonce);
extern void streebog_set_target(const uint32_t* ptarget);
#include <stdio.h>
#include <memory.h>
// Number of uint32_t result slots in d_resNonce / h_resNonce, i.e. how many
// candidate nonces can be read back from one pass.
#define NBN 2
// Per-thread device buffers, allocated with cudaMalloc on first use in
// scanhash_skunk: d_hash gets 64 bytes per cuda thread, d_resNonce gets NBN slots.
static uint32_t *d_hash[MAX_GPUS];
static uint32_t *d_resNonce[MAX_GPUS];
/**
 * CPU reference implementation of the skunk chain:
 * skein512 -> cubehash512 -> fugue512 -> gost512 (streebog).
 *
 * @param output  receives the first 32 bytes of the final digest
 * @param input   source data; exactly 80 bytes are read from it
 */
extern "C" void skunk_hash(void *output, const void *input)
{
	// Single scratch buffer, zero-filled; every stage after the first reads
	// 64 bytes from it and writes its own digest back to the same place.
	unsigned char _ALIGN(128) digest[128] = { 0 };

	{	// stage 1: skein512 over the 80 input bytes
		sph_skein512_context skein;
		sph_skein512_init(&skein);
		sph_skein512(&skein, input, 80);
		sph_skein512_close(&skein, (void*) digest);
	}

	{	// stage 2: cubehash512 over the previous 64-byte digest
		sph_cubehash512_context cube;
		sph_cubehash512_init(&cube);
		sph_cubehash512(&cube, (const void*) digest, 64);
		sph_cubehash512_close(&cube, (void*) digest);
	}

	{	// stage 3: fugue512 over the previous 64-byte digest
		sph_fugue512_context fugue;
		sph_fugue512_init(&fugue);
		sph_fugue512(&fugue, (const void*) digest, 64);
		sph_fugue512_close(&fugue, (void*) digest);
	}

	{	// stage 4: gost512 over the previous 64-byte digest
		sph_gost512_context gost;
		sph_gost512_init(&gost);
		sph_gost512(&gost, (const void*) digest, 64);
		sph_gost512_close(&gost, (void*) digest);
	}

	// Only the leading 32 bytes of the last digest are handed back.
	memcpy(output, digest, 32);
}
// Set once the per-thread GPU buffers and stage contexts have been created.
static bool init[MAX_GPUS] = { 0 };

/**
 * Scan a nonce range with the skunk chain on the GPU mapped to thr_id.
 *
 * Each pass runs skein512 -> cubehash512 -> fugue512 -> streebog(final) over
 * `throughput` nonces starting at pdata[19], reads back up to NBN candidate
 * offsets, and re-checks them on the CPU with skunk_hash().
 *
 * @param thr_id       miner thread index (indexes device_map, d_hash, d_resNonce, init)
 * @param work         work item; data[19] is used as the nonce cursor and is advanced,
 *                     nonces[] / valid_nonces are filled when a share is found
 * @param max_nonce    upper bound of the range; the cursor is clamped to it
 * @param hashes_done  out: number of nonces covered since the call started
 * @return work->valid_nonces (1 or 2) when a candidate validates, 0 when the
 *         range is exhausted or a restart is requested. The allocation macros
 *         are given 0 and -1 as their failure values (presumably returned on
 *         cudaMalloc failure - confirm against CUDA_CALL_OR_RET_X).
 */
extern "C" int scanhash_skunk(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
{
	int dev_id = device_map[thr_id];

	uint32_t *pdata = work->data;
	uint32_t *ptarget = work->target;
	const uint32_t first_nonce = pdata[19];

	// Default intensity by compute capability, bumped for "GTX 10" device names.
	int intensity = (device_sm[dev_id] > 500) ? 18 : 17;
	if (strstr(device_name[dev_id], "GTX 10")) intensity = 19;

	uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity);
	//if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);

	if (opt_benchmark)
		ptarget[7] = 0xf;

	if (!init[thr_id])
	{
		cudaSetDevice(dev_id);
		if (opt_cudaschedule == -1 && gpu_threads == 1) {
			cudaDeviceReset();
			// reduce cpu usage
			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
			CUDA_LOG_ERROR();
		}
		gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);

		quark_skein512_cpu_init(thr_id, throughput);
		x11_cubehash512_cpu_init(thr_id, throughput);
		x13_fugue512_cpu_init(thr_id, throughput);

		// 64 bytes of intermediate hash per cuda thread, plus NBN result slots.
		CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), 0);
		CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t)), -1);

		init[thr_id] = true;
	}

	uint32_t _ALIGN(64) h_resNonce[NBN];
	uint32_t _ALIGN(64) endiandata[20];
	for (int k=0; k < 20; k++)
		be32enc(&endiandata[k], pdata[k]);

	skein512_cpu_setBlock_80(endiandata);

	// 0xff fill makes every slot UINT32_MAX, which is the "no result" marker tested below.
	cudaMemset(d_resNonce[thr_id], 0xff, NBN*sizeof(uint32_t));
	streebog_set_target(ptarget);

	do {
		int order = 0;
		skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], 1); order++;
		x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		streebog_cpu_hash_64_final(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id]);

		cudaMemcpy(h_resNonce, d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost);

		*hashes_done = pdata[19] - first_nonce + throughput;

		if (h_resNonce[0] != UINT32_MAX)
		{
			uint32_t _ALIGN(64) vhash[8];
			const uint32_t Htarg = ptarget[7];
			const uint32_t startNounce = pdata[19];

			// Results are offsets relative to the start of this pass.
			be32enc(&endiandata[19], startNounce + h_resNonce[0]);
			skunk_hash(vhash, endiandata);

			if (vhash[7] <= Htarg && fulltest(vhash, ptarget))
			{
				work->nonces[0] = startNounce + h_resNonce[0];
				work->valid_nonces = 1;
				work_set_target_ratio(work, vhash);
				if (h_resNonce[1] != UINT32_MAX)
				{
					uint32_t secNonce = work->nonces[1] = startNounce + h_resNonce[1];
					be32enc(&endiandata[19], secNonce);
					skunk_hash(vhash, endiandata);
					work->nonces[1] = secNonce;
					// Keep the nonce with the better share ratio in slot 0.
					if (bn_hash_target_ratio(vhash, ptarget) > work->shareratio[0]) {
						work_set_target_ratio(work, vhash);
						xchg(work->nonces[1], work->nonces[0]);
					} else {
						bn_set_target_ratio(work, vhash, work->valid_nonces);
					}
					work->valid_nonces++;
					pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
				} else {
					pdata[19] = work->nonces[0] + 1; // cursor
				}
				return work->valid_nonces;
			}
			else if (vhash[7] > Htarg) {
				// GPU candidate does not validate on the CPU: count it, clear the
				// result slots and resume right after the offending nonce.
				gpu_increment_reject(thr_id);
				cudaMemset(d_resNonce[thr_id], 0xff, NBN*sizeof(uint32_t));
				pdata[19] = startNounce + h_resNonce[0] + 1;
				continue;
			}
			else {
				// High word is within Htarg but fulltest() refused the hash.
				// The result slots must still be cleared here: they are only reset
				// before the loop and in the reject branch, so a leftover offset
				// would be read back on the next pass and applied to a different
				// startNounce (unless the final kernel happens to overwrite it).
				cudaMemset(d_resNonce[thr_id], 0xff, NBN*sizeof(uint32_t));
			}
		}

		// 64-bit sum so that cursor + throughput cannot wrap around.
		if ((uint64_t) throughput + pdata[19] >= max_nonce) {
			pdata[19] = max_nonce;
			break;
		}
		pdata[19] += throughput;

	} while (!work_restart[thr_id].restart);

	*hashes_done = pdata[19] - first_nonce;
	return 0;
}
/**
 * Release the GPU resources created by scanhash_skunk for this miner thread.
 * Does nothing when the thread was never initialised (or was already freed).
 *
 * @param thr_id  miner thread index (indexes init, d_hash, d_resNonce)
 */
extern "C" void free_skunk(int thr_id)
{
	if (!init[thr_id])
		return;

	// Wait for pending GPU work before releasing buffers.
	// cudaDeviceSynchronize() is the non-deprecated equivalent of cudaThreadSynchronize().
	cudaDeviceSynchronize();

	x13_fugue512_cpu_free(thr_id);

	cudaFree(d_hash[thr_id]);
	cudaFree(d_resNonce[thr_id]);
	// Do not keep dangling device pointers around after the free.
	d_hash[thr_id] = NULL;
	d_resNonce[thr_id] = NULL;

	init[thr_id] = false;

	cudaDeviceSynchronize();
}

View File

@ -2270,6 +2270,9 @@ void print_hash_tests(void)
skein2hash(&hash[0], &buf[0]);
printpfx("skein2", hash);
skunk_hash(&hash[0], &buf[0]);
printpfx("skunk", hash);
s3hash(&hash[0], &buf[0]);
printpfx("S3", hash);

View File

@ -254,7 +254,7 @@ void x13_fugue512_gpu_hash_64(uint32_t threads, uint64_t *g_hash)
mixtabs[thr+256] = ROR8(tmp);
mixtabs[thr+512] = ROL16(tmp);
mixtabs[thr+768] = ROL8(tmp);
#if TPB < 256
#if TPB <= 256
if (blockDim.x < 256) {
const uint32_t thr = (threadIdx.x + 0x80) & 0xFF;
const uint32_t tmp = tex1Dfetch(mixTab0Tex, thr);