update README, small changes, prepare release 1.6.1
still need a SM 3.0 fix for skein...
This commit is contained in:
parent
7963dbb438
commit
d58d53f2b2
@ -7,8 +7,8 @@ else
|
||||
JANSSON_INCLUDES=
|
||||
endif
|
||||
|
||||
EXTRA_DIST = autogen.sh README.txt LICENSE.txt \
|
||||
cudaminer.sln cudaminer.vcxproj cudaminer.vcxproj.filters \
|
||||
EXTRA_DIST = autogen.sh README.md README.txt LICENSE.txt \
|
||||
ccminer.sln ccminer.vcxproj ccminer.vcxproj.filters \
|
||||
compat/gettimeofday.c compat/getopt/getopt_long.c cpuminer-config.h.in
|
||||
|
||||
SUBDIRS = compat
|
||||
@ -121,3 +121,6 @@ JHA/cuda_jha_compactionTest.o: JHA/cuda_jha_compactionTest.cu
|
||||
skein.o: skein.cu
|
||||
$(NVCC) $(nvcc_FLAGS) --maxrregcount=64 -o $@ -c $<
|
||||
|
||||
skein2.o: skein2.cu
|
||||
$(NVCC) $(nvcc_FLAGS) --maxrregcount=64 -o $@ -c $<
|
||||
|
||||
|
@ -3,7 +3,7 @@ ccminer
|
||||
|
||||
Based on Christian Buchner's & Christian H.'s CUDA project, which has not been active on GitHub recently.
|
||||
|
||||
Fork by tpruvot@github with X14,X15,X17,WHIRL,Blake256,BlakeCoin,Lyra2RE and some others, check the [README.txt](README.txt)
|
||||
Fork by tpruvot@github with X14,X15,X17,Blake256,BlakeCoin,Lyra2RE,Skein,ZR5 and others, check the [README.txt](README.txt)
|
||||
|
||||
BTC donation address: 1AJdfCpLWPNoAMDfHF1wD5y8VgKSSTHxPo
|
||||
[![tip for next commit](https://tip4commit.com/projects/927.svg)](https://tip4commit.com/github/tpruvot/ccminer)
|
||||
@ -30,5 +30,7 @@ The tree now contains recent prebuilt openssl and curl .lib for both x86 and x64
|
||||
To rebuild them, you need to clone this repository and its submodules :
|
||||
git clone https://github.com/peters/curl-for-windows.git compat/curl-for-windows
|
||||
|
||||
There is also a [Tutorial for windows](http://cudamining.co.uk/url/tutorials/id/3) on [CudaMining](http://cudamining.co.uk) website.
|
||||
On Linux, you can use the helper ./build.sh (edit it if required)
|
||||
|
||||
There is also an old [Tutorial for windows](http://cudamining.co.uk/url/tutorials/id/3) on [CudaMining](http://cudamining.co.uk) website.
|
||||
|
||||
|
24
README.txt
24
README.txt
@ -1,16 +1,15 @@
|
||||
|
||||
ccMiner release 1.6.0-tpruvot (Mar 2015) - "ZR5, Pluck & WhirlX"
|
||||
ccMiner release 1.6.1-tpruvot (Apr 2015) - "Skein512 Chainsaw"
|
||||
---------------------------------------------------------------
|
||||
|
||||
***************************************************************
|
||||
If you find this tool useful and like to support its continued
|
||||
If you find this tool useful and would like to support its continuous
|
||||
development, then consider a donation.
|
||||
|
||||
tpruvot@github:
|
||||
BTC donation address: 1AJdfCpLWPNoAMDfHF1wD5y8VgKSSTHxPo
|
||||
BTC : 1AJdfCpLWPNoAMDfHF1wD5y8VgKSSTHxPo
|
||||
DRK : XeVrkPrWB7pDbdFLfKhF1Z3xpqhsx6wkH3
|
||||
NEOS : NaEcVrdzoCWHUYXb7X8QoafoKS9UV69Yk4
|
||||
XST : S9TqZucWgT6ajZLDBxQnHUtmkotCEHn9z9
|
||||
ZRC : ZEcubH2xp2mpuwxMjy7wZThr5AzLGu3mqT
|
||||
|
||||
DJM34:
|
||||
BTC donation address: 1NENYmxwZGHsKFmyjTc5WferTn5VTFb7Ze
|
||||
@ -56,7 +55,7 @@ that the most of our comments are in german.
|
||||
|
||||
>>> Command Line Interface <<<
|
||||
|
||||
This code is based on the pooler cpuminer 2.3.2 release and inherits
|
||||
This code is based on the pooler cpuminer and inherits
|
||||
its command line interface and options.
|
||||
|
||||
-a, --algo=ALGO specify the algorithm to use
|
||||
@ -83,7 +82,6 @@ its command line interface and options.
|
||||
s3 use to mine 1coin
|
||||
skein use to mine Skeincoin
|
||||
skein2 use to mine Woodcoin
|
||||
whirl use to mine Whirlcoin
|
||||
whirlpoolx use to mine Vanillacoin
|
||||
x11 use to mine DarkCoin
|
||||
x14 use to mine X14Coin
|
||||
@ -113,20 +111,23 @@ its command line interface and options.
|
||||
-T, --timeout=N network timeout, in seconds (default: 270)
|
||||
-s, --scantime=N upper bound on time spent scanning current work when
|
||||
long polling is unavailable, in seconds (default: 5)
|
||||
-n, --ndevs list cuda devices
|
||||
-N, --statsavg number of samples used to display hashrate (default: 30)
|
||||
--no-gbt disable getblocktemplate support (height check in solo)
|
||||
--no-longpoll disable X-Long-Polling support
|
||||
--no-stratum disable X-Stratum support
|
||||
-q, --quiet disable per-thread hashmeter output
|
||||
--no-color disable colored output
|
||||
-D, --debug enable debug output
|
||||
-P, --protocol-dump verbose dump of protocol-level activities
|
||||
-b, --api-bind IP/Port for the miner API (default: 127.0.0.1:4068)
|
||||
--api-remote Allow remote control
|
||||
-B, --background run the miner in the background
|
||||
--benchmark run in offline benchmark mode
|
||||
--cputest debug hashes from cpu algorithms
|
||||
--cpu-affinity set process affinity to specific cpu core(s) mask
|
||||
--cpu-priority set process priority (default: 0 idle, 2 normal to 5 highest)
|
||||
-c, --config=FILE load a JSON-format configuration file
|
||||
--no-color disable colored console output
|
||||
-V, --version display version information and exit
|
||||
-h, --help display this help text and exit
|
||||
|
||||
@ -189,6 +190,9 @@ features.
|
||||
|
||||
>>> RELEASE HISTORY <<<
|
||||
|
||||
Apr. 14th 2015 v1.6.1
|
||||
Add the Double Skein Algo for Woodcoin
|
||||
|
||||
Mar. 27th 2015 v1.6.0
|
||||
Add the ZR5 Algo for Ziftcoin
|
||||
Implement Skeincoin algo (skein + sha)
|
||||
@ -347,8 +351,8 @@ Christian Buchner, Christian H. (Germany): Initial CUDA implementation
|
||||
|
||||
djm34, tsiv, sp and klausT for cuda algos implementation and optimisation
|
||||
|
||||
Tanguy Pruvot : 750Ti tuning, blake, colors, general code cleanup/opts
|
||||
API monitoring, linux Config/Makefile and vstudio stuff...
|
||||
Tanguy Pruvot : 750Ti tuning, blake, colors, zr5, skein, general code cleanup
|
||||
API monitoring, linux Config/Makefile and vstudio libs...
|
||||
|
||||
and also many thanks to anyone else who contributed to the original
|
||||
cpuminer application (Jeff Garzik, pooler), its original HVC-fork
|
||||
|
@ -263,7 +263,6 @@ Options:\n\
|
||||
x14 X14\n\
|
||||
x15 X15\n\
|
||||
x17 X17 (peoplecurrency)\n\
|
||||
whirl Whirlcoin (old whirlpool)\n\
|
||||
whirlpoolx Vanilla coin\n\
|
||||
zr5 ZR5 (ZiftrCoin)\n\
|
||||
-d, --devices Comma separated list of CUDA devices to use.\n\
|
||||
@ -1396,7 +1395,7 @@ static void *miner_thread(void *userdata)
|
||||
max64 = max(minmax-1, max64);
|
||||
}
|
||||
|
||||
// we can't scan more than uint capacity
|
||||
// we can't scan more than uint32 capacity
|
||||
max64 = min(UINT32_MAX, max64);
|
||||
|
||||
start_nonce = nonceptr[0];
|
||||
@ -1412,6 +1411,11 @@ static void *miner_thread(void *userdata)
|
||||
|
||||
// todo: keep it rounded for gpu threads ?
|
||||
|
||||
if (unlikely(start_nonce > max_nonce)) {
|
||||
// should not happen but seen in skein2 benchmark with 2 gpus
|
||||
max_nonce = end_nonce = UINT32_MAX;
|
||||
}
|
||||
|
||||
work.scanned_from = start_nonce;
|
||||
nonceptr[0] = start_nonce;
|
||||
|
||||
@ -1537,6 +1541,7 @@ static void *miner_thread(void *userdata)
|
||||
max_nonce, &hashes_done);
|
||||
break;
|
||||
|
||||
/* to be deleted */
|
||||
case ALGO_WHIRLCOIN:
|
||||
rc = scanhash_whc(thr_id, work.data, work.target,
|
||||
max_nonce, &hashes_done);
|
||||
|
@ -1,4 +1,4 @@
|
||||
AC_INIT([ccminer], [1.6.1-git])
|
||||
AC_INIT([ccminer], [1.6.1])
|
||||
|
||||
AC_PREREQ([2.59c])
|
||||
AC_CANONICAL_SYSTEM
|
||||
|
1
miner.h
1
miner.h
@ -686,7 +686,6 @@ void qubithash(void *state, const void *input);
|
||||
void skeincoinhash(void *output, const void *input);
|
||||
void skein2hash(void *output, const void *input);
|
||||
void s3hash(void *output, const void *input);
|
||||
void wcoinhash(void *state, const void *input);
|
||||
void whirlxHash(void *state, const void *input);
|
||||
void x11hash(void *output, const void *input);
|
||||
void x13hash(void *output, const void *input);
|
||||
|
@ -12,8 +12,7 @@
|
||||
q[i+8] + ROTL64(q[i+9], 37) + q[i+10] + ROTL64(q[i+11], 43) + \
|
||||
q[i+12] + ROTL64(q[i+13], 53) + (SHR(q[i+14],1) ^ q[i+14]) + (SHR(q[i+15],2) ^ q[i+15])
|
||||
|
||||
static __constant__ uint64_t d_constMem[16];
|
||||
static uint64_t h_constMem[16] = {
|
||||
static __constant__ uint64_t d_constMem[16] = {
|
||||
SPH_C64(0x8081828384858687),
|
||||
SPH_C64(0x88898A8B8C8D8E8F),
|
||||
SPH_C64(0x9091929394959697),
|
||||
|
74
skein.cu
74
skein.cu
@ -3,9 +3,7 @@
|
||||
* by tpruvot@github - 2015
|
||||
*/
|
||||
|
||||
extern "C" {
|
||||
#include "sph/sph_skein.h"
|
||||
}
|
||||
|
||||
#include "miner.h"
|
||||
#include "cuda_helper.h"
|
||||
@ -101,70 +99,10 @@ void sha256_transform_gpu(uint32_t *state, uint32_t *message)
|
||||
}
|
||||
|
||||
/* 3. Mix. */
|
||||
RNDr(S, W, 0);
|
||||
RNDr(S, W, 1);
|
||||
RNDr(S, W, 2);
|
||||
RNDr(S, W, 3);
|
||||
RNDr(S, W, 4);
|
||||
RNDr(S, W, 5);
|
||||
RNDr(S, W, 6);
|
||||
RNDr(S, W, 7);
|
||||
RNDr(S, W, 8);
|
||||
RNDr(S, W, 9);
|
||||
RNDr(S, W, 10);
|
||||
RNDr(S, W, 11);
|
||||
RNDr(S, W, 12);
|
||||
RNDr(S, W, 13);
|
||||
RNDr(S, W, 14);
|
||||
RNDr(S, W, 15);
|
||||
RNDr(S, W, 16);
|
||||
RNDr(S, W, 17);
|
||||
RNDr(S, W, 18);
|
||||
RNDr(S, W, 19);
|
||||
RNDr(S, W, 20);
|
||||
RNDr(S, W, 21);
|
||||
RNDr(S, W, 22);
|
||||
RNDr(S, W, 23);
|
||||
RNDr(S, W, 24);
|
||||
RNDr(S, W, 25);
|
||||
RNDr(S, W, 26);
|
||||
RNDr(S, W, 27);
|
||||
RNDr(S, W, 28);
|
||||
RNDr(S, W, 29);
|
||||
RNDr(S, W, 30);
|
||||
RNDr(S, W, 31);
|
||||
RNDr(S, W, 32);
|
||||
RNDr(S, W, 33);
|
||||
RNDr(S, W, 34);
|
||||
RNDr(S, W, 35);
|
||||
RNDr(S, W, 36);
|
||||
RNDr(S, W, 37);
|
||||
RNDr(S, W, 38);
|
||||
RNDr(S, W, 39);
|
||||
RNDr(S, W, 40);
|
||||
RNDr(S, W, 41);
|
||||
RNDr(S, W, 42);
|
||||
RNDr(S, W, 43);
|
||||
RNDr(S, W, 44);
|
||||
RNDr(S, W, 45);
|
||||
RNDr(S, W, 46);
|
||||
RNDr(S, W, 47);
|
||||
RNDr(S, W, 48);
|
||||
RNDr(S, W, 49);
|
||||
RNDr(S, W, 50);
|
||||
RNDr(S, W, 51);
|
||||
RNDr(S, W, 52);
|
||||
RNDr(S, W, 53);
|
||||
RNDr(S, W, 54);
|
||||
RNDr(S, W, 55);
|
||||
RNDr(S, W, 56);
|
||||
RNDr(S, W, 57);
|
||||
RNDr(S, W, 58);
|
||||
RNDr(S, W, 59);
|
||||
RNDr(S, W, 60);
|
||||
RNDr(S, W, 61);
|
||||
RNDr(S, W, 62);
|
||||
RNDr(S, W, 63);
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 64; i++) {
|
||||
RNDr(S, W, i);
|
||||
}
|
||||
|
||||
for (int i = 0; i < 8; i++)
|
||||
state[i] += S[i];
|
||||
@ -408,8 +346,8 @@ extern "C" int scanhash_skeincoin(int thr_id, uint32_t *pdata,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const int swap = 1;
|
||||
|
||||
uint32_t throughput = device_intensity(thr_id, __func__, 1 << 19); // 256*256*8
|
||||
throughput = min(throughput, (max_nonce - first_nonce));
|
||||
uint32_t throughput = device_intensity(thr_id, __func__, 1 << 19); // 256*256*8
|
||||
throughput = min(throughput, (max_nonce - first_nonce));
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x07;
|
||||
|
59
skein2.cu
59
skein2.cu
@ -1,6 +1,9 @@
|
||||
extern "C" {
|
||||
/**
|
||||
* SKEIN512 80 + SKEIN512 64 (Woodcoin)
|
||||
* by tpruvot@github - 2015
|
||||
*/
|
||||
|
||||
#include "sph/sph_skein.h"
|
||||
}
|
||||
|
||||
#include "miner.h"
|
||||
#include "cuda_helper.h"
|
||||
@ -8,21 +11,19 @@ extern "C" {
|
||||
static uint32_t *d_hash[MAX_GPUS];
|
||||
|
||||
extern void skein512_cpu_setBlock_80(void *pdata);
|
||||
extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order);
|
||||
extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int swap);
|
||||
|
||||
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
|
||||
extern "C" void skein2hash(void *output, const void *input)
|
||||
{
|
||||
uint32_t _ALIGN(64) hash[16];
|
||||
sph_skein512_context ctx_skein;
|
||||
|
||||
uint32_t hash[16];
|
||||
|
||||
sph_skein512_init(&ctx_skein);
|
||||
sph_skein512(&ctx_skein, input, 80);
|
||||
sph_skein512_close(&ctx_skein, hash);
|
||||
//applog_hash((uchar*)hash);
|
||||
//applog_hash((uchar*)&hash[8]);
|
||||
|
||||
sph_skein512_init(&ctx_skein);
|
||||
sph_skein512(&ctx_skein, hash, 64);
|
||||
sph_skein512_close(&ctx_skein, hash);
|
||||
@ -30,21 +31,6 @@ extern "C" void skein2hash(void *output, const void *input)
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
#ifdef _DEBUG
|
||||
#define TRACE(algo) { \
|
||||
if (max_nonce == 1 && pdata[19] <= 1) { \
|
||||
uint32_t* debugbuf = NULL; \
|
||||
cudaMallocHost(&debugbuf, 16*sizeof(uint32_t)); \
|
||||
cudaMemcpy(debugbuf, d_hash[thr_id], 16*sizeof(uint32_t), cudaMemcpyDeviceToHost); \
|
||||
printf("SK2 %s %08x %08x %08x %08x...\n", algo, \
|
||||
swab32(debugbuf[0]), swab32(debugbuf[1]), swab32(debugbuf[2]), swab32(debugbuf[3])); \
|
||||
cudaFree(debugbuf); \
|
||||
} \
|
||||
}
|
||||
#else
|
||||
#define TRACE(algo) {}
|
||||
#endif
|
||||
|
||||
static bool init[MAX_GPUS] = { 0 };
|
||||
|
||||
extern "C" int scanhash_skein2(int thr_id, uint32_t *pdata,
|
||||
@ -53,25 +39,25 @@ extern "C" int scanhash_skein2(int thr_id, uint32_t *pdata,
|
||||
{
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
|
||||
uint32_t throughput = device_intensity(thr_id, __func__, 1 << 19); // 256*256*8
|
||||
throughput = min(throughput, (max_nonce - first_nonce));
|
||||
uint32_t throughput = device_intensity(thr_id, __func__, 1 << 19); // 256*256*8
|
||||
throughput = min(throughput, (max_nonce - first_nonce));
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x0FFF;
|
||||
((uint32_t*)ptarget)[7] = 0;
|
||||
|
||||
if (!init[thr_id])
|
||||
{
|
||||
cudaDeviceReset();
|
||||
cudaSetDevice(device_map[thr_id]);
|
||||
|
||||
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 64 * throughput));
|
||||
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 64UL * throughput));
|
||||
|
||||
cuda_check_cpu_init(thr_id, throughput);
|
||||
init[thr_id] = true;
|
||||
}
|
||||
|
||||
uint32_t endiandata[20];
|
||||
for (int k=0; k < 20; k++)
|
||||
for (int k=0; k < 19; k++)
|
||||
be32enc(&endiandata[k], pdata[k]);
|
||||
|
||||
skein512_cpu_setBlock_80((void*)endiandata);
|
||||
@ -81,24 +67,22 @@ extern "C" int scanhash_skein2(int thr_id, uint32_t *pdata,
|
||||
int order = 0;
|
||||
|
||||
// Hash with CUDA
|
||||
skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||
TRACE("80:");
|
||||
skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], 0);
|
||||
quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||
TRACE("64:");
|
||||
|
||||
*hashes_done = pdata[19] - first_nonce + throughput;
|
||||
|
||||
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
|
||||
if (foundNonce != UINT32_MAX)
|
||||
{
|
||||
uint32_t vhash64[8];
|
||||
uint32_t _ALIGN(64) vhash64[8];
|
||||
|
||||
endiandata[19] = foundNonce;
|
||||
skein2hash(vhash64, endiandata);
|
||||
|
||||
#define Htarg ptarget[7]
|
||||
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
|
||||
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) {
|
||||
int res = 1;
|
||||
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
|
||||
*hashes_done = pdata[19] - first_nonce + throughput;
|
||||
if (secNonce != 0) {
|
||||
if (!opt_quiet)
|
||||
applog(LOG_BLUE, "GPU #%d: found second nonce %08x !", device_map[thr_id], swab32(secNonce));
|
||||
@ -113,10 +97,13 @@ extern "C" int scanhash_skein2(int thr_id, uint32_t *pdata,
|
||||
}
|
||||
}
|
||||
|
||||
if (((uint64_t) throughput + pdata[19]) > max_nonce) {
|
||||
break;
|
||||
}
|
||||
|
||||
pdata[19] += throughput;
|
||||
|
||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
||||
} while (!work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
3
util.cpp
3
util.cpp
@ -1797,9 +1797,6 @@ void print_hash_tests(void)
|
||||
s3hash(&hash[0], &buf[0]);
|
||||
printpfx("S3", hash);
|
||||
|
||||
wcoinhash(&hash[0], &buf[0]);
|
||||
printpfx("whirl", hash);
|
||||
|
||||
whirlxHash(&hash[0], &buf[0]);
|
||||
printpfx("whirlpoolx", hash);
|
||||
|
||||
|
@ -131,7 +131,7 @@ extern "C" void x11hash(void *output, const void *input)
|
||||
uint32_t* debugbuf = NULL; \
|
||||
cudaMallocHost(&debugbuf, 8*sizeof(uint32_t)); \
|
||||
cudaMemcpy(debugbuf, d_hash[thr_id], 8*sizeof(uint32_t), cudaMemcpyDeviceToHost); \
|
||||
printf("%s %08x %08x %08x %08x...\n", algo, swab32(debugbuf[0]), swab32(debugbuf[1]), \
|
||||
printf("X11 %s %08x %08x %08x %08x...\n", algo, swab32(debugbuf[0]), swab32(debugbuf[1]), \
|
||||
swab32(debugbuf[2]), swab32(debugbuf[3])); \
|
||||
cudaFreeHost(debugbuf); \
|
||||
} \
|
||||
@ -209,7 +209,7 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
|
||||
x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||
TRACE("simd :");
|
||||
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||
TRACE("echo X11 =>");
|
||||
TRACE("echo => ");
|
||||
|
||||
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
|
||||
if (foundNonce != UINT32_MAX)
|
||||
|
Loading…
x
Reference in New Issue
Block a user