update README, small changes, prepare release 1.6.1

still need a SM 3.0 fix for skein...
This commit is contained in:
Tanguy Pruvot 2015-04-14 21:57:01 +02:00
parent 7963dbb438
commit d58d53f2b2
11 changed files with 63 additions and 129 deletions

View File

@ -7,8 +7,8 @@ else
JANSSON_INCLUDES=
endif
EXTRA_DIST = autogen.sh README.txt LICENSE.txt \
cudaminer.sln cudaminer.vcxproj cudaminer.vcxproj.filters \
EXTRA_DIST = autogen.sh README.md README.txt LICENSE.txt \
ccminer.sln ccminer.vcxproj ccminer.vcxproj.filters \
compat/gettimeofday.c compat/getopt/getopt_long.c cpuminer-config.h.in
SUBDIRS = compat
@ -121,3 +121,6 @@ JHA/cuda_jha_compactionTest.o: JHA/cuda_jha_compactionTest.cu
skein.o: skein.cu
$(NVCC) $(nvcc_FLAGS) --maxrregcount=64 -o $@ -c $<
skein2.o: skein2.cu
$(NVCC) $(nvcc_FLAGS) --maxrregcount=64 -o $@ -c $<

View File

@ -3,7 +3,7 @@ ccminer
Based on Christian Buchner's & Christian H.'s CUDA project, which is no longer actively maintained on GitHub.
Fork by tpruvot@github with X14,X15,X17,WHIRL,Blake256,BlakeCoin,Lyra2RE and some others, check the [README.txt](README.txt)
Fork by tpruvot@github with X14,X15,X17,Blake256,BlakeCoin,Lyra2RE,Skein,ZR5 and others, check the [README.txt](README.txt)
BTC donation address: 1AJdfCpLWPNoAMDfHF1wD5y8VgKSSTHxPo
[![tip for next commit](https://tip4commit.com/projects/927.svg)](https://tip4commit.com/github/tpruvot/ccminer)
@ -30,5 +30,7 @@ The tree now contains recent prebuilt openssl and curl .lib for both x86 and x64
To rebuild them, you need to clone this repository and its submodules :
git clone https://github.com/peters/curl-for-windows.git compat/curl-for-windows
There is also a [Tutorial for windows](http://cudamining.co.uk/url/tutorials/id/3) on [CudaMining](http://cudamining.co.uk) website.
On Linux, you can use the helper ./build.sh (edit it if required)
There is also an old [Tutorial for windows](http://cudamining.co.uk/url/tutorials/id/3) on [CudaMining](http://cudamining.co.uk) website.

View File

@ -1,16 +1,15 @@
ccMiner release 1.6.0-tpruvot (Mar 2015) - "ZR5, Pluck & WhirlX"
ccMiner release 1.6.1-tpruvot (Apr 2015) - "Skein512 Chainsaw"
---------------------------------------------------------------
***************************************************************
If you find this tool useful and like to support its continued
If you find this tool useful and would like to support its continued
development, then consider a donation.
tpruvot@github:
BTC donation address: 1AJdfCpLWPNoAMDfHF1wD5y8VgKSSTHxPo
BTC : 1AJdfCpLWPNoAMDfHF1wD5y8VgKSSTHxPo
DRK : XeVrkPrWB7pDbdFLfKhF1Z3xpqhsx6wkH3
NEOS : NaEcVrdzoCWHUYXb7X8QoafoKS9UV69Yk4
XST : S9TqZucWgT6ajZLDBxQnHUtmkotCEHn9z9
ZRC : ZEcubH2xp2mpuwxMjy7wZThr5AzLGu3mqT
DJM34:
BTC donation address: 1NENYmxwZGHsKFmyjTc5WferTn5VTFb7Ze
@ -56,7 +55,7 @@ that the most of our comments are in german.
>>> Command Line Interface <<<
This code is based on the pooler cpuminer 2.3.2 release and inherits
This code is based on the pooler cpuminer and inherits
its command line interface and options.
-a, --algo=ALGO specify the algorithm to use
@ -83,7 +82,6 @@ its command line interface and options.
s3 use to mine 1coin
skein use to mine Skeincoin
skein2 use to mine Woodcoin
whirl use to mine Whirlcoin
whirlpoolx use to mine Vanillacoin
x11 use to mine DarkCoin
x14 use to mine X14Coin
@ -113,20 +111,23 @@ its command line interface and options.
-T, --timeout=N network timeout, in seconds (default: 270)
-s, --scantime=N upper bound on time spent scanning current work when
long polling is unavailable, in seconds (default: 5)
-n, --ndevs list cuda devices
-N, --statsavg number of samples used to display hashrate (default: 30)
--no-gbt disable getblocktemplate support (height check in solo)
--no-longpoll disable X-Long-Polling support
--no-stratum disable X-Stratum support
-q, --quiet disable per-thread hashmeter output
--no-color disable colored output
-D, --debug enable debug output
-P, --protocol-dump verbose dump of protocol-level activities
-b, --api-bind IP/Port for the miner API (default: 127.0.0.1:4068)
--api-remote Allow remote control
-B, --background run the miner in the background
--benchmark run in offline benchmark mode
--cputest debug hashes from cpu algorithms
--cpu-affinity set process affinity to specific cpu core(s) mask
--cpu-priority set process priority (default: 0 idle, 2 normal to 5 highest)
-c, --config=FILE load a JSON-format configuration file
--no-color disable colored console output
-V, --version display version information and exit
-h, --help display this help text and exit
@ -189,6 +190,9 @@ features.
>>> RELEASE HISTORY <<<
Apr. 14th 2015 v1.6.1
Add the Double Skein Algo for Woodcoin
Mar. 27th 2015 v1.6.0
Add the ZR5 Algo for Ziftcoin
Implement Skeincoin algo (skein + sha)
@ -347,8 +351,8 @@ Christian Buchner, Christian H. (Germany): Initial CUDA implementation
djm34, tsiv, sp and klausT for cuda algos implementation and optimisation
Tanguy Pruvot : 750Ti tuning, blake, colors, general code cleanup/opts
API monitoring, linux Config/Makefile and vstudio stuff...
Tanguy Pruvot : 750Ti tuning, blake, colors, zr5, skein, general code cleanup
API monitoring, linux Config/Makefile and vstudio libs...
and also many thanks to anyone else who contributed to the original
cpuminer application (Jeff Garzik, pooler), its original HVC-fork

View File

@ -263,7 +263,6 @@ Options:\n\
x14 X14\n\
x15 X15\n\
x17 X17 (peoplecurrency)\n\
whirl Whirlcoin (old whirlpool)\n\
whirlpoolx Vanilla coin\n\
zr5 ZR5 (ZiftrCoin)\n\
-d, --devices Comma separated list of CUDA devices to use.\n\
@ -1396,7 +1395,7 @@ static void *miner_thread(void *userdata)
max64 = max(minmax-1, max64);
}
// we can't scan more than uint capacity
// we can't scan more than uint32 capacity
max64 = min(UINT32_MAX, max64);
start_nonce = nonceptr[0];
@ -1412,6 +1411,11 @@ static void *miner_thread(void *userdata)
// todo: keep it rounded for gpu threads ?
if (unlikely(start_nonce > max_nonce)) {
// should not happen but seen in skein2 benchmark with 2 gpus
max_nonce = end_nonce = UINT32_MAX;
}
work.scanned_from = start_nonce;
nonceptr[0] = start_nonce;
@ -1537,6 +1541,7 @@ static void *miner_thread(void *userdata)
max_nonce, &hashes_done);
break;
/* to be deleted */
case ALGO_WHIRLCOIN:
rc = scanhash_whc(thr_id, work.data, work.target,
max_nonce, &hashes_done);

View File

@ -1,4 +1,4 @@
AC_INIT([ccminer], [1.6.1-git])
AC_INIT([ccminer], [1.6.1])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM

View File

@ -686,7 +686,6 @@ void qubithash(void *state, const void *input);
void skeincoinhash(void *output, const void *input);
void skein2hash(void *output, const void *input);
void s3hash(void *output, const void *input);
void wcoinhash(void *state, const void *input);
void whirlxHash(void *state, const void *input);
void x11hash(void *output, const void *input);
void x13hash(void *output, const void *input);

View File

@ -12,8 +12,7 @@
q[i+8] + ROTL64(q[i+9], 37) + q[i+10] + ROTL64(q[i+11], 43) + \
q[i+12] + ROTL64(q[i+13], 53) + (SHR(q[i+14],1) ^ q[i+14]) + (SHR(q[i+15],2) ^ q[i+15])
static __constant__ uint64_t d_constMem[16];
static uint64_t h_constMem[16] = {
static __constant__ uint64_t d_constMem[16] = {
SPH_C64(0x8081828384858687),
SPH_C64(0x88898A8B8C8D8E8F),
SPH_C64(0x9091929394959697),

View File

@ -3,9 +3,7 @@
* by tpruvot@github - 2015
*/
extern "C" {
#include "sph/sph_skein.h"
}
#include "miner.h"
#include "cuda_helper.h"
@ -101,70 +99,10 @@ void sha256_transform_gpu(uint32_t *state, uint32_t *message)
}
/* 3. Mix. */
RNDr(S, W, 0);
RNDr(S, W, 1);
RNDr(S, W, 2);
RNDr(S, W, 3);
RNDr(S, W, 4);
RNDr(S, W, 5);
RNDr(S, W, 6);
RNDr(S, W, 7);
RNDr(S, W, 8);
RNDr(S, W, 9);
RNDr(S, W, 10);
RNDr(S, W, 11);
RNDr(S, W, 12);
RNDr(S, W, 13);
RNDr(S, W, 14);
RNDr(S, W, 15);
RNDr(S, W, 16);
RNDr(S, W, 17);
RNDr(S, W, 18);
RNDr(S, W, 19);
RNDr(S, W, 20);
RNDr(S, W, 21);
RNDr(S, W, 22);
RNDr(S, W, 23);
RNDr(S, W, 24);
RNDr(S, W, 25);
RNDr(S, W, 26);
RNDr(S, W, 27);
RNDr(S, W, 28);
RNDr(S, W, 29);
RNDr(S, W, 30);
RNDr(S, W, 31);
RNDr(S, W, 32);
RNDr(S, W, 33);
RNDr(S, W, 34);
RNDr(S, W, 35);
RNDr(S, W, 36);
RNDr(S, W, 37);
RNDr(S, W, 38);
RNDr(S, W, 39);
RNDr(S, W, 40);
RNDr(S, W, 41);
RNDr(S, W, 42);
RNDr(S, W, 43);
RNDr(S, W, 44);
RNDr(S, W, 45);
RNDr(S, W, 46);
RNDr(S, W, 47);
RNDr(S, W, 48);
RNDr(S, W, 49);
RNDr(S, W, 50);
RNDr(S, W, 51);
RNDr(S, W, 52);
RNDr(S, W, 53);
RNDr(S, W, 54);
RNDr(S, W, 55);
RNDr(S, W, 56);
RNDr(S, W, 57);
RNDr(S, W, 58);
RNDr(S, W, 59);
RNDr(S, W, 60);
RNDr(S, W, 61);
RNDr(S, W, 62);
RNDr(S, W, 63);
#pragma unroll
for (int i = 0; i < 64; i++) {
RNDr(S, W, i);
}
for (int i = 0; i < 8; i++)
state[i] += S[i];
@ -408,8 +346,8 @@ extern "C" int scanhash_skeincoin(int thr_id, uint32_t *pdata,
const uint32_t first_nonce = pdata[19];
const int swap = 1;
uint32_t throughput = device_intensity(thr_id, __func__, 1 << 19); // 256*256*8
throughput = min(throughput, (max_nonce - first_nonce));
uint32_t throughput = device_intensity(thr_id, __func__, 1 << 19); // 256*256*8
throughput = min(throughput, (max_nonce - first_nonce));
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x07;

View File

@ -1,6 +1,9 @@
extern "C" {
/**
* SKEIN512 80 + SKEIN512 64 (Woodcoin)
* by tpruvot@github - 2015
*/
#include "sph/sph_skein.h"
}
#include "miner.h"
#include "cuda_helper.h"
@ -8,21 +11,19 @@ extern "C" {
static uint32_t *d_hash[MAX_GPUS];
extern void skein512_cpu_setBlock_80(void *pdata);
extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int swap);
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern "C" void skein2hash(void *output, const void *input)
{
uint32_t _ALIGN(64) hash[16];
sph_skein512_context ctx_skein;
uint32_t hash[16];
sph_skein512_init(&ctx_skein);
sph_skein512(&ctx_skein, input, 80);
sph_skein512_close(&ctx_skein, hash);
//applog_hash((uchar*)hash);
//applog_hash((uchar*)&hash[8]);
sph_skein512_init(&ctx_skein);
sph_skein512(&ctx_skein, hash, 64);
sph_skein512_close(&ctx_skein, hash);
@ -30,21 +31,6 @@ extern "C" void skein2hash(void *output, const void *input)
memcpy(output, hash, 32);
}
#ifdef _DEBUG
/*
 * Debug-only helper: when max_nonce == 1 and pdata[19] <= 1, copy the first
 * 16 32-bit words of d_hash[thr_id] back to the host and print the first
 * four of them (byte-swapped) prefixed with "SK2" and the given label.
 *
 * The staging buffer is pinned host memory obtained with cudaMallocHost(),
 * so it must be released with cudaFreeHost() (not cudaFree(), which is for
 * device allocations). The buffer is only read if both the allocation and
 * the device-to-host copy succeeded.
 *
 * Expects max_nonce, pdata, d_hash and thr_id to be in scope at the call site.
 */
#define TRACE(algo) { \
	if (max_nonce == 1 && pdata[19] <= 1) { \
		uint32_t* debugbuf = NULL; \
		if (cudaMallocHost(&debugbuf, 16*sizeof(uint32_t)) == cudaSuccess) { \
			if (cudaMemcpy(debugbuf, d_hash[thr_id], 16*sizeof(uint32_t), cudaMemcpyDeviceToHost) == cudaSuccess) \
				printf("SK2 %s %08x %08x %08x %08x...\n", algo, \
					swab32(debugbuf[0]), swab32(debugbuf[1]), swab32(debugbuf[2]), swab32(debugbuf[3])); \
			cudaFreeHost(debugbuf); \
		} \
	} \
}
#else
/* Release builds: tracing compiles to an empty block. */
#define TRACE(algo) {}
#endif
static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_skein2(int thr_id, uint32_t *pdata,
@ -53,25 +39,25 @@ extern "C" int scanhash_skein2(int thr_id, uint32_t *pdata,
{
const uint32_t first_nonce = pdata[19];
uint32_t throughput = device_intensity(thr_id, __func__, 1 << 19); // 256*256*8
throughput = min(throughput, (max_nonce - first_nonce));
uint32_t throughput = device_intensity(thr_id, __func__, 1 << 19); // 256*256*8
throughput = min(throughput, (max_nonce - first_nonce));
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x0FFF;
((uint32_t*)ptarget)[7] = 0;
if (!init[thr_id])
{
cudaDeviceReset();
cudaSetDevice(device_map[thr_id]);
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 64 * throughput));
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 64UL * throughput));
cuda_check_cpu_init(thr_id, throughput);
init[thr_id] = true;
}
uint32_t endiandata[20];
for (int k=0; k < 20; k++)
for (int k=0; k < 19; k++)
be32enc(&endiandata[k], pdata[k]);
skein512_cpu_setBlock_80((void*)endiandata);
@ -81,24 +67,22 @@ extern "C" int scanhash_skein2(int thr_id, uint32_t *pdata,
int order = 0;
// Hash with CUDA
skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
TRACE("80:");
skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], 0);
quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("64:");
*hashes_done = pdata[19] - first_nonce + throughput;
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX)
{
uint32_t vhash64[8];
uint32_t _ALIGN(64) vhash64[8];
endiandata[19] = foundNonce;
skein2hash(vhash64, endiandata);
#define Htarg ptarget[7]
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) {
int res = 1;
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
*hashes_done = pdata[19] - first_nonce + throughput;
if (secNonce != 0) {
if (!opt_quiet)
applog(LOG_BLUE, "GPU #%d: found second nonce %08x !", device_map[thr_id], swab32(secNonce));
@ -113,10 +97,13 @@ extern "C" int scanhash_skein2(int thr_id, uint32_t *pdata,
}
}
if (((uint64_t) throughput + pdata[19]) > max_nonce) {
break;
}
pdata[19] += throughput;
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
} while (!work_restart[thr_id].restart);
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}

View File

@ -1797,9 +1797,6 @@ void print_hash_tests(void)
s3hash(&hash[0], &buf[0]);
printpfx("S3", hash);
wcoinhash(&hash[0], &buf[0]);
printpfx("whirl", hash);
whirlxHash(&hash[0], &buf[0]);
printpfx("whirlpoolx", hash);

View File

@ -131,7 +131,7 @@ extern "C" void x11hash(void *output, const void *input)
uint32_t* debugbuf = NULL; \
cudaMallocHost(&debugbuf, 8*sizeof(uint32_t)); \
cudaMemcpy(debugbuf, d_hash[thr_id], 8*sizeof(uint32_t), cudaMemcpyDeviceToHost); \
printf("%s %08x %08x %08x %08x...\n", algo, swab32(debugbuf[0]), swab32(debugbuf[1]), \
printf("X11 %s %08x %08x %08x %08x...\n", algo, swab32(debugbuf[0]), swab32(debugbuf[1]), \
swab32(debugbuf[2]), swab32(debugbuf[3])); \
cudaFreeHost(debugbuf); \
} \
@ -209,7 +209,7 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("simd :");
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("echo X11 =>");
TRACE("echo => ");
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX)