diff --git a/Algo256/blake256.cu b/Algo256/blake256.cu index aee56c9..174a30c 100644 --- a/Algo256/blake256.cu +++ b/Algo256/blake256.cu @@ -487,7 +487,7 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non if (opt_benchmark) { targetHigh = 0x1ULL << 32; - ptarget[6] = swab32(0x00ff); + ptarget[6] = swab32(0xff); } if (!init[thr_id]) @@ -519,9 +519,9 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non if (foundNonce != UINT32_MAX) { uint32_t vhashcpu[8]; - uint32_t Htarg = (uint32_t)targetHigh; + uint32_t Htarg = ptarget[6]; - for (int k=0; k < 19; k++) + for (int k=16; k < 19; k++) be32enc(&endiandata[k], pdata[k]); be32enc(&endiandata[19], foundNonce); diff --git a/Algo256/decred.cu b/Algo256/decred.cu new file mode 100644 index 0000000..9f59eec --- /dev/null +++ b/Algo256/decred.cu @@ -0,0 +1,443 @@ +/** + * Blake-256 Decred 180-Bytes input Cuda Kernel (Tested on SM 5/5.2) + * + * Tanguy Pruvot - Feb 2016 + */ + +#include +#include + +#include + +extern "C" { +#include +} + +/* threads per block */ +#define TPB 256 + +/* hash by cpu with blake 256 */ +extern "C" void decred_hash(void *output, const void *input) +{ + sph_blake256_context ctx; + + sph_blake256_set_rounds(14); + + sph_blake256_init(&ctx); + sph_blake256(&ctx, input, 180); + sph_blake256_close(&ctx, output); +} + +#include + +#ifdef __INTELLISENSE__ +#define __byte_perm(x, y, b) x +#endif + +__constant__ uint32_t _ALIGN(4) d_data[24]; + +/* 8 adapters max */ +static uint32_t *d_resNonce[MAX_GPUS]; +static uint32_t *h_resNonce[MAX_GPUS]; + +/* max count of found nonces in one call */ +#define NBN 1 +#if NBN > 1 +static uint32_t extra_results[NBN] = { UINT32_MAX }; +#endif + +/* ############################################################################################################################### */ + +#define GSPREC(a,b,c,d,x,y) { \ + v[a] += (m[x] ^ c_u256[y]) + v[b]; \ + v[d] = __byte_perm(v[d] ^ v[a], 0, 0x1032); \ + v[c] += v[d]; \ + v[b] = SPH_ROTR32(v[b] ^ v[c], 12); \ + v[a] += (m[y] ^ c_u256[x]) + v[b]; \ + v[d] = __byte_perm(v[d] ^ v[a], 0, 0x0321); \ + v[c] += v[d]; \ + v[b] = SPH_ROTR32(v[b] ^ v[c], 7); \ +} + +__device__ __forceinline__ +void blake256_compress_14(uint32_t *h, const uint32_t nonce, const uint32_t T0) +{ + uint32_t v[16]; + + #pragma unroll 8 + for(uint32_t i = 0; i < 8; i++) + v[i] = h[i]; + + const uint32_t c_u256[16] = { + 0x243F6A88, 0x85A308D3, 0x13198A2E, 0x03707344, + 0xA4093822, 0x299F31D0, 0x082EFA98, 0xEC4E6C89, + 0x452821E6, 0x38D01377, 0xBE5466CF, 0x34E90C6C, + 0xC0AC29B7, 0xC97C50DD, 0x3F84D5B5, 0xB5470917 + }; + + v[ 8] = c_u256[0]; + v[ 9] = c_u256[1]; + v[10] = c_u256[2]; + v[11] = c_u256[3]; + + v[12] = c_u256[4] ^ T0; + v[13] = c_u256[5] ^ T0; + v[14] = c_u256[6]; + v[15] = c_u256[7]; + + uint32_t m[16]; + + m[0] = d_data[8]; + m[1] = d_data[9]; + m[2] = d_data[10]; + m[3] = nonce; + + #pragma unroll + for (uint32_t i = 4; i < 16; i++) { + m[i] = d_data[i+8U]; + } + + // round 1 + GSPREC(0, 4, 0x8, 0xC, 0, 1); + GSPREC(1, 5, 0x9, 0xD, 2, 3); + GSPREC(2, 6, 0xA, 0xE, 4, 5); + GSPREC(3, 7, 0xB, 0xF, 6, 7); + GSPREC(0, 5, 0xA, 0xF, 8, 9); + GSPREC(1, 6, 0xB, 0xC, 10, 11); + GSPREC(2, 7, 0x8, 0xD, 12, 13); + GSPREC(3, 4, 0x9, 0xE, 14, 15); + // round 2 + GSPREC(0, 4, 0x8, 0xC, 14, 10); + GSPREC(1, 5, 0x9, 0xD, 4, 8); + GSPREC(2, 6, 0xA, 0xE, 9, 15); + GSPREC(3, 7, 0xB, 0xF, 13, 6); + GSPREC(0, 5, 0xA, 0xF, 1, 12); + GSPREC(1, 6, 0xB, 0xC, 0, 2); + GSPREC(2, 7, 0x8, 0xD, 11, 7); + GSPREC(3, 4, 0x9, 0xE, 5, 3); + // round 3 + GSPREC(0, 4, 0x8, 0xC, 11, 8); + GSPREC(1, 5, 0x9, 0xD, 12, 0); + GSPREC(2, 6, 0xA, 0xE, 5, 2); + GSPREC(3, 7, 0xB, 0xF, 15, 13); + GSPREC(0, 5, 0xA, 0xF, 10, 14); + GSPREC(1, 6, 0xB, 0xC, 3, 6); + GSPREC(2, 7, 0x8, 0xD, 7, 1); + GSPREC(3, 4, 0x9, 0xE, 9, 4); + // round 4 + GSPREC(0, 4, 0x8, 0xC, 7, 9); + GSPREC(1, 5, 0x9, 0xD, 3, 1); + GSPREC(2, 6, 0xA, 0xE, 13, 12); + GSPREC(3, 7, 0xB, 0xF, 11, 14); + GSPREC(0, 5, 0xA, 0xF, 2, 6); + GSPREC(1, 6, 0xB, 0xC, 5, 10); + GSPREC(2, 7, 0x8, 0xD, 4, 0); + GSPREC(3, 4, 0x9, 0xE, 15, 8); + // round 5 + GSPREC(0, 4, 0x8, 0xC, 9, 0); + GSPREC(1, 5, 0x9, 0xD, 5, 7); + GSPREC(2, 6, 0xA, 0xE, 2, 4); + GSPREC(3, 7, 0xB, 0xF, 10, 15); + GSPREC(0, 5, 0xA, 0xF, 14, 1); + GSPREC(1, 6, 0xB, 0xC, 11, 12); + GSPREC(2, 7, 0x8, 0xD, 6, 8); + GSPREC(3, 4, 0x9, 0xE, 3, 13); + // round 6 + GSPREC(0, 4, 0x8, 0xC, 2, 12); + GSPREC(1, 5, 0x9, 0xD, 6, 10); + GSPREC(2, 6, 0xA, 0xE, 0, 11); + GSPREC(3, 7, 0xB, 0xF, 8, 3); + GSPREC(0, 5, 0xA, 0xF, 4, 13); + GSPREC(1, 6, 0xB, 0xC, 7, 5); + GSPREC(2, 7, 0x8, 0xD, 15,14); + GSPREC(3, 4, 0x9, 0xE, 1, 9); + // round 7 + GSPREC(0, 4, 0x8, 0xC, 12, 5); + GSPREC(1, 5, 0x9, 0xD, 1, 15); + GSPREC(2, 6, 0xA, 0xE, 14,13); + GSPREC(3, 7, 0xB, 0xF, 4, 10); + GSPREC(0, 5, 0xA, 0xF, 0, 7); + GSPREC(1, 6, 0xB, 0xC, 6, 3); + GSPREC(2, 7, 0x8, 0xD, 9, 2); + GSPREC(3, 4, 0x9, 0xE, 8, 11); + // round 8 + GSPREC(0, 4, 0x8, 0xC, 13,11); + GSPREC(1, 5, 0x9, 0xD, 7, 14); + GSPREC(2, 6, 0xA, 0xE, 12, 1); + GSPREC(3, 7, 0xB, 0xF, 3, 9); + GSPREC(0, 5, 0xA, 0xF, 5, 0); + GSPREC(1, 6, 0xB, 0xC, 15, 4); + GSPREC(2, 7, 0x8, 0xD, 8, 6); + GSPREC(3, 4, 0x9, 0xE, 2, 10); + // round 9 + GSPREC(0, 4, 0x8, 0xC, 6, 15); + GSPREC(1, 5, 0x9, 0xD, 14, 9); + GSPREC(2, 6, 0xA, 0xE, 11, 3); + GSPREC(3, 7, 0xB, 0xF, 0, 8); + GSPREC(0, 5, 0xA, 0xF, 12, 2); + GSPREC(1, 6, 0xB, 0xC, 13, 7); + GSPREC(2, 7, 0x8, 0xD, 1, 4); + GSPREC(3, 4, 0x9, 0xE, 10, 5); + // round 10 + GSPREC(0, 4, 0x8, 0xC, 10, 2); + GSPREC(1, 5, 0x9, 0xD, 8, 4); + GSPREC(2, 6, 0xA, 0xE, 7, 6); + GSPREC(3, 7, 0xB, 0xF, 1, 5); + GSPREC(0, 5, 0xA, 0xF, 15,11); + GSPREC(1, 6, 0xB, 0xC, 9, 14); + GSPREC(2, 7, 0x8, 0xD, 3, 12); + GSPREC(3, 4, 0x9, 0xE, 13, 0); + // round 11 + GSPREC(0, 4, 0x8, 0xC, 0, 1); + GSPREC(1, 5, 0x9, 0xD, 2, 3); + GSPREC(2, 6, 0xA, 0xE, 4, 5); + GSPREC(3, 7, 0xB, 0xF, 6, 7); + GSPREC(0, 5, 0xA, 0xF, 8, 9); + GSPREC(1, 6, 0xB, 0xC, 10,11); + GSPREC(2, 7, 0x8, 0xD, 12,13); + GSPREC(3, 4, 0x9, 0xE, 14,15); + // round 12 + GSPREC(0, 4, 0x8, 0xC, 14,10); + GSPREC(1, 5, 0x9, 0xD, 4, 8); + GSPREC(2, 6, 0xA, 0xE, 9, 15); + GSPREC(3, 7, 0xB, 0xF, 13, 6); + GSPREC(0, 5, 0xA, 0xF, 1, 12); + GSPREC(1, 6, 0xB, 0xC, 0, 2); + GSPREC(2, 7, 0x8, 0xD, 11, 7); + GSPREC(3, 4, 0x9, 0xE, 5, 3); + // round 13 + GSPREC(0, 4, 0x8, 0xC, 11, 8); + GSPREC(1, 5, 0x9, 0xD, 12, 0); + GSPREC(2, 6, 0xA, 0xE, 5, 2); + GSPREC(3, 7, 0xB, 0xF, 15,13); + GSPREC(0, 5, 0xA, 0xF, 10,14); + GSPREC(1, 6, 0xB, 0xC, 3, 6); + GSPREC(2, 7, 0x8, 0xD, 7, 1); + GSPREC(3, 4, 0x9, 0xE, 9, 4); + // round 14 + GSPREC(0, 4, 0x8, 0xC, 7, 9); + GSPREC(1, 5, 0x9, 0xD, 3, 1); + GSPREC(2, 6, 0xA, 0xE, 13,12); + GSPREC(3, 7, 0xB, 0xF, 11,14); + GSPREC(0, 5, 0xA, 0xF, 2, 6); + GSPREC(1, 6, 0xB, 0xC, 5, 10); + GSPREC(2, 7, 0x8, 0xD, 4, 0); + //GSPREC(3, 4, 0x9, 0xE, 15, 8); + + v[3] += (m[15] ^ c_u256[8]) + v[4]; + v[14] = __byte_perm(v[14] ^ v[3], 0, 0x1032); + v[9] += v[14]; \ + v[4] = SPH_ROTR32(v[4] ^ v[9], 12); + v[3] += (m[8] ^ c_u256[15]) + v[4]; + v[14] = __byte_perm(v[14] ^ v[3], 0, 0x0321); + + // only compute h6 & 7 + h[6] ^= v[6] ^ v[14]; + h[7] ^= v[7] ^ v[15]; +} + +/* ############################################################################################################################### */ + +__global__ +void blake256_gpu_hash_nonce(const uint32_t threads, const uint32_t startNonce, uint32_t *resNonce, const uint64_t highTarget) +{ + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + const uint32_t nonce = startNonce + thread; + uint32_t h[8]; + + #pragma unroll + for(int i=0; i < 8; i++) { + h[i] = d_data[i]; + } + + // ------ Close: Last 52/64 bytes ------ + + blake256_compress_14(h, nonce, (180U*8U)); + + if (h[7] == 0 && cuda_swab32(h[6]) <= highTarget) { +#if NBN == 2 + if (resNonce[0] != UINT32_MAX) + resNonce[1] = nonce; + else + resNonce[0] = nonce; +#else + resNonce[0] = nonce; +#endif + } + } +} + +__host__ +static uint32_t decred_cpu_hash_nonce(const int thr_id, const uint32_t threads, const uint32_t startNonce, const uint64_t highTarget) +{ + uint32_t result = UINT32_MAX; + + dim3 grid((threads + TPB-1)/TPB); + dim3 block(TPB); + + /* Check error on Ctrl+C or kill to prevent segfaults on exit */ + if (cudaMemset(d_resNonce[thr_id], 0xff, NBN*sizeof(uint32_t)) != cudaSuccess) + return result; + + blake256_gpu_hash_nonce <<>> (threads, startNonce, d_resNonce[thr_id], highTarget); + + if (cudaSuccess == cudaMemcpy(h_resNonce[thr_id], d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost)) { + result = h_resNonce[thr_id][0]; +#if NBN > 1 + for (int n=0; n < (NBN-1); n++) + extra_results[n] = h_resNonce[thr_id][n+1]; +#endif + } + return result; +} + +__host__ +static void decred_midstate_128(uint32_t *output, const uint32_t *input) +{ + sph_blake256_context ctx; + + sph_blake256_set_rounds(14); + + sph_blake256_init(&ctx); + sph_blake256(&ctx, input, 128); + + memcpy(output, (void*)ctx.H, 32); +} + +__host__ +void decred_cpu_setBlock_52(uint32_t *penddata, const uint32_t *midstate, const uint32_t *ptarget) +{ + uint32_t _ALIGN(64) data[24]; + memcpy(data, midstate, 32); + // pre swab32 + for (int i=0; i<13; i++) + data[8+i] = swab32(penddata[i]); + data[21] = 0x80000001; + data[22] = 0; + data[23] = 0x000005a0; + CUDA_SAFE_CALL(cudaMemcpyToSymbol(d_data, data, 32 + 64, 0, cudaMemcpyHostToDevice)); +} + +/* ############################################################################################################################### */ + +static bool init[MAX_GPUS] = { 0 }; + +// nonce position is different in decred +#define DCR_NONCE_OFT32 35 + +extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done) +{ + uint32_t _ALIGN(64) endiandata[48]; + uint32_t _ALIGN(64) midstate[8]; + + uint32_t *pdata = work->data; + uint32_t *ptarget = work->target; + uint32_t *pnonce = &pdata[DCR_NONCE_OFT32]; + + const uint32_t first_nonce = *pnonce; + uint64_t targetHigh = ((uint64_t*)ptarget)[3]; + + int dev_id = device_map[thr_id]; + int intensity = (device_sm[dev_id] > 500 && !is_windows()) ? 29 : 25; + if (device_sm[dev_id] < 350) intensity = 22; + + uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); + + int rc = 0; + + if (opt_benchmark) { + targetHigh = 0x1ULL << 32; + ptarget[6] = swab32(0xff); + } + + if (!init[thr_id]) + { + cudaSetDevice(dev_id); + if (opt_cudaschedule == -1 && gpu_threads == 1) { + cudaDeviceReset(); + // reduce cpu usage (linux) + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + cudaDeviceSetCacheConfig(cudaFuncCachePreferL1); + CUDA_LOG_ERROR(); + } + + CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t)), -1); + CUDA_CALL_OR_RET_X(cudaMallocHost(&h_resNonce[thr_id], NBN * sizeof(uint32_t)), -1); + init[thr_id] = true; + } + + memcpy(endiandata, pdata, 180); + decred_midstate_128(midstate, endiandata); + decred_cpu_setBlock_52(&pdata[32], midstate, ptarget); + + do { + // GPU HASH + uint32_t foundNonce = decred_cpu_hash_nonce(thr_id, throughput, (*pnonce), targetHigh); + + if (foundNonce != UINT32_MAX) + { + uint32_t vhashcpu[8]; + uint32_t Htarg = ptarget[6]; + + be32enc(&endiandata[DCR_NONCE_OFT32], foundNonce); + decred_hash(vhashcpu, endiandata); + + if (vhashcpu[6] <= Htarg && fulltest(vhashcpu, ptarget)) + { + rc = 1; + work_set_target_ratio(work, vhashcpu); + *hashes_done = (*pnonce) - first_nonce + throughput; + work->nonces[0] = *pnonce = swab32(foundNonce); +#if NBN > 1 + if (extra_results[0] != UINT32_MAX) { + be32enc(&endiandata[DCR_NONCE_OFT32], extra_results[0]); + decred_hash(vhashcpu, endiandata); + if (vhashcpu[6] <= Htarg && fulltest(vhashcpu, ptarget)) { + work->nonces[1] = swab32(extra_results[0]); + if (bn_hash_target_ratio(vhashcpu, ptarget) > work->shareratio) { + work_set_target_ratio(work, vhashcpu); + xchg(work->nonces[1], *pnonce); + } + rc = 2; + } + extra_results[0] = UINT32_MAX; + } +#endif + return rc; + } + else if (opt_debug) { + applog_hash(ptarget); + applog_compare_hash(vhashcpu, ptarget); + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); + } + } + + *pnonce += throughput; + + } while (!work_restart[thr_id].restart && max_nonce > (uint64_t)throughput + (*pnonce)); + + *hashes_done = (*pnonce) - first_nonce; + + MyStreamSynchronize(NULL, 0, dev_id); + return rc; +} + +// cleanup +extern "C" void free_decred(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaDeviceSynchronize(); + + cudaFreeHost(h_resNonce[thr_id]); + cudaFree(d_resNonce[thr_id]); + + init[thr_id] = false; + + cudaDeviceSynchronize(); +} + diff --git a/Makefile.am b/Makefile.am index c37dabd..b2a0290 100644 --- a/Makefile.am +++ b/Makefile.am @@ -36,7 +36,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \ lyra2/lyra2REv2.cu lyra2/cuda_lyra2v2.cu \ Algo256/cuda_bmw256.cu Algo256/cuda_cubehash256.cu \ Algo256/cuda_blake256.cu Algo256/cuda_groestl256.cu Algo256/cuda_keccak256.cu Algo256/cuda_skein256.cu \ - Algo256/blake256.cu Algo256/keccak256.cu \ + Algo256/blake256.cu Algo256/decred.cu Algo256/keccak256.cu \ Algo256/bmw.cu Algo256/cuda_bmw.cu \ JHA/jackpotcoin.cu JHA/cuda_jha_keccak512.cu \ JHA/cuda_jha_compactionTest.cu cuda_checkhash.cu \ diff --git a/algos.h b/algos.h index 0a49ca4..a0cefb0 100644 --- a/algos.h +++ b/algos.h @@ -10,6 +10,7 @@ enum sha_algos { ALGO_BMW, ALGO_C11, ALGO_DEEP, + ALGO_DECRED, ALGO_DMD_GR, ALGO_FRESH, ALGO_FUGUE256, /* Fugue256 */ @@ -55,6 +56,7 @@ static const char *algo_names[] = { "bmw", "c11", "deep", + "decred", "dmd-gr", "fresh", "fugue256", diff --git a/bench.cpp b/bench.cpp index d5cce84..a9998ea 100644 --- a/bench.cpp +++ b/bench.cpp @@ -47,6 +47,7 @@ void algo_free_all(int thr_id) free_blake256(thr_id); free_bmw(thr_id); free_c11(thr_id); + free_decred(thr_id); free_deep(thr_id); free_keccak256(thr_id); free_fresh(thr_id); diff --git a/ccminer.cpp b/ccminer.cpp index cf2884c..09b1c94 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -546,6 +546,7 @@ static void calc_network_diff(struct work *work) // sample for diff 43.281 : 1c05ea29 // todo: endian reversed on longpoll could be zr5 specific... uint32_t nbits = have_longpoll ? work->data[18] : swab32(work->data[18]); + if (opt_algo == ALGO_DECRED) nbits = work->data[29]; uint32_t bits = (nbits & 0xffffff); int16_t shift = (swab32(nbits) & 0xff); // 0x1c = 28 @@ -568,16 +569,20 @@ static bool work_decode(const json_t *val, struct work *work) int i; switch (opt_algo) { + case ALGO_DECRED: + data_size = 192; + adata_sz = 180/4; + break; case ALGO_NEOSCRYPT: case ALGO_ZR5: data_size = 80; + adata_sz = data_size / 4; break; default: - data_size = 128; // sizeof(work->data); + data_size = 128; + adata_sz = data_size / 4; } - adata_sz = data_size / 4; // sizeof(uint32_t); - if (!jobj_binary(val, "data", work->data, data_size)) { json_t *obj = json_object_get(val, "data"); int len = obj ? (int) strlen(json_string_value(obj)) : 0; @@ -647,20 +652,30 @@ static bool work_decode(const json_t *val, struct work *work) } } - json_t *jr = json_object_get(val, "noncerange"); - if (jr) { - const char * hexstr = json_string_value(jr); - if (likely(hexstr)) { - // never seen yet... - hex2bin((uchar*)work->noncerange.u64, hexstr, 8); - applog(LOG_DEBUG, "received noncerange: %08x-%08x", - work->noncerange.u32[0], work->noncerange.u32[1]); - } - } - /* use work ntime as job id (solo-mining) */ cbin2hex(work->job_id, (const char*)&work->data[17], 4); + if (opt_algo == ALGO_DECRED) { + // some random extradata to make it unique + work->data[36] = (rand()*4); + work->data[37] = (rand()*4) << 8; + // required for the longpoll pool block info... + work->height = work->data[32]; + if (!have_longpoll && work->height > net_blocks + 1) { + char netinfo[64] = { 0 }; + if (opt_showdiff && net_diff > 0.) { + if (net_diff != work->targetdiff) + sprintf(netinfo, ", diff %.3f, pool %.1f", net_diff, work->targetdiff); + else + sprintf(netinfo, ", diff %.3f", net_diff); + } + applog(LOG_BLUE, "%s block %d%s", + algo_names[opt_algo], work->height, netinfo); + net_blocks = work->height - 1; + } + cbin2hex(work->job_id, (const char*)&work->data[34], 4); + } + return true; } @@ -729,10 +744,10 @@ static int share_result(int result, int pooln, double sharediff, const char *rea static bool submit_upstream_work(CURL *curl, struct work *work) { + char s[512]; struct pool_infos *pool = &pools[work->pooln]; json_t *val, *res, *reason; bool stale_work = false; - char s[384]; /* discard if a newer block was received */ stale_work = work->height && work->height < g_work.height; @@ -776,6 +791,8 @@ static bool submit_upstream_work(CURL *curl, struct work *work) be32enc(&ntime, work->data[17]); be32enc(&nonce, work->data[19]); break; + case ALGO_DECRED: + break; case ALGO_BLAKE: case ALGO_BLAKECOIN: case ALGO_BMW: @@ -852,6 +869,9 @@ static bool submit_upstream_work(CURL *curl, struct work *work) if (opt_algo == ALGO_ZR5) { data_size = 80; adata_sz = 20; } + else if (opt_algo == ALGO_DECRED) { + data_size = 192; adata_sz = 180/4; + } if (opt_algo != ALGO_HEAVY && opt_algo != ALGO_MJOLLNIR) { for (int i = 0; i < adata_sz; i++) @@ -971,7 +991,7 @@ static bool get_mininginfo(CURL *curl, struct work *work) struct pool_infos *pool = &pools[work->pooln]; int curl_err = 0; - if (have_stratum || !allow_mininginfo) + if (have_stratum || have_longpoll || !allow_mininginfo) return false; json_t *val = json_rpc_call_pool(curl, pool, info_req, false, false, &curl_err); @@ -1223,8 +1243,12 @@ bool get_work(struct thr_info *thr, struct work *work) memset(work->data, 0x55, 76); //work->data[17] = swab32((uint32_t)time(NULL)); memset(work->data + 19, 0x00, 52); - work->data[20] = 0x80000000; - work->data[31] = 0x00000280; + if (opt_algo == ALGO_DECRED) { + memset(&work->data[35], 0x00, 52); + } else { + work->data[20] = 0x80000000; + work->data[31] = 0x00000280; + } memset(work->target, 0x00, sizeof(work->target)); return true; } @@ -1358,8 +1382,14 @@ static bool stratum_gen_work(struct stratum_ctx *sctx, struct work *work) break; } - work->data[20] = 0x80000000; - work->data[31] = (opt_algo == ALGO_MJOLLNIR) ? 0x000002A0 : 0x00000280; + if (opt_algo == ALGO_DECRED) { + work->data[45] = 0x80000001; + work->data[46] = 0; + work->data[47] = 0x000005a0; + } else { + work->data[20] = 0x80000000; + work->data[31] = (opt_algo == ALGO_MJOLLNIR) ? 0x000002A0 : 0x00000280; + } // HeavyCoin (vote / reward) if (opt_algo == ALGO_HEAVY) { @@ -1554,7 +1584,7 @@ static void *miner_thread(void *userdata) uint64_t max64, minmax = 0x100000; // &work.data[19] - int wcmplen = 76; + int wcmplen = (opt_algo == ALGO_DECRED) ? 140 : 76; int wcmpoft = 0; uint32_t *nonceptr = (uint32_t*) (((char*)work.data) + wcmplen); @@ -1633,9 +1663,16 @@ static void *miner_thread(void *userdata) #endif memcpy(&work, &g_work, sizeof(struct work)); nonceptr[0] = (UINT32_MAX / opt_n_threads) * thr_id; // 0 if single thr + if (opt_algo == ALGO_DECRED) nonceptr[0] = 0; } else nonceptr[0]++; //?? + if (opt_algo == ALGO_DECRED) { + end_nonce = 0xF0000000UL; + nonceptr[1] += 1; + nonceptr[2] |= thr_id; + } + pthread_mutex_unlock(&g_work_lock); // --benchmark [-a all] @@ -1751,6 +1788,7 @@ static void *miner_thread(void *userdata) break; case ALGO_BLAKE: case ALGO_BMW: + case ALGO_DECRED: case ALGO_WHIRLPOOLX: minmax = 0x40000000U; break; @@ -1839,6 +1877,9 @@ static void *miner_thread(void *userdata) case ALGO_C11: rc = scanhash_c11(thr_id, &work, max_nonce, &hashes_done); break; + case ALGO_DECRED: + rc = scanhash_decred(thr_id, &work, max_nonce, &hashes_done); + break; case ALGO_DEEP: rc = scanhash_deep(thr_id, &work, max_nonce, &hashes_done); break; @@ -2154,9 +2195,13 @@ longpoll_retry: if (net_diff > 0.) { sprintf(netinfo, ", diff %.3f", net_diff); } - if (opt_showdiff) + if (opt_showdiff) { sprintf(&netinfo[strlen(netinfo)], ", target %.3f", g_work.targetdiff); - applog(LOG_BLUE, "%s detected new block%s", short_url, netinfo); + } + if (g_work.height) + applog(LOG_BLUE, "%s block %u%s", algo_names[opt_algo], g_work.height, netinfo); + else + applog(LOG_BLUE, "%s detected new block%s", short_url, netinfo); } g_work_time = time(NULL); } @@ -3175,6 +3220,13 @@ int main(int argc, char *argv[]) cur_pooln = pool_get_first_valid(0); pool_switch(-1, cur_pooln); + if (opt_algo == ALGO_DECRED) { + allow_gbt = false; + want_stratum = have_stratum = false; + allow_mininginfo = false; + want_longpoll = true; + } + flags = !opt_benchmark && strncmp(rpc_url, "https:", 6) ? (CURL_GLOBAL_ALL & ~CURL_GLOBAL_SSL) : CURL_GLOBAL_ALL; @@ -3317,6 +3369,12 @@ int main(int argc, char *argv[]) /* real start of the stratum work */ if (want_stratum && have_stratum) { tq_push(thr_info[stratum_thr_id].q, strdup(rpc_url)); + } else { + // hmm, weird on Multicoin.co + //char lpurl[512]; + //sprintf(lpurl, "%s/LP", rpc_url); + //if (opt_algo == ALGO_DECRED) + // tq_push(thr_info[longpoll_thr_id].q, strdup(lpurl)); } #ifdef USE_WRAPNVML diff --git a/ccminer.vcxproj b/ccminer.vcxproj index a9f6488..60f4330 100644 --- a/ccminer.vcxproj +++ b/ccminer.vcxproj @@ -408,6 +408,7 @@ --ptxas-options="-dlcm=cg" %(AdditionalOptions) true + diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters index 81c61ec..cc67ed0 100644 --- a/ccminer.vcxproj.filters +++ b/ccminer.vcxproj.filters @@ -622,6 +622,9 @@ Source Files\CUDA + + Source Files\CUDA + Source Files\CUDA diff --git a/cpuminer-config.h b/cpuminer-config.h index 6cf335d..7798f7a 100644 --- a/cpuminer-config.h +++ b/cpuminer-config.h @@ -66,7 +66,7 @@ #define HAVE_STRING_H 1 /* Define to 1 if you have the header file. */ -/* #undef HAVE_SYSLOG_H */ +#define HAVE_SYSLOG_H 1 /* Define to 1 if you have the header file. */ /* #undef HAVE_SYS_ENDIAN_H */ @@ -87,7 +87,7 @@ #define HAVE_UNISTD_H 1 /* Defined if libcurl supports AsynchDNS */ -/* #undef LIBCURL_FEATURE_ASYNCHDNS */ +#define LIBCURL_FEATURE_ASYNCHDNS 1 /* Defined if libcurl supports IDN */ #define LIBCURL_FEATURE_IDN 1 @@ -111,7 +111,7 @@ /* #undef LIBCURL_FEATURE_SSPI */ /* Defined if libcurl supports DICT */ -/* #undef LIBCURL_PROTOCOL_DICT */ +#define LIBCURL_PROTOCOL_DICT 1 /* Defined if libcurl supports FILE */ #define LIBCURL_PROTOCOL_FILE 1 @@ -126,31 +126,28 @@ #define LIBCURL_PROTOCOL_HTTP 1 /* Defined if libcurl supports HTTPS */ -/* #undef LIBCURL_PROTOCOL_HTTPS */ +#define LIBCURL_PROTOCOL_HTTPS 1 /* Defined if libcurl supports IMAP */ -/* #undef LIBCURL_PROTOCOL_IMAP */ +#define LIBCURL_PROTOCOL_IMAP 1 /* Defined if libcurl supports LDAP */ -/* #undef LIBCURL_PROTOCOL_LDAP */ +#define LIBCURL_PROTOCOL_LDAP 1 /* Defined if libcurl supports POP3 */ -/* #undef LIBCURL_PROTOCOL_POP3 */ +#define LIBCURL_PROTOCOL_POP3 1 /* Defined if libcurl supports RTSP */ -/* #undef LIBCURL_PROTOCOL_RTSP */ +#define LIBCURL_PROTOCOL_RTSP 1 /* Defined if libcurl supports SMTP */ -/* #undef LIBCURL_PROTOCOL_SMTP */ +#define LIBCURL_PROTOCOL_SMTP 1 /* Defined if libcurl supports TELNET */ -/* #undef LIBCURL_PROTOCOL_TELNET */ +#define LIBCURL_PROTOCOL_TELNET 1 /* Defined if libcurl supports TFTP */ -/* #undef LIBCURL_PROTOCOL_TFTP */ - -/* Define to 1 if your C compiler doesn't accept -c and -o together. */ -/* #undef NO_MINUS_C_MINUS_O */ +#define LIBCURL_PROTOCOL_TFTP 1 /* Name of package */ #define PACKAGE "ccminer" @@ -191,4 +188,4 @@ /* #undef curl_free */ /* Define to `unsigned int' if does not define. */ -//#define size_t unsigned int +/* #undef size_t */ diff --git a/miner.h b/miner.h index a2465c6..1ca1d8b 100644 --- a/miner.h +++ b/miner.h @@ -264,6 +264,7 @@ struct work; extern int scanhash_blake256(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done, int8_t blakerounds); extern int scanhash_bmw(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done); extern int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done); +extern int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done); extern int scanhash_deep(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done); extern int scanhash_keccak256(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done); extern int scanhash_fresh(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done); @@ -305,6 +306,7 @@ void algo_free_all(int thr_id); extern void free_blake256(int thr_id); extern void free_bmw(int thr_id); extern void free_c11(int thr_id); +extern void free_decred(int thr_id); extern void free_deep(int thr_id); extern void free_keccak256(int thr_id); extern void free_fresh(int thr_id); @@ -536,7 +538,7 @@ extern void gpulog(int prio, int thr_id, const char *fmt, ...); void get_defconfig_path(char *out, size_t bufsize, char *argv0); extern void cbin2hex(char *out, const char *in, size_t len); extern char *bin2hex(const unsigned char *in, size_t len); -extern bool hex2bin(unsigned char *p, const char *hexstr, size_t len); +extern bool hex2bin(void *output, const char *hexstr, size_t len); extern int timeval_subtract(struct timeval *result, struct timeval *x, struct timeval *y); extern bool fulltest(const uint32_t *hash, const uint32_t *target); @@ -612,7 +614,7 @@ struct tx { }; struct work { - uint32_t data[32]; + uint32_t data[48]; uint32_t target[8]; uint32_t maxvote; @@ -625,6 +627,8 @@ struct work { uint64_t u64[1]; } noncerange; + uint32_t nonces[2]; + double targetdiff; double shareratio; double sharediff; @@ -761,13 +765,16 @@ void restart_threads(void); size_t time2str(char* buf, time_t timer); char* atime2str(time_t timer); -void applog_hash(unsigned char *hash); -void applog_compare_hash(unsigned char *hash, unsigned char *hash2); +void applog_hex(void *data, int len); +void applog_hash(void *hash); +void applog_hash64(void *hash); +void applog_compare_hash(void *hash, void *hash_ref); void print_hash_tests(void); void blake256hash(void *output, const void *input, int8_t rounds); void bmw_hash(void *state, const void *input); void c11hash(void *output, const void *input); +void decred_hash(void *state, const void *input); void deephash(void *state, const void *input); void luffa_hash(void *state, const void *input); void fresh_hash(void *state, const void *input); diff --git a/util.cpp b/util.cpp index db43beb..d448b96 100644 --- a/util.cpp +++ b/util.cpp @@ -724,9 +724,10 @@ char *bin2hex(const uchar *in, size_t len) return s; } -bool hex2bin(uchar *p, const char *hexstr, size_t len) +bool hex2bin(void *output, const char *hexstr, size_t len) { - char hex_byte[3]; + uchar *p = (uchar *) output; + char hex_byte[4]; char *ep; hex_byte[2] = '\0'; @@ -1791,8 +1792,9 @@ char* atime2str(time_t timer) } /* sprintf can be used in applog */ -static char* format_hash(char* buf, uchar *hash) +static char* format_hash(char* buf, uint8_t* h) { + uchar *hash = (uchar*) h; int len = 0; for (int i=0; i < 32; i += 4) { len += sprintf(buf+len, "%02x%02x%02x%02x ", @@ -1802,23 +1804,39 @@ static char* format_hash(char* buf, uchar *hash) } /* to debug diff in data */ -extern void applog_compare_hash(uchar *hash, uchar *hash2) +void applog_compare_hash(void *hash, void *hash_ref) { char s[256] = ""; int len = 0; + uchar* hash1 = (uchar*)hash; + uchar* hash2 = (uchar*)hash_ref; for (int i=0; i < 32; i += 4) { - const char *color = memcmp(hash+i, hash2+i, 4) ? CL_WHT : CL_GRY; + const char *color = memcmp(hash1+i, hash2+i, 4) ? CL_WHT : CL_GRY; len += sprintf(s+len, "%s%02x%02x%02x%02x " CL_GRY, color, - hash[i], hash[i+1], hash[i+2], hash[i+3]); + hash1[i], hash1[i+1], hash1[i+2], hash1[i+3]); s[len] = '\0'; } applog(LOG_DEBUG, "%s", s); } -extern void applog_hash(uchar *hash) +void applog_hash(void *hash) +{ + char s[128] = {'\0'}; + applog(LOG_DEBUG, "%s", format_hash(s, (uint8_t*)hash)); +} + +void applog_hash64(void *hash) { char s[128] = {'\0'}; - applog(LOG_DEBUG, "%s", format_hash(s, hash)); + char t[128] = {'\0'}; + applog(LOG_DEBUG, "%s %s", format_hash(s, (uint8_t*)hash), format_hash(t, &((uint8_t*)hash)[32])); +} + +void applog_hex(void *data, int len) +{ + char* hex = bin2hex((uchar*)data, len); + applog(LOG_DEBUG, "%s", hex); + free(hex); } #define printpfx(n,h) \ @@ -1865,7 +1883,7 @@ void do_gpu_tests(void) //scanhash_scrypt_jane(0, &work, NULL, 1, &done, &tv, &tv); memset(work.data, 0, sizeof(work.data)); - scanhash_sib(0, &work, 1, &done); + scanhash_decred(0, &work, 1, &done); free(work_restart); work_restart = NULL; @@ -1878,7 +1896,7 @@ void print_hash_tests(void) uchar *scratchbuf = NULL; char s[128] = {'\0'}; uchar hash[128]; - uchar buf[128]; + uchar buf[192]; // work space for scratchpad based algos scratchbuf = (uchar*)calloc(128, 1024); @@ -1900,6 +1918,10 @@ void print_hash_tests(void) c11hash(&hash[0], &buf[0]); printpfx("c11", hash); + memset(buf, 0, 180); + decred_hash(&hash[0], &buf[0]); + printpfx("decred", hash); + deephash(&hash[0], &buf[0]); printpfx("deep", hash);