GOSTCoin CUDA miner project, compatible with most NVIDIA cards, containing only the gostd algorithm
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

295 lines
8.2 KiB

/**
* Equihash solver interface for ccminer (compatible with linux and windows)
* Solver taken from nheqminer, by djeZo (and NiceHash)
* tpruvot - 2017 (GPL v3)
*/
#include <stdio.h>
#include <unistd.h>
#include <assert.h>
#include <stdexcept>
#include <vector>
#include <sph/sph_sha2.h>
#include "eqcuda.hpp"
#include "equihash.h" // equi_verify()
#include <miner.h>
// All solutions (BLOCK_HEADER_LEN + SOLSIZE_LEN + SOL_LEN) sha256d should be under the target
extern "C" void equi_hash(const void* input, void* output, int len)
{
uint8_t _ALIGN(64) hash0[32], hash1[32];
sph_sha256_context ctx_sha256;
sph_sha256_init(&ctx_sha256);
sph_sha256(&ctx_sha256, input, len);
sph_sha256_close(&ctx_sha256, hash0);
sph_sha256(&ctx_sha256, hash0, 32);
sph_sha256_close(&ctx_sha256, hash1);
memcpy(output, hash1, 32);
}
// input here is 140 for the header and 1344 for the solution (equi.cpp)
/**
 * C-callable wrapper around equi_verify().
 *
 * hdr - block header bytes
 * sol - solution bytes
 * Returns 1 when equi_verify() accepts the pair, 0 otherwise.
 */
extern "C" int equi_verify_sol(void * const hdr, void * const sol)
{
    uint8_t* const header_bytes = (uint8_t*) hdr;
    uint8_t* const solution_bytes = (uint8_t*) sol;

    // debugging aid: dump both buffers with applog_hex() (140 and 1344 bytes)
    if (equi_verify(header_bytes, solution_bytes))
        return 1;
    return 0;
}
#include <cuda_helper.h>
//#define EQNONCE_OFFSET 30 /* 27:34 */
// Index (in 32-bit words) of the header word that scanhash_equihash() advances
// between solver runs and reports as the found nonce.
#define NONCE_OFT EQNONCE_OFFSET
// Per-thread state below is indexed by thr_id.
// init[]: set once scanhash_equihash() has created the thread's solver,
// cleared again by free_equihash().
static bool init[MAX_GPUS] = { 0 };
// valid_sols[]: number of solutions cb_solution() stored during the current
// solve() call; reset to 0 before each call, set to -1 by cb_hashdone() when
// nothing was found and by cb_cancel() when a work restart is pending.
static int valid_sols[MAX_GPUS] = { 0 };
// data_sols[]: one slot per stored solution; cb_solution() writes the solution
// size prefix at byte 140 and the 1344-byte solution at byte 143 of the slot.
static uint8_t _ALIGN(64) data_sols[MAX_GPUS][MAXREALSOLS][1536] = { 0 }; // 140+3+1344 required
// solvers[]: solver instance owned by each thread (created in
// scanhash_equihash(), deleted in free_equihash()).
static eq_cuda_context_interface* solvers[MAX_GPUS] = { NULL };
/**
 * Pack fixed-width big-endian elements into a contiguous bit stream.
 *
 * Each input element occupies in_width = ceil(bit_len / 8) + byte_pad bytes;
 * the first byte_pad bytes of an element are skipped and only the low bit_len
 * bits of the remaining bytes are kept.
 *
 * in       - input elements
 * in_len   - size of the input in bytes
 * out      - receives the packed bytes
 * out_len  - must equal bit_len * in_len / (8 * in_width)
 * bit_len  - significant bits per element (8..25, enforced by the asserts)
 * byte_pad - leading bytes to ignore in every element
 */
static void CompressArray(const unsigned char* in, size_t in_len,
    unsigned char* out, size_t out_len, size_t bit_len, size_t byte_pad)
{
    assert(bit_len >= 8);
    assert(8 * sizeof(uint32_t) >= 7 + bit_len);

    const size_t in_width = (bit_len + 7) / 8 + byte_pad;
    assert(out_len == bit_len*in_len / (8 * in_width));

    const uint32_t bit_len_mask = (1UL << bit_len) - 1;

    // The low pending_bits bits of accumulator hold the bits that have been
    // read from the input but not yet written out, most significant first.
    uint32_t accumulator = 0;
    size_t pending_bits = 0;
    size_t src_off = 0;

    for (size_t out_pos = 0; out_pos < out_len; ++out_pos) {
        // Refill with the next element once less than one byte is pending.
        if (pending_bits < 8) {
            accumulator <<= bit_len;
            for (size_t byte_idx = byte_pad; byte_idx < in_width; ++byte_idx) {
                // Position of this byte inside the big-endian element.
                const size_t shift = 8 * (in_width - byte_idx - 1);
                // Slice of bit_len_mask covering this byte.
                const uint32_t byte_mask = (bit_len_mask >> shift) & 0xFF;
                accumulator |= (in[src_off + byte_idx] & byte_mask) << shift;
            }
            src_off += in_width;
            pending_bits += bit_len;
        }

        // Emit the topmost pending byte.
        pending_bits -= 8;
        out[out_pos] = (accumulator >> pending_bits) & 0xFF;
    }
}
#ifndef htobe32
#define htobe32(x) swab32(x)
#endif
static void EhIndexToArray(const u32 i, unsigned char* arr)
{
u32 bei = htobe32(i);
memcpy(arr, &bei, sizeof(u32));
}
static std::vector<unsigned char> GetMinimalFromIndices(std::vector<u32> indices, size_t cBitLen)
{
assert(((cBitLen + 1) + 7) / 8 <= sizeof(u32));
size_t lenIndices = indices.size()*sizeof(u32);
size_t minLen = (cBitLen + 1)*lenIndices / (8 * sizeof(u32));
size_t bytePad = sizeof(u32) - ((cBitLen + 1) + 7) / 8;
std::vector<unsigned char> array(lenIndices);
for (size_t i = 0; i < indices.size(); i++) {
EhIndexToArray(indices[i], array.data() + (i*sizeof(u32)));
}
std::vector<unsigned char> ret(minLen);
CompressArray(array.data(), lenIndices, ret.data(), minLen, cBitLen + 1, bytePad);
return ret;
}
// solver callbacks
static void cb_solution(int thr_id, const std::vector<uint32_t>& solutions, size_t cbitlen, const unsigned char *compressed_sol)
{
std::vector<unsigned char> nSolution;
if (!compressed_sol) {
nSolution = GetMinimalFromIndices(solutions, cbitlen);
} else {
gpulog(LOG_INFO, thr_id, "compressed_sol");
nSolution = std::vector<unsigned char>(1344);
for (size_t i = 0; i < cbitlen; i++)
nSolution[i] = compressed_sol[i];
}
int nsol = valid_sols[thr_id];
if (nsol < 0) nsol = 0;
if(nSolution.size() == 1344) {
// todo, only store solution data here...
le32enc(&data_sols[thr_id][nsol][140], 0x000540fd); // sol sz header
memcpy(&data_sols[thr_id][nsol][143], nSolution.data(), 1344);
valid_sols[thr_id] = nsol + 1;
}
}
/**
 * Solver callback invoked when a solve pass is finished: if no solution was
 * stored for this thread, mark the counter as -1.
 */
static void cb_hashdone(int thr_id)
{
    int& stored = valid_sols[thr_id];
    if (stored == 0) {
        stored = -1;
    }
}
/**
 * Solver callback polled to find out whether the current solve should stop.
 *
 * When a work restart is flagged for this thread, the thread's solution
 * counter is set to -1. Returns the state of the restart flag.
 */
static bool cb_cancel(int thr_id)
{
    if (work_restart[thr_id].restart) {
        // discard whatever was collected for the outdated work
        valid_sols[thr_id] = -1;
    }
    // the flag is read again on purpose: this mirrors the value returned to the solver
    return work_restart[thr_id].restart;
}
/**
 * Scan nonces for Equihash solutions whose sha256d hash meets the target.
 *
 * thr_id      - mining thread index (selects the solver and per-thread state)
 * work        - work unit; data[] holds the 140-byte header, target[] the
 *               share target. On success the header, nonce and solution are
 *               stored back into it.
 * max_nonce   - not used by this implementation (the loop ends on a found
 *               solution or on a work restart)
 * hashes_done - receives the number of solutions produced by the solver
 *
 * Returns work->valid_nonces (0 or more), or -1 when solver creation or a
 * solver run failed.
 */
extern "C" int scanhash_equihash(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done)
{
    uint32_t _ALIGN(64) endiandata[35];
    uint32_t *pdata = work->data;
    uint32_t *ptarget = work->target;
    uint32_t nonce_increment = rand() & 0xFF; // nonce randomizer
    struct timeval tv_start, tv_end, diff;
    double secs, solps;
    uint32_t soluce_count = 0;

    // A zero step would make the loop below solve the very same nonce again
    // and again until the next work restart.
    if (nonce_increment == 0)
        nonce_increment = 1;

    if (opt_benchmark)
        ptarget[7] = 0xfffff;

    // Lazily create the solver for this thread on first use.
    if (!init[thr_id]) {
        try {
            int mode = 1;
            switch (mode) {
            case 1:
                solvers[thr_id] = new eq_cuda_context<CONFIG_MODE_1>(thr_id, device_map[thr_id]);
                break;
#ifdef CONFIG_MODE_2
            case 2:
                solvers[thr_id] = new eq_cuda_context<CONFIG_MODE_2>(thr_id, device_map[thr_id]);
                break;
#endif
#ifdef CONFIG_MODE_3
            case 3:
                solvers[thr_id] = new eq_cuda_context<CONFIG_MODE_3>(thr_id, device_map[thr_id]);
                break;
#endif
            default:
                proper_exit(EXIT_CODE_SW_INIT_ERROR);
                return -1;
            }
            size_t memSz = solvers[thr_id]->equi_mem_sz / (1024*1024);
            gpus_intensity[thr_id] = (uint32_t) solvers[thr_id]->throughput;
            api_set_throughput(thr_id, gpus_intensity[thr_id]);
            gpulog(LOG_DEBUG, thr_id, "Allocated %u MB of context memory", (u32) memSz);
            cuda_get_arch(thr_id);
            init[thr_id] = true;
        } catch (const std::exception & e) {
            CUDA_LOG_ERROR();
            gpulog(LOG_ERR, thr_id, "init: %s", e.what());
            proper_exit(EXIT_CODE_CUDA_ERROR);
            // same as the default: case above - never continue without a solver
            return -1;
        }
    }

    gettimeofday(&tv_start, NULL);
    memcpy(endiandata, pdata, 140);
    work->valid_nonces = 0;

    do {
        try {
            valid_sols[thr_id] = 0;
            // header without the nonce (first 108 bytes), then the 32-byte nonce
            solvers[thr_id]->solve(
                (const char *) endiandata, (unsigned int) (140 - 32),
                (const char *) &endiandata[27], (unsigned int) 32,
                &cb_cancel, &cb_solution, &cb_hashdone
            );
            *hashes_done = soluce_count;
        } catch (const std::exception & e) {
            gpulog(LOG_WARNING, thr_id, "solver: %s", e.what());
            free_equihash(thr_id);
            sleep(1);
            return -1;
        }

        if (valid_sols[thr_id] > 0)
        {
            const uint32_t Htarg = ptarget[7];
            uint32_t _ALIGN(64) vhash[8];
            uint8_t _ALIGN(64) full_data[140+3+1344] = { 0 };
            uint8_t* sol_data = &full_data[140];

            soluce_count += valid_sols[thr_id];

            for (int nsol=0; nsol < valid_sols[thr_id]; nsol++)
            {
                // header + size prefix + solution, hashed as one buffer
                memcpy(full_data, endiandata, 140);
                memcpy(sol_data, &data_sols[thr_id][nsol][140], 1347);
                equi_hash(full_data, vhash, 140+3+1344);

                if (vhash[7] <= Htarg && fulltest(vhash, ptarget))
                {
                    // skip the 3-byte size prefix when verifying the solution
                    bool valid = equi_verify_sol(endiandata, &sol_data[3]);
                    if (valid && work->valid_nonces < MAX_NONCES) {
                        work->valid_nonces++;
                        memcpy(work->data, endiandata, 140);
                        equi_store_work_solution(work, vhash, sol_data);
                        work->nonces[work->valid_nonces-1] = endiandata[NONCE_OFT];
                        pdata[NONCE_OFT] = endiandata[NONCE_OFT] + 1;
                        //applog_hex(vhash, 32);
                        //applog_hex(&work->data[27], 32);
                        goto out; // second solution storage not handled..
                    }
                }
                if (work->valid_nonces == MAX_NONCES) goto out;
            }
            if (work->valid_nonces)
                goto out;

            valid_sols[thr_id] = 0;
        }

        endiandata[NONCE_OFT] += nonce_increment;

    } while (!work_restart[thr_id].restart);

out:
    gettimeofday(&tv_end, NULL);
    timeval_subtract(&diff, &tv_end, &tv_start);
    secs = (1.0 * diff.tv_sec) + (0.000001 * diff.tv_usec);
    // avoid reporting inf/nan when no measurable time has elapsed
    solps = (secs > 0.0) ? ((double)soluce_count / secs) : 0.0;
    gpulog(LOG_DEBUG, thr_id, "%d solutions in %.2f s (%.2f Sol/s)", soluce_count, secs, solps);

    // H/s
    *hashes_done = soluce_count;

    // next call resumes right after the last nonce tried here
    pdata[NONCE_OFT] = endiandata[NONCE_OFT] + 1;

    return work->valid_nonces;
}
// cleanup
/**
 * Destroy the solver of a mining thread, if one was created, and mark the
 * thread as uninitialized so scanhash_equihash() creates a new one.
 */
void free_equihash(int thr_id)
{
    if (init[thr_id]) {
        delete solvers[thr_id];
        solvers[thr_id] = NULL;
        init[thr_id] = false;
    }
}
// mmm... viva c++ junk
// Out-of-line definition of the interface's solve() with an empty body.
// NOTE(review): presumably the real work is done by the eq_cuda_context<>
// specializations created in scanhash_equihash() - confirm in eqcuda.hpp.
void eq_cuda_context_interface::solve(const char *tequihash_header, unsigned int tequihash_header_len,
    const char* nonce, unsigned int nonce_len,
    fn_cancel cancelf, fn_solution solutionf, fn_hashdone hashdonef)
{
    // intentionally does nothing
}
// Out-of-line definition of the interface destructor; nothing to release here.
eq_cuda_context_interface::~eq_cuda_context_interface()
{
}