1
0
mirror of https://github.com/GOSTSec/ccminer synced 2025-01-10 14:57:53 +00:00
ccminer/x11/fresh.cu
Tanguy Pruvot 9eead77027 diff: show by default, rework shares diff storage
This will allow later more gpu candidates.

Note: This is an unfinished work, we keep the previous behavior for now
To finish this, all algos solutions should be migrated and submitted nonces attributes stored.
Its required to handle the different share diff per nonce and fix the possible solved count error (if 1/2 nonces is solved).
2016-09-27 09:03:24 +02:00

179 lines
5.3 KiB
Plaintext

/**
* Fresh algorithm
*/
extern "C" {
#include "sph/sph_shavite.h"
#include "sph/sph_simd.h"
#include "sph/sph_echo.h"
}
#include "miner.h"
#include "cuda_helper.h"
// to test gpu hash on a null buffer
#define NULLTEST 0
static uint32_t *d_hash[MAX_GPUS];
extern void x11_shavite512_cpu_init(int thr_id, uint32_t threads);
extern void x11_shavite512_setBlock_80(void *pdata);
extern void x11_shavite512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_simd512_cpu_free(int thr_id);
extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
// CPU Hash
extern "C" void fresh_hash(void *state, const void *input)
{
// shavite-simd-shavite-simd-echo
sph_shavite512_context ctx_shavite;
sph_simd512_context ctx_simd;
sph_echo512_context ctx_echo;
unsigned char hash[128]; // uint32_t hashA[16], hashB[16];
#define hashA hash
#define hashB hash+64
memset(hash, 0, sizeof hash);
sph_shavite512_init(&ctx_shavite);
sph_shavite512(&ctx_shavite, input, 80);
sph_shavite512_close(&ctx_shavite, hashA);
sph_simd512_init(&ctx_simd);
sph_simd512(&ctx_simd, hashA, 64);
sph_simd512_close(&ctx_simd, hashB);
sph_shavite512_init(&ctx_shavite);
sph_shavite512(&ctx_shavite, hashB, 64);
sph_shavite512_close(&ctx_shavite, hashA);
sph_simd512_init(&ctx_simd);
sph_simd512(&ctx_simd, hashA, 64);
sph_simd512_close(&ctx_simd, hashB);
sph_echo512_init(&ctx_echo);
sph_echo512(&ctx_echo, hashB, 64);
sph_echo512_close(&ctx_echo, hashA);
memcpy(state, hash, 32);
}
static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_fresh(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
{
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
uint32_t endiandata[20];
uint32_t throughput = cuda_default_throughput(thr_id, 1 << 19);
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
if (opt_benchmark)
ptarget[7] = 0x00ff;
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t)64 * throughput + 4), -1);
x11_shavite512_cpu_init(thr_id, throughput);
x11_simd512_cpu_init(thr_id, throughput);
x11_echo512_cpu_init(thr_id, throughput);
cuda_check_cpu_init(thr_id, throughput);
init[thr_id] = true;
}
for (int k=0; k < 20; k++)
be32enc(&endiandata[k], pdata[k]);
x11_shavite512_setBlock_80((void*)endiandata);
cuda_check_cpu_setTarget(ptarget);
do {
uint32_t foundNonce;
int order = 0;
// GPU Hash
x11_shavite512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
#if NULLTEST
uint32_t buf[8]; memset(buf, 0, sizeof buf);
CUDA_SAFE_CALL(cudaMemcpy(buf, d_hash[thr_id], sizeof buf, cudaMemcpyDeviceToHost));
CUDA_SAFE_CALL(cudaThreadSynchronize());
print_hash((unsigned char*)buf); printf("\n");
#endif
*hashes_done = pdata[19] - first_nonce + throughput;
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX)
{
uint32_t vhash64[8];
be32enc(&endiandata[19], foundNonce);
fresh_hash(vhash64, endiandata);
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) {
int res = 1;
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash64);
if (secNonce != 0) {
be32enc(&endiandata[19], secNonce);
fresh_hash(vhash64, endiandata);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0])
work_set_target_ratio(work, vhash64);
pdata[21] = secNonce;
res++;
}
pdata[19] = foundNonce;
return res;
} else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
}
}
pdata[19] += throughput;
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}
// cleanup
extern "C" void free_fresh(int thr_id)
{
if (!init[thr_id])
return;
cudaSetDevice(device_map[thr_id]);
cudaFree(d_hash[thr_id]);
x11_simd512_cpu_free(thr_id);
cuda_check_cpu_free(thr_id);
init[thr_id] = false;
cudaDeviceSynchronize();
}