61ff92b5b4
fix some algo weird hashrates (like blake) and reset device between algos, for better accuracy but this reset doesnt seems enough to bench all algos correctly... to test on linux, could be a driver issue... heavy: fix first alloc and indent with tabs...
179 lines
4.5 KiB
Plaintext
179 lines
4.5 KiB
Plaintext
/**
|
|
* S3 Hash (Also called Triple S - Used by 1Coin)
|
|
*/
|
|
|
|
extern "C" {
|
|
#include "sph/sph_skein.h"
|
|
#include "sph/sph_shavite.h"
|
|
#include "sph/sph_simd.h"
|
|
}
|
|
|
|
#include "miner.h"
|
|
#include "cuda_helper.h"
|
|
#include "cuda_x11.h"
|
|
|
|
extern void x11_shavite512_setBlock_80(void *pdata);
|
|
extern void x11_shavite512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order);
|
|
|
|
#include <stdint.h>
|
|
|
|
static uint32_t *d_hash[MAX_GPUS];
|
|
|
|
/* CPU HASH */
|
|
extern "C" void s3hash(void *output, const void *input)
|
|
{
|
|
sph_shavite512_context ctx_shavite;
|
|
sph_simd512_context ctx_simd;
|
|
sph_skein512_context ctx_skein;
|
|
|
|
unsigned char hash[64];
|
|
|
|
sph_shavite512_init(&ctx_shavite);
|
|
sph_shavite512(&ctx_shavite, input, 80);
|
|
sph_shavite512_close(&ctx_shavite, (void*) hash);
|
|
|
|
sph_simd512_init(&ctx_simd);
|
|
sph_simd512(&ctx_simd, (const void*) hash, 64);
|
|
sph_simd512_close(&ctx_simd, (void*) hash);
|
|
|
|
sph_skein512_init(&ctx_skein);
|
|
sph_skein512(&ctx_skein, (const void*) hash, 64);
|
|
sph_skein512_close(&ctx_skein, (void*) hash);
|
|
|
|
memcpy(output, hash, 32);
|
|
}
|
|
|
|
#ifdef _DEBUG
|
|
#define TRACE(algo) { \
|
|
if (max_nonce == 1 && pdata[19] <= 1) { \
|
|
uint32_t* debugbuf = NULL; \
|
|
cudaMallocHost(&debugbuf, 32); \
|
|
cudaMemcpy(debugbuf, d_hash[thr_id], 32, cudaMemcpyDeviceToHost); \
|
|
printf("S3 %s %08x %08x %08x %08x...%08x\n", algo, swab32(debugbuf[0]), swab32(debugbuf[1]), \
|
|
swab32(debugbuf[2]), swab32(debugbuf[3]), swab32(debugbuf[7])); \
|
|
cudaFreeHost(debugbuf); \
|
|
} \
|
|
}
|
|
#else
|
|
#define TRACE(algo) {}
|
|
#endif
|
|
|
|
static bool init[MAX_GPUS] = { 0 };
|
|
|
|
/* Main S3 entry point */
|
|
extern "C" int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
|
|
{
|
|
uint32_t *pdata = work->data;
|
|
uint32_t *ptarget = work->target;
|
|
const uint32_t first_nonce = pdata[19];
|
|
int intensity = 20; // 256*256*8*2;
|
|
#ifdef WIN32
|
|
// reduce by one the intensity on windows
|
|
intensity--;
|
|
#endif
|
|
uint32_t throughput = cuda_default_throughput(thr_id, 1 << intensity);
|
|
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
|
|
|
|
if (opt_benchmark)
|
|
ptarget[7] = 0xF;
|
|
|
|
if (!init[thr_id])
|
|
{
|
|
cudaSetDevice(device_map[thr_id]);
|
|
if (opt_cudaschedule == -1 && gpu_threads == 1) {
|
|
cudaDeviceReset();
|
|
// reduce cpu usage
|
|
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
|
|
CUDA_LOG_ERROR();
|
|
}
|
|
|
|
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput));
|
|
|
|
x11_shavite512_cpu_init(thr_id, throughput);
|
|
x11_simd512_cpu_init(thr_id, throughput);
|
|
quark_skein512_cpu_init(thr_id, throughput);
|
|
|
|
cuda_check_cpu_init(thr_id, throughput);
|
|
|
|
init[thr_id] = true;
|
|
}
|
|
|
|
uint32_t endiandata[20];
|
|
for (int k=0; k < 20; k++)
|
|
be32enc(&endiandata[k], pdata[k]);
|
|
|
|
x11_shavite512_setBlock_80((void*)endiandata);
|
|
cuda_check_cpu_setTarget(ptarget);
|
|
|
|
do {
|
|
const uint32_t Htarg = ptarget[7];
|
|
uint32_t foundNonce;
|
|
int order = 0;
|
|
|
|
x11_shavite512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
|
TRACE("shavite:");
|
|
x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
|
TRACE("simd :");
|
|
quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
|
TRACE("skein :");
|
|
|
|
*hashes_done = pdata[19] - first_nonce + throughput;
|
|
|
|
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
|
|
|
|
if (foundNonce != UINT32_MAX)
|
|
{
|
|
uint32_t vhash64[8];
|
|
be32enc(&endiandata[19], foundNonce);
|
|
s3hash(vhash64, endiandata);
|
|
|
|
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
|
|
int res = 1;
|
|
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
|
|
work_set_target_ratio(work, vhash64);
|
|
if (secNonce != 0) {
|
|
be32enc(&endiandata[19], secNonce);
|
|
s3hash(vhash64, endiandata);
|
|
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio)
|
|
work_set_target_ratio(work, vhash64);
|
|
pdata[21] = secNonce;
|
|
res++;
|
|
}
|
|
pdata[19] = foundNonce;
|
|
return res;
|
|
|
|
} else {
|
|
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
|
|
}
|
|
}
|
|
|
|
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
|
|
pdata[19] = max_nonce;
|
|
break;
|
|
}
|
|
|
|
pdata[19] += throughput;
|
|
|
|
} while (!work_restart[thr_id].restart);
|
|
|
|
*hashes_done = pdata[19] - first_nonce;
|
|
return 0;
|
|
}
|
|
|
|
// cleanup
|
|
extern "C" void free_s3(int thr_id)
|
|
{
|
|
if (!init[thr_id])
|
|
return;
|
|
|
|
cudaThreadSynchronize();
|
|
|
|
cudaFree(d_hash[thr_id]);
|
|
x11_simd512_cpu_free(thr_id);
|
|
|
|
cuda_check_cpu_free(thr_id);
|
|
init[thr_id] = false;
|
|
|
|
cudaDeviceSynchronize();
|
|
}
|