mirror of
https://github.com/GOSTSec/ccminer
synced 2025-03-13 05:51:19 +00:00
streebog: apply skunk improvements to veltor
This commit is contained in:
parent
6055af1fa9
commit
1e71dc5782
@ -1,4 +1,4 @@
|
||||
AC_INIT([ccminer], [2.2], [], [ccminer], [http://github.com/tpruvot/ccminer])
|
||||
AC_INIT([ccminer], [2.2.1], [], [ccminer], [http://github.com/tpruvot/ccminer])
|
||||
|
||||
AC_PREREQ([2.59c])
|
||||
AC_CANONICAL_SYSTEM
|
||||
|
@ -204,7 +204,7 @@ static void GOST_E12(const uint2 shared[8][256],uint2 *const __restrict__ K, uin
|
||||
__constant__ uint64_t target64[4];
|
||||
|
||||
__host__
|
||||
void skunk_set_target(uint32_t* ptarget)
|
||||
void skunk_streebog_set_target(uint32_t* ptarget)
|
||||
{
|
||||
cudaMemcpyToSymbol(target64, ptarget, 4*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
|
||||
}
|
||||
|
@ -23,12 +23,12 @@ extern void x11_cubehash512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t s
|
||||
extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads);
|
||||
extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
extern void x13_fugue512_cpu_free(int thr_id);
|
||||
extern void streebog_cpu_hash_64_final(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t* d_resNonce);
|
||||
extern void streebog_set_target(const uint32_t* ptarget);
|
||||
extern void streebog_sm3_set_target(uint32_t* ptarget);
|
||||
extern void streebog_sm3_hash_64_final(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t* d_resNonce);
|
||||
|
||||
// krnlx merged kernel (for high-end cards only)
|
||||
extern void skunk_cpu_init(int thr_id, uint32_t threads);
|
||||
extern void skunk_set_target(uint32_t* ptarget);
|
||||
extern void skunk_streebog_set_target(uint32_t* ptarget);
|
||||
extern void skunk_setBlock_80(int thr_id, void *pdata);
|
||||
extern void skunk_cuda_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash);
|
||||
extern void skunk_cuda_streebog(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t* d_resNonce);
|
||||
@ -117,10 +117,10 @@ extern "C" int scanhash_skunk(int thr_id, struct work* work, uint32_t max_nonce,
|
||||
cudaMemset(d_resNonce[thr_id], 0xff, NBN*sizeof(uint32_t));
|
||||
if (use_compat_kernels[thr_id]) {
|
||||
skein512_cpu_setBlock_80(endiandata);
|
||||
streebog_set_target(ptarget);
|
||||
streebog_sm3_set_target(ptarget);
|
||||
} else {
|
||||
skunk_setBlock_80(thr_id, endiandata);
|
||||
skunk_set_target(ptarget);
|
||||
skunk_streebog_set_target(ptarget);
|
||||
}
|
||||
|
||||
do {
|
||||
@ -129,7 +129,7 @@ extern "C" int scanhash_skunk(int thr_id, struct work* work, uint32_t max_nonce,
|
||||
skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||
x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||
x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||
streebog_cpu_hash_64_final(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id]);
|
||||
streebog_sm3_hash_64_final(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id]);
|
||||
} else {
|
||||
skunk_cuda_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]);
|
||||
skunk_cuda_streebog(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id]);
|
||||
|
@ -806,10 +806,11 @@ void streebog_cpu_hash_64(int thr_id, uint32_t threads, uint32_t *d_hash)
|
||||
#define T6(x) shared[6][x]
|
||||
#define T7(x) shared[7][x]
|
||||
|
||||
// Streebog final for Veltor and skunk on SM 3.x
|
||||
__constant__ uint64_t target64[4];
|
||||
|
||||
__host__
|
||||
void streebog_set_target(const uint32_t* ptarget)
|
||||
void streebog_sm3_set_target(uint32_t* ptarget)
|
||||
{
|
||||
cudaMemcpyToSymbol(target64,ptarget,4*sizeof(uint64_t),0,cudaMemcpyHostToDevice);
|
||||
}
|
||||
@ -995,7 +996,7 @@ void streebog_gpu_hash_64_final(uint64_t *g_hash, uint32_t* resNonce)
|
||||
}
|
||||
|
||||
__host__
|
||||
void streebog_cpu_hash_64_final(int thr_id, uint32_t threads, uint32_t *d_hash,uint32_t* d_resNonce)
|
||||
void streebog_sm3_hash_64_final(int thr_id, uint32_t threads, uint32_t *d_hash,uint32_t* d_resNonce)
|
||||
{
|
||||
dim3 grid((threads + TPB-1) / TPB);
|
||||
dim3 block(TPB);
|
||||
|
@ -10,11 +10,17 @@ extern "C" {
|
||||
#include "cuda_x11.h"
|
||||
|
||||
extern void skein512_cpu_setBlock_80(void *pdata);
|
||||
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
|
||||
extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int swap);
|
||||
extern void x14_shabal512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
extern void streebog_cpu_hash_64_final(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t* d_resNonce);
|
||||
extern void streebog_set_target(const uint32_t* ptarget);
|
||||
|
||||
// for SM3.x
|
||||
extern void streebog_sm3_set_target(uint32_t* ptarget);
|
||||
extern void streebog_sm3_hash_64_final(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t* d_resNonce);
|
||||
|
||||
// for latest cards only
|
||||
extern void skunk_cpu_init(int thr_id, uint32_t threads);
|
||||
extern void skunk_streebog_set_target(uint32_t* ptarget);
|
||||
extern void skunk_cuda_streebog(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t* d_resNonce);
|
||||
|
||||
#include <stdio.h>
|
||||
#include <memory.h>
|
||||
@ -23,7 +29,7 @@ extern void streebog_set_target(const uint32_t* ptarget);
|
||||
static uint32_t *d_hash[MAX_GPUS];
|
||||
static uint32_t *d_resNonce[MAX_GPUS];
|
||||
|
||||
// veltorcoin CPU Hash
|
||||
// veltor CPU Hash
|
||||
extern "C" void veltorhash(void *output, const void *input)
|
||||
{
|
||||
unsigned char _ALIGN(128) hash[128] = { 0 };
|
||||
@ -53,6 +59,7 @@ extern "C" void veltorhash(void *output, const void *input)
|
||||
}
|
||||
|
||||
static bool init[MAX_GPUS] = { 0 };
|
||||
static bool use_compat_kernels[MAX_GPUS] = { 0 };
|
||||
|
||||
extern "C" int scanhash_veltor(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
|
||||
{
|
||||
@ -80,7 +87,9 @@ extern "C" int scanhash_veltor(int thr_id, struct work* work, uint32_t max_nonce
|
||||
}
|
||||
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);
|
||||
|
||||
quark_skein512_cpu_init(thr_id, throughput);
|
||||
skunk_cpu_init(thr_id, throughput);
|
||||
use_compat_kernels[thr_id] = (cuda_arch[dev_id] < 500);
|
||||
|
||||
x11_shavite512_cpu_init(thr_id, throughput);
|
||||
|
||||
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), 0);
|
||||
@ -97,14 +106,20 @@ extern "C" int scanhash_veltor(int thr_id, struct work* work, uint32_t max_nonce
|
||||
skein512_cpu_setBlock_80(endiandata);
|
||||
|
||||
cudaMemset(d_resNonce[thr_id], 0xff, NBN*sizeof(uint32_t));
|
||||
streebog_set_target(ptarget);
|
||||
if(use_compat_kernels[thr_id])
|
||||
streebog_sm3_set_target(ptarget);
|
||||
else
|
||||
skunk_streebog_set_target(ptarget);
|
||||
|
||||
do {
|
||||
int order = 0;
|
||||
skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], 1); order++;
|
||||
x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||
x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||
streebog_cpu_hash_64_final(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id]);
|
||||
if(use_compat_kernels[thr_id])
|
||||
streebog_sm3_hash_64_final(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id]);
|
||||
else
|
||||
skunk_cuda_streebog(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id]);
|
||||
|
||||
cudaMemcpy(h_resNonce, d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user