mirror of
https://github.com/GOSTSec/ccminer
synced 2025-01-31 00:44:15 +00:00
Add fresh algo (based on djm34 code)
Cleaned up and adapted to my changes (cputest added). Remove Makefile.in, which should be in .gitignore (please regenerate it with ./config.sh to compile on Linux).
This commit is contained in:
parent
d9ea5f72ce
commit
bc2eb75758
16
.gitignore
vendored
16
.gitignore
vendored
@ -1,5 +1,5 @@
|
||||
|
||||
minerd
|
||||
ccminer
|
||||
*.o
|
||||
|
||||
autom4te.cache
|
||||
@ -26,6 +26,9 @@ config.sub
|
||||
mingw32-config.cache
|
||||
|
||||
*/.dirstamp
|
||||
.DS_Store
|
||||
Desktop.ini
|
||||
Thumbs.db
|
||||
|
||||
*.iml
|
||||
|
||||
@ -33,5 +36,14 @@ Debug/
|
||||
Release/
|
||||
x64/Debug/
|
||||
x64/Release/
|
||||
ccminer.*.suo
|
||||
*.suo
|
||||
*.user
|
||||
|
||||
.settings/
|
||||
.project
|
||||
.metadata
|
||||
.classpath
|
||||
.loadpath
|
||||
.cproject
|
||||
.buildpath
|
||||
|
||||
|
@ -37,9 +37,9 @@ ccminer_SOURCES = elist.h miner.h compat.h \
|
||||
sph/cubehash.c sph/echo.c sph/luffa.c sph/shavite.c sph/simd.c \
|
||||
sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \
|
||||
sph/shabal.c sph/whirlpool.c \
|
||||
x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu \
|
||||
x11/x11.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
|
||||
x11/x11.cu x11/fresh.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
|
||||
x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \
|
||||
x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu \
|
||||
x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu
|
||||
|
||||
ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
|
||||
|
1488
Makefile.in
1488
Makefile.in
File diff suppressed because it is too large
Load Diff
@ -503,6 +503,12 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
|
||||
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
|
||||
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="x11\fresh.cu">
|
||||
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
|
||||
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
|
||||
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
|
||||
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="x11\simd_functions.cu">
|
||||
<ExcludedFromBuild>true</ExcludedFromBuild>
|
||||
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
|
||||
|
@ -361,6 +361,9 @@
|
||||
<CudaCompile Include="x11\cuda_x11_simd512.cu">
|
||||
<Filter>Source Files\CUDA\x11</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="x11\fresh.cu">
|
||||
<Filter>Source Files\CUDA\x11</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="x11\x11.cu">
|
||||
<Filter>Source Files\CUDA\x11</Filter>
|
||||
</CudaCompile>
|
||||
|
10
cpu-miner.c
10
cpu-miner.c
@ -132,6 +132,7 @@ typedef enum {
|
||||
ALGO_JACKPOT,
|
||||
ALGO_QUARK,
|
||||
ALGO_ANIME,
|
||||
ALGO_FRESH,
|
||||
ALGO_NIST5,
|
||||
ALGO_X11,
|
||||
ALGO_X13,
|
||||
@ -149,6 +150,7 @@ static const char *algo_names[] = {
|
||||
"jackpot",
|
||||
"quark",
|
||||
"anime",
|
||||
"fresh",
|
||||
"nist5",
|
||||
"x11",
|
||||
"x13",
|
||||
@ -225,6 +227,7 @@ Options:\n\
|
||||
jackpot Jackpot hash\n\
|
||||
quark Quark hash\n\
|
||||
anime Animecoin hash\n\
|
||||
fresh Freshcoin hash (shavite 80)\n\
|
||||
nist5 NIST5 (TalkCoin) hash\n\
|
||||
x11 X11 (DarkCoin) hash\n\
|
||||
x13 X13 (MaruCoin) hash\n\
|
||||
@ -782,7 +785,7 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
|
||||
|
||||
if (opt_algo == ALGO_JACKPOT)
|
||||
diff_to_target(work->target, sctx->job.diff / (65536.0 * opt_difficulty));
|
||||
else if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL || opt_algo == ALGO_DMD_GR)
|
||||
else if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL || opt_algo == ALGO_DMD_GR || opt_algo == ALGO_FRESH)
|
||||
diff_to_target(work->target, sctx->job.diff / (256.0 * opt_difficulty));
|
||||
else
|
||||
diff_to_target(work->target, sctx->job.diff / opt_difficulty);
|
||||
@ -918,6 +921,11 @@ static void *miner_thread(void *userdata)
|
||||
max_nonce, &hashes_done);
|
||||
break;
|
||||
|
||||
case ALGO_FRESH:
|
||||
rc = scanhash_fresh(thr_id, work.data, work.target,
|
||||
max_nonce, &hashes_done);
|
||||
break;
|
||||
|
||||
case ALGO_NIST5:
|
||||
rc = scanhash_nist5(thr_id, work.data, work.target,
|
||||
max_nonce, &hashes_done);
|
||||
|
5
miner.h
5
miner.h
@ -234,6 +234,10 @@ extern int scanhash_anime(int thr_id, uint32_t *pdata,
|
||||
const uint32_t *ptarget, uint32_t max_nonce,
|
||||
unsigned long *hashes_done);
|
||||
|
||||
extern int scanhash_fresh(int thr_id, uint32_t *pdata,
|
||||
const uint32_t *ptarget, uint32_t max_nonce,
|
||||
unsigned long *hashes_done);
|
||||
|
||||
extern int scanhash_nist5(int thr_id, uint32_t *pdata,
|
||||
const uint32_t *ptarget, uint32_t max_nonce,
|
||||
unsigned long *hashes_done);
|
||||
@ -355,6 +359,7 @@ void fugue256_hash(unsigned char* output, const unsigned char* input, int len);
|
||||
void heavycoin_hash(unsigned char* output, const unsigned char* input, int len);
|
||||
void groestlhash(void *state, const void *input);
|
||||
void myriadhash(void *state, const void *input);
|
||||
void fresh_hash(void *state, const void *input);
|
||||
void nist5hash(void *state, const void *input);
|
||||
void quarkhash(void *state, const void *input);
|
||||
void x11hash(void *output, const void *input);
|
||||
|
@ -5,6 +5,9 @@
|
||||
|
||||
#include "cuda_helper.h"
|
||||
|
||||
#define TPB 256
|
||||
#define THF 4
|
||||
|
||||
// aus cpu-miner.c
|
||||
extern int device_map[8];
|
||||
|
||||
@ -18,7 +21,7 @@ static cudaDeviceProp props[8];
|
||||
#include "groestl_functions_quad.cu"
|
||||
#include "bitslice_transformations_quad.cu"
|
||||
|
||||
__global__ __launch_bounds__(256, 4)
|
||||
__global__ __launch_bounds__(TPB, THF)
|
||||
void quark_groestl512_gpu_hash_64_quad(int threads, uint32_t startNounce, uint32_t *g_hash, uint32_t *g_nonceVector)
|
||||
{
|
||||
// durch 4 dividieren, weil jeweils 4 Threads zusammen ein Hash berechnen
|
||||
@ -60,7 +63,7 @@ void quark_groestl512_gpu_hash_64_quad(int threads, uint32_t startNounce, uint32
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void __launch_bounds__(256, 4)
|
||||
__global__ void __launch_bounds__(TPB, THF)
|
||||
quark_doublegroestl512_gpu_hash_64_quad(int threads, uint32_t startNounce, uint32_t *g_hash, uint32_t *g_nonceVector)
|
||||
{
|
||||
int thread = (blockDim.x * blockIdx.x + threadIdx.x)>>2;
|
||||
@ -125,11 +128,11 @@ __host__ void quark_groestl512_cpu_init(int thr_id, int threads)
|
||||
|
||||
__host__ void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
|
||||
{
|
||||
int threadsperblock = 256;
|
||||
int threadsperblock = TPB;
|
||||
|
||||
// Compute 3.0 benutzt die registeroptimierte Quad Variante mit Warp Shuffle
|
||||
// mit den Quad Funktionen brauchen wir jetzt 4 threads pro Hash, daher Faktor 4 bei der Blockzahl
|
||||
const int factor = 4;
|
||||
const int factor = THF;
|
||||
|
||||
// berechne wie viele Thread Blocks wir brauchen
|
||||
dim3 grid(factor*((threads + threadsperblock-1)/threadsperblock));
|
||||
@ -146,11 +149,11 @@ __host__ void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t sta
|
||||
|
||||
__host__ void quark_doublegroestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
|
||||
{
|
||||
int threadsperblock = 256;
|
||||
int threadsperblock = TPB;
|
||||
|
||||
// Compute 3.0 benutzt die registeroptimierte Quad Variante mit Warp Shuffle
|
||||
// mit den Quad Funktionen brauchen wir jetzt 4 threads pro Hash, daher Faktor 4 bei der Blockzahl
|
||||
const int factor = 4;
|
||||
const int factor = THF;
|
||||
|
||||
// berechne wie viele Thread Blocks wir brauchen
|
||||
dim3 grid(factor*((threads + threadsperblock-1)/threadsperblock));
|
||||
|
4
util.c
4
util.c
@ -1360,6 +1360,10 @@ void print_hash_tests(void)
|
||||
quarkhash(&hash[0], &buf[0]);
|
||||
printf("\nquark: "); print_hash(hash);
|
||||
|
||||
memset(hash, 0, sizeof hash);
|
||||
fresh_hash(&hash[0], &buf[0]);
|
||||
printf("\nfresh: "); print_hash(hash);
|
||||
|
||||
memset(hash, 0, sizeof hash);
|
||||
x11hash(&hash[0], &buf[0]);
|
||||
printf("\nX11: "); print_hash(hash);
|
||||
|
@ -1,10 +1,11 @@
|
||||
#include "cuda_helper.h"
|
||||
|
||||
#define TPB 256
|
||||
|
||||
// aus heavy.cu
|
||||
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
|
||||
|
||||
//typedef unsigned char BitSequence;
|
||||
//typedef unsigned long long DataLength;
|
||||
__constant__ uint32_t c_PaddedMessage80[32]; // padded message (80 bytes + padding)
|
||||
|
||||
__device__ __constant__
|
||||
static const uint32_t d_ShaviteInitVector[16] = {
|
||||
@ -16,7 +17,8 @@ static const uint32_t d_ShaviteInitVector[16] = {
|
||||
|
||||
#include "cuda_x11_aes.cu"
|
||||
|
||||
static __device__ __forceinline__ void AES_ROUND_NOKEY(
|
||||
__device__ __forceinline__
|
||||
static void AES_ROUND_NOKEY(
|
||||
const uint32_t* __restrict__ sharedMemory,
|
||||
uint32_t &x0, uint32_t &x1, uint32_t &x2, uint32_t &x3)
|
||||
{
|
||||
@ -31,7 +33,8 @@ static __device__ __forceinline__ void AES_ROUND_NOKEY(
|
||||
x3 = y3;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ void KEY_EXPAND_ELT(
|
||||
__device__ __forceinline__
|
||||
static void KEY_EXPAND_ELT(
|
||||
const uint32_t* __restrict__ sharedMemory,
|
||||
uint32_t &k0, uint32_t &k1, uint32_t &k2, uint32_t &k3)
|
||||
{
|
||||
@ -46,8 +49,8 @@ static __device__ __forceinline__ void KEY_EXPAND_ELT(
|
||||
k3 = y0;
|
||||
}
|
||||
|
||||
static __device__ void
|
||||
c512(const uint32_t* sharedMemory, uint32_t *state, uint32_t *msg)
|
||||
__device__
|
||||
static void c512(const uint32_t* sharedMemory, uint32_t *state, uint32_t *msg, uint32_t count)
|
||||
{
|
||||
uint32_t p0, p1, p2, p3, p4, p5, p6, p7;
|
||||
uint32_t p8, p9, pA, pB, pC, pD, pE, pF;
|
||||
@ -56,7 +59,7 @@ c512(const uint32_t* sharedMemory, uint32_t *state, uint32_t *msg)
|
||||
uint32_t rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F;
|
||||
uint32_t rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17;
|
||||
uint32_t rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F;
|
||||
const uint32_t counter = 512;
|
||||
const uint32_t counter = count;
|
||||
|
||||
p0 = state[0x0];
|
||||
p1 = state[0x1];
|
||||
@ -1291,8 +1294,7 @@ c512(const uint32_t* sharedMemory, uint32_t *state, uint32_t *msg)
|
||||
state[0xF] ^= p7;
|
||||
}
|
||||
|
||||
|
||||
// Die Hash-Funktion
|
||||
// GPU Hash
|
||||
__global__ void x11_shavite512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector)
|
||||
{
|
||||
__shared__ uint32_t sharedMemory[1024];
|
||||
@ -1305,7 +1307,7 @@ __global__ void x11_shavite512_gpu_hash_64(int threads, uint32_t startNounce, ui
|
||||
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
|
||||
|
||||
int hashPosition = nounce - startNounce;
|
||||
uint32_t *Hash = (uint32_t*)&g_hash[8 * hashPosition];
|
||||
uint32_t *Hash = (uint32_t*)&g_hash[hashPosition<<3];
|
||||
|
||||
// kopiere init-state
|
||||
uint32_t state[16];
|
||||
@ -1320,7 +1322,7 @@ __global__ void x11_shavite512_gpu_hash_64(int threads, uint32_t startNounce, ui
|
||||
// fülle die Nachricht mit 64-byte (vorheriger Hash)
|
||||
#pragma unroll 16
|
||||
for(int i=0;i<16;i++)
|
||||
msg[i] = Hash[i];
|
||||
msg[i] = Hash[i];
|
||||
|
||||
// Nachrichtenende
|
||||
msg[16] = 0x80;
|
||||
@ -1334,7 +1336,7 @@ __global__ void x11_shavite512_gpu_hash_64(int threads, uint32_t startNounce, ui
|
||||
msg[30] = 0;
|
||||
msg[31] = 0x02000000;
|
||||
|
||||
c512(sharedMemory, state, msg);
|
||||
c512(sharedMemory, state, msg, 512);
|
||||
|
||||
#pragma unroll 16
|
||||
for(int i=0;i<16;i++)
|
||||
@ -1342,8 +1344,46 @@ __global__ void x11_shavite512_gpu_hash_64(int threads, uint32_t startNounce, ui
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * GPU kernel: runs c512 (SHAvite-512 compression) over the padded 80-byte
 * block stored in c_PaddedMessage80, one nonce per thread.
 *
 * Launch layout: 1D grid of 1D blocks. The flat thread index selects the
 * nonce (startNounce + thread); threads with index >= threads do no hashing.
 * Shared memory: 1024 statically declared uint32_t words, filled by
 * aes_gpu_init().
 * Output: thread t stores its 16-word state at word offset 16 * t of
 * outputHash.
 */
__global__ void x11_shavite512_gpu_hash_80(int threads, uint32_t startNounce, void *outputHash)
{
	__shared__ uint32_t sharedMemory[1024];

	aes_gpu_init(sharedMemory);
	// Block-wide barrier before any thread reads the tables through c512().
	// NOTE(review): aes_gpu_init is defined in cuda_x11_aes.cu (not visible
	// here); presumably it fills sharedMemory cooperatively across the block.
	// The barrier is placed outside the bounds guard so every thread reaches
	// it, and it is harmless if the helper already synchronizes.
	__syncthreads();

	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		const uint32_t nounce = startNounce + thread;

		// copy the initial state
		uint32_t state[16];

		#pragma unroll 16
		for (int i = 0; i < 16; i++) {
			state[i] = d_ShaviteInitVector[i];
		}

		// start from the host-prepared padded message
		uint32_t msg[32];

		#pragma unroll 32
		for (int i = 0; i < 32; i++) {
			msg[i] = c_PaddedMessage80[i];
		}

		// word 19 (byte offset 76) carries this thread's nonce, byte-swapped
		msg[19] = cuda_swab32(nounce);
		// Trailer words consumed by c512; presumably the SHAvite-512 end
		// marker and length/digest-size fields for a 640-bit message
		// (0x280 == 640, 0x200 == 512) -- TODO confirm against the reference.
		msg[20] = 0x80;
		msg[27] = 0x2800000;
		msg[31] = 0x2000000;

		// 640 = message length in bits (80 bytes)
		c512(sharedMemory, state, msg, 640);

		uint32_t *outHash = (uint32_t *)outputHash + 16 * thread;

		#pragma unroll 16
		for (int i = 0; i < 16; i++)
			outHash[i] = state[i];

	} // thread < threads
}
|
||||
|
||||
// Setup-Funktionen
|
||||
__host__ void x11_shavite512_cpu_init(int thr_id, int threads)
|
||||
{
|
||||
aes_cpu_init();
|
||||
@ -1351,15 +1391,40 @@ __host__ void x11_shavite512_cpu_init(int thr_id, int threads)
|
||||
|
||||
/**
 * Host launcher for x11_shavite512_gpu_hash_64.
 *
 * Launches one thread per hash in blocks of TPB threads, then calls
 * MyStreamSynchronize(NULL, order, thr_id).
 *
 * thr_id         miner thread index, forwarded to MyStreamSynchronize
 * threads        number of hashes to process
 * startNounce    first nonce of the batch
 * d_nonceVector  optional device nonce list (the kernel accepts NULL)
 * d_hash         device hash buffer, passed to the kernel as uint64_t*
 * order          forwarded to MyStreamSynchronize
 */
__host__ void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
{
	// Single declaration: the block size comes from the file-level TPB macro.
	const int threadsperblock = TPB;

	// compute how many thread blocks are needed (rounded up)
	dim3 grid((threads + threadsperblock-1)/threadsperblock);
	dim3 block(threadsperblock);

	// size of the dynamic shared memory area
	size_t shared_size = 0;

	x11_shavite512_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
	MyStreamSynchronize(NULL, order, thr_id);
}
|
||||
|
||||
/**
 * Host launcher for x11_shavite512_gpu_hash_80 (80-byte input variant).
 *
 * Starts one thread per nonce in blocks of TPB threads and then calls
 * MyStreamSynchronize(NULL, order, thr_id).
 *
 * thr_id        miner thread index, forwarded to MyStreamSynchronize
 * threads       number of nonces to hash
 * startNounce   first nonce of the batch
 * d_outputHash  device buffer that receives the kernel output
 * order         forwarded to MyStreamSynchronize
 */
__host__ void x11_shavite512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash, int order)
{
	const int blockSize = TPB;

	// Round the block count up so a partial last block is still launched.
	const int blockCount = (threads + blockSize - 1) / blockSize;

	dim3 grid(blockCount);
	dim3 block(blockSize);

	// No dynamic shared memory is requested for this launch.
	size_t dynamicSharedBytes = 0;

	x11_shavite512_gpu_hash_80<<<grid, block, dynamicSharedBytes>>>(threads, startNounce, d_outputHash);
	MyStreamSynchronize(NULL, order, thr_id);
}
|
||||
|
||||
/**
 * Uploads the 80-byte block at pdata, zero-extended to 128 bytes, into the
 * constant symbol c_PaddedMessage80.
 *
 * pdata  host pointer to at least 80 readable bytes
 */
__host__ void x11_shavite512_setBlock_80(void *pdata)
{
	// Provide the message with padding: 80 data bytes followed by 48 zero
	// bytes. Only the correct nonce still has to be inserted from byte 76 on.
	unsigned char paddedBlock[128] = { 0 };
	memcpy(paddedBlock, pdata, 80);

	// sizeof(paddedBlock) == 128 == 32 * sizeof(uint32_t)
	cudaMemcpyToSymbol(c_PaddedMessage80, paddedBlock, sizeof(paddedBlock), 0, cudaMemcpyHostToDevice);
}
|
||||
|
||||
|
169
x11/fresh.cu
Normal file
169
x11/fresh.cu
Normal file
@ -0,0 +1,169 @@
|
||||
/**
|
||||
* Fresh algorithm
|
||||
*/
|
||||
extern "C" {
|
||||
#include "sph/sph_shavite.h"
|
||||
#include "sph/sph_simd.h"
|
||||
#include "sph/sph_echo.h"
|
||||
#include "miner.h"
|
||||
#include "cuda_helper.h"
|
||||
}
|
||||
|
||||
// to test gpu hash on a null buffer
|
||||
#define NULLTEST 0
|
||||
|
||||
static uint32_t *d_hash[8];
|
||||
|
||||
extern int device_map[8];
|
||||
extern bool opt_benchmark;
|
||||
|
||||
extern void x11_shavite512_cpu_init(int thr_id, int threads);
|
||||
extern void x11_shavite512_setBlock_80(void *pdata);
|
||||
extern void x11_shavite512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
|
||||
extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
|
||||
extern void x11_simd512_cpu_init(int thr_id, int threads);
|
||||
extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
|
||||
extern void x11_echo512_cpu_init(int thr_id, int threads);
|
||||
extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
|
||||
extern void cuda_check_cpu_init(int thr_id, int threads);
|
||||
extern void cuda_check_cpu_setTarget(const void *ptarget);
|
||||
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
|
||||
|
||||
extern void quark_compactTest_cpu_init(int thr_id, int threads);
|
||||
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
|
||||
uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
|
||||
int order);
|
||||
|
||||
// CPU Hash
|
||||
/**
 * CPU reference implementation of the Fresh hash.
 *
 * Chain: shavite512(80 bytes) -> simd512 -> shavite512 -> simd512 -> echo512.
 *
 * state  receives the first 32 bytes of the final echo512 digest
 * input  pointer to the 80 bytes to hash
 */
extern "C" void fresh_hash(void *state, const void *input)
{
	// shavite-simd-shavite-simd-echo

	sph_shavite512_context ctx_shavite;
	sph_simd512_context ctx_simd;
	sph_echo512_context ctx_echo;

	// Two 64-byte halves of one buffer. Local pointers are used instead of
	// function-scope #defines so the names do not leak into the rest of the
	// translation unit.
	unsigned char hash[128];
	unsigned char *const hashA = hash;
	unsigned char *const hashB = hash + 64;

	memset(hash, 0, sizeof hash);

	sph_shavite512_init(&ctx_shavite);
	sph_shavite512(&ctx_shavite, input, 80);
	sph_shavite512_close(&ctx_shavite, hashA);

	sph_simd512_init(&ctx_simd);
	sph_simd512(&ctx_simd, hashA, 64);
	sph_simd512_close(&ctx_simd, hashB);

	sph_shavite512_init(&ctx_shavite);
	sph_shavite512(&ctx_shavite, hashB, 64);
	sph_shavite512_close(&ctx_shavite, hashA);

	sph_simd512_init(&ctx_simd);
	sph_simd512(&ctx_simd, hashA, 64);
	sph_simd512_close(&ctx_simd, hashB);

	sph_echo512_init(&ctx_echo);
	sph_echo512(&ctx_echo, hashB, 64);
	sph_echo512_close(&ctx_echo, hashA);

	// the final digest sits in the first half; callers get its first 32 bytes
	memcpy(state, hash, 32);
}
|
||||
|
||||
#if NULLTEST
|
||||
// Debug helper: prints the first 32 bytes of hash as eight groups of four
// two-digit hex bytes, each group followed by a space.
static void print_hash(unsigned char *hash)
{
	const unsigned char *p = hash;
	for (int group = 0; group < 8; group++, p += 4) {
		printf("%02x%02x%02x%02x ", p[0], p[1], p[2], p[3]);
	}
}
|
||||
#endif
|
||||
|
||||
/**
 * Scans a nonce range for the Fresh algorithm on the GPU.
 *
 * Each pass hashes `throughput` consecutive nonces starting at pdata[19]
 * (shavite80 -> simd -> shavite64 -> simd -> echo), asks the GPU target check
 * for a candidate and re-validates any candidate on the CPU with fresh_hash().
 *
 * thr_id       miner thread index; selects the device and per-thread buffers
 * pdata        20-word block header; pdata[19] is the nonce and is updated
 * ptarget      8-word target (word 7 is overwritten in benchmark mode)
 * max_nonce    scanning stops once pdata[19] reaches this value
 * hashes_done  receives the number of nonces covered by this call
 *
 * Returns 1 when a nonce passes the CPU check (pdata[19] holds it), else 0.
 * The device hash buffer is allocated on the first call per thr_id and is
 * not released here.
 */
extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata,
	const uint32_t *ptarget, uint32_t max_nonce,
	unsigned long *hashes_done)
{
	const uint32_t first_nonce = pdata[19];
	const int throughput = 256*256*8;
	static bool init[8] = {0,0,0,0,0,0,0,0};
	uint32_t endiandata[20];
	uint32_t Htarg = ptarget[7];

	if (opt_benchmark)
		((uint32_t*)ptarget)[7] = Htarg = 0x0000ff;

#if NULLTEST
	for (int k=0; k < 20; k++)
		pdata[k] = 0;
#endif

	// one-time per-thread device setup
	if (!init[thr_id])
	{
		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));

		// 16 words (64 bytes) of hash state per nonce
		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput + 4));

		x11_shavite512_cpu_init(thr_id, throughput);
		x11_simd512_cpu_init(thr_id, throughput);
		x11_echo512_cpu_init(thr_id, throughput);

		cuda_check_cpu_init(thr_id, throughput);

		init[thr_id] = true;
	}

	for (int k=0; k < 20; k++)
		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);

	x11_shavite512_setBlock_80((void*)endiandata);
	cuda_check_cpu_setTarget(ptarget);

	do {
		uint32_t foundNonce;
		int order = 0;

		// GPU Hash
		x11_shavite512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
		x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);

#if NULLTEST
		uint32_t buf[8]; memset(buf, 0, sizeof buf);
		CUDA_SAFE_CALL(cudaMemcpy(buf, d_hash[thr_id], sizeof buf, cudaMemcpyDeviceToHost));
		// cudaThreadSynchronize is deprecated; cudaDeviceSynchronize is its replacement
		CUDA_SAFE_CALL(cudaDeviceSynchronize());
		print_hash((unsigned char*)buf); printf("\n");
#endif

		foundNonce = cuda_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		if (foundNonce != 0xffffffff)
		{
			// re-check the GPU candidate on the CPU before reporting it
			uint32_t vhash64[8];
			be32enc(&endiandata[19], foundNonce);
			fresh_hash(vhash64, endiandata);

			if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
				pdata[19] = foundNonce;
				*hashes_done = foundNonce - first_nonce + 1;
				return 1;
			}
			else if (vhash64[7] > Htarg) {
				applog(LOG_INFO, "GPU #%d: result for %08x is not in range: %x > %x", thr_id, foundNonce, vhash64[7], Htarg);
			}
			else {
				applog(LOG_INFO, "GPU #%d: result for %08x does not validate on CPU!", thr_id, foundNonce);
			}
		}

		// Advance in 64 bits: a plain 32-bit "+=" can wrap past 0xffffffff,
		// which would make the nonce look small again, keep the loop running
		// over already-scanned nonces and corrupt hashes_done.
		const uint64_t next_nonce = (uint64_t)pdata[19] + (uint64_t)throughput;
		if (next_nonce > 0xffffffffULL) {
			pdata[19] = max_nonce;
			break;
		}
		pdata[19] = (uint32_t)next_nonce;

	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);

	*hashes_done = pdata[19] - first_nonce + 1;
	return 0;
}
|
Loading…
x
Reference in New Issue
Block a user