mirror of
https://github.com/GOSTSec/ccminer
synced 2025-01-31 00:44:15 +00:00
Add fresh algo (based on djm34 code)
Cleaned up and adapted to my changes (cputest added). Removed Makefile.in, which should be in .gitignore (please refresh it with ./config.sh to compile on Linux).
This commit is contained in:
parent
d9ea5f72ce
commit
bc2eb75758
16
.gitignore
vendored
16
.gitignore
vendored
@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
minerd
|
ccminer
|
||||||
*.o
|
*.o
|
||||||
|
|
||||||
autom4te.cache
|
autom4te.cache
|
||||||
@ -26,6 +26,9 @@ config.sub
|
|||||||
mingw32-config.cache
|
mingw32-config.cache
|
||||||
|
|
||||||
*/.dirstamp
|
*/.dirstamp
|
||||||
|
.DS_Store
|
||||||
|
Desktop.ini
|
||||||
|
Thumbs.db
|
||||||
|
|
||||||
*.iml
|
*.iml
|
||||||
|
|
||||||
@ -33,5 +36,14 @@ Debug/
|
|||||||
Release/
|
Release/
|
||||||
x64/Debug/
|
x64/Debug/
|
||||||
x64/Release/
|
x64/Release/
|
||||||
ccminer.*.suo
|
*.suo
|
||||||
|
*.user
|
||||||
|
|
||||||
|
.settings/
|
||||||
|
.project
|
||||||
|
.metadata
|
||||||
|
.classpath
|
||||||
|
.loadpath
|
||||||
|
.cproject
|
||||||
|
.buildpath
|
||||||
|
|
||||||
|
@ -37,9 +37,9 @@ ccminer_SOURCES = elist.h miner.h compat.h \
|
|||||||
sph/cubehash.c sph/echo.c sph/luffa.c sph/shavite.c sph/simd.c \
|
sph/cubehash.c sph/echo.c sph/luffa.c sph/shavite.c sph/simd.c \
|
||||||
sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \
|
sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \
|
||||||
sph/shabal.c sph/whirlpool.c \
|
sph/shabal.c sph/whirlpool.c \
|
||||||
x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu \
|
x11/x11.cu x11/fresh.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
|
||||||
x11/x11.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
|
|
||||||
x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \
|
x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \
|
||||||
|
x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu \
|
||||||
x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu
|
x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu
|
||||||
|
|
||||||
ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
|
ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
|
||||||
|
1488
Makefile.in
1488
Makefile.in
File diff suppressed because it is too large
Load Diff
@ -503,6 +503,12 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
|
|||||||
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(AdditionalOptions)</AdditionalOptions>
|
||||||
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
|
||||||
</CudaCompile>
|
</CudaCompile>
|
||||||
|
<CudaCompile Include="x11\fresh.cu">
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">--ptxas-options=-O3 %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(AdditionalOptions)</AdditionalOptions>
|
||||||
|
</CudaCompile>
|
||||||
<CudaCompile Include="x11\simd_functions.cu">
|
<CudaCompile Include="x11\simd_functions.cu">
|
||||||
<ExcludedFromBuild>true</ExcludedFromBuild>
|
<ExcludedFromBuild>true</ExcludedFromBuild>
|
||||||
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(AdditionalOptions)</AdditionalOptions>
|
||||||
|
@ -361,6 +361,9 @@
|
|||||||
<CudaCompile Include="x11\cuda_x11_simd512.cu">
|
<CudaCompile Include="x11\cuda_x11_simd512.cu">
|
||||||
<Filter>Source Files\CUDA\x11</Filter>
|
<Filter>Source Files\CUDA\x11</Filter>
|
||||||
</CudaCompile>
|
</CudaCompile>
|
||||||
|
<CudaCompile Include="x11\fresh.cu">
|
||||||
|
<Filter>Source Files\CUDA\x11</Filter>
|
||||||
|
</CudaCompile>
|
||||||
<CudaCompile Include="x11\x11.cu">
|
<CudaCompile Include="x11\x11.cu">
|
||||||
<Filter>Source Files\CUDA\x11</Filter>
|
<Filter>Source Files\CUDA\x11</Filter>
|
||||||
</CudaCompile>
|
</CudaCompile>
|
||||||
|
10
cpu-miner.c
10
cpu-miner.c
@ -132,6 +132,7 @@ typedef enum {
|
|||||||
ALGO_JACKPOT,
|
ALGO_JACKPOT,
|
||||||
ALGO_QUARK,
|
ALGO_QUARK,
|
||||||
ALGO_ANIME,
|
ALGO_ANIME,
|
||||||
|
ALGO_FRESH,
|
||||||
ALGO_NIST5,
|
ALGO_NIST5,
|
||||||
ALGO_X11,
|
ALGO_X11,
|
||||||
ALGO_X13,
|
ALGO_X13,
|
||||||
@ -149,6 +150,7 @@ static const char *algo_names[] = {
|
|||||||
"jackpot",
|
"jackpot",
|
||||||
"quark",
|
"quark",
|
||||||
"anime",
|
"anime",
|
||||||
|
"fresh",
|
||||||
"nist5",
|
"nist5",
|
||||||
"x11",
|
"x11",
|
||||||
"x13",
|
"x13",
|
||||||
@ -225,6 +227,7 @@ Options:\n\
|
|||||||
jackpot Jackpot hash\n\
|
jackpot Jackpot hash\n\
|
||||||
quark Quark hash\n\
|
quark Quark hash\n\
|
||||||
anime Animecoin hash\n\
|
anime Animecoin hash\n\
|
||||||
|
fresh Freshcoin hash (shavite 80)\n\
|
||||||
nist5 NIST5 (TalkCoin) hash\n\
|
nist5 NIST5 (TalkCoin) hash\n\
|
||||||
x11 X11 (DarkCoin) hash\n\
|
x11 X11 (DarkCoin) hash\n\
|
||||||
x13 X13 (MaruCoin) hash\n\
|
x13 X13 (MaruCoin) hash\n\
|
||||||
@ -782,7 +785,7 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
|
|||||||
|
|
||||||
if (opt_algo == ALGO_JACKPOT)
|
if (opt_algo == ALGO_JACKPOT)
|
||||||
diff_to_target(work->target, sctx->job.diff / (65536.0 * opt_difficulty));
|
diff_to_target(work->target, sctx->job.diff / (65536.0 * opt_difficulty));
|
||||||
else if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL || opt_algo == ALGO_DMD_GR)
|
else if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL || opt_algo == ALGO_DMD_GR || opt_algo == ALGO_FRESH)
|
||||||
diff_to_target(work->target, sctx->job.diff / (256.0 * opt_difficulty));
|
diff_to_target(work->target, sctx->job.diff / (256.0 * opt_difficulty));
|
||||||
else
|
else
|
||||||
diff_to_target(work->target, sctx->job.diff / opt_difficulty);
|
diff_to_target(work->target, sctx->job.diff / opt_difficulty);
|
||||||
@ -918,6 +921,11 @@ static void *miner_thread(void *userdata)
|
|||||||
max_nonce, &hashes_done);
|
max_nonce, &hashes_done);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case ALGO_FRESH:
|
||||||
|
rc = scanhash_fresh(thr_id, work.data, work.target,
|
||||||
|
max_nonce, &hashes_done);
|
||||||
|
break;
|
||||||
|
|
||||||
case ALGO_NIST5:
|
case ALGO_NIST5:
|
||||||
rc = scanhash_nist5(thr_id, work.data, work.target,
|
rc = scanhash_nist5(thr_id, work.data, work.target,
|
||||||
max_nonce, &hashes_done);
|
max_nonce, &hashes_done);
|
||||||
|
5
miner.h
5
miner.h
@ -234,6 +234,10 @@ extern int scanhash_anime(int thr_id, uint32_t *pdata,
|
|||||||
const uint32_t *ptarget, uint32_t max_nonce,
|
const uint32_t *ptarget, uint32_t max_nonce,
|
||||||
unsigned long *hashes_done);
|
unsigned long *hashes_done);
|
||||||
|
|
||||||
|
extern int scanhash_fresh(int thr_id, uint32_t *pdata,
|
||||||
|
const uint32_t *ptarget, uint32_t max_nonce,
|
||||||
|
unsigned long *hashes_done);
|
||||||
|
|
||||||
extern int scanhash_nist5(int thr_id, uint32_t *pdata,
|
extern int scanhash_nist5(int thr_id, uint32_t *pdata,
|
||||||
const uint32_t *ptarget, uint32_t max_nonce,
|
const uint32_t *ptarget, uint32_t max_nonce,
|
||||||
unsigned long *hashes_done);
|
unsigned long *hashes_done);
|
||||||
@ -355,6 +359,7 @@ void fugue256_hash(unsigned char* output, const unsigned char* input, int len);
|
|||||||
void heavycoin_hash(unsigned char* output, const unsigned char* input, int len);
|
void heavycoin_hash(unsigned char* output, const unsigned char* input, int len);
|
||||||
void groestlhash(void *state, const void *input);
|
void groestlhash(void *state, const void *input);
|
||||||
void myriadhash(void *state, const void *input);
|
void myriadhash(void *state, const void *input);
|
||||||
|
void fresh_hash(void *state, const void *input);
|
||||||
void nist5hash(void *state, const void *input);
|
void nist5hash(void *state, const void *input);
|
||||||
void quarkhash(void *state, const void *input);
|
void quarkhash(void *state, const void *input);
|
||||||
void x11hash(void *output, const void *input);
|
void x11hash(void *output, const void *input);
|
||||||
|
@ -5,6 +5,9 @@
|
|||||||
|
|
||||||
#include "cuda_helper.h"
|
#include "cuda_helper.h"
|
||||||
|
|
||||||
|
#define TPB 256
|
||||||
|
#define THF 4
|
||||||
|
|
||||||
// aus cpu-miner.c
|
// aus cpu-miner.c
|
||||||
extern int device_map[8];
|
extern int device_map[8];
|
||||||
|
|
||||||
@ -18,7 +21,7 @@ static cudaDeviceProp props[8];
|
|||||||
#include "groestl_functions_quad.cu"
|
#include "groestl_functions_quad.cu"
|
||||||
#include "bitslice_transformations_quad.cu"
|
#include "bitslice_transformations_quad.cu"
|
||||||
|
|
||||||
__global__ __launch_bounds__(256, 4)
|
__global__ __launch_bounds__(TPB, THF)
|
||||||
void quark_groestl512_gpu_hash_64_quad(int threads, uint32_t startNounce, uint32_t *g_hash, uint32_t *g_nonceVector)
|
void quark_groestl512_gpu_hash_64_quad(int threads, uint32_t startNounce, uint32_t *g_hash, uint32_t *g_nonceVector)
|
||||||
{
|
{
|
||||||
// durch 4 dividieren, weil jeweils 4 Threads zusammen ein Hash berechnen
|
// durch 4 dividieren, weil jeweils 4 Threads zusammen ein Hash berechnen
|
||||||
@ -60,7 +63,7 @@ void quark_groestl512_gpu_hash_64_quad(int threads, uint32_t startNounce, uint32
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void __launch_bounds__(256, 4)
|
__global__ void __launch_bounds__(TPB, THF)
|
||||||
quark_doublegroestl512_gpu_hash_64_quad(int threads, uint32_t startNounce, uint32_t *g_hash, uint32_t *g_nonceVector)
|
quark_doublegroestl512_gpu_hash_64_quad(int threads, uint32_t startNounce, uint32_t *g_hash, uint32_t *g_nonceVector)
|
||||||
{
|
{
|
||||||
int thread = (blockDim.x * blockIdx.x + threadIdx.x)>>2;
|
int thread = (blockDim.x * blockIdx.x + threadIdx.x)>>2;
|
||||||
@ -125,11 +128,11 @@ __host__ void quark_groestl512_cpu_init(int thr_id, int threads)
|
|||||||
|
|
||||||
__host__ void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
|
__host__ void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
|
||||||
{
|
{
|
||||||
int threadsperblock = 256;
|
int threadsperblock = TPB;
|
||||||
|
|
||||||
// Compute 3.0 benutzt die registeroptimierte Quad Variante mit Warp Shuffle
|
// Compute 3.0 benutzt die registeroptimierte Quad Variante mit Warp Shuffle
|
||||||
// mit den Quad Funktionen brauchen wir jetzt 4 threads pro Hash, daher Faktor 4 bei der Blockzahl
|
// mit den Quad Funktionen brauchen wir jetzt 4 threads pro Hash, daher Faktor 4 bei der Blockzahl
|
||||||
const int factor = 4;
|
const int factor = THF;
|
||||||
|
|
||||||
// berechne wie viele Thread Blocks wir brauchen
|
// berechne wie viele Thread Blocks wir brauchen
|
||||||
dim3 grid(factor*((threads + threadsperblock-1)/threadsperblock));
|
dim3 grid(factor*((threads + threadsperblock-1)/threadsperblock));
|
||||||
@ -146,11 +149,11 @@ __host__ void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t sta
|
|||||||
|
|
||||||
__host__ void quark_doublegroestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
|
__host__ void quark_doublegroestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
|
||||||
{
|
{
|
||||||
int threadsperblock = 256;
|
int threadsperblock = TPB;
|
||||||
|
|
||||||
// Compute 3.0 benutzt die registeroptimierte Quad Variante mit Warp Shuffle
|
// Compute 3.0 benutzt die registeroptimierte Quad Variante mit Warp Shuffle
|
||||||
// mit den Quad Funktionen brauchen wir jetzt 4 threads pro Hash, daher Faktor 4 bei der Blockzahl
|
// mit den Quad Funktionen brauchen wir jetzt 4 threads pro Hash, daher Faktor 4 bei der Blockzahl
|
||||||
const int factor = 4;
|
const int factor = THF;
|
||||||
|
|
||||||
// berechne wie viele Thread Blocks wir brauchen
|
// berechne wie viele Thread Blocks wir brauchen
|
||||||
dim3 grid(factor*((threads + threadsperblock-1)/threadsperblock));
|
dim3 grid(factor*((threads + threadsperblock-1)/threadsperblock));
|
||||||
|
4
util.c
4
util.c
@ -1360,6 +1360,10 @@ void print_hash_tests(void)
|
|||||||
quarkhash(&hash[0], &buf[0]);
|
quarkhash(&hash[0], &buf[0]);
|
||||||
printf("\nquark: "); print_hash(hash);
|
printf("\nquark: "); print_hash(hash);
|
||||||
|
|
||||||
|
memset(hash, 0, sizeof hash);
|
||||||
|
fresh_hash(&hash[0], &buf[0]);
|
||||||
|
printf("\nfresh: "); print_hash(hash);
|
||||||
|
|
||||||
memset(hash, 0, sizeof hash);
|
memset(hash, 0, sizeof hash);
|
||||||
x11hash(&hash[0], &buf[0]);
|
x11hash(&hash[0], &buf[0]);
|
||||||
printf("\nX11: "); print_hash(hash);
|
printf("\nX11: "); print_hash(hash);
|
||||||
|
@ -1,10 +1,11 @@
|
|||||||
#include "cuda_helper.h"
|
#include "cuda_helper.h"
|
||||||
|
|
||||||
|
#define TPB 256
|
||||||
|
|
||||||
// aus heavy.cu
|
// aus heavy.cu
|
||||||
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
|
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
|
||||||
|
|
||||||
//typedef unsigned char BitSequence;
|
__constant__ uint32_t c_PaddedMessage80[32]; // padded message (80 bytes + padding)
|
||||||
//typedef unsigned long long DataLength;
|
|
||||||
|
|
||||||
__device__ __constant__
|
__device__ __constant__
|
||||||
static const uint32_t d_ShaviteInitVector[16] = {
|
static const uint32_t d_ShaviteInitVector[16] = {
|
||||||
@ -16,7 +17,8 @@ static const uint32_t d_ShaviteInitVector[16] = {
|
|||||||
|
|
||||||
#include "cuda_x11_aes.cu"
|
#include "cuda_x11_aes.cu"
|
||||||
|
|
||||||
static __device__ __forceinline__ void AES_ROUND_NOKEY(
|
__device__ __forceinline__
|
||||||
|
static void AES_ROUND_NOKEY(
|
||||||
const uint32_t* __restrict__ sharedMemory,
|
const uint32_t* __restrict__ sharedMemory,
|
||||||
uint32_t &x0, uint32_t &x1, uint32_t &x2, uint32_t &x3)
|
uint32_t &x0, uint32_t &x1, uint32_t &x2, uint32_t &x3)
|
||||||
{
|
{
|
||||||
@ -31,7 +33,8 @@ static __device__ __forceinline__ void AES_ROUND_NOKEY(
|
|||||||
x3 = y3;
|
x3 = y3;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __device__ __forceinline__ void KEY_EXPAND_ELT(
|
__device__ __forceinline__
|
||||||
|
static void KEY_EXPAND_ELT(
|
||||||
const uint32_t* __restrict__ sharedMemory,
|
const uint32_t* __restrict__ sharedMemory,
|
||||||
uint32_t &k0, uint32_t &k1, uint32_t &k2, uint32_t &k3)
|
uint32_t &k0, uint32_t &k1, uint32_t &k2, uint32_t &k3)
|
||||||
{
|
{
|
||||||
@ -46,8 +49,8 @@ static __device__ __forceinline__ void KEY_EXPAND_ELT(
|
|||||||
k3 = y0;
|
k3 = y0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __device__ void
|
__device__
|
||||||
c512(const uint32_t* sharedMemory, uint32_t *state, uint32_t *msg)
|
static void c512(const uint32_t* sharedMemory, uint32_t *state, uint32_t *msg, uint32_t count)
|
||||||
{
|
{
|
||||||
uint32_t p0, p1, p2, p3, p4, p5, p6, p7;
|
uint32_t p0, p1, p2, p3, p4, p5, p6, p7;
|
||||||
uint32_t p8, p9, pA, pB, pC, pD, pE, pF;
|
uint32_t p8, p9, pA, pB, pC, pD, pE, pF;
|
||||||
@ -56,7 +59,7 @@ c512(const uint32_t* sharedMemory, uint32_t *state, uint32_t *msg)
|
|||||||
uint32_t rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F;
|
uint32_t rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F;
|
||||||
uint32_t rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17;
|
uint32_t rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17;
|
||||||
uint32_t rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F;
|
uint32_t rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F;
|
||||||
const uint32_t counter = 512;
|
const uint32_t counter = count;
|
||||||
|
|
||||||
p0 = state[0x0];
|
p0 = state[0x0];
|
||||||
p1 = state[0x1];
|
p1 = state[0x1];
|
||||||
@ -1291,8 +1294,7 @@ c512(const uint32_t* sharedMemory, uint32_t *state, uint32_t *msg)
|
|||||||
state[0xF] ^= p7;
|
state[0xF] ^= p7;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GPU Hash
|
||||||
// Die Hash-Funktion
|
|
||||||
__global__ void x11_shavite512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector)
|
__global__ void x11_shavite512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector)
|
||||||
{
|
{
|
||||||
__shared__ uint32_t sharedMemory[1024];
|
__shared__ uint32_t sharedMemory[1024];
|
||||||
@ -1305,7 +1307,7 @@ __global__ void x11_shavite512_gpu_hash_64(int threads, uint32_t startNounce, ui
|
|||||||
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
|
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
|
||||||
|
|
||||||
int hashPosition = nounce - startNounce;
|
int hashPosition = nounce - startNounce;
|
||||||
uint32_t *Hash = (uint32_t*)&g_hash[8 * hashPosition];
|
uint32_t *Hash = (uint32_t*)&g_hash[hashPosition<<3];
|
||||||
|
|
||||||
// kopiere init-state
|
// kopiere init-state
|
||||||
uint32_t state[16];
|
uint32_t state[16];
|
||||||
@ -1320,7 +1322,7 @@ __global__ void x11_shavite512_gpu_hash_64(int threads, uint32_t startNounce, ui
|
|||||||
// fülle die Nachricht mit 64-byte (vorheriger Hash)
|
// fülle die Nachricht mit 64-byte (vorheriger Hash)
|
||||||
#pragma unroll 16
|
#pragma unroll 16
|
||||||
for(int i=0;i<16;i++)
|
for(int i=0;i<16;i++)
|
||||||
msg[i] = Hash[i];
|
msg[i] = Hash[i];
|
||||||
|
|
||||||
// Nachrichtenende
|
// Nachrichtenende
|
||||||
msg[16] = 0x80;
|
msg[16] = 0x80;
|
||||||
@ -1334,7 +1336,7 @@ __global__ void x11_shavite512_gpu_hash_64(int threads, uint32_t startNounce, ui
|
|||||||
msg[30] = 0;
|
msg[30] = 0;
|
||||||
msg[31] = 0x02000000;
|
msg[31] = 0x02000000;
|
||||||
|
|
||||||
c512(sharedMemory, state, msg);
|
c512(sharedMemory, state, msg, 512);
|
||||||
|
|
||||||
#pragma unroll 16
|
#pragma unroll 16
|
||||||
for(int i=0;i<16;i++)
|
for(int i=0;i<16;i++)
|
||||||
@ -1342,8 +1344,46 @@ __global__ void x11_shavite512_gpu_hash_64(int threads, uint32_t startNounce, ui
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Shavite-512 over an 80-byte input, one hash per thread.
 *
 * Launch layout: 1D grid / 1D block; thread i (flat index) processes nonce
 * startNounce + i. Threads whose flat index is >= `threads` skip the hashing.
 * Shared memory: 1024 x uint32_t of static shared memory, filled by
 * aes_gpu_init().
 * Precondition: c_PaddedMessage80 must already hold the message words
 * (it is uploaded by x11_shavite512_setBlock_80).
 *
 * threads      number of nonces to process
 * startNounce  nonce handled by thread 0
 * outputHash   device buffer receiving 16 uint32_t per thread
 */
__global__ void x11_shavite512_gpu_hash_80(int threads, uint32_t startNounce, void *outputHash)
{
	__shared__ uint32_t sharedMemory[1024];

	aes_gpu_init(sharedMemory);

	// Barrier between filling the shared table and reading it in c512().
	// NOTE(review): aes_gpu_init() is defined in cuda_x11_aes.cu (not shown
	// here); presumably different threads write different table entries, so
	// every thread has to wait until the whole table is populated. The
	// barrier sits outside the `thread < threads` guard so that all threads
	// of the block reach it.
	__syncthreads();

	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		const uint32_t nounce = startNounce + thread;

		// copy the init state
		uint32_t state[16];

		#pragma unroll 16
		for (int i = 0; i < 16; i++) {
			state[i] = d_ShaviteInitVector[i];
		}

		// start from the message prepared by the host ...
		uint32_t msg[32];

		#pragma unroll 32
		for (int i = 0; i < 32; i++) {
			msg[i] = c_PaddedMessage80[i];
		}

		// ... then patch in the per-thread nonce (word 19, byte-swapped)
		// and the trailer words.
		msg[19] = cuda_swab32(nounce);
		msg[20] = 0x80;
		msg[27] = 0x2800000; // 0x0280 (= 640) in the upper half-word, same value as the count passed to c512()
		msg[31] = 0x2000000; // 0x0200 (= 512) in the upper half-word

		c512(sharedMemory, state, msg, 640);

		// each thread owns 16 consecutive words of the output buffer
		uint32_t *outHash = (uint32_t *)outputHash + 16 * thread;

		#pragma unroll 16
		for (int i = 0; i < 16; i++)
			outHash[i] = state[i];

	} // thread < threads
}
|
||||||
|
|
||||||
// Setup-Funktionen
|
|
||||||
__host__ void x11_shavite512_cpu_init(int thr_id, int threads)
|
__host__ void x11_shavite512_cpu_init(int thr_id, int threads)
|
||||||
{
|
{
|
||||||
aes_cpu_init();
|
aes_cpu_init();
|
||||||
@ -1351,15 +1391,40 @@ __host__ void x11_shavite512_cpu_init(int thr_id, int threads)
|
|||||||
|
|
||||||
__host__ void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
|
__host__ void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
|
||||||
{
|
{
|
||||||
const int threadsperblock = 256;
|
const int threadsperblock = TPB;
|
||||||
|
|
||||||
// berechne wie viele Thread Blocks wir brauchen
|
// berechne wie viele Thread Blocks wir brauchen
|
||||||
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
||||||
dim3 block(threadsperblock);
|
dim3 block(threadsperblock);
|
||||||
|
|
||||||
// Größe des dynamischen Shared Memory Bereichs
|
|
||||||
size_t shared_size = 0;
|
size_t shared_size = 0;
|
||||||
|
|
||||||
x11_shavite512_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
|
x11_shavite512_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
|
||||||
MyStreamSynchronize(NULL, order, thr_id);
|
MyStreamSynchronize(NULL, order, thr_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Host-side launcher for x11_shavite512_gpu_hash_80.
 *
 * Launches ceil(threads / TPB) blocks of TPB threads on the default stream,
 * without dynamic shared memory, then calls MyStreamSynchronize().
 *
 * thr_id        miner thread id, forwarded to MyStreamSynchronize()
 * threads       number of nonces to hash
 * startNounce   first nonce of the batch
 * d_outputHash  device buffer handed to the kernel as its output
 * order         forwarded to MyStreamSynchronize() as its "situation" argument
 */
__host__ void x11_shavite512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash, int order)
{
	// number of thread blocks needed to cover `threads` (rounded up)
	const int blockCount = (threads + TPB - 1) / TPB;

	const dim3 launchGrid(blockCount);
	const dim3 launchBlock(TPB);

	x11_shavite512_gpu_hash_80<<<launchGrid, launchBlock, 0>>>(threads, startNounce, d_outputHash);
	MyStreamSynchronize(NULL, order, thr_id);
}
|
||||||
|
|
||||||
|
/**
 * Upload the 80-byte message to the c_PaddedMessage80 constant symbol.
 *
 * The 80 bytes at `pdata` are placed at the start of a 128-byte buffer whose
 * remaining 48 bytes are zero; the whole buffer is then copied to the device.
 * Only the nonce (from byte 76 on) still has to be filled in afterwards,
 * which x11_shavite512_gpu_hash_80 does per thread.
 *
 * pdata  pointer to at least 80 readable bytes
 */
__host__ void x11_shavite512_setBlock_80(void *pdata)
{
	// zero-initialised staging buffer: bytes 80..127 stay 0
	unsigned char paddedHeader[128] = { 0 };
	memcpy(paddedHeader, pdata, 80);

	// NOTE(review): the cudaError_t returned here is not checked.
	cudaMemcpyToSymbol(c_PaddedMessage80, paddedHeader, sizeof(paddedHeader), 0, cudaMemcpyHostToDevice);
}
|
||||||
|
|
||||||
|
169
x11/fresh.cu
Normal file
169
x11/fresh.cu
Normal file
@ -0,0 +1,169 @@
|
|||||||
|
/**
|
||||||
|
* Fresh algorithm
|
||||||
|
*/
|
||||||
|
extern "C" {
|
||||||
|
#include "sph/sph_shavite.h"
|
||||||
|
#include "sph/sph_simd.h"
|
||||||
|
#include "sph/sph_echo.h"
|
||||||
|
#include "miner.h"
|
||||||
|
#include "cuda_helper.h"
|
||||||
|
}
|
||||||
|
|
||||||
|
// to test gpu hash on a null buffer
|
||||||
|
#define NULLTEST 0
|
||||||
|
|
||||||
|
static uint32_t *d_hash[8];
|
||||||
|
|
||||||
|
extern int device_map[8];
|
||||||
|
extern bool opt_benchmark;
|
||||||
|
|
||||||
|
extern void x11_shavite512_cpu_init(int thr_id, int threads);
|
||||||
|
extern void x11_shavite512_setBlock_80(void *pdata);
|
||||||
|
extern void x11_shavite512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
|
||||||
|
extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
|
||||||
|
extern void x11_simd512_cpu_init(int thr_id, int threads);
|
||||||
|
extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
|
||||||
|
extern void x11_echo512_cpu_init(int thr_id, int threads);
|
||||||
|
extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
|
||||||
|
extern void cuda_check_cpu_init(int thr_id, int threads);
|
||||||
|
extern void cuda_check_cpu_setTarget(const void *ptarget);
|
||||||
|
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
|
||||||
|
|
||||||
|
extern void quark_compactTest_cpu_init(int thr_id, int threads);
|
||||||
|
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
|
||||||
|
uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
|
||||||
|
int order);
|
||||||
|
|
||||||
|
// CPU Hash
/**
 * CPU implementation of the Fresh hash.
 *
 * Chain: shavite512 (80 input bytes) -> simd512 -> shavite512 -> simd512
 *        -> echo512, each later stage consuming the previous 64-byte digest.
 *
 * state  receives the first 32 bytes of the final digest
 * input  80 bytes to hash
 */
extern "C" void fresh_hash(void *state, const void *input)
{
	sph_shavite512_context ctx_shavite;
	sph_simd512_context ctx_simd;
	sph_echo512_context ctx_echo;

	// Two 64-byte digest slots that alternate as input/output of the stages.
	// Plain local pointers are used instead of function-local #defines so
	// that no macro leaks into the rest of the translation unit.
	unsigned char hash[128];
	unsigned char *const hashA = hash;
	unsigned char *const hashB = hash + 64;

	memset(hash, 0, sizeof hash);

	sph_shavite512_init(&ctx_shavite);
	sph_shavite512(&ctx_shavite, input, 80);
	sph_shavite512_close(&ctx_shavite, hashA);

	sph_simd512_init(&ctx_simd);
	sph_simd512(&ctx_simd, hashA, 64);
	sph_simd512_close(&ctx_simd, hashB);

	sph_shavite512_init(&ctx_shavite);
	sph_shavite512(&ctx_shavite, hashB, 64);
	sph_shavite512_close(&ctx_shavite, hashA);

	sph_simd512_init(&ctx_simd);
	sph_simd512(&ctx_simd, hashA, 64);
	sph_simd512_close(&ctx_simd, hashB);

	sph_echo512_init(&ctx_echo);
	sph_echo512(&ctx_echo, hashB, 64);
	sph_echo512_close(&ctx_echo, hashA);

	// the final digest ends up in the first slot; callers get its first 32 bytes
	memcpy(state, hash, 32);
}
|
||||||
|
|
||||||
|
#if NULLTEST
/* Debug helper: print the first 32 bytes of `hash` as eight space-separated
 * groups of four hex bytes (no trailing newline). */
static void print_hash(unsigned char *hash)
{
	const unsigned char *const end = hash + 32;
	for (const unsigned char *word = hash; word != end; word += 4) {
		printf("%02x%02x%02x%02x ", word[0], word[1], word[2], word[3]);
	}
}
#endif
|
||||||
|
|
||||||
|
/**
 * Scan nonces for the Fresh algorithm on the GPU mapped to `thr_id`.
 *
 * Starting at pdata[19], batches of `throughput` nonces are hashed on the
 * device (shavite80 -> simd -> shavite -> simd -> echo). A candidate reported
 * by cuda_check_cpu_hash_64() is re-hashed on the CPU with fresh_hash() and
 * only accepted if it also passes fulltest().
 *
 * thr_id       miner thread index (indexes init[], d_hash[], device_map[],
 *              work_restart[])
 * pdata        20 words of block header; pdata[19] is the nonce and is
 *              updated in place
 * ptarget      target words; in benchmark mode word 7 is overwritten
 * max_nonce    scanning stops once pdata[19] reaches this value
 * hashes_done  out: number of hashes accounted for this call
 *
 * Returns 1 when a valid nonce was found (stored in pdata[19]), 0 otherwise.
 */
extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata,
	const uint32_t *ptarget, uint32_t max_nonce,
	unsigned long *hashes_done)
{
	const uint32_t first_nonce = pdata[19];
	const int throughput = 256*256*8;
	static bool init[8] = {0,0,0,0,0,0,0,0};
	uint32_t endiandata[20];
	uint32_t Htarg = ptarget[7];
	// nonces hashed so far in this call; kept separately from pdata[19] so
	// the statistic stays correct even when the nonce counter saturates
	uint64_t scanned = 0;

	// benchmark mode forces a fixed target word; note that this writes
	// through the const-qualified ptarget
	if (opt_benchmark)
		((uint32_t*)ptarget)[7] = Htarg = 0x0000ff;

#if NULLTEST
	for (int k=0; k < 20; k++)
		pdata[k] = 0;
#endif

	// one-time per-thread setup: select the device, allocate the hash
	// buffer (16 words per nonce) and initialise the kernels used below
	if (!init[thr_id])
	{
		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));

		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput + 4));

		x11_shavite512_cpu_init(thr_id, throughput);
		x11_simd512_cpu_init(thr_id, throughput);
		x11_echo512_cpu_init(thr_id, throughput);

		cuda_check_cpu_init(thr_id, throughput);

		init[thr_id] = true;
	}

	for (int k=0; k < 20; k++)
		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);

	x11_shavite512_setBlock_80((void*)endiandata);
	cuda_check_cpu_setTarget(ptarget);

	do {
		uint32_t foundNonce;
		int order = 0;

		// GPU Hash
		x11_shavite512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
		x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);

#if NULLTEST
		uint32_t buf[8]; memset(buf, 0, sizeof buf);
		CUDA_SAFE_CALL(cudaMemcpy(buf, d_hash[thr_id], sizeof buf, cudaMemcpyDeviceToHost));
		CUDA_SAFE_CALL(cudaThreadSynchronize());
		print_hash((unsigned char*)buf); printf("\n");
#endif

		foundNonce = cuda_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		if (foundNonce != 0xffffffff)
		{
			// double-check the GPU candidate on the CPU before reporting it
			uint32_t vhash64[8];
			be32enc(&endiandata[19], foundNonce);
			fresh_hash(vhash64, endiandata);

			if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
				pdata[19] = foundNonce;
				*hashes_done = foundNonce - first_nonce + 1;
				return 1;
			}
			else if (vhash64[7] > Htarg) {
				applog(LOG_INFO, "GPU #%d: result for %08x is not in range: %x > %x", thr_id, foundNonce, vhash64[7], Htarg);
			}
			else {
				applog(LOG_INFO, "GPU #%d: result for %08x does not validate on CPU!", thr_id, foundNonce);
			}
		}

		scanned += (uint64_t)throughput;

		// Advance the nonce in 64-bit arithmetic. A plain 32-bit
		// `pdata[19] += throughput` could wrap past 0xffffffff, which would
		// make `pdata[19] < max_nonce` true again and restart the scan from
		// low nonces. On overflow, saturate and stop instead.
		const uint64_t next_nonce = (uint64_t)pdata[19] + (uint64_t)throughput;
		if (next_nonce > 0xffffffffULL) {
			pdata[19] = 0xffffffffU;
			break;
		}
		pdata[19] = (uint32_t)next_nonce;

	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);

	// same value as the previous `pdata[19] - first_nonce + 1` whenever the
	// nonce did not overflow (the + 1 is kept from the original accounting)
	*hashes_done = (unsigned long)(scanned + 1);
	return 0;
}
|
Loading…
x
Reference in New Issue
Block a user