Merge branch 'qubit'
This commit is contained in:
commit
80d6e09ca6
@ -5,14 +5,13 @@ extern "C"
|
||||
#include "sph/sph_groestl.h"
|
||||
#include "sph/sph_jh.h"
|
||||
#include "sph/sph_skein.h"
|
||||
#include "miner.h"
|
||||
#include "cuda_helper.h"
|
||||
}
|
||||
|
||||
// aus cpu-miner.c
|
||||
#include "miner.h"
|
||||
#include "cuda_helper.h"
|
||||
|
||||
extern int device_map[8];
|
||||
|
||||
// Speicher für Input/Output der verketteten Hashfunktionen
|
||||
static uint32_t *d_hash[8];
|
||||
|
||||
extern void jackpot_keccak512_cpu_init(int thr_id, int threads);
|
||||
@ -31,10 +30,6 @@ extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounc
|
||||
extern void quark_skein512_cpu_init(int thr_id, int threads);
|
||||
extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
|
||||
extern void cuda_check_cpu_init(int thr_id, int threads);
|
||||
extern void cuda_check_cpu_setTarget(const void *ptarget);
|
||||
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
|
||||
|
||||
extern void jackpot_compactTest_cpu_init(int thr_id, int threads);
|
||||
extern void jackpot_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
|
||||
uint32_t *d_nonces1, size_t *nrm1,
|
||||
|
@ -29,19 +29,22 @@ ccminer_SOURCES = elist.h miner.h compat.h \
|
||||
groestlcoin.cpp cuda_groestlcoin.cu cuda_groestlcoin.h \
|
||||
myriadgroestl.cpp cuda_myriadgroestl.cu \
|
||||
JHA/jackpotcoin.cu JHA/cuda_jha_keccak512.cu \
|
||||
JHA/cuda_jha_compactionTest.cu quark/cuda_checkhash.cu \
|
||||
JHA/cuda_jha_compactionTest.cu cuda_checkhash.cu \
|
||||
quark/cuda_jh512.cu quark/cuda_quark_blake512.cu quark/cuda_quark_groestl512.cu quark/cuda_skein512.cu \
|
||||
quark/cuda_bmw512.cu quark/cuda_quark_keccak512.cu quark/quarkcoin.cu quark/animecoin.cu \
|
||||
quark/cuda_bmw512.cu quark/cuda_quark_keccak512.cu \
|
||||
quark/quarkcoin.cu quark/animecoin.cu \
|
||||
quark/cuda_quark_compactionTest.cu \
|
||||
cuda_nist5.cu blake32.cu pentablake.cu \
|
||||
sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c \
|
||||
sph/cubehash.c sph/echo.c sph/luffa.c sph/sha2.c sph/shavite.c sph/simd.c \
|
||||
sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \
|
||||
sph/shabal.c sph/whirlpool.c sph/sha2big.c sph/haval.c \
|
||||
qubit/qubit.cu qubit/qubit_luffa512.cu qubit/deep.cu qubit/doom.cu \
|
||||
x11/x11.cu x11/fresh.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
|
||||
x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \
|
||||
x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu \
|
||||
x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu x15/whirlpool.cu \
|
||||
x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu \
|
||||
x15/whirlpool.cu \
|
||||
x17/x17.cu x17/cuda_x17_haval512.cu x17/cuda_x17_sha512.cu
|
||||
|
||||
|
||||
|
11
README.txt
11
README.txt
@ -1,5 +1,5 @@
|
||||
|
||||
ccMiner release 1.4.2-tpruvot (Sep 09th 2014) - "Pentablake"
|
||||
ccMiner release 1.4.3-tpruvot (Sep 11th 2014) - "DJM34 Algos"
|
||||
---------------------------------------------------------------
|
||||
|
||||
***************************************************************
|
||||
@ -35,6 +35,7 @@ TalkCoin
|
||||
DarkCoin and other X11 coins
|
||||
NEOS blake (256 14-rounds)
|
||||
BlakeCoin (256 8-rounds)
|
||||
Deep, Doom, Goalcoin and Qubit
|
||||
Pentablake (Blake 512 x5)
|
||||
|
||||
where some of these coins have a VERY NOTABLE nVidia advantage
|
||||
@ -56,12 +57,15 @@ its command line interface and options.
|
||||
-a, --algo=ALGO specify the algorithm to use
|
||||
heavy use to mine Heavycoin
|
||||
mjollnir use to mine Mjollnircoin
|
||||
deep use to mine Deepcoin
|
||||
doom use to mine Doomcoin
|
||||
fugue256 use to mine Fuguecoin
|
||||
groestl use to mine Groestlcoin
|
||||
dmd-gr use to mine Diamond-Groestl
|
||||
myr-gr use to mine Myriad-Groestl
|
||||
jackpot use to mine Jackpotcoin
|
||||
quark use to mine Quarkcoin
|
||||
qubit use to mine Qubit Algo
|
||||
anime use to mine Animecoin
|
||||
blake use to mine NEOS (Blake 256)
|
||||
blakecoin use to mine Old Blake 256
|
||||
@ -103,7 +107,7 @@ its command line interface and options.
|
||||
--benchmark run in offline benchmark mode
|
||||
--cputest debug hashes from cpu algorithms
|
||||
-c, --config=FILE load a JSON-format configuration file
|
||||
-C, --color display colored output in a linux Terminal
|
||||
-K, --nocolor disable colored console output
|
||||
-V, --version display version information and exit
|
||||
-h, --help display this help text and exit
|
||||
|
||||
@ -154,6 +158,9 @@ features.
|
||||
|
||||
>>> RELEASE HISTORY <<<
|
||||
|
||||
Sep 11th 2014
|
||||
add algos from djm34 (deep,doom,qubit)
|
||||
|
||||
Sep. 1st 2014 add X17, optimized x15 and whirl
|
||||
add blake (256 variant)
|
||||
color support on Windows,
|
||||
|
@ -7,6 +7,8 @@
|
||||
#define __shfl(var, srcLane, width) (uint32_t)(var)
|
||||
#endif
|
||||
|
||||
#ifdef __CUDA_ARCH__
|
||||
|
||||
__device__ __forceinline__
|
||||
void to_bitslice_quad(uint32_t *input, uint32_t *output)
|
||||
{
|
||||
@ -429,3 +431,11 @@ void from_bitslice_quad(uint32_t *input, uint32_t *output)
|
||||
if (threadIdx.x % 4) output[i] = output[i+1] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* host "fake" functions */
|
||||
#define from_bitslice_quad(st, out)
|
||||
#define to_bitslice_quad(in, msg) in[0] = (uint32_t) in[0];
|
||||
|
||||
#endif /* device only code */
|
||||
|
@ -432,7 +432,7 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
|
||||
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
|
||||
<TargetMachinePlatform Condition="'$(Platform)'=='x64'">64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="quark\cuda_checkhash.cu">
|
||||
<CudaCompile Include="cuda_checkhash.cu">
|
||||
<CInterleavedPTX>false</CInterleavedPTX>
|
||||
<AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
|
||||
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
|
||||
@ -463,6 +463,26 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
|
||||
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
|
||||
<TargetMachinePlatform Condition="'$(Platform)'=='x64'">64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="qubit\deep.cu">
|
||||
<AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
|
||||
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
|
||||
<TargetMachinePlatform Condition="'$(Platform)'=='x64'">64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="qubit\doom.cu">
|
||||
<AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
|
||||
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
|
||||
<TargetMachinePlatform Condition="'$(Platform)'=='x64'">64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="qubit\qubit.cu">
|
||||
<AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
|
||||
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
|
||||
<TargetMachinePlatform Condition="'$(Platform)'=='x64'">64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="qubit\qubit_luffa512.cu">
|
||||
<AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
|
||||
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
|
||||
<TargetMachinePlatform Condition="'$(Platform)'=='x64'">64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="x11\cuda_x11_aes.cu">
|
||||
<ExcludedFromBuild>true</ExcludedFromBuild>
|
||||
</CudaCompile>
|
||||
|
@ -52,6 +52,9 @@
|
||||
<Filter Include="Header Files\CUDA\heavy">
|
||||
<UniqueIdentifier>{3281db48-f394-49ea-a1ef-6ebd09828d50}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="Source Files\CUDA\qubit">
|
||||
<UniqueIdentifier>{f3ed23a2-8ce7-41a5-b051-6da56047dc35}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="Source Files\CUDA\x11">
|
||||
<UniqueIdentifier>{dd751f2d-bfd6-42c1-8f9b-cbe94e539353}</UniqueIdentifier>
|
||||
</Filter>
|
||||
@ -376,6 +379,18 @@
|
||||
<CudaCompile Include="quark\quarkcoin.cu">
|
||||
<Filter>Source Files\CUDA\quark</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="qubit\deep.cu">
|
||||
<Filter>Source Files\CUDA\qubit</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="qubit\doom.cu">
|
||||
<Filter>Source Files\CUDA\qubit</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="qubit\qubit.cu">
|
||||
<Filter>Source Files\CUDA\qubit</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="qubit\qubit_luffa512.cu">
|
||||
<Filter>Source Files\CUDA\qubit</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="cuda_nist5.cu">
|
||||
<Filter>Source Files\CUDA</Filter>
|
||||
</CudaCompile>
|
||||
@ -415,7 +430,7 @@
|
||||
<CudaCompile Include="x13\x13.cu">
|
||||
<Filter>Source Files\CUDA\x13</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="quark\cuda_checkhash.cu">
|
||||
<CudaCompile Include="cuda_checkhash.cu">
|
||||
<Filter>Source Files\CUDA</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="x15\x14.cu">
|
||||
|
@ -1,4 +1,4 @@
|
||||
AC_INIT([ccminer], [2014.09.09])
|
||||
AC_INIT([ccminer], [2014.09.11])
|
||||
|
||||
AC_PREREQ([2.59c])
|
||||
AC_CANONICAL_SYSTEM
|
||||
|
39
cpu-miner.c
39
cpu-miner.c
@ -129,6 +129,8 @@ typedef enum {
|
||||
ALGO_ANIME,
|
||||
ALGO_BLAKE,
|
||||
ALGO_BLAKECOIN,
|
||||
ALGO_DEEP,
|
||||
ALGO_DOOM,
|
||||
ALGO_FRESH,
|
||||
ALGO_FUGUE256, /* Fugue256 */
|
||||
ALGO_GROESTL,
|
||||
@ -139,6 +141,7 @@ typedef enum {
|
||||
ALGO_NIST5,
|
||||
ALGO_PENTABLAKE,
|
||||
ALGO_QUARK,
|
||||
ALGO_QUBIT,
|
||||
ALGO_WHC,
|
||||
ALGO_X11,
|
||||
ALGO_X13,
|
||||
@ -152,6 +155,8 @@ static const char *algo_names[] = {
|
||||
"anime",
|
||||
"blake",
|
||||
"blakecoin",
|
||||
"deep",
|
||||
"doom",
|
||||
"fresh",
|
||||
"fugue256",
|
||||
"groestl",
|
||||
@ -162,6 +167,7 @@ static const char *algo_names[] = {
|
||||
"nist5",
|
||||
"penta",
|
||||
"quark",
|
||||
"qubit",
|
||||
"whirl",
|
||||
"x11",
|
||||
"x13",
|
||||
@ -235,6 +241,8 @@ Options:\n\
|
||||
anime Animecoin hash\n\
|
||||
blake Blake 256 (like NEOS blake)\n\
|
||||
blakecoin Old Blake 256 (8 rounds)\n\
|
||||
deep Deepcoin hash\n\
|
||||
doom Doomcoin hash\n\
|
||||
fresh Freshcoin hash (shavite 80)\n\
|
||||
fugue256 Fuguecoin hash\n\
|
||||
groestl Groestlcoin hash\n\
|
||||
@ -245,6 +253,7 @@ Options:\n\
|
||||
nist5 NIST5 (TalkCoin) hash\n\
|
||||
penta Pentablake hash (5x Blake 512)\n\
|
||||
quark Quark hash\n\
|
||||
qubit Qubit hash\n\
|
||||
whirl Whirlcoin (old whirlpool)\n\
|
||||
x11 X11 (DarkCoin) hash\n\
|
||||
x13 X13 (MaruCoin) hash\n\
|
||||
@ -275,7 +284,7 @@ Options:\n\
|
||||
--no-longpoll disable X-Long-Polling support\n\
|
||||
--no-stratum disable X-Stratum support\n\
|
||||
-q, --quiet disable per-thread hashmeter output\n\
|
||||
-C, --color enable colored output\n\
|
||||
-K, --nocolor disable colored output\n\
|
||||
-D, --debug enable debug output\n\
|
||||
-P, --protocol-dump verbose dump of protocol-level activities\n"
|
||||
#ifdef HAVE_SYSLOG_H
|
||||
@ -301,7 +310,7 @@ static char const short_options[] =
|
||||
#ifdef HAVE_SYSLOG_H
|
||||
"S"
|
||||
#endif
|
||||
"a:c:CDhp:Px:qr:R:s:t:T:o:u:O:Vd:f:mv:";
|
||||
"a:c:CKDhp:Px:qr:R:s:t:T:o:u:O:Vd:f:mv:";
|
||||
|
||||
static struct option const options[] = {
|
||||
{ "algo", 1, NULL, 'a' },
|
||||
@ -312,7 +321,7 @@ static struct option const options[] = {
|
||||
{ "cputest", 0, NULL, 1006 },
|
||||
{ "cert", 1, NULL, 1001 },
|
||||
{ "config", 1, NULL, 'c' },
|
||||
{ "color", 0, NULL, 'C' },
|
||||
{ "nocolor", 0, NULL, 'K' },
|
||||
{ "debug", 0, NULL, 'D' },
|
||||
{ "help", 0, NULL, 'h' },
|
||||
{ "no-longpoll", 0, NULL, 1003 },
|
||||
@ -1083,6 +1092,16 @@ continue_scan:
|
||||
max_nonce, &hashes_done, 0, MNR_BLKHDR_SZ);
|
||||
break;
|
||||
|
||||
case ALGO_DEEP:
|
||||
rc = scanhash_deep(thr_id, work.data, work.target,
|
||||
max_nonce, &hashes_done);
|
||||
break;
|
||||
|
||||
case ALGO_DOOM:
|
||||
rc = scanhash_doom(thr_id, work.data, work.target,
|
||||
max_nonce, &hashes_done);
|
||||
break;
|
||||
|
||||
case ALGO_FUGUE256:
|
||||
rc = scanhash_fugue256(thr_id, work.data, work.target,
|
||||
max_nonce, &hashes_done);
|
||||
@ -1109,6 +1128,11 @@ continue_scan:
|
||||
max_nonce, &hashes_done);
|
||||
break;
|
||||
|
||||
case ALGO_QUBIT:
|
||||
rc = scanhash_qubit(thr_id, work.data, work.target,
|
||||
max_nonce, &hashes_done);
|
||||
break;
|
||||
|
||||
case ALGO_ANIME:
|
||||
rc = scanhash_anime(thr_id, work.data, work.target,
|
||||
max_nonce, &hashes_done);
|
||||
@ -1427,7 +1451,7 @@ out:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#define PROGRAM_VERSION "1.4.2"
|
||||
#define PROGRAM_VERSION "1.4.3"
|
||||
static void show_version_and_exit(void)
|
||||
{
|
||||
printf("%s v%s\n"
|
||||
@ -1489,8 +1513,12 @@ static void parse_arg (int key, char *arg)
|
||||
break;
|
||||
}
|
||||
case 'C':
|
||||
/* color for compat */
|
||||
use_colors = true;
|
||||
break;
|
||||
case 'K':
|
||||
use_colors = false;
|
||||
break;
|
||||
case 'D':
|
||||
opt_debug = true;
|
||||
break;
|
||||
@ -1801,8 +1829,7 @@ int main(int argc, char *argv[])
|
||||
#endif
|
||||
printf("\t based on pooler-cpuminer 2.3.2 (c) 2010 Jeff Garzik, 2012 pooler\n");
|
||||
printf("\t and HVC extension from http://hvc.1gh.com/" "\n\n");
|
||||
printf("\tCuda additions Copyright 2014 Christian Buchner, Christian H.\n");
|
||||
printf("\t BTC donation address: 16hJF5mceSojnTD3ZTUDqdRhDyPJzoRakM\n");
|
||||
printf("\tCuda additions Copyright 2014 Christian Buchner, Christian H.\n\n");
|
||||
printf("\tInclude some of djm34 additions, cleaned by Tanguy Pruvot\n");
|
||||
printf("\t BTC donation address: 1AJdfCpLWPNoAMDfHF1wD5y8VgKSSTHxPo\n\n");
|
||||
|
||||
|
@ -156,7 +156,7 @@
|
||||
#define PACKAGE_NAME "ccminer"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "ccminer 2014.09.06"
|
||||
#define PACKAGE_STRING "ccminer 2014.09.11"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "ccminer"
|
||||
@ -165,7 +165,7 @@
|
||||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "2014.09.06"
|
||||
#define PACKAGE_VERSION "2014.09.11"
|
||||
|
||||
/* If using the C implementation of alloca, define if you know the
|
||||
direction of stack growth for your system; otherwise it will be
|
||||
@ -188,7 +188,7 @@
|
||||
#define USE_XOP 1
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "2014.09.06"
|
||||
#define VERSION "2014.09.11"
|
||||
|
||||
/* Define curl_free() as free() if our version of curl lacks curl_free. */
|
||||
/* #undef curl_free */
|
||||
|
@ -9,66 +9,56 @@ __constant__ uint32_t pTarget[8];
|
||||
static uint32_t *d_resNounce[8];
|
||||
static uint32_t *h_resNounce[8];
|
||||
|
||||
// aus heavy.cu
|
||||
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
|
||||
|
||||
__global__ void cuda_check_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_nonceVector, uint32_t *g_hash, uint32_t *resNounce)
|
||||
__global__
|
||||
void cuda_check_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_nonceVector, uint32_t *g_hash, uint32_t *resNounce)
|
||||
{
|
||||
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||
if (thread < threads)
|
||||
{
|
||||
// bestimme den aktuellen Zähler
|
||||
// bestimme den aktuellen Zähler
|
||||
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
|
||||
|
||||
int hashPosition = nounce - startNounce;
|
||||
uint32_t *inpHash = &g_hash[16 * hashPosition];
|
||||
uint32_t *inpHash = &g_hash[hashPosition<<4];
|
||||
|
||||
uint32_t hash[8];
|
||||
#pragma unroll 8
|
||||
|
||||
#pragma unroll 8
|
||||
for (int i=0; i < 8; i++)
|
||||
hash[i] = inpHash[i];
|
||||
|
||||
// kopiere Ergebnis
|
||||
int i, position = -1;
|
||||
bool rc = true;
|
||||
|
||||
#pragma unroll 8
|
||||
for (i = 7; i >= 0; i--) {
|
||||
for (int i = 7; i >= 0; i--) {
|
||||
if (hash[i] > pTarget[i]) {
|
||||
if(position < i) {
|
||||
position = i;
|
||||
rc = false;
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (hash[i] < pTarget[i]) {
|
||||
if(position < i) {
|
||||
position = i;
|
||||
rc = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(rc == true)
|
||||
if(resNounce[0] > nounce)
|
||||
resNounce[0] = nounce;
|
||||
}
|
||||
}
|
||||
|
||||
// Setup-Funktionen
|
||||
__host__ void cuda_check_cpu_init(int thr_id, int threads)
|
||||
__host__
|
||||
void cuda_check_cpu_init(int thr_id, int threads)
|
||||
{
|
||||
cudaMallocHost(&h_resNounce[thr_id], 1*sizeof(uint32_t));
|
||||
cudaMalloc(&d_resNounce[thr_id], 1*sizeof(uint32_t));
|
||||
}
|
||||
|
||||
// Target Difficulty setzen
|
||||
__host__ void cuda_check_cpu_setTarget(const void *ptarget)
|
||||
__host__
|
||||
void cuda_check_cpu_setTarget(const void *ptarget)
|
||||
{
|
||||
// die Message zur Berechnung auf der GPU
|
||||
cudaMemcpyToSymbol( pTarget, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice);
|
||||
cudaMemcpyToSymbol(pTarget, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
__host__ uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order)
|
||||
__host__
|
||||
uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order)
|
||||
{
|
||||
uint32_t result = 0xffffffff;
|
||||
cudaMemset(d_resNounce[thr_id], 0xff, sizeof(uint32_t));
|
||||
@ -79,7 +69,7 @@ __host__ uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t start
|
||||
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
||||
dim3 block(threadsperblock);
|
||||
|
||||
// Größe des dynamischen Shared Memory Bereichs
|
||||
// Größe des dynamischen Shared Memory Bereichs
|
||||
size_t shared_size = 0;
|
||||
|
||||
cuda_check_gpu_hash_64 <<<grid, block, shared_size>>>(threads, startNounce, d_nonceVector, d_inputHash, d_resNounce[thr_id]);
|
@ -12,6 +12,10 @@
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
// common functions
|
||||
extern void cuda_check_cpu_init(int thr_id, int threads);
|
||||
extern void cuda_check_cpu_setTarget(const void *ptarget);
|
||||
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
|
||||
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
|
||||
|
||||
extern __device__ __device_builtin__ void __syncthreads(void);
|
||||
|
@ -11,7 +11,7 @@ extern "C"
|
||||
|
||||
#include "cuda_helper.h"
|
||||
|
||||
// aus cpu-miner.c
|
||||
// in cpu-miner.c
|
||||
extern int device_map[8];
|
||||
|
||||
// Speicher für Input/Output der verketteten Hashfunktionen
|
||||
@ -33,9 +33,6 @@ extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startN
|
||||
extern void quark_skein512_cpu_init(int thr_id, int threads);
|
||||
extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
|
||||
extern void cuda_check_cpu_init(int thr_id, int threads);
|
||||
extern void cuda_check_cpu_setTarget(const void *ptarget);
|
||||
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
|
||||
|
||||
// Original nist5hash Funktion aus einem miner Quelltext
|
||||
extern "C" void nist5hash(void *state, const void *input)
|
||||
@ -46,30 +43,25 @@ extern "C" void nist5hash(void *state, const void *input)
|
||||
sph_keccak512_context ctx_keccak;
|
||||
sph_skein512_context ctx_skein;
|
||||
|
||||
unsigned char hash[64];
|
||||
uint8_t hash[64];
|
||||
|
||||
sph_blake512_init(&ctx_blake);
|
||||
// ZBLAKE;
|
||||
sph_blake512 (&ctx_blake, input, 80);
|
||||
sph_blake512_close(&ctx_blake, (void*) hash);
|
||||
|
||||
sph_groestl512_init(&ctx_groestl);
|
||||
// ZGROESTL;
|
||||
sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
|
||||
sph_groestl512_close(&ctx_groestl, (void*) hash);
|
||||
|
||||
sph_jh512_init(&ctx_jh);
|
||||
// ZJH;
|
||||
sph_jh512 (&ctx_jh, (const void*) hash, 64);
|
||||
sph_jh512_close(&ctx_jh, (void*) hash);
|
||||
|
||||
sph_keccak512_init(&ctx_keccak);
|
||||
// ZKECCAK;
|
||||
sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
|
||||
sph_keccak512_close(&ctx_keccak, (void*) hash);
|
||||
|
||||
sph_skein512_init(&ctx_skein);
|
||||
// ZSKEIN;
|
||||
sph_skein512 (&ctx_skein, (const void*) hash, 64);
|
||||
sph_skein512_close(&ctx_skein, (void*) hash);
|
||||
|
||||
|
27
miner.h
27
miner.h
@ -205,13 +205,13 @@ extern int scanhash_sha256d(int thr_id, uint32_t *pdata,
|
||||
|
||||
extern unsigned char *scrypt_buffer_alloc();
|
||||
|
||||
extern int scanhash_scrypt(int thr_id, uint32_t *pdata,
|
||||
unsigned char *scratchbuf, const uint32_t *ptarget,
|
||||
uint32_t max_nonce, unsigned long *hashes_done);
|
||||
|
||||
extern int scanhash_heavy(int thr_id, uint32_t *pdata,
|
||||
extern int scanhash_deep(int thr_id, uint32_t *pdata,
|
||||
const uint32_t *ptarget, uint32_t max_nonce,
|
||||
unsigned long *hashes_done, uint32_t maxvote, int blocklen);
|
||||
unsigned long *hashes_done);
|
||||
|
||||
extern int scanhash_doom(int thr_id, uint32_t *pdata,
|
||||
const uint32_t *ptarget, uint32_t max_nonce,
|
||||
unsigned long *hashes_done);
|
||||
|
||||
extern int scanhash_fugue256(int thr_id, uint32_t *pdata,
|
||||
const uint32_t *ptarget, uint32_t max_nonce,
|
||||
@ -221,6 +221,10 @@ extern int scanhash_groestlcoin(int thr_id, uint32_t *pdata,
|
||||
const uint32_t *ptarget, uint32_t max_nonce,
|
||||
unsigned long *hashes_done);
|
||||
|
||||
extern int scanhash_heavy(int thr_id, uint32_t *pdata,
|
||||
const uint32_t *ptarget, uint32_t max_nonce,
|
||||
unsigned long *hashes_done, uint32_t maxvote, int blocklen);
|
||||
|
||||
extern int scanhash_myriad(int thr_id, uint32_t *pdata,
|
||||
const uint32_t *ptarget, uint32_t max_nonce,
|
||||
unsigned long *hashes_done);
|
||||
@ -253,6 +257,14 @@ extern int scanhash_pentablake(int thr_id, uint32_t *pdata,
|
||||
const uint32_t *ptarget, uint32_t max_nonce,
|
||||
unsigned long *hashes_done);
|
||||
|
||||
extern int scanhash_qubit(int thr_id, uint32_t *pdata,
|
||||
const uint32_t *ptarget, uint32_t max_nonce,
|
||||
unsigned long *hashes_done);
|
||||
|
||||
extern int scanhash_scrypt(int thr_id, uint32_t *pdata,
|
||||
unsigned char *scratchbuf, const uint32_t *ptarget,
|
||||
uint32_t max_nonce, unsigned long *hashes_done);
|
||||
|
||||
extern int scanhash_whc(int thr_id, uint32_t *pdata,
|
||||
const uint32_t *ptarget, uint32_t max_nonce,
|
||||
unsigned long *hashes_done);
|
||||
@ -426,6 +438,8 @@ void applog_compare_hash(unsigned char *hash, unsigned char *hash2);
|
||||
void print_hash_tests(void);
|
||||
void animehash(void *state, const void *input);
|
||||
void blake256hash(void *output, const void *input, int rounds);
|
||||
void deephash(void *state, const void *input);
|
||||
void doomhash(void *state, const void *input);
|
||||
void fresh_hash(void *state, const void *input);
|
||||
void fugue256_hash(unsigned char* output, const unsigned char* input, int len);
|
||||
void heavycoin_hash(unsigned char* output, const unsigned char* input, int len);
|
||||
@ -435,6 +449,7 @@ void myriadhash(void *state, const void *input);
|
||||
void nist5hash(void *state, const void *input);
|
||||
void pentablakehash(void *output, const void *input);
|
||||
void quarkhash(void *state, const void *input);
|
||||
void qubithash(void *state, const void *input);
|
||||
void wcoinhash(void *state, const void *input);
|
||||
void x11hash(void *output, const void *input);
|
||||
void x13hash(void *output, const void *input);
|
||||
|
@ -6,14 +6,12 @@ extern "C"
|
||||
#include "sph/sph_skein.h"
|
||||
#include "sph/sph_jh.h"
|
||||
#include "sph/sph_keccak.h"
|
||||
}
|
||||
#include "miner.h"
|
||||
#include "cuda_helper.h"
|
||||
}
|
||||
|
||||
// aus cpu-miner.c
|
||||
extern int device_map[8];
|
||||
|
||||
// Speicher für Input/Output der verketteten Hashfunktionen
|
||||
static uint32_t *d_hash[8];
|
||||
|
||||
// Speicher zur Generierung der Noncevektoren für die bedingten Hashes
|
||||
@ -43,10 +41,6 @@ extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startN
|
||||
extern void quark_jh512_cpu_init(int thr_id, int threads);
|
||||
extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
|
||||
extern void cuda_check_cpu_init(int thr_id, int threads);
|
||||
extern void cuda_check_cpu_setTarget(const void *ptarget);
|
||||
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
|
||||
|
||||
extern void quark_compactTest_cpu_init(int thr_id, int threads);
|
||||
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
|
||||
uint32_t *d_nonces1, size_t *nrm1,
|
||||
@ -165,7 +159,6 @@ struct HashPredicate
|
||||
uint32_t m_startNonce;
|
||||
};
|
||||
|
||||
extern bool opt_benchmark;
|
||||
|
||||
extern "C" int scanhash_anime(int thr_id, uint32_t *pdata,
|
||||
const uint32_t *ptarget, uint32_t max_nonce,
|
||||
|
@ -14,7 +14,6 @@ extern "C"
|
||||
|
||||
extern int device_map[8];
|
||||
|
||||
// Speicher für Input/Output der verketteten Hashfunktionen
|
||||
static uint32_t *d_hash[8];
|
||||
|
||||
// Speicher zur Generierung der Noncevektoren für die bedingten Hashes
|
||||
@ -44,10 +43,6 @@ extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startN
|
||||
extern void quark_jh512_cpu_init(int thr_id, int threads);
|
||||
extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
|
||||
extern void cuda_check_cpu_init(int thr_id, int threads);
|
||||
extern void cuda_check_cpu_setTarget(const void *ptarget);
|
||||
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
|
||||
|
||||
extern void quark_compactTest_cpu_init(int thr_id, int threads);
|
||||
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
|
||||
uint32_t *d_nonces1, size_t *nrm1,
|
||||
|
120
qubit/deep.cu
Normal file
120
qubit/deep.cu
Normal file
@ -0,0 +1,120 @@
|
||||
/*
|
||||
* deepcoin algorithm
|
||||
*
|
||||
*/
|
||||
extern "C" {
|
||||
#include "sph/sph_luffa.h"
|
||||
#include "sph/sph_cubehash.h"
|
||||
#include "sph/sph_shavite.h"
|
||||
#include "sph/sph_simd.h"
|
||||
#include "sph/sph_echo.h"
|
||||
}
|
||||
|
||||
#include "miner.h"
|
||||
|
||||
#include "cuda_helper.h"
|
||||
|
||||
extern int device_map[8];
|
||||
|
||||
static uint32_t *d_hash[8];
|
||||
|
||||
extern void qubit_luffa512_cpu_init(int thr_id, int threads);
|
||||
extern void qubit_luffa512_cpu_setBlock_80(void *pdata);
|
||||
extern void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
|
||||
extern void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget);
|
||||
extern uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
|
||||
|
||||
extern void x11_cubehash512_cpu_init(int thr_id, int threads);
|
||||
extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
|
||||
extern void x11_echo512_cpu_init(int thr_id, int threads);
|
||||
extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
|
||||
/**
 * Host-side (CPU) computation of the deep hash.
 *
 * Runs three 512-bit sph hash stages back to back:
 *   1. luffa512    over the first 80 bytes of `input`
 *   2. cubehash512 over the 64-byte digest of stage 1
 *   3. echo512     over the 64-byte digest of stage 2
 * Each stage overwrites the same 64-byte scratch buffer.
 *
 * @param state  output buffer; receives the first 32 bytes of the final digest
 * @param input  input buffer; exactly 80 bytes are read
 */
extern "C" void deephash(void *state, const void *input)
{
	uint8_t digest[64];

	// stage 1: luffa512 on the 80-byte input
	sph_luffa512_context luffa;
	sph_luffa512_init(&luffa);
	sph_luffa512(&luffa, input, 80);
	sph_luffa512_close(&luffa, (void*) digest);

	// stage 2: cubehash512 on the previous 64-byte digest
	sph_cubehash512_context cubehash;
	sph_cubehash512_init(&cubehash);
	sph_cubehash512(&cubehash, (const void*) digest, 64);
	sph_cubehash512_close(&cubehash, (void*) digest);

	// stage 3: echo512 on the previous 64-byte digest
	sph_echo512_context echo;
	sph_echo512_init(&echo);
	sph_echo512(&echo, (const void*) digest, 64);
	sph_echo512_close(&echo, (void*) digest);

	// only the leading 256 bits of the 512-bit result are returned
	memcpy(state, digest, 32);
}
|
||||
|
||||
|
||||
/**
 * Scan a nonce range on the GPU with the deep hash chain
 * (luffa512 -> cubehash512 -> echo512) and validate any candidate on the CPU.
 *
 * @param thr_id       miner thread index; selects the per-thread device buffers
 *                     and the CUDA device via device_map[thr_id]
 * @param pdata        20-word work data; pdata[19] is the starting nonce on entry
 *                     and is updated to the found nonce or to the scan position
 * @param ptarget      8-word target; ptarget[7] is used as a quick pre-check
 * @param max_nonce    scanning stops once pdata[19] reaches this value
 * @param hashes_done  out: number of nonces covered by this call
 * @return 1 when a nonce was found and confirmed by deephash()/fulltest(),
 *         0 when the range was exhausted or a work restart was requested
 */
extern "C" int scanhash_deep(int thr_id, uint32_t *pdata,
	const uint32_t *ptarget, uint32_t max_nonce,
	unsigned long *hashes_done)
{
	const uint32_t first_nonce = pdata[19];
	// number of nonces hashed per kernel batch
	const int throughput = 256*256*8*8;
	static bool init[8] = {0,0,0,0,0,0,0,0};
	uint32_t endiandata[20];

	// benchmark mode: overwrite the top target word with a fixed value
	if (opt_benchmark)
		((uint32_t*)ptarget)[7] = 0x0000ff;

	// one-time per-thread setup: select the device, allocate the hash buffer
	// (16 words = 64 bytes per nonce) and initialise each hashing stage
	if (!init[thr_id])
	{
		cudaSetDevice(device_map[thr_id]);
		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);

		qubit_luffa512_cpu_init(thr_id, throughput);
		x11_cubehash512_cpu_init(thr_id, throughput);
		x11_echo512_cpu_init(thr_id, throughput);

		cuda_check_cpu_init(thr_id, throughput);

		init[thr_id] = true;
	}

	// big-endian copy of the 20 work words, used for the GPU input block and
	// for the CPU re-check below
	for (int k=0; k < 20; k++)
		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);

	qubit_luffa512_cpufinal_setBlock_80((void*)endiandata,ptarget);
	cuda_check_cpu_setTarget(ptarget);

	do {
		const uint32_t Htarg = ptarget[7];
		// incremented for every launch of this iteration and forwarded to each stage
		int order = 0;

		qubit_luffa512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
		x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);

		// 0xffffffff means "no candidate in this batch"
		uint32_t foundNonce = cuda_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		if (foundNonce != 0xffffffff)
		{
			// recompute the hash on the CPU before reporting the nonce
			uint32_t vhash64[8];
			be32enc(&endiandata[19], foundNonce);
			deephash(vhash64, endiandata);

			if (vhash64[7]<=Htarg && fulltest(vhash64, ptarget) )
			{
				pdata[19] = foundNonce;
				*hashes_done = foundNonce - first_nonce + 1;
				return 1;
			} else {
				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
			}
		}

		// Advance to the next batch using 64-bit arithmetic: a plain 32-bit
		// "pdata[19] += throughput" wraps around near the top of the nonce
		// space, which makes "pdata[19] < max_nonce" true again and rescans
		// nonces that were already covered.
		const uint64_t next_nonce = (uint64_t)pdata[19] + (uint64_t)throughput;
		if (next_nonce > 0xffffffffULL) {
			pdata[19] = max_nonce;
			break;
		}
		pdata[19] = (uint32_t)next_nonce;

	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);

	*hashes_done = pdata[19] - first_nonce + 1;
	return 0;
}
|
93
qubit/doom.cu
Normal file
93
qubit/doom.cu
Normal file
@ -0,0 +1,93 @@
|
||||
/*
|
||||
* qubit algorithm
|
||||
*
|
||||
*/
|
||||
extern "C" {
|
||||
#include "sph/sph_luffa.h"
|
||||
}
|
||||
|
||||
#include "miner.h"
|
||||
|
||||
#include "cuda_helper.h"
|
||||
|
||||
extern int device_map[8];
|
||||
|
||||
static uint32_t *d_hash[8];
|
||||
|
||||
extern void qubit_luffa512_cpu_init(int thr_id, int threads);
|
||||
extern void qubit_luffa512_cpu_setBlock_80(void *pdata);
|
||||
extern void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
|
||||
extern void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget);
|
||||
extern uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
|
||||
|
||||
/**
 * CPU reference hash for the "doom" algorithm.
 *
 * Runs a single Luffa-512 pass over the first 80 bytes of `input` and copies
 * the first 32 bytes of the resulting 64-byte digest into `state`.
 */
extern void doomhash(void *state, const void *input)
{
	uint8_t digest[64];
	sph_luffa512_context luffa;

	// luffa512 over the 80-byte header
	sph_luffa512_init(&luffa);
	sph_luffa512(&luffa, input, 80);
	sph_luffa512_close(&luffa, (void*) digest);

	// only the leading 256 bits are handed back to the caller
	memcpy(state, digest, 32);
}
|
||||
|
||||
|
||||
/**
 * Scan a nonce range for the "doom" algorithm (single Luffa-512) on the GPU.
 *
 * @param thr_id       miner thread index; selects the device via device_map[]
 *                     and the per-thread buffers.
 * @param pdata        20-word block header. pdata[19] is the first nonce to
 *                     try on entry; on return it holds the winning nonce
 *                     (return 1) or the position reached by the scan (return 0).
 * @param ptarget      8-word target. In benchmark mode word 7 is overwritten
 *                     with 0x0000ff.
 * @param max_nonce    the scan stops once pdata[19] reaches this value.
 * @param hashes_done  receives the number of nonces covered by this call.
 * @return 1 if a nonce was found by the GPU and confirmed on the CPU, else 0.
 */
extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,
	const uint32_t *ptarget, uint32_t max_nonce,
	unsigned long *hashes_done)
{
	const uint32_t first_nonce = pdata[19];
	const int throughput = 256*256*8*8;   // nonces hashed per kernel launch
	static bool init[8] = {0,0,0,0,0,0,0,0};
	uint32_t endiandata[20];

	if (opt_benchmark)
		((uint32_t*)ptarget)[7] = 0x0000ff;

	// One-time per-thread device setup.
	if (!init[thr_id])
	{
		cudaSetDevice(device_map[thr_id]);

		// 16 words (64 bytes) of hash output per nonce
		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);

		qubit_luffa512_cpu_init(thr_id, throughput);

		init[thr_id] = true;
	}

	// Header words are handed to the GPU (and to the CPU check) big-endian encoded.
	for (int k=0; k < 20; k++)
		be32enc(&endiandata[k], pdata[k]);

	qubit_luffa512_cpufinal_setBlock_80((void*)endiandata, ptarget);

	do {
		const uint32_t Htarg = ptarget[7];
		int order = 0;

		uint32_t foundNonce = qubit_luffa512_cpu_finalhash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
		if (foundNonce != 0xffffffff)
		{
			// Re-hash the candidate on the CPU before reporting it.
			uint32_t vhash64[8];
			be32enc(&endiandata[19], foundNonce);
			doomhash(vhash64, endiandata);

			if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget) )
			{
				pdata[19] = foundNonce;
				*hashes_done = foundNonce - first_nonce + 1;
				return 1;
			} else {
				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
			}
		}

		// Advance in 64-bit arithmetic: a plain 32-bit "+=" can wrap past
		// 0xffffffff, which would make the "< max_nonce" test below true again
		// and restart the scan on nonces that were already covered.
		const uint64_t next_nonce = (uint64_t)pdata[19] + (uint64_t)throughput;
		if (next_nonce > 0xffffffffULL) {
			pdata[19] = max_nonce;   // nonce space exhausted
			break;
		}
		pdata[19] = (uint32_t)next_nonce;

	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);

	*hashes_done = pdata[19] - first_nonce + 1;
	return 0;
}
|
146
qubit/qubit.cu
Normal file
146
qubit/qubit.cu
Normal file
@ -0,0 +1,146 @@
|
||||
/*
|
||||
* qubit algorithm
|
||||
*
|
||||
*/
|
||||
extern "C" {
|
||||
#include "sph/sph_luffa.h"
|
||||
#include "sph/sph_cubehash.h"
|
||||
#include "sph/sph_shavite.h"
|
||||
#include "sph/sph_simd.h"
|
||||
#include "sph/sph_echo.h"
|
||||
}
|
||||
|
||||
#include "miner.h"
|
||||
|
||||
#include "cuda_helper.h"
|
||||
|
||||
extern int device_map[8];
|
||||
|
||||
static uint32_t *d_hash[8];
|
||||
|
||||
extern void qubit_luffa512_cpu_init(int thr_id, int threads);
|
||||
extern void qubit_luffa512_cpu_setBlock_80(void *pdata);
|
||||
extern void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
|
||||
|
||||
extern void x11_cubehash512_cpu_init(int thr_id, int threads);
|
||||
extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
|
||||
extern void x11_shavite512_cpu_init(int thr_id, int threads);
|
||||
extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
|
||||
extern void x11_simd512_cpu_init(int thr_id, int threads);
|
||||
extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
|
||||
extern void x11_echo512_cpu_init(int thr_id, int threads);
|
||||
extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
|
||||
extern void quark_compactTest_cpu_init(int thr_id, int threads);
|
||||
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
|
||||
uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
|
||||
int order);
|
||||
|
||||
/**
 * CPU reference hash for the "qubit" algorithm.
 *
 * Chain: luffa512 -> cubehash512 -> shavite512 -> simd512 -> echo512.
 * The first stage consumes 80 bytes of `input`; every later stage consumes
 * the 64-byte digest of the previous one. The first 32 bytes of the final
 * digest are copied to `state`.
 */
extern "C" void qubithash(void *state, const void *input)
{
	uint8_t digest[64];

	// stage 1: luffa512 over the 80-byte header
	sph_luffa512_context luffa;
	sph_luffa512_init(&luffa);
	sph_luffa512(&luffa, input, 80);
	sph_luffa512_close(&luffa, (void*) digest);

	// stage 2: cubehash512
	sph_cubehash512_context cubehash;
	sph_cubehash512_init(&cubehash);
	sph_cubehash512(&cubehash, (const void*) digest, 64);
	sph_cubehash512_close(&cubehash, (void*) digest);

	// stage 3: shavite512
	sph_shavite512_context shavite;
	sph_shavite512_init(&shavite);
	sph_shavite512(&shavite, (const void*) digest, 64);
	sph_shavite512_close(&shavite, (void*) digest);

	// stage 4: simd512
	sph_simd512_context simd;
	sph_simd512_init(&simd);
	sph_simd512(&simd, (const void*) digest, 64);
	sph_simd512_close(&simd, (void*) digest);

	// stage 5: echo512
	sph_echo512_context echo;
	sph_echo512_init(&echo);
	sph_echo512(&echo, (const void*) digest, 64);
	sph_echo512_close(&echo, (void*) digest);

	memcpy(state, digest, 32);
}
|
||||
|
||||
/**
 * Scan a nonce range for the "qubit" algorithm on the GPU
 * (luffa512 -> cubehash512 -> shavite512 -> simd512 -> echo512).
 *
 * @param thr_id       miner thread index; selects the device via device_map[]
 *                     and the per-thread buffers.
 * @param pdata        20-word block header. pdata[19] is the first nonce to
 *                     try on entry; on return it holds the winning nonce
 *                     (return 1) or the position reached by the scan (return 0).
 * @param ptarget      8-word target. In benchmark mode word 7 is overwritten
 *                     with 0x0000ff.
 * @param max_nonce    the scan stops once pdata[19] reaches this value.
 * @param hashes_done  receives the number of nonces covered by this call.
 * @return 1 if a nonce was found by the GPU and confirmed on the CPU, else 0.
 */
extern "C" int scanhash_qubit(int thr_id, uint32_t *pdata,
	const uint32_t *ptarget, uint32_t max_nonce,
	unsigned long *hashes_done)
{
	const uint32_t first_nonce = pdata[19];
	const int throughput = 256*256*8;   // nonces hashed per kernel launch
	static bool init[8] = {0,0,0,0,0,0,0,0};
	uint32_t endiandata[20];

	if (opt_benchmark)
		((uint32_t*)ptarget)[7] = 0x0000ff;

	// One-time per-thread device setup.
	if (!init[thr_id])
	{
		cudaSetDevice(device_map[thr_id]);

		// 16 words (64 bytes) of hash state per nonce, reused by every stage
		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);

		qubit_luffa512_cpu_init(thr_id, throughput);
		x11_cubehash512_cpu_init(thr_id, throughput);
		x11_shavite512_cpu_init(thr_id, throughput);
		x11_simd512_cpu_init(thr_id, throughput);
		x11_echo512_cpu_init(thr_id, throughput);

		cuda_check_cpu_init(thr_id, throughput);

		init[thr_id] = true;
	}

	// Header words are handed to the GPU (and to the CPU check) big-endian encoded.
	for (int k=0; k < 20; k++)
		be32enc(&endiandata[k], pdata[k]);

	qubit_luffa512_cpu_setBlock_80((void*)endiandata);
	cuda_check_cpu_setTarget(ptarget);

	do {
		const uint32_t Htarg = ptarget[7];
		int order = 0;

		// Hash with CUDA: each stage works in place on d_hash[thr_id]
		qubit_luffa512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
		x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);

		uint32_t foundNonce = cuda_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		if (foundNonce != 0xffffffff)
		{
			// Re-hash the candidate on the CPU before reporting it.
			uint32_t vhash64[8];
			be32enc(&endiandata[19], foundNonce);
			qubithash(vhash64, endiandata);

			if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget) )
			{
				pdata[19] = foundNonce;
				*hashes_done = foundNonce - first_nonce + 1;
				return 1;
			} else {
				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
			}
		}

		// Advance in 64-bit arithmetic: a plain 32-bit "+=" can wrap past
		// 0xffffffff, which would make the "< max_nonce" test below true again
		// and restart the scan on nonces that were already covered.
		const uint64_t next_nonce = (uint64_t)pdata[19] + (uint64_t)throughput;
		if (next_nonce > 0xffffffffULL) {
			pdata[19] = max_nonce;   // nonce space exhausted
			break;
		}
		pdata[19] = (uint32_t)next_nonce;

	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);

	*hashes_done = pdata[19] - first_nonce + 1;
	return 0;
}
|
496
qubit/qubit_luffa512.cu
Normal file
496
qubit/qubit_luffa512.cu
Normal file
@ -0,0 +1,496 @@
|
||||
/*
|
||||
* luffa_for_32.c
|
||||
* Version 2.0 (Sep 15th 2009)
|
||||
*
|
||||
* Copyright (C) 2008-2009 Hitachi, Ltd. All rights reserved.
|
||||
*
|
||||
* Hitachi, Ltd. is the owner of this software and hereby grant
|
||||
* the U.S. Government and any interested party the right to use
|
||||
* this software for the purposes of the SHA-3 evaluation process,
|
||||
* notwithstanding that this software is copyrighted.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <memory.h>
|
||||
|
||||
#include "cuda_helper.h"
|
||||
|
||||
typedef unsigned char BitSequence;
|
||||
|
||||
__constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding)
|
||||
__constant__ uint32_t pTarget[8];
|
||||
|
||||
uint32_t *d_lnounce[8];
|
||||
uint32_t *d_LNonce[8];
|
||||
|
||||
typedef struct {
|
||||
uint32_t buffer[8]; /* Buffer to be hashed */
|
||||
uint32_t chainv[40]; /* Chaining values */
|
||||
} hashState;
|
||||
|
||||
/* 32-bit byte swap, delegated to the project helper. */
#define BYTES_SWAP32(x) cuda_swab32(x)

/*
 * Shift the 8-word group j of array a up by one word, feeding the old top
 * word back into words 0, 1, 3 and 4.
 * Uses a variable named `tmp` from the enclosing scope as scratch.
 */
#define MULT2(a,j)\
	tmp = (a)[7+(8*(j))];\
	(a)[7+(8*(j))] = (a)[6+(8*(j))];\
	(a)[6+(8*(j))] = (a)[5+(8*(j))];\
	(a)[5+(8*(j))] = (a)[4+(8*(j))];\
	(a)[4+(8*(j))] = (a)[3+(8*(j))] ^ tmp;\
	(a)[3+(8*(j))] = (a)[2+(8*(j))] ^ tmp;\
	(a)[2+(8*(j))] = (a)[1+(8*(j))];\
	(a)[1+(8*(j))] = (a)[0+(8*(j))] ^ tmp;\
	(a)[0+(8*(j))] = tmp;

/* Rotate each of the four words left by j bits (j must be in 1..31). */
#define TWEAK(a0,a1,a2,a3,j)\
	a0 = ((a0)<<(j))|((a0)>>(32-(j)));\
	a1 = ((a1)<<(j))|((a1)>>(32-(j)));\
	a2 = ((a2)<<(j))|((a2)>>(32-(j)));\
	a3 = ((a3)<<(j))|((a3)>>(32-(j)));

/*
 * One step on the 8-word array `chainv` of the enclosing scope:
 * two SUBCRUMB passes, four MIXWORD passes, then the step constants c0/c1
 * are XORed into chainv[0] and chainv[4]. Also uses `tmp` from the
 * enclosing scope.
 */
#define STEP(c0,c1)\
	SUBCRUMB(chainv[0],chainv[1],chainv[2],chainv[3],tmp);\
	SUBCRUMB(chainv[5],chainv[6],chainv[7],chainv[4],tmp);\
	MIXWORD(chainv[0],chainv[4]);\
	MIXWORD(chainv[1],chainv[5]);\
	MIXWORD(chainv[2],chainv[6]);\
	MIXWORD(chainv[3],chainv[7]);\
	ADD_CONSTANT(chainv[0],chainv[4],c0,c1);

/* Bitwise substitution over four words a0..a3; a4 is a scratch word. */
#define SUBCRUMB(a0,a1,a2,a3,a4)\
	a4  = a0;\
	a0 |= a1;\
	a2 ^= a3;\
	a1  = ~a1;\
	a0 ^= a3;\
	a3 &= a4;\
	a1 ^= a3;\
	a3 ^= a2;\
	a2 &= a0;\
	a0  = ~a0;\
	a2 ^= a1;\
	a1 |= a3;\
	a4 ^= a1;\
	a3 ^= a2;\
	a2 &= a1;\
	a1 ^= a0;\
	a0  = a4;

/* Mix a word pair with XORs and left rotations by 2, 14, 10 and 1 bits. */
#define MIXWORD(a0,a4)\
	a4 ^= a0;\
	a0  = ((a0)<<2)  | ((a0)>>(30));\
	a0 ^= a4;\
	a4  = ((a4)<<14) | ((a4)>>(18));\
	a4 ^= a0;\
	a0  = ((a0)<<10) | ((a0)>>(22));\
	a0 ^= a4;\
	a4  = ((a4)<<1)  | ((a4)>>(31));

/* XOR the two step constants into the given words. */
#define ADD_CONSTANT(a0,b0,c0,c1)\
	a0 ^= (c0);\
	b0 ^= (c1);
|
||||
|
||||
/* initial values of chaining variables */
|
||||
__constant__ uint32_t c_IV[40];
|
||||
const uint32_t h2_IV[40] = {
|
||||
0x6d251e69,0x44b051e0,0x4eaa6fb4,0xdbf78465,
|
||||
0x6e292011,0x90152df4,0xee058139,0xdef610bb,
|
||||
0xc3b44b95,0xd9d2f256,0x70eee9a0,0xde099fa3,
|
||||
0x5d9b0557,0x8fc944b3,0xcf1ccf0e,0x746cd581,
|
||||
0xf7efc89d,0x5dba5781,0x04016ce5,0xad659c05,
|
||||
0x0306194f,0x666d1836,0x24aa230a,0x8b264ae7,
|
||||
0x858075d5,0x36d79cce,0xe571f7d7,0x204b1f67,
|
||||
0x35870c6a,0x57e9e923,0x14bcb808,0x7cde72ce,
|
||||
0x6c68e9be,0x5ec41e22,0xc825b7c7,0xaffb4363,
|
||||
0xf5df3999,0x0fc688f1,0xb07224cc,0x03e86cea};
|
||||
|
||||
__constant__ uint32_t c_CNS[80];
|
||||
uint32_t h2_CNS[80] = {
|
||||
0x303994a6,0xe0337818,0xc0e65299,0x441ba90d,
|
||||
0x6cc33a12,0x7f34d442,0xdc56983e,0x9389217f,
|
||||
0x1e00108f,0xe5a8bce6,0x7800423d,0x5274baf4,
|
||||
0x8f5b7882,0x26889ba7,0x96e1db12,0x9a226e9d,
|
||||
0xb6de10ed,0x01685f3d,0x70f47aae,0x05a17cf4,
|
||||
0x0707a3d4,0xbd09caca,0x1c1e8f51,0xf4272b28,
|
||||
0x707a3d45,0x144ae5cc,0xaeb28562,0xfaa7ae2b,
|
||||
0xbaca1589,0x2e48f1c1,0x40a46f3e,0xb923c704,
|
||||
0xfc20d9d2,0xe25e72c1,0x34552e25,0xe623bb72,
|
||||
0x7ad8818f,0x5c58a4a4,0x8438764a,0x1e38e2e7,
|
||||
0xbb6de032,0x78e38b9d,0xedb780c8,0x27586719,
|
||||
0xd9847356,0x36eda57f,0xa2c78434,0x703aace7,
|
||||
0xb213afa5,0xe028c9bf,0xc84ebe95,0x44756f91,
|
||||
0x4e608a22,0x7e8fce32,0x56d858fe,0x956548be,
|
||||
0x343b138f,0xfe191be2,0xd0ec4e3d,0x3cb226e5,
|
||||
0x2ceb4882,0x5944a28e,0xb3ad2208,0xa1c4c355,
|
||||
0xf0d2e9e3,0x5090d577,0xac11d7fa,0x2d1925ab,
|
||||
0x1bcb66f2,0xb46496ac,0x6f2d9bc9,0xd1925ab0,
|
||||
0x78602649,0x29131ab6,0x8edae952,0x0fc053c3,
|
||||
0x3b6ba548,0x3f014f0c,0xedae9520,0xfc053c31};
|
||||
|
||||
|
||||
/***************************************************/
|
||||
/*
 * One Luffa-512 round: mixes the five 8-word lanes of state->chainv with each
 * other, injects state->buffer, then runs the 8-step permutation on each lane
 * (lane j uses constants c_CNS[16*j .. 16*j+15]; lanes 1..4 are tweaked by a
 * rotation of j bits first).
 * Side effect: state->buffer is modified (MULT2 is applied to it five times).
 */
__device__ __forceinline__
void rnd512(hashState *state)
{
	uint32_t t[40];
	uint32_t chainv[8];   // working lane; this exact name is referenced by STEP()
	uint32_t tmp;         // scratch word referenced by MULT2() and STEP()
	int i, j;

	/* ---- message-independent lane mixing ---- */

	// t[0..7] = XOR of the five lanes
	#pragma unroll 8
	for (i = 0; i < 8; i++) {
		t[i] = 0;
		#pragma unroll 5
		for (j = 0; j < 5; j++)
			t[i] ^= state->chainv[i + 8*j];
	}

	MULT2(t, 0);

	// fold the multiplied sum back into every lane
	#pragma unroll 5
	for (j = 0; j < 5; j++) {
		#pragma unroll 8
		for (i = 0; i < 8; i++)
			state->chainv[8*j + i] ^= t[i];
	}

	// snapshot, multiply every lane, XOR with the next lane of the snapshot
	#pragma unroll 40
	for (i = 0; i < 40; i++)
		t[i] = state->chainv[i];

	#pragma unroll 5
	for (j = 0; j < 5; j++) {
		MULT2(state->chainv, j);
	}

	#pragma unroll 5
	for (j = 0; j < 5; j++) {
		#pragma unroll 8
		for (i = 0; i < 8; i++)
			state->chainv[8*j + i] ^= t[8*((j+1)%5) + i];
	}

	// snapshot, multiply every lane, XOR with the previous lane of the snapshot
	#pragma unroll 40
	for (i = 0; i < 40; i++)
		t[i] = state->chainv[i];

	#pragma unroll 5
	for (j = 0; j < 5; j++) {
		MULT2(state->chainv, j);
	}

	#pragma unroll 5
	for (j = 0; j < 5; j++) {
		#pragma unroll 8
		for (i = 0; i < 8; i++)
			state->chainv[8*j + i] ^= t[8*((j+4)%5) + i];
	}

	/* ---- message injection: lane j receives the buffer multiplied j times ---- */
	#pragma unroll 5
	for (j = 0; j < 5; j++) {
		#pragma unroll 8
		for (i = 0; i < 8; i++)
			state->chainv[8*j + i] ^= state->buffer[i];
		MULT2(state->buffer, 0);
	}

	/* ---- permutation, lane 0 (no tweak) ---- */
	#pragma unroll 8
	for (i = 0; i < 8; i++)
		chainv[i] = state->chainv[i];

	#pragma unroll 8
	for (i = 0; i < 8; i++) {
		STEP(c_CNS[(2*i)], c_CNS[(2*i)+1]);
	}

	// store lane 0, load lane 1
	#pragma unroll 8
	for (i = 0; i < 8; i++) {
		state->chainv[i] = chainv[i];
		chainv[i] = state->chainv[i+8];
	}

	/* ---- permutation, lane 1 ---- */
	TWEAK(chainv[4], chainv[5], chainv[6], chainv[7], 1);

	#pragma unroll 8
	for (i = 0; i < 8; i++) {
		STEP(c_CNS[(2*i)+16], c_CNS[(2*i)+16+1]);
	}

	// store lane 1, load lane 2
	#pragma unroll 8
	for (i = 0; i < 8; i++) {
		state->chainv[i+8] = chainv[i];
		chainv[i] = state->chainv[i+16];
	}

	/* ---- permutation, lane 2 ---- */
	TWEAK(chainv[4], chainv[5], chainv[6], chainv[7], 2);

	#pragma unroll 8
	for (i = 0; i < 8; i++) {
		STEP(c_CNS[(2*i)+32], c_CNS[(2*i)+32+1]);
	}

	// store lane 2, load lane 3
	#pragma unroll 8
	for (i = 0; i < 8; i++) {
		state->chainv[i+16] = chainv[i];
		chainv[i] = state->chainv[i+24];
	}

	/* ---- permutation, lane 3 ---- */
	TWEAK(chainv[4], chainv[5], chainv[6], chainv[7], 3);

	#pragma unroll 8
	for (i = 0; i < 8; i++) {
		STEP(c_CNS[(2*i)+48], c_CNS[(2*i)+48+1]);
	}

	// store lane 3, load lane 4
	#pragma unroll 8
	for (i = 0; i < 8; i++) {
		state->chainv[i+24] = chainv[i];
		chainv[i] = state->chainv[i+32];
	}

	/* ---- permutation, lane 4 ---- */
	TWEAK(chainv[4], chainv[5], chainv[6], chainv[7], 4);

	#pragma unroll 8
	for (i = 0; i < 8; i++) {
		STEP(c_CNS[(2*i)+64], c_CNS[(2*i)+64+1]);
	}

	// store lane 4
	#pragma unroll 8
	for (i = 0; i < 8; i++)
		state->chainv[i+32] = chainv[i];
}
|
||||
|
||||
|
||||
/*
 * Absorb an 80-byte message: two full 32-byte blocks are byte-swapped into
 * state->buffer and processed with rnd512(); the remaining 16 bytes are
 * byte-swapped into buffer[0..3] and left there unprocessed (the caller's
 * finalization step completes that block).
 */
__device__ __forceinline__
void Update512(hashState *state, const BitSequence *data)
{
	const uint32_t *words = (const uint32_t*)data;

	// block 0: bytes 0..31
	#pragma unroll 8
	for (int w = 0; w < 8; w++)
		state->buffer[w] = BYTES_SWAP32(words[w]);
	rnd512(state);

	// block 1: bytes 32..63
	#pragma unroll 8
	for (int w = 0; w < 8; w++)
		state->buffer[w] = BYTES_SWAP32(words[8 + w]);
	rnd512(state);

	// partial block 2: bytes 64..79
	#pragma unroll 4
	for (int w = 0; w < 4; w++)
		state->buffer[w] = BYTES_SWAP32(words[16 + w]);
}
|
||||
|
||||
|
||||
/***************************************************/
|
||||
/*
 * Finish the hash and write the 16-word (64-byte) digest to b.
 *
 * Expects buffer[0..3] to already hold the last 16 message bytes. Appends the
 * 0x80000000 padding word, zero-fills the block and processes it. Then runs
 * two blank rounds (all-zero buffer); after each one, the XOR of the five
 * lanes, byte-swapped, yields eight output words.
 */
__device__ __forceinline__
void finalization512(hashState *state, uint32_t *b)
{
	// pad and process the final, partially filled block
	state->buffer[4] = 0x80000000;
	#pragma unroll 3
	for (int k = 5; k < 8; k++)
		state->buffer[k] = 0;
	rnd512(state);

	// two output halves: b[0..7] after the first blank round, b[8..15] after the second
	#pragma unroll 2
	for (int half = 0; half < 2; half++) {
		/*---- blank round with m=0 ----*/
		#pragma unroll 8
		for (int k = 0; k < 8; k++)
			state->buffer[k] = 0;
		rnd512(state);

		#pragma unroll 8
		for (int k = 0; k < 8; k++) {
			uint32_t acc = 0;
			#pragma unroll 5
			for (int lane = 0; lane < 5; lane++)
				acc ^= state->chainv[k + 8*lane];
			b[8*half + k] = BYTES_SWAP32(acc);
		}
	}
}
|
||||
|
||||
|
||||
/***************************************************/
|
||||
// The hash kernel: Luffa-512 over the 80-byte header held in c_PaddedMessage80.
// One thread per nonce (startNounce + global thread index); each thread writes
// its 16-word digest to outputHash at word offset 16 * thread.
__global__
void qubit_luffa512_gpu_hash_80(int threads, uint32_t startNounce, void *outputHash)
{
	const int thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread >= threads)
		return;   // tail threads of the last block have nothing to do

	const uint32_t nounce = startNounce + thread;

	// private copy of the padded message, viewable as 64-bit or 32-bit words
	union {
		uint64_t buf64[16];
		uint32_t buf32[32];
	} buff;

	#pragma unroll 16
	for (int k = 0; k < 16; ++k)
		buff.buf64[k] = c_PaddedMessage80[k];

	// replace the nonce with the thread-specific one
	buff.buf64[9] = REPLACE_HIWORD(buff.buf64[9], cuda_swab32(nounce));

	// fresh hash state: IV in the chaining values, empty buffer
	hashState state;
	#pragma unroll 40
	for (int k = 0; k < 40; k++)
		state.chainv[k] = c_IV[k];
	#pragma unroll 8
	for (int k = 0; k < 8; k++)
		state.buffer[k] = 0;

	Update512(&state, (BitSequence*)buff.buf32);

	uint32_t *outHash = (uint32_t *)outputHash + 16 * thread;
	finalization512(&state, (uint32_t*)outHash);
}
|
||||
|
||||
// Luffa-512 kernel with an integrated target test.
// One thread per nonce (startNounce + global thread index). Each thread hashes
// the 80-byte header from c_PaddedMessage80, compares the low 8 digest words
// with pTarget (word 7 is the most significant) and, when hash <= target,
// records its nonce in resNounce[0]. The smallest matching nonce wins.
// outputHash is not used by this kernel.
// resNounce[0] must be preset to 0xffffffff by the host before launch.
__global__
void qubit_luffa512_gpu_finalhash_80(int threads, uint32_t startNounce, void *outputHash, uint32_t *resNounce)
{
	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		uint32_t nounce = startNounce + thread;

		// private copy of the padded message, viewable as 64-bit or 32-bit words
		union {
			uint64_t buf64[16];
			uint32_t buf32[32];
		} buff;
		uint32_t Hash[16];

		#pragma unroll 16
		for (int i=0; i < 16; ++i) buff.buf64[i] = c_PaddedMessage80[i];

		// replace the nonce with the thread-specific one
		buff.buf64[9] = REPLACE_HIWORD(buff.buf64[9], cuda_swab32(nounce));

		hashState state;
		#pragma unroll 40
		for(int i=0;i<40;i++) state.chainv[i] = c_IV[i];
		#pragma unroll 8
		for(int i=0;i<8;i++) state.buffer[i] = 0;

		Update512(&state, (BitSequence*)buff.buf32);
		finalization512(&state, Hash);

		// Compare from the most significant word down; the first differing
		// word decides. An exact match counts as a hit.
		bool rc = true;
		for (int i = 7; i >= 0; i--) {
			if (Hash[i] != pTarget[i]) {
				rc = (Hash[i] < pTarget[i]);
				break;
			}
		}

		// Several threads may hit in the same launch. A plain read-compare-write
		// on resNounce[0] is a data race, so keep the smallest nonce atomically.
		if (rc)
			atomicMin(resNounce, nounce);
	}
}
|
||||
|
||||
// Per-thread setup for the Luffa-512 kernels: uploads the IV and round-constant
// tables to constant memory and allocates the one-word result buffers
// (d_LNonce[thr_id] on the device, d_lnounce[thr_id] as pinned host memory).
// `threads` is accepted for interface symmetry with the other *_cpu_init
// functions and is not used here.
// CUDA failures are reported on stderr; the function still returns normally.
__host__
void qubit_luffa512_cpu_init(int thr_id, int threads)
{
	cudaError_t err;

	err = cudaMemcpyToSymbol( c_IV, h2_IV, sizeof(h2_IV), 0, cudaMemcpyHostToDevice );
	if (err != cudaSuccess)
		fprintf(stderr, "qubit_luffa512_cpu_init (thread %d): copying c_IV failed: %s\n", thr_id, cudaGetErrorString(err));

	err = cudaMemcpyToSymbol( c_CNS, h2_CNS, sizeof(h2_CNS), 0, cudaMemcpyHostToDevice );
	if (err != cudaSuccess)
		fprintf(stderr, "qubit_luffa512_cpu_init (thread %d): copying c_CNS failed: %s\n", thr_id, cudaGetErrorString(err));

	// device-side slot that receives the winning nonce
	err = cudaMalloc(&d_LNonce[thr_id], sizeof(uint32_t));
	if (err != cudaSuccess)
		fprintf(stderr, "qubit_luffa512_cpu_init (thread %d): cudaMalloc failed: %s\n", thr_id, cudaGetErrorString(err));

	// pinned host-side mirror of that slot (despite the d_ prefix)
	err = cudaMallocHost(&d_lnounce[thr_id], 1*sizeof(uint32_t));
	if (err != cudaSuccess)
		fprintf(stderr, "qubit_luffa512_cpu_init (thread %d): cudaMallocHost failed: %s\n", thr_id, cudaGetErrorString(err));
}
|
||||
|
||||
// Launch the Luffa-512 kernel with integrated target test over `threads`
// nonces starting at startNounce and return the smallest nonce whose hash met
// the target, or 0xffffffff if none did.
// d_outputHash is forwarded to the kernel; `order` is forwarded to
// MyStreamSynchronize().
__host__
uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash,int order)
{
	// Reset the device result word to 0xffffffff ("nothing found").
	// cudaMemset fills byte-wise, so the fill value is the single byte 0xff.
	cudaMemset(d_LNonce[thr_id], 0xff, sizeof(uint32_t));

	const int threadsperblock = 256;

	// ceil-div so the tail of the range is covered; the kernel bounds-checks
	dim3 grid((threads + threadsperblock-1)/threadsperblock);
	dim3 block(threadsperblock);

	size_t shared_size = 0;

	qubit_luffa512_gpu_finalhash_80<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash, d_LNonce[thr_id]);
	MyStreamSynchronize(NULL, order, thr_id);

	// blocking copy of the result word into the pinned host buffer
	cudaMemcpy(d_lnounce[thr_id], d_LNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);

	return *d_lnounce[thr_id];
}
|
||||
|
||||
// Launch the plain Luffa-512 kernel over `threads` nonces starting at
// startNounce; digests are written to d_outputHash (16 words per nonce).
// `order` is forwarded to MyStreamSynchronize().
__host__
void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash,int order)
{
	const int threadsperblock = 256;
	const size_t shared_size = 0;

	// one thread per nonce, rounded up to whole blocks
	const dim3 block(threadsperblock);
	const dim3 grid((threads + threadsperblock-1)/threadsperblock);

	qubit_luffa512_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash);

	MyStreamSynchronize(NULL, order, thr_id);
}
|
||||
|
||||
// Build the 128-byte padded block from the 80-byte header at pdata and upload
// it to the constant-memory symbol c_PaddedMessage80.
__host__
void qubit_luffa512_cpu_setBlock_80(void *pdata)
{
	unsigned char PaddedMessage[128];

	// start from an all-zero block, then lay the header over the first 80 bytes
	memset(PaddedMessage, 0, sizeof(PaddedMessage));
	memcpy(PaddedMessage, pdata, 80);

	// fixed trailer bytes
	PaddedMessage[80]  = 0x80;
	PaddedMessage[111] = 1;
	PaddedMessage[126] = 0x02;
	PaddedMessage[127] = 0x80;

	cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
}
|
||||
|
||||
// Variant of the block setup used by the kernel with integrated target test:
// uploads the 8-word target to pTarget and the 128-byte padded block (built
// from the 80-byte header at pdata) to c_PaddedMessage80.
__host__
void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget)
{
	// zero-initialised block with the header copied over the first 80 bytes
	unsigned char PaddedMessage[128] = { 0 };
	memcpy(PaddedMessage, pdata, 80);

	// fixed trailer bytes
	PaddedMessage[80]  = 0x80;
	PaddedMessage[111] = 1;
	PaddedMessage[126] = 0x02;
	PaddedMessage[127] = 0x80;

	// target first, then the message
	cudaMemcpyToSymbol( pTarget, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice);
	cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
}
|
12
util.c
12
util.c
@ -1440,6 +1440,14 @@ void print_hash_tests(void)
|
||||
blake256hash(&hash[0], &buf[0], 14);
|
||||
printpfx("blake", hash);
|
||||
|
||||
memset(hash, 0, sizeof hash);
|
||||
deephash(&hash[0], &buf[0]);
|
||||
printpfx("deep", hash);
|
||||
|
||||
memset(hash, 0, sizeof hash);
|
||||
doomhash(&hash[0], &buf[0]);
|
||||
printpfx("doom", hash);
|
||||
|
||||
memset(hash, 0, sizeof hash);
|
||||
fresh_hash(&hash[0], &buf[0]);
|
||||
printpfx("fresh", hash);
|
||||
@ -1476,6 +1484,10 @@ void print_hash_tests(void)
|
||||
quarkhash(&hash[0], &buf[0]);
|
||||
printpfx("quark", hash);
|
||||
|
||||
memset(hash, 0, sizeof hash);
|
||||
qubithash(&hash[0], &buf[0]);
|
||||
printpfx("qubit", hash);
|
||||
|
||||
memset(hash, 0, sizeof hash);
|
||||
wcoinhash(&hash[0], &buf[0]);
|
||||
printpfx("whirl", hash);
|
||||
|
26
x11/fresh.cu
26
x11/fresh.cu
@ -5,9 +5,9 @@ extern "C" {
|
||||
#include "sph/sph_shavite.h"
|
||||
#include "sph/sph_simd.h"
|
||||
#include "sph/sph_echo.h"
|
||||
}
|
||||
#include "miner.h"
|
||||
#include "cuda_helper.h"
|
||||
}
|
||||
|
||||
// to test gpu hash on a null buffer
|
||||
#define NULLTEST 0
|
||||
@ -15,7 +15,6 @@ extern "C" {
|
||||
static uint32_t *d_hash[8];
|
||||
|
||||
extern int device_map[8];
|
||||
extern bool opt_benchmark;
|
||||
|
||||
extern void x11_shavite512_cpu_init(int thr_id, int threads);
|
||||
extern void x11_shavite512_setBlock_80(void *pdata);
|
||||
@ -28,10 +27,6 @@ extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounc
|
||||
extern void x11_echo512_cpu_init(int thr_id, int threads);
|
||||
extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
|
||||
extern void cuda_check_cpu_init(int thr_id, int threads);
|
||||
extern void cuda_check_cpu_setTarget(const void *ptarget);
|
||||
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
|
||||
|
||||
extern void quark_compactTest_cpu_init(int thr_id, int threads);
|
||||
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
|
||||
uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
|
||||
@ -75,15 +70,6 @@ extern "C" void fresh_hash(void *state, const void *input)
|
||||
memcpy(state, hash, 32);
|
||||
}
|
||||
|
||||
#if NULLTEST
|
||||
static void print_hash(unsigned char *hash)
|
||||
{
|
||||
for (int i=0; i < 32; i += 4) {
|
||||
printf("%02x%02x%02x%02x ", hash[i], hash[i+1], hash[i+2], hash[i+3]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata,
|
||||
const uint32_t *ptarget, uint32_t max_nonce,
|
||||
unsigned long *hashes_done)
|
||||
@ -92,15 +78,9 @@ extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata,
|
||||
const int throughput = 256*256*8;
|
||||
static bool init[8] = {0,0,0,0,0,0,0,0};
|
||||
uint32_t endiandata[20];
|
||||
uint32_t Htarg = ptarget[7];
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = Htarg = 0x0000ff;
|
||||
|
||||
#if NULLTEST
|
||||
for (int k=0; k < 20; k++)
|
||||
pdata[k] = 0;
|
||||
#endif
|
||||
((uint32_t*)ptarget)[7] = 0x00ff;
|
||||
|
||||
if (!init[thr_id])
|
||||
{
|
||||
@ -123,6 +103,8 @@ extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata,
|
||||
x11_shavite512_setBlock_80((void*)endiandata);
|
||||
cuda_check_cpu_setTarget(ptarget);
|
||||
do {
|
||||
uint32_t Htarg = ptarget[7];
|
||||
|
||||
uint32_t foundNonce;
|
||||
int order = 0;
|
||||
|
||||
|
10
x11/x11.cu
10
x11/x11.cu
@ -1,4 +1,3 @@
|
||||
|
||||
extern "C"
|
||||
{
|
||||
#include "sph/sph_blake.h"
|
||||
@ -13,13 +12,13 @@ extern "C"
|
||||
#include "sph/sph_shavite.h"
|
||||
#include "sph/sph_simd.h"
|
||||
#include "sph/sph_echo.h"
|
||||
}
|
||||
|
||||
#include "miner.h"
|
||||
#include "cuda_helper.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <memory.h>
|
||||
}
|
||||
|
||||
// in cpu-miner.c
|
||||
extern int device_map[8];
|
||||
@ -62,14 +61,9 @@ extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounc
|
||||
extern void x11_echo512_cpu_init(int thr_id, int threads);
|
||||
extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
|
||||
extern void cuda_check_cpu_init(int thr_id, int threads);
|
||||
extern void cuda_check_cpu_setTarget(const void *ptarget);
|
||||
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
|
||||
|
||||
extern void quark_compactTest_cpu_init(int thr_id, int threads);
|
||||
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
|
||||
uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
|
||||
int order);
|
||||
uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order);
|
||||
|
||||
// X11 Hashfunktion
|
||||
extern "C" void x11hash(void *output, const void *input)
|
||||
|
16
x13/x13.cu
16
x13/x13.cu
@ -18,18 +18,16 @@ extern "C"
|
||||
|
||||
#include "sph/sph_hamsi.h"
|
||||
#include "sph/sph_fugue.h"
|
||||
|
||||
}
|
||||
#include "miner.h"
|
||||
|
||||
#include "cuda_helper.h"
|
||||
}
|
||||
|
||||
// aus cpu-miner.c
|
||||
extern int device_map[8];
|
||||
|
||||
// Speicher für Input/Output der verketteten Hashfunktionen
|
||||
static uint32_t *d_hash[8];
|
||||
|
||||
|
||||
extern void quark_blake512_cpu_init(int thr_id, int threads);
|
||||
extern void quark_blake512_cpu_setBlock_80(void *pdata);
|
||||
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
|
||||
@ -40,7 +38,6 @@ extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNoun
|
||||
|
||||
extern void quark_groestl512_cpu_init(int thr_id, int threads);
|
||||
extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
//extern void quark_doublegroestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
|
||||
extern void quark_skein512_cpu_init(int thr_id, int threads);
|
||||
extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
@ -72,14 +69,9 @@ extern void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNoun
|
||||
extern void x13_fugue512_cpu_init(int thr_id, int threads);
|
||||
extern void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
|
||||
extern void cuda_check_cpu_init(int thr_id, int threads);
|
||||
extern void cuda_check_cpu_setTarget(const void *ptarget);
|
||||
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
|
||||
|
||||
extern void quark_compactTest_cpu_init(int thr_id, int threads);
|
||||
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
|
||||
uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
|
||||
int order);
|
||||
uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order);
|
||||
|
||||
// X13 Hashfunktion
|
||||
extern "C" void x13hash(void *output, const void *input)
|
||||
@ -159,8 +151,6 @@ extern "C" void x13hash(void *output, const void *input)
|
||||
}
|
||||
|
||||
|
||||
extern bool opt_benchmark;
|
||||
|
||||
extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
|
||||
const uint32_t *ptarget, uint32_t max_nonce,
|
||||
unsigned long *hashes_done)
|
||||
|
@ -9,20 +9,17 @@ extern "C"
|
||||
|
||||
// from cpu-miner.c
|
||||
extern int device_map[8];
|
||||
extern bool opt_benchmark;
|
||||
|
||||
// Speicher für Input/Output der verketteten Hashfunktionen
|
||||
static uint32_t *d_hash[8];
|
||||
|
||||
extern void x15_whirlpool_cpu_init(int thr_id, int threads, int mode);
|
||||
extern void x15_whirlpool_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
|
||||
extern void whirlpool512_setBlock_80(void *pdata, const void *ptarget);
|
||||
extern void whirlpool512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
|
||||
extern void x15_whirlpool_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
extern uint32_t whirlpool512_cpu_finalhash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
|
||||
extern void cuda_check_cpu_init(int thr_id, int threads);
|
||||
extern void cuda_check_cpu_setTarget(const void *ptarget);
|
||||
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
|
||||
|
||||
// CPU Hash function
|
||||
extern "C" void wcoinhash(void *state, const void *input)
|
||||
|
@ -76,10 +76,6 @@ extern void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNoun
|
||||
extern void x14_shabal512_cpu_init(int thr_id, int threads);
|
||||
extern void x14_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
|
||||
extern void cuda_check_cpu_init(int thr_id, int threads);
|
||||
extern void cuda_check_cpu_setTarget(const void *ptarget);
|
||||
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
|
||||
|
||||
extern void quark_compactTest_cpu_init(int thr_id, int threads);
|
||||
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
|
||||
uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order);
|
||||
|
@ -81,10 +81,6 @@ extern void x15_whirlpool_cpu_init(int thr_id, int threads, int mode);
|
||||
extern void x15_whirlpool_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
extern void x15_whirlpool_cpu_free(int thr_id);
|
||||
|
||||
extern void cuda_check_cpu_init(int thr_id, int threads);
|
||||
extern void cuda_check_cpu_setTarget(const void *ptarget);
|
||||
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
|
||||
|
||||
extern void quark_compactTest_cpu_init(int thr_id, int threads);
|
||||
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
|
||||
uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order);
|
||||
|
@ -89,11 +89,6 @@ extern void x17_sha512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce
|
||||
extern void x17_haval256_cpu_init(int thr_id, int threads);
|
||||
extern void x17_haval256_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||
|
||||
|
||||
extern void cuda_check_cpu_init(int thr_id, int threads);
|
||||
extern void cuda_check_cpu_setTarget(const void *ptarget);
|
||||
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
|
||||
|
||||
extern void quark_compactTest_cpu_init(int thr_id, int threads);
|
||||
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
|
||||
uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
|
||||
|
Loading…
Reference in New Issue
Block a user