mirror of
https://github.com/GOSTSec/ccminer
synced 2025-01-09 22:38:05 +00:00
bump to revision v0.8
This commit is contained in:
parent
61cbdc62d0
commit
6c8eff98c0
14
Makefile.am
14
Makefile.am
@ -18,13 +18,13 @@ ccminer_SOURCES = elist.h miner.h compat.h \
|
|||||||
compat/sys/time.h compat/getopt/getopt.h \
|
compat/sys/time.h compat/getopt/getopt.h \
|
||||||
cpu-miner.c util.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c hefty1.c scrypt.c sha2.c \
|
cpu-miner.c util.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c hefty1.c scrypt.c sha2.c \
|
||||||
sph/sph_blake.h sph/sph_groestl.h sph/sph_jh.h sph/sph_keccak.h sph/sph_skein.h sph/sph_types.h \
|
sph/sph_blake.h sph/sph_groestl.h sph/sph_jh.h sph/sph_keccak.h sph/sph_skein.h sph/sph_types.h \
|
||||||
heavy.cu \
|
heavy/heavy.cu \
|
||||||
cuda_blake512.cu cuda_blake512.h \
|
heavy/cuda_blake512.cu heavy/cuda_blake512.h \
|
||||||
cuda_combine.cu cuda_combine.h \
|
heavy/cuda_combine.cu heavy/cuda_combine.h \
|
||||||
cuda_groestl512.cu cuda_groestl512.h \
|
heavy/cuda_groestl512.cu heavy/cuda_groestl512.h \
|
||||||
cuda_hefty1.cu cuda_hefty1.h \
|
heavy/cuda_hefty1.cu heavy/cuda_hefty1.h \
|
||||||
cuda_keccak512.cu cuda_keccak512.h \
|
heavy/cuda_keccak512.cu heavy/cuda_keccak512.h \
|
||||||
cuda_sha256.cu cuda_sha256.h \
|
heavy/cuda_sha256.cu heavy/cuda_sha256.h \
|
||||||
fuguecoin.cpp cuda_fugue256.cu sph/fugue.c sph/sph_fugue.h uint256.h \
|
fuguecoin.cpp cuda_fugue256.cu sph/fugue.c sph/sph_fugue.h uint256.h \
|
||||||
groestlcoin.cpp cuda_groestlcoin.cu cuda_groestlcoin.h \
|
groestlcoin.cpp cuda_groestlcoin.cu cuda_groestlcoin.h \
|
||||||
JHA/jackpotcoin.cu JHA/cuda_jha_keccak512.cu \
|
JHA/jackpotcoin.cu JHA/cuda_jha_keccak512.cu \
|
||||||
|
14
README.txt
14
README.txt
@ -12,9 +12,8 @@ If you find this tool useful and like to support its continued
|
|||||||
VTC donation address: VrjeFzMgvteCGarLw85KivBzmsiH9fqp4a
|
VTC donation address: VrjeFzMgvteCGarLw85KivBzmsiH9fqp4a
|
||||||
MAX donation address: mHrhQP9EFArechWxTFJ97s9D3jvcCvEEnt
|
MAX donation address: mHrhQP9EFArechWxTFJ97s9D3jvcCvEEnt
|
||||||
DOGE donation address: DT9ghsGmez6ojVdEZgvaZbT2Z3TruXG6yP
|
DOGE donation address: DT9ghsGmez6ojVdEZgvaZbT2Z3TruXG6yP
|
||||||
PANDA donation address: PvgtxJ2ZKaudRogCXfUMLXVaWUMcKQgRed
|
|
||||||
MRC donation address: 1Lxc4JPDpQRJB8BN4YwhmSQ3Rcu8gjj2Kd
|
|
||||||
HVC donation address: HNN3PyyTMkDo4RkEjkWSGMwqia1yD8mwJN
|
HVC donation address: HNN3PyyTMkDo4RkEjkWSGMwqia1yD8mwJN
|
||||||
|
GRS donation address: FmJKJAhvyHWPeEVeLQHefr2naqgWc9ABTM
|
||||||
***************************************************************
|
***************************************************************
|
||||||
|
|
||||||
>>> Introduction <<<
|
>>> Introduction <<<
|
||||||
@ -35,6 +34,7 @@ its command line interface and options.
|
|||||||
|
|
||||||
-a, --algo=ALGO specify the algorithm to use
|
-a, --algo=ALGO specify the algorithm to use
|
||||||
heavy use to mine Heavycoin
|
heavy use to mine Heavycoin
|
||||||
|
mjollnir use to mine Mjollnircoin
|
||||||
fugue256 use to mine Fuguecoin
|
fugue256 use to mine Fuguecoin
|
||||||
groestl use to mine Groestlcoin
|
groestl use to mine Groestlcoin
|
||||||
myr-gr use to mine Myriad-Groestl
|
myr-gr use to mine Myriad-Groestl
|
||||||
@ -45,11 +45,12 @@ its command line interface and options.
|
|||||||
Alternatively give string names of your card like
|
Alternatively give string names of your card like
|
||||||
gtx780ti or gt640#2 (matching 2nd gt640 in the PC).
|
gtx780ti or gt640#2 (matching 2nd gt640 in the PC).
|
||||||
|
|
||||||
|
-f, --diff Divide difficulty by this factor (default: 1)
|
||||||
|
-v, --vote Heavycoin block vote (default: 512)
|
||||||
-o, --url=URL URL of mining server (default: " DEF_RPC_URL ")
|
-o, --url=URL URL of mining server (default: " DEF_RPC_URL ")
|
||||||
-O, --userpass=U:P username:password pair for mining server
|
-O, --userpass=U:P username:password pair for mining server
|
||||||
-u, --user=USERNAME username for mining server
|
-u, --user=USERNAME username for mining server
|
||||||
-p, --pass=PASSWORD password for mining server
|
-p, --pass=PASSWORD password for mining server
|
||||||
-v, --vote Heavycoin block vote (default: 512)
|
|
||||||
--cert=FILE certificate for mining server using SSL
|
--cert=FILE certificate for mining server using SSL
|
||||||
-x, --proxy=[PROTOCOL://]HOST[:PORT] connect through a proxy
|
-x, --proxy=[PROTOCOL://]HOST[:PORT] connect through a proxy
|
||||||
-t, --threads=N number of miner threads (default: number of nVidia GPUs in your system)
|
-t, --threads=N number of miner threads (default: number of nVidia GPUs in your system)
|
||||||
@ -116,6 +117,13 @@ from your old clunkers.
|
|||||||
|
|
||||||
>>> RELEASE HISTORY <<<
|
>>> RELEASE HISTORY <<<
|
||||||
|
|
||||||
|
May 3rd 2014 add the MjollnirCoin hash algorithm for the upcoming
|
||||||
|
MjollnirCoin relaunch.
|
||||||
|
|
||||||
|
Add the -f (--diff) option to adjust the difficulty
|
||||||
|
e.g. for the erebor Dwarfpool myr-gr SaffronCoin pool.
|
||||||
|
Use -f 256 there.
|
||||||
|
|
||||||
May 1st 2014 adapt the Jackpot algorithms to changes made by the
|
May 1st 2014 adapt the Jackpot algorithms to changes made by the
|
||||||
coin developers. We keep our unique nVidia advantage
|
coin developers. We keep our unique nVidia advantage
|
||||||
because we have a way to break up the divergence.
|
because we have a way to break up the divergence.
|
||||||
|
@ -264,14 +264,15 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
|
|||||||
<ClInclude Include="compat\sys\time.h" />
|
<ClInclude Include="compat\sys\time.h" />
|
||||||
<ClInclude Include="compat\unistd.h" />
|
<ClInclude Include="compat\unistd.h" />
|
||||||
<ClInclude Include="cpuminer-config.h" />
|
<ClInclude Include="cpuminer-config.h" />
|
||||||
<ClInclude Include="cuda_blake512.h" />
|
|
||||||
<ClInclude Include="cuda_combine.h" />
|
|
||||||
<ClInclude Include="cuda_groestl512.h" />
|
|
||||||
<ClInclude Include="cuda_groestlcoin.h" />
|
<ClInclude Include="cuda_groestlcoin.h" />
|
||||||
<ClInclude Include="cuda_hefty1.h" />
|
<ClInclude Include="cuda_helper.h" />
|
||||||
<ClInclude Include="cuda_keccak512.h" />
|
|
||||||
<ClInclude Include="cuda_sha256.h" />
|
|
||||||
<ClInclude Include="elist.h" />
|
<ClInclude Include="elist.h" />
|
||||||
|
<ClInclude Include="heavy\cuda_blake512.h" />
|
||||||
|
<ClInclude Include="heavy\cuda_combine.h" />
|
||||||
|
<ClInclude Include="heavy\cuda_groestl512.h" />
|
||||||
|
<ClInclude Include="heavy\cuda_hefty1.h" />
|
||||||
|
<ClInclude Include="heavy\cuda_keccak512.h" />
|
||||||
|
<ClInclude Include="heavy\cuda_sha256.h" />
|
||||||
<ClInclude Include="hefty1.h" />
|
<ClInclude Include="hefty1.h" />
|
||||||
<ClInclude Include="miner.h" />
|
<ClInclude Include="miner.h" />
|
||||||
<ClInclude Include="sph\sph_blake.h" />
|
<ClInclude Include="sph\sph_blake.h" />
|
||||||
@ -290,16 +291,16 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
|
|||||||
<ClInclude Include="uint256.h" />
|
<ClInclude Include="uint256.h" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CudaCompile Include="cuda_blake512.cu" />
|
|
||||||
<CudaCompile Include="cuda_combine.cu" />
|
|
||||||
<CudaCompile Include="cuda_fugue256.cu" />
|
<CudaCompile Include="cuda_fugue256.cu" />
|
||||||
<CudaCompile Include="cuda_groestl512.cu" />
|
|
||||||
<CudaCompile Include="cuda_groestlcoin.cu" />
|
<CudaCompile Include="cuda_groestlcoin.cu" />
|
||||||
<CudaCompile Include="cuda_hefty1.cu" />
|
|
||||||
<CudaCompile Include="cuda_keccak512.cu" />
|
|
||||||
<CudaCompile Include="cuda_myriadgroestl.cu" />
|
<CudaCompile Include="cuda_myriadgroestl.cu" />
|
||||||
<CudaCompile Include="cuda_sha256.cu" />
|
<CudaCompile Include="heavy\cuda_blake512.cu" />
|
||||||
<CudaCompile Include="heavy.cu" />
|
<CudaCompile Include="heavy\cuda_combine.cu" />
|
||||||
|
<CudaCompile Include="heavy\cuda_groestl512.cu" />
|
||||||
|
<CudaCompile Include="heavy\cuda_hefty1.cu" />
|
||||||
|
<CudaCompile Include="heavy\cuda_keccak512.cu" />
|
||||||
|
<CudaCompile Include="heavy\cuda_sha256.cu" />
|
||||||
|
<CudaCompile Include="heavy\heavy.cu" />
|
||||||
<CudaCompile Include="JHA\cuda_jha_compactionTest.cu" />
|
<CudaCompile Include="JHA\cuda_jha_compactionTest.cu" />
|
||||||
<CudaCompile Include="JHA\cuda_jha_keccak512.cu" />
|
<CudaCompile Include="JHA\cuda_jha_keccak512.cu" />
|
||||||
<CudaCompile Include="JHA\jackpotcoin.cu" />
|
<CudaCompile Include="JHA\jackpotcoin.cu" />
|
||||||
|
@ -46,6 +46,12 @@
|
|||||||
<Filter Include="Header Files\sph">
|
<Filter Include="Header Files\sph">
|
||||||
<UniqueIdentifier>{7c2a98c6-064c-4a69-b803-d6f6ff5edd0b}</UniqueIdentifier>
|
<UniqueIdentifier>{7c2a98c6-064c-4a69-b803-d6f6ff5edd0b}</UniqueIdentifier>
|
||||||
</Filter>
|
</Filter>
|
||||||
|
<Filter Include="Source Files\CUDA\heavy">
|
||||||
|
<UniqueIdentifier>{c3222908-22ba-4586-a637-6363f455b06d}</UniqueIdentifier>
|
||||||
|
</Filter>
|
||||||
|
<Filter Include="Header Files\CUDA\heavy">
|
||||||
|
<UniqueIdentifier>{3281db48-f394-49ea-a1ef-6ebd09828d50}</UniqueIdentifier>
|
||||||
|
</Filter>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClCompile Include="compat\jansson\dump.c">
|
<ClCompile Include="compat\jansson\dump.c">
|
||||||
@ -167,24 +173,6 @@
|
|||||||
<ClInclude Include="hefty1.h">
|
<ClInclude Include="hefty1.h">
|
||||||
<Filter>Header Files</Filter>
|
<Filter>Header Files</Filter>
|
||||||
</ClInclude>
|
</ClInclude>
|
||||||
<ClInclude Include="cuda_sha256.h">
|
|
||||||
<Filter>Header Files\CUDA</Filter>
|
|
||||||
</ClInclude>
|
|
||||||
<ClInclude Include="cuda_hefty1.h">
|
|
||||||
<Filter>Header Files\CUDA</Filter>
|
|
||||||
</ClInclude>
|
|
||||||
<ClInclude Include="cuda_keccak512.h">
|
|
||||||
<Filter>Header Files\CUDA</Filter>
|
|
||||||
</ClInclude>
|
|
||||||
<ClInclude Include="cuda_combine.h">
|
|
||||||
<Filter>Header Files\CUDA</Filter>
|
|
||||||
</ClInclude>
|
|
||||||
<ClInclude Include="cuda_blake512.h">
|
|
||||||
<Filter>Header Files\CUDA</Filter>
|
|
||||||
</ClInclude>
|
|
||||||
<ClInclude Include="cuda_groestl512.h">
|
|
||||||
<Filter>Header Files\CUDA</Filter>
|
|
||||||
</ClInclude>
|
|
||||||
<ClInclude Include="uint256.h">
|
<ClInclude Include="uint256.h">
|
||||||
<Filter>Header Files</Filter>
|
<Filter>Header Files</Filter>
|
||||||
</ClInclude>
|
</ClInclude>
|
||||||
@ -230,29 +218,29 @@
|
|||||||
<ClInclude Include="sph\sph_types.h">
|
<ClInclude Include="sph\sph_types.h">
|
||||||
<Filter>Header Files\sph</Filter>
|
<Filter>Header Files\sph</Filter>
|
||||||
</ClInclude>
|
</ClInclude>
|
||||||
|
<ClInclude Include="heavy\cuda_blake512.h">
|
||||||
|
<Filter>Header Files\CUDA\heavy</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
<ClInclude Include="heavy\cuda_combine.h">
|
||||||
|
<Filter>Header Files\CUDA\heavy</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
<ClInclude Include="heavy\cuda_groestl512.h">
|
||||||
|
<Filter>Header Files\CUDA\heavy</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
<ClInclude Include="heavy\cuda_hefty1.h">
|
||||||
|
<Filter>Header Files\CUDA\heavy</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
<ClInclude Include="heavy\cuda_keccak512.h">
|
||||||
|
<Filter>Header Files\CUDA\heavy</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
<ClInclude Include="heavy\cuda_sha256.h">
|
||||||
|
<Filter>Header Files\CUDA\heavy</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
<ClInclude Include="cuda_helper.h">
|
||||||
|
<Filter>Header Files\CUDA</Filter>
|
||||||
|
</ClInclude>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CudaCompile Include="cuda_sha256.cu">
|
|
||||||
<Filter>Source Files\CUDA</Filter>
|
|
||||||
</CudaCompile>
|
|
||||||
<CudaCompile Include="cuda_blake512.cu">
|
|
||||||
<Filter>Source Files\CUDA</Filter>
|
|
||||||
</CudaCompile>
|
|
||||||
<CudaCompile Include="cuda_groestl512.cu">
|
|
||||||
<Filter>Source Files\CUDA</Filter>
|
|
||||||
</CudaCompile>
|
|
||||||
<CudaCompile Include="cuda_hefty1.cu">
|
|
||||||
<Filter>Source Files\CUDA</Filter>
|
|
||||||
</CudaCompile>
|
|
||||||
<CudaCompile Include="cuda_keccak512.cu">
|
|
||||||
<Filter>Source Files\CUDA</Filter>
|
|
||||||
</CudaCompile>
|
|
||||||
<CudaCompile Include="cuda_combine.cu">
|
|
||||||
<Filter>Source Files\CUDA</Filter>
|
|
||||||
</CudaCompile>
|
|
||||||
<CudaCompile Include="heavy.cu">
|
|
||||||
<Filter>Source Files\CUDA</Filter>
|
|
||||||
</CudaCompile>
|
|
||||||
<CudaCompile Include="cuda_fugue256.cu">
|
<CudaCompile Include="cuda_fugue256.cu">
|
||||||
<Filter>Source Files\CUDA</Filter>
|
<Filter>Source Files\CUDA</Filter>
|
||||||
</CudaCompile>
|
</CudaCompile>
|
||||||
@ -286,5 +274,26 @@
|
|||||||
<CudaCompile Include="quark\cuda_skein512.cu">
|
<CudaCompile Include="quark\cuda_skein512.cu">
|
||||||
<Filter>Source Files\CUDA\quark</Filter>
|
<Filter>Source Files\CUDA\quark</Filter>
|
||||||
</CudaCompile>
|
</CudaCompile>
|
||||||
|
<CudaCompile Include="heavy\cuda_blake512.cu">
|
||||||
|
<Filter>Source Files\CUDA\heavy</Filter>
|
||||||
|
</CudaCompile>
|
||||||
|
<CudaCompile Include="heavy\cuda_combine.cu">
|
||||||
|
<Filter>Source Files\CUDA\heavy</Filter>
|
||||||
|
</CudaCompile>
|
||||||
|
<CudaCompile Include="heavy\cuda_groestl512.cu">
|
||||||
|
<Filter>Source Files\CUDA\heavy</Filter>
|
||||||
|
</CudaCompile>
|
||||||
|
<CudaCompile Include="heavy\cuda_hefty1.cu">
|
||||||
|
<Filter>Source Files\CUDA\heavy</Filter>
|
||||||
|
</CudaCompile>
|
||||||
|
<CudaCompile Include="heavy\cuda_keccak512.cu">
|
||||||
|
<Filter>Source Files\CUDA\heavy</Filter>
|
||||||
|
</CudaCompile>
|
||||||
|
<CudaCompile Include="heavy\cuda_sha256.cu">
|
||||||
|
<Filter>Source Files\CUDA\heavy</Filter>
|
||||||
|
</CudaCompile>
|
||||||
|
<CudaCompile Include="heavy\heavy.cu">
|
||||||
|
<Filter>Source Files\CUDA\heavy</Filter>
|
||||||
|
</CudaCompile>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
</Project>
|
</Project>
|
@ -1,4 +1,4 @@
|
|||||||
AC_INIT([ccminer], [2014.05.01])
|
AC_INIT([ccminer], [2014.05.03])
|
||||||
|
|
||||||
AC_PREREQ([2.59c])
|
AC_PREREQ([2.59c])
|
||||||
AC_CANONICAL_SYSTEM
|
AC_CANONICAL_SYSTEM
|
||||||
|
34
cpu-miner.c
34
cpu-miner.c
@ -47,6 +47,7 @@
|
|||||||
#define PROGRAM_NAME "minerd"
|
#define PROGRAM_NAME "minerd"
|
||||||
#define LP_SCANTIME 60
|
#define LP_SCANTIME 60
|
||||||
#define HEAVYCOIN_BLKHDR_SZ 84
|
#define HEAVYCOIN_BLKHDR_SZ 84
|
||||||
|
#define MNR_BLKHDR_SZ 80
|
||||||
|
|
||||||
// from heavy.cu
|
// from heavy.cu
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
@ -121,6 +122,7 @@ struct workio_cmd {
|
|||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
ALGO_HEAVY, /* Heavycoin hash */
|
ALGO_HEAVY, /* Heavycoin hash */
|
||||||
|
ALGO_MJOLLNIR, /* Mjollnir hash */
|
||||||
ALGO_FUGUE256, /* Fugue256 */
|
ALGO_FUGUE256, /* Fugue256 */
|
||||||
ALGO_GROESTL,
|
ALGO_GROESTL,
|
||||||
ALGO_MYR_GR,
|
ALGO_MYR_GR,
|
||||||
@ -129,6 +131,7 @@ typedef enum {
|
|||||||
|
|
||||||
static const char *algo_names[] = {
|
static const char *algo_names[] = {
|
||||||
"heavy",
|
"heavy",
|
||||||
|
"mjollnir",
|
||||||
"fugue256",
|
"fugue256",
|
||||||
"groestl",
|
"groestl",
|
||||||
"myr-gr",
|
"myr-gr",
|
||||||
@ -154,6 +157,7 @@ static json_t *opt_config;
|
|||||||
static const bool opt_time = true;
|
static const bool opt_time = true;
|
||||||
static sha256_algos opt_algo = ALGO_HEAVY;
|
static sha256_algos opt_algo = ALGO_HEAVY;
|
||||||
static int opt_n_threads = 0;
|
static int opt_n_threads = 0;
|
||||||
|
static double opt_difficulty = 1; // CH
|
||||||
bool opt_trust_pool = false;
|
bool opt_trust_pool = false;
|
||||||
uint16_t opt_vote = 9999;
|
uint16_t opt_vote = 9999;
|
||||||
static int num_processors;
|
static int num_processors;
|
||||||
@ -195,6 +199,7 @@ Options:\n\
|
|||||||
-a, --algo=ALGO specify the algorithm to use\n\
|
-a, --algo=ALGO specify the algorithm to use\n\
|
||||||
fugue256 Fuguecoin hash\n\
|
fugue256 Fuguecoin hash\n\
|
||||||
heavy Heavycoin hash\n\
|
heavy Heavycoin hash\n\
|
||||||
|
mjollnir Mjollnircoin hash\n\
|
||||||
groestl Groestlcoin hash\n\
|
groestl Groestlcoin hash\n\
|
||||||
myr-gr Myriad-Groestl hash\n\
|
myr-gr Myriad-Groestl hash\n\
|
||||||
jackpot Jackpot hash\n\
|
jackpot Jackpot hash\n\
|
||||||
@ -244,7 +249,7 @@ static char const short_options[] =
|
|||||||
#ifdef HAVE_SYSLOG_H
|
#ifdef HAVE_SYSLOG_H
|
||||||
"S"
|
"S"
|
||||||
#endif
|
#endif
|
||||||
"a:c:Dhp:Px:qr:R:s:t:T:o:u:O:Vd:mv:";
|
"a:c:Dhp:Px:qr:R:s:t:T:o:u:O:Vd:f:mv:";
|
||||||
|
|
||||||
static struct option const options[] = {
|
static struct option const options[] = {
|
||||||
{ "algo", 1, NULL, 'a' },
|
{ "algo", 1, NULL, 'a' },
|
||||||
@ -277,6 +282,7 @@ static struct option const options[] = {
|
|||||||
{ "userpass", 1, NULL, 'O' },
|
{ "userpass", 1, NULL, 'O' },
|
||||||
{ "version", 0, NULL, 'V' },
|
{ "version", 0, NULL, 'V' },
|
||||||
{ "devices", 1, NULL, 'd' },
|
{ "devices", 1, NULL, 'd' },
|
||||||
|
{ "diff", 1, NULL, 'f' },
|
||||||
{ 0, 0, 0, 0 }
|
{ 0, 0, 0, 0 }
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -684,7 +690,7 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
|
|||||||
memcpy(work->xnonce2, sctx->job.xnonce2, sctx->xnonce2_size);
|
memcpy(work->xnonce2, sctx->job.xnonce2, sctx->xnonce2_size);
|
||||||
|
|
||||||
/* Generate merkle root */
|
/* Generate merkle root */
|
||||||
if (opt_algo == ALGO_HEAVY)
|
if (opt_algo == ALGO_HEAVY || opt_algo == ALGO_MJOLLNIR)
|
||||||
heavycoin_hash(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size);
|
heavycoin_hash(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size);
|
||||||
else
|
else
|
||||||
if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL)
|
if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL)
|
||||||
@ -694,7 +700,7 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
|
|||||||
|
|
||||||
for (i = 0; i < sctx->job.merkle_count; i++) {
|
for (i = 0; i < sctx->job.merkle_count; i++) {
|
||||||
memcpy(merkle_root + 32, sctx->job.merkle[i], 32);
|
memcpy(merkle_root + 32, sctx->job.merkle[i], 32);
|
||||||
if (opt_algo == ALGO_HEAVY)
|
if (opt_algo == ALGO_HEAVY || opt_algo == ALGO_MJOLLNIR)
|
||||||
heavycoin_hash(merkle_root, merkle_root, 64);
|
heavycoin_hash(merkle_root, merkle_root, 64);
|
||||||
else
|
else
|
||||||
sha256d(merkle_root, merkle_root, 64);
|
sha256d(merkle_root, merkle_root, 64);
|
||||||
@ -738,11 +744,11 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (opt_algo == ALGO_JACKPOT)
|
if (opt_algo == ALGO_JACKPOT)
|
||||||
diff_to_target(work->target, sctx->job.diff / 65536.0);
|
diff_to_target(work->target, sctx->job.diff / (65536.0 * opt_difficulty));
|
||||||
else if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL)
|
else if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL)
|
||||||
diff_to_target(work->target, sctx->job.diff / 256.0);
|
diff_to_target(work->target, sctx->job.diff / (256.0 * opt_difficulty));
|
||||||
else
|
else
|
||||||
diff_to_target(work->target, sctx->job.diff);
|
diff_to_target(work->target, sctx->job.diff / opt_difficulty);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void *miner_thread(void *userdata)
|
static void *miner_thread(void *userdata)
|
||||||
@ -836,7 +842,12 @@ static void *miner_thread(void *userdata)
|
|||||||
|
|
||||||
case ALGO_HEAVY:
|
case ALGO_HEAVY:
|
||||||
rc = scanhash_heavy(thr_id, work.data, work.target,
|
rc = scanhash_heavy(thr_id, work.data, work.target,
|
||||||
max_nonce, &hashes_done, work.maxvote);
|
max_nonce, &hashes_done, work.maxvote, HEAVYCOIN_BLKHDR_SZ);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ALGO_MJOLLNIR:
|
||||||
|
rc = scanhash_heavy(thr_id, work.data, work.target,
|
||||||
|
max_nonce, &hashes_done, 0, MNR_BLKHDR_SZ);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case ALGO_FUGUE256:
|
case ALGO_FUGUE256:
|
||||||
@ -1112,6 +1123,7 @@ static void parse_arg (int key, char *arg)
|
|||||||
{
|
{
|
||||||
char *p;
|
char *p;
|
||||||
int v, i;
|
int v, i;
|
||||||
|
double d;
|
||||||
|
|
||||||
switch(key) {
|
switch(key) {
|
||||||
case 'a':
|
case 'a':
|
||||||
@ -1309,6 +1321,12 @@ static void parse_arg (int key, char *arg)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case 'f': // CH - Divisor for Difficulty
|
||||||
|
d = atof(arg);
|
||||||
|
if (d == 0) /* sanity check */
|
||||||
|
show_usage_and_exit(1);
|
||||||
|
opt_difficulty = d;
|
||||||
|
break;
|
||||||
case 'V':
|
case 'V':
|
||||||
show_version_and_exit();
|
show_version_and_exit();
|
||||||
case 'h':
|
case 'h':
|
||||||
@ -1404,7 +1422,7 @@ static void signal_handler(int sig)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define PROGRAM_VERSION "0.7"
|
#define PROGRAM_VERSION "0.8"
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
struct thr_info *thr;
|
struct thr_info *thr;
|
||||||
|
@ -152,7 +152,7 @@
|
|||||||
#define PACKAGE_NAME "ccminer"
|
#define PACKAGE_NAME "ccminer"
|
||||||
|
|
||||||
/* Define to the full name and version of this package. */
|
/* Define to the full name and version of this package. */
|
||||||
#define PACKAGE_STRING "ccminer 2014.05.01"
|
#define PACKAGE_STRING "ccminer 2014.05.03"
|
||||||
|
|
||||||
/* Define to the one symbol short name of this package. */
|
/* Define to the one symbol short name of this package. */
|
||||||
#undef PACKAGE_TARNAME
|
#undef PACKAGE_TARNAME
|
||||||
@ -161,7 +161,7 @@
|
|||||||
#undef PACKAGE_URL
|
#undef PACKAGE_URL
|
||||||
|
|
||||||
/* Define to the version of this package. */
|
/* Define to the version of this package. */
|
||||||
#define PACKAGE_VERSION "2014.05.01"
|
#define PACKAGE_VERSION "2014.05.03"
|
||||||
|
|
||||||
/* If using the C implementation of alloca, define if you know the
|
/* If using the C implementation of alloca, define if you know the
|
||||||
direction of stack growth for your system; otherwise it will be
|
direction of stack growth for your system; otherwise it will be
|
||||||
|
@ -16,9 +16,6 @@ extern int device_map[8];
|
|||||||
// aus heavy.cu
|
// aus heavy.cu
|
||||||
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
|
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
|
||||||
|
|
||||||
// aus driver.c
|
|
||||||
extern "C" void set_device(int device);
|
|
||||||
|
|
||||||
// Folgende Definitionen später durch header ersetzen
|
// Folgende Definitionen später durch header ersetzen
|
||||||
typedef unsigned char uint8_t;
|
typedef unsigned char uint8_t;
|
||||||
typedef unsigned int uint32_t;
|
typedef unsigned int uint32_t;
|
||||||
|
@ -17,8 +17,8 @@ extern uint32_t *d_nonceVector[8];
|
|||||||
// globaler Speicher für unsere Ergebnisse
|
// globaler Speicher für unsere Ergebnisse
|
||||||
uint32_t *d_hash5output[8];
|
uint32_t *d_hash5output[8];
|
||||||
|
|
||||||
// die Message (116 Bytes) mit Padding zur Berechnung auf der GPU
|
// die Message (112 bzw. 116 Bytes) mit Padding zur Berechnung auf der GPU
|
||||||
__constant__ uint64_t c_PaddedMessage[16]; // padded message (84+32 bytes + padding)
|
__constant__ uint64_t c_PaddedMessage[16]; // padded message (80/84+32 bytes + padding)
|
||||||
|
|
||||||
// ---------------------------- BEGIN CUDA blake512 functions ------------------------------------
|
// ---------------------------- BEGIN CUDA blake512 functions ------------------------------------
|
||||||
|
|
||||||
@ -44,10 +44,12 @@ const uint8_t host_sigma[16][16] =
|
|||||||
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }
|
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Diese Makros besser nur für Compile Time Konstanten verwenden. Sie sind langsam.
|
||||||
#define SWAP32(x) \
|
#define SWAP32(x) \
|
||||||
((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) | \
|
((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) | \
|
||||||
(((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu))
|
(((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu))
|
||||||
|
|
||||||
|
// Diese Makros besser nur für Compile Time Konstanten verwenden. Sie sind langsam.
|
||||||
#define SWAP64(x) \
|
#define SWAP64(x) \
|
||||||
((uint64_t)((((uint64_t)(x) & 0xff00000000000000ULL) >> 56) | \
|
((uint64_t)((((uint64_t)(x) & 0xff00000000000000ULL) >> 56) | \
|
||||||
(((uint64_t)(x) & 0x00ff000000000000ULL) >> 40) | \
|
(((uint64_t)(x) & 0x00ff000000000000ULL) >> 40) | \
|
||||||
@ -58,11 +60,11 @@ const uint8_t host_sigma[16][16] =
|
|||||||
(((uint64_t)(x) & 0x000000000000ff00ULL) << 40) | \
|
(((uint64_t)(x) & 0x000000000000ff00ULL) << 40) | \
|
||||||
(((uint64_t)(x) & 0x00000000000000ffULL) << 56)))
|
(((uint64_t)(x) & 0x00000000000000ffULL) << 56)))
|
||||||
|
|
||||||
__constant__ uint64_t c_SecondRound[16];
|
__constant__ uint64_t c_SecondRound[15];
|
||||||
|
|
||||||
const uint64_t host_SecondRound[16] =
|
const uint64_t host_SecondRound[15] =
|
||||||
{
|
{
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,SWAP64(1),0,SWAP64(0x3A0)
|
0,0,0,0,0,0,0,0,0,0,0,0,0,SWAP64(1),0
|
||||||
};
|
};
|
||||||
|
|
||||||
__constant__ uint64_t c_u512[16];
|
__constant__ uint64_t c_u512[16];
|
||||||
@ -80,24 +82,22 @@ const uint64_t host_u512[16] =
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
#define ROTR(x,n) (((x)<<(64-n))|( (x)>>(n)))
|
|
||||||
|
|
||||||
#define G(a,b,c,d,e) \
|
#define G(a,b,c,d,e) \
|
||||||
v[a] += (m[sigma[i][e]] ^ u512[sigma[i][e+1]]) + v[b];\
|
v[a] += (m[sigma[i][e]] ^ u512[sigma[i][e+1]]) + v[b];\
|
||||||
v[d] = ROTR( v[d] ^ v[a],32); \
|
v[d] = ROTR64( v[d] ^ v[a],32); \
|
||||||
v[c] += v[d]; \
|
v[c] += v[d]; \
|
||||||
v[b] = ROTR( v[b] ^ v[c],25); \
|
v[b] = ROTR64( v[b] ^ v[c],25); \
|
||||||
v[a] += (m[sigma[i][e+1]] ^ u512[sigma[i][e]])+v[b]; \
|
v[a] += (m[sigma[i][e+1]] ^ u512[sigma[i][e]])+v[b]; \
|
||||||
v[d] = ROTR( v[d] ^ v[a],16); \
|
v[d] = ROTR64( v[d] ^ v[a],16); \
|
||||||
v[c] += v[d]; \
|
v[c] += v[d]; \
|
||||||
v[b] = ROTR( v[b] ^ v[c],11);
|
v[b] = ROTR64( v[b] ^ v[c],11);
|
||||||
|
|
||||||
__device__ void blake512_compress( uint64_t *h, const uint64_t *block, int nullt, const uint8_t ((*sigma)[16]), const uint64_t *u512 )
|
template <int BLOCKSIZE> __device__ void blake512_compress( uint64_t *h, const uint64_t *block, int nullt, const uint8_t ((*sigma)[16]), const uint64_t *u512 )
|
||||||
{
|
{
|
||||||
uint64_t v[16], m[16], i;
|
uint64_t v[16], m[16], i;
|
||||||
|
|
||||||
#pragma unroll 16
|
#pragma unroll 16
|
||||||
for( i = 0; i < 16; ++i ) m[i] = SWAP64(block[i]);
|
for( i = 0; i < 16; ++i ) m[i] = cuda_swab64(block[i]);
|
||||||
|
|
||||||
#pragma unroll 8
|
#pragma unroll 8
|
||||||
for( i = 0; i < 8; ++i ) v[i] = h[i];
|
for( i = 0; i < 8; ++i ) v[i] = h[i];
|
||||||
@ -113,11 +113,11 @@ __device__ void blake512_compress( uint64_t *h, const uint64_t *block, int nullt
|
|||||||
|
|
||||||
/* don't xor t when the block is only padding */
|
/* don't xor t when the block is only padding */
|
||||||
if ( !nullt ) {
|
if ( !nullt ) {
|
||||||
v[12] ^= 928;
|
v[12] ^= 8*(BLOCKSIZE+32);
|
||||||
v[13] ^= 928;
|
v[13] ^= 8*(BLOCKSIZE+32);
|
||||||
}
|
}
|
||||||
|
|
||||||
#pragma unroll 16
|
//#pragma unroll 16
|
||||||
for( i = 0; i < 16; ++i )
|
for( i = 0; i < 16; ++i )
|
||||||
{
|
{
|
||||||
/* column step */
|
/* column step */
|
||||||
@ -136,49 +136,9 @@ __device__ void blake512_compress( uint64_t *h, const uint64_t *block, int nullt
|
|||||||
for( i = 0; i < 16; ++i ) h[i % 8] ^= v[i];
|
for( i = 0; i < 16; ++i ) h[i % 8] ^= v[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
// Endian Drehung für 32 Bit Typen
|
#include "cuda_helper.h"
|
||||||
static __device__ uint32_t cuda_swab32(uint32_t x)
|
|
||||||
{
|
|
||||||
return (((x << 24) & 0xff000000u) | ((x << 8) & 0x00ff0000u)
|
|
||||||
| ((x >> 8) & 0x0000ff00u) | ((x >> 24) & 0x000000ffu));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Endian Drehung für 64 Bit Typen
|
template <int BLOCKSIZE> __global__ void blake512_gpu_hash(int threads, uint32_t startNounce, void *outputHash, uint32_t *heftyHashes, uint32_t *nonceVector)
|
||||||
static __device__ uint64_t cuda_swab64(uint64_t x) {
|
|
||||||
uint32_t h = (x >> 32);
|
|
||||||
uint32_t l = (x & 0xFFFFFFFFULL);
|
|
||||||
return (((uint64_t)cuda_swab32(l)) << 32) | ((uint64_t)cuda_swab32(h));
|
|
||||||
}
|
|
||||||
|
|
||||||
// das Hi Word aus einem 64 Bit Typen extrahieren
|
|
||||||
static __device__ uint32_t HIWORD(const uint64_t &x) {
|
|
||||||
#if __CUDA_ARCH__ >= 130
|
|
||||||
return (uint32_t)__double2hiint(__longlong_as_double(x));
|
|
||||||
#else
|
|
||||||
return (uint32_t)(x >> 32);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
// das Hi Word in einem 64 Bit Typen ersetzen
|
|
||||||
static __device__ uint64_t REPLACE_HIWORD(const uint64_t &x, const uint32_t &y) {
|
|
||||||
return (x & 0xFFFFFFFFULL) | (((uint64_t)y) << 32ULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
// das Lo Word aus einem 64 Bit Typen extrahieren
|
|
||||||
static __device__ uint32_t LOWORD(const uint64_t &x) {
|
|
||||||
#if __CUDA_ARCH__ >= 130
|
|
||||||
return (uint32_t)__double2loint(__longlong_as_double(x));
|
|
||||||
#else
|
|
||||||
return (uint32_t)(x & 0xFFFFFFFFULL);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
// das Lo Word in einem 64 Bit Typen ersetzen
|
|
||||||
static __device__ uint64_t REPLACE_LOWORD(const uint64_t &x, const uint32_t &y) {
|
|
||||||
return (x & 0xFFFFFFFF00000000ULL) | ((uint64_t)y);
|
|
||||||
}
|
|
||||||
|
|
||||||
__global__ void blake512_gpu_hash(int threads, uint32_t startNounce, void *outputHash, uint32_t *heftyHashes, uint32_t *nonceVector)
|
|
||||||
{
|
{
|
||||||
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||||
if (thread < threads)
|
if (thread < threads)
|
||||||
@ -211,8 +171,10 @@ __global__ void blake512_gpu_hash(int threads, uint32_t startNounce, void *outpu
|
|||||||
// die Nounce durch die thread-spezifische ersetzen
|
// die Nounce durch die thread-spezifische ersetzen
|
||||||
buf[9] = REPLACE_HIWORD(buf[9], nounce);
|
buf[9] = REPLACE_HIWORD(buf[9], nounce);
|
||||||
|
|
||||||
// den thread-spezifischen Hefty1 hash einsetzen
|
|
||||||
uint32_t *hefty = heftyHashes + 8 * hashPosition;
|
uint32_t *hefty = heftyHashes + 8 * hashPosition;
|
||||||
|
if (BLOCKSIZE == 84) {
|
||||||
|
// den thread-spezifischen Hefty1 hash einsetzen
|
||||||
|
// aufwändig, weil das nicht mit uint64_t Wörtern aligned ist.
|
||||||
buf[10] = REPLACE_HIWORD(buf[10], hefty[0]);
|
buf[10] = REPLACE_HIWORD(buf[10], hefty[0]);
|
||||||
buf[11] = REPLACE_LOWORD(buf[11], hefty[1]);
|
buf[11] = REPLACE_LOWORD(buf[11], hefty[1]);
|
||||||
buf[11] = REPLACE_HIWORD(buf[11], hefty[2]);
|
buf[11] = REPLACE_HIWORD(buf[11], hefty[2]);
|
||||||
@ -221,30 +183,28 @@ __global__ void blake512_gpu_hash(int threads, uint32_t startNounce, void *outpu
|
|||||||
buf[13] = REPLACE_LOWORD(buf[13], hefty[5]);
|
buf[13] = REPLACE_LOWORD(buf[13], hefty[5]);
|
||||||
buf[13] = REPLACE_HIWORD(buf[13], hefty[6]);
|
buf[13] = REPLACE_HIWORD(buf[13], hefty[6]);
|
||||||
buf[14] = REPLACE_LOWORD(buf[14], hefty[7]);
|
buf[14] = REPLACE_LOWORD(buf[14], hefty[7]);
|
||||||
|
}
|
||||||
|
else if (BLOCKSIZE == 80) {
|
||||||
|
buf[10] = MAKE_ULONGLONG(hefty[0], hefty[1]);
|
||||||
|
buf[11] = MAKE_ULONGLONG(hefty[2], hefty[3]);
|
||||||
|
buf[12] = MAKE_ULONGLONG(hefty[4], hefty[5]);
|
||||||
|
buf[13] = MAKE_ULONGLONG(hefty[6], hefty[7]);
|
||||||
|
}
|
||||||
|
|
||||||
// erste Runde
|
// erste Runde
|
||||||
blake512_compress( h, buf, 0, c_sigma, c_u512 );
|
blake512_compress<BLOCKSIZE>( h, buf, 0, c_sigma, c_u512 );
|
||||||
|
|
||||||
|
|
||||||
// zweite Runde
|
// zweite Runde
|
||||||
#pragma unroll 16
|
#pragma unroll 15
|
||||||
for (int i=0; i < 16; ++i) buf[i] = c_SecondRound[i];
|
for (int i=0; i < 15; ++i) buf[i] = c_SecondRound[i];
|
||||||
blake512_compress( h, buf, 1, c_sigma, c_u512 );
|
buf[15] = SWAP64(8*(BLOCKSIZE+32)); // Blocksize in Bits einsetzen
|
||||||
|
blake512_compress<BLOCKSIZE>( h, buf, 1, c_sigma, c_u512 );
|
||||||
|
|
||||||
// Hash rauslassen
|
// Hash rauslassen
|
||||||
#if 0
|
|
||||||
// ausschliesslich 32 bit Operationen sofern die SM1.3 double intrinsics verfügbar sind
|
|
||||||
uint32_t *outHash = (uint32_t *)outputHash + 16 * hashPosition;
|
|
||||||
#pragma unroll 8
|
|
||||||
for (int i=0; i < 8; ++i) {
|
|
||||||
outHash[2*i+0] = cuda_swab32( HIWORD(h[i]) );
|
|
||||||
outHash[2*i+1] = cuda_swab32( LOWORD(h[i]) );
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
// in dieser Version passieren auch ein paar 64 Bit Shifts
|
|
||||||
uint64_t *outHash = (uint64_t *)outputHash + 8 * hashPosition;
|
uint64_t *outHash = (uint64_t *)outputHash + 8 * hashPosition;
|
||||||
#pragma unroll 8
|
#pragma unroll 8
|
||||||
for (int i=0; i < 8; ++i) outHash[i] = cuda_swab64( h[i] );
|
for (int i=0; i < 8; ++i) outHash[i] = cuda_swab64( h[i] );
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -274,22 +234,30 @@ __host__ void blake512_cpu_init(int thr_id, int threads)
|
|||||||
cudaMalloc(&d_hash5output[thr_id], 16 * sizeof(uint32_t) * threads);
|
cudaMalloc(&d_hash5output[thr_id], 16 * sizeof(uint32_t) * threads);
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ void blake512_cpu_setBlock(void *pdata)
|
static int BLOCKSIZE = 84;
|
||||||
|
|
||||||
|
__host__ void blake512_cpu_setBlock(void *pdata, int len)
|
||||||
// data muss 84-Byte haben!
|
// data must be 80 or 84 bytes long (selected by len)!
|
||||||
// heftyHash hat 32-Byte
|
// heftyHash hat 32-Byte
|
||||||
{
|
{
|
||||||
// Message mit Padding für erste Runde bereitstellen
|
|
||||||
unsigned char PaddedMessage[128];
|
unsigned char PaddedMessage[128];
|
||||||
|
if (len == 84) {
|
||||||
|
// Message mit Padding für erste Runde bereitstellen
|
||||||
memcpy(PaddedMessage, pdata, 84);
|
memcpy(PaddedMessage, pdata, 84);
|
||||||
memset(PaddedMessage+84, 0, 32); // leeres Hefty Hash einfüllen
|
memset(PaddedMessage+84, 0, 32); // leeres Hefty Hash einfüllen
|
||||||
memset(PaddedMessage+116, 0, 12);
|
memset(PaddedMessage+116, 0, 12);
|
||||||
PaddedMessage[116] = 0x80;
|
PaddedMessage[116] = 0x80;
|
||||||
|
} else if (len == 80) {
|
||||||
|
memcpy(PaddedMessage, pdata, 80);
|
||||||
|
memset(PaddedMessage+80, 0, 32); // leeres Hefty Hash einfüllen
|
||||||
|
memset(PaddedMessage+112, 0, 16);
|
||||||
|
PaddedMessage[112] = 0x80;
|
||||||
|
}
|
||||||
// die Message (116 Bytes) ohne Padding zur Berechnung auf der GPU
|
// copy the full 128-byte first-round block (112 or 116 message bytes plus padding) to the device symbol c_PaddedMessage
|
||||||
cudaMemcpyToSymbol( c_PaddedMessage, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
|
cudaMemcpyToSymbol( c_PaddedMessage, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
|
||||||
|
BLOCKSIZE = len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
__host__ void blake512_cpu_hash(int thr_id, int threads, uint32_t startNounce)
|
__host__ void blake512_cpu_hash(int thr_id, int threads, uint32_t startNounce)
|
||||||
{
|
{
|
||||||
const int threadsperblock = 256;
|
const int threadsperblock = 256;
|
||||||
@ -303,5 +271,8 @@ __host__ void blake512_cpu_hash(int thr_id, int threads, uint32_t startNounce)
|
|||||||
|
|
||||||
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
|
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
|
||||||
|
|
||||||
blake512_gpu_hash<<<grid, block, shared_size>>>(threads, startNounce, d_hash5output[thr_id], d_heftyHashes[thr_id], d_nonceVector[thr_id]);
|
if (BLOCKSIZE == 80)
|
||||||
|
blake512_gpu_hash<80><<<grid, block, shared_size>>>(threads, startNounce, d_hash5output[thr_id], d_heftyHashes[thr_id], d_nonceVector[thr_id]);
|
||||||
|
else if (BLOCKSIZE == 84)
|
||||||
|
blake512_gpu_hash<84><<<grid, block, shared_size>>>(threads, startNounce, d_hash5output[thr_id], d_heftyHashes[thr_id], d_nonceVector[thr_id]);
|
||||||
}
|
}
|
@ -2,7 +2,6 @@
|
|||||||
#define _CUDA_BLAKE512_H
|
#define _CUDA_BLAKE512_H
|
||||||
|
|
||||||
void blake512_cpu_init(int thr_id, int threads);
|
void blake512_cpu_init(int thr_id, int threads);
|
||||||
void blake512_cpu_setBlock(void *pdata);
|
void blake512_cpu_setBlock(void *pdata, int len);
|
||||||
void blake512_cpu_hash(int thr_id, int threads, uint32_t startNounce);
|
void blake512_cpu_hash(int thr_id, int threads, uint32_t startNounce);
|
||||||
|
|
||||||
#endif
|
#endif
|
@ -676,7 +676,7 @@ __device__ void groestl512_perm_Q(uint32_t *a)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void groestl512_gpu_hash(int threads, uint32_t startNounce, void *outputHash, uint32_t *heftyHashes, uint32_t *nonceVector)
|
template <int BLOCKSIZE> __global__ void groestl512_gpu_hash(int threads, uint32_t startNounce, void *outputHash, uint32_t *heftyHashes, uint32_t *nonceVector)
|
||||||
{
|
{
|
||||||
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||||
if (thread < threads)
|
if (thread < threads)
|
||||||
@ -706,7 +706,7 @@ __global__ void groestl512_gpu_hash(int threads, uint32_t startNounce, void *out
|
|||||||
uint32_t *heftyHash = &heftyHashes[8 * hashPosition];
|
uint32_t *heftyHash = &heftyHashes[8 * hashPosition];
|
||||||
#pragma unroll 8
|
#pragma unroll 8
|
||||||
for (int k=0; k<8; ++k)
|
for (int k=0; k<8; ++k)
|
||||||
message[21+k] = heftyHash[k];
|
message[BLOCKSIZE/4+k] = heftyHash[k];
|
||||||
|
|
||||||
uint32_t g[32];
|
uint32_t g[32];
|
||||||
#pragma unroll 32
|
#pragma unroll 32
|
||||||
@ -764,21 +764,27 @@ __host__ void groestl512_cpu_init(int thr_id, int threads)
|
|||||||
cudaMalloc(&d_hash4output[thr_id], 16 * sizeof(uint32_t) * threads);
|
cudaMalloc(&d_hash4output[thr_id], 16 * sizeof(uint32_t) * threads);
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ void groestl512_cpu_setBlock(void *data)
|
static int BLOCKSIZE = 84;
|
||||||
// data muss 84-Byte haben!
|
|
||||||
|
__host__ void groestl512_cpu_setBlock(void *data, int len)
|
||||||
|
// data muss 80/84-Byte haben!
|
||||||
// heftyHash hat 32-Byte
|
// heftyHash hat 32-Byte
|
||||||
{
|
{
|
||||||
// Nachricht expandieren und setzen
|
// Nachricht expandieren und setzen
|
||||||
uint32_t msgBlock[32];
|
uint32_t msgBlock[32];
|
||||||
|
|
||||||
memset(msgBlock, 0, sizeof(uint32_t) * 32);
|
memset(msgBlock, 0, sizeof(uint32_t) * 32);
|
||||||
memcpy(&msgBlock[0], data, 84);
|
memcpy(&msgBlock[0], data, len);
|
||||||
|
|
||||||
// Erweitere die Nachricht auf den Nachrichtenblock (padding)
|
// Erweitere die Nachricht auf den Nachrichtenblock (padding)
|
||||||
// Unsere Nachricht hat 116 Byte
|
// Unsere Nachricht hat 112/116 Byte
|
||||||
|
if (len == 84) {
|
||||||
msgBlock[29] = 0x80;
|
msgBlock[29] = 0x80;
|
||||||
msgBlock[31] = 0x01000000;
|
msgBlock[31] = 0x01000000;
|
||||||
|
} else if (len == 80) {
|
||||||
|
msgBlock[28] = 0x80;
|
||||||
|
msgBlock[31] = 0x01000000;
|
||||||
|
}
|
||||||
// groestl512 braucht hierfür keinen CPU-Code (die einzige Runde wird
|
// groestl512 braucht hierfür keinen CPU-Code (die einzige Runde wird
|
||||||
// auf der GPU ausgeführt)
|
// auf der GPU ausgeführt)
|
||||||
|
|
||||||
@ -796,6 +802,8 @@ __host__ void groestl512_cpu_setBlock(void *data)
|
|||||||
cudaMemcpyToSymbol( groestl_gpu_msg,
|
cudaMemcpyToSymbol( groestl_gpu_msg,
|
||||||
msgBlock,
|
msgBlock,
|
||||||
128);
|
128);
|
||||||
|
|
||||||
|
BLOCKSIZE = len;
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ void groestl512_cpu_copyHeftyHash(int thr_id, int threads, void *heftyHashes, int copy)
|
__host__ void groestl512_cpu_copyHeftyHash(int thr_id, int threads, void *heftyHashes, int copy)
|
||||||
@ -818,5 +826,8 @@ __host__ void groestl512_cpu_hash(int thr_id, int threads, uint32_t startNounce)
|
|||||||
|
|
||||||
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
|
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
|
||||||
|
|
||||||
groestl512_gpu_hash<<<grid, block, shared_size>>>(threads, startNounce, d_hash4output[thr_id], d_heftyHashes[thr_id], d_nonceVector[thr_id]);
|
if (BLOCKSIZE == 84)
|
||||||
|
groestl512_gpu_hash<84><<<grid, block, shared_size>>>(threads, startNounce, d_hash4output[thr_id], d_heftyHashes[thr_id], d_nonceVector[thr_id]);
|
||||||
|
else if (BLOCKSIZE == 80)
|
||||||
|
groestl512_gpu_hash<80><<<grid, block, shared_size>>>(threads, startNounce, d_hash4output[thr_id], d_heftyHashes[thr_id], d_nonceVector[thr_id]);
|
||||||
}
|
}
|
@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
void groestl512_cpu_init(int thr_id, int threads);
|
void groestl512_cpu_init(int thr_id, int threads);
|
||||||
void groestl512_cpu_copyHeftyHash(int thr_id, int threads, void *heftyHashes, int copy);
|
void groestl512_cpu_copyHeftyHash(int thr_id, int threads, void *heftyHashes, int copy);
|
||||||
void groestl512_cpu_setBlock(void *data);
|
void groestl512_cpu_setBlock(void *data, int len);
|
||||||
void groestl512_cpu_hash(int thr_id, int threads, uint32_t startNounce);
|
void groestl512_cpu_hash(int thr_id, int threads, uint32_t startNounce);
|
||||||
|
|
||||||
#endif
|
#endif
|
@ -2,21 +2,24 @@
|
|||||||
#include "cuda_runtime.h"
|
#include "cuda_runtime.h"
|
||||||
#include "device_launch_parameters.h"
|
#include "device_launch_parameters.h"
|
||||||
|
|
||||||
// aus cpu-miner.c
|
|
||||||
extern int device_map[8];
|
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <memory.h>
|
#include <memory.h>
|
||||||
|
|
||||||
#define USE_SHARED 1
|
#define USE_SHARED 1
|
||||||
|
|
||||||
|
// aus cpu-miner.c
|
||||||
|
extern int device_map[8];
|
||||||
|
|
||||||
|
// aus heavy.cu
|
||||||
|
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
|
||||||
|
|
||||||
// Folgende Definitionen später durch header ersetzen
|
// Folgende Definitionen später durch header ersetzen
|
||||||
typedef unsigned int uint32_t;
|
typedef unsigned int uint32_t;
|
||||||
typedef unsigned char uint8_t;
|
typedef unsigned char uint8_t;
|
||||||
typedef unsigned short uint16_t;
|
typedef unsigned short uint16_t;
|
||||||
|
|
||||||
// diese Struktur wird in der Init Funktion angefordert
|
// diese Struktur wird in der Init Funktion angefordert
|
||||||
static cudaDeviceProp props;
|
static cudaDeviceProp props[8];
|
||||||
|
|
||||||
// globaler Speicher für alle HeftyHashes aller Threads
|
// globaler Speicher für alle HeftyHashes aller Threads
|
||||||
uint32_t *d_heftyHashes[8];
|
uint32_t *d_heftyHashes[8];
|
||||||
@ -286,7 +289,7 @@ __global__ void hefty_gpu_hash(int threads, uint32_t startNounce, void *outputHa
|
|||||||
for(int j=0;j<16;j++)
|
for(int j=0;j<16;j++)
|
||||||
{
|
{
|
||||||
Absorb(sponge, regs[3] + regs[7]);
|
Absorb(sponge, regs[3] + regs[7]);
|
||||||
hefty_gpu_round(regs, W2[j], heftyLookUp(j + 16 * (k+1)), sponge);
|
hefty_gpu_round(regs, W2[j], heftyLookUp(j + ((k+1)<<4)), sponge);
|
||||||
}
|
}
|
||||||
#pragma unroll 16
|
#pragma unroll 16
|
||||||
for(int j=0;j<16;j++)
|
for(int j=0;j<16;j++)
|
||||||
@ -299,7 +302,7 @@ __global__ void hefty_gpu_hash(int threads, uint32_t startNounce, void *outputHa
|
|||||||
|
|
||||||
#pragma unroll 8
|
#pragma unroll 8
|
||||||
for(int k=0;k<8;k++)
|
for(int k=0;k<8;k++)
|
||||||
((uint32_t*)outputHash)[8*thread+k] = SWAB32(hash[k]);
|
((uint32_t*)outputHash)[(thread<<3)+k] = SWAB32(hash[k]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -308,7 +311,7 @@ __host__ void hefty_cpu_init(int thr_id, int threads)
|
|||||||
{
|
{
|
||||||
cudaSetDevice(device_map[thr_id]);
|
cudaSetDevice(device_map[thr_id]);
|
||||||
|
|
||||||
cudaGetDeviceProperties(&props, device_map[thr_id]);
|
cudaGetDeviceProperties(&props[thr_id], device_map[thr_id]);
|
||||||
|
|
||||||
// Kopiere die Hash-Tabellen in den GPU-Speicher
|
// Kopiere die Hash-Tabellen in den GPU-Speicher
|
||||||
cudaMemcpyToSymbol( hefty_gpu_constantTable,
|
cudaMemcpyToSymbol( hefty_gpu_constantTable,
|
||||||
@ -319,16 +322,21 @@ __host__ void hefty_cpu_init(int thr_id, int threads)
|
|||||||
cudaMalloc(&d_heftyHashes[thr_id], 8 * sizeof(uint32_t) * threads);
|
cudaMalloc(&d_heftyHashes[thr_id], 8 * sizeof(uint32_t) * threads);
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ void hefty_cpu_setBlock(int thr_id, int threads, void *data)
|
__host__ void hefty_cpu_setBlock(int thr_id, int threads, void *data, int len)
|
||||||
// data muss 84-Byte haben!
|
// data muss 80/84-Byte haben!
|
||||||
{
|
{
|
||||||
// Nachricht expandieren und setzen
|
// Nachricht expandieren und setzen
|
||||||
uint32_t msgBlock[32];
|
uint32_t msgBlock[32];
|
||||||
|
|
||||||
memset(msgBlock, 0, sizeof(uint32_t) * 32);
|
memset(msgBlock, 0, sizeof(uint32_t) * 32);
|
||||||
memcpy(&msgBlock[0], data, 84);
|
memcpy(&msgBlock[0], data, len);
|
||||||
|
if (len == 84) {
|
||||||
msgBlock[21] |= 0x80;
|
msgBlock[21] |= 0x80;
|
||||||
msgBlock[31] = 672; // bitlen
|
msgBlock[31] = 672; // bitlen
|
||||||
|
} else if (len == 80) {
|
||||||
|
msgBlock[20] |= 0x80;
|
||||||
|
msgBlock[31] = 640; // bitlen
|
||||||
|
}
|
||||||
|
|
||||||
for(int i=0;i<31;i++) // Byteorder drehen
|
for(int i=0;i<31;i++) // Byteorder drehen
|
||||||
msgBlock[i] = SWAB32(msgBlock[i]);
|
msgBlock[i] = SWAB32(msgBlock[i]);
|
||||||
@ -395,7 +403,7 @@ __host__ void hefty_cpu_hash(int thr_id, int threads, int startNounce)
|
|||||||
{
|
{
|
||||||
// Compute 3.x und 5.x Geräte am besten mit 768 Threads ansteuern,
|
// Compute 3.x und 5.x Geräte am besten mit 768 Threads ansteuern,
|
||||||
// alle anderen mit 512 Threads.
|
// alle anderen mit 512 Threads.
|
||||||
int threadsperblock = (props.major >= 3) ? 768 : 512;
|
int threadsperblock = (props[thr_id].major >= 3) ? 768 : 512;
|
||||||
|
|
||||||
// berechne wie viele Thread Blocks wir brauchen
|
// berechne wie viele Thread Blocks wir brauchen
|
||||||
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
||||||
@ -411,4 +419,7 @@ __host__ void hefty_cpu_hash(int thr_id, int threads, int startNounce)
|
|||||||
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
|
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
|
||||||
|
|
||||||
hefty_gpu_hash<<<grid, block, shared_size>>>(threads, startNounce, (void*)d_heftyHashes[thr_id]);
|
hefty_gpu_hash<<<grid, block, shared_size>>>(threads, startNounce, (void*)d_heftyHashes[thr_id]);
|
||||||
|
|
||||||
|
// Strategisches Sleep Kommando zur Senkung der CPU Last
|
||||||
|
MyStreamSynchronize(NULL, 0, thr_id);
|
||||||
}
|
}
|
@ -2,7 +2,7 @@
|
|||||||
#define _CUDA_HEFTY1_H
|
#define _CUDA_HEFTY1_H
|
||||||
|
|
||||||
void hefty_cpu_hash(int thr_id, int threads, int startNounce);
|
void hefty_cpu_hash(int thr_id, int threads, int startNounce);
|
||||||
void hefty_cpu_setBlock(int thr_id, int threads, void *data);
|
void hefty_cpu_setBlock(int thr_id, int threads, void *data, int len);
|
||||||
void hefty_cpu_init(int thr_id, int threads);
|
void hefty_cpu_init(int thr_id, int threads);
|
||||||
|
|
||||||
#endif
|
#endif
|
@ -16,6 +16,8 @@ extern uint32_t *d_nonceVector[8];
|
|||||||
|
|
||||||
// globaler Speicher für unsere Ergebnisse
|
// globaler Speicher für unsere Ergebnisse
|
||||||
uint32_t *d_hash3output[8];
|
uint32_t *d_hash3output[8];
|
||||||
|
extern uint32_t *d_hash4output[8];
|
||||||
|
extern uint32_t *d_hash5output[8];
|
||||||
|
|
||||||
// der Keccak512 State nach der ersten Runde (72 Bytes)
|
// der Keccak512 State nach der ersten Runde (72 Bytes)
|
||||||
__constant__ uint64_t c_State[25];
|
__constant__ uint64_t c_State[25];
|
||||||
@ -25,7 +27,7 @@ __constant__ uint32_t c_PaddedMessage2[18]; // 44 bytes of remaining message (No
|
|||||||
|
|
||||||
// ---------------------------- BEGIN CUDA keccak512 functions ------------------------------------
|
// ---------------------------- BEGIN CUDA keccak512 functions ------------------------------------
|
||||||
|
|
||||||
#define ROTL64(a,b) (((a) << (b)) | ((a) >> (64 - b)))
|
#include "cuda_helper.h"
|
||||||
|
|
||||||
#define U32TO64_LE(p) \
|
#define U32TO64_LE(p) \
|
||||||
(((uint64_t)(*p)) | (((uint64_t)(*(p + 1))) << 32))
|
(((uint64_t)(*p)) | (((uint64_t)(*(p + 1))) << 32))
|
||||||
@ -145,7 +147,7 @@ keccak_block(uint64_t *s, const uint32_t *in, const uint64_t *keccak_round_const
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Die Hash-Funktion
|
// Die Hash-Funktion
|
||||||
__global__ void keccak512_gpu_hash(int threads, uint32_t startNounce, void *outputHash, uint32_t *heftyHashes, uint32_t *nonceVector)
|
template <int BLOCKSIZE> __global__ void keccak512_gpu_hash(int threads, uint32_t startNounce, void *outputHash, uint32_t *heftyHashes, uint32_t *nonceVector)
|
||||||
{
|
{
|
||||||
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||||
if (thread < threads)
|
if (thread < threads)
|
||||||
@ -171,7 +173,7 @@ __global__ void keccak512_gpu_hash(int threads, uint32_t startNounce, void *outp
|
|||||||
msgBlock[1] = nounce;
|
msgBlock[1] = nounce;
|
||||||
|
|
||||||
// den individuellen Hefty1 Hash einsetzen
|
// den individuellen Hefty1 Hash einsetzen
|
||||||
mycpy32(&msgBlock[3], &heftyHashes[8 * hashPosition]);
|
mycpy32(&msgBlock[(BLOCKSIZE-72)/sizeof(uint32_t)], &heftyHashes[8 * hashPosition]);
|
||||||
|
|
||||||
// den Block einmal gut durchschütteln
|
// den Block einmal gut durchschütteln
|
||||||
keccak_block(keccak_gpu_state, msgBlock, c_keccak_round_constants);
|
keccak_block(keccak_gpu_state, msgBlock, c_keccak_round_constants);
|
||||||
@ -184,7 +186,6 @@ __global__ void keccak512_gpu_hash(int threads, uint32_t startNounce, void *outp
|
|||||||
U64TO32_LE((&hash[i/4]), keccak_gpu_state[i / 8]);
|
U64TO32_LE((&hash[i/4]), keccak_gpu_state[i / 8]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// und ins Global Memory rausschreiben
|
// und ins Global Memory rausschreiben
|
||||||
#pragma unroll 16
|
#pragma unroll 16
|
||||||
for(int k=0;k<16;k++)
|
for(int k=0;k<16;k++)
|
||||||
@ -217,38 +218,49 @@ __host__ void keccak512_cpu_init(int thr_id, int threads)
|
|||||||
|
|
||||||
// --------------- END keccak512 CPU version from scrypt-jane code --------------------
|
// --------------- END keccak512 CPU version from scrypt-jane code --------------------
|
||||||
|
|
||||||
__host__ void keccak512_cpu_setBlock(void *data)
|
static int BLOCKSIZE = 84;
|
||||||
// data muss 84-Byte haben!
|
|
||||||
|
__host__ void keccak512_cpu_setBlock(void *data, int len)
|
||||||
|
// data muss 80 oder 84-Byte haben!
|
||||||
// heftyHash hat 32-Byte
|
// heftyHash hat 32-Byte
|
||||||
{
|
{
|
||||||
// CH
|
// CH
|
||||||
// state init
|
// state init
|
||||||
uint64_t keccak_cpu_state[25];
|
uint64_t keccak_cpu_state[25];
|
||||||
memset(keccak_cpu_state, 0, 200);
|
memset(keccak_cpu_state, 0, sizeof(keccak_cpu_state));
|
||||||
|
|
||||||
|
// erste Runde
|
||||||
|
keccak_block((uint64_t*)&keccak_cpu_state, (const uint32_t*)data, host_keccak_round_constants);
|
||||||
|
|
||||||
|
// state kopieren
|
||||||
|
cudaMemcpyToSymbol( c_State, keccak_cpu_state, 25*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
// keccak hat 72-Byte blöcke, d.h. in unserem Fall zwei Blöcke
|
// keccak hat 72-Byte blöcke, d.h. in unserem Fall zwei Blöcke
|
||||||
// zu jeweils
|
// zu jeweils
|
||||||
uint32_t msgBlock[18];
|
uint32_t msgBlock[18];
|
||||||
memset(msgBlock, 0, 18 * sizeof(uint32_t));
|
memset(msgBlock, 0, 18 * sizeof(uint32_t));
|
||||||
|
|
||||||
// kopiere die Daten rein (aber nur alles nach Bit 72)
|
// kopiere die restlichen Daten rein (aber nur alles nach Byte 72)
|
||||||
|
if (len == 84)
|
||||||
memcpy(&msgBlock[0], &((uint8_t*)data)[72], 12);
|
memcpy(&msgBlock[0], &((uint8_t*)data)[72], 12);
|
||||||
|
else if (len == 80)
|
||||||
|
memcpy(&msgBlock[0], &((uint8_t*)data)[72], 8);
|
||||||
|
|
||||||
// Nachricht abschließen
|
// Nachricht abschließen
|
||||||
|
if (len == 84)
|
||||||
msgBlock[11] = 0x01;
|
msgBlock[11] = 0x01;
|
||||||
|
else if (len == 80)
|
||||||
|
msgBlock[10] = 0x01;
|
||||||
msgBlock[17] = 0x80000000;
|
msgBlock[17] = 0x80000000;
|
||||||
|
|
||||||
// erste Runde
|
|
||||||
keccak_block((uint64_t*)&keccak_cpu_state, (const uint32_t*)data, host_keccak_round_constants);
|
|
||||||
|
|
||||||
// Message 2 ins Constant Memory kopieren (die variable Nonce und
|
// Message 2 ins Constant Memory kopieren (die variable Nonce und
|
||||||
// der Hefty1 Anteil muss aber auf der GPU erst noch ersetzt werden)
|
// der Hefty1 Anteil muss aber auf der GPU erst noch ersetzt werden)
|
||||||
cudaMemcpyToSymbol( c_PaddedMessage2, msgBlock, 18*sizeof(uint32_t), 0, cudaMemcpyHostToDevice );
|
cudaMemcpyToSymbol( c_PaddedMessage2, msgBlock, 18*sizeof(uint32_t), 0, cudaMemcpyHostToDevice );
|
||||||
|
|
||||||
// state kopieren
|
BLOCKSIZE = len;
|
||||||
cudaMemcpyToSymbol( c_State, keccak_cpu_state, 25*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
__host__ void keccak512_cpu_copyHeftyHash(int thr_id, int threads, void *heftyHashes, int copy)
|
__host__ void keccak512_cpu_copyHeftyHash(int thr_id, int threads, void *heftyHashes, int copy)
|
||||||
{
|
{
|
||||||
// Hefty1 Hashes kopieren
|
// Hefty1 Hashes kopieren
|
||||||
@ -268,6 +280,8 @@ __host__ void keccak512_cpu_hash(int thr_id, int threads, uint32_t startNounce)
|
|||||||
size_t shared_size = 0;
|
size_t shared_size = 0;
|
||||||
|
|
||||||
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
|
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
|
||||||
|
if (BLOCKSIZE==84)
|
||||||
keccak512_gpu_hash<<<grid, block, shared_size>>>(threads, startNounce, d_hash3output[thr_id], d_heftyHashes[thr_id], d_nonceVector[thr_id]);
|
keccak512_gpu_hash<84><<<grid, block, shared_size>>>(threads, startNounce, d_hash3output[thr_id], d_heftyHashes[thr_id], d_nonceVector[thr_id]);
|
||||||
|
else if (BLOCKSIZE==80)
|
||||||
|
keccak512_gpu_hash<80><<<grid, block, shared_size>>>(threads, startNounce, d_hash3output[thr_id], d_heftyHashes[thr_id], d_nonceVector[thr_id]);
|
||||||
}
|
}
|
@ -2,7 +2,7 @@
|
|||||||
#define _CUDA_KECCAK512_H
|
#define _CUDA_KECCAK512_H
|
||||||
|
|
||||||
void keccak512_cpu_init(int thr_id, int threads);
|
void keccak512_cpu_init(int thr_id, int threads);
|
||||||
void keccak512_cpu_setBlock(void *data);
|
void keccak512_cpu_setBlock(void *data, int len);
|
||||||
void keccak512_cpu_copyHeftyHash(int thr_id, int threads, void *heftyHashes, int copy);
|
void keccak512_cpu_copyHeftyHash(int thr_id, int threads, void *heftyHashes, int copy);
|
||||||
void keccak512_cpu_hash(int thr_id, int threads, uint32_t startNounce);
|
void keccak512_cpu_hash(int thr_id, int threads, uint32_t startNounce);
|
||||||
|
|
@ -47,7 +47,7 @@ uint32_t sha256_cpu_constantTable[] = {
|
|||||||
#define SWAB32(x) ( ((x & 0x000000FF) << 24) | ((x & 0x0000FF00) << 8) | ((x & 0x00FF0000) >> 8) | ((x & 0xFF000000) >> 24) )
|
#define SWAB32(x) ( ((x & 0x000000FF) << 24) | ((x & 0x0000FF00) << 8) | ((x & 0x00FF0000) >> 8) | ((x & 0xFF000000) >> 24) )
|
||||||
|
|
||||||
// Die Hash-Funktion
|
// Die Hash-Funktion
|
||||||
__global__ void sha256_gpu_hash(int threads, uint32_t startNounce, void *outputHash, uint32_t *heftyHashes, uint32_t *nonceVector)
|
template <int BLOCKSIZE> __global__ void sha256_gpu_hash(int threads, uint32_t startNounce, void *outputHash, uint32_t *heftyHashes, uint32_t *nonceVector)
|
||||||
{
|
{
|
||||||
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||||
if (thread < threads)
|
if (thread < threads)
|
||||||
@ -82,11 +82,10 @@ __global__ void sha256_gpu_hash(int threads, uint32_t startNounce, void *outputH
|
|||||||
uint32_t offset = 8 * (blockDim.x * blockIdx.x + threadIdx.x);
|
uint32_t offset = 8 * (blockDim.x * blockIdx.x + threadIdx.x);
|
||||||
#pragma unroll 8
|
#pragma unroll 8
|
||||||
for(int k=0;k<8;k++)
|
for(int k=0;k<8;k++)
|
||||||
W1[5+k] = heftyHashes[offset + k];
|
W1[((BLOCKSIZE-64)/4)+k] = heftyHashes[offset + k];
|
||||||
|
|
||||||
|
|
||||||
#pragma unroll 8
|
#pragma unroll 8
|
||||||
for (int i=5; i <5+8; ++i) W1[i] = SWAB32(W1[i]); // die Hefty1 Hashes brauchen eine Drehung ;)
|
for (int i=((BLOCKSIZE-64)/4); i < ((BLOCKSIZE-64)/4)+8; ++i) W1[i] = SWAB32(W1[i]); // die Hefty1 Hashes brauchen eine Drehung ;)
|
||||||
W1[3] = SWAB32(nounce);
|
W1[3] = SWAB32(nounce);
|
||||||
|
|
||||||
// Progress W1
|
// Progress W1
|
||||||
@ -178,18 +177,26 @@ __host__ void sha256_cpu_init(int thr_id, int threads)
|
|||||||
cudaMalloc(&d_hash2output[thr_id], 8 * sizeof(uint32_t) * threads);
|
cudaMalloc(&d_hash2output[thr_id], 8 * sizeof(uint32_t) * threads);
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ void sha256_cpu_setBlock(void *data)
|
static int BLOCKSIZE = 84;
|
||||||
// data muss 84-Byte haben!
|
|
||||||
|
__host__ void sha256_cpu_setBlock(void *data, int len)
|
||||||
|
// data muss 80/84-Byte haben!
|
||||||
// heftyHash hat 32-Byte
|
// heftyHash hat 32-Byte
|
||||||
{
|
{
|
||||||
// Nachricht expandieren und setzen
|
// Nachricht expandieren und setzen
|
||||||
uint32_t msgBlock[32];
|
uint32_t msgBlock[32];
|
||||||
|
|
||||||
memset(msgBlock, 0, sizeof(uint32_t) * 32);
|
memset(msgBlock, 0, sizeof(uint32_t) * 32);
|
||||||
memcpy(&msgBlock[0], data, 84);
|
memcpy(&msgBlock[0], data, len);
|
||||||
|
if (len == 84) {
|
||||||
memset(&msgBlock[21], 0, 32); // vorläufig Nullen anstatt der Hefty1 Hashes einfüllen
|
memset(&msgBlock[21], 0, 32); // vorläufig Nullen anstatt der Hefty1 Hashes einfüllen
|
||||||
msgBlock[29] |= 0x80;
|
msgBlock[29] |= 0x80;
|
||||||
msgBlock[31] = 928; // bitlen
|
msgBlock[31] = 928; // bitlen
|
||||||
|
} else if (len == 80) {
|
||||||
|
memset(&msgBlock[20], 0, 32); // vorläufig Nullen anstatt der Hefty1 Hashes einfüllen
|
||||||
|
msgBlock[28] |= 0x80;
|
||||||
|
msgBlock[31] = 896; // bitlen
|
||||||
|
}
|
||||||
|
|
||||||
for(int i=0;i<31;i++) // Byteorder drehen
|
for(int i=0;i<31;i++) // Byteorder drehen
|
||||||
msgBlock[i] = SWAB32(msgBlock[i]);
|
msgBlock[i] = SWAB32(msgBlock[i]);
|
||||||
@ -242,6 +249,8 @@ __host__ void sha256_cpu_setBlock(void *data)
|
|||||||
cudaMemcpyToSymbol( sha256_gpu_blockHeader,
|
cudaMemcpyToSymbol( sha256_gpu_blockHeader,
|
||||||
&msgBlock[16],
|
&msgBlock[16],
|
||||||
64);
|
64);
|
||||||
|
|
||||||
|
BLOCKSIZE = len;
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ void sha256_cpu_copyHeftyHash(int thr_id, int threads, void *heftyHashes, int copy)
|
__host__ void sha256_cpu_copyHeftyHash(int thr_id, int threads, void *heftyHashes, int copy)
|
||||||
@ -263,6 +272,9 @@ __host__ void sha256_cpu_hash(int thr_id, int threads, int startNounce)
|
|||||||
size_t shared_size = 0;
|
size_t shared_size = 0;
|
||||||
|
|
||||||
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
|
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
|
||||||
|
if (BLOCKSIZE == 84)
|
||||||
sha256_gpu_hash<<<grid, block, shared_size>>>(threads, startNounce, d_hash2output[thr_id], d_heftyHashes[thr_id], d_nonceVector[thr_id]);
|
sha256_gpu_hash<84><<<grid, block, shared_size>>>(threads, startNounce, d_hash2output[thr_id], d_heftyHashes[thr_id], d_nonceVector[thr_id]);
|
||||||
|
else if (BLOCKSIZE == 80) {
|
||||||
|
sha256_gpu_hash<80><<<grid, block, shared_size>>>(threads, startNounce, d_hash2output[thr_id], d_heftyHashes[thr_id], d_nonceVector[thr_id]);
|
||||||
|
}
|
||||||
}
|
}
|
@ -2,7 +2,7 @@
|
|||||||
#define _CUDA_SHA256_H
|
#define _CUDA_SHA256_H
|
||||||
|
|
||||||
void sha256_cpu_init(int thr_id, int threads);
|
void sha256_cpu_init(int thr_id, int threads);
|
||||||
void sha256_cpu_setBlock(void *data);
|
void sha256_cpu_setBlock(void *data, int len);
|
||||||
void sha256_cpu_hash(int thr_id, int threads, int startNounce);
|
void sha256_cpu_hash(int thr_id, int threads, int startNounce);
|
||||||
void sha256_cpu_copyHeftyHash(int thr_id, int threads, void *heftyHashes, int copy);
|
void sha256_cpu_copyHeftyHash(int thr_id, int threads, void *heftyHashes, int copy);
|
||||||
#endif
|
#endif
|
@ -22,12 +22,12 @@
|
|||||||
#include "sph/sph_blake.h"
|
#include "sph/sph_blake.h"
|
||||||
#include "sph/sph_groestl.h"
|
#include "sph/sph_groestl.h"
|
||||||
|
|
||||||
#include "cuda_hefty1.h"
|
#include "heavy/cuda_hefty1.h"
|
||||||
#include "cuda_sha256.h"
|
#include "heavy/cuda_sha256.h"
|
||||||
#include "cuda_keccak512.h"
|
#include "heavy/cuda_keccak512.h"
|
||||||
#include "cuda_groestl512.h"
|
#include "heavy/cuda_groestl512.h"
|
||||||
#include "cuda_blake512.h"
|
#include "heavy/cuda_blake512.h"
|
||||||
#include "cuda_combine.h"
|
#include "heavy/cuda_combine.h"
|
||||||
|
|
||||||
extern uint32_t *d_hash2output[8];
|
extern uint32_t *d_hash2output[8];
|
||||||
extern uint32_t *d_hash3output[8];
|
extern uint32_t *d_hash3output[8];
|
||||||
@ -35,6 +35,7 @@ extern uint32_t *d_hash4output[8];
|
|||||||
extern uint32_t *d_hash5output[8];
|
extern uint32_t *d_hash5output[8];
|
||||||
|
|
||||||
#define HEAVYCOIN_BLKHDR_SZ 84
|
#define HEAVYCOIN_BLKHDR_SZ 84
|
||||||
|
#define MNR_BLKHDR_SZ 80
|
||||||
|
|
||||||
// nonce-array für die threads
|
// nonce-array für die threads
|
||||||
uint32_t *d_nonceVector[8];
|
uint32_t *d_nonceVector[8];
|
||||||
@ -230,24 +231,29 @@ cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id)
|
|||||||
|
|
||||||
int scanhash_heavy_cpp(int thr_id, uint32_t *pdata,
|
int scanhash_heavy_cpp(int thr_id, uint32_t *pdata,
|
||||||
const uint32_t *ptarget, uint32_t max_nonce,
|
const uint32_t *ptarget, uint32_t max_nonce,
|
||||||
unsigned long *hashes_done, uint32_t maxvote);
|
unsigned long *hashes_done, uint32_t maxvote, int blocklen);
|
||||||
|
|
||||||
extern "C"
|
extern "C"
|
||||||
int scanhash_heavy(int thr_id, uint32_t *pdata,
|
int scanhash_heavy(int thr_id, uint32_t *pdata,
|
||||||
const uint32_t *ptarget, uint32_t max_nonce,
|
const uint32_t *ptarget, uint32_t max_nonce,
|
||||||
unsigned long *hashes_done, uint32_t maxvote)
|
unsigned long *hashes_done, uint32_t maxvote, int blocklen)
|
||||||
{
|
{
|
||||||
return scanhash_heavy_cpp(thr_id, pdata,
|
return scanhash_heavy_cpp(thr_id, pdata,
|
||||||
ptarget, max_nonce, hashes_done, maxvote);
|
ptarget, max_nonce, hashes_done, maxvote, blocklen);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern bool opt_benchmark;
|
||||||
|
|
||||||
int scanhash_heavy_cpp(int thr_id, uint32_t *pdata,
|
int scanhash_heavy_cpp(int thr_id, uint32_t *pdata,
|
||||||
const uint32_t *ptarget, uint32_t max_nonce,
|
const uint32_t *ptarget, uint32_t max_nonce,
|
||||||
unsigned long *hashes_done, uint32_t maxvote)
|
unsigned long *hashes_done, uint32_t maxvote, int blocklen)
|
||||||
{
|
{
|
||||||
// CUDA will process thousands of threads.
|
// CUDA will process thousands of threads.
|
||||||
const int throughput = 4096 * 128;
|
const int throughput = 4096 * 128;
|
||||||
|
|
||||||
|
if (opt_benchmark)
|
||||||
|
((uint32_t*)ptarget)[7] = 0x000000ff;
|
||||||
|
|
||||||
int rc = 0;
|
int rc = 0;
|
||||||
uint32_t *hash = NULL;
|
uint32_t *hash = NULL;
|
||||||
cudaMallocHost(&hash, throughput*8*sizeof(uint32_t));
|
cudaMallocHost(&hash, throughput*8*sizeof(uint32_t));
|
||||||
@ -258,7 +264,6 @@ int scanhash_heavy_cpp(int thr_id, uint32_t *pdata,
|
|||||||
memset(nrmCalls, 0, sizeof(int) * 6);
|
memset(nrmCalls, 0, sizeof(int) * 6);
|
||||||
|
|
||||||
uint32_t start_nonce = pdata[19];
|
uint32_t start_nonce = pdata[19];
|
||||||
uint16_t *ext = (uint16_t *)&pdata[20];
|
|
||||||
|
|
||||||
// für jeden Hash ein individuelles Target erstellen basierend
|
// für jeden Hash ein individuelles Target erstellen basierend
|
||||||
// auf dem höchsten Bit, das in ptarget gesetzt ist.
|
// auf dem höchsten Bit, das in ptarget gesetzt ist.
|
||||||
@ -282,6 +287,9 @@ int scanhash_heavy_cpp(int thr_id, uint32_t *pdata,
|
|||||||
cudaMalloc(&d_nonceVector[thr_id], sizeof(uint32_t) * throughput);
|
cudaMalloc(&d_nonceVector[thr_id], sizeof(uint32_t) * throughput);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (blocklen == HEAVYCOIN_BLKHDR_SZ)
|
||||||
|
{
|
||||||
|
uint16_t *ext = (uint16_t *)&pdata[20];
|
||||||
|
|
||||||
if (opt_vote > maxvote) {
|
if (opt_vote > maxvote) {
|
||||||
printf("Warning: Your block reward vote (%hu) exceeds "
|
printf("Warning: Your block reward vote (%hu) exceeds "
|
||||||
@ -295,13 +303,14 @@ int scanhash_heavy_cpp(int thr_id, uint32_t *pdata,
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
ext[0] = opt_vote;
|
ext[0] = opt_vote;
|
||||||
|
}
|
||||||
|
|
||||||
// Setze die Blockdaten
|
// Setze die Blockdaten
|
||||||
hefty_cpu_setBlock(thr_id, throughput, pdata);
|
hefty_cpu_setBlock(thr_id, throughput, pdata, blocklen);
|
||||||
sha256_cpu_setBlock(pdata);
|
sha256_cpu_setBlock(pdata, blocklen);
|
||||||
keccak512_cpu_setBlock(pdata);
|
keccak512_cpu_setBlock(pdata, blocklen);
|
||||||
groestl512_cpu_setBlock(pdata);
|
groestl512_cpu_setBlock(pdata, blocklen);
|
||||||
blake512_cpu_setBlock(pdata);
|
blake512_cpu_setBlock(pdata, blocklen);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
int i;
|
int i;
|
||||||
@ -370,7 +379,7 @@ int scanhash_heavy_cpp(int thr_id, uint32_t *pdata,
|
|||||||
if (fulltest(foundhash, ptarget)) {
|
if (fulltest(foundhash, ptarget)) {
|
||||||
uint32_t verification[8];
|
uint32_t verification[8];
|
||||||
pdata[19] += nonce - pdata[19];
|
pdata[19] += nonce - pdata[19];
|
||||||
heavycoin_hash((unsigned char *)verification, (const unsigned char *)pdata, HEAVYCOIN_BLKHDR_SZ);
|
heavycoin_hash((unsigned char *)verification, (const unsigned char *)pdata, blocklen);
|
||||||
if (memcmp(verification, foundhash, 8*sizeof(uint32_t))) {
|
if (memcmp(verification, foundhash, 8*sizeof(uint32_t))) {
|
||||||
applog(LOG_ERR, "hash for nonce=$%08X does not validate on CPU!\n", nonce);
|
applog(LOG_ERR, "hash for nonce=$%08X does not validate on CPU!\n", nonce);
|
||||||
}
|
}
|
2
miner.h
2
miner.h
@ -205,7 +205,7 @@ extern int scanhash_scrypt(int thr_id, uint32_t *pdata,
|
|||||||
|
|
||||||
extern int scanhash_heavy(int thr_id, uint32_t *pdata,
|
extern int scanhash_heavy(int thr_id, uint32_t *pdata,
|
||||||
const uint32_t *ptarget, uint32_t max_nonce,
|
const uint32_t *ptarget, uint32_t max_nonce,
|
||||||
unsigned long *hashes_done, uint32_t maxvote);
|
unsigned long *hashes_done, uint32_t maxvote, int blocklen);
|
||||||
|
|
||||||
extern int scanhash_fugue256(int thr_id, uint32_t *pdata,
|
extern int scanhash_fugue256(int thr_id, uint32_t *pdata,
|
||||||
const uint32_t *ptarget, uint32_t max_nonce,
|
const uint32_t *ptarget, uint32_t max_nonce,
|
||||||
|
Loading…
Reference in New Issue
Block a user