Browse Source

Merge branch 'qubit'

2upstream 1.4.3-tpruvot
Tanguy Pruvot 10 years ago
parent
commit
80d6e09ca6
  1. 9
      JHA/jackpotcoin.cu
  2. 11
      Makefile.am
  3. 11
      README.txt
  4. 10
      bitslice_transformations_quad.cu
  5. 24
      ccminer.vcxproj
  6. 19
      ccminer.vcxproj.filters
  7. 2
      configure.ac
  8. 39
      cpu-miner.c
  9. 6
      cpuminer-config.h
  10. 54
      cuda_checkhash.cu
  11. 4
      cuda_helper.h
  12. 12
      cuda_nist5.cu
  13. 25
      miner.h
  14. 9
      quark/animecoin.cu
  15. 5
      quark/quarkcoin.cu
  16. 120
      qubit/deep.cu
  17. 93
      qubit/doom.cu
  18. 146
      qubit/qubit.cu
  19. 496
      qubit/qubit_luffa512.cu
  20. 12
      util.c
  21. 26
      x11/fresh.cu
  22. 12
      x11/x11.cu
  23. 16
      x13/x13.cu
  24. 7
      x15/whirlpool.cu
  25. 4
      x15/x14.cu
  26. 4
      x15/x15.cu
  27. 5
      x17/x17.cu

9
JHA/jackpotcoin.cu

@ -5,14 +5,13 @@ extern "C"
#include "sph/sph_groestl.h" #include "sph/sph_groestl.h"
#include "sph/sph_jh.h" #include "sph/sph_jh.h"
#include "sph/sph_skein.h" #include "sph/sph_skein.h"
}
#include "miner.h" #include "miner.h"
#include "cuda_helper.h" #include "cuda_helper.h"
}
// aus cpu-miner.c
extern int device_map[8]; extern int device_map[8];
// Speicher für Input/Output der verketteten Hashfunktionen
static uint32_t *d_hash[8]; static uint32_t *d_hash[8];
extern void jackpot_keccak512_cpu_init(int thr_id, int threads); extern void jackpot_keccak512_cpu_init(int thr_id, int threads);
@ -31,10 +30,6 @@ extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounc
extern void quark_skein512_cpu_init(int thr_id, int threads); extern void quark_skein512_cpu_init(int thr_id, int threads);
extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void cuda_check_cpu_init(int thr_id, int threads);
extern void cuda_check_cpu_setTarget(const void *ptarget);
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
extern void jackpot_compactTest_cpu_init(int thr_id, int threads); extern void jackpot_compactTest_cpu_init(int thr_id, int threads);
extern void jackpot_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable, extern void jackpot_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
uint32_t *d_nonces1, size_t *nrm1, uint32_t *d_nonces1, size_t *nrm1,

11
Makefile.am

@ -29,19 +29,22 @@ ccminer_SOURCES = elist.h miner.h compat.h \
groestlcoin.cpp cuda_groestlcoin.cu cuda_groestlcoin.h \ groestlcoin.cpp cuda_groestlcoin.cu cuda_groestlcoin.h \
myriadgroestl.cpp cuda_myriadgroestl.cu \ myriadgroestl.cpp cuda_myriadgroestl.cu \
JHA/jackpotcoin.cu JHA/cuda_jha_keccak512.cu \ JHA/jackpotcoin.cu JHA/cuda_jha_keccak512.cu \
JHA/cuda_jha_compactionTest.cu quark/cuda_checkhash.cu \ JHA/cuda_jha_compactionTest.cu cuda_checkhash.cu \
quark/cuda_jh512.cu quark/cuda_quark_blake512.cu quark/cuda_quark_groestl512.cu quark/cuda_skein512.cu \ quark/cuda_jh512.cu quark/cuda_quark_blake512.cu quark/cuda_quark_groestl512.cu quark/cuda_skein512.cu \
quark/cuda_bmw512.cu quark/cuda_quark_keccak512.cu quark/quarkcoin.cu quark/animecoin.cu \ quark/cuda_bmw512.cu quark/cuda_quark_keccak512.cu \
quark/cuda_quark_compactionTest.cu \ quark/quarkcoin.cu quark/animecoin.cu \
quark/cuda_quark_compactionTest.cu \
cuda_nist5.cu blake32.cu pentablake.cu \ cuda_nist5.cu blake32.cu pentablake.cu \
sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c \ sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c \
sph/cubehash.c sph/echo.c sph/luffa.c sph/sha2.c sph/shavite.c sph/simd.c \ sph/cubehash.c sph/echo.c sph/luffa.c sph/sha2.c sph/shavite.c sph/simd.c \
sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \ sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \
sph/shabal.c sph/whirlpool.c sph/sha2big.c sph/haval.c \ sph/shabal.c sph/whirlpool.c sph/sha2big.c sph/haval.c \
qubit/qubit.cu qubit/qubit_luffa512.cu qubit/deep.cu qubit/doom.cu \
x11/x11.cu x11/fresh.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \ x11/x11.cu x11/fresh.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \ x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \
x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu \ x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu \
x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu x15/whirlpool.cu \ x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu \
x15/whirlpool.cu \
x17/x17.cu x17/cuda_x17_haval512.cu x17/cuda_x17_sha512.cu x17/x17.cu x17/cuda_x17_haval512.cu x17/cuda_x17_sha512.cu

11
README.txt

@ -1,5 +1,5 @@
ccMiner release 1.4.2-tpruvot (Sep 09th 2014) - "Pentablake" ccMiner release 1.4.3-tpruvot (Sep 11th 2014) - "DJM34 Algos"
--------------------------------------------------------------- ---------------------------------------------------------------
*************************************************************** ***************************************************************
@ -35,6 +35,7 @@ TalkCoin
DarkCoin and other X11 coins DarkCoin and other X11 coins
NEOS blake (256 14-rounds) NEOS blake (256 14-rounds)
BlakeCoin (256 8-rounds) BlakeCoin (256 8-rounds)
Deep, Doom, Goalcoin and Qubit
Pentablake (Blake 512 x5) Pentablake (Blake 512 x5)
where some of these coins have a VERY NOTABLE nVidia advantage where some of these coins have a VERY NOTABLE nVidia advantage
@ -56,12 +57,15 @@ its command line interface and options.
-a, --algo=ALGO specify the algorithm to use -a, --algo=ALGO specify the algorithm to use
heavy use to mine Heavycoin heavy use to mine Heavycoin
mjollnir use to mine Mjollnircoin mjollnir use to mine Mjollnircoin
deep use to mine Deepcoin
doom use to mine Doomcoin
fugue256 use to mine Fuguecoin fugue256 use to mine Fuguecoin
groestl use to mine Groestlcoin groestl use to mine Groestlcoin
dmd-gr use to mine Diamond-Groestl dmd-gr use to mine Diamond-Groestl
myr-gr use to mine Myriad-Groestl myr-gr use to mine Myriad-Groestl
jackpot use to mine Jackpotcoin jackpot use to mine Jackpotcoin
quark use to mine Quarkcoin quark use to mine Quarkcoin
qubit use to mine Qubit Algo
anime use to mine Animecoin anime use to mine Animecoin
blake use to mine NEOS (Blake 256) blake use to mine NEOS (Blake 256)
blakecoin use to mine Old Blake 256 blakecoin use to mine Old Blake 256
@ -103,7 +107,7 @@ its command line interface and options.
--benchmark run in offline benchmark mode --benchmark run in offline benchmark mode
--cputest debug hashes from cpu algorithms --cputest debug hashes from cpu algorithms
-c, --config=FILE load a JSON-format configuration file -c, --config=FILE load a JSON-format configuration file
-C, --color display colored output in a linux Terminal -K, --nocolor disable colored console output
-V, --version display version information and exit -V, --version display version information and exit
-h, --help display this help text and exit -h, --help display this help text and exit
@ -154,6 +158,9 @@ features.
>>> RELEASE HISTORY <<< >>> RELEASE HISTORY <<<
Sep 11th 2014
add algos from djm34 (deep,doom,qubit)
Sep. 1st 2014 add X17, optimized x15 and whirl Sep. 1st 2014 add X17, optimized x15 and whirl
add blake (256 variant) add blake (256 variant)
color support on Windows, color support on Windows,

10
bitslice_transformations_quad.cu

@ -7,6 +7,8 @@
#define __shfl(var, srcLane, width) (uint32_t)(var) #define __shfl(var, srcLane, width) (uint32_t)(var)
#endif #endif
#ifdef __CUDA_ARCH__
__device__ __forceinline__ __device__ __forceinline__
void to_bitslice_quad(uint32_t *input, uint32_t *output) void to_bitslice_quad(uint32_t *input, uint32_t *output)
{ {
@ -429,3 +431,11 @@ void from_bitslice_quad(uint32_t *input, uint32_t *output)
if (threadIdx.x % 4) output[i] = output[i+1] = 0; if (threadIdx.x % 4) output[i] = output[i+1] = 0;
} }
} }
#else
/* host "fake" functions */
#define from_bitslice_quad(st, out)
#define to_bitslice_quad(in, msg) in[0] = (uint32_t) in[0];
#endif /* device only code */

24
ccminer.vcxproj

@ -432,7 +432,7 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions> <AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
<TargetMachinePlatform Condition="'$(Platform)'=='x64'">64</TargetMachinePlatform> <TargetMachinePlatform Condition="'$(Platform)'=='x64'">64</TargetMachinePlatform>
</CudaCompile> </CudaCompile>
<CudaCompile Include="quark\cuda_checkhash.cu"> <CudaCompile Include="cuda_checkhash.cu">
<CInterleavedPTX>false</CInterleavedPTX> <CInterleavedPTX>false</CInterleavedPTX>
<AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions> <AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
@ -463,6 +463,26 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions> <AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
<TargetMachinePlatform Condition="'$(Platform)'=='x64'">64</TargetMachinePlatform> <TargetMachinePlatform Condition="'$(Platform)'=='x64'">64</TargetMachinePlatform>
</CudaCompile> </CudaCompile>
<CudaCompile Include="qubit\deep.cu">
<AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
<TargetMachinePlatform Condition="'$(Platform)'=='x64'">64</TargetMachinePlatform>
</CudaCompile>
<CudaCompile Include="qubit\doom.cu">
<AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
<TargetMachinePlatform Condition="'$(Platform)'=='x64'">64</TargetMachinePlatform>
</CudaCompile>
<CudaCompile Include="qubit\qubit.cu">
<AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
<TargetMachinePlatform Condition="'$(Platform)'=='x64'">64</TargetMachinePlatform>
</CudaCompile>
<CudaCompile Include="qubit\qubit_luffa512.cu">
<AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
<TargetMachinePlatform Condition="'$(Platform)'=='x64'">64</TargetMachinePlatform>
</CudaCompile>
<CudaCompile Include="x11\cuda_x11_aes.cu"> <CudaCompile Include="x11\cuda_x11_aes.cu">
<ExcludedFromBuild>true</ExcludedFromBuild> <ExcludedFromBuild>true</ExcludedFromBuild>
</CudaCompile> </CudaCompile>
@ -570,4 +590,4 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.targets" /> <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

19
ccminer.vcxproj.filters

@ -52,6 +52,9 @@
<Filter Include="Header Files\CUDA\heavy"> <Filter Include="Header Files\CUDA\heavy">
<UniqueIdentifier>{3281db48-f394-49ea-a1ef-6ebd09828d50}</UniqueIdentifier> <UniqueIdentifier>{3281db48-f394-49ea-a1ef-6ebd09828d50}</UniqueIdentifier>
</Filter> </Filter>
<Filter Include="Source Files\CUDA\qubit">
<UniqueIdentifier>{f3ed23a2-8ce7-41a5-b051-6da56047dc35}</UniqueIdentifier>
</Filter>
<Filter Include="Source Files\CUDA\x11"> <Filter Include="Source Files\CUDA\x11">
<UniqueIdentifier>{dd751f2d-bfd6-42c1-8f9b-cbe94e539353}</UniqueIdentifier> <UniqueIdentifier>{dd751f2d-bfd6-42c1-8f9b-cbe94e539353}</UniqueIdentifier>
</Filter> </Filter>
@ -376,6 +379,18 @@
<CudaCompile Include="quark\quarkcoin.cu"> <CudaCompile Include="quark\quarkcoin.cu">
<Filter>Source Files\CUDA\quark</Filter> <Filter>Source Files\CUDA\quark</Filter>
</CudaCompile> </CudaCompile>
<CudaCompile Include="qubit\deep.cu">
<Filter>Source Files\CUDA\qubit</Filter>
</CudaCompile>
<CudaCompile Include="qubit\doom.cu">
<Filter>Source Files\CUDA\qubit</Filter>
</CudaCompile>
<CudaCompile Include="qubit\qubit.cu">
<Filter>Source Files\CUDA\qubit</Filter>
</CudaCompile>
<CudaCompile Include="qubit\qubit_luffa512.cu">
<Filter>Source Files\CUDA\qubit</Filter>
</CudaCompile>
<CudaCompile Include="cuda_nist5.cu"> <CudaCompile Include="cuda_nist5.cu">
<Filter>Source Files\CUDA</Filter> <Filter>Source Files\CUDA</Filter>
</CudaCompile> </CudaCompile>
@ -415,7 +430,7 @@
<CudaCompile Include="x13\x13.cu"> <CudaCompile Include="x13\x13.cu">
<Filter>Source Files\CUDA\x13</Filter> <Filter>Source Files\CUDA\x13</Filter>
</CudaCompile> </CudaCompile>
<CudaCompile Include="quark\cuda_checkhash.cu"> <CudaCompile Include="cuda_checkhash.cu">
<Filter>Source Files\CUDA</Filter> <Filter>Source Files\CUDA</Filter>
</CudaCompile> </CudaCompile>
<CudaCompile Include="x15\x14.cu"> <CudaCompile Include="x15\x14.cu">
@ -449,4 +464,4 @@
<Filter>Source Files\CUDA</Filter> <Filter>Source Files\CUDA</Filter>
</CudaCompile> </CudaCompile>
</ItemGroup> </ItemGroup>
</Project> </Project>

2
configure.ac

@ -1,4 +1,4 @@
AC_INIT([ccminer], [2014.09.09]) AC_INIT([ccminer], [2014.09.11])
AC_PREREQ([2.59c]) AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM AC_CANONICAL_SYSTEM

39
cpu-miner.c

@ -129,6 +129,8 @@ typedef enum {
ALGO_ANIME, ALGO_ANIME,
ALGO_BLAKE, ALGO_BLAKE,
ALGO_BLAKECOIN, ALGO_BLAKECOIN,
ALGO_DEEP,
ALGO_DOOM,
ALGO_FRESH, ALGO_FRESH,
ALGO_FUGUE256, /* Fugue256 */ ALGO_FUGUE256, /* Fugue256 */
ALGO_GROESTL, ALGO_GROESTL,
@ -139,6 +141,7 @@ typedef enum {
ALGO_NIST5, ALGO_NIST5,
ALGO_PENTABLAKE, ALGO_PENTABLAKE,
ALGO_QUARK, ALGO_QUARK,
ALGO_QUBIT,
ALGO_WHC, ALGO_WHC,
ALGO_X11, ALGO_X11,
ALGO_X13, ALGO_X13,
@ -152,6 +155,8 @@ static const char *algo_names[] = {
"anime", "anime",
"blake", "blake",
"blakecoin", "blakecoin",
"deep",
"doom",
"fresh", "fresh",
"fugue256", "fugue256",
"groestl", "groestl",
@ -162,6 +167,7 @@ static const char *algo_names[] = {
"nist5", "nist5",
"penta", "penta",
"quark", "quark",
"qubit",
"whirl", "whirl",
"x11", "x11",
"x13", "x13",
@ -235,6 +241,8 @@ Options:\n\
anime Animecoin hash\n\ anime Animecoin hash\n\
blake Blake 256 (like NEOS blake)\n\ blake Blake 256 (like NEOS blake)\n\
blakecoin Old Blake 256 (8 rounds)\n\ blakecoin Old Blake 256 (8 rounds)\n\
deep Deepcoin hash\n\
doom Doomcoin hash\n\
fresh Freshcoin hash (shavite 80)\n\ fresh Freshcoin hash (shavite 80)\n\
fugue256 Fuguecoin hash\n\ fugue256 Fuguecoin hash\n\
groestl Groestlcoin hash\n\ groestl Groestlcoin hash\n\
@ -245,6 +253,7 @@ Options:\n\
nist5 NIST5 (TalkCoin) hash\n\ nist5 NIST5 (TalkCoin) hash\n\
penta Pentablake hash (5x Blake 512)\n\ penta Pentablake hash (5x Blake 512)\n\
quark Quark hash\n\ quark Quark hash\n\
qubit Qubit hash\n\
whirl Whirlcoin (old whirlpool)\n\ whirl Whirlcoin (old whirlpool)\n\
x11 X11 (DarkCoin) hash\n\ x11 X11 (DarkCoin) hash\n\
x13 X13 (MaruCoin) hash\n\ x13 X13 (MaruCoin) hash\n\
@ -275,7 +284,7 @@ Options:\n\
--no-longpoll disable X-Long-Polling support\n\ --no-longpoll disable X-Long-Polling support\n\
--no-stratum disable X-Stratum support\n\ --no-stratum disable X-Stratum support\n\
-q, --quiet disable per-thread hashmeter output\n\ -q, --quiet disable per-thread hashmeter output\n\
-C, --color enable colored output\n\ -K, --nocolor disable colored output\n\
-D, --debug enable debug output\n\ -D, --debug enable debug output\n\
-P, --protocol-dump verbose dump of protocol-level activities\n" -P, --protocol-dump verbose dump of protocol-level activities\n"
#ifdef HAVE_SYSLOG_H #ifdef HAVE_SYSLOG_H
@ -301,7 +310,7 @@ static char const short_options[] =
#ifdef HAVE_SYSLOG_H #ifdef HAVE_SYSLOG_H
"S" "S"
#endif #endif
"a:c:CDhp:Px:qr:R:s:t:T:o:u:O:Vd:f:mv:"; "a:c:CKDhp:Px:qr:R:s:t:T:o:u:O:Vd:f:mv:";
static struct option const options[] = { static struct option const options[] = {
{ "algo", 1, NULL, 'a' }, { "algo", 1, NULL, 'a' },
@ -312,7 +321,7 @@ static struct option const options[] = {
{ "cputest", 0, NULL, 1006 }, { "cputest", 0, NULL, 1006 },
{ "cert", 1, NULL, 1001 }, { "cert", 1, NULL, 1001 },
{ "config", 1, NULL, 'c' }, { "config", 1, NULL, 'c' },
{ "color", 0, NULL, 'C' }, { "nocolor", 0, NULL, 'K' },
{ "debug", 0, NULL, 'D' }, { "debug", 0, NULL, 'D' },
{ "help", 0, NULL, 'h' }, { "help", 0, NULL, 'h' },
{ "no-longpoll", 0, NULL, 1003 }, { "no-longpoll", 0, NULL, 1003 },
@ -1083,6 +1092,16 @@ continue_scan:
max_nonce, &hashes_done, 0, MNR_BLKHDR_SZ); max_nonce, &hashes_done, 0, MNR_BLKHDR_SZ);
break; break;
case ALGO_DEEP:
rc = scanhash_deep(thr_id, work.data, work.target,
max_nonce, &hashes_done);
break;
case ALGO_DOOM:
rc = scanhash_doom(thr_id, work.data, work.target,
max_nonce, &hashes_done);
break;
case ALGO_FUGUE256: case ALGO_FUGUE256:
rc = scanhash_fugue256(thr_id, work.data, work.target, rc = scanhash_fugue256(thr_id, work.data, work.target,
max_nonce, &hashes_done); max_nonce, &hashes_done);
@ -1109,6 +1128,11 @@ continue_scan:
max_nonce, &hashes_done); max_nonce, &hashes_done);
break; break;
case ALGO_QUBIT:
rc = scanhash_qubit(thr_id, work.data, work.target,
max_nonce, &hashes_done);
break;
case ALGO_ANIME: case ALGO_ANIME:
rc = scanhash_anime(thr_id, work.data, work.target, rc = scanhash_anime(thr_id, work.data, work.target,
max_nonce, &hashes_done); max_nonce, &hashes_done);
@ -1427,7 +1451,7 @@ out:
return NULL; return NULL;
} }
#define PROGRAM_VERSION "1.4.2" #define PROGRAM_VERSION "1.4.3"
static void show_version_and_exit(void) static void show_version_and_exit(void)
{ {
printf("%s v%s\n" printf("%s v%s\n"
@ -1489,8 +1513,12 @@ static void parse_arg (int key, char *arg)
break; break;
} }
case 'C': case 'C':
/* color for compat */
use_colors = true; use_colors = true;
break; break;
case 'K':
use_colors = false;
break;
case 'D': case 'D':
opt_debug = true; opt_debug = true;
break; break;
@ -1801,8 +1829,7 @@ int main(int argc, char *argv[])
#endif #endif
printf("\t based on pooler-cpuminer 2.3.2 (c) 2010 Jeff Garzik, 2012 pooler\n"); printf("\t based on pooler-cpuminer 2.3.2 (c) 2010 Jeff Garzik, 2012 pooler\n");
printf("\t and HVC extension from http://hvc.1gh.com/" "\n\n"); printf("\t and HVC extension from http://hvc.1gh.com/" "\n\n");
printf("\tCuda additions Copyright 2014 Christian Buchner, Christian H.\n"); printf("\tCuda additions Copyright 2014 Christian Buchner, Christian H.\n\n");
printf("\t BTC donation address: 16hJF5mceSojnTD3ZTUDqdRhDyPJzoRakM\n");
printf("\tInclude some of djm34 additions, cleaned by Tanguy Pruvot\n"); printf("\tInclude some of djm34 additions, cleaned by Tanguy Pruvot\n");
printf("\t BTC donation address: 1AJdfCpLWPNoAMDfHF1wD5y8VgKSSTHxPo\n\n"); printf("\t BTC donation address: 1AJdfCpLWPNoAMDfHF1wD5y8VgKSSTHxPo\n\n");

6
cpuminer-config.h

@ -156,7 +156,7 @@
#define PACKAGE_NAME "ccminer" #define PACKAGE_NAME "ccminer"
/* Define to the full name and version of this package. */ /* Define to the full name and version of this package. */
#define PACKAGE_STRING "ccminer 2014.09.06" #define PACKAGE_STRING "ccminer 2014.09.11"
/* Define to the one symbol short name of this package. */ /* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "ccminer" #define PACKAGE_TARNAME "ccminer"
@ -165,7 +165,7 @@
#define PACKAGE_URL "" #define PACKAGE_URL ""
/* Define to the version of this package. */ /* Define to the version of this package. */
#define PACKAGE_VERSION "2014.09.06" #define PACKAGE_VERSION "2014.09.11"
/* If using the C implementation of alloca, define if you know the /* If using the C implementation of alloca, define if you know the
direction of stack growth for your system; otherwise it will be direction of stack growth for your system; otherwise it will be
@ -188,7 +188,7 @@
#define USE_XOP 1 #define USE_XOP 1
/* Version number of package */ /* Version number of package */
#define VERSION "2014.09.06" #define VERSION "2014.09.11"
/* Define curl_free() as free() if our version of curl lacks curl_free. */ /* Define curl_free() as free() if our version of curl lacks curl_free. */
/* #undef curl_free */ /* #undef curl_free */

54
quark/cuda_checkhash.cu → cuda_checkhash.cu

@ -9,66 +9,56 @@ __constant__ uint32_t pTarget[8];
static uint32_t *d_resNounce[8]; static uint32_t *d_resNounce[8];
static uint32_t *h_resNounce[8]; static uint32_t *h_resNounce[8];
// aus heavy.cu __global__
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); void cuda_check_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_nonceVector, uint32_t *g_hash, uint32_t *resNounce)
__global__ void cuda_check_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_nonceVector, uint32_t *g_hash, uint32_t *resNounce)
{ {
int thread = (blockDim.x * blockIdx.x + threadIdx.x); int thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads) if (thread < threads)
{ {
// bestimme den aktuellen Zähler // bestimme den aktuellen Zähler
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread); uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
int hashPosition = nounce - startNounce; int hashPosition = nounce - startNounce;
uint32_t *inpHash = &g_hash[16 * hashPosition]; uint32_t *inpHash = &g_hash[hashPosition<<4];
uint32_t hash[8]; uint32_t hash[8];
#pragma unroll 8
#pragma unroll 8
for (int i=0; i < 8; i++) for (int i=0; i < 8; i++)
hash[i] = inpHash[i]; hash[i] = inpHash[i];
// kopiere Ergebnis for (int i = 7; i >= 0; i--) {
int i, position = -1;
bool rc = true;
#pragma unroll 8
for (i = 7; i >= 0; i--) {
if (hash[i] > pTarget[i]) { if (hash[i] > pTarget[i]) {
if(position < i) { return;
position = i; }
rc = false; if (hash[i] < pTarget[i]) {
} break;
} }
if (hash[i] < pTarget[i]) {
if(position < i) {
position = i;
rc = true;
}
}
} }
if(rc == true) if(resNounce[0] > nounce)
if(resNounce[0] > nounce) resNounce[0] = nounce;
resNounce[0] = nounce;
} }
} }
// Setup-Funktionen // Setup-Funktionen
__host__ void cuda_check_cpu_init(int thr_id, int threads) __host__
void cuda_check_cpu_init(int thr_id, int threads)
{ {
cudaMallocHost(&h_resNounce[thr_id], 1*sizeof(uint32_t)); cudaMallocHost(&h_resNounce[thr_id], 1*sizeof(uint32_t));
cudaMalloc(&d_resNounce[thr_id], 1*sizeof(uint32_t)); cudaMalloc(&d_resNounce[thr_id], 1*sizeof(uint32_t));
} }
// Target Difficulty setzen // Target Difficulty setzen
__host__ void cuda_check_cpu_setTarget(const void *ptarget) __host__
void cuda_check_cpu_setTarget(const void *ptarget)
{ {
// die Message zur Berechnung auf der GPU // die Message zur Berechnung auf der GPU
cudaMemcpyToSymbol( pTarget, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice); cudaMemcpyToSymbol(pTarget, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice);
} }
__host__ uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order) __host__
uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order)
{ {
uint32_t result = 0xffffffff; uint32_t result = 0xffffffff;
cudaMemset(d_resNounce[thr_id], 0xff, sizeof(uint32_t)); cudaMemset(d_resNounce[thr_id], 0xff, sizeof(uint32_t));
@ -79,7 +69,7 @@ __host__ uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t start
dim3 grid((threads + threadsperblock-1)/threadsperblock); dim3 grid((threads + threadsperblock-1)/threadsperblock);
dim3 block(threadsperblock); dim3 block(threadsperblock);
// Größe des dynamischen Shared Memory Bereichs // Größe des dynamischen Shared Memory Bereichs
size_t shared_size = 0; size_t shared_size = 0;
cuda_check_gpu_hash_64 <<<grid, block, shared_size>>>(threads, startNounce, d_nonceVector, d_inputHash, d_resNounce[thr_id]); cuda_check_gpu_hash_64 <<<grid, block, shared_size>>>(threads, startNounce, d_nonceVector, d_inputHash, d_resNounce[thr_id]);

4
cuda_helper.h

@ -12,6 +12,10 @@
#include <stdint.h> #include <stdint.h>
// common functions
extern void cuda_check_cpu_init(int thr_id, int threads);
extern void cuda_check_cpu_setTarget(const void *ptarget);
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
extern __device__ __device_builtin__ void __syncthreads(void); extern __device__ __device_builtin__ void __syncthreads(void);

12
cuda_nist5.cu

@ -11,7 +11,7 @@ extern "C"
#include "cuda_helper.h" #include "cuda_helper.h"
// aus cpu-miner.c // in cpu-miner.c
extern int device_map[8]; extern int device_map[8];
// Speicher für Input/Output der verketteten Hashfunktionen // Speicher für Input/Output der verketteten Hashfunktionen
@ -33,9 +33,6 @@ extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startN
extern void quark_skein512_cpu_init(int thr_id, int threads); extern void quark_skein512_cpu_init(int thr_id, int threads);
extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void cuda_check_cpu_init(int thr_id, int threads);
extern void cuda_check_cpu_setTarget(const void *ptarget);
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
// Original nist5hash Funktion aus einem miner Quelltext // Original nist5hash Funktion aus einem miner Quelltext
extern "C" void nist5hash(void *state, const void *input) extern "C" void nist5hash(void *state, const void *input)
@ -46,30 +43,25 @@ extern "C" void nist5hash(void *state, const void *input)
sph_keccak512_context ctx_keccak; sph_keccak512_context ctx_keccak;
sph_skein512_context ctx_skein; sph_skein512_context ctx_skein;
unsigned char hash[64]; uint8_t hash[64];
sph_blake512_init(&ctx_blake); sph_blake512_init(&ctx_blake);
// ZBLAKE;
sph_blake512 (&ctx_blake, input, 80); sph_blake512 (&ctx_blake, input, 80);
sph_blake512_close(&ctx_blake, (void*) hash); sph_blake512_close(&ctx_blake, (void*) hash);
sph_groestl512_init(&ctx_groestl); sph_groestl512_init(&ctx_groestl);
// ZGROESTL;
sph_groestl512 (&ctx_groestl, (const void*) hash, 64); sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
sph_groestl512_close(&ctx_groestl, (void*) hash); sph_groestl512_close(&ctx_groestl, (void*) hash);
sph_jh512_init(&ctx_jh); sph_jh512_init(&ctx_jh);
// ZJH;
sph_jh512 (&ctx_jh, (const void*) hash, 64); sph_jh512 (&ctx_jh, (const void*) hash, 64);
sph_jh512_close(&ctx_jh, (void*) hash); sph_jh512_close(&ctx_jh, (void*) hash);
sph_keccak512_init(&ctx_keccak); sph_keccak512_init(&ctx_keccak);
// ZKECCAK;
sph_keccak512 (&ctx_keccak, (const void*) hash, 64); sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
sph_keccak512_close(&ctx_keccak, (void*) hash); sph_keccak512_close(&ctx_keccak, (void*) hash);
sph_skein512_init(&ctx_skein); sph_skein512_init(&ctx_skein);
// ZSKEIN;
sph_skein512 (&ctx_skein, (const void*) hash, 64); sph_skein512 (&ctx_skein, (const void*) hash, 64);
sph_skein512_close(&ctx_skein, (void*) hash); sph_skein512_close(&ctx_skein, (void*) hash);

25
miner.h

@ -205,13 +205,13 @@ extern int scanhash_sha256d(int thr_id, uint32_t *pdata,
extern unsigned char *scrypt_buffer_alloc(); extern unsigned char *scrypt_buffer_alloc();
extern int scanhash_scrypt(int thr_id, uint32_t *pdata, extern int scanhash_deep(int thr_id, uint32_t *pdata,
unsigned char *scratchbuf, const uint32_t *ptarget, const uint32_t *ptarget, uint32_t max_nonce,
uint32_t max_nonce, unsigned long *hashes_done); unsigned long *hashes_done);
extern int scanhash_heavy(int thr_id, uint32_t *pdata, extern int scanhash_doom(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done, uint32_t maxvote, int blocklen); unsigned long *hashes_done);
extern int scanhash_fugue256(int thr_id, uint32_t *pdata, extern int scanhash_fugue256(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,
@ -221,6 +221,10 @@ extern int scanhash_groestlcoin(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done); unsigned long *hashes_done);
extern int scanhash_heavy(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done, uint32_t maxvote, int blocklen);
extern int scanhash_myriad(int thr_id, uint32_t *pdata, extern int scanhash_myriad(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done); unsigned long *hashes_done);
@ -253,6 +257,14 @@ extern int scanhash_pentablake(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done); unsigned long *hashes_done);
extern int scanhash_qubit(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done);
extern int scanhash_scrypt(int thr_id, uint32_t *pdata,
unsigned char *scratchbuf, const uint32_t *ptarget,
uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_whc(int thr_id, uint32_t *pdata, extern int scanhash_whc(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done); unsigned long *hashes_done);
@ -426,6 +438,8 @@ void applog_compare_hash(unsigned char *hash, unsigned char *hash2);
void print_hash_tests(void); void print_hash_tests(void);
void animehash(void *state, const void *input); void animehash(void *state, const void *input);
void blake256hash(void *output, const void *input, int rounds); void blake256hash(void *output, const void *input, int rounds);
void deephash(void *state, const void *input);
void doomhash(void *state, const void *input);
void fresh_hash(void *state, const void *input); void fresh_hash(void *state, const void *input);
void fugue256_hash(unsigned char* output, const unsigned char* input, int len); void fugue256_hash(unsigned char* output, const unsigned char* input, int len);
void heavycoin_hash(unsigned char* output, const unsigned char* input, int len); void heavycoin_hash(unsigned char* output, const unsigned char* input, int len);
@ -435,6 +449,7 @@ void myriadhash(void *state, const void *input);
void nist5hash(void *state, const void *input); void nist5hash(void *state, const void *input);
void pentablakehash(void *output, const void *input); void pentablakehash(void *output, const void *input);
void quarkhash(void *state, const void *input); void quarkhash(void *state, const void *input);
void qubithash(void *state, const void *input);
void wcoinhash(void *state, const void *input); void wcoinhash(void *state, const void *input);
void x11hash(void *output, const void *input); void x11hash(void *output, const void *input);
void x13hash(void *output, const void *input); void x13hash(void *output, const void *input);

9
quark/animecoin.cu

@ -6,14 +6,12 @@ extern "C"
#include "sph/sph_skein.h" #include "sph/sph_skein.h"
#include "sph/sph_jh.h" #include "sph/sph_jh.h"
#include "sph/sph_keccak.h" #include "sph/sph_keccak.h"
}
#include "miner.h" #include "miner.h"
#include "cuda_helper.h" #include "cuda_helper.h"
}
// aus cpu-miner.c
extern int device_map[8]; extern int device_map[8];
// Speicher für Input/Output der verketteten Hashfunktionen
static uint32_t *d_hash[8]; static uint32_t *d_hash[8];
// Speicher zur Generierung der Noncevektoren für die bedingten Hashes // Speicher zur Generierung der Noncevektoren für die bedingten Hashes
@ -43,10 +41,6 @@ extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startN
extern void quark_jh512_cpu_init(int thr_id, int threads); extern void quark_jh512_cpu_init(int thr_id, int threads);
extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void cuda_check_cpu_init(int thr_id, int threads);
extern void cuda_check_cpu_setTarget(const void *ptarget);
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
extern void quark_compactTest_cpu_init(int thr_id, int threads); extern void quark_compactTest_cpu_init(int thr_id, int threads);
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable, extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
uint32_t *d_nonces1, size_t *nrm1, uint32_t *d_nonces1, size_t *nrm1,
@ -165,7 +159,6 @@ struct HashPredicate
uint32_t m_startNonce; uint32_t m_startNonce;
}; };
extern bool opt_benchmark;
extern "C" int scanhash_anime(int thr_id, uint32_t *pdata, extern "C" int scanhash_anime(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,

5
quark/quarkcoin.cu

@ -14,7 +14,6 @@ extern "C"
extern int device_map[8]; extern int device_map[8];
// Speicher für Input/Output der verketteten Hashfunktionen
static uint32_t *d_hash[8]; static uint32_t *d_hash[8];
// Speicher zur Generierung der Noncevektoren für die bedingten Hashes // Speicher zur Generierung der Noncevektoren für die bedingten Hashes
@ -44,10 +43,6 @@ extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startN
extern void quark_jh512_cpu_init(int thr_id, int threads); extern void quark_jh512_cpu_init(int thr_id, int threads);
extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void cuda_check_cpu_init(int thr_id, int threads);
extern void cuda_check_cpu_setTarget(const void *ptarget);
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
extern void quark_compactTest_cpu_init(int thr_id, int threads); extern void quark_compactTest_cpu_init(int thr_id, int threads);
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable, extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
uint32_t *d_nonces1, size_t *nrm1, uint32_t *d_nonces1, size_t *nrm1,

120
qubit/deep.cu

@ -0,0 +1,120 @@
/*
* deepcoin algorithm
*
*/
extern "C" {
#include "sph/sph_luffa.h"
#include "sph/sph_cubehash.h"
#include "sph/sph_shavite.h"
#include "sph/sph_simd.h"
#include "sph/sph_echo.h"
}
#include "miner.h"
#include "cuda_helper.h"
extern int device_map[8];
static uint32_t *d_hash[8];
extern void qubit_luffa512_cpu_init(int thr_id, int threads);
extern void qubit_luffa512_cpu_setBlock_80(void *pdata);
extern void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget);
extern uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void x11_cubehash512_cpu_init(int thr_id, int threads);
extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_echo512_cpu_init(int thr_id, int threads);
extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
/*
 * CPU reference implementation of the deep hash.
 *
 * Chain actually executed here: luffa512 -> cubehash512 -> echo512
 * (no shavite or simd stage is run by this function).
 *
 * input : 80 bytes are consumed by the first stage
 * state : receives the first 32 bytes of the final 64-byte digest
 */
extern "C" void deephash(void *state, const void *input)
{
	uint8_t digest[64];

	// stage 1: luffa512 over the 80-byte input
	sph_luffa512_context luffa;
	sph_luffa512_init(&luffa);
	sph_luffa512 (&luffa, input, 80);
	sph_luffa512_close(&luffa, (void*) digest);

	// stage 2: cubehash512 over the previous 64-byte digest (in place)
	sph_cubehash512_context cubehash;
	sph_cubehash512_init(&cubehash);
	sph_cubehash512 (&cubehash, (const void*) digest, 64);
	sph_cubehash512_close(&cubehash, (void*) digest);

	// stage 3: echo512 over the previous 64-byte digest (in place)
	sph_echo512_context echo;
	sph_echo512_init(&echo);
	sph_echo512 (&echo, (const void*) digest, 64);
	sph_echo512_close(&echo, (void*) digest);

	// only the first 256 bits are handed back to the caller
	memcpy(state, digest, 32);
}
/*
 * Scan a nonce range on the GPU for the deep algorithm
 * (luffa512 -> cubehash512 -> echo512).
 *
 * thr_id      : miner thread index, also used to index device_map[] and d_hash[]
 * pdata       : 20-word block header; pdata[19] is the start nonce on entry and
 *               is updated to the found nonce or to the scan position on exit
 * ptarget     : 8-word target; word 7 is overwritten when opt_benchmark is set
 * max_nonce   : scanning stops once pdata[19] reaches this value
 * hashes_done : receives (last nonce - first nonce + 1)
 *
 * Returns 1 when a nonce validated on the CPU, 0 otherwise.
 */
extern "C" int scanhash_deep(int thr_id, uint32_t *pdata,
	const uint32_t *ptarget, uint32_t max_nonce,
	unsigned long *hashes_done)
{
	const uint32_t first_nonce = pdata[19];
	const int throughput = 256*256*8*8;
	static bool init[8] = {0,0,0,0,0,0,0,0};
	uint32_t endiandata[20];

	if (opt_benchmark)
		((uint32_t*)ptarget)[7] = 0x0000ff;

	// one-time per-thread device setup
	if (!init[thr_id])
	{
		cudaSetDevice(device_map[thr_id]);
		// 16 words (64 bytes) of hash state per nonce
		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
		qubit_luffa512_cpu_init(thr_id, throughput);
		x11_cubehash512_cpu_init(thr_id, throughput);
		x11_echo512_cpu_init(thr_id, throughput);
		cuda_check_cpu_init(thr_id, throughput);
		init[thr_id] = true;
	}

	for (int k=0; k < 20; k++)
		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);

	qubit_luffa512_cpufinal_setBlock_80((void*)endiandata,ptarget);
	cuda_check_cpu_setTarget(ptarget);

	do {
		const uint32_t Htarg = ptarget[7];
		int order = 0;

		qubit_luffa512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
		x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);

		uint32_t foundNonce = cuda_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		if (foundNonce != 0xffffffff)
		{
			// re-check the GPU candidate with the CPU reference hash
			uint32_t vhash64[8];
			be32enc(&endiandata[19], foundNonce);
			deephash(vhash64, endiandata);

			if (vhash64[7]<=Htarg && fulltest(vhash64, ptarget) )
			{
				pdata[19] = foundNonce;
				*hashes_done = foundNonce - first_nonce + 1;
				return 1;
			} else {
				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
			}
		}

		// Advance in 64 bits: a plain 32-bit "+=" can wrap around near the
		// top of the nonce space, and the wrapped value would be < max_nonce,
		// so the loop would restart on low nonces instead of terminating.
		const uint64_t next_nonce = (uint64_t)pdata[19] + (uint64_t)throughput;
		if (next_nonce > 0xffffffffULL) {
			pdata[19] = 0xffffffffU;
			break;
		}
		pdata[19] = (uint32_t)next_nonce;

	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);

	*hashes_done = pdata[19] - first_nonce + 1;
	return 0;
}

93
qubit/doom.cu

@ -0,0 +1,93 @@
/*
* doom algorithm (luffa512 only)
*
*/
extern "C" {
#include "sph/sph_luffa.h"
}
#include "miner.h"
#include "cuda_helper.h"
extern int device_map[8];
static uint32_t *d_hash[8];
extern void qubit_luffa512_cpu_init(int thr_id, int threads);
extern void qubit_luffa512_cpu_setBlock_80(void *pdata);
extern void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget);
extern uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
/*
 * CPU reference implementation of the doom hash: a single luffa512 pass.
 *
 * Declared extern "C" like the sibling deephash()/qubithash() reference
 * functions, because it is also called from C code (print_hash_tests in util.c).
 *
 * input : 80 bytes are hashed
 * state : receives the first 32 bytes of the 64-byte luffa512 digest
 */
extern "C" void doomhash(void *state, const void *input)
{
	// luffa512
	sph_luffa512_context ctx_luffa;

	uint8_t hash[64];

	sph_luffa512_init(&ctx_luffa);
	sph_luffa512 (&ctx_luffa, input, 80);
	sph_luffa512_close(&ctx_luffa, (void*) hash);

	// only the first 256 bits are handed back to the caller
	memcpy(state, hash, 32);
}
/*
 * Scan a nonce range on the GPU for the doom algorithm (single luffa512).
 * The target comparison is done inside the luffa "finalhash" kernel, so no
 * separate check kernel is launched here.
 *
 * thr_id      : miner thread index, also used to index device_map[] and d_hash[]
 * pdata       : 20-word block header; pdata[19] is the start nonce on entry and
 *               is updated to the found nonce or to the scan position on exit
 * ptarget     : 8-word target; word 7 is overwritten when opt_benchmark is set
 * max_nonce   : scanning stops once pdata[19] reaches this value
 * hashes_done : receives (last nonce - first nonce + 1)
 *
 * Returns 1 when a nonce validated on the CPU, 0 otherwise.
 */
extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,
	const uint32_t *ptarget, uint32_t max_nonce,
	unsigned long *hashes_done)
{
	const uint32_t first_nonce = pdata[19];
	const int throughput = 256*256*8*8;
	static bool init[8] = {0,0,0,0,0,0,0,0};
	uint32_t endiandata[20];

	if (opt_benchmark)
		((uint32_t*)ptarget)[7] = 0x0000ff;

	// one-time per-thread device setup
	if (!init[thr_id])
	{
		cudaSetDevice(device_map[thr_id]);
		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
		qubit_luffa512_cpu_init(thr_id, throughput);
		init[thr_id] = true;
	}

	for (int k=0; k < 20; k++)
		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);

	// uploads both the header and the target used by the finalhash kernel
	qubit_luffa512_cpufinal_setBlock_80((void*)endiandata,ptarget);

	do {
		const uint32_t Htarg = ptarget[7];
		int order = 0;

		uint32_t foundNonce = qubit_luffa512_cpu_finalhash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
		if (foundNonce != 0xffffffff)
		{
			// re-check the GPU candidate with the CPU reference hash
			uint32_t vhash64[8];
			be32enc(&endiandata[19], foundNonce);
			doomhash(vhash64, endiandata);

			if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget) )
			{
				pdata[19] = foundNonce;
				*hashes_done = foundNonce - first_nonce + 1;
				return 1;
			} else {
				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
			}
		}

		// Advance in 64 bits: a plain 32-bit "+=" can wrap around near the
		// top of the nonce space, and the wrapped value would be < max_nonce,
		// so the loop would restart on low nonces instead of terminating.
		const uint64_t next_nonce = (uint64_t)pdata[19] + (uint64_t)throughput;
		if (next_nonce > 0xffffffffULL) {
			pdata[19] = 0xffffffffU;
			break;
		}
		pdata[19] = (uint32_t)next_nonce;

	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);

	*hashes_done = pdata[19] - first_nonce + 1;
	return 0;
}

146
qubit/qubit.cu

@ -0,0 +1,146 @@
/*
* qubit algorithm
*
*/
extern "C" {
#include "sph/sph_luffa.h"
#include "sph/sph_cubehash.h"
#include "sph/sph_shavite.h"
#include "sph/sph_simd.h"
#include "sph/sph_echo.h"
}
#include "miner.h"
#include "cuda_helper.h"
extern int device_map[8];
static uint32_t *d_hash[8];
extern void qubit_luffa512_cpu_init(int thr_id, int threads);
extern void qubit_luffa512_cpu_setBlock_80(void *pdata);
extern void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void x11_cubehash512_cpu_init(int thr_id, int threads);
extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_shavite512_cpu_init(int thr_id, int threads);
extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_simd512_cpu_init(int thr_id, int threads);
extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_echo512_cpu_init(int thr_id, int threads);
extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_compactTest_cpu_init(int thr_id, int threads);
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
int order);
/*
 * CPU reference implementation of the qubit hash.
 *
 * Chain: luffa512 -> cubehash512 -> shavite512 -> simd512 -> echo512
 *
 * input : 80 bytes are consumed by the first stage
 * state : receives the first 32 bytes of the final 64-byte digest
 */
extern "C" void qubithash(void *state, const void *input)
{
	uint8_t digest[64];

	// stage 1: luffa512 over the 80-byte input
	sph_luffa512_context luffa;
	sph_luffa512_init(&luffa);
	sph_luffa512 (&luffa, input, 80);
	sph_luffa512_close(&luffa, (void*) digest);

	// stages 2-5 each hash the previous 64-byte digest in place

	sph_cubehash512_context cubehash;
	sph_cubehash512_init(&cubehash);
	sph_cubehash512 (&cubehash, (const void*) digest, 64);
	sph_cubehash512_close(&cubehash, (void*) digest);

	sph_shavite512_context shavite;
	sph_shavite512_init(&shavite);
	sph_shavite512 (&shavite, (const void*) digest, 64);
	sph_shavite512_close(&shavite, (void*) digest);

	sph_simd512_context simd;
	sph_simd512_init(&simd);
	sph_simd512 (&simd, (const void*) digest, 64);
	sph_simd512_close(&simd, (void*) digest);

	sph_echo512_context echo;
	sph_echo512_init(&echo);
	sph_echo512 (&echo, (const void*) digest, 64);
	sph_echo512_close(&echo, (void*) digest);

	// only the first 256 bits are handed back to the caller
	memcpy(state, digest, 32);
}
/*
 * Scan a nonce range on the GPU for the qubit algorithm
 * (luffa512 -> cubehash512 -> shavite512 -> simd512 -> echo512).
 *
 * thr_id      : miner thread index, also used to index device_map[] and d_hash[]
 * pdata       : 20-word block header; pdata[19] is the start nonce on entry and
 *               is updated to the found nonce or to the scan position on exit
 * ptarget     : 8-word target; word 7 is overwritten when opt_benchmark is set
 * max_nonce   : scanning stops once pdata[19] reaches this value
 * hashes_done : receives (last nonce - first nonce + 1)
 *
 * Returns 1 when a nonce validated on the CPU, 0 otherwise.
 */
extern "C" int scanhash_qubit(int thr_id, uint32_t *pdata,
	const uint32_t *ptarget, uint32_t max_nonce,
	unsigned long *hashes_done)
{
	const uint32_t first_nonce = pdata[19];
	const int throughput = 256*256*8;
	static bool init[8] = {0,0,0,0,0,0,0,0};
	uint32_t endiandata[20];

	if (opt_benchmark)
		((uint32_t*)ptarget)[7] = 0x0000ff;

	// one-time per-thread device setup
	if (!init[thr_id])
	{
		cudaSetDevice(device_map[thr_id]);
		// 16 words (64 bytes) of hash state per nonce
		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
		qubit_luffa512_cpu_init(thr_id, throughput);
		x11_cubehash512_cpu_init(thr_id, throughput);
		x11_shavite512_cpu_init(thr_id, throughput);
		x11_simd512_cpu_init(thr_id, throughput);
		x11_echo512_cpu_init(thr_id, throughput);
		cuda_check_cpu_init(thr_id, throughput);
		init[thr_id] = true;
	}

	for (int k=0; k < 20; k++)
		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);

	qubit_luffa512_cpu_setBlock_80((void*)endiandata);
	cuda_check_cpu_setTarget(ptarget);

	do {
		const uint32_t Htarg = ptarget[7];
		int order = 0;

		// Hash with CUDA
		qubit_luffa512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
		x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);

		uint32_t foundNonce = cuda_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
		if (foundNonce != 0xffffffff)
		{
			// re-check the GPU candidate with the CPU reference hash
			uint32_t vhash64[8];
			be32enc(&endiandata[19], foundNonce);
			qubithash(vhash64, endiandata);

			if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget) )
			{
				pdata[19] = foundNonce;
				*hashes_done = foundNonce - first_nonce + 1;
				return 1;
			} else {
				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
			}
		}

		// Advance in 64 bits: a plain 32-bit "+=" can wrap around near the
		// top of the nonce space, and the wrapped value would be < max_nonce,
		// so the loop would restart on low nonces instead of terminating.
		const uint64_t next_nonce = (uint64_t)pdata[19] + (uint64_t)throughput;
		if (next_nonce > 0xffffffffULL) {
			pdata[19] = 0xffffffffU;
			break;
		}
		pdata[19] = (uint32_t)next_nonce;

	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);

	*hashes_done = pdata[19] - first_nonce + 1;
	return 0;
}

496
qubit/qubit_luffa512.cu

@ -0,0 +1,496 @@
/*
* luffa_for_32.c
* Version 2.0 (Sep 15th 2009)
*
* Copyright (C) 2008-2009 Hitachi, Ltd. All rights reserved.
*
* Hitachi, Ltd. is the owner of this software and hereby grant
* the U.S. Government and any interested party the right to use
* this software for the purposes of the SHA-3 evaluation process,
* notwithstanding that this software is copyrighted.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <stdio.h>
#include <stdint.h>
#include <memory.h>
#include "cuda_helper.h"
// Byte type used for the message pointer handed to Update512().
typedef unsigned char BitSequence;
// Block header as uploaded by the *_setBlock_80() host functions.
__constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding)
// 8-word target, uploaded by qubit_luffa512_cpufinal_setBlock_80() and
// compared against the hash in qubit_luffa512_gpu_finalhash_80().
__constant__ uint32_t pTarget[8];
// Per-thread result buffers, one slot per thr_id (both set up in qubit_luffa512_cpu_init):
// d_lnounce[] is allocated with cudaMallocHost (host side, despite the d_ prefix),
// d_LNonce[]  is allocated with cudaMalloc (device side).
uint32_t *d_lnounce[8];
uint32_t *d_LNonce[8];
// Working state of one Luffa-512 computation.
typedef struct {
uint32_t buffer[8]; /* Buffer to be hashed */
uint32_t chainv[40]; /* Chaining values */
} hashState;
// 32-bit byte swap, delegated to cuda_swab32().
#define BYTES_SWAP32(x) cuda_swab32(x)
// Operates on the 8 words a[8*j .. 8*j+7]: every word moves up one index,
// the old top word (index 7) wraps to index 0 and is additionally XORed
// into the words landing at indices 1, 3 and 4.
// Requires a uint32_t `tmp` in the enclosing scope.
#define MULT2(a,j)\
tmp = a[7+(8*j)];\
a[7+(8*j)] = a[6+(8*j)];\
a[6+(8*j)] = a[5+(8*j)];\
a[5+(8*j)] = a[4+(8*j)];\
a[4+(8*j)] = a[3+(8*j)] ^ tmp;\
a[3+(8*j)] = a[2+(8*j)] ^ tmp;\
a[2+(8*j)] = a[1+(8*j)];\
a[1+(8*j)] = a[0+(8*j)] ^ tmp;\
a[0+(8*j)] = tmp;
// Rotates each of the four 32-bit words left by j bits.
// j must be in 1..31 (j == 0 would shift by 32).
#define TWEAK(a0,a1,a2,a3,j)\
a0 = (a0<<(j))|(a0>>(32-j));\
a1 = (a1<<(j))|(a1>>(32-j));\
a2 = (a2<<(j))|(a2>>(32-j));\
a3 = (a3<<(j))|(a3>>(32-j));
// One step on the local 8-word array `chainv`: SUBCRUMB on words 0..3 and on
// words 5,6,7,4, MIXWORD on the pairs (k, k+4), then the constants c0/c1 are
// XORed into words 0 and 4.
// Requires `chainv[8]` and a uint32_t `tmp` in the enclosing scope.
#define STEP(c0,c1)\
SUBCRUMB(chainv[0],chainv[1],chainv[2],chainv[3],tmp);\
SUBCRUMB(chainv[5],chainv[6],chainv[7],chainv[4],tmp);\
MIXWORD(chainv[0],chainv[4]);\
MIXWORD(chainv[1],chainv[5]);\
MIXWORD(chainv[2],chainv[6]);\
MIXWORD(chainv[3],chainv[7]);\
ADD_CONSTANT(chainv[0],chainv[4],c0,c1);
// Bitwise (AND/OR/XOR/NOT only) transformation of the four words a0..a3;
// a4 is used as scratch and is overwritten. Statement order matters.
#define SUBCRUMB(a0,a1,a2,a3,a4)\
a4 = a0;\
a0 |= a1;\
a2 ^= a3;\
a1 = ~a1;\
a0 ^= a3;\
a3 &= a4;\
a1 ^= a3;\
a3 ^= a2;\
a2 &= a0;\
a0 = ~a0;\
a2 ^= a1;\
a1 |= a3;\
a4 ^= a1;\
a3 ^= a2;\
a2 &= a1;\
a1 ^= a0;\
a0 = a4;
// Mixes the word pair (a0, a4) with alternating XORs and left rotations
// by 2, 14, 10 and 1 bits.
#define MIXWORD(a0,a4)\
a4 ^= a0;\
a0 = (a0<<2) | (a0>>(30));\
a0 ^= a4;\
a4 = (a4<<14) | (a4>>(18));\
a4 ^= a0;\
a0 = (a0<<10) | (a0>>(22));\
a0 ^= a4;\
a4 = (a4<<1) | (a4>>(31));
// XORs the constant c0 into a0 and c1 into b0.
#define ADD_CONSTANT(a0,b0,c0,c1)\
a0 ^= c0;\
b0 ^= c1;
/* initial values of chaining variables */
// Device copy of the 40 initial chaining words; filled from h2_IV by
// qubit_luffa512_cpu_init() and loaded into hashState.chainv by the kernels.
__constant__ uint32_t c_IV[40];
// Host-side source table for c_IV.
const uint32_t h2_IV[40] = {
0x6d251e69,0x44b051e0,0x4eaa6fb4,0xdbf78465,
0x6e292011,0x90152df4,0xee058139,0xdef610bb,
0xc3b44b95,0xd9d2f256,0x70eee9a0,0xde099fa3,
0x5d9b0557,0x8fc944b3,0xcf1ccf0e,0x746cd581,
0xf7efc89d,0x5dba5781,0x04016ce5,0xad659c05,
0x0306194f,0x666d1836,0x24aa230a,0x8b264ae7,
0x858075d5,0x36d79cce,0xe571f7d7,0x204b1f67,
0x35870c6a,0x57e9e923,0x14bcb808,0x7cde72ce,
0x6c68e9be,0x5ec41e22,0xc825b7c7,0xaffb4363,
0xf5df3999,0x0fc688f1,0xb07224cc,0x03e86cea};
// Device copy of the 80 step constants; filled from h2_CNS by
// qubit_luffa512_cpu_init(). rnd512() reads them in pairs:
// 16 words per 8-word row of the chaining values, 2 words per STEP.
__constant__ uint32_t c_CNS[80];
// Host-side source table for c_CNS.
uint32_t h2_CNS[80] = {
0x303994a6,0xe0337818,0xc0e65299,0x441ba90d,
0x6cc33a12,0x7f34d442,0xdc56983e,0x9389217f,
0x1e00108f,0xe5a8bce6,0x7800423d,0x5274baf4,
0x8f5b7882,0x26889ba7,0x96e1db12,0x9a226e9d,
0xb6de10ed,0x01685f3d,0x70f47aae,0x05a17cf4,
0x0707a3d4,0xbd09caca,0x1c1e8f51,0xf4272b28,
0x707a3d45,0x144ae5cc,0xaeb28562,0xfaa7ae2b,
0xbaca1589,0x2e48f1c1,0x40a46f3e,0xb923c704,
0xfc20d9d2,0xe25e72c1,0x34552e25,0xe623bb72,
0x7ad8818f,0x5c58a4a4,0x8438764a,0x1e38e2e7,
0xbb6de032,0x78e38b9d,0xedb780c8,0x27586719,
0xd9847356,0x36eda57f,0xa2c78434,0x703aace7,
0xb213afa5,0xe028c9bf,0xc84ebe95,0x44756f91,
0x4e608a22,0x7e8fce32,0x56d858fe,0x956548be,
0x343b138f,0xfe191be2,0xd0ec4e3d,0x3cb226e5,
0x2ceb4882,0x5944a28e,0xb3ad2208,0xa1c4c355,
0xf0d2e9e3,0x5090d577,0xac11d7fa,0x2d1925ab,
0x1bcb66f2,0xb46496ac,0x6f2d9bc9,0xd1925ab0,
0x78602649,0x29131ab6,0x8edae952,0x0fc053c3,
0x3b6ba548,0x3f014f0c,0xedae9520,0xfc053c31};
/***************************************************/
// One Luffa-512 round on `state`: mixes the five 8-word rows of
// state->chainv with each other, XORs state->buffer into them, then runs
// eight STEPs on every row. state->buffer is modified as a side effect
// (MULT2 is applied to it five times).
__device__ __forceinline__
void rnd512(hashState *state)
{
int i,j;
uint32_t t[40];
uint32_t chainv[8];
uint32_t tmp;
// t[0..7] = XOR of the five rows of chainv
#pragma unroll 8
for(i=0;i<8;i++) {
t[i]=0;
#pragma unroll 5
for(j=0;j<5;j++) {
t[i] ^= state->chainv[i+8*j];
}
}
MULT2(t, 0);
// fold the transformed row sum back into every row
#pragma unroll 5
for(j=0;j<5;j++) {
#pragma unroll 8
for(i=0;i<8;i++) {
state->chainv[i+8*j] ^= t[i];
}
}
// snapshot all 40 words, MULT2 every row, then XOR in the snapshot of the next row ((j+1)%5)
#pragma unroll 5
for(j=0;j<5;j++) {
#pragma unroll 8
for(i=0;i<8;i++) {
t[i+8*j] = state->chainv[i+8*j];
}
}
#pragma unroll 5
for(j=0;j<5;j++) {
MULT2(state->chainv, j);
}
#pragma unroll 5
for(j=0;j<5;j++) {
#pragma unroll 8
for(i=0;i<8;i++) {
state->chainv[8*j+i] ^= t[8*((j+1)%5)+i];
}
}
// same again, this time XORing in the snapshot of the previous row ((j+4)%5)
#pragma unroll 5
for(j=0;j<5;j++) {
#pragma unroll 8
for(i=0;i<8;i++) {
t[i+8*j] = state->chainv[i+8*j];
}
}
#pragma unroll 5
for(j=0;j<5;j++) {
MULT2(state->chainv, j);
}
#pragma unroll 5
for(j=0;j<5;j++) {
#pragma unroll 8
for(i=0;i<8;i++) {
state->chainv[8*j+i] ^= t[8*((j+4)%5)+i];
}
}
// XOR the message buffer into each row; the buffer goes through MULT2 after every row
#pragma unroll 5
for(j=0;j<5;j++) {
#pragma unroll 8
for(i=0;i<8;i++) {
state->chainv[i+8*j] ^= state->buffer[i];
}
MULT2(state->buffer, 0);
}
// Row 0: copy to the local chainv[] (the array STEP operates on), 8 STEPs with c_CNS[0..15]
#pragma unroll 8
for(i=0;i<8;i++) {
chainv[i] = state->chainv[i];
}
#pragma unroll 8
for(i=0;i<8;i++) {
STEP(c_CNS[(2*i)],c_CNS[(2*i)+1]);
}
// store row 0, load row 1
#pragma unroll 8
for(i=0;i<8;i++) {
state->chainv[i] = chainv[i];
chainv[i] = state->chainv[i+8];
}
// Row 1: rotate words 4..7 left by 1, 8 STEPs with c_CNS[16..31]
TWEAK(chainv[4],chainv[5],chainv[6],chainv[7],1);
#pragma unroll 8
for(i=0;i<8;i++) {
STEP(c_CNS[(2*i)+16],c_CNS[(2*i)+16+1]);
}
// store row 1, load row 2
#pragma unroll 8
for(i=0;i<8;i++) {
state->chainv[i+8] = chainv[i];
chainv[i] = state->chainv[i+16];
}
// Row 2: rotate words 4..7 left by 2, 8 STEPs with c_CNS[32..47]
TWEAK(chainv[4],chainv[5],chainv[6],chainv[7],2);
#pragma unroll 8
for(i=0;i<8;i++) {
STEP(c_CNS[(2*i)+32],c_CNS[(2*i)+32+1]);
}
// store row 2, load row 3
#pragma unroll 8
for(i=0;i<8;i++) {
state->chainv[i+16] = chainv[i];
chainv[i] = state->chainv[i+24];
}
// Row 3: rotate words 4..7 left by 3, 8 STEPs with c_CNS[48..63]
TWEAK(chainv[4],chainv[5],chainv[6],chainv[7],3);
#pragma unroll 8
for(i=0;i<8;i++) {
STEP(c_CNS[(2*i)+48],c_CNS[(2*i)+48+1]);
}
// store row 3, load row 4
#pragma unroll 8
for(i=0;i<8;i++) {
state->chainv[i+24] = chainv[i];
chainv[i] = state->chainv[i+32];
}
// Row 4: rotate words 4..7 left by 4, 8 STEPs with c_CNS[64..79]
TWEAK(chainv[4],chainv[5],chainv[6],chainv[7],4);
#pragma unroll 8
for(i=0;i<8;i++) {
STEP(c_CNS[(2*i)+64],c_CNS[(2*i)+64+1]);
}
// store row 4
#pragma unroll 8
for(i=0;i<8;i++) {
state->chainv[i+32] = chainv[i];
}
}
// Absorbs an 80-byte message into `state`.
// The first two 32-byte blocks are byte-swapped into state->buffer and each
// is followed by a round. The last 16 bytes are only loaded into
// buffer[0..3]; no round is run for them here (finalization512() completes
// that block).
__device__ __forceinline__
void Update512(hashState *state, const BitSequence *data)
{
	const uint32_t *words = (const uint32_t*)data;

	// bytes 0..31
	#pragma unroll 8
	for (int w = 0; w < 8; w++)
		state->buffer[w] = BYTES_SWAP32(words[w]);
	rnd512(state);

	// bytes 32..63
	#pragma unroll 8
	for (int w = 0; w < 8; w++)
		state->buffer[w] = BYTES_SWAP32(words[8 + w]);
	rnd512(state);

	// bytes 64..79
	#pragma unroll 4
	for (int w = 0; w < 4; w++)
		state->buffer[w] = BYTES_SWAP32(words[16 + w]);
}
/***************************************************/
// Finishes the hash started by Update512() and writes 16 words to b.
// buffer[0..3] are expected to still hold the last 16 message bytes;
// buffer[4] gets the 0x80000000 padding word and buffer[5..7] are cleared
// before the closing round. Two further rounds with an all-zero buffer
// follow; after each, the five rows of chainv are XORed together and
// byte-swapped to produce b[0..7] and then b[8..15].
__device__ __forceinline__
void finalization512(hashState *state, uint32_t *b)
{
	// pad the partial block and process it
	state->buffer[4] = 0x80000000;
	state->buffer[5] = 0;
	state->buffer[6] = 0;
	state->buffer[7] = 0;
	rnd512(state);

	#pragma unroll 2
	for (int half = 0; half < 2; half++) {
		/*---- blank round with m=0 ----*/
		#pragma unroll 8
		for (int w = 0; w < 8; w++)
			state->buffer[w] = 0;
		rnd512(state);

		// fold the five rows into one 8-word output half
		uint32_t *out = b + 8 * half;
		#pragma unroll 8
		for (int w = 0; w < 8; w++) {
			uint32_t acc = 0;
			#pragma unroll 5
			for (int row = 0; row < 5; row++)
				acc ^= state->chainv[w + 8*row];
			out[w] = BYTES_SWAP32(acc);
		}
	}
}
/***************************************************/
// The hash kernel: one thread computes Luffa-512 of the 80-byte header in
// c_PaddedMessage80 for nonce (startNounce + thread index) and stores the
// 16-word result at outputHash + 16 * thread index.
// Launch layout: 1D grid of 1D blocks; threads beyond `threads` do nothing.
__global__
void qubit_luffa512_gpu_hash_80(int threads, uint32_t startNounce, void *outputHash)
{
	const int tid = (blockDim.x * blockIdx.x + threadIdx.x);
	if (tid >= threads)
		return;

	const uint32_t nonce = startNounce + tid;

	union {
		uint64_t buf64[16];
		uint32_t buf32[32];
	} msg;

	#pragma unroll 16
	for (int w = 0; w < 16; ++w)
		msg.buf64[w] = c_PaddedMessage80[w];

	// replace the nonce with the thread-specific one
	msg.buf64[9] = REPLACE_HIWORD(msg.buf64[9], cuda_swab32(nonce));

	hashState st;
	#pragma unroll 40
	for (int w = 0; w < 40; w++)
		st.chainv[w] = c_IV[w];
	#pragma unroll 8
	for (int w = 0; w < 8; w++)
		st.buffer[w] = 0;

	Update512(&st, (BitSequence*)msg.buf32);

	uint32_t *dst = (uint32_t *)outputHash + 16 * tid;
	finalization512(&st, dst);
}
// Hash-and-check kernel: one thread computes Luffa-512 of the 80-byte header
// in c_PaddedMessage80 for nonce (startNounce + thread index), compares the
// first 8 hash words against pTarget and, on success, records the nonce in
// resNounce[0] (the smallest successful nonce wins).
//
// threads     : number of nonces to test; surplus threads do nothing
// startNounce : nonce of thread 0
// outputHash  : not used by this kernel (the hash stays in registers/local memory)
// resNounce   : single device word; the host presets it to 0xffffffff
//
// Launch layout: 1D grid of 1D blocks.
__global__
void qubit_luffa512_gpu_finalhash_80(int threads, uint32_t startNounce, void *outputHash, uint32_t *resNounce)
{
	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		uint32_t nounce = startNounce + thread;
		union {
			uint64_t buf64[16];
			uint32_t buf32[32];
		} buff;
		uint32_t Hash[16];

		#pragma unroll 16
		for (int i=0; i < 16; ++i) buff.buf64[i] = c_PaddedMessage80[i];

		// replace the nonce with the thread-specific one
		buff.buf64[9] = REPLACE_HIWORD(buff.buf64[9], cuda_swab32(nounce));

		hashState state;
		#pragma unroll 40
		for(int i=0;i<40;i++) state.chainv[i] = c_IV[i];
		#pragma unroll 8
		for(int i=0;i<8;i++) state.buffer[i] = 0;

		Update512(&state, (BitSequence*)buff.buf32);
		finalization512(&state, Hash);

		// Compare as a 256-bit number with word 7 most significant: the
		// first differing word decides; all words equal counts as a hit.
		bool rc = true;
		for (int i = 7; i >= 0; i--) {
			if (Hash[i] != pTarget[i]) {
				rc = (Hash[i] < pTarget[i]);
				break;
			}
		}

		// Several threads may hit in the same launch. A plain
		// "if (resNounce[0] > nounce) resNounce[0] = nounce" is a racy
		// read-modify-write; atomicMin keeps the smallest nonce reliably.
		if (rc)
			atomicMin(&resNounce[0], nounce);
	}
}
// Per-thread setup for the qubit Luffa kernels: uploads the IV and step
// constant tables to constant memory and allocates the one-word result
// buffers for thr_id (device word in d_LNonce[], cudaMallocHost word in
// d_lnounce[]). `threads` is not used.
__host__
void qubit_luffa512_cpu_init(int thr_id, int threads)
{
	// constant tables
	cudaMemcpyToSymbol(c_IV, h2_IV, sizeof(h2_IV), 0, cudaMemcpyHostToDevice);
	cudaMemcpyToSymbol(c_CNS, h2_CNS, sizeof(h2_CNS), 0, cudaMemcpyHostToDevice);

	// result word: device side, plus its host-side mirror
	uint32_t **devSlot = &d_LNonce[thr_id];
	uint32_t **hostSlot = &d_lnounce[thr_id];
	cudaMalloc(devSlot, sizeof(uint32_t));
	cudaMallocHost(hostSlot, sizeof(uint32_t));
}
// Launches the hash-and-check kernel for `threads` nonces starting at
// startNounce and returns the nonce it reported, or 0xffffffff when the
// kernel did not overwrite the preset result word.
// d_outputHash is forwarded to the kernel; `order` and thr_id are passed to
// MyStreamSynchronize().
__host__
uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash,int order)
{
	const int tpb = 256;
	const dim3 block(tpb);
	const dim3 grid((threads + tpb - 1) / tpb);   // ceil(threads / tpb)
	const size_t sharedBytes = 0;

	// cudaMemset fills byte-wise, so the result word becomes 0xffffffff ("nothing found")
	cudaMemset(d_LNonce[thr_id], 0xffffffff, sizeof(uint32_t));

	qubit_luffa512_gpu_finalhash_80<<<grid, block, sharedBytes>>>(threads, startNounce, d_outputHash, d_LNonce[thr_id]);

	MyStreamSynchronize(NULL, order, thr_id);

	// fetch the result word into the host-side buffer and hand it back
	cudaMemcpy(d_lnounce[thr_id], d_LNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
	return *d_lnounce[thr_id];
}
// Launches the Luffa-512 kernel for `threads` nonces starting at
// startNounce; each thread writes 16 words into d_outputHash.
// `order` and thr_id are passed to MyStreamSynchronize().
__host__
void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash,int order)
{
	const int tpb = 256;
	const dim3 block(tpb);
	const dim3 grid((threads + tpb - 1) / tpb);   // ceil(threads / tpb)
	const size_t sharedBytes = 0;

	qubit_luffa512_gpu_hash_80<<<grid, block, sharedBytes>>>(threads, startNounce, d_outputHash);

	MyStreamSynchronize(NULL, order, thr_id);
}
// Uploads the 80-byte block header at pdata into c_PaddedMessage80 as a
// 128-byte block: header bytes, then zeros with marker bytes at offsets
// 80, 111, 126 and 127.
// NOTE(review): Update512() in this file only consumes the first 80 bytes,
// so the trailing marker bytes appear to be unused by these kernels - confirm.
__host__
void qubit_luffa512_cpu_setBlock_80(void *pdata)
{
	unsigned char block[128] = { 0 };

	memcpy(block, pdata, 80);
	block[80] = 0x80;
	block[111] = 1;
	block[126] = 0x02;
	block[127] = 0x80;

	cudaMemcpyToSymbol(c_PaddedMessage80, block, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
}
// Variant of qubit_luffa512_cpu_setBlock_80() for the hash-and-check kernel:
// uploads the 8-word target at ptarget into pTarget first, then uploads the
// padded 80-byte block header at pdata into c_PaddedMessage80.
__host__
void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget)
{
	// target used by qubit_luffa512_gpu_finalhash_80
	cudaMemcpyToSymbol(pTarget, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice);

	// the message block is built and uploaded exactly as in the plain variant
	qubit_luffa512_cpu_setBlock_80(pdata);
}

12
util.c

@ -1440,6 +1440,14 @@ void print_hash_tests(void)
blake256hash(&hash[0], &buf[0], 14); blake256hash(&hash[0], &buf[0], 14);
printpfx("blake", hash); printpfx("blake", hash);
memset(hash, 0, sizeof hash);
deephash(&hash[0], &buf[0]);
printpfx("deep", hash);
memset(hash, 0, sizeof hash);
doomhash(&hash[0], &buf[0]);
printpfx("doom", hash);
memset(hash, 0, sizeof hash); memset(hash, 0, sizeof hash);
fresh_hash(&hash[0], &buf[0]); fresh_hash(&hash[0], &buf[0]);
printpfx("fresh", hash); printpfx("fresh", hash);
@ -1476,6 +1484,10 @@ void print_hash_tests(void)
quarkhash(&hash[0], &buf[0]); quarkhash(&hash[0], &buf[0]);
printpfx("quark", hash); printpfx("quark", hash);
memset(hash, 0, sizeof hash);
qubithash(&hash[0], &buf[0]);
printpfx("qubit", hash);
memset(hash, 0, sizeof hash); memset(hash, 0, sizeof hash);
wcoinhash(&hash[0], &buf[0]); wcoinhash(&hash[0], &buf[0]);
printpfx("whirl", hash); printpfx("whirl", hash);

26
x11/fresh.cu

@ -5,9 +5,9 @@ extern "C" {
#include "sph/sph_shavite.h" #include "sph/sph_shavite.h"
#include "sph/sph_simd.h" #include "sph/sph_simd.h"
#include "sph/sph_echo.h" #include "sph/sph_echo.h"
}
#include "miner.h" #include "miner.h"
#include "cuda_helper.h" #include "cuda_helper.h"
}
// to test gpu hash on a null buffer // to test gpu hash on a null buffer
#define NULLTEST 0 #define NULLTEST 0
@ -15,7 +15,6 @@ extern "C" {
static uint32_t *d_hash[8]; static uint32_t *d_hash[8];
extern int device_map[8]; extern int device_map[8];
extern bool opt_benchmark;
extern void x11_shavite512_cpu_init(int thr_id, int threads); extern void x11_shavite512_cpu_init(int thr_id, int threads);
extern void x11_shavite512_setBlock_80(void *pdata); extern void x11_shavite512_setBlock_80(void *pdata);
@ -28,10 +27,6 @@ extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounc
extern void x11_echo512_cpu_init(int thr_id, int threads); extern void x11_echo512_cpu_init(int thr_id, int threads);
extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void cuda_check_cpu_init(int thr_id, int threads);
extern void cuda_check_cpu_setTarget(const void *ptarget);
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
extern void quark_compactTest_cpu_init(int thr_id, int threads); extern void quark_compactTest_cpu_init(int thr_id, int threads);
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
@ -75,15 +70,6 @@ extern "C" void fresh_hash(void *state, const void *input)
memcpy(state, hash, 32); memcpy(state, hash, 32);
} }
#if NULLTEST
static void print_hash(unsigned char *hash)
{
for (int i=0; i < 32; i += 4) {
printf("%02x%02x%02x%02x ", hash[i], hash[i+1], hash[i+2], hash[i+3]);
}
}
#endif
extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata, extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done) unsigned long *hashes_done)
@ -92,15 +78,9 @@ extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata,
const int throughput = 256*256*8; const int throughput = 256*256*8;
static bool init[8] = {0,0,0,0,0,0,0,0}; static bool init[8] = {0,0,0,0,0,0,0,0};
uint32_t endiandata[20]; uint32_t endiandata[20];
uint32_t Htarg = ptarget[7];
if (opt_benchmark) if (opt_benchmark)
((uint32_t*)ptarget)[7] = Htarg = 0x0000ff; ((uint32_t*)ptarget)[7] = 0x00ff;
#if NULLTEST
for (int k=0; k < 20; k++)
pdata[k] = 0;
#endif
if (!init[thr_id]) if (!init[thr_id])
{ {
@ -123,6 +103,8 @@ extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata,
x11_shavite512_setBlock_80((void*)endiandata); x11_shavite512_setBlock_80((void*)endiandata);
cuda_check_cpu_setTarget(ptarget); cuda_check_cpu_setTarget(ptarget);
do { do {
uint32_t Htarg = ptarget[7];
uint32_t foundNonce; uint32_t foundNonce;
int order = 0; int order = 0;

12
x11/x11.cu

@ -1,4 +1,3 @@
extern "C" extern "C"
{ {
#include "sph/sph_blake.h" #include "sph/sph_blake.h"
@ -13,13 +12,13 @@ extern "C"
#include "sph/sph_shavite.h" #include "sph/sph_shavite.h"
#include "sph/sph_simd.h" #include "sph/sph_simd.h"
#include "sph/sph_echo.h" #include "sph/sph_echo.h"
}
#include "miner.h" #include "miner.h"
#include "cuda_helper.h" #include "cuda_helper.h"
#include <stdio.h> #include <stdio.h>
#include <memory.h> #include <memory.h>
}
// in cpu-miner.c // in cpu-miner.c
extern int device_map[8]; extern int device_map[8];
@ -62,14 +61,9 @@ extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounc
extern void x11_echo512_cpu_init(int thr_id, int threads); extern void x11_echo512_cpu_init(int thr_id, int threads);
extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void cuda_check_cpu_init(int thr_id, int threads);
extern void cuda_check_cpu_setTarget(const void *ptarget);
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
extern void quark_compactTest_cpu_init(int thr_id, int threads); extern void quark_compactTest_cpu_init(int thr_id, int threads);
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order);
int order);
// X11 Hashfunktion // X11 Hashfunktion
extern "C" void x11hash(void *output, const void *input) extern "C" void x11hash(void *output, const void *input)

16
x13/x13.cu

@ -18,18 +18,16 @@ extern "C"
#include "sph/sph_hamsi.h" #include "sph/sph_hamsi.h"
#include "sph/sph_fugue.h" #include "sph/sph_fugue.h"
}
#include "miner.h" #include "miner.h"
#include "cuda_helper.h" #include "cuda_helper.h"
}
// aus cpu-miner.c
extern int device_map[8]; extern int device_map[8];
// Speicher für Input/Output der verketteten Hashfunktionen
static uint32_t *d_hash[8]; static uint32_t *d_hash[8];
extern void quark_blake512_cpu_init(int thr_id, int threads); extern void quark_blake512_cpu_init(int thr_id, int threads);
extern void quark_blake512_cpu_setBlock_80(void *pdata); extern void quark_blake512_cpu_setBlock_80(void *pdata);
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
@ -40,7 +38,6 @@ extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNoun
extern void quark_groestl512_cpu_init(int thr_id, int threads); extern void quark_groestl512_cpu_init(int thr_id, int threads);
extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
//extern void quark_doublegroestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_skein512_cpu_init(int thr_id, int threads); extern void quark_skein512_cpu_init(int thr_id, int threads);
extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
@ -72,14 +69,9 @@ extern void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNoun
extern void x13_fugue512_cpu_init(int thr_id, int threads); extern void x13_fugue512_cpu_init(int thr_id, int threads);
extern void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void cuda_check_cpu_init(int thr_id, int threads);
extern void cuda_check_cpu_setTarget(const void *ptarget);
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
extern void quark_compactTest_cpu_init(int thr_id, int threads); extern void quark_compactTest_cpu_init(int thr_id, int threads);
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order);
int order);
// X13 Hashfunktion // X13 Hashfunktion
extern "C" void x13hash(void *output, const void *input) extern "C" void x13hash(void *output, const void *input)
@ -159,8 +151,6 @@ extern "C" void x13hash(void *output, const void *input)
} }
extern bool opt_benchmark;
extern "C" int scanhash_x13(int thr_id, uint32_t *pdata, extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done) unsigned long *hashes_done)

7
x15/whirlpool.cu

@ -9,20 +9,17 @@ extern "C"
// from cpu-miner.c // from cpu-miner.c
extern int device_map[8]; extern int device_map[8];
extern bool opt_benchmark;
// Speicher für Input/Output der verketteten Hashfunktionen // Speicher für Input/Output der verketteten Hashfunktionen
static uint32_t *d_hash[8]; static uint32_t *d_hash[8];
extern void x15_whirlpool_cpu_init(int thr_id, int threads, int mode); extern void x15_whirlpool_cpu_init(int thr_id, int threads, int mode);
extern void x15_whirlpool_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void whirlpool512_setBlock_80(void *pdata, const void *ptarget); extern void whirlpool512_setBlock_80(void *pdata, const void *ptarget);
extern void whirlpool512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); extern void whirlpool512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void x15_whirlpool_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern uint32_t whirlpool512_cpu_finalhash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern uint32_t whirlpool512_cpu_finalhash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void cuda_check_cpu_init(int thr_id, int threads);
extern void cuda_check_cpu_setTarget(const void *ptarget);
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
// CPU Hash function // CPU Hash function
extern "C" void wcoinhash(void *state, const void *input) extern "C" void wcoinhash(void *state, const void *input)

4
x15/x14.cu

@ -76,10 +76,6 @@ extern void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNoun
extern void x14_shabal512_cpu_init(int thr_id, int threads); extern void x14_shabal512_cpu_init(int thr_id, int threads);
extern void x14_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x14_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void cuda_check_cpu_init(int thr_id, int threads);
extern void cuda_check_cpu_setTarget(const void *ptarget);
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
extern void quark_compactTest_cpu_init(int thr_id, int threads); extern void quark_compactTest_cpu_init(int thr_id, int threads);
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order); uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order);

4
x15/x15.cu

@ -81,10 +81,6 @@ extern void x15_whirlpool_cpu_init(int thr_id, int threads, int mode);
extern void x15_whirlpool_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x15_whirlpool_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x15_whirlpool_cpu_free(int thr_id); extern void x15_whirlpool_cpu_free(int thr_id);
extern void cuda_check_cpu_init(int thr_id, int threads);
extern void cuda_check_cpu_setTarget(const void *ptarget);
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
extern void quark_compactTest_cpu_init(int thr_id, int threads); extern void quark_compactTest_cpu_init(int thr_id, int threads);
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order); uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order);

5
x17/x17.cu

@ -89,11 +89,6 @@ extern void x17_sha512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce
extern void x17_haval256_cpu_init(int thr_id, int threads); extern void x17_haval256_cpu_init(int thr_id, int threads);
extern void x17_haval256_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x17_haval256_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void cuda_check_cpu_init(int thr_id, int threads);
extern void cuda_check_cpu_setTarget(const void *ptarget);
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
extern void quark_compactTest_cpu_init(int thr_id, int threads); extern void quark_compactTest_cpu_init(int thr_id, int threads);
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,

Loading…
Cancel
Save