Merge branch 'blake-dev' into blake
This commit is contained in:
commit
474ee97d6e
@ -16,7 +16,7 @@ bin_PROGRAMS = ccminer
|
|||||||
ccminer_SOURCES = elist.h miner.h compat.h \
|
ccminer_SOURCES = elist.h miner.h compat.h \
|
||||||
compat/inttypes.h compat/stdbool.h compat/unistd.h \
|
compat/inttypes.h compat/stdbool.h compat/unistd.h \
|
||||||
compat/sys/time.h compat/getopt/getopt.h \
|
compat/sys/time.h compat/getopt/getopt.h \
|
||||||
cpu-miner.c util.c hefty1.c scrypt.c \
|
cpu-miner.c util.c crc32.c hefty1.c scrypt.c \
|
||||||
hashlog.cpp \
|
hashlog.cpp \
|
||||||
heavy/heavy.cu \
|
heavy/heavy.cu \
|
||||||
heavy/cuda_blake512.cu heavy/cuda_blake512.h \
|
heavy/cuda_blake512.cu heavy/cuda_blake512.h \
|
||||||
@ -33,7 +33,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \
|
|||||||
quark/cuda_jh512.cu quark/cuda_quark_blake512.cu quark/cuda_quark_groestl512.cu quark/cuda_skein512.cu \
|
quark/cuda_jh512.cu quark/cuda_quark_blake512.cu quark/cuda_quark_groestl512.cu quark/cuda_skein512.cu \
|
||||||
quark/cuda_bmw512.cu quark/cuda_quark_keccak512.cu quark/quarkcoin.cu quark/animecoin.cu \
|
quark/cuda_bmw512.cu quark/cuda_quark_keccak512.cu quark/quarkcoin.cu quark/animecoin.cu \
|
||||||
quark/cuda_quark_compactionTest.cu \
|
quark/cuda_quark_compactionTest.cu \
|
||||||
cuda_nist5.cu blake32.cu \
|
cuda_nist5.cu blake32.cu pentablake.cu \
|
||||||
sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c \
|
sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c \
|
||||||
sph/cubehash.c sph/echo.c sph/luffa.c sph/sha2.c sph/shavite.c sph/simd.c \
|
sph/cubehash.c sph/echo.c sph/luffa.c sph/sha2.c sph/shavite.c sph/simd.c \
|
||||||
sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \
|
sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
ccMiner release 1.4.1-tpruvot (Sep 06th 2014) - "Cached Blake"
|
ccMiner release 1.4.2-tpruvot (Sep 09th 2014) - "Pentablake"
|
||||||
---------------------------------------------------------------
|
---------------------------------------------------------------
|
||||||
|
|
||||||
***************************************************************
|
***************************************************************
|
||||||
@ -35,6 +35,7 @@ TalkCoin
|
|||||||
DarkCoin and other X11 coins
|
DarkCoin and other X11 coins
|
||||||
NEOS blake (256 14-rounds)
|
NEOS blake (256 14-rounds)
|
||||||
BlakeCoin (256 8-rounds)
|
BlakeCoin (256 8-rounds)
|
||||||
|
Pentablake (Blake 512 x5)
|
||||||
|
|
||||||
where some of these coins have a VERY NOTABLE nVidia advantage
|
where some of these coins have a VERY NOTABLE nVidia advantage
|
||||||
over competing AMD (OpenCL) implementations.
|
over competing AMD (OpenCL) implementations.
|
||||||
@ -65,6 +66,7 @@ its command line interface and options.
|
|||||||
blake use to mine NEOS (Blake 256)
|
blake use to mine NEOS (Blake 256)
|
||||||
blakecoin use to mine Old Blake 256
|
blakecoin use to mine Old Blake 256
|
||||||
nist5 use to mine TalkCoin
|
nist5 use to mine TalkCoin
|
||||||
|
penta use to mine Joincoin / Pentablake
|
||||||
fresh use to mine Freshcoin
|
fresh use to mine Freshcoin
|
||||||
whirl use to mine Whirlcoin
|
whirl use to mine Whirlcoin
|
||||||
x11 use to mine DarkCoin
|
x11 use to mine DarkCoin
|
||||||
|
79
blake32.cu
79
blake32.cu
@ -15,6 +15,9 @@ extern "C" {
|
|||||||
/* threads per block */
|
/* threads per block */
|
||||||
#define TPB 128
|
#define TPB 128
|
||||||
|
|
||||||
|
/* crc32.c */
|
||||||
|
extern "C" uint32_t crc32_u32t(const uint32_t *buf, size_t size);
|
||||||
|
|
||||||
extern "C" int blake256_rounds = 14;
|
extern "C" int blake256_rounds = 14;
|
||||||
|
|
||||||
/* hash by cpu with blake 256 */
|
/* hash by cpu with blake 256 */
|
||||||
@ -41,8 +44,6 @@ extern bool opt_n_threads;
|
|||||||
extern bool opt_benchmark;
|
extern bool opt_benchmark;
|
||||||
extern int device_map[8];
|
extern int device_map[8];
|
||||||
|
|
||||||
uint32_t crc32(const uint32_t *buf, size_t size);
|
|
||||||
|
|
||||||
__constant__
|
__constant__
|
||||||
static uint32_t __align__(32) c_Target[8];
|
static uint32_t __align__(32) c_Target[8];
|
||||||
|
|
||||||
@ -331,7 +332,7 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
|
|||||||
|
|
||||||
blake256_cpu_setBlock_80(pdata, ptarget);
|
blake256_cpu_setBlock_80(pdata, ptarget);
|
||||||
#if USE_CACHE
|
#if USE_CACHE
|
||||||
crcsum = crc32(pdata, 64);
|
crcsum = crc32_u32t(pdata, 64);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
do {
|
do {
|
||||||
@ -360,7 +361,8 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
|
|||||||
be32enc(&endiandata[19], extra_results[0]);
|
be32enc(&endiandata[19], extra_results[0]);
|
||||||
blake256hash(vhashcpu, endiandata, blakerounds);
|
blake256hash(vhashcpu, endiandata, blakerounds);
|
||||||
if (vhashcpu[7] <= Htarg && fulltest(vhashcpu, ptarget)) {
|
if (vhashcpu[7] <= Htarg && fulltest(vhashcpu, ptarget)) {
|
||||||
applog(LOG_NOTICE, "GPU found more than one result yippee!");
|
applog(LOG_NOTICE, "GPU found more than one result " CL_GRN "yippee!");
|
||||||
|
rc = 2;
|
||||||
} else {
|
} else {
|
||||||
extra_results[0] = MAXU;
|
extra_results[0] = MAXU;
|
||||||
}
|
}
|
||||||
@ -379,9 +381,14 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((uint64_t) pdata[19] + throughput > (uint64_t) max_nonce) {
|
||||||
|
pdata[19] = max_nonce - first_nonce + 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
pdata[19] += throughput;
|
pdata[19] += throughput;
|
||||||
|
|
||||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
exit_scan:
|
exit_scan:
|
||||||
*hashes_done = pdata[19] - first_nonce + 1;
|
*hashes_done = pdata[19] - first_nonce + 1;
|
||||||
@ -394,66 +401,6 @@ exit_scan:
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
// wait proper end of all threads
|
// wait proper end of all threads
|
||||||
cudaDeviceSynchronize();
|
//cudaDeviceSynchronize();
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t crc32_tab[] = {
|
|
||||||
0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
|
|
||||||
0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
|
|
||||||
0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
|
|
||||||
0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
|
|
||||||
0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
|
|
||||||
0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
|
|
||||||
0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c,
|
|
||||||
0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
|
|
||||||
0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
|
|
||||||
0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
|
|
||||||
0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
|
|
||||||
0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
|
|
||||||
0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
|
|
||||||
0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
|
|
||||||
0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
|
|
||||||
0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
|
|
||||||
0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
|
|
||||||
0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
|
|
||||||
0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
|
|
||||||
0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
|
|
||||||
0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
|
|
||||||
0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
|
|
||||||
0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
|
|
||||||
0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
|
|
||||||
0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
|
|
||||||
0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
|
|
||||||
0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
|
|
||||||
0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
|
|
||||||
0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
|
|
||||||
0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
|
|
||||||
0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
|
|
||||||
0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
|
|
||||||
0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
|
|
||||||
0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
|
|
||||||
0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
|
|
||||||
0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
|
|
||||||
0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
|
|
||||||
0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
|
|
||||||
0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
|
|
||||||
0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
|
|
||||||
0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
|
|
||||||
0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
|
|
||||||
0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
|
|
||||||
};
|
|
||||||
|
|
||||||
uint32_t crc32(const uint32_t *buf, size_t size)
|
|
||||||
{
|
|
||||||
const uint8_t *p;
|
|
||||||
uint32_t crc = 0;
|
|
||||||
|
|
||||||
p = (uint8_t *) buf;
|
|
||||||
crc = crc ^ ~0U;
|
|
||||||
|
|
||||||
while (size--)
|
|
||||||
crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8);
|
|
||||||
|
|
||||||
return crc ^ ~0U;
|
|
||||||
}
|
|
@ -241,6 +241,7 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
|
|||||||
<TreatWChar_tAsBuiltInType>false</TreatWChar_tAsBuiltInType>
|
<TreatWChar_tAsBuiltInType>false</TreatWChar_tAsBuiltInType>
|
||||||
<Optimization Condition="'$(Configuration)'=='Release'">Full</Optimization>
|
<Optimization Condition="'$(Configuration)'=='Release'">Full</Optimization>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
|
<ClCompile Include="crc32.c" />
|
||||||
<ClCompile Include="fuguecoin.cpp" />
|
<ClCompile Include="fuguecoin.cpp" />
|
||||||
<ClCompile Include="groestlcoin.cpp" />
|
<ClCompile Include="groestlcoin.cpp" />
|
||||||
<ClCompile Include="hashlog.cpp" />
|
<ClCompile Include="hashlog.cpp" />
|
||||||
@ -404,6 +405,12 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
|
|||||||
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
|
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
|
||||||
<FastMath>true</FastMath>
|
<FastMath>true</FastMath>
|
||||||
</CudaCompile>
|
</CudaCompile>
|
||||||
|
<CudaCompile Include="pentablake.cu">
|
||||||
|
<MaxRegCount>80</MaxRegCount>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options="-O2 -dlcm=cg" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<FastMath>true</FastMath>
|
||||||
|
</CudaCompile>
|
||||||
<CudaCompile Include="quark\animecoin.cu">
|
<CudaCompile Include="quark\animecoin.cu">
|
||||||
<AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
|
<AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
|
||||||
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
|
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
|
||||||
|
@ -96,6 +96,9 @@
|
|||||||
<ClCompile Include="cpu-miner.c">
|
<ClCompile Include="cpu-miner.c">
|
||||||
<Filter>Source Files</Filter>
|
<Filter>Source Files</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
|
<ClCompile Include="crc32.c">
|
||||||
|
<Filter>Source Files</Filter>
|
||||||
|
</ClCompile>
|
||||||
<ClCompile Include="hefty1.c">
|
<ClCompile Include="hefty1.c">
|
||||||
<Filter>Source Files</Filter>
|
<Filter>Source Files</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
@ -442,5 +445,8 @@
|
|||||||
<CudaCompile Include="blake32.cu">
|
<CudaCompile Include="blake32.cu">
|
||||||
<Filter>Source Files\CUDA</Filter>
|
<Filter>Source Files\CUDA</Filter>
|
||||||
</CudaCompile>
|
</CudaCompile>
|
||||||
|
<CudaCompile Include="pentablake.cu">
|
||||||
|
<Filter>Source Files\CUDA</Filter>
|
||||||
|
</CudaCompile>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
</Project>
|
</Project>
|
@ -1,4 +1,4 @@
|
|||||||
AC_INIT([ccminer], [2014.09.06])
|
AC_INIT([ccminer], [2014.09.09])
|
||||||
|
|
||||||
AC_PREREQ([2.59c])
|
AC_PREREQ([2.59c])
|
||||||
AC_CANONICAL_SYSTEM
|
AC_CANONICAL_SYSTEM
|
||||||
|
142
cpu-miner.c
142
cpu-miner.c
@ -136,8 +136,9 @@ typedef enum {
|
|||||||
ALGO_JACKPOT,
|
ALGO_JACKPOT,
|
||||||
ALGO_MJOLLNIR, /* Mjollnir hash */
|
ALGO_MJOLLNIR, /* Mjollnir hash */
|
||||||
ALGO_MYR_GR,
|
ALGO_MYR_GR,
|
||||||
ALGO_QUARK,
|
|
||||||
ALGO_NIST5,
|
ALGO_NIST5,
|
||||||
|
ALGO_PENTABLAKE,
|
||||||
|
ALGO_QUARK,
|
||||||
ALGO_WHC,
|
ALGO_WHC,
|
||||||
ALGO_X11,
|
ALGO_X11,
|
||||||
ALGO_X13,
|
ALGO_X13,
|
||||||
@ -159,6 +160,7 @@ static const char *algo_names[] = {
|
|||||||
"mjollnir",
|
"mjollnir",
|
||||||
"myr-gr",
|
"myr-gr",
|
||||||
"nist5",
|
"nist5",
|
||||||
|
"penta",
|
||||||
"quark",
|
"quark",
|
||||||
"whirl",
|
"whirl",
|
||||||
"x11",
|
"x11",
|
||||||
@ -170,7 +172,6 @@ static const char *algo_names[] = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
bool opt_debug = false;
|
bool opt_debug = false;
|
||||||
bool opt_debug_rpc = false;
|
|
||||||
bool opt_protocol = false;
|
bool opt_protocol = false;
|
||||||
bool opt_benchmark = false;
|
bool opt_benchmark = false;
|
||||||
bool want_longpoll = true;
|
bool want_longpoll = true;
|
||||||
@ -179,7 +180,7 @@ bool want_stratum = true;
|
|||||||
bool have_stratum = false;
|
bool have_stratum = false;
|
||||||
static bool submit_old = false;
|
static bool submit_old = false;
|
||||||
bool use_syslog = false;
|
bool use_syslog = false;
|
||||||
bool use_colors = false;
|
bool use_colors = true;
|
||||||
static bool opt_background = false;
|
static bool opt_background = false;
|
||||||
bool opt_quiet = false;
|
bool opt_quiet = false;
|
||||||
static int opt_retries = -1;
|
static int opt_retries = -1;
|
||||||
@ -242,6 +243,7 @@ Options:\n\
|
|||||||
mjollnir Mjollnircoin hash\n\
|
mjollnir Mjollnircoin hash\n\
|
||||||
myr-gr Myriad-Groestl hash\n\
|
myr-gr Myriad-Groestl hash\n\
|
||||||
nist5 NIST5 (TalkCoin) hash\n\
|
nist5 NIST5 (TalkCoin) hash\n\
|
||||||
|
penta Pentablake hash (5x Blake 512)\n\
|
||||||
quark Quark hash\n\
|
quark Quark hash\n\
|
||||||
whirl Whirlcoin (old whirlpool)\n\
|
whirl Whirlcoin (old whirlpool)\n\
|
||||||
x11 X11 (DarkCoin) hash\n\
|
x11 X11 (DarkCoin) hash\n\
|
||||||
@ -437,7 +439,7 @@ static int share_result(int result, const char *reason)
|
|||||||
(result ? CL_GRN "yay!!!" : CL_RED "booooo")
|
(result ? CL_GRN "yay!!!" : CL_RED "booooo")
|
||||||
: (result ? "(yay!!!)" : "(booooo)"));
|
: (result ? "(yay!!!)" : "(booooo)"));
|
||||||
|
|
||||||
if (reason && !opt_quiet) {
|
if (reason) {
|
||||||
applog(LOG_WARNING, "reject reason: %s", reason);
|
applog(LOG_WARNING, "reject reason: %s", reason);
|
||||||
if (strncmp(reason, "low difficulty share", 20) == 0) {
|
if (strncmp(reason, "low difficulty share", 20) == 0) {
|
||||||
opt_difficulty = (opt_difficulty * 2.0) / 3.0;
|
opt_difficulty = (opt_difficulty * 2.0) / 3.0;
|
||||||
@ -457,11 +459,14 @@ static bool submit_upstream_work(CURL *curl, struct work *work)
|
|||||||
bool rc = false;
|
bool rc = false;
|
||||||
|
|
||||||
/* pass if the previous hash is not the current previous hash */
|
/* pass if the previous hash is not the current previous hash */
|
||||||
|
pthread_mutex_lock(&g_work_lock);
|
||||||
if (memcmp(work->data + 1, g_work.data + 1, 32)) {
|
if (memcmp(work->data + 1, g_work.data + 1, 32)) {
|
||||||
|
pthread_mutex_unlock(&g_work_lock);
|
||||||
if (opt_debug)
|
if (opt_debug)
|
||||||
applog(LOG_DEBUG, "DEBUG: stale work detected, discarding");
|
applog(LOG_DEBUG, "DEBUG: stale work detected, discarding");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
pthread_mutex_unlock(&g_work_lock);
|
||||||
|
|
||||||
if (have_stratum) {
|
if (have_stratum) {
|
||||||
uint32_t sent;
|
uint32_t sent;
|
||||||
@ -544,10 +549,6 @@ static bool submit_upstream_work(CURL *curl, struct work *work)
|
|||||||
json_decref(val);
|
json_decref(val);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (opt_debug_rpc) {
|
|
||||||
applog(LOG_DEBUG, "submit: %s", s);
|
|
||||||
}
|
|
||||||
|
|
||||||
rc = true;
|
rc = true;
|
||||||
|
|
||||||
out:
|
out:
|
||||||
@ -786,13 +787,20 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
|
|||||||
memcpy(work->xnonce2, sctx->job.xnonce2, sctx->xnonce2_size);
|
memcpy(work->xnonce2, sctx->job.xnonce2, sctx->xnonce2_size);
|
||||||
|
|
||||||
/* Generate merkle root */
|
/* Generate merkle root */
|
||||||
if (opt_algo == ALGO_HEAVY || opt_algo == ALGO_MJOLLNIR)
|
switch (opt_algo) {
|
||||||
heavycoin_hash(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size);
|
case ALGO_HEAVY:
|
||||||
else
|
case ALGO_MJOLLNIR:
|
||||||
if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL || opt_algo == ALGO_WHC || opt_algo == ALGO_BLAKECOIN)
|
heavycoin_hash(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size);
|
||||||
SHA256((unsigned char*)sctx->job.coinbase, sctx->job.coinbase_size, (unsigned char*)merkle_root);
|
break;
|
||||||
else
|
case ALGO_FUGUE256:
|
||||||
sha256d(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size);
|
case ALGO_GROESTL:
|
||||||
|
case ALGO_BLAKECOIN:
|
||||||
|
case ALGO_WHC:
|
||||||
|
SHA256((uint8_t*)sctx->job.coinbase, sctx->job.coinbase_size, (uint8_t*)merkle_root);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
sha256d(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size);
|
||||||
|
}
|
||||||
|
|
||||||
for (i = 0; i < sctx->job.merkle_count; i++) {
|
for (i = 0; i < sctx->job.merkle_count; i++) {
|
||||||
memcpy(merkle_root + 32, sctx->job.merkle[i], 32);
|
memcpy(merkle_root + 32, sctx->job.merkle[i], 32);
|
||||||
@ -864,7 +872,9 @@ static void *miner_thread(void *userdata)
|
|||||||
uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - (thr_id + 1);
|
uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - (thr_id + 1);
|
||||||
unsigned char *scratchbuf = NULL;
|
unsigned char *scratchbuf = NULL;
|
||||||
bool work_done = false;
|
bool work_done = false;
|
||||||
|
bool extrajob = false;
|
||||||
char s[16];
|
char s[16];
|
||||||
|
int rc = 0;
|
||||||
|
|
||||||
memset(&work, 0, sizeof(work)); // prevent work from being used uninitialized
|
memset(&work, 0, sizeof(work)); // prevent work from being used uninitialized
|
||||||
|
|
||||||
@ -891,20 +901,31 @@ static void *miner_thread(void *userdata)
|
|||||||
struct timeval tv_start, tv_end, diff;
|
struct timeval tv_start, tv_end, diff;
|
||||||
int64_t max64;
|
int64_t max64;
|
||||||
uint64_t umax64;
|
uint64_t umax64;
|
||||||
int rc;
|
|
||||||
|
|
||||||
// &work.data[19]
|
// &work.data[19]
|
||||||
int wcmplen = 76;
|
int wcmplen = 76;
|
||||||
uint32_t *nonceptr = (uint32_t*) (((char*)work.data) + wcmplen);
|
uint32_t *nonceptr = (uint32_t*) (((char*)work.data) + wcmplen);
|
||||||
|
|
||||||
if (have_stratum) {
|
if (have_stratum) {
|
||||||
while (time(NULL) >= (g_work_time + opt_scantime) && !work_done)
|
uint32_t sleeptime = 0;
|
||||||
usleep(500*1000);
|
while (!work_done && time(NULL) >= (g_work_time + opt_scantime)) {
|
||||||
work_done = false;
|
usleep(100*1000);
|
||||||
pthread_mutex_lock(&g_work_lock);
|
if (sleeptime > 4) {
|
||||||
|
extrajob = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
sleeptime++;
|
||||||
|
}
|
||||||
|
if (sleeptime && opt_debug && !opt_quiet)
|
||||||
|
applog(LOG_DEBUG, "sleeptime: %u ms", sleeptime*100);
|
||||||
nonceptr = (uint32_t*) (((char*)work.data) + wcmplen);
|
nonceptr = (uint32_t*) (((char*)work.data) + wcmplen);
|
||||||
if ((*nonceptr) >= end_nonce)
|
pthread_mutex_lock(&g_work_lock);
|
||||||
|
extrajob |= work_done;
|
||||||
|
if ((*nonceptr) >= end_nonce || extrajob) {
|
||||||
|
work_done = false;
|
||||||
|
extrajob = false;
|
||||||
stratum_gen_work(&stratum, &g_work);
|
stratum_gen_work(&stratum, &g_work);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
int min_scantime = have_longpoll ? LP_SCANTIME : opt_scantime;
|
int min_scantime = have_longpoll ? LP_SCANTIME : opt_scantime;
|
||||||
/* obtain new work from internal workio thread */
|
/* obtain new work from internal workio thread */
|
||||||
@ -920,34 +941,54 @@ static void *miner_thread(void *userdata)
|
|||||||
g_work_time = time(NULL);
|
g_work_time = time(NULL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#if 0
|
||||||
|
if (!opt_benchmark && g_work.xnonce2_len == 0) {
|
||||||
|
applog(LOG_ERR, "work data not read yet");
|
||||||
|
extrajob = true;
|
||||||
|
work_done = true;
|
||||||
|
sleep(1);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
if (rc > 1) {
|
||||||
|
/* if we found more than one on last loop */
|
||||||
|
/* todo: handle an array to get them directly */
|
||||||
|
pthread_mutex_unlock(&g_work_lock);
|
||||||
|
goto continue_scan;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (memcmp(work.target, g_work.target, sizeof(work.target))) {
|
||||||
|
if (opt_debug) {
|
||||||
|
applog(LOG_DEBUG, "job %s target change:", g_work.job_id);
|
||||||
|
applog_hash((uint8_t*) work.target);
|
||||||
|
applog_compare_hash((uint8_t*) g_work.target, (uint8_t*) work.target);
|
||||||
|
}
|
||||||
|
memcpy(work.target, g_work.target, sizeof(work.target));
|
||||||
|
(*nonceptr) = (0xffffffffUL / opt_n_threads) * thr_id; // 0 if single thr
|
||||||
|
/* on new target, ignoring nonce, clear sent data (hashlog) */
|
||||||
|
if (memcmp(work.target, g_work.target, sizeof(work.target))) {
|
||||||
|
hashlog_purge_job(work.job_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
if (memcmp(work.data, g_work.data, wcmplen)) {
|
if (memcmp(work.data, g_work.data, wcmplen)) {
|
||||||
if (opt_debug) {
|
if (opt_debug) {
|
||||||
applog(LOG_DEBUG, "job %s work updated", g_work.job_id);
|
for (int n=0; n <= (wcmplen-8); n+=8) {
|
||||||
for (int n=0; n<wcmplen; n+=8) {
|
|
||||||
if (memcmp(work.data + n, g_work.data + n, 8)) {
|
if (memcmp(work.data + n, g_work.data + n, 8)) {
|
||||||
applog(LOG_DEBUG, "diff detected at offset %d", n);
|
applog(LOG_DEBUG, "job %s work updated at offset %d:", g_work.job_id, n);
|
||||||
applog_hash((uint8_t*) work.data + n);
|
applog_hash((uint8_t*) work.data + n);
|
||||||
applog_hash((uint8_t*) g_work.data + n);
|
applog_compare_hash((uint8_t*) g_work.data + n, (uint8_t*) work.data + n);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
memcpy(&work, &g_work, sizeof(struct work));
|
memcpy(&work, &g_work, sizeof(struct work));
|
||||||
(*nonceptr) = (0xffffffffUL / opt_n_threads) * thr_id; // 0 if single thr
|
(*nonceptr) = (0xffffffffUL / opt_n_threads) * thr_id; // 0 if single thr
|
||||||
} else if (memcmp(work.target, g_work.target, sizeof(work.target))) {
|
|
||||||
if (opt_debug) {
|
|
||||||
applog(LOG_DEBUG, "job %s target change", g_work.job_id);
|
|
||||||
applog_hash((uint8_t*) work.target);
|
|
||||||
applog_hash((uint8_t*) g_work.target);
|
|
||||||
}
|
|
||||||
memcpy(work.target, g_work.target, sizeof(work.target));
|
|
||||||
(*nonceptr) = (0xffffffffUL / opt_n_threads) * thr_id; // 0 if single thr
|
|
||||||
} else
|
} else
|
||||||
(*nonceptr)++; //??
|
(*nonceptr)++; //??
|
||||||
pthread_mutex_unlock(&g_work_lock);
|
|
||||||
work_restart[thr_id].restart = 0;
|
work_restart[thr_id].restart = 0;
|
||||||
|
|
||||||
if (opt_debug)
|
if (opt_debug)
|
||||||
applog(LOG_WARNING, "job %s %08x", g_work.job_id, (*nonceptr));
|
applog(LOG_WARNING, "job %s %08x", g_work.job_id, (*nonceptr));
|
||||||
|
pthread_mutex_unlock(&g_work_lock);
|
||||||
|
|
||||||
/* adjust max_nonce to meet target scan time */
|
/* adjust max_nonce to meet target scan time */
|
||||||
if (have_stratum)
|
if (have_stratum)
|
||||||
@ -959,15 +1000,18 @@ static void *miner_thread(void *userdata)
|
|||||||
max64 *= (int64_t)thr_hashrates[thr_id];
|
max64 *= (int64_t)thr_hashrates[thr_id];
|
||||||
|
|
||||||
if (max64 <= 0) {
|
if (max64 <= 0) {
|
||||||
|
/* should not be set too high,
|
||||||
|
else you can miss multiple nonces */
|
||||||
switch (opt_algo) {
|
switch (opt_algo) {
|
||||||
case ALGO_JACKPOT:
|
case ALGO_JACKPOT:
|
||||||
max64 = 0x1fffLL;
|
max64 = 0x1fffLL;
|
||||||
break;
|
break;
|
||||||
case ALGO_BLAKECOIN:
|
case ALGO_BLAKECOIN:
|
||||||
max64 = 0x3ffffffLL;
|
max64 = 0x3ffffffLL;
|
||||||
|
break;
|
||||||
case ALGO_BLAKE:
|
case ALGO_BLAKE:
|
||||||
/* based on the 750Ti hashrate (100kH) */
|
/* based on the 750Ti hashrate (100kH) */
|
||||||
max64 = 0x3ffffffLL;
|
max64 = 0x1ffffffLL;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
max64 = 0xfffffLL;
|
max64 = 0xfffffLL;
|
||||||
@ -1000,12 +1044,12 @@ static void *miner_thread(void *userdata)
|
|||||||
stall |= (start_nonce > range.scanned[0] && start_nonce < range.scanned[1]);
|
stall |= (start_nonce > range.scanned[0] && start_nonce < range.scanned[1]);
|
||||||
|
|
||||||
if (stall) {
|
if (stall) {
|
||||||
if (opt_algo)
|
if (opt_debug && !opt_quiet)
|
||||||
applog(LOG_DEBUG, "job done, wait for a new one...");
|
applog(LOG_DEBUG, "job done, wait for a new one...");
|
||||||
work_restart[thr_id].restart = 1;
|
work_restart[thr_id].restart = 1;
|
||||||
hashlog_purge_old();
|
hashlog_purge_old();
|
||||||
// wait a bit for a new job...
|
// wait a bit for a new job...
|
||||||
sleep(1);
|
usleep(500*1000);
|
||||||
(*nonceptr) = end_nonce + 1;
|
(*nonceptr) = end_nonce + 1;
|
||||||
work_done = true;
|
work_done = true;
|
||||||
continue;
|
continue;
|
||||||
@ -1023,6 +1067,7 @@ static void *miner_thread(void *userdata)
|
|||||||
(*nonceptr) = start_nonce;
|
(*nonceptr) = start_nonce;
|
||||||
|
|
||||||
hashes_done = 0;
|
hashes_done = 0;
|
||||||
|
continue_scan:
|
||||||
gettimeofday(&tv_start, NULL);
|
gettimeofday(&tv_start, NULL);
|
||||||
|
|
||||||
/* scan nonces for a proof-of-work hash */
|
/* scan nonces for a proof-of-work hash */
|
||||||
@ -1089,6 +1134,11 @@ static void *miner_thread(void *userdata)
|
|||||||
max_nonce, &hashes_done);
|
max_nonce, &hashes_done);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case ALGO_PENTABLAKE:
|
||||||
|
rc = scanhash_pentablake(thr_id, work.data, work.target,
|
||||||
|
max_nonce, &hashes_done);
|
||||||
|
break;
|
||||||
|
|
||||||
case ALGO_WHC:
|
case ALGO_WHC:
|
||||||
rc = scanhash_whc(thr_id, work.data, work.target,
|
rc = scanhash_whc(thr_id, work.data, work.target,
|
||||||
max_nonce, &hashes_done);
|
max_nonce, &hashes_done);
|
||||||
@ -1133,8 +1183,11 @@ static void *miner_thread(void *userdata)
|
|||||||
timeval_subtract(&diff, &tv_end, &tv_start);
|
timeval_subtract(&diff, &tv_end, &tv_start);
|
||||||
if (diff.tv_usec || diff.tv_sec) {
|
if (diff.tv_usec || diff.tv_sec) {
|
||||||
pthread_mutex_lock(&stats_lock);
|
pthread_mutex_lock(&stats_lock);
|
||||||
thr_hashrates[thr_id] =
|
if (diff.tv_sec + 1e-6 * diff.tv_usec > 0.0) {
|
||||||
hashes_done / (diff.tv_sec + 1e-6 * diff.tv_usec);
|
thr_hashrates[thr_id] = hashes_done / (diff.tv_sec + 1e-6 * diff.tv_usec);
|
||||||
|
if (rc > 1)
|
||||||
|
thr_hashrates[thr_id] = (rc * hashes_done) / (diff.tv_sec + 1e-6 * diff.tv_usec);
|
||||||
|
}
|
||||||
pthread_mutex_unlock(&stats_lock);
|
pthread_mutex_unlock(&stats_lock);
|
||||||
}
|
}
|
||||||
if (!opt_quiet) {
|
if (!opt_quiet) {
|
||||||
@ -1342,7 +1395,6 @@ static void *stratum_thread(void *userdata)
|
|||||||
pthread_mutex_lock(&g_work_lock);
|
pthread_mutex_lock(&g_work_lock);
|
||||||
stratum_gen_work(&stratum, &g_work);
|
stratum_gen_work(&stratum, &g_work);
|
||||||
time(&g_work_time);
|
time(&g_work_time);
|
||||||
pthread_mutex_unlock(&g_work_lock);
|
|
||||||
if (stratum.job.clean) {
|
if (stratum.job.clean) {
|
||||||
if (!opt_quiet)
|
if (!opt_quiet)
|
||||||
applog(LOG_BLUE, "%s send a new %s block %d", short_url, algo_names[opt_algo],
|
applog(LOG_BLUE, "%s send a new %s block %d", short_url, algo_names[opt_algo],
|
||||||
@ -1353,6 +1405,7 @@ static void *stratum_thread(void *userdata)
|
|||||||
applog(LOG_BLUE, "%s send job %d for block %d", short_url,
|
applog(LOG_BLUE, "%s send job %d for block %d", short_url,
|
||||||
strtoul(stratum.job.job_id, NULL, 16), stratum.bloc_height);
|
strtoul(stratum.job.job_id, NULL, 16), stratum.bloc_height);
|
||||||
}
|
}
|
||||||
|
pthread_mutex_unlock(&g_work_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!stratum_socket_full(&stratum, 120)) {
|
if (!stratum_socket_full(&stratum, 120)) {
|
||||||
@ -1374,7 +1427,7 @@ out:
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define PROGRAM_VERSION "1.4.1"
|
#define PROGRAM_VERSION "1.4.2"
|
||||||
static void show_version_and_exit(void)
|
static void show_version_and_exit(void)
|
||||||
{
|
{
|
||||||
printf("%s v%s\n"
|
printf("%s v%s\n"
|
||||||
@ -1438,12 +1491,11 @@ static void parse_arg (int key, char *arg)
|
|||||||
case 'C':
|
case 'C':
|
||||||
use_colors = true;
|
use_colors = true;
|
||||||
break;
|
break;
|
||||||
case 'q':
|
|
||||||
opt_quiet = true;
|
|
||||||
break;
|
|
||||||
case 'D':
|
case 'D':
|
||||||
opt_debug = true;
|
opt_debug = true;
|
||||||
opt_debug_rpc = true;
|
break;
|
||||||
|
case 'q':
|
||||||
|
opt_quiet = true;
|
||||||
break;
|
break;
|
||||||
case 'p':
|
case 'p':
|
||||||
free(rpc_pass);
|
free(rpc_pass);
|
||||||
|
119
crc32.c
Normal file
119
crc32.c
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
/*-
|
||||||
|
* COPYRIGHT (C) 1986 Gary S. Brown. You may use this program, or
|
||||||
|
* code or tables extracted from it, as desired without restriction.
|
||||||
|
*
|
||||||
|
* First, the polynomial itself and its table of feedback terms. The
|
||||||
|
* polynomial is
|
||||||
|
* X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
|
||||||
|
*
|
||||||
|
* Note that we take it "backwards" and put the highest-order term in
|
||||||
|
* the lowest-order bit. The X^32 term is "implied"; the LSB is the
|
||||||
|
* X^31 term, etc. The X^0 term (usually shown as "+1") results in
|
||||||
|
* the MSB being 1
|
||||||
|
*
|
||||||
|
* Note that the usual hardware shift register implementation, which
|
||||||
|
* is what we're using (we're merely optimizing it by doing eight-bit
|
||||||
|
* chunks at a time) shifts bits into the lowest-order term. In our
|
||||||
|
* implementation, that means shifting towards the right. Why do we
|
||||||
|
* do it this way? Because the calculated CRC must be transmitted in
|
||||||
|
* order from highest-order term to lowest-order term. UARTs transmit
|
||||||
|
* characters in order from LSB to MSB. By storing the CRC this way
|
||||||
|
* we hand it to the UART in the order low-byte to high-byte; the UART
|
||||||
|
* sends each low-bit to high-bit; and the result is transmission bit
|
||||||
|
* by bit from highest- to lowest-order term without requiring any bit
|
||||||
|
* shuffling on our part. Reception works similarly
|
||||||
|
*
|
||||||
|
* The feedback terms table consists of 256, 32-bit entries. Notes
|
||||||
|
*
|
||||||
|
* The table can be generated at runtime if desired; code to do so
|
||||||
|
* is shown later. It might not be obvious, but the feedback
|
||||||
|
* terms simply represent the results of eight shift/xor operations
|
||||||
|
* for all combinations of data and CRC register values
|
||||||
|
*
|
||||||
|
* The values must be right-shifted by eight bits by the "updcrc"
|
||||||
|
* logic; the shift must be unsigned (bring in zeroes). On some
|
||||||
|
* hardware you could probably optimize the shift in assembler by
|
||||||
|
* using byte-swap instructions
|
||||||
|
* polynomial $edb88320
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* CRC32 code derived from work by Gary S. Brown.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
/*
 * Feedback table for the reflected CRC-32 polynomial 0xedb88320
 * (256 entries, one per possible input byte). The table is only ever
 * read, so it is const.
 */
static const uint32_t crc32_tab[256] = {
	0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
	0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
	0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
	0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
	0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
	0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
	0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c,
	0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
	0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
	0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
	0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
	0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
	0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
	0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
	0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
	0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
	0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
	0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
	0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
	0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
	0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
	0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
	0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
	0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
	0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
	0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
	0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
	0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
	0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
	0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
	0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
	0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
	0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
	0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
	0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
	0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
	0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
	0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
	0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
	0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
	0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
	0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
	0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
};

/*
 * Real CRC32 Function
 *
 * crc  : CRC of the data processed so far (0 for a fresh computation);
 *        the value returned by a previous call can be passed back in to
 *        continue over a following buffer.
 * buf  : bytes to process.
 * size : number of bytes at buf.
 *
 * Returns the updated CRC. The register is inverted on entry and again on
 * exit, so an empty buffer returns crc unchanged.
 */
uint32_t crc32(uint32_t crc, const void *buf, size_t size)
{
	const uint8_t *p = (const uint8_t *) buf;

	crc = crc ^ ~0U;

	/* one table lookup per input byte, shifting the register right */
	while (size--)
		crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8);

	return crc ^ ~0U;
}

/*
 * CRC32 Function simplified for ccminer
 *
 * Same as crc32() with a zero starting CRC. Note that size is a count of
 * BYTES, not of uint32_t words.
 */
uint32_t crc32_u32t(const uint32_t *buf, size_t size)
{
	return crc32(0, buf, size);
}
|
@ -12,6 +12,8 @@
|
|||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
|
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
|
||||||
|
|
||||||
extern __device__ __device_builtin__ void __syncthreads(void);
|
extern __device__ __device_builtin__ void __syncthreads(void);
|
||||||
|
|
||||||
#ifndef __CUDA_ARCH__
|
#ifndef __CUDA_ARCH__
|
||||||
|
@ -5,10 +5,12 @@ extern "C"
|
|||||||
#include "sph/sph_skein.h"
|
#include "sph/sph_skein.h"
|
||||||
#include "sph/sph_jh.h"
|
#include "sph/sph_jh.h"
|
||||||
#include "sph/sph_keccak.h"
|
#include "sph/sph_keccak.h"
|
||||||
#include "miner.h"
|
|
||||||
#include "cuda_helper.h"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#include "miner.h"
|
||||||
|
|
||||||
|
#include "cuda_helper.h"
|
||||||
|
|
||||||
// aus cpu-miner.c
|
// aus cpu-miner.c
|
||||||
extern int device_map[8];
|
extern int device_map[8];
|
||||||
|
|
||||||
@ -74,9 +76,6 @@ extern "C" void nist5hash(void *state, const void *input)
|
|||||||
memcpy(state, hash, 32);
|
memcpy(state, hash, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
extern bool opt_benchmark;
|
|
||||||
|
|
||||||
extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata,
|
extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata,
|
||||||
const uint32_t *ptarget, uint32_t max_nonce,
|
const uint32_t *ptarget, uint32_t max_nonce,
|
||||||
unsigned long *hashes_done)
|
unsigned long *hashes_done)
|
||||||
@ -84,7 +83,7 @@ extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata,
|
|||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
((uint32_t*)ptarget)[7] = 0x00FF;
|
||||||
|
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
|
||||||
|
8
miner.h
8
miner.h
@ -249,6 +249,10 @@ extern int scanhash_nist5(int thr_id, uint32_t *pdata,
|
|||||||
const uint32_t *ptarget, uint32_t max_nonce,
|
const uint32_t *ptarget, uint32_t max_nonce,
|
||||||
unsigned long *hashes_done);
|
unsigned long *hashes_done);
|
||||||
|
|
||||||
|
extern int scanhash_pentablake(int thr_id, uint32_t *pdata,
|
||||||
|
const uint32_t *ptarget, uint32_t max_nonce,
|
||||||
|
unsigned long *hashes_done);
|
||||||
|
|
||||||
extern int scanhash_whc(int thr_id, uint32_t *pdata,
|
extern int scanhash_whc(int thr_id, uint32_t *pdata,
|
||||||
const uint32_t *ptarget, uint32_t max_nonce,
|
const uint32_t *ptarget, uint32_t max_nonce,
|
||||||
unsigned long *hashes_done);
|
unsigned long *hashes_done);
|
||||||
@ -284,8 +288,8 @@ struct work_restart {
|
|||||||
char padding[128 - sizeof(unsigned long)];
|
char padding[128 - sizeof(unsigned long)];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
extern bool opt_benchmark;
|
||||||
extern bool opt_debug;
|
extern bool opt_debug;
|
||||||
extern bool opt_debug_rpc;
|
|
||||||
extern bool opt_quiet;
|
extern bool opt_quiet;
|
||||||
extern bool opt_protocol;
|
extern bool opt_protocol;
|
||||||
extern int opt_timeout;
|
extern int opt_timeout;
|
||||||
@ -417,6 +421,7 @@ size_t time2str(char* buf, time_t timer);
|
|||||||
char* atime2str(time_t timer);
|
char* atime2str(time_t timer);
|
||||||
|
|
||||||
void applog_hash(unsigned char *hash);
|
void applog_hash(unsigned char *hash);
|
||||||
|
void applog_compare_hash(unsigned char *hash, unsigned char *hash2);
|
||||||
|
|
||||||
void print_hash_tests(void);
|
void print_hash_tests(void);
|
||||||
void animehash(void *state, const void *input);
|
void animehash(void *state, const void *input);
|
||||||
@ -428,6 +433,7 @@ unsigned int jackpothash(void *state, const void *input);
|
|||||||
void groestlhash(void *state, const void *input);
|
void groestlhash(void *state, const void *input);
|
||||||
void myriadhash(void *state, const void *input);
|
void myriadhash(void *state, const void *input);
|
||||||
void nist5hash(void *state, const void *input);
|
void nist5hash(void *state, const void *input);
|
||||||
|
void pentablakehash(void *output, const void *input);
|
||||||
void quarkhash(void *state, const void *input);
|
void quarkhash(void *state, const void *input);
|
||||||
void wcoinhash(void *state, const void *input);
|
void wcoinhash(void *state, const void *input);
|
||||||
void x11hash(void *output, const void *input);
|
void x11hash(void *output, const void *input);
|
||||||
|
600
pentablake.cu
Normal file
600
pentablake.cu
Normal file
@ -0,0 +1,600 @@
|
|||||||
|
/**
|
||||||
|
* Penta Blake-512 Cuda Kernel (Tested on SM 5.0)
|
||||||
|
*
|
||||||
|
* Tanguy Pruvot - Aug. 2014
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "miner.h"
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
#include "sph/sph_blake.h"
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <memory.h>
|
||||||
|
}
|
||||||
|
|
||||||
|
/* threads per block */
|
||||||
|
#define TPB 192
|
||||||
|
|
||||||
|
/*
 * CPU reference hash: Blake-512 applied five times in a row.
 *
 * output : receives the first 32 bytes of the final 64-byte digest.
 * input  : 80 bytes of block header.
 *
 * The two halves of the scratch buffer are used alternately as the source
 * and destination of each round.
 * NOTE(review): the context is initialised once and reused after every
 * sph_blake512_close(); this presumably relies on close() resetting the
 * context - confirm against sph/blake.c.
 */
extern "C" void pentablakehash(void *output, const void *input)
{
	unsigned char hash[128];
	unsigned char *hashB = hash + 64; /* second 64-byte half of the scratch buffer */
	sph_blake512_context ctx;

	/* round 1: header -> hash */
	sph_blake512_init(&ctx);
	sph_blake512(&ctx, input, 80);
	sph_blake512_close(&ctx, hash);

	/* round 2: hash -> hashB */
	sph_blake512(&ctx, hash, 64);
	sph_blake512_close(&ctx, hashB);

	/* round 3: hashB -> hash */
	sph_blake512(&ctx, hashB, 64);
	sph_blake512_close(&ctx, hash);

	/* round 4: hash -> hashB */
	sph_blake512(&ctx, hash, 64);
	sph_blake512_close(&ctx, hashB);

	/* round 5: hashB -> hash */
	sph_blake512(&ctx, hashB, 64);
	sph_blake512_close(&ctx, hash);

	memcpy(output, hash, 32);
}
|
||||||
|
|
||||||
|
#include "cuda_helper.h"
|
||||||
|
|
||||||
|
#define MAXU 0xffffffffU
|
||||||
|
|
||||||
|
// in cpu-miner.c
|
||||||
|
extern bool opt_n_threads;
|
||||||
|
extern bool opt_benchmark;
|
||||||
|
extern int device_map[8];
|
||||||
|
|
||||||
|
__constant__
|
||||||
|
static uint32_t __align__(32) c_Target[8];
|
||||||
|
|
||||||
|
__constant__
|
||||||
|
static uint64_t __align__(32) c_data[32];
|
||||||
|
|
||||||
|
static uint32_t *d_hash[8];
|
||||||
|
static uint32_t *d_resNounce[8];
|
||||||
|
static uint32_t *h_resNounce[8];
|
||||||
|
static uint32_t extra_results[2] = { MAXU, MAXU };
|
||||||
|
|
||||||
|
/* prefer uint32_t to prevent size conversions = speed +5/10 % */
|
||||||
|
__constant__
|
||||||
|
static uint32_t __align__(32) c_sigma[16][16];
|
||||||
|
const uint32_t host_sigma[16][16] = {
|
||||||
|
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
|
||||||
|
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
|
||||||
|
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
|
||||||
|
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
|
||||||
|
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
|
||||||
|
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
|
||||||
|
{12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
|
||||||
|
{13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
|
||||||
|
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
|
||||||
|
{10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 },
|
||||||
|
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
|
||||||
|
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
|
||||||
|
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
|
||||||
|
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
|
||||||
|
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
|
||||||
|
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }
|
||||||
|
};
|
||||||
|
|
||||||
|
__device__ __constant__
|
||||||
|
static const uint64_t __align__(32) c_IV512[8] = {
|
||||||
|
0x6a09e667f3bcc908ULL,
|
||||||
|
0xbb67ae8584caa73bULL,
|
||||||
|
0x3c6ef372fe94f82bULL,
|
||||||
|
0xa54ff53a5f1d36f1ULL,
|
||||||
|
0x510e527fade682d1ULL,
|
||||||
|
0x9b05688c2b3e6c1fULL,
|
||||||
|
0x1f83d9abfb41bd6bULL,
|
||||||
|
0x5be0cd19137e2179ULL
|
||||||
|
};
|
||||||
|
|
||||||
|
__device__ __constant__
|
||||||
|
const uint64_t c_u512[16] =
|
||||||
|
{
|
||||||
|
0x243f6a8885a308d3ULL, 0x13198a2e03707344ULL,
|
||||||
|
0xa4093822299f31d0ULL, 0x082efa98ec4e6c89ULL,
|
||||||
|
0x452821e638d01377ULL, 0xbe5466cf34e90c6cULL,
|
||||||
|
0xc0ac29b7c97c50ddULL, 0x3f84d5b5b5470917ULL,
|
||||||
|
0x9216d5d98979fb1bULL, 0xd1310ba698dfb5acULL,
|
||||||
|
0x2ffd72dbd01adfb7ULL, 0xb8e1afed6a267e96ULL,
|
||||||
|
0xba7c9045f12c7f99ULL, 0x24a19947b3916cf7ULL,
|
||||||
|
0x0801f2e2858efc16ULL, 0x636920d871574e69ULL
|
||||||
|
};
|
||||||
|
|
||||||
|
#define G(a,b,c,d,x) { \
|
||||||
|
uint32_t idx1 = c_sigma[i][x]; \
|
||||||
|
uint32_t idx2 = c_sigma[i][x+1]; \
|
||||||
|
v[a] += (m[idx1] ^ c_u512[idx2]) + v[b]; \
|
||||||
|
v[d] = ROTR64(v[d] ^ v[a], 32); \
|
||||||
|
v[c] += v[d]; \
|
||||||
|
v[b] = ROTR64(v[b] ^ v[c], 25); \
|
||||||
|
v[a] += (m[idx2] ^ c_u512[idx1]) + v[b]; \
|
||||||
|
v[d] = ROTR64(v[d] ^ v[a], 16); \
|
||||||
|
v[c] += v[d]; \
|
||||||
|
v[b] = ROTR64(v[b] ^ v[c], 11); \
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hash-Padding
|
||||||
|
__device__ __constant__
|
||||||
|
static const uint64_t d_constHashPadding[8] = {
|
||||||
|
0x0000000000000080ull,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0x0100000000000000ull,
|
||||||
|
0,
|
||||||
|
0x0002000000000000ull
|
||||||
|
};
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
|
||||||
|
__device__ __constant__
|
||||||
|
static const uint64_t __align__(32) c_Padding[16] = {
|
||||||
|
0, 0, 0, 0,
|
||||||
|
0x80000000ULL, 0, 0, 0,
|
||||||
|
0, 0, 0, 0,
|
||||||
|
0, 1, 0, 640,
|
||||||
|
};
|
||||||
|
|
||||||
|
__device__ static
|
||||||
|
void pentablake_compress(uint64_t *h, const uint64_t *block, const uint32_t T0)
|
||||||
|
{
|
||||||
|
uint64_t v[16], m[16];
|
||||||
|
|
||||||
|
m[0] = block[0];
|
||||||
|
m[1] = block[1];
|
||||||
|
m[2] = block[2];
|
||||||
|
m[3] = block[3];
|
||||||
|
|
||||||
|
for (uint32_t i = 4; i < 16; i++) {
|
||||||
|
m[i] = (T0 == 0x200) ? block[i] : c_Padding[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
//#pragma unroll 8
|
||||||
|
for(uint32_t i = 0; i < 8; i++)
|
||||||
|
v[i] = h[i];
|
||||||
|
|
||||||
|
v[ 8] = c_u512[0];
|
||||||
|
v[ 9] = c_u512[1];
|
||||||
|
v[10] = c_u512[2];
|
||||||
|
v[11] = c_u512[3];
|
||||||
|
|
||||||
|
v[12] = xor1(c_u512[4], T0);
|
||||||
|
v[13] = xor1(c_u512[5], T0);
|
||||||
|
v[14] = c_u512[6];
|
||||||
|
v[15] = c_u512[7];
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < 16; i++) {
|
||||||
|
/* column step */
|
||||||
|
G(0, 4, 0x8, 0xC, 0x0);
|
||||||
|
G(1, 5, 0x9, 0xD, 0x2);
|
||||||
|
G(2, 6, 0xA, 0xE, 0x4);
|
||||||
|
G(3, 7, 0xB, 0xF, 0x6);
|
||||||
|
/* diagonal step */
|
||||||
|
G(0, 5, 0xA, 0xF, 0x8);
|
||||||
|
G(1, 6, 0xB, 0xC, 0xA);
|
||||||
|
G(2, 7, 0x8, 0xD, 0xC);
|
||||||
|
G(3, 4, 0x9, 0xE, 0xE);
|
||||||
|
}
|
||||||
|
|
||||||
|
//#pragma unroll 16
|
||||||
|
for (uint32_t i = 0; i < 16; i++) {
|
||||||
|
uint32_t j = i % 8;
|
||||||
|
h[j] ^= v[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__global__
|
||||||
|
void pentablake_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint32_t *resNounce)
|
||||||
|
{
|
||||||
|
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||||
|
if (thread < threads)
|
||||||
|
{
|
||||||
|
const uint32_t nounce = startNounce + thread;
|
||||||
|
uint64_t h[8];
|
||||||
|
|
||||||
|
#pragma unroll
|
||||||
|
for(int i=0; i<8; i++) {
|
||||||
|
h[i] = c_IV512[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t ending[4];
|
||||||
|
ending[0] = c_data[16];
|
||||||
|
ending[1] = c_data[17];
|
||||||
|
ending[2] = c_data[18];
|
||||||
|
ending[3] = nounce; /* our tested value */
|
||||||
|
|
||||||
|
pentablake_compress(h, ending, 640);
|
||||||
|
|
||||||
|
// -----------------------------------
|
||||||
|
|
||||||
|
for (int r = 0; r < 4; r++) {
|
||||||
|
uint64_t data[8];
|
||||||
|
for (int i = 0; i < 7; i++) {
|
||||||
|
data[i] = h[i];
|
||||||
|
}
|
||||||
|
pentablake_compress(h, data, 512); /* todo: use h,h when ok*/
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * One Blake-512 compression of a 128-byte block into the chaining value.
 *
 * h     : 8-word chaining value, updated in place.
 * block : 16 message words; each is byte-swapped with cuda_swab64 on load.
 * T0    : counter value XORed into work-vector words 12 and 13.
 *
 * The G macro reads the locals v, m and i, so those names must stay.
 */
__device__ static
void pentablake_compress(uint64_t *h, const uint64_t *block, const uint64_t T0)
{
	uint64_t v[16], m[16], i;

	#pragma unroll 16
	for (i = 0; i < 16; i++)
		m[i] = cuda_swab64(block[i]);

	/* lower half of the work vector: current chaining value */
	#pragma unroll 8
	for (i = 0; i < 8; i++)
		v[i] = h[i];

	/* upper half: first eight constants, with the counter mixed into 12/13 */
	for (i = 0; i < 8; i++)
		v[8 + i] = c_u512[i];
	v[12] ^= T0;
	v[13] ^= T0;

	/* 16 rounds, each a column step followed by a diagonal step */
	for (i = 0; i < 16; i++) {
		G(0, 4,  8, 12,  0);
		G(1, 5,  9, 13,  2);
		G(2, 6, 10, 14,  4);
		G(3, 7, 11, 15,  6);

		G(0, 5, 10, 15,  8);
		G(1, 6, 11, 12, 10);
		G(2, 7,  8, 13, 12);
		G(3, 4,  9, 14, 14);
	}

	/* fold both halves of the work vector back into the chaining value */
	for (i = 0; i < 8; i++)
		h[i] ^= v[i] ^ v[i + 8];
}
|
||||||
|
|
||||||
|
/*
 * First round: hash the 128-byte padded header held in c_data, one nonce
 * per thread.
 *
 * threads     : number of nonces to process; extra threads exit at once.
 * startNounce : nonce tested by thread 0; thread t tests startNounce + t.
 * outputHash  : receives 64 bytes per thread at byte offset 64 * thread.
 *
 * Uses a 1D launch (only the .x components are read).
 */
__global__
void pentablake_gpu_hash_80(int threads, const uint32_t startNounce, void *outputHash)
{
	const int thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread >= threads)
		return;

	uint64_t state[8];
	uint64_t msg[16];

	for (int k = 0; k < 8; k++)
		state[k] = c_IV512[k];

	for (int k = 0; k < 16; k++)
		msg[k] = c_data[k];

	// The test Nonce: 32-bit word 19 of the header
	uint32_t *msg32 = (uint32_t*) msg;
	msg32[19] = cuda_swab32(startNounce + thread);

	pentablake_compress(state, msg, 640ULL);

#if __CUDA_ARCH__ < 300
	/* store each 64-bit word as two byte-swapped 32-bit halves, high first */
	uint32_t *out32 = (uint32_t *)outputHash + 16 * thread;
	#pragma unroll 8
	for (uint32_t k = 0; k < 8; k++) {
		out32[2*k]   = cuda_swab32( _HIWORD(state[k]) );
		out32[2*k+1] = cuda_swab32( _LOWORD(state[k]) );
	}
#else
	uint64_t *out64 = (uint64_t *)outputHash + 8 * thread;
	for (uint32_t k = 0; k < 8; k++)
		out64[k] = cuda_swab64( state[k] );
#endif
}
|
||||||
|
|
||||||
|
/*
 * Launch pentablake_gpu_hash_80 over `threads` nonces with TPB threads per
 * block, then wait for the device to finish.
 * thr_id and order are only referenced by the commented-out call below.
 */
__host__
void pentablake_cpu_hash_80(int thr_id, int threads, const uint32_t startNounce, uint32_t *d_outputHash, int order)
{
	const int tpb = TPB;

	/* ceil-div so a partial last block still covers the tail */
	const dim3 blocks((threads + tpb - 1) / tpb);
	const dim3 lanes(tpb);
	const size_t no_shared = 0;

	pentablake_gpu_hash_80 <<<blocks, lanes, no_shared>>> (threads, startNounce, d_outputHash);

	//MyStreamSynchronize(NULL, order, thr_id);
	cudaDeviceSynchronize();
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Follow-up round: re-hash each thread's 64-byte digest in place.
 *
 * threads     : number of digests to process; extra threads exit at once.
 * startNounce : not read by this kernel.
 * g_hash      : 8 uint64_t words per thread, read as input and overwritten
 *               with the new digest.
 *
 * Uses a 1D launch (only the .x components are read).
 */
__global__
void pentablake_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash)
{
	const int thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread >= threads)
		return;

	uint64_t *slot = &g_hash[thread<<3]; // hashPosition * 8
	uint64_t msg[16];   // 128 Bytes
	uint64_t state[8];

	/* state <- IV, message <- previous digest followed by the fixed padding */
	#pragma unroll 8
	for (int k = 0; k < 8; k++) {
		state[k]   = c_IV512[k];
		msg[k]     = slot[k];
		msg[k + 8] = d_constHashPadding[k];
	}

	pentablake_compress(state, msg, 512);

#if __CUDA_ARCH__ < 300
	/* store each 64-bit word as two byte-swapped 32-bit halves, high first */
	uint32_t *out32 = (uint32_t*) slot;
	#pragma unroll 8
	for (int k = 0; k < 8; k++) {
		out32[2*k+0] = cuda_swab32( _HIWORD(state[k]) );
		out32[2*k+1] = cuda_swab32( _LOWORD(state[k]) );
	}
#else
	for (int k = 0; k < 8; k++)
		slot[k] = cuda_swab64(state[k]);
#endif
}
|
||||||
|
|
||||||
|
/*
 * Launch pentablake_gpu_hash_64 over `threads` digests with TPB threads per
 * block, then wait for the device to finish.
 * thr_id and order are only referenced by the commented-out call below.
 */
__host__
void pentablake_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash, int order)
{
	const int tpb = TPB;

	/* ceil-div so a partial last block still covers the tail */
	const dim3 blocks((threads + tpb - 1) / tpb);
	const dim3 lanes(tpb);
	const size_t no_shared = 0;

	pentablake_gpu_hash_64 <<<blocks, lanes, no_shared>>> (threads, startNounce, (uint64_t*)d_outputHash);

	//MyStreamSynchronize(NULL, order, thr_id);
	cudaDeviceSynchronize();
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
|
||||||
|
__host__
|
||||||
|
uint32_t pentablake_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce)
|
||||||
|
{
|
||||||
|
const int threadsperblock = TPB;
|
||||||
|
uint32_t result = MAXU;
|
||||||
|
|
||||||
|
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
||||||
|
dim3 block(threadsperblock);
|
||||||
|
size_t shared_size = 0;
|
||||||
|
|
||||||
|
/* Check error on Ctrl+C or kill to prevent segfaults on exit */
|
||||||
|
if (cudaMemset(d_resNounce[thr_id], 0xff, 2*sizeof(uint32_t)) != cudaSuccess)
|
||||||
|
return result;
|
||||||
|
|
||||||
|
pentablake_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNounce, d_resNounce[thr_id]);
|
||||||
|
cudaDeviceSynchronize();
|
||||||
|
if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], 2*sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
|
||||||
|
cudaThreadSynchronize();
|
||||||
|
result = h_resNounce[thr_id][0];
|
||||||
|
extra_results[0] = h_resNounce[thr_id][1];
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Compare each thread's digest against c_Target and record matching nonces.
 *
 * threads     : number of digests to test; extra threads exit at once.
 * startNounce : nonce of thread 0; thread t corresponds to startNounce + t.
 * g_hash      : 16 uint32_t words per thread; only the first 8 are compared.
 * resNounce   : two-word result buffer. It must hold 0xffffffff in both
 *               words before launch; on exit word 0 is the smallest matching
 *               nonce and word 1 the second smallest (0xffffffff if none).
 *
 * Several threads may match in the same launch, so the result words are
 * updated with atomicMin rather than a plain read-modify-write, which could
 * lose or mis-order results.
 */
__global__
void pentablake_gpu_check_hash(uint32_t threads, uint32_t startNounce, uint32_t *g_hash, uint32_t *resNounce)
{
	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		uint32_t nounce = startNounce + thread;
		uint32_t *inpHash = &g_hash[thread<<4];
		uint32_t h[8];

		#pragma unroll 8
		for (int i=0; i < 8; i++)
			h[i] = inpHash[i];

		/* compare from word 7 down to word 0; the first difference decides */
		for (int i = 7; i >= 0; i--) {
			uint32_t hash = h[i]; // cuda_swab32(h[i]);
			if (hash > c_Target[i]) {
				return;
			}
			if (hash < c_Target[i]) {
				break;
			}
		}

		/* keep the smallest nounce, + extra one if found */
		const uint32_t prev = atomicMin(&resNounce[0], nounce);
		/* whichever of (previous best, this nonce) lost goes to the extra slot;
		 * when prev is the 0xffffffff sentinel this is a no-op */
		const uint32_t loser = (prev > nounce) ? prev : nounce;
		atomicMin(&resNounce[1], loser);
	}
}
|
||||||
|
|
||||||
|
/*
 * Run pentablake_gpu_check_hash over `threads` digests and fetch the result.
 *
 * Returns the first result word written by the kernel, or MAXU when the
 * reset of the result buffer or the copy back to the host fails. The second
 * result word is stored in extra_results[0].
 * order is not used.
 */
__host__ static
uint32_t pentablake_check_hash(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_inputHash, int order)
{
	const int threadsperblock = TPB;
	uint32_t result = MAXU;

	/* ceil-div so a partial last block still covers the tail */
	dim3 grid((threads + threadsperblock-1)/threadsperblock);
	dim3 block(threadsperblock);
	size_t shared_size = 0;

	/* Check error on Ctrl+C or kill to prevent segfaults on exit */
	/* 0xff fill sets both result words to MAXU, the "nothing found" value */
	if (cudaMemset(d_resNounce[thr_id], 0xff, 2*sizeof(uint32_t)) != cudaSuccess)
		return result;

	pentablake_gpu_check_hash <<<grid, block, shared_size>>> (threads, startNounce, d_inputHash, d_resNounce[thr_id]);

	CUDA_SAFE_CALL(cudaDeviceSynchronize());
	if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], 2*sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
		/* cudaThreadSynchronize() is deprecated; cudaDeviceSynchronize() is its replacement */
		cudaDeviceSynchronize();
		result = h_resNounce[thr_id][0];
		extra_results[0] = h_resNounce[thr_id][1];
	}
	return result;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Upload the per-job constants to the device: the 80-byte header padded to
 * one 128-byte block (c_data), the permutation table (c_sigma) and the
 * 32-byte target (c_Target).
 *
 * pdata   : 80 bytes of header, copied verbatim.
 * ptarget : 32 bytes of target.
 */
__host__
void pentablake_cpu_setBlock_80(uint32_t *pdata, const uint32_t *ptarget)
{
	uint8_t block[128];

	/* zero everything, then lay the header over the first 80 bytes */
	memset(block, 0, sizeof(block));
	memcpy((void*) block, (void*) pdata, 80);

	// to swab...
	/* padding bytes; 0x0280 in the last two bytes is 640, the same value the
	 * first-round kernel passes as T0 */
	block[80]  = 0x80;
	block[111] = 1;
	block[126] = 0x02;
	block[127] = 0x80;

	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_data, block, sizeof(block), 0, cudaMemcpyHostToDevice));
	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_sigma, host_sigma, sizeof(host_sigma), 0, cudaMemcpyHostToDevice));
	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Target, ptarget, 32, 0, cudaMemcpyHostToDevice));
}
|
||||||
|
|
||||||
|
/*
 * Scan a nonce range for a pentablake hash that meets the target.
 *
 * thr_id      : mining thread index; selects the per-thread device buffers.
 * pdata       : 20-word block header; pdata[19] is the starting nonce on
 *               entry and the found / last tried nonce on exit.
 * ptarget     : 8-word target (word 7 is overwritten in benchmark mode).
 * max_nonce   : upper bound of the range to scan.
 * hashes_done : receives pdata[19] - first_nonce + 1.
 *
 * Returns 0 when nothing was found, 1 when pdata[19] holds a valid nonce,
 * and 2 when a second valid nonce was also found; that one is kept in
 * extra_results[0] and handed out by the next call.
 */
extern "C" int scanhash_pentablake(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
	uint32_t max_nonce, unsigned long *hashes_done)
{
	const uint32_t first_nonce = pdata[19];
	static bool init[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
	/* largest batch a single pass may process */
	const int max_throughput = 128 * 2560;
	uint32_t throughput = min(max_throughput, max_nonce - first_nonce);
	uint32_t endiandata[20];
	int rc = 0;

	if (extra_results[0] != MAXU) {
		// possible extra result found in previous call
		if (first_nonce <= extra_results[0] && max_nonce >= extra_results[0]) {
			pdata[19] = extra_results[0];
			*hashes_done = pdata[19] - first_nonce + 1;
			extra_results[0] = MAXU;
			rc = 1;
			goto exit_scan;
		}
	}

	if (opt_benchmark)
		((uint32_t*)ptarget)[7] = 0x000F;

	if (!init[thr_id]) {
		/* NOTE(review): opt_n_threads is declared `extern bool` in this file,
		 * so `> 1` cannot be true for a genuine bool - confirm the type
		 * against its definition in cpu-miner.c */
		if (opt_n_threads > 1) {
			CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
		}
		/* The buffer is allocated only once, so size it for the largest batch.
		 * Sizing it from this call's throughput would let a later call with
		 * a wider nonce range write past the end of the allocation. */
		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * max_throughput));
		CUDA_SAFE_CALL(cudaMallocHost(&h_resNounce[thr_id], 2*sizeof(uint32_t)));
		CUDA_SAFE_CALL(cudaMalloc(&d_resNounce[thr_id], 2*sizeof(uint32_t)));

		init[thr_id] = true;
	}

	for (int k=0; k < 20; k++)
		be32enc(&endiandata[k], pdata[k]);

	pentablake_cpu_setBlock_80(endiandata, ptarget);

	do {
		int order = 0;

		// GPU HASH: one 80-byte round followed by four 64-byte rounds
		pentablake_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);

		pentablake_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
		pentablake_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
		pentablake_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
		pentablake_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id], order++);

		uint32_t foundNonce = pentablake_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id], order++);

		if (foundNonce != MAXU)
		{
			uint32_t vhashcpu[8];
			uint32_t Htarg = ptarget[7];

			/* re-hash the candidate on the CPU before reporting it */
			be32enc(&endiandata[19], foundNonce);

			pentablakehash(vhashcpu, endiandata);

			if (vhashcpu[7] <= Htarg && fulltest(vhashcpu, ptarget))
			{
				pdata[19] = foundNonce;
				rc = 1;

				// Rare but possible if the throughput is big
				be32enc(&endiandata[19], extra_results[0]);
				pentablakehash(vhashcpu, endiandata);
				if (vhashcpu[7] <= Htarg && fulltest(vhashcpu, ptarget)) {
					applog(LOG_NOTICE, "GPU found more than one result yippee!");
					rc = 2;
				} else {
					extra_results[0] = MAXU;
				}

				goto exit_scan;
			}
			else if (vhashcpu[7] > Htarg) {
				applog(LOG_WARNING, "GPU #%d: result for nounce %08x is not in range: %x > %x", thr_id, foundNonce, vhashcpu[7], Htarg);
			}
			else if (vhashcpu[6] > ptarget[6]) {
				applog(LOG_WARNING, "GPU #%d: hash[6] for nounce %08x is not in range: %x > %x", thr_id, foundNonce, vhashcpu[6], ptarget[6]);
			}
			else {
				applog(LOG_WARNING, "GPU #%d: result for nounce %08x does not validate on CPU!", thr_id, foundNonce);
			}
		}

		pdata[19] += throughput;

	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);

exit_scan:
	*hashes_done = pdata[19] - first_nonce + 1;
#if 0
	/* reset the device to allow multiple instances
	 * could be made in cpu-miner... check later if required */
	if (opt_n_threads == 1) {
		CUDA_SAFE_CALL(cudaDeviceReset());
		init[thr_id] = false;
	}
#endif
	cudaDeviceSynchronize();
	return rc;
}
|
@ -50,59 +50,60 @@ const uint64_t c_u512[16] =
|
|||||||
0x0801f2e2858efc16ULL, 0x636920d871574e69ULL
|
0x0801f2e2858efc16ULL, 0x636920d871574e69ULL
|
||||||
};
|
};
|
||||||
|
|
||||||
#define G(a,b,c,d,e) \
|
#define G(a,b,c,d,x) { \
|
||||||
v[a] += (m[sigma[i][e]] ^ u512[sigma[i][e+1]]) + v[b];\
|
uint32_t idx1 = sigma[i][x]; \
|
||||||
v[d] = ROTR( v[d] ^ v[a],32); \
|
uint32_t idx2 = sigma[i][x+1]; \
|
||||||
v[c] += v[d]; \
|
v[a] += (m[idx1] ^ u512[idx2]) + v[b]; \
|
||||||
v[b] = ROTR( v[b] ^ v[c],25); \
|
v[d] = ROTR( v[d] ^ v[a], 32); \
|
||||||
v[a] += (m[sigma[i][e+1]] ^ u512[sigma[i][e]])+v[b]; \
|
v[c] += v[d]; \
|
||||||
v[d] = ROTR( v[d] ^ v[a],16); \
|
v[b] = ROTR( v[b] ^ v[c], 25); \
|
||||||
v[c] += v[d]; \
|
v[a] += (m[idx2] ^ u512[idx1]) + v[b]; \
|
||||||
v[b] = ROTR( v[b] ^ v[c],11);
|
v[d] = ROTR( v[d] ^ v[a], 16); \
|
||||||
|
v[c] += v[d]; \
|
||||||
|
v[b] = ROTR( v[b] ^ v[c], 11); \
|
||||||
|
}
|
||||||
|
|
||||||
__device__ static
|
__device__ static
|
||||||
void quark_blake512_compress( uint64_t *h, const uint64_t *block, const uint8_t ((*sigma)[16]), const uint64_t *u512, const int bits )
|
void quark_blake512_compress( uint64_t *h, const uint64_t *block, const uint8_t ((*sigma)[16]), const uint64_t *u512, const int T0)
|
||||||
{
|
{
|
||||||
uint64_t v[16], m[16], i;
|
uint64_t v[16], m[16], i;
|
||||||
|
|
||||||
#pragma unroll 16
|
#pragma unroll 16
|
||||||
for( i = 0; i < 16; ++i ) {
|
for( i = 0; i < 16; i++) {
|
||||||
m[i] = cuda_swab64(block[i]);
|
m[i] = cuda_swab64(block[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
#pragma unroll 8
|
#pragma unroll 8
|
||||||
for( i = 0; i < 8; ++i ) v[i] = h[i];
|
for (i = 0; i < 8; i++)
|
||||||
|
v[i] = h[i];
|
||||||
|
|
||||||
v[ 8] = u512[0];
|
v[ 8] = u512[0];
|
||||||
v[ 9] = u512[1];
|
v[ 9] = u512[1];
|
||||||
v[10] = u512[2];
|
v[10] = u512[2];
|
||||||
v[11] = u512[3];
|
v[11] = u512[3];
|
||||||
v[12] = u512[4];
|
v[12] = u512[4] ^ T0;
|
||||||
v[13] = u512[5];
|
v[13] = u512[5] ^ T0;
|
||||||
v[14] = u512[6];
|
v[14] = u512[6];
|
||||||
v[15] = u512[7];
|
v[15] = u512[7];
|
||||||
|
|
||||||
v[12] ^= bits;
|
//#pragma unroll 16
|
||||||
v[13] ^= bits;
|
for( i = 0; i < 16; ++i )
|
||||||
|
{
|
||||||
|
/* column step */
|
||||||
|
G( 0, 4, 8, 12, 0 );
|
||||||
|
G( 1, 5, 9, 13, 2 );
|
||||||
|
G( 2, 6, 10, 14, 4 );
|
||||||
|
G( 3, 7, 11, 15, 6 );
|
||||||
|
/* diagonal step */
|
||||||
|
G( 0, 5, 10, 15, 8 );
|
||||||
|
G( 1, 6, 11, 12, 10 );
|
||||||
|
G( 2, 7, 8, 13, 12 );
|
||||||
|
G( 3, 4, 9, 14, 14 );
|
||||||
|
}
|
||||||
|
|
||||||
//#pragma unroll 16
|
#pragma unroll 16
|
||||||
for( i = 0; i < 16; ++i )
|
for( i = 0; i < 16; ++i )
|
||||||
{
|
h[i % 8] ^= v[i];
|
||||||
/* column step */
|
|
||||||
G( 0, 4, 8, 12, 0 );
|
|
||||||
G( 1, 5, 9, 13, 2 );
|
|
||||||
G( 2, 6, 10, 14, 4 );
|
|
||||||
G( 3, 7, 11, 15, 6 );
|
|
||||||
/* diagonal step */
|
|
||||||
G( 0, 5, 10, 15, 8 );
|
|
||||||
G( 1, 6, 11, 12, 10 );
|
|
||||||
G( 2, 7, 8, 13, 12 );
|
|
||||||
G( 3, 4, 9, 14, 14 );
|
|
||||||
}
|
|
||||||
|
|
||||||
#pragma unroll 16
|
|
||||||
for( i = 0; i < 16; ++i ) h[i % 8] ^= v[i];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
__device__ __constant__
|
__device__ __constant__
|
||||||
@ -114,7 +115,8 @@ static const uint64_t d_constMem[8] = {
|
|||||||
0x510e527fade682d1ULL,
|
0x510e527fade682d1ULL,
|
||||||
0x9b05688c2b3e6c1fULL,
|
0x9b05688c2b3e6c1fULL,
|
||||||
0x1f83d9abfb41bd6bULL,
|
0x1f83d9abfb41bd6bULL,
|
||||||
0x5be0cd19137e2179ULL };
|
0x5be0cd19137e2179ULL
|
||||||
|
};
|
||||||
|
|
||||||
// Hash-Padding
|
// Hash-Padding
|
||||||
__device__ __constant__
|
__device__ __constant__
|
||||||
@ -126,7 +128,8 @@ static const uint64_t d_constHashPadding[8] = {
|
|||||||
0,
|
0,
|
||||||
0x0100000000000000ull,
|
0x0100000000000000ull,
|
||||||
0,
|
0,
|
||||||
0x0002000000000000ull };
|
0x0002000000000000ull
|
||||||
|
};
|
||||||
|
|
||||||
__global__ __launch_bounds__(256, 4)
|
__global__ __launch_bounds__(256, 4)
|
||||||
void quark_blake512_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_nonceVector, uint64_t *g_hash)
|
void quark_blake512_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_nonceVector, uint64_t *g_hash)
|
||||||
@ -145,48 +148,42 @@ void quark_blake512_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_n
|
|||||||
if (thread < threads)
|
if (thread < threads)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
uint8_t i;
|
|
||||||
// bestimme den aktuellen Zähler
|
|
||||||
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
|
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
|
||||||
|
|
||||||
int hashPosition = nounce - startNounce;
|
int hashPosition = nounce - startNounce;
|
||||||
uint64_t *inpHash = &g_hash[hashPosition<<3]; // hashPosition * 8
|
uint64_t *inpHash = &g_hash[hashPosition<<3]; // hashPosition * 8
|
||||||
|
|
||||||
// 128 Byte für die Message
|
// 128 Bytes
|
||||||
uint64_t buf[16];
|
uint64_t buf[16];
|
||||||
|
|
||||||
// State vorbereiten
|
// State
|
||||||
uint64_t h[8];
|
uint64_t h[8];
|
||||||
#pragma unroll 8
|
#pragma unroll 8
|
||||||
for (i=0;i<8;i++)
|
for (int i=0;i<8;i++)
|
||||||
h[i] = d_constMem[i];
|
h[i] = d_constMem[i];
|
||||||
|
|
||||||
// Message für die erste Runde in Register holen
|
// Message for first round
|
||||||
#pragma unroll 8
|
#pragma unroll 8
|
||||||
for (i=0; i < 8; ++i)
|
for (int i=0; i < 8; ++i)
|
||||||
buf[i] = inpHash[i];
|
buf[i] = inpHash[i];
|
||||||
|
|
||||||
#pragma unroll 8
|
#pragma unroll 8
|
||||||
for (i=0; i < 8; i++)
|
for (int i=0; i < 8; i++)
|
||||||
buf[i+8] = d_constHashPadding[i];
|
buf[i+8] = d_constHashPadding[i];
|
||||||
|
|
||||||
// die einzige Hashing-Runde
|
// Ending round
|
||||||
quark_blake512_compress( h, buf, c_sigma, c_u512, 512 );
|
quark_blake512_compress( h, buf, c_sigma, c_u512, 512 );
|
||||||
|
|
||||||
#if __CUDA_ARCH__ >= 130
|
#if __CUDA_ARCH__ <= 350
|
||||||
// ausschliesslich 32 bit Operationen sofern die SM1.3 double intrinsics verfügbar sind
|
|
||||||
uint32_t *outHash = (uint32_t*)&g_hash[8 * hashPosition];
|
uint32_t *outHash = (uint32_t*)&g_hash[8 * hashPosition];
|
||||||
#pragma unroll 8
|
#pragma unroll 8
|
||||||
for (i=0; i < 8; ++i) {
|
for (int i=0; i < 8; i++) {
|
||||||
outHash[2*i+0] = cuda_swab32( _HIWORD(h[i]) );
|
outHash[2*i+0] = cuda_swab32( _HIWORD(h[i]) );
|
||||||
outHash[2*i+1] = cuda_swab32( _LOWORD(h[i]) );
|
outHash[2*i+1] = cuda_swab32( _LOWORD(h[i]) );
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
// in dieser Version passieren auch ein paar 64 Bit Shifts
|
|
||||||
uint64_t *outHash = &g_hash[8 * hashPosition];
|
uint64_t *outHash = &g_hash[8 * hashPosition];
|
||||||
#pragma unroll 8
|
for (int i=0; i < 8; i++) {
|
||||||
for (i=0; i < 8; ++i)
|
|
||||||
{
|
|
||||||
outHash[i] = cuda_swab64(h[i]);
|
outHash[i] = cuda_swab64(h[i]);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -198,45 +195,38 @@ __global__ void quark_blake512_gpu_hash_80(int threads, uint32_t startNounce, vo
|
|||||||
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||||
if (thread < threads)
|
if (thread < threads)
|
||||||
{
|
{
|
||||||
// State vorbereiten
|
|
||||||
uint64_t h[8];
|
uint64_t h[8];
|
||||||
// 128 Byte für die Message
|
|
||||||
uint64_t buf[16];
|
uint64_t buf[16];
|
||||||
uint8_t i;
|
|
||||||
// bestimme den aktuellen Zähler
|
|
||||||
uint32_t nounce = startNounce + thread;
|
uint32_t nounce = startNounce + thread;
|
||||||
|
|
||||||
#pragma unroll 8
|
#pragma unroll 8
|
||||||
for(i=0;i<8;i++)
|
for(int i=0; i<8; i++)
|
||||||
h[i] = d_constMem[i];
|
h[i] = d_constMem[i];
|
||||||
|
|
||||||
// Message für die erste Runde in Register holen
|
// Message für die erste Runde in Register holen
|
||||||
#pragma unroll 16
|
#pragma unroll 16
|
||||||
for (i=0; i < 16; ++i) buf[i] = c_PaddedMessage80[i];
|
for (int i=0; i < 16; ++i)
|
||||||
|
buf[i] = c_PaddedMessage80[i];
|
||||||
|
|
||||||
// die Nounce durch die thread-spezifische ersetzen
|
// The test Nonce
|
||||||
buf[9] = REPLACE_HIWORD(buf[9], cuda_swab32(nounce));
|
((uint32_t*)buf)[19] = cuda_swab32(nounce);
|
||||||
|
|
||||||
// die einzige Hashing-Runde
|
|
||||||
quark_blake512_compress( h, buf, c_sigma, c_u512, 640 );
|
quark_blake512_compress( h, buf, c_sigma, c_u512, 640 );
|
||||||
|
|
||||||
// Hash rauslassen
|
#if __CUDA_ARCH__ <= 350
|
||||||
#if __CUDA_ARCH__ >= 130
|
|
||||||
// ausschliesslich 32 bit Operationen sofern die SM1.3 double intrinsics verfügbar sind
|
|
||||||
uint32_t *outHash = (uint32_t *)outputHash + 16 * thread;
|
uint32_t *outHash = (uint32_t *)outputHash + 16 * thread;
|
||||||
#pragma unroll 8
|
#pragma unroll 8
|
||||||
for (i=0; i < 8; ++i) {
|
for (uint32_t i=0; i < 8; i++) {
|
||||||
outHash[2*i+0] = cuda_swab32( _HIWORD(h[i]) );
|
outHash[2*i] = cuda_swab32( _HIWORD(h[i]) );
|
||||||
outHash[2*i+1] = cuda_swab32( _LOWORD(h[i]) );
|
outHash[2*i+1] = cuda_swab32( _LOWORD(h[i]) );
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
// in dieser Version passieren auch ein paar 64 Bit Shifts
|
|
||||||
uint64_t *outHash = (uint64_t *)outputHash + 8 * thread;
|
uint64_t *outHash = (uint64_t *)outputHash + 8 * thread;
|
||||||
#pragma unroll 8
|
for (uint32_t i=0; i < 8; i++) {
|
||||||
for (i=0; i < 8; ++i) {
|
|
||||||
outHash[i] = cuda_swab64( h[i] );
|
outHash[i] = cuda_swab64( h[i] );
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6,12 +6,12 @@ extern "C"
|
|||||||
#include "sph/sph_skein.h"
|
#include "sph/sph_skein.h"
|
||||||
#include "sph/sph_jh.h"
|
#include "sph/sph_jh.h"
|
||||||
#include "sph/sph_keccak.h"
|
#include "sph/sph_keccak.h"
|
||||||
|
}
|
||||||
|
|
||||||
#include "miner.h"
|
#include "miner.h"
|
||||||
|
|
||||||
#include "cuda_helper.h"
|
#include "cuda_helper.h"
|
||||||
}
|
|
||||||
|
|
||||||
// aus cpu-miner.c
|
|
||||||
extern int device_map[8];
|
extern int device_map[8];
|
||||||
|
|
||||||
// Speicher für Input/Output der verketteten Hashfunktionen
|
// Speicher für Input/Output der verketteten Hashfunktionen
|
||||||
@ -70,76 +70,64 @@ extern "C" void quarkhash(void *state, const void *input)
|
|||||||
unsigned char hash[64];
|
unsigned char hash[64];
|
||||||
|
|
||||||
sph_blake512_init(&ctx_blake);
|
sph_blake512_init(&ctx_blake);
|
||||||
// ZBLAKE;
|
|
||||||
sph_blake512 (&ctx_blake, input, 80);
|
sph_blake512 (&ctx_blake, input, 80);
|
||||||
sph_blake512_close(&ctx_blake, (void*) hash);
|
sph_blake512_close(&ctx_blake, (void*) hash);
|
||||||
|
|
||||||
sph_bmw512_init(&ctx_bmw);
|
sph_bmw512_init(&ctx_bmw);
|
||||||
// ZBMW;
|
|
||||||
sph_bmw512 (&ctx_bmw, (const void*) hash, 64);
|
sph_bmw512 (&ctx_bmw, (const void*) hash, 64);
|
||||||
sph_bmw512_close(&ctx_bmw, (void*) hash);
|
sph_bmw512_close(&ctx_bmw, (void*) hash);
|
||||||
|
|
||||||
if (hash[0] & 0x8)
|
if (hash[0] & 0x8)
|
||||||
{
|
{
|
||||||
sph_groestl512_init(&ctx_groestl);
|
sph_groestl512_init(&ctx_groestl);
|
||||||
// ZGROESTL;
|
|
||||||
sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
|
sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
|
||||||
sph_groestl512_close(&ctx_groestl, (void*) hash);
|
sph_groestl512_close(&ctx_groestl, (void*) hash);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
sph_skein512_init(&ctx_skein);
|
sph_skein512_init(&ctx_skein);
|
||||||
// ZSKEIN;
|
|
||||||
sph_skein512 (&ctx_skein, (const void*) hash, 64);
|
sph_skein512 (&ctx_skein, (const void*) hash, 64);
|
||||||
sph_skein512_close(&ctx_skein, (void*) hash);
|
sph_skein512_close(&ctx_skein, (void*) hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
sph_groestl512_init(&ctx_groestl);
|
sph_groestl512_init(&ctx_groestl);
|
||||||
// ZGROESTL;
|
|
||||||
sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
|
sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
|
||||||
sph_groestl512_close(&ctx_groestl, (void*) hash);
|
sph_groestl512_close(&ctx_groestl, (void*) hash);
|
||||||
|
|
||||||
sph_jh512_init(&ctx_jh);
|
sph_jh512_init(&ctx_jh);
|
||||||
// ZJH;
|
|
||||||
sph_jh512 (&ctx_jh, (const void*) hash, 64);
|
sph_jh512 (&ctx_jh, (const void*) hash, 64);
|
||||||
sph_jh512_close(&ctx_jh, (void*) hash);
|
sph_jh512_close(&ctx_jh, (void*) hash);
|
||||||
|
|
||||||
if (hash[0] & 0x8)
|
if (hash[0] & 0x8)
|
||||||
{
|
{
|
||||||
sph_blake512_init(&ctx_blake);
|
sph_blake512_init(&ctx_blake);
|
||||||
// ZBLAKE;
|
|
||||||
sph_blake512 (&ctx_blake, (const void*) hash, 64);
|
sph_blake512 (&ctx_blake, (const void*) hash, 64);
|
||||||
sph_blake512_close(&ctx_blake, (void*) hash);
|
sph_blake512_close(&ctx_blake, (void*) hash);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
sph_bmw512_init(&ctx_bmw);
|
sph_bmw512_init(&ctx_bmw);
|
||||||
// ZBMW;
|
|
||||||
sph_bmw512 (&ctx_bmw, (const void*) hash, 64);
|
sph_bmw512 (&ctx_bmw, (const void*) hash, 64);
|
||||||
sph_bmw512_close(&ctx_bmw, (void*) hash);
|
sph_bmw512_close(&ctx_bmw, (void*) hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
sph_keccak512_init(&ctx_keccak);
|
sph_keccak512_init(&ctx_keccak);
|
||||||
// ZKECCAK;
|
|
||||||
sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
|
sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
|
||||||
sph_keccak512_close(&ctx_keccak, (void*) hash);
|
sph_keccak512_close(&ctx_keccak, (void*) hash);
|
||||||
|
|
||||||
sph_skein512_init(&ctx_skein);
|
sph_skein512_init(&ctx_skein);
|
||||||
// SKEIN;
|
|
||||||
sph_skein512 (&ctx_skein, (const void*) hash, 64);
|
sph_skein512 (&ctx_skein, (const void*) hash, 64);
|
||||||
sph_skein512_close(&ctx_skein, (void*) hash);
|
sph_skein512_close(&ctx_skein, (void*) hash);
|
||||||
|
|
||||||
if (hash[0] & 0x8)
|
if (hash[0] & 0x8)
|
||||||
{
|
{
|
||||||
sph_keccak512_init(&ctx_keccak);
|
sph_keccak512_init(&ctx_keccak);
|
||||||
// ZKECCAK;
|
|
||||||
sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
|
sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
|
||||||
sph_keccak512_close(&ctx_keccak, (void*) hash);
|
sph_keccak512_close(&ctx_keccak, (void*) hash);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
sph_jh512_init(&ctx_jh);
|
sph_jh512_init(&ctx_jh);
|
||||||
// ZJH;
|
|
||||||
sph_jh512 (&ctx_jh, (const void*) hash, 64);
|
sph_jh512 (&ctx_jh, (const void*) hash, 64);
|
||||||
sph_jh512_close(&ctx_jh, (void*) hash);
|
sph_jh512_close(&ctx_jh, (void*) hash);
|
||||||
}
|
}
|
||||||
@ -147,23 +135,17 @@ extern "C" void quarkhash(void *state, const void *input)
|
|||||||
memcpy(state, hash, 32);
|
memcpy(state, hash, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
extern bool opt_benchmark;
|
|
||||||
|
|
||||||
extern "C" int scanhash_quark(int thr_id, uint32_t *pdata,
|
extern "C" int scanhash_quark(int thr_id, uint32_t *pdata,
|
||||||
const uint32_t *ptarget, uint32_t max_nonce,
|
const uint32_t *ptarget, uint32_t max_nonce,
|
||||||
unsigned long *hashes_done)
|
unsigned long *hashes_done)
|
||||||
{
|
{
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
const int throughput = 256*4096; // 100;
|
||||||
|
static bool init[8] = {0,0,0,0,0,0,0,0};
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
((uint32_t*)ptarget)[7] = 0x00FF;
|
||||||
|
|
||||||
const uint32_t Htarg = ptarget[7];
|
|
||||||
|
|
||||||
const int throughput = 256*4096; // 100;
|
|
||||||
|
|
||||||
static bool init[8] = {0,0,0,0,0,0,0,0};
|
|
||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
{
|
{
|
||||||
cudaSetDevice(device_map[thr_id]);
|
cudaSetDevice(device_map[thr_id]);
|
||||||
@ -252,11 +234,12 @@ extern "C" int scanhash_quark(int thr_id, uint32_t *pdata,
|
|||||||
uint32_t foundNonce = cuda_check_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
|
uint32_t foundNonce = cuda_check_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
|
||||||
if (foundNonce != 0xffffffff)
|
if (foundNonce != 0xffffffff)
|
||||||
{
|
{
|
||||||
|
const uint32_t Htarg = ptarget[7];
|
||||||
uint32_t vhash64[8];
|
uint32_t vhash64[8];
|
||||||
be32enc(&endiandata[19], foundNonce);
|
be32enc(&endiandata[19], foundNonce);
|
||||||
quarkhash(vhash64, endiandata);
|
quarkhash(vhash64, endiandata);
|
||||||
|
|
||||||
if ((vhash64[7]<=Htarg) && fulltest(vhash64, ptarget)) {
|
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
|
||||||
|
|
||||||
pdata[19] = foundNonce;
|
pdata[19] = foundNonce;
|
||||||
*hashes_done = (foundNonce - first_nonce + 1)/2;
|
*hashes_done = (foundNonce - first_nonce + 1)/2;
|
||||||
|
29
util.c
29
util.c
@ -1020,7 +1020,7 @@ static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)
|
|||||||
int merkle_count, i;
|
int merkle_count, i;
|
||||||
json_t *merkle_arr;
|
json_t *merkle_arr;
|
||||||
unsigned char **merkle;
|
unsigned char **merkle;
|
||||||
int ntime;
|
int ntime, hoffset;
|
||||||
|
|
||||||
job_id = json_string_value(json_array_get(params, 0));
|
job_id = json_string_value(json_array_get(params, 0));
|
||||||
prevhash = json_string_value(json_array_get(params, 1));
|
prevhash = json_string_value(json_array_get(params, 1));
|
||||||
@ -1078,7 +1078,8 @@ static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)
|
|||||||
hex2bin(sctx->job.coinbase, coinb1, coinb1_size);
|
hex2bin(sctx->job.coinbase, coinb1, coinb1_size);
|
||||||
memcpy(sctx->job.coinbase + coinb1_size, sctx->xnonce1, sctx->xnonce1_size);
|
memcpy(sctx->job.coinbase + coinb1_size, sctx->xnonce1, sctx->xnonce1_size);
|
||||||
|
|
||||||
sctx->bloc_height = le16dec((uint8_t*) sctx->job.coinbase + 43);
|
hoffset = coinb1_size - 15; // 43;
|
||||||
|
sctx->bloc_height = le16dec((uint8_t*) sctx->job.coinbase + hoffset);
|
||||||
if (!sctx->job.job_id || strcmp(sctx->job.job_id, job_id))
|
if (!sctx->job.job_id || strcmp(sctx->job.job_id, job_id))
|
||||||
memset(sctx->job.xnonce2, 0, sctx->xnonce2_size);
|
memset(sctx->job.xnonce2, 0, sctx->xnonce2_size);
|
||||||
hex2bin(sctx->job.xnonce2 + sctx->xnonce2_size, coinb2, coinb2_size);
|
hex2bin(sctx->job.xnonce2 + sctx->xnonce2_size, coinb2, coinb2_size);
|
||||||
@ -1125,7 +1126,7 @@ static bool stratum_set_difficulty(struct stratum_ctx *sctx, json_t *params)
|
|||||||
sctx->next_diff = diff;
|
sctx->next_diff = diff;
|
||||||
pthread_mutex_unlock(&sctx->work_lock);
|
pthread_mutex_unlock(&sctx->work_lock);
|
||||||
|
|
||||||
applog(LOG_INFO, "Stratum difficulty set to %g", diff);
|
applog(LOG_WARNING, "Stratum difficulty set to %g", diff);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -1221,10 +1222,6 @@ bool stratum_handle_method(struct stratum_ctx *sctx, const char *s)
|
|||||||
id = json_object_get(val, "id");
|
id = json_object_get(val, "id");
|
||||||
params = json_object_get(val, "params");
|
params = json_object_get(val, "params");
|
||||||
|
|
||||||
if (opt_debug_rpc) {
|
|
||||||
applog(LOG_DEBUG, "method: %s", s);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!strcasecmp(method, "mining.notify")) {
|
if (!strcasecmp(method, "mining.notify")) {
|
||||||
ret = stratum_notify(sctx, params);
|
ret = stratum_notify(sctx, params);
|
||||||
goto out;
|
goto out;
|
||||||
@ -1400,6 +1397,20 @@ static char* format_hash(char* buf, unsigned char *hash)
|
|||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* to debug diff in data */
|
||||||
|
extern void applog_compare_hash(unsigned char *hash, unsigned char *hash2)
|
||||||
|
{
|
||||||
|
char s[256] = "";
|
||||||
|
int len = 0;
|
||||||
|
for (int i=0; i < 32; i += 4) {
|
||||||
|
char *color = memcmp(hash+i, hash2+i, 4) ? CL_RED : CL_GRY;
|
||||||
|
len += sprintf(s+len, "%s%02x%02x%02x%02x " CL_GRY, color,
|
||||||
|
hash[i], hash[i+1], hash[i+2], hash[i+3]);
|
||||||
|
s[len] = '\0';
|
||||||
|
}
|
||||||
|
applog(LOG_DEBUG, "%s", s);
|
||||||
|
}
|
||||||
|
|
||||||
extern void applog_hash(unsigned char *hash)
|
extern void applog_hash(unsigned char *hash)
|
||||||
{
|
{
|
||||||
char s[128] = {'\0'};
|
char s[128] = {'\0'};
|
||||||
@ -1457,6 +1468,10 @@ void print_hash_tests(void)
|
|||||||
nist5hash(&hash[0], &buf[0]);
|
nist5hash(&hash[0], &buf[0]);
|
||||||
printpfx("nist5", hash);
|
printpfx("nist5", hash);
|
||||||
|
|
||||||
|
memset(hash, 0, sizeof hash);
|
||||||
|
pentablakehash(&hash[0], &buf[0]);
|
||||||
|
printpfx("pentablake", hash);
|
||||||
|
|
||||||
memset(hash, 0, sizeof hash);
|
memset(hash, 0, sizeof hash);
|
||||||
quarkhash(&hash[0], &buf[0]);
|
quarkhash(&hash[0], &buf[0]);
|
||||||
printpfx("quark", hash);
|
printpfx("quark", hash);
|
||||||
|
18
x11/x11.cu
18
x11/x11.cu
@ -21,10 +21,9 @@ extern "C"
|
|||||||
#include <memory.h>
|
#include <memory.h>
|
||||||
}
|
}
|
||||||
|
|
||||||
// aus cpu-miner.c
|
// in cpu-miner.c
|
||||||
extern int device_map[8];
|
extern int device_map[8];
|
||||||
|
|
||||||
// Speicher für Input/Output der verketteten Hashfunktionen
|
|
||||||
static uint32_t *d_hash[8];
|
static uint32_t *d_hash[8];
|
||||||
|
|
||||||
extern void quark_blake512_cpu_init(int thr_id, int threads);
|
extern void quark_blake512_cpu_init(int thr_id, int threads);
|
||||||
@ -140,22 +139,17 @@ extern "C" void x11hash(void *output, const void *input)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
extern bool opt_benchmark;
|
|
||||||
|
|
||||||
extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
|
extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
|
||||||
const uint32_t *ptarget, uint32_t max_nonce,
|
const uint32_t *ptarget, uint32_t max_nonce,
|
||||||
unsigned long *hashes_done)
|
unsigned long *hashes_done)
|
||||||
{
|
{
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
const int throughput = 256*256*8;
|
||||||
|
static bool init[8] = {0,0,0,0,0,0,0,0};
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
((uint32_t*)ptarget)[7] = 0x0000ff;
|
||||||
|
|
||||||
const uint32_t Htarg = ptarget[7];
|
|
||||||
|
|
||||||
const int throughput = 256*256*8;
|
|
||||||
|
|
||||||
static bool init[8] = {0,0,0,0,0,0,0,0};
|
|
||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
{
|
{
|
||||||
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
|
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
|
||||||
@ -186,8 +180,10 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
|
|||||||
cuda_check_cpu_setTarget(ptarget);
|
cuda_check_cpu_setTarget(ptarget);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
uint32_t foundNonce;
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
|
||||||
int order = 0;
|
int order = 0;
|
||||||
|
uint32_t foundNonce;
|
||||||
|
|
||||||
// Hash with CUDA
|
// Hash with CUDA
|
||||||
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||||
@ -204,7 +200,7 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
|
|||||||
|
|
||||||
// Scan nach Gewinner Hashes auf der GPU
|
// Scan nach Gewinner Hashes auf der GPU
|
||||||
foundNonce = cuda_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
foundNonce = cuda_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||||
if (foundNonce != 0xffffffff)
|
if (foundNonce != 0xffffffff)
|
||||||
{
|
{
|
||||||
uint32_t vhash64[8];
|
uint32_t vhash64[8];
|
||||||
be32enc(&endiandata[19], foundNonce);
|
be32enc(&endiandata[19], foundNonce);
|
||||||
|
@ -20,11 +20,11 @@ extern "C" {
|
|||||||
#include "sph/sph_hamsi.h"
|
#include "sph/sph_hamsi.h"
|
||||||
#include "sph/sph_fugue.h"
|
#include "sph/sph_fugue.h"
|
||||||
#include "sph/sph_shabal.h"
|
#include "sph/sph_shabal.h"
|
||||||
|
}
|
||||||
|
|
||||||
#include "miner.h"
|
#include "miner.h"
|
||||||
|
|
||||||
#include "cuda_helper.h"
|
#include "cuda_helper.h"
|
||||||
}
|
|
||||||
|
|
||||||
// from cpu-miner.c
|
// from cpu-miner.c
|
||||||
extern int device_map[8];
|
extern int device_map[8];
|
||||||
@ -167,8 +167,6 @@ extern "C" void x14hash(void *output, const void *input)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
extern bool opt_benchmark;
|
|
||||||
|
|
||||||
extern "C" int scanhash_x14(int thr_id, uint32_t *pdata,
|
extern "C" int scanhash_x14(int thr_id, uint32_t *pdata,
|
||||||
const uint32_t *ptarget, uint32_t max_nonce,
|
const uint32_t *ptarget, uint32_t max_nonce,
|
||||||
unsigned long *hashes_done)
|
unsigned long *hashes_done)
|
||||||
|
31
x15/x15.cu
31
x15/x15.cu
@ -21,14 +21,11 @@ extern "C" {
|
|||||||
#include "sph/sph_fugue.h"
|
#include "sph/sph_fugue.h"
|
||||||
#include "sph/sph_shabal.h"
|
#include "sph/sph_shabal.h"
|
||||||
#include "sph/sph_whirlpool.h"
|
#include "sph/sph_whirlpool.h"
|
||||||
|
}
|
||||||
|
|
||||||
#include "miner.h"
|
#include "miner.h"
|
||||||
|
|
||||||
#include "cuda_helper.h"
|
#include "cuda_helper.h"
|
||||||
}
|
|
||||||
|
|
||||||
// to test gpu hash on a null buffer
|
|
||||||
#define NULLTEST 0
|
|
||||||
|
|
||||||
// from cpu-miner.c
|
// from cpu-miner.c
|
||||||
extern int device_map[8];
|
extern int device_map[8];
|
||||||
@ -92,8 +89,6 @@ extern void quark_compactTest_cpu_init(int thr_id, int threads);
|
|||||||
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
|
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
|
||||||
uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order);
|
uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order);
|
||||||
|
|
||||||
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
|
|
||||||
|
|
||||||
// X15 CPU Hash function
|
// X15 CPU Hash function
|
||||||
extern "C" void x15hash(void *output, const void *input)
|
extern "C" void x15hash(void *output, const void *input)
|
||||||
{
|
{
|
||||||
@ -181,17 +176,6 @@ extern "C" void x15hash(void *output, const void *input)
|
|||||||
memcpy(output, hash, 32);
|
memcpy(output, hash, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if NULLTEST
|
|
||||||
static void print_hash(unsigned char *hash)
|
|
||||||
{
|
|
||||||
for (int i=0; i < 32; i += 4) {
|
|
||||||
printf("%02x%02x%02x%02x ", hash[i], hash[i+1], hash[i+2], hash[i+3]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
extern bool opt_benchmark;
|
|
||||||
|
|
||||||
extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
|
extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
|
||||||
const uint32_t *ptarget, uint32_t max_nonce,
|
const uint32_t *ptarget, uint32_t max_nonce,
|
||||||
unsigned long *hashes_done)
|
unsigned long *hashes_done)
|
||||||
@ -203,12 +187,7 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
|
|||||||
uint32_t Htarg = ptarget[7];
|
uint32_t Htarg = ptarget[7];
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
((uint32_t*)ptarget)[7] = Htarg = 0x0000ff;
|
((uint32_t*)ptarget)[7] = Htarg = 0x00FF;
|
||||||
|
|
||||||
#if NULLTEST
|
|
||||||
for (int k=0; k < 20; k++)
|
|
||||||
pdata[k] = 0;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
{
|
{
|
||||||
@ -259,12 +238,6 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
|
|||||||
x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||||
x15_whirlpool_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
x15_whirlpool_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||||
|
|
||||||
#if NULLTEST
|
|
||||||
uint32_t buf[8]; memset(buf, 0, sizeof buf);
|
|
||||||
CUDA_SAFE_CALL(cudaMemcpy(buf, d_hash[thr_id], sizeof buf, cudaMemcpyDeviceToHost));
|
|
||||||
CUDA_SAFE_CALL(cudaThreadSynchronize());
|
|
||||||
print_hash((unsigned char*)buf); printf("\n");
|
|
||||||
#endif
|
|
||||||
/* Scan with GPU */
|
/* Scan with GPU */
|
||||||
uint32_t foundNonce = cuda_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
uint32_t foundNonce = cuda_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||||
|
|
||||||
|
18
x17/x17.cu
18
x17/x17.cu
@ -26,17 +26,15 @@ extern "C"
|
|||||||
|
|
||||||
#include "sph/sph_sha2.h"
|
#include "sph/sph_sha2.h"
|
||||||
#include "sph/sph_haval.h"
|
#include "sph/sph_haval.h"
|
||||||
|
}
|
||||||
|
|
||||||
#include "miner.h"
|
#include "miner.h"
|
||||||
}
|
#include "cuda_helper.h"
|
||||||
|
|
||||||
static uint32_t *d_hash[8];
|
static uint32_t *d_hash[8];
|
||||||
|
|
||||||
|
// in cpu-miner.c
|
||||||
// cpu-miner.c
|
|
||||||
extern int device_map[8];
|
extern int device_map[8];
|
||||||
extern bool opt_benchmark;
|
|
||||||
|
|
||||||
|
|
||||||
extern void quark_blake512_cpu_init(int thr_id, int threads);
|
extern void quark_blake512_cpu_init(int thr_id, int threads);
|
||||||
extern void quark_blake512_cpu_setBlock_80(void *pdata);
|
extern void quark_blake512_cpu_setBlock_80(void *pdata);
|
||||||
@ -204,20 +202,12 @@ extern "C" int scanhash_x17(int thr_id, uint32_t *pdata,
|
|||||||
unsigned long *hashes_done)
|
unsigned long *hashes_done)
|
||||||
{
|
{
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
|
||||||
if (opt_benchmark)
|
|
||||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
|
||||||
|
|
||||||
const int throughput = 256*256*8;
|
const int throughput = 256*256*8;
|
||||||
|
|
||||||
if (opt_benchmark)
|
|
||||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
|
||||||
|
|
||||||
static bool init[8] = {0,0,0,0,0,0,0,0};
|
static bool init[8] = {0,0,0,0,0,0,0,0};
|
||||||
uint32_t Htarg = ptarget[7];
|
uint32_t Htarg = ptarget[7];
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
((uint32_t*)ptarget)[7] = Htarg = 0x0000ff;
|
((uint32_t*)ptarget)[7] = Htarg = 0x00FF;
|
||||||
|
|
||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
{
|
{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user