mirror of
https://github.com/GOSTSec/ccminer
synced 2025-01-27 23:14:27 +00:00
Merge branch 'blake-dev' into blake
This commit is contained in:
commit
474ee97d6e
@ -16,7 +16,7 @@ bin_PROGRAMS = ccminer
|
||||
ccminer_SOURCES = elist.h miner.h compat.h \
|
||||
compat/inttypes.h compat/stdbool.h compat/unistd.h \
|
||||
compat/sys/time.h compat/getopt/getopt.h \
|
||||
cpu-miner.c util.c hefty1.c scrypt.c \
|
||||
cpu-miner.c util.c crc32.c hefty1.c scrypt.c \
|
||||
hashlog.cpp \
|
||||
heavy/heavy.cu \
|
||||
heavy/cuda_blake512.cu heavy/cuda_blake512.h \
|
||||
@ -33,7 +33,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \
|
||||
quark/cuda_jh512.cu quark/cuda_quark_blake512.cu quark/cuda_quark_groestl512.cu quark/cuda_skein512.cu \
|
||||
quark/cuda_bmw512.cu quark/cuda_quark_keccak512.cu quark/quarkcoin.cu quark/animecoin.cu \
|
||||
quark/cuda_quark_compactionTest.cu \
|
||||
cuda_nist5.cu blake32.cu \
|
||||
cuda_nist5.cu blake32.cu pentablake.cu \
|
||||
sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c \
|
||||
sph/cubehash.c sph/echo.c sph/luffa.c sph/sha2.c sph/shavite.c sph/simd.c \
|
||||
sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \
|
||||
|
@ -1,5 +1,5 @@
|
||||
|
||||
ccMiner release 1.4.1-tpruvot (Sep 06th 2014) - "Cached Blake"
|
||||
ccMiner release 1.4.2-tpruvot (Sep 09th 2014) - "Pentablake"
|
||||
---------------------------------------------------------------
|
||||
|
||||
***************************************************************
|
||||
@ -35,6 +35,7 @@ TalkCoin
|
||||
DarkCoin and other X11 coins
|
||||
NEOS blake (256 14-rounds)
|
||||
BlakeCoin (256 8-rounds)
|
||||
Pentablake (Blake 512 x5)
|
||||
|
||||
where some of these coins have a VERY NOTABLE nVidia advantage
|
||||
over competing AMD (OpenCL) implementations.
|
||||
@ -65,6 +66,7 @@ its command line interface and options.
|
||||
blake use to mine NEOS (Blake 256)
|
||||
blakecoin use to mine Old Blake 256
|
||||
nist5 use to mine TalkCoin
|
||||
penta use to mine Joincoin / Pentablake
|
||||
fresh use to mine Freshcoin
|
||||
whirl use to mine Whirlcoin
|
||||
x11 use to mine DarkCoin
|
||||
|
79
blake32.cu
79
blake32.cu
@ -15,6 +15,9 @@ extern "C" {
|
||||
/* threads per block */
|
||||
#define TPB 128
|
||||
|
||||
/* crc32.c */
|
||||
extern "C" uint32_t crc32_u32t(const uint32_t *buf, size_t size);
|
||||
|
||||
extern "C" int blake256_rounds = 14;
|
||||
|
||||
/* hash by cpu with blake 256 */
|
||||
@ -41,8 +44,6 @@ extern bool opt_n_threads;
|
||||
extern bool opt_benchmark;
|
||||
extern int device_map[8];
|
||||
|
||||
uint32_t crc32(const uint32_t *buf, size_t size);
|
||||
|
||||
__constant__
|
||||
static uint32_t __align__(32) c_Target[8];
|
||||
|
||||
@ -331,7 +332,7 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
|
||||
|
||||
blake256_cpu_setBlock_80(pdata, ptarget);
|
||||
#if USE_CACHE
|
||||
crcsum = crc32(pdata, 64);
|
||||
crcsum = crc32_u32t(pdata, 64);
|
||||
#endif
|
||||
|
||||
do {
|
||||
@ -360,7 +361,8 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
|
||||
be32enc(&endiandata[19], extra_results[0]);
|
||||
blake256hash(vhashcpu, endiandata, blakerounds);
|
||||
if (vhashcpu[7] <= Htarg && fulltest(vhashcpu, ptarget)) {
|
||||
applog(LOG_NOTICE, "GPU found more than one result yippee!");
|
||||
applog(LOG_NOTICE, "GPU found more than one result " CL_GRN "yippee!");
|
||||
rc = 2;
|
||||
} else {
|
||||
extra_results[0] = MAXU;
|
||||
}
|
||||
@ -379,9 +381,14 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
|
||||
}
|
||||
}
|
||||
|
||||
if ((uint64_t) pdata[19] + throughput > (uint64_t) max_nonce) {
|
||||
pdata[19] = max_nonce - first_nonce + 1;
|
||||
break;
|
||||
}
|
||||
|
||||
pdata[19] += throughput;
|
||||
|
||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
||||
} while (!work_restart[thr_id].restart);
|
||||
|
||||
exit_scan:
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
@ -394,66 +401,6 @@ exit_scan:
|
||||
}
|
||||
#endif
|
||||
// wait proper end of all threads
|
||||
cudaDeviceSynchronize();
|
||||
//cudaDeviceSynchronize();
|
||||
return rc;
|
||||
}
|
||||
|
||||
static uint32_t crc32_tab[] = {
|
||||
0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
|
||||
0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
|
||||
0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
|
||||
0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
|
||||
0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
|
||||
0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
|
||||
0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c,
|
||||
0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
|
||||
0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
|
||||
0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
|
||||
0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
|
||||
0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
|
||||
0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
|
||||
0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
|
||||
0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
|
||||
0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
|
||||
0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
|
||||
0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
|
||||
0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
|
||||
0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
|
||||
0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
|
||||
0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
|
||||
0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
|
||||
0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
|
||||
0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
|
||||
0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
|
||||
0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
|
||||
0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
|
||||
0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
|
||||
0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
|
||||
0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
|
||||
0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
|
||||
0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
|
||||
0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
|
||||
0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
|
||||
0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
|
||||
0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
|
||||
0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
|
||||
0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
|
||||
0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
|
||||
0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
|
||||
0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
|
||||
0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
|
||||
};
|
||||
|
||||
uint32_t crc32(const uint32_t *buf, size_t size)
|
||||
{
|
||||
const uint8_t *p;
|
||||
uint32_t crc = 0;
|
||||
|
||||
p = (uint8_t *) buf;
|
||||
crc = crc ^ ~0U;
|
||||
|
||||
while (size--)
|
||||
crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8);
|
||||
|
||||
return crc ^ ~0U;
|
||||
}
|
@ -241,6 +241,7 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
|
||||
<TreatWChar_tAsBuiltInType>false</TreatWChar_tAsBuiltInType>
|
||||
<Optimization Condition="'$(Configuration)'=='Release'">Full</Optimization>
|
||||
</ClCompile>
|
||||
<ClCompile Include="crc32.c" />
|
||||
<ClCompile Include="fuguecoin.cpp" />
|
||||
<ClCompile Include="groestlcoin.cpp" />
|
||||
<ClCompile Include="hashlog.cpp" />
|
||||
@ -404,6 +405,12 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
|
||||
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
|
||||
<FastMath>true</FastMath>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="pentablake.cu">
|
||||
<MaxRegCount>80</MaxRegCount>
|
||||
<AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options="-O2 -dlcm=cg" %(AdditionalOptions)</AdditionalOptions>
|
||||
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
|
||||
<FastMath>true</FastMath>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="quark\animecoin.cu">
|
||||
<AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
|
||||
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
|
||||
|
@ -96,6 +96,9 @@
|
||||
<ClCompile Include="cpu-miner.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="crc32.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="hefty1.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
@ -442,5 +445,8 @@
|
||||
<CudaCompile Include="blake32.cu">
|
||||
<Filter>Source Files\CUDA</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="pentablake.cu">
|
||||
<Filter>Source Files\CUDA</Filter>
|
||||
</CudaCompile>
|
||||
</ItemGroup>
|
||||
</Project>
|
@ -1,4 +1,4 @@
|
||||
AC_INIT([ccminer], [2014.09.06])
|
||||
AC_INIT([ccminer], [2014.09.09])
|
||||
|
||||
AC_PREREQ([2.59c])
|
||||
AC_CANONICAL_SYSTEM
|
||||
|
142
cpu-miner.c
142
cpu-miner.c
@ -136,8 +136,9 @@ typedef enum {
|
||||
ALGO_JACKPOT,
|
||||
ALGO_MJOLLNIR, /* Mjollnir hash */
|
||||
ALGO_MYR_GR,
|
||||
ALGO_QUARK,
|
||||
ALGO_NIST5,
|
||||
ALGO_PENTABLAKE,
|
||||
ALGO_QUARK,
|
||||
ALGO_WHC,
|
||||
ALGO_X11,
|
||||
ALGO_X13,
|
||||
@ -159,6 +160,7 @@ static const char *algo_names[] = {
|
||||
"mjollnir",
|
||||
"myr-gr",
|
||||
"nist5",
|
||||
"penta",
|
||||
"quark",
|
||||
"whirl",
|
||||
"x11",
|
||||
@ -170,7 +172,6 @@ static const char *algo_names[] = {
|
||||
};
|
||||
|
||||
bool opt_debug = false;
|
||||
bool opt_debug_rpc = false;
|
||||
bool opt_protocol = false;
|
||||
bool opt_benchmark = false;
|
||||
bool want_longpoll = true;
|
||||
@ -179,7 +180,7 @@ bool want_stratum = true;
|
||||
bool have_stratum = false;
|
||||
static bool submit_old = false;
|
||||
bool use_syslog = false;
|
||||
bool use_colors = false;
|
||||
bool use_colors = true;
|
||||
static bool opt_background = false;
|
||||
bool opt_quiet = false;
|
||||
static int opt_retries = -1;
|
||||
@ -242,6 +243,7 @@ Options:\n\
|
||||
mjollnir Mjollnircoin hash\n\
|
||||
myr-gr Myriad-Groestl hash\n\
|
||||
nist5 NIST5 (TalkCoin) hash\n\
|
||||
penta Pentablake hash (5x Blake 512)\n\
|
||||
quark Quark hash\n\
|
||||
whirl Whirlcoin (old whirlpool)\n\
|
||||
x11 X11 (DarkCoin) hash\n\
|
||||
@ -437,7 +439,7 @@ static int share_result(int result, const char *reason)
|
||||
(result ? CL_GRN "yay!!!" : CL_RED "booooo")
|
||||
: (result ? "(yay!!!)" : "(booooo)"));
|
||||
|
||||
if (reason && !opt_quiet) {
|
||||
if (reason) {
|
||||
applog(LOG_WARNING, "reject reason: %s", reason);
|
||||
if (strncmp(reason, "low difficulty share", 20) == 0) {
|
||||
opt_difficulty = (opt_difficulty * 2.0) / 3.0;
|
||||
@ -457,11 +459,14 @@ static bool submit_upstream_work(CURL *curl, struct work *work)
|
||||
bool rc = false;
|
||||
|
||||
/* pass if the previous hash is not the current previous hash */
|
||||
pthread_mutex_lock(&g_work_lock);
|
||||
if (memcmp(work->data + 1, g_work.data + 1, 32)) {
|
||||
pthread_mutex_unlock(&g_work_lock);
|
||||
if (opt_debug)
|
||||
applog(LOG_DEBUG, "DEBUG: stale work detected, discarding");
|
||||
return true;
|
||||
}
|
||||
pthread_mutex_unlock(&g_work_lock);
|
||||
|
||||
if (have_stratum) {
|
||||
uint32_t sent;
|
||||
@ -544,10 +549,6 @@ static bool submit_upstream_work(CURL *curl, struct work *work)
|
||||
json_decref(val);
|
||||
}
|
||||
|
||||
if (opt_debug_rpc) {
|
||||
applog(LOG_DEBUG, "submit: %s", s);
|
||||
}
|
||||
|
||||
rc = true;
|
||||
|
||||
out:
|
||||
@ -786,13 +787,20 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
|
||||
memcpy(work->xnonce2, sctx->job.xnonce2, sctx->xnonce2_size);
|
||||
|
||||
/* Generate merkle root */
|
||||
if (opt_algo == ALGO_HEAVY || opt_algo == ALGO_MJOLLNIR)
|
||||
heavycoin_hash(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size);
|
||||
else
|
||||
if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL || opt_algo == ALGO_WHC || opt_algo == ALGO_BLAKECOIN)
|
||||
SHA256((unsigned char*)sctx->job.coinbase, sctx->job.coinbase_size, (unsigned char*)merkle_root);
|
||||
else
|
||||
sha256d(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size);
|
||||
switch (opt_algo) {
|
||||
case ALGO_HEAVY:
|
||||
case ALGO_MJOLLNIR:
|
||||
heavycoin_hash(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size);
|
||||
break;
|
||||
case ALGO_FUGUE256:
|
||||
case ALGO_GROESTL:
|
||||
case ALGO_BLAKECOIN:
|
||||
case ALGO_WHC:
|
||||
SHA256((uint8_t*)sctx->job.coinbase, sctx->job.coinbase_size, (uint8_t*)merkle_root);
|
||||
break;
|
||||
default:
|
||||
sha256d(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size);
|
||||
}
|
||||
|
||||
for (i = 0; i < sctx->job.merkle_count; i++) {
|
||||
memcpy(merkle_root + 32, sctx->job.merkle[i], 32);
|
||||
@ -864,7 +872,9 @@ static void *miner_thread(void *userdata)
|
||||
uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - (thr_id + 1);
|
||||
unsigned char *scratchbuf = NULL;
|
||||
bool work_done = false;
|
||||
bool extrajob = false;
|
||||
char s[16];
|
||||
int rc = 0;
|
||||
|
||||
memset(&work, 0, sizeof(work)); // prevent work from being used uninitialized
|
||||
|
||||
@ -891,20 +901,31 @@ static void *miner_thread(void *userdata)
|
||||
struct timeval tv_start, tv_end, diff;
|
||||
int64_t max64;
|
||||
uint64_t umax64;
|
||||
int rc;
|
||||
|
||||
// &work.data[19]
|
||||
int wcmplen = 76;
|
||||
uint32_t *nonceptr = (uint32_t*) (((char*)work.data) + wcmplen);
|
||||
|
||||
if (have_stratum) {
|
||||
while (time(NULL) >= (g_work_time + opt_scantime) && !work_done)
|
||||
usleep(500*1000);
|
||||
work_done = false;
|
||||
pthread_mutex_lock(&g_work_lock);
|
||||
uint32_t sleeptime = 0;
|
||||
while (!work_done && time(NULL) >= (g_work_time + opt_scantime)) {
|
||||
usleep(100*1000);
|
||||
if (sleeptime > 4) {
|
||||
extrajob = true;
|
||||
break;
|
||||
}
|
||||
sleeptime++;
|
||||
}
|
||||
if (sleeptime && opt_debug && !opt_quiet)
|
||||
applog(LOG_DEBUG, "sleeptime: %u ms", sleeptime*100);
|
||||
nonceptr = (uint32_t*) (((char*)work.data) + wcmplen);
|
||||
if ((*nonceptr) >= end_nonce)
|
||||
pthread_mutex_lock(&g_work_lock);
|
||||
extrajob |= work_done;
|
||||
if ((*nonceptr) >= end_nonce || extrajob) {
|
||||
work_done = false;
|
||||
extrajob = false;
|
||||
stratum_gen_work(&stratum, &g_work);
|
||||
}
|
||||
} else {
|
||||
int min_scantime = have_longpoll ? LP_SCANTIME : opt_scantime;
|
||||
/* obtain new work from internal workio thread */
|
||||
@ -920,34 +941,54 @@ static void *miner_thread(void *userdata)
|
||||
g_work_time = time(NULL);
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
if (!opt_benchmark && g_work.xnonce2_len == 0) {
|
||||
applog(LOG_ERR, "work data not read yet");
|
||||
extrajob = true;
|
||||
work_done = true;
|
||||
sleep(1);
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
if (rc > 1) {
|
||||
/* if we found more than one on last loop */
|
||||
/* todo: handle an array to get them directly */
|
||||
pthread_mutex_unlock(&g_work_lock);
|
||||
goto continue_scan;
|
||||
}
|
||||
|
||||
if (memcmp(work.target, g_work.target, sizeof(work.target))) {
|
||||
if (opt_debug) {
|
||||
applog(LOG_DEBUG, "job %s target change:", g_work.job_id);
|
||||
applog_hash((uint8_t*) work.target);
|
||||
applog_compare_hash((uint8_t*) g_work.target, (uint8_t*) work.target);
|
||||
}
|
||||
memcpy(work.target, g_work.target, sizeof(work.target));
|
||||
(*nonceptr) = (0xffffffffUL / opt_n_threads) * thr_id; // 0 if single thr
|
||||
/* on new target, ignoring nonce, clear sent data (hashlog) */
|
||||
if (memcmp(work.target, g_work.target, sizeof(work.target))) {
|
||||
hashlog_purge_job(work.job_id);
|
||||
}
|
||||
}
|
||||
if (memcmp(work.data, g_work.data, wcmplen)) {
|
||||
if (opt_debug) {
|
||||
applog(LOG_DEBUG, "job %s work updated", g_work.job_id);
|
||||
for (int n=0; n<wcmplen; n+=8) {
|
||||
for (int n=0; n <= (wcmplen-8); n+=8) {
|
||||
if (memcmp(work.data + n, g_work.data + n, 8)) {
|
||||
applog(LOG_DEBUG, "diff detected at offset %d", n);
|
||||
applog(LOG_DEBUG, "job %s work updated at offset %d:", g_work.job_id, n);
|
||||
applog_hash((uint8_t*) work.data + n);
|
||||
applog_hash((uint8_t*) g_work.data + n);
|
||||
applog_compare_hash((uint8_t*) g_work.data + n, (uint8_t*) work.data + n);
|
||||
}
|
||||
}
|
||||
}
|
||||
memcpy(&work, &g_work, sizeof(struct work));
|
||||
(*nonceptr) = (0xffffffffUL / opt_n_threads) * thr_id; // 0 if single thr
|
||||
} else if (memcmp(work.target, g_work.target, sizeof(work.target))) {
|
||||
if (opt_debug) {
|
||||
applog(LOG_DEBUG, "job %s target change", g_work.job_id);
|
||||
applog_hash((uint8_t*) work.target);
|
||||
applog_hash((uint8_t*) g_work.target);
|
||||
}
|
||||
memcpy(work.target, g_work.target, sizeof(work.target));
|
||||
(*nonceptr) = (0xffffffffUL / opt_n_threads) * thr_id; // 0 if single thr
|
||||
} else
|
||||
(*nonceptr)++; //??
|
||||
pthread_mutex_unlock(&g_work_lock);
|
||||
work_restart[thr_id].restart = 0;
|
||||
|
||||
if (opt_debug)
|
||||
applog(LOG_WARNING, "job %s %08x", g_work.job_id, (*nonceptr));
|
||||
pthread_mutex_unlock(&g_work_lock);
|
||||
|
||||
/* adjust max_nonce to meet target scan time */
|
||||
if (have_stratum)
|
||||
@ -959,15 +1000,18 @@ static void *miner_thread(void *userdata)
|
||||
max64 *= (int64_t)thr_hashrates[thr_id];
|
||||
|
||||
if (max64 <= 0) {
|
||||
/* should not be set too high,
|
||||
else you can miss multiple nounces */
|
||||
switch (opt_algo) {
|
||||
case ALGO_JACKPOT:
|
||||
max64 = 0x1fffLL;
|
||||
break;
|
||||
case ALGO_BLAKECOIN:
|
||||
max64 = 0x3ffffffLL;
|
||||
break;
|
||||
case ALGO_BLAKE:
|
||||
/* based on the 750Ti hashrate (100kH) */
|
||||
max64 = 0x3ffffffLL;
|
||||
max64 = 0x1ffffffLL;
|
||||
break;
|
||||
default:
|
||||
max64 = 0xfffffLL;
|
||||
@ -1000,12 +1044,12 @@ static void *miner_thread(void *userdata)
|
||||
stall |= (start_nonce > range.scanned[0] && start_nonce < range.scanned[1]);
|
||||
|
||||
if (stall) {
|
||||
if (opt_algo)
|
||||
if (opt_debug && !opt_quiet)
|
||||
applog(LOG_DEBUG, "job done, wait for a new one...");
|
||||
work_restart[thr_id].restart = 1;
|
||||
hashlog_purge_old();
|
||||
// wait a bit for a new job...
|
||||
sleep(1);
|
||||
usleep(500*1000);
|
||||
(*nonceptr) = end_nonce + 1;
|
||||
work_done = true;
|
||||
continue;
|
||||
@ -1023,6 +1067,7 @@ static void *miner_thread(void *userdata)
|
||||
(*nonceptr) = start_nonce;
|
||||
|
||||
hashes_done = 0;
|
||||
continue_scan:
|
||||
gettimeofday(&tv_start, NULL);
|
||||
|
||||
/* scan nonces for a proof-of-work hash */
|
||||
@ -1089,6 +1134,11 @@ static void *miner_thread(void *userdata)
|
||||
max_nonce, &hashes_done);
|
||||
break;
|
||||
|
||||
case ALGO_PENTABLAKE:
|
||||
rc = scanhash_pentablake(thr_id, work.data, work.target,
|
||||
max_nonce, &hashes_done);
|
||||
break;
|
||||
|
||||
case ALGO_WHC:
|
||||
rc = scanhash_whc(thr_id, work.data, work.target,
|
||||
max_nonce, &hashes_done);
|
||||
@ -1133,8 +1183,11 @@ static void *miner_thread(void *userdata)
|
||||
timeval_subtract(&diff, &tv_end, &tv_start);
|
||||
if (diff.tv_usec || diff.tv_sec) {
|
||||
pthread_mutex_lock(&stats_lock);
|
||||
thr_hashrates[thr_id] =
|
||||
hashes_done / (diff.tv_sec + 1e-6 * diff.tv_usec);
|
||||
if (diff.tv_sec + 1e-6 * diff.tv_usec > 0.0) {
|
||||
thr_hashrates[thr_id] = hashes_done / (diff.tv_sec + 1e-6 * diff.tv_usec);
|
||||
if (rc > 1)
|
||||
thr_hashrates[thr_id] = (rc * hashes_done) / (diff.tv_sec + 1e-6 * diff.tv_usec);
|
||||
}
|
||||
pthread_mutex_unlock(&stats_lock);
|
||||
}
|
||||
if (!opt_quiet) {
|
||||
@ -1342,7 +1395,6 @@ static void *stratum_thread(void *userdata)
|
||||
pthread_mutex_lock(&g_work_lock);
|
||||
stratum_gen_work(&stratum, &g_work);
|
||||
time(&g_work_time);
|
||||
pthread_mutex_unlock(&g_work_lock);
|
||||
if (stratum.job.clean) {
|
||||
if (!opt_quiet)
|
||||
applog(LOG_BLUE, "%s send a new %s block %d", short_url, algo_names[opt_algo],
|
||||
@ -1353,6 +1405,7 @@ static void *stratum_thread(void *userdata)
|
||||
applog(LOG_BLUE, "%s send job %d for block %d", short_url,
|
||||
strtoul(stratum.job.job_id, NULL, 16), stratum.bloc_height);
|
||||
}
|
||||
pthread_mutex_unlock(&g_work_lock);
|
||||
}
|
||||
|
||||
if (!stratum_socket_full(&stratum, 120)) {
|
||||
@ -1374,7 +1427,7 @@ out:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#define PROGRAM_VERSION "1.4.1"
|
||||
#define PROGRAM_VERSION "1.4.2"
|
||||
static void show_version_and_exit(void)
|
||||
{
|
||||
printf("%s v%s\n"
|
||||
@ -1438,12 +1491,11 @@ static void parse_arg (int key, char *arg)
|
||||
case 'C':
|
||||
use_colors = true;
|
||||
break;
|
||||
case 'q':
|
||||
opt_quiet = true;
|
||||
break;
|
||||
case 'D':
|
||||
opt_debug = true;
|
||||
opt_debug_rpc = true;
|
||||
break;
|
||||
case 'q':
|
||||
opt_quiet = true;
|
||||
break;
|
||||
case 'p':
|
||||
free(rpc_pass);
|
||||
|
119
crc32.c
Normal file
119
crc32.c
Normal file
@ -0,0 +1,119 @@
|
||||
/*-
|
||||
* COPYRIGHT (C) 1986 Gary S. Brown. You may use this program, or
|
||||
* code or tables extracted from it, as desired without restriction.
|
||||
*
|
||||
* First, the polynomial itself and its table of feedback terms. The
|
||||
* polynomial is
|
||||
* X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
|
||||
*
|
||||
* Note that we take it "backwards" and put the highest-order term in
|
||||
* the lowest-order bit. The X^32 term is "implied"; the LSB is the
|
||||
* X^31 term, etc. The X^0 term (usually shown as "+1") results in
|
||||
* the MSB being 1
|
||||
*
|
||||
* Note that the usual hardware shift register implementation, which
|
||||
* is what we're using (we're merely optimizing it by doing eight-bit
|
||||
* chunks at a time) shifts bits into the lowest-order term. In our
|
||||
* implementation, that means shifting towards the right. Why do we
|
||||
* do it this way? Because the calculated CRC must be transmitted in
|
||||
* order from highest-order term to lowest-order term. UARTs transmit
|
||||
* characters in order from LSB to MSB. By storing the CRC this way
|
||||
* we hand it to the UART in the order low-byte to high-byte; the UART
|
||||
* sends each low-bit to hight-bit; and the result is transmission bit
|
||||
* by bit from highest- to lowest-order term without requiring any bit
|
||||
* shuffling on our part. Reception works similarly
|
||||
*
|
||||
* The feedback terms table consists of 256, 32-bit entries. Notes
|
||||
*
|
||||
* The table can be generated at runtime if desired; code to do so
|
||||
* is shown later. It might not be obvious, but the feedback
|
||||
* terms simply represent the results of eight shift/xor opera
|
||||
* tions for all combinations of data and CRC register values
|
||||
*
|
||||
* The values must be right-shifted by eight bits by the "updcrc
|
||||
* logic; the shift must be unsigned (bring in zeroes). On some
|
||||
* hardware you could probably optimize the shift in assembler by
|
||||
* using byte-swap instructions
|
||||
* polynomial $edb88320
|
||||
*
|
||||
*
|
||||
* CRC32 code derived from work by Gary S. Brown.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
static uint32_t crc32_tab[] = {
|
||||
0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
|
||||
0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
|
||||
0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
|
||||
0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
|
||||
0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
|
||||
0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
|
||||
0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c,
|
||||
0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
|
||||
0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
|
||||
0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
|
||||
0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
|
||||
0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
|
||||
0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
|
||||
0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
|
||||
0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
|
||||
0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
|
||||
0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
|
||||
0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
|
||||
0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
|
||||
0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
|
||||
0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
|
||||
0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
|
||||
0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
|
||||
0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
|
||||
0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
|
||||
0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
|
||||
0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
|
||||
0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
|
||||
0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
|
||||
0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
|
||||
0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
|
||||
0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
|
||||
0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
|
||||
0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
|
||||
0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
|
||||
0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
|
||||
0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
|
||||
0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
|
||||
0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
|
||||
0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
|
||||
0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
|
||||
0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
|
||||
0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
|
||||
};
|
||||
|
||||
/* Real CRC32 Function */
|
||||
extern uint32_t crc32(uint32_t crc, const void *buf, size_t size)
|
||||
{
|
||||
const uint8_t *p;
|
||||
|
||||
p = buf;
|
||||
crc = crc ^ ~0U;
|
||||
|
||||
while (size--)
|
||||
crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8);
|
||||
|
||||
return crc ^ ~0U;
|
||||
}
|
||||
|
||||
/* CRC32 Function simplified for ccminer */
|
||||
extern uint32_t crc32_u32t(const uint32_t *buf, size_t size)
|
||||
{
|
||||
const uint8_t *p;
|
||||
uint32_t crc = 0;
|
||||
|
||||
p = (uint8_t *) buf;
|
||||
crc = crc ^ ~0U;
|
||||
|
||||
while (size--)
|
||||
crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8);
|
||||
|
||||
return crc ^ ~0U;
|
||||
}
|
@ -12,6 +12,8 @@
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
|
||||
|
||||
extern __device__ __device_builtin__ void __syncthreads(void);
|
||||
|
||||
#ifndef __CUDA_ARCH__
|
||||
|
@ -5,10 +5,12 @@ extern "C"
|
||||
#include "sph/sph_skein.h"
|
||||
#include "sph/sph_jh.h"
|
||||
#include "sph/sph_keccak.h"
|
||||
#include "miner.h"
|
||||
#include "cuda_helper.h"
|
||||
}
|
||||
|
||||
#include "miner.h"
|
||||
|
||||
#include "cuda_helper.h"
|
||||
|
||||
// aus cpu-miner.c
|
||||
extern int device_map[8];
|
||||
|
||||
@ -74,9 +76,6 @@ extern "C" void nist5hash(void *state, const void *input)
|
||||
memcpy(state, hash, 32);
|
||||
}
|
||||
|
||||
|
||||
extern bool opt_benchmark;
|
||||
|
||||
extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata,
|
||||
const uint32_t *ptarget, uint32_t max_nonce,
|
||||
unsigned long *hashes_done)
|
||||
@ -84,7 +83,7 @@ extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
||||
((uint32_t*)ptarget)[7] = 0x00FF;
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
|
8
miner.h
8
miner.h
@ -249,6 +249,10 @@ extern int scanhash_nist5(int thr_id, uint32_t *pdata,
|
||||
const uint32_t *ptarget, uint32_t max_nonce,
|
||||
unsigned long *hashes_done);
|
||||
|
||||
extern int scanhash_pentablake(int thr_id, uint32_t *pdata,
|
||||
const uint32_t *ptarget, uint32_t max_nonce,
|
||||
unsigned long *hashes_done);
|
||||
|
||||
extern int scanhash_whc(int thr_id, uint32_t *pdata,
|
||||
const uint32_t *ptarget, uint32_t max_nonce,
|
||||
unsigned long *hashes_done);
|
||||
@ -284,8 +288,8 @@ struct work_restart {
|
||||
char padding[128 - sizeof(unsigned long)];
|
||||
};
|
||||
|
||||
extern bool opt_benchmark;
|
||||
extern bool opt_debug;
|
||||
extern bool opt_debug_rpc;
|
||||
extern bool opt_quiet;
|
||||
extern bool opt_protocol;
|
||||
extern int opt_timeout;
|
||||
@ -417,6 +421,7 @@ size_t time2str(char* buf, time_t timer);
|
||||
char* atime2str(time_t timer);
|
||||
|
||||
void applog_hash(unsigned char *hash);
|
||||
void applog_compare_hash(unsigned char *hash, unsigned char *hash2);
|
||||
|
||||
void print_hash_tests(void);
|
||||
void animehash(void *state, const void *input);
|
||||
@ -428,6 +433,7 @@ unsigned int jackpothash(void *state, const void *input);
|
||||
void groestlhash(void *state, const void *input);
|
||||
void myriadhash(void *state, const void *input);
|
||||
void nist5hash(void *state, const void *input);
|
||||
void pentablakehash(void *output, const void *input);
|
||||
void quarkhash(void *state, const void *input);
|
||||
void wcoinhash(void *state, const void *input);
|
||||
void x11hash(void *output, const void *input);
|
||||
|
600
pentablake.cu
Normal file
600
pentablake.cu
Normal file
@ -0,0 +1,600 @@
|
||||
/**
|
||||
* Penta Blake-512 Cuda Kernel (Tested on SM 5.0)
|
||||
*
|
||||
* Tanguy Pruvot - Aug. 2014
|
||||
*/
|
||||
|
||||
#include "miner.h"
|
||||
|
||||
extern "C" {
|
||||
#include "sph/sph_blake.h"
|
||||
#include <stdint.h>
|
||||
#include <memory.h>
|
||||
}
|
||||
|
||||
/* threads per block */
|
||||
#define TPB 192
|
||||
|
||||
/* hash by cpu with blake 256 */
|
||||
extern "C" void pentablakehash(void *output, const void *input)
|
||||
{
|
||||
unsigned char hash[128];
|
||||
#define hashB hash + 64
|
||||
sph_blake512_context ctx;
|
||||
|
||||
sph_blake512_init(&ctx);
|
||||
sph_blake512(&ctx, input, 80);
|
||||
sph_blake512_close(&ctx, hash);
|
||||
|
||||
sph_blake512(&ctx, hash, 64);
|
||||
sph_blake512_close(&ctx, hashB);
|
||||
|
||||
sph_blake512(&ctx, hashB, 64);
|
||||
sph_blake512_close(&ctx, hash);
|
||||
|
||||
sph_blake512(&ctx, hash, 64);
|
||||
sph_blake512_close(&ctx, hashB);
|
||||
|
||||
sph_blake512(&ctx, hashB, 64);
|
||||
sph_blake512_close(&ctx, hash);
|
||||
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
#include "cuda_helper.h"
|
||||
|
||||
#define MAXU 0xffffffffU
|
||||
|
||||
// in cpu-miner.c
|
||||
extern bool opt_n_threads;
|
||||
extern bool opt_benchmark;
|
||||
extern int device_map[8];
|
||||
|
||||
__constant__
|
||||
static uint32_t __align__(32) c_Target[8];
|
||||
|
||||
__constant__
|
||||
static uint64_t __align__(32) c_data[32];
|
||||
|
||||
static uint32_t *d_hash[8];
|
||||
static uint32_t *d_resNounce[8];
|
||||
static uint32_t *h_resNounce[8];
|
||||
static uint32_t extra_results[2] = { MAXU, MAXU };
|
||||
|
||||
/* prefer uint32_t to prevent size conversions = speed +5/10 % */
|
||||
__constant__
|
||||
static uint32_t __align__(32) c_sigma[16][16];
|
||||
const uint32_t host_sigma[16][16] = {
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
|
||||
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
|
||||
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
|
||||
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
|
||||
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
|
||||
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
|
||||
{12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
|
||||
{13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
|
||||
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
|
||||
{10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 },
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
|
||||
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
|
||||
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
|
||||
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
|
||||
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
|
||||
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }
|
||||
};
|
||||
|
||||
__device__ __constant__
|
||||
static const uint64_t __align__(32) c_IV512[8] = {
|
||||
0x6a09e667f3bcc908ULL,
|
||||
0xbb67ae8584caa73bULL,
|
||||
0x3c6ef372fe94f82bULL,
|
||||
0xa54ff53a5f1d36f1ULL,
|
||||
0x510e527fade682d1ULL,
|
||||
0x9b05688c2b3e6c1fULL,
|
||||
0x1f83d9abfb41bd6bULL,
|
||||
0x5be0cd19137e2179ULL
|
||||
};
|
||||
|
||||
__device__ __constant__
|
||||
const uint64_t c_u512[16] =
|
||||
{
|
||||
0x243f6a8885a308d3ULL, 0x13198a2e03707344ULL,
|
||||
0xa4093822299f31d0ULL, 0x082efa98ec4e6c89ULL,
|
||||
0x452821e638d01377ULL, 0xbe5466cf34e90c6cULL,
|
||||
0xc0ac29b7c97c50ddULL, 0x3f84d5b5b5470917ULL,
|
||||
0x9216d5d98979fb1bULL, 0xd1310ba698dfb5acULL,
|
||||
0x2ffd72dbd01adfb7ULL, 0xb8e1afed6a267e96ULL,
|
||||
0xba7c9045f12c7f99ULL, 0x24a19947b3916cf7ULL,
|
||||
0x0801f2e2858efc16ULL, 0x636920d871574e69ULL
|
||||
};
|
||||
|
||||
#define G(a,b,c,d,x) { \
|
||||
uint32_t idx1 = c_sigma[i][x]; \
|
||||
uint32_t idx2 = c_sigma[i][x+1]; \
|
||||
v[a] += (m[idx1] ^ c_u512[idx2]) + v[b]; \
|
||||
v[d] = ROTR64(v[d] ^ v[a], 32); \
|
||||
v[c] += v[d]; \
|
||||
v[b] = ROTR64(v[b] ^ v[c], 25); \
|
||||
v[a] += (m[idx2] ^ c_u512[idx1]) + v[b]; \
|
||||
v[d] = ROTR64(v[d] ^ v[a], 16); \
|
||||
v[c] += v[d]; \
|
||||
v[b] = ROTR64(v[b] ^ v[c], 11); \
|
||||
}
|
||||
|
||||
// Hash-Padding
|
||||
__device__ __constant__
|
||||
static const uint64_t d_constHashPadding[8] = {
|
||||
0x0000000000000080ull,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0x0100000000000000ull,
|
||||
0,
|
||||
0x0002000000000000ull
|
||||
};
|
||||
|
||||
#if 0
|
||||
|
||||
__device__ __constant__
|
||||
static const uint64_t __align__(32) c_Padding[16] = {
|
||||
0, 0, 0, 0,
|
||||
0x80000000ULL, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 1, 0, 640,
|
||||
};
|
||||
|
||||
__device__ static
|
||||
void pentablake_compress(uint64_t *h, const uint64_t *block, const uint32_t T0)
|
||||
{
|
||||
uint64_t v[16], m[16];
|
||||
|
||||
m[0] = block[0];
|
||||
m[1] = block[1];
|
||||
m[2] = block[2];
|
||||
m[3] = block[3];
|
||||
|
||||
for (uint32_t i = 4; i < 16; i++) {
|
||||
m[i] = (T0 == 0x200) ? block[i] : c_Padding[i];
|
||||
}
|
||||
|
||||
//#pragma unroll 8
|
||||
for(uint32_t i = 0; i < 8; i++)
|
||||
v[i] = h[i];
|
||||
|
||||
v[ 8] = c_u512[0];
|
||||
v[ 9] = c_u512[1];
|
||||
v[10] = c_u512[2];
|
||||
v[11] = c_u512[3];
|
||||
|
||||
v[12] = xor1(c_u512[4], T0);
|
||||
v[13] = xor1(c_u512[5], T0);
|
||||
v[14] = c_u512[6];
|
||||
v[15] = c_u512[7];
|
||||
|
||||
for (uint32_t i = 0; i < 16; i++) {
|
||||
/* column step */
|
||||
G(0, 4, 0x8, 0xC, 0x0);
|
||||
G(1, 5, 0x9, 0xD, 0x2);
|
||||
G(2, 6, 0xA, 0xE, 0x4);
|
||||
G(3, 7, 0xB, 0xF, 0x6);
|
||||
/* diagonal step */
|
||||
G(0, 5, 0xA, 0xF, 0x8);
|
||||
G(1, 6, 0xB, 0xC, 0xA);
|
||||
G(2, 7, 0x8, 0xD, 0xC);
|
||||
G(3, 4, 0x9, 0xE, 0xE);
|
||||
}
|
||||
|
||||
//#pragma unroll 16
|
||||
for (uint32_t i = 0; i < 16; i++) {
|
||||
uint32_t j = i % 8;
|
||||
h[j] ^= v[i];
|
||||
}
|
||||
}
|
||||
|
||||
__global__
|
||||
void pentablake_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint32_t *resNounce)
|
||||
{
|
||||
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||
if (thread < threads)
|
||||
{
|
||||
const uint32_t nounce = startNounce + thread;
|
||||
uint64_t h[8];
|
||||
|
||||
#pragma unroll
|
||||
for(int i=0; i<8; i++) {
|
||||
h[i] = c_IV512[i];
|
||||
}
|
||||
|
||||
uint64_t ending[4];
|
||||
ending[0] = c_data[16];
|
||||
ending[1] = c_data[17];
|
||||
ending[2] = c_data[18];
|
||||
ending[3] = nounce; /* our tested value */
|
||||
|
||||
pentablake_compress(h, ending, 640);
|
||||
|
||||
// -----------------------------------
|
||||
|
||||
for (int r = 0; r < 4; r++) {
|
||||
uint64_t data[8];
|
||||
for (int i = 0; i < 7; i++) {
|
||||
data[i] = h[i];
|
||||
}
|
||||
pentablake_compress(h, data, 512); /* todo: use h,h when ok*/
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
__device__ static
|
||||
void pentablake_compress(uint64_t *h, const uint64_t *block, const uint64_t T0)
|
||||
{
|
||||
uint64_t v[16], m[16], i;
|
||||
|
||||
#pragma unroll 16
|
||||
for(i = 0; i < 16; i++) {
|
||||
m[i] = cuda_swab64(block[i]);
|
||||
}
|
||||
|
||||
#pragma unroll 8
|
||||
for (i = 0; i < 8; i++)
|
||||
v[i] = h[i];
|
||||
|
||||
v[ 8] = c_u512[0];
|
||||
v[ 9] = c_u512[1];
|
||||
v[10] = c_u512[2];
|
||||
v[11] = c_u512[3];
|
||||
v[12] = c_u512[4] ^ T0;
|
||||
v[13] = c_u512[5] ^ T0;
|
||||
v[14] = c_u512[6];
|
||||
v[15] = c_u512[7];
|
||||
|
||||
//#pragma unroll 16
|
||||
for( i = 0; i < 16; i++)
|
||||
{
|
||||
/* column step */
|
||||
G(0, 4, 0x8, 0xC, 0x0);
|
||||
G(1, 5, 0x9, 0xD, 0x2);
|
||||
G(2, 6, 0xA, 0xE, 0x4);
|
||||
G(3, 7, 0xB, 0xF, 0x6);
|
||||
/* diagonal step */
|
||||
G(0, 5, 0xA, 0xF, 0x8);
|
||||
G(1, 6, 0xB, 0xC, 0xA);
|
||||
G(2, 7, 0x8, 0xD, 0xC);
|
||||
G(3, 4, 0x9, 0xE, 0xE);
|
||||
}
|
||||
|
||||
//#pragma unroll 16
|
||||
for (i = 0; i < 16; i++) {
|
||||
uint32_t idx = i % 8;
|
||||
h[idx] ^= v[i];
|
||||
}
|
||||
}
|
||||
|
||||
__global__
|
||||
void pentablake_gpu_hash_80(int threads, const uint32_t startNounce, void *outputHash)
|
||||
{
|
||||
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||
if (thread < threads)
|
||||
{
|
||||
uint64_t h[8];
|
||||
uint64_t buf[16];
|
||||
uint32_t nounce = startNounce + thread;
|
||||
|
||||
//#pragma unroll 8
|
||||
for(int i=0; i<8; i++)
|
||||
h[i] = c_IV512[i];
|
||||
|
||||
//#pragma unroll 16
|
||||
for (int i=0; i < 16; i++)
|
||||
buf[i] = c_data[i];
|
||||
|
||||
// The test Nonce
|
||||
((uint32_t*)buf)[19] = cuda_swab32(nounce);
|
||||
|
||||
pentablake_compress(h, buf, 640ULL);
|
||||
|
||||
#if __CUDA_ARCH__ < 300
|
||||
uint32_t *outHash = (uint32_t *)outputHash + 16 * thread;
|
||||
#pragma unroll 8
|
||||
for (uint32_t i=0; i < 8; i++) {
|
||||
outHash[2*i] = cuda_swab32( _HIWORD(h[i]) );
|
||||
outHash[2*i+1] = cuda_swab32( _LOWORD(h[i]) );
|
||||
}
|
||||
#else
|
||||
uint64_t *outHash = (uint64_t *)outputHash + 8 * thread;
|
||||
for (uint32_t i=0; i < 8; i++) {
|
||||
outHash[i] = cuda_swab64( h[i] );
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
__host__
|
||||
void pentablake_cpu_hash_80(int thr_id, int threads, const uint32_t startNounce, uint32_t *d_outputHash, int order)
|
||||
{
|
||||
const int threadsperblock = TPB;
|
||||
|
||||
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
||||
dim3 block(threadsperblock);
|
||||
size_t shared_size = 0;
|
||||
|
||||
pentablake_gpu_hash_80 <<<grid, block, shared_size>>> (threads, startNounce, d_outputHash);
|
||||
|
||||
//MyStreamSynchronize(NULL, order, thr_id);
|
||||
cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
|
||||
__global__
|
||||
void pentablake_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash)
|
||||
{
|
||||
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||
|
||||
if (thread < threads)
|
||||
{
|
||||
uint64_t *inpHash = &g_hash[thread<<3]; // hashPosition * 8
|
||||
uint64_t buf[16]; // 128 Bytes
|
||||
uint64_t h[8]; // State
|
||||
|
||||
#pragma unroll 8
|
||||
for (int i=0; i<8; i++)
|
||||
h[i] = c_IV512[i];
|
||||
|
||||
// Message for first round
|
||||
#pragma unroll 8
|
||||
for (int i=0; i < 8; ++i)
|
||||
buf[i] = inpHash[i];
|
||||
|
||||
#pragma unroll 8
|
||||
for (int i=0; i < 8; i++)
|
||||
buf[i+8] = d_constHashPadding[i];
|
||||
|
||||
// Ending round
|
||||
pentablake_compress(h, buf, 512);
|
||||
|
||||
#if __CUDA_ARCH__ < 300
|
||||
uint32_t *outHash = (uint32_t*)&g_hash[thread<<3];
|
||||
#pragma unroll 8
|
||||
for (int i=0; i < 8; i++) {
|
||||
outHash[2*i+0] = cuda_swab32( _HIWORD(h[i]) );
|
||||
outHash[2*i+1] = cuda_swab32( _LOWORD(h[i]) );
|
||||
}
|
||||
#else
|
||||
uint64_t *outHash = &g_hash[thread<<3];
|
||||
for (int i=0; i < 8; i++) {
|
||||
outHash[i] = cuda_swab64(h[i]);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
__host__
|
||||
void pentablake_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash, int order)
|
||||
{
|
||||
const int threadsperblock = TPB;
|
||||
|
||||
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
||||
dim3 block(threadsperblock);
|
||||
size_t shared_size = 0;
|
||||
|
||||
pentablake_gpu_hash_64 <<<grid, block, shared_size>>> (threads, startNounce, (uint64_t*)d_outputHash);
|
||||
|
||||
//MyStreamSynchronize(NULL, order, thr_id);
|
||||
cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
#if 0
|
||||
|
||||
__host__
|
||||
uint32_t pentablake_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce)
|
||||
{
|
||||
const int threadsperblock = TPB;
|
||||
uint32_t result = MAXU;
|
||||
|
||||
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
||||
dim3 block(threadsperblock);
|
||||
size_t shared_size = 0;
|
||||
|
||||
/* Check error on Ctrl+C or kill to prevent segfaults on exit */
|
||||
if (cudaMemset(d_resNounce[thr_id], 0xff, 2*sizeof(uint32_t)) != cudaSuccess)
|
||||
return result;
|
||||
|
||||
pentablake_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNounce, d_resNounce[thr_id]);
|
||||
cudaDeviceSynchronize();
|
||||
if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], 2*sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
|
||||
cudaThreadSynchronize();
|
||||
result = h_resNounce[thr_id][0];
|
||||
extra_results[0] = h_resNounce[thr_id][1];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
__global__
|
||||
void pentablake_gpu_check_hash(uint32_t threads, uint32_t startNounce, uint32_t *g_hash, uint32_t *resNounce)
|
||||
{
|
||||
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||
if (thread < threads)
|
||||
{
|
||||
uint32_t nounce = startNounce + thread;
|
||||
uint32_t *inpHash = &g_hash[thread<<4];
|
||||
uint32_t h[8];
|
||||
|
||||
#pragma unroll 8
|
||||
for (int i=0; i < 8; i++)
|
||||
h[i] = inpHash[i];
|
||||
|
||||
for (int i = 7; i >= 0; i--) {
|
||||
uint32_t hash = h[i]; // cuda_swab32(h[i]);
|
||||
if (hash > c_Target[i]) {
|
||||
return;
|
||||
}
|
||||
if (hash < c_Target[i]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* keep the smallest nounce, + extra one if found */
|
||||
if (resNounce[0] > nounce) {
|
||||
resNounce[1] = resNounce[0];
|
||||
resNounce[0] = nounce;
|
||||
}
|
||||
else
|
||||
resNounce[1] = nounce;
|
||||
}
|
||||
}
|
||||
|
||||
__host__ static
|
||||
uint32_t pentablake_check_hash(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_inputHash, int order)
|
||||
{
|
||||
const int threadsperblock = TPB;
|
||||
uint32_t result = MAXU;
|
||||
|
||||
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
||||
dim3 block(threadsperblock);
|
||||
size_t shared_size = 0;
|
||||
|
||||
/* Check error on Ctrl+C or kill to prevent segfaults on exit */
|
||||
if (cudaMemset(d_resNounce[thr_id], 0xff, 2*sizeof(uint32_t)) != cudaSuccess)
|
||||
return result;
|
||||
|
||||
pentablake_gpu_check_hash <<<grid, block, shared_size>>> (threads, startNounce, d_inputHash, d_resNounce[thr_id]);
|
||||
|
||||
CUDA_SAFE_CALL(cudaDeviceSynchronize());
|
||||
if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], 2*sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
|
||||
cudaThreadSynchronize();
|
||||
result = h_resNounce[thr_id][0];
|
||||
extra_results[0] = h_resNounce[thr_id][1];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
__host__
|
||||
void pentablake_cpu_setBlock_80(uint32_t *pdata, const uint32_t *ptarget)
|
||||
{
|
||||
uint8_t data[128];
|
||||
memcpy((void*) data, (void*) pdata, 80);
|
||||
memset(data+80, 0, 48);
|
||||
|
||||
// to swab...
|
||||
data[80] = 0x80;
|
||||
data[111] = 1;
|
||||
data[126] = 0x02;
|
||||
data[127] = 0x80;
|
||||
|
||||
CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_data, data, sizeof(data), 0, cudaMemcpyHostToDevice));
|
||||
CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_sigma, host_sigma, sizeof(host_sigma), 0, cudaMemcpyHostToDevice));
|
||||
CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Target, ptarget, 32, 0, cudaMemcpyHostToDevice));
|
||||
}
|
||||
|
||||
extern "C" int scanhash_pentablake(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
|
||||
uint32_t max_nonce, unsigned long *hashes_done)
|
||||
{
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
static bool init[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
uint32_t throughput = min(128 * 2560, max_nonce - first_nonce);
|
||||
uint32_t endiandata[20];
|
||||
int rc = 0;
|
||||
|
||||
if (extra_results[0] != MAXU) {
|
||||
// possible extra result found in previous call
|
||||
if (first_nonce <= extra_results[0] && max_nonce >= extra_results[0]) {
|
||||
pdata[19] = extra_results[0];
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
extra_results[0] = MAXU;
|
||||
rc = 1;
|
||||
goto exit_scan;
|
||||
}
|
||||
}
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x000F;
|
||||
|
||||
if (!init[thr_id]) {
|
||||
if (opt_n_threads > 1) {
|
||||
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
|
||||
}
|
||||
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 64 * throughput));
|
||||
CUDA_SAFE_CALL(cudaMallocHost(&h_resNounce[thr_id], 2*sizeof(uint32_t)));
|
||||
CUDA_SAFE_CALL(cudaMalloc(&d_resNounce[thr_id], 2*sizeof(uint32_t)));
|
||||
|
||||
init[thr_id] = true;
|
||||
}
|
||||
|
||||
for (int k=0; k < 20; k++)
|
||||
be32enc(&endiandata[k], pdata[k]);
|
||||
|
||||
pentablake_cpu_setBlock_80(endiandata, ptarget);
|
||||
|
||||
do {
|
||||
int order = 0;
|
||||
|
||||
// GPU HASH
|
||||
pentablake_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||
|
||||
pentablake_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||
pentablake_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||
pentablake_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||
pentablake_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||
|
||||
uint32_t foundNonce = pentablake_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||
|
||||
if (foundNonce != MAXU)
|
||||
{
|
||||
uint32_t vhashcpu[8];
|
||||
uint32_t Htarg = ptarget[7];
|
||||
|
||||
be32enc(&endiandata[19], foundNonce);
|
||||
|
||||
pentablakehash(vhashcpu, endiandata);
|
||||
|
||||
if (vhashcpu[7] <= Htarg && fulltest(vhashcpu, ptarget))
|
||||
{
|
||||
pdata[19] = foundNonce;
|
||||
rc = 1;
|
||||
|
||||
// Rare but possible if the throughput is big
|
||||
be32enc(&endiandata[19], extra_results[0]);
|
||||
pentablakehash(vhashcpu, endiandata);
|
||||
if (vhashcpu[7] <= Htarg && fulltest(vhashcpu, ptarget)) {
|
||||
applog(LOG_NOTICE, "GPU found more than one result yippee!");
|
||||
rc = 2;
|
||||
} else {
|
||||
extra_results[0] = MAXU;
|
||||
}
|
||||
|
||||
goto exit_scan;
|
||||
}
|
||||
else if (vhashcpu[7] > Htarg) {
|
||||
applog(LOG_WARNING, "GPU #%d: result for nounce %08x is not in range: %x > %x", thr_id, foundNonce, vhashcpu[7], Htarg);
|
||||
}
|
||||
else if (vhashcpu[6] > ptarget[6]) {
|
||||
applog(LOG_WARNING, "GPU #%d: hash[6] for nounce %08x is not in range: %x > %x", thr_id, foundNonce, vhashcpu[6], ptarget[6]);
|
||||
}
|
||||
else {
|
||||
applog(LOG_WARNING, "GPU #%d: result for nounce %08x does not validate on CPU!", thr_id, foundNonce);
|
||||
}
|
||||
}
|
||||
|
||||
pdata[19] += throughput;
|
||||
|
||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
exit_scan:
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
#if 0
|
||||
/* reset the device to allow multiple instances
|
||||
* could be made in cpu-miner... check later if required */
|
||||
if (opt_n_threads == 1) {
|
||||
CUDA_SAFE_CALL(cudaDeviceReset());
|
||||
init[thr_id] = false;
|
||||
}
|
||||
#endif
|
||||
|
||||
cudaDeviceSynchronize();
|
||||
return rc;
|
||||
}
|
@ -50,59 +50,60 @@ const uint64_t c_u512[16] =
|
||||
0x0801f2e2858efc16ULL, 0x636920d871574e69ULL
|
||||
};
|
||||
|
||||
#define G(a,b,c,d,e) \
|
||||
v[a] += (m[sigma[i][e]] ^ u512[sigma[i][e+1]]) + v[b];\
|
||||
v[d] = ROTR( v[d] ^ v[a],32); \
|
||||
v[c] += v[d]; \
|
||||
v[b] = ROTR( v[b] ^ v[c],25); \
|
||||
v[a] += (m[sigma[i][e+1]] ^ u512[sigma[i][e]])+v[b]; \
|
||||
v[d] = ROTR( v[d] ^ v[a],16); \
|
||||
v[c] += v[d]; \
|
||||
v[b] = ROTR( v[b] ^ v[c],11);
|
||||
|
||||
#define G(a,b,c,d,x) { \
|
||||
uint32_t idx1 = sigma[i][x]; \
|
||||
uint32_t idx2 = sigma[i][x+1]; \
|
||||
v[a] += (m[idx1] ^ u512[idx2]) + v[b]; \
|
||||
v[d] = ROTR( v[d] ^ v[a], 32); \
|
||||
v[c] += v[d]; \
|
||||
v[b] = ROTR( v[b] ^ v[c], 25); \
|
||||
v[a] += (m[idx2] ^ u512[idx1]) + v[b]; \
|
||||
v[d] = ROTR( v[d] ^ v[a], 16); \
|
||||
v[c] += v[d]; \
|
||||
v[b] = ROTR( v[b] ^ v[c], 11); \
|
||||
}
|
||||
|
||||
__device__ static
|
||||
void quark_blake512_compress( uint64_t *h, const uint64_t *block, const uint8_t ((*sigma)[16]), const uint64_t *u512, const int bits )
|
||||
void quark_blake512_compress( uint64_t *h, const uint64_t *block, const uint8_t ((*sigma)[16]), const uint64_t *u512, const int T0)
|
||||
{
|
||||
uint64_t v[16], m[16], i;
|
||||
|
||||
#pragma unroll 16
|
||||
for( i = 0; i < 16; ++i ) {
|
||||
m[i] = cuda_swab64(block[i]);
|
||||
}
|
||||
#pragma unroll 16
|
||||
for( i = 0; i < 16; i++) {
|
||||
m[i] = cuda_swab64(block[i]);
|
||||
}
|
||||
|
||||
#pragma unroll 8
|
||||
for( i = 0; i < 8; ++i ) v[i] = h[i];
|
||||
#pragma unroll 8
|
||||
for (i = 0; i < 8; i++)
|
||||
v[i] = h[i];
|
||||
|
||||
v[ 8] = u512[0];
|
||||
v[ 9] = u512[1];
|
||||
v[10] = u512[2];
|
||||
v[11] = u512[3];
|
||||
v[12] = u512[4];
|
||||
v[13] = u512[5];
|
||||
v[14] = u512[6];
|
||||
v[15] = u512[7];
|
||||
v[ 8] = u512[0];
|
||||
v[ 9] = u512[1];
|
||||
v[10] = u512[2];
|
||||
v[11] = u512[3];
|
||||
v[12] = u512[4] ^ T0;
|
||||
v[13] = u512[5] ^ T0;
|
||||
v[14] = u512[6];
|
||||
v[15] = u512[7];
|
||||
|
||||
v[12] ^= bits;
|
||||
v[13] ^= bits;
|
||||
//#pragma unroll 16
|
||||
for( i = 0; i < 16; ++i )
|
||||
{
|
||||
/* column step */
|
||||
G( 0, 4, 8, 12, 0 );
|
||||
G( 1, 5, 9, 13, 2 );
|
||||
G( 2, 6, 10, 14, 4 );
|
||||
G( 3, 7, 11, 15, 6 );
|
||||
/* diagonal step */
|
||||
G( 0, 5, 10, 15, 8 );
|
||||
G( 1, 6, 11, 12, 10 );
|
||||
G( 2, 7, 8, 13, 12 );
|
||||
G( 3, 4, 9, 14, 14 );
|
||||
}
|
||||
|
||||
//#pragma unroll 16
|
||||
for( i = 0; i < 16; ++i )
|
||||
{
|
||||
/* column step */
|
||||
G( 0, 4, 8, 12, 0 );
|
||||
G( 1, 5, 9, 13, 2 );
|
||||
G( 2, 6, 10, 14, 4 );
|
||||
G( 3, 7, 11, 15, 6 );
|
||||
/* diagonal step */
|
||||
G( 0, 5, 10, 15, 8 );
|
||||
G( 1, 6, 11, 12, 10 );
|
||||
G( 2, 7, 8, 13, 12 );
|
||||
G( 3, 4, 9, 14, 14 );
|
||||
}
|
||||
|
||||
#pragma unroll 16
|
||||
for( i = 0; i < 16; ++i ) h[i % 8] ^= v[i];
|
||||
#pragma unroll 16
|
||||
for( i = 0; i < 16; ++i )
|
||||
h[i % 8] ^= v[i];
|
||||
}
|
||||
|
||||
__device__ __constant__
|
||||
@ -114,7 +115,8 @@ static const uint64_t d_constMem[8] = {
|
||||
0x510e527fade682d1ULL,
|
||||
0x9b05688c2b3e6c1fULL,
|
||||
0x1f83d9abfb41bd6bULL,
|
||||
0x5be0cd19137e2179ULL };
|
||||
0x5be0cd19137e2179ULL
|
||||
};
|
||||
|
||||
// Hash-Padding
|
||||
__device__ __constant__
|
||||
@ -126,7 +128,8 @@ static const uint64_t d_constHashPadding[8] = {
|
||||
0,
|
||||
0x0100000000000000ull,
|
||||
0,
|
||||
0x0002000000000000ull };
|
||||
0x0002000000000000ull
|
||||
};
|
||||
|
||||
__global__ __launch_bounds__(256, 4)
|
||||
void quark_blake512_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_nonceVector, uint64_t *g_hash)
|
||||
@ -145,48 +148,42 @@ void quark_blake512_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_n
|
||||
if (thread < threads)
|
||||
#endif
|
||||
{
|
||||
uint8_t i;
|
||||
// bestimme den aktuellen Zähler
|
||||
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
|
||||
|
||||
int hashPosition = nounce - startNounce;
|
||||
uint64_t *inpHash = &g_hash[hashPosition<<3]; // hashPosition * 8
|
||||
|
||||
// 128 Byte für die Message
|
||||
// 128 Bytes
|
||||
uint64_t buf[16];
|
||||
|
||||
// State vorbereiten
|
||||
// State
|
||||
uint64_t h[8];
|
||||
#pragma unroll 8
|
||||
for (i=0;i<8;i++)
|
||||
for (int i=0;i<8;i++)
|
||||
h[i] = d_constMem[i];
|
||||
|
||||
// Message für die erste Runde in Register holen
|
||||
// Message for first round
|
||||
#pragma unroll 8
|
||||
for (i=0; i < 8; ++i)
|
||||
for (int i=0; i < 8; ++i)
|
||||
buf[i] = inpHash[i];
|
||||
|
||||
#pragma unroll 8
|
||||
for (i=0; i < 8; i++)
|
||||
for (int i=0; i < 8; i++)
|
||||
buf[i+8] = d_constHashPadding[i];
|
||||
|
||||
// die einzige Hashing-Runde
|
||||
// Ending round
|
||||
quark_blake512_compress( h, buf, c_sigma, c_u512, 512 );
|
||||
|
||||
#if __CUDA_ARCH__ >= 130
|
||||
// ausschliesslich 32 bit Operationen sofern die SM1.3 double intrinsics verfügbar sind
|
||||
#if __CUDA_ARCH__ <= 350
|
||||
uint32_t *outHash = (uint32_t*)&g_hash[8 * hashPosition];
|
||||
#pragma unroll 8
|
||||
for (i=0; i < 8; ++i) {
|
||||
for (int i=0; i < 8; i++) {
|
||||
outHash[2*i+0] = cuda_swab32( _HIWORD(h[i]) );
|
||||
outHash[2*i+1] = cuda_swab32( _LOWORD(h[i]) );
|
||||
}
|
||||
#else
|
||||
// in dieser Version passieren auch ein paar 64 Bit Shifts
|
||||
uint64_t *outHash = &g_hash[8 * hashPosition];
|
||||
#pragma unroll 8
|
||||
for (i=0; i < 8; ++i)
|
||||
{
|
||||
for (int i=0; i < 8; i++) {
|
||||
outHash[i] = cuda_swab64(h[i]);
|
||||
}
|
||||
#endif
|
||||
@ -198,45 +195,38 @@ __global__ void quark_blake512_gpu_hash_80(int threads, uint32_t startNounce, vo
|
||||
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||
if (thread < threads)
|
||||
{
|
||||
// State vorbereiten
|
||||
uint64_t h[8];
|
||||
// 128 Byte für die Message
|
||||
uint64_t buf[16];
|
||||
uint8_t i;
|
||||
// bestimme den aktuellen Zähler
|
||||
uint32_t nounce = startNounce + thread;
|
||||
|
||||
#pragma unroll 8
|
||||
for(i=0;i<8;i++)
|
||||
for(int i=0; i<8; i++)
|
||||
h[i] = d_constMem[i];
|
||||
|
||||
// Message für die erste Runde in Register holen
|
||||
#pragma unroll 16
|
||||
for (i=0; i < 16; ++i) buf[i] = c_PaddedMessage80[i];
|
||||
for (int i=0; i < 16; ++i)
|
||||
buf[i] = c_PaddedMessage80[i];
|
||||
|
||||
// die Nounce durch die thread-spezifische ersetzen
|
||||
buf[9] = REPLACE_HIWORD(buf[9], cuda_swab32(nounce));
|
||||
// The test Nonce
|
||||
((uint32_t*)buf)[19] = cuda_swab32(nounce);
|
||||
|
||||
// die einzige Hashing-Runde
|
||||
quark_blake512_compress( h, buf, c_sigma, c_u512, 640 );
|
||||
|
||||
// Hash rauslassen
|
||||
#if __CUDA_ARCH__ >= 130
|
||||
// ausschliesslich 32 bit Operationen sofern die SM1.3 double intrinsics verfügbar sind
|
||||
#if __CUDA_ARCH__ <= 350
|
||||
uint32_t *outHash = (uint32_t *)outputHash + 16 * thread;
|
||||
#pragma unroll 8
|
||||
for (i=0; i < 8; ++i) {
|
||||
outHash[2*i+0] = cuda_swab32( _HIWORD(h[i]) );
|
||||
for (uint32_t i=0; i < 8; i++) {
|
||||
outHash[2*i] = cuda_swab32( _HIWORD(h[i]) );
|
||||
outHash[2*i+1] = cuda_swab32( _LOWORD(h[i]) );
|
||||
}
|
||||
#else
|
||||
// in dieser Version passieren auch ein paar 64 Bit Shifts
|
||||
uint64_t *outHash = (uint64_t *)outputHash + 8 * thread;
|
||||
#pragma unroll 8
|
||||
for (i=0; i < 8; ++i) {
|
||||
for (uint32_t i=0; i < 8; i++) {
|
||||
outHash[i] = cuda_swab64( h[i] );
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6,12 +6,12 @@ extern "C"
|
||||
#include "sph/sph_skein.h"
|
||||
#include "sph/sph_jh.h"
|
||||
#include "sph/sph_keccak.h"
|
||||
}
|
||||
|
||||
#include "miner.h"
|
||||
|
||||
#include "cuda_helper.h"
|
||||
}
|
||||
|
||||
// aus cpu-miner.c
|
||||
extern int device_map[8];
|
||||
|
||||
// Speicher für Input/Output der verketteten Hashfunktionen
|
||||
@ -70,76 +70,64 @@ extern "C" void quarkhash(void *state, const void *input)
|
||||
unsigned char hash[64];
|
||||
|
||||
sph_blake512_init(&ctx_blake);
|
||||
// ZBLAKE;
|
||||
sph_blake512 (&ctx_blake, input, 80);
|
||||
sph_blake512_close(&ctx_blake, (void*) hash);
|
||||
|
||||
sph_bmw512_init(&ctx_bmw);
|
||||
// ZBMW;
|
||||
sph_bmw512 (&ctx_bmw, (const void*) hash, 64);
|
||||
sph_bmw512_close(&ctx_bmw, (void*) hash);
|
||||
|
||||
if (hash[0] & 0x8)
|
||||
{
|
||||
sph_groestl512_init(&ctx_groestl);
|
||||
// ZGROESTL;
|
||||
sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
|
||||
sph_groestl512_close(&ctx_groestl, (void*) hash);
|
||||
}
|
||||
else
|
||||
{
|
||||
sph_skein512_init(&ctx_skein);
|
||||
// ZSKEIN;
|
||||
sph_skein512 (&ctx_skein, (const void*) hash, 64);
|
||||
sph_skein512_close(&ctx_skein, (void*) hash);
|
||||
}
|
||||
|
||||
sph_groestl512_init(&ctx_groestl);
|
||||
// ZGROESTL;
|
||||
sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
|
||||
sph_groestl512_close(&ctx_groestl, (void*) hash);
|
||||
|
||||
sph_jh512_init(&ctx_jh);
|
||||
// ZJH;
|
||||
sph_jh512 (&ctx_jh, (const void*) hash, 64);
|
||||
sph_jh512_close(&ctx_jh, (void*) hash);
|
||||
|
||||
if (hash[0] & 0x8)
|
||||
{
|
||||
sph_blake512_init(&ctx_blake);
|
||||
// ZBLAKE;
|
||||
sph_blake512 (&ctx_blake, (const void*) hash, 64);
|
||||
sph_blake512_close(&ctx_blake, (void*) hash);
|
||||
}
|
||||
else
|
||||
{
|
||||
sph_bmw512_init(&ctx_bmw);
|
||||
// ZBMW;
|
||||
sph_bmw512 (&ctx_bmw, (const void*) hash, 64);
|
||||
sph_bmw512_close(&ctx_bmw, (void*) hash);
|
||||
}
|
||||
|
||||
sph_keccak512_init(&ctx_keccak);
|
||||
// ZKECCAK;
|
||||
sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
|
||||
sph_keccak512_close(&ctx_keccak, (void*) hash);
|
||||
|
||||
sph_skein512_init(&ctx_skein);
|
||||
// SKEIN;
|
||||
sph_skein512 (&ctx_skein, (const void*) hash, 64);
|
||||
sph_skein512_close(&ctx_skein, (void*) hash);
|
||||
|
||||
if (hash[0] & 0x8)
|
||||
{
|
||||
sph_keccak512_init(&ctx_keccak);
|
||||
// ZKECCAK;
|
||||
sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
|
||||
sph_keccak512_close(&ctx_keccak, (void*) hash);
|
||||
}
|
||||
else
|
||||
{
|
||||
sph_jh512_init(&ctx_jh);
|
||||
// ZJH;
|
||||
sph_jh512 (&ctx_jh, (const void*) hash, 64);
|
||||
sph_jh512_close(&ctx_jh, (void*) hash);
|
||||
}
|
||||
@ -147,23 +135,17 @@ extern "C" void quarkhash(void *state, const void *input)
|
||||
memcpy(state, hash, 32);
|
||||
}
|
||||
|
||||
|
||||
extern bool opt_benchmark;
|
||||
|
||||
extern "C" int scanhash_quark(int thr_id, uint32_t *pdata,
|
||||
const uint32_t *ptarget, uint32_t max_nonce,
|
||||
unsigned long *hashes_done)
|
||||
{
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const int throughput = 256*4096; // 100;
|
||||
static bool init[8] = {0,0,0,0,0,0,0,0};
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
||||
((uint32_t*)ptarget)[7] = 0x00FF;
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
const int throughput = 256*4096; // 100;
|
||||
|
||||
static bool init[8] = {0,0,0,0,0,0,0,0};
|
||||
if (!init[thr_id])
|
||||
{
|
||||
cudaSetDevice(device_map[thr_id]);
|
||||
@ -252,11 +234,12 @@ extern "C" int scanhash_quark(int thr_id, uint32_t *pdata,
|
||||
uint32_t foundNonce = cuda_check_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
|
||||
if (foundNonce != 0xffffffff)
|
||||
{
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t vhash64[8];
|
||||
be32enc(&endiandata[19], foundNonce);
|
||||
quarkhash(vhash64, endiandata);
|
||||
|
||||
if ((vhash64[7]<=Htarg) && fulltest(vhash64, ptarget)) {
|
||||
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
|
||||
|
||||
pdata[19] = foundNonce;
|
||||
*hashes_done = (foundNonce - first_nonce + 1)/2;
|
||||
|
29
util.c
29
util.c
@ -1020,7 +1020,7 @@ static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)
|
||||
int merkle_count, i;
|
||||
json_t *merkle_arr;
|
||||
unsigned char **merkle;
|
||||
int ntime;
|
||||
int ntime, hoffset;
|
||||
|
||||
job_id = json_string_value(json_array_get(params, 0));
|
||||
prevhash = json_string_value(json_array_get(params, 1));
|
||||
@ -1078,7 +1078,8 @@ static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)
|
||||
hex2bin(sctx->job.coinbase, coinb1, coinb1_size);
|
||||
memcpy(sctx->job.coinbase + coinb1_size, sctx->xnonce1, sctx->xnonce1_size);
|
||||
|
||||
sctx->bloc_height = le16dec((uint8_t*) sctx->job.coinbase + 43);
|
||||
hoffset = coinb1_size - 15; // 43;
|
||||
sctx->bloc_height = le16dec((uint8_t*) sctx->job.coinbase + hoffset);
|
||||
if (!sctx->job.job_id || strcmp(sctx->job.job_id, job_id))
|
||||
memset(sctx->job.xnonce2, 0, sctx->xnonce2_size);
|
||||
hex2bin(sctx->job.xnonce2 + sctx->xnonce2_size, coinb2, coinb2_size);
|
||||
@ -1125,7 +1126,7 @@ static bool stratum_set_difficulty(struct stratum_ctx *sctx, json_t *params)
|
||||
sctx->next_diff = diff;
|
||||
pthread_mutex_unlock(&sctx->work_lock);
|
||||
|
||||
applog(LOG_INFO, "Stratum difficulty set to %g", diff);
|
||||
applog(LOG_WARNING, "Stratum difficulty set to %g", diff);
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -1221,10 +1222,6 @@ bool stratum_handle_method(struct stratum_ctx *sctx, const char *s)
|
||||
id = json_object_get(val, "id");
|
||||
params = json_object_get(val, "params");
|
||||
|
||||
if (opt_debug_rpc) {
|
||||
applog(LOG_DEBUG, "method: %s", s);
|
||||
}
|
||||
|
||||
if (!strcasecmp(method, "mining.notify")) {
|
||||
ret = stratum_notify(sctx, params);
|
||||
goto out;
|
||||
@ -1400,6 +1397,20 @@ static char* format_hash(char* buf, unsigned char *hash)
|
||||
return buf;
|
||||
}
|
||||
|
||||
/* to debug diff in data */
|
||||
extern void applog_compare_hash(unsigned char *hash, unsigned char *hash2)
|
||||
{
|
||||
char s[256] = "";
|
||||
int len = 0;
|
||||
for (int i=0; i < 32; i += 4) {
|
||||
char *color = memcmp(hash+i, hash2+i, 4) ? CL_RED : CL_GRY;
|
||||
len += sprintf(s+len, "%s%02x%02x%02x%02x " CL_GRY, color,
|
||||
hash[i], hash[i+1], hash[i+2], hash[i+3]);
|
||||
s[len] = '\0';
|
||||
}
|
||||
applog(LOG_DEBUG, "%s", s);
|
||||
}
|
||||
|
||||
extern void applog_hash(unsigned char *hash)
|
||||
{
|
||||
char s[128] = {'\0'};
|
||||
@ -1457,6 +1468,10 @@ void print_hash_tests(void)
|
||||
nist5hash(&hash[0], &buf[0]);
|
||||
printpfx("nist5", hash);
|
||||
|
||||
memset(hash, 0, sizeof hash);
|
||||
pentablakehash(&hash[0], &buf[0]);
|
||||
printpfx("pentablake", hash);
|
||||
|
||||
memset(hash, 0, sizeof hash);
|
||||
quarkhash(&hash[0], &buf[0]);
|
||||
printpfx("quark", hash);
|
||||
|
18
x11/x11.cu
18
x11/x11.cu
@ -21,10 +21,9 @@ extern "C"
|
||||
#include <memory.h>
|
||||
}
|
||||
|
||||
// aus cpu-miner.c
|
||||
// in cpu-miner.c
|
||||
extern int device_map[8];
|
||||
|
||||
// Speicher für Input/Output der verketteten Hashfunktionen
|
||||
static uint32_t *d_hash[8];
|
||||
|
||||
extern void quark_blake512_cpu_init(int thr_id, int threads);
|
||||
@ -140,22 +139,17 @@ extern "C" void x11hash(void *output, const void *input)
|
||||
}
|
||||
|
||||
|
||||
extern bool opt_benchmark;
|
||||
|
||||
extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
|
||||
const uint32_t *ptarget, uint32_t max_nonce,
|
||||
unsigned long *hashes_done)
|
||||
{
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const int throughput = 256*256*8;
|
||||
static bool init[8] = {0,0,0,0,0,0,0,0};
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
const int throughput = 256*256*8;
|
||||
|
||||
static bool init[8] = {0,0,0,0,0,0,0,0};
|
||||
if (!init[thr_id])
|
||||
{
|
||||
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
|
||||
@ -186,8 +180,10 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
|
||||
cuda_check_cpu_setTarget(ptarget);
|
||||
|
||||
do {
|
||||
uint32_t foundNonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
int order = 0;
|
||||
uint32_t foundNonce;
|
||||
|
||||
// Hash with CUDA
|
||||
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||
@ -204,7 +200,7 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
|
||||
|
||||
// Scan nach Gewinner Hashes auf der GPU
|
||||
foundNonce = cuda_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||
if (foundNonce != 0xffffffff)
|
||||
if (foundNonce != 0xffffffff)
|
||||
{
|
||||
uint32_t vhash64[8];
|
||||
be32enc(&endiandata[19], foundNonce);
|
||||
|
@ -20,11 +20,11 @@ extern "C" {
|
||||
#include "sph/sph_hamsi.h"
|
||||
#include "sph/sph_fugue.h"
|
||||
#include "sph/sph_shabal.h"
|
||||
}
|
||||
|
||||
#include "miner.h"
|
||||
|
||||
#include "cuda_helper.h"
|
||||
}
|
||||
|
||||
// from cpu-miner.c
|
||||
extern int device_map[8];
|
||||
@ -167,8 +167,6 @@ extern "C" void x14hash(void *output, const void *input)
|
||||
}
|
||||
|
||||
|
||||
extern bool opt_benchmark;
|
||||
|
||||
extern "C" int scanhash_x14(int thr_id, uint32_t *pdata,
|
||||
const uint32_t *ptarget, uint32_t max_nonce,
|
||||
unsigned long *hashes_done)
|
||||
|
31
x15/x15.cu
31
x15/x15.cu
@ -21,14 +21,11 @@ extern "C" {
|
||||
#include "sph/sph_fugue.h"
|
||||
#include "sph/sph_shabal.h"
|
||||
#include "sph/sph_whirlpool.h"
|
||||
}
|
||||
|
||||
#include "miner.h"
|
||||
|
||||
#include "cuda_helper.h"
|
||||
}
|
||||
|
||||
// to test gpu hash on a null buffer
|
||||
#define NULLTEST 0
|
||||
|
||||
// from cpu-miner.c
|
||||
extern int device_map[8];
|
||||
@ -92,8 +89,6 @@ extern void quark_compactTest_cpu_init(int thr_id, int threads);
|
||||
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
|
||||
uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order);
|
||||
|
||||
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
|
||||
|
||||
// X15 CPU Hash function
|
||||
extern "C" void x15hash(void *output, const void *input)
|
||||
{
|
||||
@ -181,17 +176,6 @@ extern "C" void x15hash(void *output, const void *input)
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
#if NULLTEST
|
||||
static void print_hash(unsigned char *hash)
|
||||
{
|
||||
for (int i=0; i < 32; i += 4) {
|
||||
printf("%02x%02x%02x%02x ", hash[i], hash[i+1], hash[i+2], hash[i+3]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
extern bool opt_benchmark;
|
||||
|
||||
extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
|
||||
const uint32_t *ptarget, uint32_t max_nonce,
|
||||
unsigned long *hashes_done)
|
||||
@ -203,12 +187,7 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
|
||||
uint32_t Htarg = ptarget[7];
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = Htarg = 0x0000ff;
|
||||
|
||||
#if NULLTEST
|
||||
for (int k=0; k < 20; k++)
|
||||
pdata[k] = 0;
|
||||
#endif
|
||||
((uint32_t*)ptarget)[7] = Htarg = 0x00FF;
|
||||
|
||||
if (!init[thr_id])
|
||||
{
|
||||
@ -259,12 +238,6 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
|
||||
x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||
x15_whirlpool_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||
|
||||
#if NULLTEST
|
||||
uint32_t buf[8]; memset(buf, 0, sizeof buf);
|
||||
CUDA_SAFE_CALL(cudaMemcpy(buf, d_hash[thr_id], sizeof buf, cudaMemcpyDeviceToHost));
|
||||
CUDA_SAFE_CALL(cudaThreadSynchronize());
|
||||
print_hash((unsigned char*)buf); printf("\n");
|
||||
#endif
|
||||
/* Scan with GPU */
|
||||
uint32_t foundNonce = cuda_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||
|
||||
|
18
x17/x17.cu
18
x17/x17.cu
@ -26,17 +26,15 @@ extern "C"
|
||||
|
||||
#include "sph/sph_sha2.h"
|
||||
#include "sph/sph_haval.h"
|
||||
}
|
||||
|
||||
#include "miner.h"
|
||||
}
|
||||
#include "cuda_helper.h"
|
||||
|
||||
static uint32_t *d_hash[8];
|
||||
|
||||
|
||||
// cpu-miner.c
|
||||
// in cpu-miner.c
|
||||
extern int device_map[8];
|
||||
extern bool opt_benchmark;
|
||||
|
||||
|
||||
extern void quark_blake512_cpu_init(int thr_id, int threads);
|
||||
extern void quark_blake512_cpu_setBlock_80(void *pdata);
|
||||
@ -204,20 +202,12 @@ extern "C" int scanhash_x17(int thr_id, uint32_t *pdata,
|
||||
unsigned long *hashes_done)
|
||||
{
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
||||
|
||||
const int throughput = 256*256*8;
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
||||
|
||||
static bool init[8] = {0,0,0,0,0,0,0,0};
|
||||
uint32_t Htarg = ptarget[7];
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = Htarg = 0x0000ff;
|
||||
((uint32_t*)ptarget)[7] = Htarg = 0x00FF;
|
||||
|
||||
if (!init[thr_id])
|
||||
{
|
||||
|
Loading…
x
Reference in New Issue
Block a user