Merge branch 'blake-dev' into blake

This commit is contained in:
Tanguy Pruvot 2014-09-09 22:02:19 +02:00
commit 474ee97d6e
19 changed files with 974 additions and 289 deletions

View File

@ -16,7 +16,7 @@ bin_PROGRAMS = ccminer
ccminer_SOURCES = elist.h miner.h compat.h \ ccminer_SOURCES = elist.h miner.h compat.h \
compat/inttypes.h compat/stdbool.h compat/unistd.h \ compat/inttypes.h compat/stdbool.h compat/unistd.h \
compat/sys/time.h compat/getopt/getopt.h \ compat/sys/time.h compat/getopt/getopt.h \
cpu-miner.c util.c hefty1.c scrypt.c \ cpu-miner.c util.c crc32.c hefty1.c scrypt.c \
hashlog.cpp \ hashlog.cpp \
heavy/heavy.cu \ heavy/heavy.cu \
heavy/cuda_blake512.cu heavy/cuda_blake512.h \ heavy/cuda_blake512.cu heavy/cuda_blake512.h \
@ -33,7 +33,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \
quark/cuda_jh512.cu quark/cuda_quark_blake512.cu quark/cuda_quark_groestl512.cu quark/cuda_skein512.cu \ quark/cuda_jh512.cu quark/cuda_quark_blake512.cu quark/cuda_quark_groestl512.cu quark/cuda_skein512.cu \
quark/cuda_bmw512.cu quark/cuda_quark_keccak512.cu quark/quarkcoin.cu quark/animecoin.cu \ quark/cuda_bmw512.cu quark/cuda_quark_keccak512.cu quark/quarkcoin.cu quark/animecoin.cu \
quark/cuda_quark_compactionTest.cu \ quark/cuda_quark_compactionTest.cu \
cuda_nist5.cu blake32.cu \ cuda_nist5.cu blake32.cu pentablake.cu \
sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c \ sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c \
sph/cubehash.c sph/echo.c sph/luffa.c sph/sha2.c sph/shavite.c sph/simd.c \ sph/cubehash.c sph/echo.c sph/luffa.c sph/sha2.c sph/shavite.c sph/simd.c \
sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \ sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \

View File

@ -1,5 +1,5 @@
ccMiner release 1.4.1-tpruvot (Sep 06th 2014) - "Cached Blake" ccMiner release 1.4.2-tpruvot (Sep 09th 2014) - "Pentablake"
--------------------------------------------------------------- ---------------------------------------------------------------
*************************************************************** ***************************************************************
@ -35,6 +35,7 @@ TalkCoin
DarkCoin and other X11 coins DarkCoin and other X11 coins
NEOS blake (256 14-rounds) NEOS blake (256 14-rounds)
BlakeCoin (256 8-rounds) BlakeCoin (256 8-rounds)
Pentablake (Blake 512 x5)
where some of these coins have a VERY NOTABLE nVidia advantage where some of these coins have a VERY NOTABLE nVidia advantage
over competing AMD (OpenCL) implementations. over competing AMD (OpenCL) implementations.
@ -65,6 +66,7 @@ its command line interface and options.
blake use to mine NEOS (Blake 256) blake use to mine NEOS (Blake 256)
blakecoin use to mine Old Blake 256 blakecoin use to mine Old Blake 256
nist5 use to mine TalkCoin nist5 use to mine TalkCoin
penta use to mine Joincoin / Pentablake
fresh use to mine Freshcoin fresh use to mine Freshcoin
whirl use to mine Whirlcoin whirl use to mine Whirlcoin
x11 use to mine DarkCoin x11 use to mine DarkCoin

View File

@ -15,6 +15,9 @@ extern "C" {
/* threads per block */ /* threads per block */
#define TPB 128 #define TPB 128
/* crc32.c */
extern "C" uint32_t crc32_u32t(const uint32_t *buf, size_t size);
extern "C" int blake256_rounds = 14; extern "C" int blake256_rounds = 14;
/* hash by cpu with blake 256 */ /* hash by cpu with blake 256 */
@ -41,8 +44,6 @@ extern bool opt_n_threads;
extern bool opt_benchmark; extern bool opt_benchmark;
extern int device_map[8]; extern int device_map[8];
uint32_t crc32(const uint32_t *buf, size_t size);
__constant__ __constant__
static uint32_t __align__(32) c_Target[8]; static uint32_t __align__(32) c_Target[8];
@ -331,7 +332,7 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
blake256_cpu_setBlock_80(pdata, ptarget); blake256_cpu_setBlock_80(pdata, ptarget);
#if USE_CACHE #if USE_CACHE
crcsum = crc32(pdata, 64); crcsum = crc32_u32t(pdata, 64);
#endif #endif
do { do {
@ -360,7 +361,8 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
be32enc(&endiandata[19], extra_results[0]); be32enc(&endiandata[19], extra_results[0]);
blake256hash(vhashcpu, endiandata, blakerounds); blake256hash(vhashcpu, endiandata, blakerounds);
if (vhashcpu[7] <= Htarg && fulltest(vhashcpu, ptarget)) { if (vhashcpu[7] <= Htarg && fulltest(vhashcpu, ptarget)) {
applog(LOG_NOTICE, "GPU found more than one result yippee!"); applog(LOG_NOTICE, "GPU found more than one result " CL_GRN "yippee!");
rc = 2;
} else { } else {
extra_results[0] = MAXU; extra_results[0] = MAXU;
} }
@ -379,9 +381,14 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
} }
} }
if ((uint64_t) pdata[19] + throughput > (uint64_t) max_nonce) {
pdata[19] = max_nonce - first_nonce + 1;
break;
}
pdata[19] += throughput; pdata[19] += throughput;
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart); } while (!work_restart[thr_id].restart);
exit_scan: exit_scan:
*hashes_done = pdata[19] - first_nonce + 1; *hashes_done = pdata[19] - first_nonce + 1;
@ -394,66 +401,6 @@ exit_scan:
} }
#endif #endif
// wait proper end of all threads // wait proper end of all threads
cudaDeviceSynchronize(); //cudaDeviceSynchronize();
return rc; return rc;
} }
static uint32_t crc32_tab[] = {
0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c,
0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
};
uint32_t crc32(const uint32_t *buf, size_t size)
{
const uint8_t *p;
uint32_t crc = 0;
p = (uint8_t *) buf;
crc = crc ^ ~0U;
while (size--)
crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8);
return crc ^ ~0U;
}

View File

@ -241,6 +241,7 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
<TreatWChar_tAsBuiltInType>false</TreatWChar_tAsBuiltInType> <TreatWChar_tAsBuiltInType>false</TreatWChar_tAsBuiltInType>
<Optimization Condition="'$(Configuration)'=='Release'">Full</Optimization> <Optimization Condition="'$(Configuration)'=='Release'">Full</Optimization>
</ClCompile> </ClCompile>
<ClCompile Include="crc32.c" />
<ClCompile Include="fuguecoin.cpp" /> <ClCompile Include="fuguecoin.cpp" />
<ClCompile Include="groestlcoin.cpp" /> <ClCompile Include="groestlcoin.cpp" />
<ClCompile Include="hashlog.cpp" /> <ClCompile Include="hashlog.cpp" />
@ -404,6 +405,12 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions> <AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
<FastMath>true</FastMath> <FastMath>true</FastMath>
</CudaCompile> </CudaCompile>
<CudaCompile Include="pentablake.cu">
<MaxRegCount>80</MaxRegCount>
<AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options="-O2 -dlcm=cg" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
<FastMath>true</FastMath>
</CudaCompile>
<CudaCompile Include="quark\animecoin.cu"> <CudaCompile Include="quark\animecoin.cu">
<AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions> <AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>

View File

@ -96,6 +96,9 @@
<ClCompile Include="cpu-miner.c"> <ClCompile Include="cpu-miner.c">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="crc32.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="hefty1.c"> <ClCompile Include="hefty1.c">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
@ -442,5 +445,8 @@
<CudaCompile Include="blake32.cu"> <CudaCompile Include="blake32.cu">
<Filter>Source Files\CUDA</Filter> <Filter>Source Files\CUDA</Filter>
</CudaCompile> </CudaCompile>
<CudaCompile Include="pentablake.cu">
<Filter>Source Files\CUDA</Filter>
</CudaCompile>
</ItemGroup> </ItemGroup>
</Project> </Project>

View File

@ -1,4 +1,4 @@
AC_INIT([ccminer], [2014.09.06]) AC_INIT([ccminer], [2014.09.09])
AC_PREREQ([2.59c]) AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM AC_CANONICAL_SYSTEM

View File

@ -136,8 +136,9 @@ typedef enum {
ALGO_JACKPOT, ALGO_JACKPOT,
ALGO_MJOLLNIR, /* Mjollnir hash */ ALGO_MJOLLNIR, /* Mjollnir hash */
ALGO_MYR_GR, ALGO_MYR_GR,
ALGO_QUARK,
ALGO_NIST5, ALGO_NIST5,
ALGO_PENTABLAKE,
ALGO_QUARK,
ALGO_WHC, ALGO_WHC,
ALGO_X11, ALGO_X11,
ALGO_X13, ALGO_X13,
@ -159,6 +160,7 @@ static const char *algo_names[] = {
"mjollnir", "mjollnir",
"myr-gr", "myr-gr",
"nist5", "nist5",
"penta",
"quark", "quark",
"whirl", "whirl",
"x11", "x11",
@ -170,7 +172,6 @@ static const char *algo_names[] = {
}; };
bool opt_debug = false; bool opt_debug = false;
bool opt_debug_rpc = false;
bool opt_protocol = false; bool opt_protocol = false;
bool opt_benchmark = false; bool opt_benchmark = false;
bool want_longpoll = true; bool want_longpoll = true;
@ -179,7 +180,7 @@ bool want_stratum = true;
bool have_stratum = false; bool have_stratum = false;
static bool submit_old = false; static bool submit_old = false;
bool use_syslog = false; bool use_syslog = false;
bool use_colors = false; bool use_colors = true;
static bool opt_background = false; static bool opt_background = false;
bool opt_quiet = false; bool opt_quiet = false;
static int opt_retries = -1; static int opt_retries = -1;
@ -242,6 +243,7 @@ Options:\n\
mjollnir Mjollnircoin hash\n\ mjollnir Mjollnircoin hash\n\
myr-gr Myriad-Groestl hash\n\ myr-gr Myriad-Groestl hash\n\
nist5 NIST5 (TalkCoin) hash\n\ nist5 NIST5 (TalkCoin) hash\n\
penta Pentablake hash (5x Blake 512)\n\
quark Quark hash\n\ quark Quark hash\n\
whirl Whirlcoin (old whirlpool)\n\ whirl Whirlcoin (old whirlpool)\n\
x11 X11 (DarkCoin) hash\n\ x11 X11 (DarkCoin) hash\n\
@ -437,7 +439,7 @@ static int share_result(int result, const char *reason)
(result ? CL_GRN "yay!!!" : CL_RED "booooo") (result ? CL_GRN "yay!!!" : CL_RED "booooo")
: (result ? "(yay!!!)" : "(booooo)")); : (result ? "(yay!!!)" : "(booooo)"));
if (reason && !opt_quiet) { if (reason) {
applog(LOG_WARNING, "reject reason: %s", reason); applog(LOG_WARNING, "reject reason: %s", reason);
if (strncmp(reason, "low difficulty share", 20) == 0) { if (strncmp(reason, "low difficulty share", 20) == 0) {
opt_difficulty = (opt_difficulty * 2.0) / 3.0; opt_difficulty = (opt_difficulty * 2.0) / 3.0;
@ -457,11 +459,14 @@ static bool submit_upstream_work(CURL *curl, struct work *work)
bool rc = false; bool rc = false;
/* pass if the previous hash is not the current previous hash */ /* pass if the previous hash is not the current previous hash */
pthread_mutex_lock(&g_work_lock);
if (memcmp(work->data + 1, g_work.data + 1, 32)) { if (memcmp(work->data + 1, g_work.data + 1, 32)) {
pthread_mutex_unlock(&g_work_lock);
if (opt_debug) if (opt_debug)
applog(LOG_DEBUG, "DEBUG: stale work detected, discarding"); applog(LOG_DEBUG, "DEBUG: stale work detected, discarding");
return true; return true;
} }
pthread_mutex_unlock(&g_work_lock);
if (have_stratum) { if (have_stratum) {
uint32_t sent; uint32_t sent;
@ -544,10 +549,6 @@ static bool submit_upstream_work(CURL *curl, struct work *work)
json_decref(val); json_decref(val);
} }
if (opt_debug_rpc) {
applog(LOG_DEBUG, "submit: %s", s);
}
rc = true; rc = true;
out: out:
@ -786,13 +787,20 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
memcpy(work->xnonce2, sctx->job.xnonce2, sctx->xnonce2_size); memcpy(work->xnonce2, sctx->job.xnonce2, sctx->xnonce2_size);
/* Generate merkle root */ /* Generate merkle root */
if (opt_algo == ALGO_HEAVY || opt_algo == ALGO_MJOLLNIR) switch (opt_algo) {
heavycoin_hash(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size); case ALGO_HEAVY:
else case ALGO_MJOLLNIR:
if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL || opt_algo == ALGO_WHC || opt_algo == ALGO_BLAKECOIN) heavycoin_hash(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size);
SHA256((unsigned char*)sctx->job.coinbase, sctx->job.coinbase_size, (unsigned char*)merkle_root); break;
else case ALGO_FUGUE256:
sha256d(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size); case ALGO_GROESTL:
case ALGO_BLAKECOIN:
case ALGO_WHC:
SHA256((uint8_t*)sctx->job.coinbase, sctx->job.coinbase_size, (uint8_t*)merkle_root);
break;
default:
sha256d(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size);
}
for (i = 0; i < sctx->job.merkle_count; i++) { for (i = 0; i < sctx->job.merkle_count; i++) {
memcpy(merkle_root + 32, sctx->job.merkle[i], 32); memcpy(merkle_root + 32, sctx->job.merkle[i], 32);
@ -864,7 +872,9 @@ static void *miner_thread(void *userdata)
uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - (thr_id + 1); uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - (thr_id + 1);
unsigned char *scratchbuf = NULL; unsigned char *scratchbuf = NULL;
bool work_done = false; bool work_done = false;
bool extrajob = false;
char s[16]; char s[16];
int rc = 0;
memset(&work, 0, sizeof(work)); // prevent work from being used uninitialized memset(&work, 0, sizeof(work)); // prevent work from being used uninitialized
@ -891,20 +901,31 @@ static void *miner_thread(void *userdata)
struct timeval tv_start, tv_end, diff; struct timeval tv_start, tv_end, diff;
int64_t max64; int64_t max64;
uint64_t umax64; uint64_t umax64;
int rc;
// &work.data[19] // &work.data[19]
int wcmplen = 76; int wcmplen = 76;
uint32_t *nonceptr = (uint32_t*) (((char*)work.data) + wcmplen); uint32_t *nonceptr = (uint32_t*) (((char*)work.data) + wcmplen);
if (have_stratum) { if (have_stratum) {
while (time(NULL) >= (g_work_time + opt_scantime) && !work_done) uint32_t sleeptime = 0;
usleep(500*1000); while (!work_done && time(NULL) >= (g_work_time + opt_scantime)) {
work_done = false; usleep(100*1000);
pthread_mutex_lock(&g_work_lock); if (sleeptime > 4) {
extrajob = true;
break;
}
sleeptime++;
}
if (sleeptime && opt_debug && !opt_quiet)
applog(LOG_DEBUG, "sleeptime: %u ms", sleeptime*100);
nonceptr = (uint32_t*) (((char*)work.data) + wcmplen); nonceptr = (uint32_t*) (((char*)work.data) + wcmplen);
if ((*nonceptr) >= end_nonce) pthread_mutex_lock(&g_work_lock);
extrajob |= work_done;
if ((*nonceptr) >= end_nonce || extrajob) {
work_done = false;
extrajob = false;
stratum_gen_work(&stratum, &g_work); stratum_gen_work(&stratum, &g_work);
}
} else { } else {
int min_scantime = have_longpoll ? LP_SCANTIME : opt_scantime; int min_scantime = have_longpoll ? LP_SCANTIME : opt_scantime;
/* obtain new work from internal workio thread */ /* obtain new work from internal workio thread */
@ -920,34 +941,54 @@ static void *miner_thread(void *userdata)
g_work_time = time(NULL); g_work_time = time(NULL);
} }
} }
#if 0
if (!opt_benchmark && g_work.xnonce2_len == 0) {
applog(LOG_ERR, "work data not read yet");
extrajob = true;
work_done = true;
sleep(1);
continue;
}
#endif
if (rc > 1) {
/* if we found more than one on last loop */
/* todo: handle an array to get them directly */
pthread_mutex_unlock(&g_work_lock);
goto continue_scan;
}
if (memcmp(work.target, g_work.target, sizeof(work.target))) {
if (opt_debug) {
applog(LOG_DEBUG, "job %s target change:", g_work.job_id);
applog_hash((uint8_t*) work.target);
applog_compare_hash((uint8_t*) g_work.target, (uint8_t*) work.target);
}
memcpy(work.target, g_work.target, sizeof(work.target));
(*nonceptr) = (0xffffffffUL / opt_n_threads) * thr_id; // 0 if single thr
/* on new target, ignoring nonce, clear sent data (hashlog) */
if (memcmp(work.target, g_work.target, sizeof(work.target))) {
hashlog_purge_job(work.job_id);
}
}
if (memcmp(work.data, g_work.data, wcmplen)) { if (memcmp(work.data, g_work.data, wcmplen)) {
if (opt_debug) { if (opt_debug) {
applog(LOG_DEBUG, "job %s work updated", g_work.job_id); for (int n=0; n <= (wcmplen-8); n+=8) {
for (int n=0; n<wcmplen; n+=8) {
if (memcmp(work.data + n, g_work.data + n, 8)) { if (memcmp(work.data + n, g_work.data + n, 8)) {
applog(LOG_DEBUG, "diff detected at offset %d", n); applog(LOG_DEBUG, "job %s work updated at offset %d:", g_work.job_id, n);
applog_hash((uint8_t*) work.data + n); applog_hash((uint8_t*) work.data + n);
applog_hash((uint8_t*) g_work.data + n); applog_compare_hash((uint8_t*) g_work.data + n, (uint8_t*) work.data + n);
} }
} }
} }
memcpy(&work, &g_work, sizeof(struct work)); memcpy(&work, &g_work, sizeof(struct work));
(*nonceptr) = (0xffffffffUL / opt_n_threads) * thr_id; // 0 if single thr (*nonceptr) = (0xffffffffUL / opt_n_threads) * thr_id; // 0 if single thr
} else if (memcmp(work.target, g_work.target, sizeof(work.target))) {
if (opt_debug) {
applog(LOG_DEBUG, "job %s target change", g_work.job_id);
applog_hash((uint8_t*) work.target);
applog_hash((uint8_t*) g_work.target);
}
memcpy(work.target, g_work.target, sizeof(work.target));
(*nonceptr) = (0xffffffffUL / opt_n_threads) * thr_id; // 0 if single thr
} else } else
(*nonceptr)++; //?? (*nonceptr)++; //??
pthread_mutex_unlock(&g_work_lock);
work_restart[thr_id].restart = 0; work_restart[thr_id].restart = 0;
if (opt_debug) if (opt_debug)
applog(LOG_WARNING, "job %s %08x", g_work.job_id, (*nonceptr)); applog(LOG_WARNING, "job %s %08x", g_work.job_id, (*nonceptr));
pthread_mutex_unlock(&g_work_lock);
/* adjust max_nonce to meet target scan time */ /* adjust max_nonce to meet target scan time */
if (have_stratum) if (have_stratum)
@ -959,15 +1000,18 @@ static void *miner_thread(void *userdata)
max64 *= (int64_t)thr_hashrates[thr_id]; max64 *= (int64_t)thr_hashrates[thr_id];
if (max64 <= 0) { if (max64 <= 0) {
/* should not be set too high,
else you can miss multiple nounces */
switch (opt_algo) { switch (opt_algo) {
case ALGO_JACKPOT: case ALGO_JACKPOT:
max64 = 0x1fffLL; max64 = 0x1fffLL;
break; break;
case ALGO_BLAKECOIN: case ALGO_BLAKECOIN:
max64 = 0x3ffffffLL; max64 = 0x3ffffffLL;
break;
case ALGO_BLAKE: case ALGO_BLAKE:
/* based on the 750Ti hashrate (100kH) */ /* based on the 750Ti hashrate (100kH) */
max64 = 0x3ffffffLL; max64 = 0x1ffffffLL;
break; break;
default: default:
max64 = 0xfffffLL; max64 = 0xfffffLL;
@ -1000,12 +1044,12 @@ static void *miner_thread(void *userdata)
stall |= (start_nonce > range.scanned[0] && start_nonce < range.scanned[1]); stall |= (start_nonce > range.scanned[0] && start_nonce < range.scanned[1]);
if (stall) { if (stall) {
if (opt_algo) if (opt_debug && !opt_quiet)
applog(LOG_DEBUG, "job done, wait for a new one..."); applog(LOG_DEBUG, "job done, wait for a new one...");
work_restart[thr_id].restart = 1; work_restart[thr_id].restart = 1;
hashlog_purge_old(); hashlog_purge_old();
// wait a bit for a new job... // wait a bit for a new job...
sleep(1); usleep(500*1000);
(*nonceptr) = end_nonce + 1; (*nonceptr) = end_nonce + 1;
work_done = true; work_done = true;
continue; continue;
@ -1023,6 +1067,7 @@ static void *miner_thread(void *userdata)
(*nonceptr) = start_nonce; (*nonceptr) = start_nonce;
hashes_done = 0; hashes_done = 0;
continue_scan:
gettimeofday(&tv_start, NULL); gettimeofday(&tv_start, NULL);
/* scan nonces for a proof-of-work hash */ /* scan nonces for a proof-of-work hash */
@ -1089,6 +1134,11 @@ static void *miner_thread(void *userdata)
max_nonce, &hashes_done); max_nonce, &hashes_done);
break; break;
case ALGO_PENTABLAKE:
rc = scanhash_pentablake(thr_id, work.data, work.target,
max_nonce, &hashes_done);
break;
case ALGO_WHC: case ALGO_WHC:
rc = scanhash_whc(thr_id, work.data, work.target, rc = scanhash_whc(thr_id, work.data, work.target,
max_nonce, &hashes_done); max_nonce, &hashes_done);
@ -1133,8 +1183,11 @@ static void *miner_thread(void *userdata)
timeval_subtract(&diff, &tv_end, &tv_start); timeval_subtract(&diff, &tv_end, &tv_start);
if (diff.tv_usec || diff.tv_sec) { if (diff.tv_usec || diff.tv_sec) {
pthread_mutex_lock(&stats_lock); pthread_mutex_lock(&stats_lock);
thr_hashrates[thr_id] = if (diff.tv_sec + 1e-6 * diff.tv_usec > 0.0) {
hashes_done / (diff.tv_sec + 1e-6 * diff.tv_usec); thr_hashrates[thr_id] = hashes_done / (diff.tv_sec + 1e-6 * diff.tv_usec);
if (rc > 1)
thr_hashrates[thr_id] = (rc * hashes_done) / (diff.tv_sec + 1e-6 * diff.tv_usec);
}
pthread_mutex_unlock(&stats_lock); pthread_mutex_unlock(&stats_lock);
} }
if (!opt_quiet) { if (!opt_quiet) {
@ -1342,7 +1395,6 @@ static void *stratum_thread(void *userdata)
pthread_mutex_lock(&g_work_lock); pthread_mutex_lock(&g_work_lock);
stratum_gen_work(&stratum, &g_work); stratum_gen_work(&stratum, &g_work);
time(&g_work_time); time(&g_work_time);
pthread_mutex_unlock(&g_work_lock);
if (stratum.job.clean) { if (stratum.job.clean) {
if (!opt_quiet) if (!opt_quiet)
applog(LOG_BLUE, "%s send a new %s block %d", short_url, algo_names[opt_algo], applog(LOG_BLUE, "%s send a new %s block %d", short_url, algo_names[opt_algo],
@ -1353,6 +1405,7 @@ static void *stratum_thread(void *userdata)
applog(LOG_BLUE, "%s send job %d for block %d", short_url, applog(LOG_BLUE, "%s send job %d for block %d", short_url,
strtoul(stratum.job.job_id, NULL, 16), stratum.bloc_height); strtoul(stratum.job.job_id, NULL, 16), stratum.bloc_height);
} }
pthread_mutex_unlock(&g_work_lock);
} }
if (!stratum_socket_full(&stratum, 120)) { if (!stratum_socket_full(&stratum, 120)) {
@ -1374,7 +1427,7 @@ out:
return NULL; return NULL;
} }
#define PROGRAM_VERSION "1.4.1" #define PROGRAM_VERSION "1.4.2"
static void show_version_and_exit(void) static void show_version_and_exit(void)
{ {
printf("%s v%s\n" printf("%s v%s\n"
@ -1438,12 +1491,11 @@ static void parse_arg (int key, char *arg)
case 'C': case 'C':
use_colors = true; use_colors = true;
break; break;
case 'q':
opt_quiet = true;
break;
case 'D': case 'D':
opt_debug = true; opt_debug = true;
opt_debug_rpc = true; break;
case 'q':
opt_quiet = true;
break; break;
case 'p': case 'p':
free(rpc_pass); free(rpc_pass);

119
crc32.c Normal file
View File

@ -0,0 +1,119 @@
/*-
* COPYRIGHT (C) 1986 Gary S. Brown. You may use this program, or
* code or tables extracted from it, as desired without restriction.
*
* First, the polynomial itself and its table of feedback terms. The
* polynomial is
* X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
*
* Note that we take it "backwards" and put the highest-order term in
* the lowest-order bit. The X^32 term is "implied"; the LSB is the
* X^31 term, etc. The X^0 term (usually shown as "+1") results in
* the MSB being 1
*
* Note that the usual hardware shift register implementation, which
* is what we're using (we're merely optimizing it by doing eight-bit
* chunks at a time) shifts bits into the lowest-order term. In our
* implementation, that means shifting towards the right. Why do we
* do it this way? Because the calculated CRC must be transmitted in
* order from highest-order term to lowest-order term. UARTs transmit
* characters in order from LSB to MSB. By storing the CRC this way
* we hand it to the UART in the order low-byte to high-byte; the UART
* sends each low-bit to hight-bit; and the result is transmission bit
* by bit from highest- to lowest-order term without requiring any bit
* shuffling on our part. Reception works similarly
*
* The feedback terms table consists of 256, 32-bit entries. Notes
*
* The table can be generated at runtime if desired; code to do so
* is shown later. It might not be obvious, but the feedback
* terms simply represent the results of eight shift/xor opera
* tions for all combinations of data and CRC register values
*
* The values must be right-shifted by eight bits by the "updcrc
* logic; the shift must be unsigned (bring in zeroes). On some
* hardware you could probably optimize the shift in assembler by
* using byte-swap instructions
* polynomial $edb88320
*
*
* CRC32 code derived from work by Gary S. Brown.
*/
#include <stdlib.h>
#include <stdint.h>
static uint32_t crc32_tab[] = {
0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c,
0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
};
/* Real CRC32 Function */
extern uint32_t crc32(uint32_t crc, const void *buf, size_t size)
{
const uint8_t *p;
p = buf;
crc = crc ^ ~0U;
while (size--)
crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8);
return crc ^ ~0U;
}
/* CRC32 Function simplified for ccminer */
extern uint32_t crc32_u32t(const uint32_t *buf, size_t size)
{
const uint8_t *p;
uint32_t crc = 0;
p = (uint8_t *) buf;
crc = crc ^ ~0U;
while (size--)
crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8);
return crc ^ ~0U;
}

View File

@ -12,6 +12,8 @@
#include <stdint.h> #include <stdint.h>
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
extern __device__ __device_builtin__ void __syncthreads(void); extern __device__ __device_builtin__ void __syncthreads(void);
#ifndef __CUDA_ARCH__ #ifndef __CUDA_ARCH__

View File

@ -5,10 +5,12 @@ extern "C"
#include "sph/sph_skein.h" #include "sph/sph_skein.h"
#include "sph/sph_jh.h" #include "sph/sph_jh.h"
#include "sph/sph_keccak.h" #include "sph/sph_keccak.h"
#include "miner.h"
#include "cuda_helper.h"
} }
#include "miner.h"
#include "cuda_helper.h"
// aus cpu-miner.c // aus cpu-miner.c
extern int device_map[8]; extern int device_map[8];
@ -74,9 +76,6 @@ extern "C" void nist5hash(void *state, const void *input)
memcpy(state, hash, 32); memcpy(state, hash, 32);
} }
extern bool opt_benchmark;
extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata, extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done) unsigned long *hashes_done)
@ -84,7 +83,7 @@ extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata,
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
if (opt_benchmark) if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x0000ff; ((uint32_t*)ptarget)[7] = 0x00FF;
const uint32_t Htarg = ptarget[7]; const uint32_t Htarg = ptarget[7];

View File

@ -249,6 +249,10 @@ extern int scanhash_nist5(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done); unsigned long *hashes_done);
extern int scanhash_pentablake(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done);
extern int scanhash_whc(int thr_id, uint32_t *pdata, extern int scanhash_whc(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done); unsigned long *hashes_done);
@ -284,8 +288,8 @@ struct work_restart {
char padding[128 - sizeof(unsigned long)]; char padding[128 - sizeof(unsigned long)];
}; };
extern bool opt_benchmark;
extern bool opt_debug; extern bool opt_debug;
extern bool opt_debug_rpc;
extern bool opt_quiet; extern bool opt_quiet;
extern bool opt_protocol; extern bool opt_protocol;
extern int opt_timeout; extern int opt_timeout;
@ -417,6 +421,7 @@ size_t time2str(char* buf, time_t timer);
char* atime2str(time_t timer); char* atime2str(time_t timer);
void applog_hash(unsigned char *hash); void applog_hash(unsigned char *hash);
void applog_compare_hash(unsigned char *hash, unsigned char *hash2);
void print_hash_tests(void); void print_hash_tests(void);
void animehash(void *state, const void *input); void animehash(void *state, const void *input);
@ -428,6 +433,7 @@ unsigned int jackpothash(void *state, const void *input);
void groestlhash(void *state, const void *input); void groestlhash(void *state, const void *input);
void myriadhash(void *state, const void *input); void myriadhash(void *state, const void *input);
void nist5hash(void *state, const void *input); void nist5hash(void *state, const void *input);
void pentablakehash(void *output, const void *input);
void quarkhash(void *state, const void *input); void quarkhash(void *state, const void *input);
void wcoinhash(void *state, const void *input); void wcoinhash(void *state, const void *input);
void x11hash(void *output, const void *input); void x11hash(void *output, const void *input);

600
pentablake.cu Normal file
View File

@ -0,0 +1,600 @@
/**
* Penta Blake-512 Cuda Kernel (Tested on SM 5.0)
*
* Tanguy Pruvot - Aug. 2014
*/
#include "miner.h"
extern "C" {
#include "sph/sph_blake.h"
#include <stdint.h>
#include <memory.h>
}
/* threads per block */
#define TPB 192
/* hash by cpu with blake 256 */
extern "C" void pentablakehash(void *output, const void *input)
{
unsigned char hash[128];
#define hashB hash + 64
sph_blake512_context ctx;
sph_blake512_init(&ctx);
sph_blake512(&ctx, input, 80);
sph_blake512_close(&ctx, hash);
sph_blake512(&ctx, hash, 64);
sph_blake512_close(&ctx, hashB);
sph_blake512(&ctx, hashB, 64);
sph_blake512_close(&ctx, hash);
sph_blake512(&ctx, hash, 64);
sph_blake512_close(&ctx, hashB);
sph_blake512(&ctx, hashB, 64);
sph_blake512_close(&ctx, hash);
memcpy(output, hash, 32);
}
#include "cuda_helper.h"
#define MAXU 0xffffffffU
// in cpu-miner.c
extern bool opt_n_threads;
extern bool opt_benchmark;
extern int device_map[8];
__constant__
static uint32_t __align__(32) c_Target[8];
__constant__
static uint64_t __align__(32) c_data[32];
static uint32_t *d_hash[8];
static uint32_t *d_resNounce[8];
static uint32_t *h_resNounce[8];
static uint32_t extra_results[2] = { MAXU, MAXU };
/* prefer uint32_t to prevent size conversions = speed +5/10 % */
__constant__
static uint32_t __align__(32) c_sigma[16][16];
const uint32_t host_sigma[16][16] = {
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
{12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
{13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
{10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 },
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }
};
__device__ __constant__
static const uint64_t __align__(32) c_IV512[8] = {
0x6a09e667f3bcc908ULL,
0xbb67ae8584caa73bULL,
0x3c6ef372fe94f82bULL,
0xa54ff53a5f1d36f1ULL,
0x510e527fade682d1ULL,
0x9b05688c2b3e6c1fULL,
0x1f83d9abfb41bd6bULL,
0x5be0cd19137e2179ULL
};
__device__ __constant__
const uint64_t c_u512[16] =
{
0x243f6a8885a308d3ULL, 0x13198a2e03707344ULL,
0xa4093822299f31d0ULL, 0x082efa98ec4e6c89ULL,
0x452821e638d01377ULL, 0xbe5466cf34e90c6cULL,
0xc0ac29b7c97c50ddULL, 0x3f84d5b5b5470917ULL,
0x9216d5d98979fb1bULL, 0xd1310ba698dfb5acULL,
0x2ffd72dbd01adfb7ULL, 0xb8e1afed6a267e96ULL,
0xba7c9045f12c7f99ULL, 0x24a19947b3916cf7ULL,
0x0801f2e2858efc16ULL, 0x636920d871574e69ULL
};
#define G(a,b,c,d,x) { \
uint32_t idx1 = c_sigma[i][x]; \
uint32_t idx2 = c_sigma[i][x+1]; \
v[a] += (m[idx1] ^ c_u512[idx2]) + v[b]; \
v[d] = ROTR64(v[d] ^ v[a], 32); \
v[c] += v[d]; \
v[b] = ROTR64(v[b] ^ v[c], 25); \
v[a] += (m[idx2] ^ c_u512[idx1]) + v[b]; \
v[d] = ROTR64(v[d] ^ v[a], 16); \
v[c] += v[d]; \
v[b] = ROTR64(v[b] ^ v[c], 11); \
}
// Hash-Padding
__device__ __constant__
static const uint64_t d_constHashPadding[8] = {
0x0000000000000080ull,
0,
0,
0,
0,
0x0100000000000000ull,
0,
0x0002000000000000ull
};
#if 0
__device__ __constant__
static const uint64_t __align__(32) c_Padding[16] = {
0, 0, 0, 0,
0x80000000ULL, 0, 0, 0,
0, 0, 0, 0,
0, 1, 0, 640,
};
__device__ static
void pentablake_compress(uint64_t *h, const uint64_t *block, const uint32_t T0)
{
uint64_t v[16], m[16];
m[0] = block[0];
m[1] = block[1];
m[2] = block[2];
m[3] = block[3];
for (uint32_t i = 4; i < 16; i++) {
m[i] = (T0 == 0x200) ? block[i] : c_Padding[i];
}
//#pragma unroll 8
for(uint32_t i = 0; i < 8; i++)
v[i] = h[i];
v[ 8] = c_u512[0];
v[ 9] = c_u512[1];
v[10] = c_u512[2];
v[11] = c_u512[3];
v[12] = xor1(c_u512[4], T0);
v[13] = xor1(c_u512[5], T0);
v[14] = c_u512[6];
v[15] = c_u512[7];
for (uint32_t i = 0; i < 16; i++) {
/* column step */
G(0, 4, 0x8, 0xC, 0x0);
G(1, 5, 0x9, 0xD, 0x2);
G(2, 6, 0xA, 0xE, 0x4);
G(3, 7, 0xB, 0xF, 0x6);
/* diagonal step */
G(0, 5, 0xA, 0xF, 0x8);
G(1, 6, 0xB, 0xC, 0xA);
G(2, 7, 0x8, 0xD, 0xC);
G(3, 4, 0x9, 0xE, 0xE);
}
//#pragma unroll 16
for (uint32_t i = 0; i < 16; i++) {
uint32_t j = i % 8;
h[j] ^= v[i];
}
}
__global__
void pentablake_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint32_t *resNounce)
{
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
{
const uint32_t nounce = startNounce + thread;
uint64_t h[8];
#pragma unroll
for(int i=0; i<8; i++) {
h[i] = c_IV512[i];
}
uint64_t ending[4];
ending[0] = c_data[16];
ending[1] = c_data[17];
ending[2] = c_data[18];
ending[3] = nounce; /* our tested value */
pentablake_compress(h, ending, 640);
// -----------------------------------
for (int r = 0; r < 4; r++) {
uint64_t data[8];
for (int i = 0; i < 7; i++) {
data[i] = h[i];
}
pentablake_compress(h, data, 512); /* todo: use h,h when ok*/
}
}
}
#endif
__device__ static
void pentablake_compress(uint64_t *h, const uint64_t *block, const uint64_t T0)
{
uint64_t v[16], m[16], i;
#pragma unroll 16
for(i = 0; i < 16; i++) {
m[i] = cuda_swab64(block[i]);
}
#pragma unroll 8
for (i = 0; i < 8; i++)
v[i] = h[i];
v[ 8] = c_u512[0];
v[ 9] = c_u512[1];
v[10] = c_u512[2];
v[11] = c_u512[3];
v[12] = c_u512[4] ^ T0;
v[13] = c_u512[5] ^ T0;
v[14] = c_u512[6];
v[15] = c_u512[7];
//#pragma unroll 16
for( i = 0; i < 16; i++)
{
/* column step */
G(0, 4, 0x8, 0xC, 0x0);
G(1, 5, 0x9, 0xD, 0x2);
G(2, 6, 0xA, 0xE, 0x4);
G(3, 7, 0xB, 0xF, 0x6);
/* diagonal step */
G(0, 5, 0xA, 0xF, 0x8);
G(1, 6, 0xB, 0xC, 0xA);
G(2, 7, 0x8, 0xD, 0xC);
G(3, 4, 0x9, 0xE, 0xE);
}
//#pragma unroll 16
for (i = 0; i < 16; i++) {
uint32_t idx = i % 8;
h[idx] ^= v[i];
}
}
__global__
void pentablake_gpu_hash_80(int threads, const uint32_t startNounce, void *outputHash)
{
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
{
uint64_t h[8];
uint64_t buf[16];
uint32_t nounce = startNounce + thread;
//#pragma unroll 8
for(int i=0; i<8; i++)
h[i] = c_IV512[i];
//#pragma unroll 16
for (int i=0; i < 16; i++)
buf[i] = c_data[i];
// The test Nonce
((uint32_t*)buf)[19] = cuda_swab32(nounce);
pentablake_compress(h, buf, 640ULL);
#if __CUDA_ARCH__ < 300
uint32_t *outHash = (uint32_t *)outputHash + 16 * thread;
#pragma unroll 8
for (uint32_t i=0; i < 8; i++) {
outHash[2*i] = cuda_swab32( _HIWORD(h[i]) );
outHash[2*i+1] = cuda_swab32( _LOWORD(h[i]) );
}
#else
uint64_t *outHash = (uint64_t *)outputHash + 8 * thread;
for (uint32_t i=0; i < 8; i++) {
outHash[i] = cuda_swab64( h[i] );
}
#endif
}
}
__host__
void pentablake_cpu_hash_80(int thr_id, int threads, const uint32_t startNounce, uint32_t *d_outputHash, int order)
{
const int threadsperblock = TPB;
dim3 grid((threads + threadsperblock-1)/threadsperblock);
dim3 block(threadsperblock);
size_t shared_size = 0;
pentablake_gpu_hash_80 <<<grid, block, shared_size>>> (threads, startNounce, d_outputHash);
//MyStreamSynchronize(NULL, order, thr_id);
cudaDeviceSynchronize();
}
__global__
void pentablake_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash)
{
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
{
uint64_t *inpHash = &g_hash[thread<<3]; // hashPosition * 8
uint64_t buf[16]; // 128 Bytes
uint64_t h[8]; // State
#pragma unroll 8
for (int i=0; i<8; i++)
h[i] = c_IV512[i];
// Message for first round
#pragma unroll 8
for (int i=0; i < 8; ++i)
buf[i] = inpHash[i];
#pragma unroll 8
for (int i=0; i < 8; i++)
buf[i+8] = d_constHashPadding[i];
// Ending round
pentablake_compress(h, buf, 512);
#if __CUDA_ARCH__ < 300
uint32_t *outHash = (uint32_t*)&g_hash[thread<<3];
#pragma unroll 8
for (int i=0; i < 8; i++) {
outHash[2*i+0] = cuda_swab32( _HIWORD(h[i]) );
outHash[2*i+1] = cuda_swab32( _LOWORD(h[i]) );
}
#else
uint64_t *outHash = &g_hash[thread<<3];
for (int i=0; i < 8; i++) {
outHash[i] = cuda_swab64(h[i]);
}
#endif
}
}
__host__
void pentablake_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash, int order)
{
const int threadsperblock = TPB;
dim3 grid((threads + threadsperblock-1)/threadsperblock);
dim3 block(threadsperblock);
size_t shared_size = 0;
pentablake_gpu_hash_64 <<<grid, block, shared_size>>> (threads, startNounce, (uint64_t*)d_outputHash);
//MyStreamSynchronize(NULL, order, thr_id);
cudaDeviceSynchronize();
}
#if 0
__host__
uint32_t pentablake_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce)
{
const int threadsperblock = TPB;
uint32_t result = MAXU;
dim3 grid((threads + threadsperblock-1)/threadsperblock);
dim3 block(threadsperblock);
size_t shared_size = 0;
/* Check error on Ctrl+C or kill to prevent segfaults on exit */
if (cudaMemset(d_resNounce[thr_id], 0xff, 2*sizeof(uint32_t)) != cudaSuccess)
return result;
pentablake_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNounce, d_resNounce[thr_id]);
cudaDeviceSynchronize();
if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], 2*sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
cudaThreadSynchronize();
result = h_resNounce[thr_id][0];
extra_results[0] = h_resNounce[thr_id][1];
}
return result;
}
#endif
__global__
void pentablake_gpu_check_hash(uint32_t threads, uint32_t startNounce, uint32_t *g_hash, uint32_t *resNounce)
{
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
{
uint32_t nounce = startNounce + thread;
uint32_t *inpHash = &g_hash[thread<<4];
uint32_t h[8];
#pragma unroll 8
for (int i=0; i < 8; i++)
h[i] = inpHash[i];
for (int i = 7; i >= 0; i--) {
uint32_t hash = h[i]; // cuda_swab32(h[i]);
if (hash > c_Target[i]) {
return;
}
if (hash < c_Target[i]) {
break;
}
}
/* keep the smallest nounce, + extra one if found */
if (resNounce[0] > nounce) {
resNounce[1] = resNounce[0];
resNounce[0] = nounce;
}
else
resNounce[1] = nounce;
}
}
__host__ static
uint32_t pentablake_check_hash(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_inputHash, int order)
{
const int threadsperblock = TPB;
uint32_t result = MAXU;
dim3 grid((threads + threadsperblock-1)/threadsperblock);
dim3 block(threadsperblock);
size_t shared_size = 0;
/* Check error on Ctrl+C or kill to prevent segfaults on exit */
if (cudaMemset(d_resNounce[thr_id], 0xff, 2*sizeof(uint32_t)) != cudaSuccess)
return result;
pentablake_gpu_check_hash <<<grid, block, shared_size>>> (threads, startNounce, d_inputHash, d_resNounce[thr_id]);
CUDA_SAFE_CALL(cudaDeviceSynchronize());
if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], 2*sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
cudaThreadSynchronize();
result = h_resNounce[thr_id][0];
extra_results[0] = h_resNounce[thr_id][1];
}
return result;
}
__host__
void pentablake_cpu_setBlock_80(uint32_t *pdata, const uint32_t *ptarget)
{
uint8_t data[128];
memcpy((void*) data, (void*) pdata, 80);
memset(data+80, 0, 48);
// to swab...
data[80] = 0x80;
data[111] = 1;
data[126] = 0x02;
data[127] = 0x80;
CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_data, data, sizeof(data), 0, cudaMemcpyHostToDevice));
CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_sigma, host_sigma, sizeof(host_sigma), 0, cudaMemcpyHostToDevice));
CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Target, ptarget, 32, 0, cudaMemcpyHostToDevice));
}
extern "C" int scanhash_pentablake(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
uint32_t max_nonce, unsigned long *hashes_done)
{
const uint32_t first_nonce = pdata[19];
static bool init[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
uint32_t throughput = min(128 * 2560, max_nonce - first_nonce);
uint32_t endiandata[20];
int rc = 0;
if (extra_results[0] != MAXU) {
// possible extra result found in previous call
if (first_nonce <= extra_results[0] && max_nonce >= extra_results[0]) {
pdata[19] = extra_results[0];
*hashes_done = pdata[19] - first_nonce + 1;
extra_results[0] = MAXU;
rc = 1;
goto exit_scan;
}
}
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x000F;
if (!init[thr_id]) {
if (opt_n_threads > 1) {
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
}
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 64 * throughput));
CUDA_SAFE_CALL(cudaMallocHost(&h_resNounce[thr_id], 2*sizeof(uint32_t)));
CUDA_SAFE_CALL(cudaMalloc(&d_resNounce[thr_id], 2*sizeof(uint32_t)));
init[thr_id] = true;
}
for (int k=0; k < 20; k++)
be32enc(&endiandata[k], pdata[k]);
pentablake_cpu_setBlock_80(endiandata, ptarget);
do {
int order = 0;
// GPU HASH
pentablake_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
pentablake_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
pentablake_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
pentablake_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
pentablake_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
uint32_t foundNonce = pentablake_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
if (foundNonce != MAXU)
{
uint32_t vhashcpu[8];
uint32_t Htarg = ptarget[7];
be32enc(&endiandata[19], foundNonce);
pentablakehash(vhashcpu, endiandata);
if (vhashcpu[7] <= Htarg && fulltest(vhashcpu, ptarget))
{
pdata[19] = foundNonce;
rc = 1;
// Rare but possible if the throughput is big
be32enc(&endiandata[19], extra_results[0]);
pentablakehash(vhashcpu, endiandata);
if (vhashcpu[7] <= Htarg && fulltest(vhashcpu, ptarget)) {
applog(LOG_NOTICE, "GPU found more than one result yippee!");
rc = 2;
} else {
extra_results[0] = MAXU;
}
goto exit_scan;
}
else if (vhashcpu[7] > Htarg) {
applog(LOG_WARNING, "GPU #%d: result for nounce %08x is not in range: %x > %x", thr_id, foundNonce, vhashcpu[7], Htarg);
}
else if (vhashcpu[6] > ptarget[6]) {
applog(LOG_WARNING, "GPU #%d: hash[6] for nounce %08x is not in range: %x > %x", thr_id, foundNonce, vhashcpu[6], ptarget[6]);
}
else {
applog(LOG_WARNING, "GPU #%d: result for nounce %08x does not validate on CPU!", thr_id, foundNonce);
}
}
pdata[19] += throughput;
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
exit_scan:
*hashes_done = pdata[19] - first_nonce + 1;
#if 0
/* reset the device to allow multiple instances
* could be made in cpu-miner... check later if required */
if (opt_n_threads == 1) {
CUDA_SAFE_CALL(cudaDeviceReset());
init[thr_id] = false;
}
#endif
cudaDeviceSynchronize();
return rc;
}

View File

@ -50,59 +50,60 @@ const uint64_t c_u512[16] =
0x0801f2e2858efc16ULL, 0x636920d871574e69ULL 0x0801f2e2858efc16ULL, 0x636920d871574e69ULL
}; };
#define G(a,b,c,d,e) \ #define G(a,b,c,d,x) { \
v[a] += (m[sigma[i][e]] ^ u512[sigma[i][e+1]]) + v[b];\ uint32_t idx1 = sigma[i][x]; \
v[d] = ROTR( v[d] ^ v[a],32); \ uint32_t idx2 = sigma[i][x+1]; \
v[c] += v[d]; \ v[a] += (m[idx1] ^ u512[idx2]) + v[b]; \
v[b] = ROTR( v[b] ^ v[c],25); \ v[d] = ROTR( v[d] ^ v[a], 32); \
v[a] += (m[sigma[i][e+1]] ^ u512[sigma[i][e]])+v[b]; \ v[c] += v[d]; \
v[d] = ROTR( v[d] ^ v[a],16); \ v[b] = ROTR( v[b] ^ v[c], 25); \
v[c] += v[d]; \ v[a] += (m[idx2] ^ u512[idx1]) + v[b]; \
v[b] = ROTR( v[b] ^ v[c],11); v[d] = ROTR( v[d] ^ v[a], 16); \
v[c] += v[d]; \
v[b] = ROTR( v[b] ^ v[c], 11); \
}
__device__ static __device__ static
void quark_blake512_compress( uint64_t *h, const uint64_t *block, const uint8_t ((*sigma)[16]), const uint64_t *u512, const int bits ) void quark_blake512_compress( uint64_t *h, const uint64_t *block, const uint8_t ((*sigma)[16]), const uint64_t *u512, const int T0)
{ {
uint64_t v[16], m[16], i; uint64_t v[16], m[16], i;
#pragma unroll 16 #pragma unroll 16
for( i = 0; i < 16; ++i ) { for( i = 0; i < 16; i++) {
m[i] = cuda_swab64(block[i]); m[i] = cuda_swab64(block[i]);
} }
#pragma unroll 8 #pragma unroll 8
for( i = 0; i < 8; ++i ) v[i] = h[i]; for (i = 0; i < 8; i++)
v[i] = h[i];
v[ 8] = u512[0]; v[ 8] = u512[0];
v[ 9] = u512[1]; v[ 9] = u512[1];
v[10] = u512[2]; v[10] = u512[2];
v[11] = u512[3]; v[11] = u512[3];
v[12] = u512[4]; v[12] = u512[4] ^ T0;
v[13] = u512[5]; v[13] = u512[5] ^ T0;
v[14] = u512[6]; v[14] = u512[6];
v[15] = u512[7]; v[15] = u512[7];
v[12] ^= bits; //#pragma unroll 16
v[13] ^= bits; for( i = 0; i < 16; ++i )
{
/* column step */
G( 0, 4, 8, 12, 0 );
G( 1, 5, 9, 13, 2 );
G( 2, 6, 10, 14, 4 );
G( 3, 7, 11, 15, 6 );
/* diagonal step */
G( 0, 5, 10, 15, 8 );
G( 1, 6, 11, 12, 10 );
G( 2, 7, 8, 13, 12 );
G( 3, 4, 9, 14, 14 );
}
//#pragma unroll 16 #pragma unroll 16
for( i = 0; i < 16; ++i ) for( i = 0; i < 16; ++i )
{ h[i % 8] ^= v[i];
/* column step */
G( 0, 4, 8, 12, 0 );
G( 1, 5, 9, 13, 2 );
G( 2, 6, 10, 14, 4 );
G( 3, 7, 11, 15, 6 );
/* diagonal step */
G( 0, 5, 10, 15, 8 );
G( 1, 6, 11, 12, 10 );
G( 2, 7, 8, 13, 12 );
G( 3, 4, 9, 14, 14 );
}
#pragma unroll 16
for( i = 0; i < 16; ++i ) h[i % 8] ^= v[i];
} }
__device__ __constant__ __device__ __constant__
@ -114,7 +115,8 @@ static const uint64_t d_constMem[8] = {
0x510e527fade682d1ULL, 0x510e527fade682d1ULL,
0x9b05688c2b3e6c1fULL, 0x9b05688c2b3e6c1fULL,
0x1f83d9abfb41bd6bULL, 0x1f83d9abfb41bd6bULL,
0x5be0cd19137e2179ULL }; 0x5be0cd19137e2179ULL
};
// Hash-Padding // Hash-Padding
__device__ __constant__ __device__ __constant__
@ -126,7 +128,8 @@ static const uint64_t d_constHashPadding[8] = {
0, 0,
0x0100000000000000ull, 0x0100000000000000ull,
0, 0,
0x0002000000000000ull }; 0x0002000000000000ull
};
__global__ __launch_bounds__(256, 4) __global__ __launch_bounds__(256, 4)
void quark_blake512_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_nonceVector, uint64_t *g_hash) void quark_blake512_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_nonceVector, uint64_t *g_hash)
@ -145,48 +148,42 @@ void quark_blake512_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_n
if (thread < threads) if (thread < threads)
#endif #endif
{ {
uint8_t i;
// bestimme den aktuellen Zähler
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread); uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
int hashPosition = nounce - startNounce; int hashPosition = nounce - startNounce;
uint64_t *inpHash = &g_hash[hashPosition<<3]; // hashPosition * 8 uint64_t *inpHash = &g_hash[hashPosition<<3]; // hashPosition * 8
// 128 Byte für die Message // 128 Bytes
uint64_t buf[16]; uint64_t buf[16];
// State vorbereiten // State
uint64_t h[8]; uint64_t h[8];
#pragma unroll 8 #pragma unroll 8
for (i=0;i<8;i++) for (int i=0;i<8;i++)
h[i] = d_constMem[i]; h[i] = d_constMem[i];
// Message für die erste Runde in Register holen // Message for first round
#pragma unroll 8 #pragma unroll 8
for (i=0; i < 8; ++i) for (int i=0; i < 8; ++i)
buf[i] = inpHash[i]; buf[i] = inpHash[i];
#pragma unroll 8 #pragma unroll 8
for (i=0; i < 8; i++) for (int i=0; i < 8; i++)
buf[i+8] = d_constHashPadding[i]; buf[i+8] = d_constHashPadding[i];
// die einzige Hashing-Runde // Ending round
quark_blake512_compress( h, buf, c_sigma, c_u512, 512 ); quark_blake512_compress( h, buf, c_sigma, c_u512, 512 );
#if __CUDA_ARCH__ >= 130 #if __CUDA_ARCH__ <= 350
// ausschliesslich 32 bit Operationen sofern die SM1.3 double intrinsics verfügbar sind
uint32_t *outHash = (uint32_t*)&g_hash[8 * hashPosition]; uint32_t *outHash = (uint32_t*)&g_hash[8 * hashPosition];
#pragma unroll 8 #pragma unroll 8
for (i=0; i < 8; ++i) { for (int i=0; i < 8; i++) {
outHash[2*i+0] = cuda_swab32( _HIWORD(h[i]) ); outHash[2*i+0] = cuda_swab32( _HIWORD(h[i]) );
outHash[2*i+1] = cuda_swab32( _LOWORD(h[i]) ); outHash[2*i+1] = cuda_swab32( _LOWORD(h[i]) );
} }
#else #else
// in dieser Version passieren auch ein paar 64 Bit Shifts
uint64_t *outHash = &g_hash[8 * hashPosition]; uint64_t *outHash = &g_hash[8 * hashPosition];
#pragma unroll 8 for (int i=0; i < 8; i++) {
for (i=0; i < 8; ++i)
{
outHash[i] = cuda_swab64(h[i]); outHash[i] = cuda_swab64(h[i]);
} }
#endif #endif
@ -198,45 +195,38 @@ __global__ void quark_blake512_gpu_hash_80(int threads, uint32_t startNounce, vo
int thread = (blockDim.x * blockIdx.x + threadIdx.x); int thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads) if (thread < threads)
{ {
// State vorbereiten
uint64_t h[8]; uint64_t h[8];
// 128 Byte für die Message
uint64_t buf[16]; uint64_t buf[16];
uint8_t i;
// bestimme den aktuellen Zähler
uint32_t nounce = startNounce + thread; uint32_t nounce = startNounce + thread;
#pragma unroll 8 #pragma unroll 8
for(i=0;i<8;i++) for(int i=0; i<8; i++)
h[i] = d_constMem[i]; h[i] = d_constMem[i];
// Message für die erste Runde in Register holen // Message für die erste Runde in Register holen
#pragma unroll 16 #pragma unroll 16
for (i=0; i < 16; ++i) buf[i] = c_PaddedMessage80[i]; for (int i=0; i < 16; ++i)
buf[i] = c_PaddedMessage80[i];
// die Nounce durch die thread-spezifische ersetzen // The test Nonce
buf[9] = REPLACE_HIWORD(buf[9], cuda_swab32(nounce)); ((uint32_t*)buf)[19] = cuda_swab32(nounce);
// die einzige Hashing-Runde
quark_blake512_compress( h, buf, c_sigma, c_u512, 640 ); quark_blake512_compress( h, buf, c_sigma, c_u512, 640 );
// Hash rauslassen #if __CUDA_ARCH__ <= 350
#if __CUDA_ARCH__ >= 130
// ausschliesslich 32 bit Operationen sofern die SM1.3 double intrinsics verfügbar sind
uint32_t *outHash = (uint32_t *)outputHash + 16 * thread; uint32_t *outHash = (uint32_t *)outputHash + 16 * thread;
#pragma unroll 8 #pragma unroll 8
for (i=0; i < 8; ++i) { for (uint32_t i=0; i < 8; i++) {
outHash[2*i+0] = cuda_swab32( _HIWORD(h[i]) ); outHash[2*i] = cuda_swab32( _HIWORD(h[i]) );
outHash[2*i+1] = cuda_swab32( _LOWORD(h[i]) ); outHash[2*i+1] = cuda_swab32( _LOWORD(h[i]) );
} }
#else #else
// in dieser Version passieren auch ein paar 64 Bit Shifts
uint64_t *outHash = (uint64_t *)outputHash + 8 * thread; uint64_t *outHash = (uint64_t *)outputHash + 8 * thread;
#pragma unroll 8 for (uint32_t i=0; i < 8; i++) {
for (i=0; i < 8; ++i) {
outHash[i] = cuda_swab64( h[i] ); outHash[i] = cuda_swab64( h[i] );
} }
#endif #endif
} }
} }

View File

@ -6,12 +6,12 @@ extern "C"
#include "sph/sph_skein.h" #include "sph/sph_skein.h"
#include "sph/sph_jh.h" #include "sph/sph_jh.h"
#include "sph/sph_keccak.h" #include "sph/sph_keccak.h"
}
#include "miner.h" #include "miner.h"
#include "cuda_helper.h" #include "cuda_helper.h"
}
// aus cpu-miner.c
extern int device_map[8]; extern int device_map[8];
// Speicher für Input/Output der verketteten Hashfunktionen // Speicher für Input/Output der verketteten Hashfunktionen
@ -70,76 +70,64 @@ extern "C" void quarkhash(void *state, const void *input)
unsigned char hash[64]; unsigned char hash[64];
sph_blake512_init(&ctx_blake); sph_blake512_init(&ctx_blake);
// ZBLAKE;
sph_blake512 (&ctx_blake, input, 80); sph_blake512 (&ctx_blake, input, 80);
sph_blake512_close(&ctx_blake, (void*) hash); sph_blake512_close(&ctx_blake, (void*) hash);
sph_bmw512_init(&ctx_bmw); sph_bmw512_init(&ctx_bmw);
// ZBMW;
sph_bmw512 (&ctx_bmw, (const void*) hash, 64); sph_bmw512 (&ctx_bmw, (const void*) hash, 64);
sph_bmw512_close(&ctx_bmw, (void*) hash); sph_bmw512_close(&ctx_bmw, (void*) hash);
if (hash[0] & 0x8) if (hash[0] & 0x8)
{ {
sph_groestl512_init(&ctx_groestl); sph_groestl512_init(&ctx_groestl);
// ZGROESTL;
sph_groestl512 (&ctx_groestl, (const void*) hash, 64); sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
sph_groestl512_close(&ctx_groestl, (void*) hash); sph_groestl512_close(&ctx_groestl, (void*) hash);
} }
else else
{ {
sph_skein512_init(&ctx_skein); sph_skein512_init(&ctx_skein);
// ZSKEIN;
sph_skein512 (&ctx_skein, (const void*) hash, 64); sph_skein512 (&ctx_skein, (const void*) hash, 64);
sph_skein512_close(&ctx_skein, (void*) hash); sph_skein512_close(&ctx_skein, (void*) hash);
} }
sph_groestl512_init(&ctx_groestl); sph_groestl512_init(&ctx_groestl);
// ZGROESTL;
sph_groestl512 (&ctx_groestl, (const void*) hash, 64); sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
sph_groestl512_close(&ctx_groestl, (void*) hash); sph_groestl512_close(&ctx_groestl, (void*) hash);
sph_jh512_init(&ctx_jh); sph_jh512_init(&ctx_jh);
// ZJH;
sph_jh512 (&ctx_jh, (const void*) hash, 64); sph_jh512 (&ctx_jh, (const void*) hash, 64);
sph_jh512_close(&ctx_jh, (void*) hash); sph_jh512_close(&ctx_jh, (void*) hash);
if (hash[0] & 0x8) if (hash[0] & 0x8)
{ {
sph_blake512_init(&ctx_blake); sph_blake512_init(&ctx_blake);
// ZBLAKE;
sph_blake512 (&ctx_blake, (const void*) hash, 64); sph_blake512 (&ctx_blake, (const void*) hash, 64);
sph_blake512_close(&ctx_blake, (void*) hash); sph_blake512_close(&ctx_blake, (void*) hash);
} }
else else
{ {
sph_bmw512_init(&ctx_bmw); sph_bmw512_init(&ctx_bmw);
// ZBMW;
sph_bmw512 (&ctx_bmw, (const void*) hash, 64); sph_bmw512 (&ctx_bmw, (const void*) hash, 64);
sph_bmw512_close(&ctx_bmw, (void*) hash); sph_bmw512_close(&ctx_bmw, (void*) hash);
} }
sph_keccak512_init(&ctx_keccak); sph_keccak512_init(&ctx_keccak);
// ZKECCAK;
sph_keccak512 (&ctx_keccak, (const void*) hash, 64); sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
sph_keccak512_close(&ctx_keccak, (void*) hash); sph_keccak512_close(&ctx_keccak, (void*) hash);
sph_skein512_init(&ctx_skein); sph_skein512_init(&ctx_skein);
// SKEIN;
sph_skein512 (&ctx_skein, (const void*) hash, 64); sph_skein512 (&ctx_skein, (const void*) hash, 64);
sph_skein512_close(&ctx_skein, (void*) hash); sph_skein512_close(&ctx_skein, (void*) hash);
if (hash[0] & 0x8) if (hash[0] & 0x8)
{ {
sph_keccak512_init(&ctx_keccak); sph_keccak512_init(&ctx_keccak);
// ZKECCAK;
sph_keccak512 (&ctx_keccak, (const void*) hash, 64); sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
sph_keccak512_close(&ctx_keccak, (void*) hash); sph_keccak512_close(&ctx_keccak, (void*) hash);
} }
else else
{ {
sph_jh512_init(&ctx_jh); sph_jh512_init(&ctx_jh);
// ZJH;
sph_jh512 (&ctx_jh, (const void*) hash, 64); sph_jh512 (&ctx_jh, (const void*) hash, 64);
sph_jh512_close(&ctx_jh, (void*) hash); sph_jh512_close(&ctx_jh, (void*) hash);
} }
@ -147,23 +135,17 @@ extern "C" void quarkhash(void *state, const void *input)
memcpy(state, hash, 32); memcpy(state, hash, 32);
} }
extern bool opt_benchmark;
extern "C" int scanhash_quark(int thr_id, uint32_t *pdata, extern "C" int scanhash_quark(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done) unsigned long *hashes_done)
{ {
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
const int throughput = 256*4096; // 100;
static bool init[8] = {0,0,0,0,0,0,0,0};
if (opt_benchmark) if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x0000ff; ((uint32_t*)ptarget)[7] = 0x00FF;
const uint32_t Htarg = ptarget[7];
const int throughput = 256*4096; // 100;
static bool init[8] = {0,0,0,0,0,0,0,0};
if (!init[thr_id]) if (!init[thr_id])
{ {
cudaSetDevice(device_map[thr_id]); cudaSetDevice(device_map[thr_id]);
@ -252,11 +234,12 @@ extern "C" int scanhash_quark(int thr_id, uint32_t *pdata,
uint32_t foundNonce = cuda_check_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++); uint32_t foundNonce = cuda_check_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
if (foundNonce != 0xffffffff) if (foundNonce != 0xffffffff)
{ {
const uint32_t Htarg = ptarget[7];
uint32_t vhash64[8]; uint32_t vhash64[8];
be32enc(&endiandata[19], foundNonce); be32enc(&endiandata[19], foundNonce);
quarkhash(vhash64, endiandata); quarkhash(vhash64, endiandata);
if ((vhash64[7]<=Htarg) && fulltest(vhash64, ptarget)) { if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
pdata[19] = foundNonce; pdata[19] = foundNonce;
*hashes_done = (foundNonce - first_nonce + 1)/2; *hashes_done = (foundNonce - first_nonce + 1)/2;

29
util.c
View File

@ -1020,7 +1020,7 @@ static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)
int merkle_count, i; int merkle_count, i;
json_t *merkle_arr; json_t *merkle_arr;
unsigned char **merkle; unsigned char **merkle;
int ntime; int ntime, hoffset;
job_id = json_string_value(json_array_get(params, 0)); job_id = json_string_value(json_array_get(params, 0));
prevhash = json_string_value(json_array_get(params, 1)); prevhash = json_string_value(json_array_get(params, 1));
@ -1078,7 +1078,8 @@ static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)
hex2bin(sctx->job.coinbase, coinb1, coinb1_size); hex2bin(sctx->job.coinbase, coinb1, coinb1_size);
memcpy(sctx->job.coinbase + coinb1_size, sctx->xnonce1, sctx->xnonce1_size); memcpy(sctx->job.coinbase + coinb1_size, sctx->xnonce1, sctx->xnonce1_size);
sctx->bloc_height = le16dec((uint8_t*) sctx->job.coinbase + 43); hoffset = coinb1_size - 15; // 43;
sctx->bloc_height = le16dec((uint8_t*) sctx->job.coinbase + hoffset);
if (!sctx->job.job_id || strcmp(sctx->job.job_id, job_id)) if (!sctx->job.job_id || strcmp(sctx->job.job_id, job_id))
memset(sctx->job.xnonce2, 0, sctx->xnonce2_size); memset(sctx->job.xnonce2, 0, sctx->xnonce2_size);
hex2bin(sctx->job.xnonce2 + sctx->xnonce2_size, coinb2, coinb2_size); hex2bin(sctx->job.xnonce2 + sctx->xnonce2_size, coinb2, coinb2_size);
@ -1125,7 +1126,7 @@ static bool stratum_set_difficulty(struct stratum_ctx *sctx, json_t *params)
sctx->next_diff = diff; sctx->next_diff = diff;
pthread_mutex_unlock(&sctx->work_lock); pthread_mutex_unlock(&sctx->work_lock);
applog(LOG_INFO, "Stratum difficulty set to %g", diff); applog(LOG_WARNING, "Stratum difficulty set to %g", diff);
return true; return true;
} }
@ -1221,10 +1222,6 @@ bool stratum_handle_method(struct stratum_ctx *sctx, const char *s)
id = json_object_get(val, "id"); id = json_object_get(val, "id");
params = json_object_get(val, "params"); params = json_object_get(val, "params");
if (opt_debug_rpc) {
applog(LOG_DEBUG, "method: %s", s);
}
if (!strcasecmp(method, "mining.notify")) { if (!strcasecmp(method, "mining.notify")) {
ret = stratum_notify(sctx, params); ret = stratum_notify(sctx, params);
goto out; goto out;
@ -1400,6 +1397,20 @@ static char* format_hash(char* buf, unsigned char *hash)
return buf; return buf;
} }
/* to debug diff in data */
extern void applog_compare_hash(unsigned char *hash, unsigned char *hash2)
{
char s[256] = "";
int len = 0;
for (int i=0; i < 32; i += 4) {
char *color = memcmp(hash+i, hash2+i, 4) ? CL_RED : CL_GRY;
len += sprintf(s+len, "%s%02x%02x%02x%02x " CL_GRY, color,
hash[i], hash[i+1], hash[i+2], hash[i+3]);
s[len] = '\0';
}
applog(LOG_DEBUG, "%s", s);
}
extern void applog_hash(unsigned char *hash) extern void applog_hash(unsigned char *hash)
{ {
char s[128] = {'\0'}; char s[128] = {'\0'};
@ -1457,6 +1468,10 @@ void print_hash_tests(void)
nist5hash(&hash[0], &buf[0]); nist5hash(&hash[0], &buf[0]);
printpfx("nist5", hash); printpfx("nist5", hash);
memset(hash, 0, sizeof hash);
pentablakehash(&hash[0], &buf[0]);
printpfx("pentablake", hash);
memset(hash, 0, sizeof hash); memset(hash, 0, sizeof hash);
quarkhash(&hash[0], &buf[0]); quarkhash(&hash[0], &buf[0]);
printpfx("quark", hash); printpfx("quark", hash);

View File

@ -21,10 +21,9 @@ extern "C"
#include <memory.h> #include <memory.h>
} }
// aus cpu-miner.c // in cpu-miner.c
extern int device_map[8]; extern int device_map[8];
// Speicher für Input/Output der verketteten Hashfunktionen
static uint32_t *d_hash[8]; static uint32_t *d_hash[8];
extern void quark_blake512_cpu_init(int thr_id, int threads); extern void quark_blake512_cpu_init(int thr_id, int threads);
@ -140,22 +139,17 @@ extern "C" void x11hash(void *output, const void *input)
} }
extern bool opt_benchmark;
extern "C" int scanhash_x11(int thr_id, uint32_t *pdata, extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done) unsigned long *hashes_done)
{ {
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
const int throughput = 256*256*8;
static bool init[8] = {0,0,0,0,0,0,0,0};
if (opt_benchmark) if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x0000ff; ((uint32_t*)ptarget)[7] = 0x0000ff;
const uint32_t Htarg = ptarget[7];
const int throughput = 256*256*8;
static bool init[8] = {0,0,0,0,0,0,0,0};
if (!init[thr_id]) if (!init[thr_id])
{ {
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id])); CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
@ -186,8 +180,10 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
cuda_check_cpu_setTarget(ptarget); cuda_check_cpu_setTarget(ptarget);
do { do {
uint32_t foundNonce; const uint32_t Htarg = ptarget[7];
int order = 0; int order = 0;
uint32_t foundNonce;
// Hash with CUDA // Hash with CUDA
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
@ -204,7 +200,7 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
// Scan nach Gewinner Hashes auf der GPU // Scan nach Gewinner Hashes auf der GPU
foundNonce = cuda_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); foundNonce = cuda_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
if (foundNonce != 0xffffffff) if (foundNonce != 0xffffffff)
{ {
uint32_t vhash64[8]; uint32_t vhash64[8];
be32enc(&endiandata[19], foundNonce); be32enc(&endiandata[19], foundNonce);

View File

@ -20,11 +20,11 @@ extern "C" {
#include "sph/sph_hamsi.h" #include "sph/sph_hamsi.h"
#include "sph/sph_fugue.h" #include "sph/sph_fugue.h"
#include "sph/sph_shabal.h" #include "sph/sph_shabal.h"
}
#include "miner.h" #include "miner.h"
#include "cuda_helper.h" #include "cuda_helper.h"
}
// from cpu-miner.c // from cpu-miner.c
extern int device_map[8]; extern int device_map[8];
@ -167,8 +167,6 @@ extern "C" void x14hash(void *output, const void *input)
} }
extern bool opt_benchmark;
extern "C" int scanhash_x14(int thr_id, uint32_t *pdata, extern "C" int scanhash_x14(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done) unsigned long *hashes_done)

View File

@ -21,14 +21,11 @@ extern "C" {
#include "sph/sph_fugue.h" #include "sph/sph_fugue.h"
#include "sph/sph_shabal.h" #include "sph/sph_shabal.h"
#include "sph/sph_whirlpool.h" #include "sph/sph_whirlpool.h"
}
#include "miner.h" #include "miner.h"
#include "cuda_helper.h" #include "cuda_helper.h"
}
// to test gpu hash on a null buffer
#define NULLTEST 0
// from cpu-miner.c // from cpu-miner.c
extern int device_map[8]; extern int device_map[8];
@ -92,8 +89,6 @@ extern void quark_compactTest_cpu_init(int thr_id, int threads);
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order); uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order);
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
// X15 CPU Hash function // X15 CPU Hash function
extern "C" void x15hash(void *output, const void *input) extern "C" void x15hash(void *output, const void *input)
{ {
@ -181,17 +176,6 @@ extern "C" void x15hash(void *output, const void *input)
memcpy(output, hash, 32); memcpy(output, hash, 32);
} }
#if NULLTEST
static void print_hash(unsigned char *hash)
{
for (int i=0; i < 32; i += 4) {
printf("%02x%02x%02x%02x ", hash[i], hash[i+1], hash[i+2], hash[i+3]);
}
}
#endif
extern bool opt_benchmark;
extern "C" int scanhash_x15(int thr_id, uint32_t *pdata, extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done) unsigned long *hashes_done)
@ -203,12 +187,7 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
uint32_t Htarg = ptarget[7]; uint32_t Htarg = ptarget[7];
if (opt_benchmark) if (opt_benchmark)
((uint32_t*)ptarget)[7] = Htarg = 0x0000ff; ((uint32_t*)ptarget)[7] = Htarg = 0x00FF;
#if NULLTEST
for (int k=0; k < 20; k++)
pdata[k] = 0;
#endif
if (!init[thr_id]) if (!init[thr_id])
{ {
@ -259,12 +238,6 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
x15_whirlpool_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); x15_whirlpool_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
#if NULLTEST
uint32_t buf[8]; memset(buf, 0, sizeof buf);
CUDA_SAFE_CALL(cudaMemcpy(buf, d_hash[thr_id], sizeof buf, cudaMemcpyDeviceToHost));
CUDA_SAFE_CALL(cudaThreadSynchronize());
print_hash((unsigned char*)buf); printf("\n");
#endif
/* Scan with GPU */ /* Scan with GPU */
uint32_t foundNonce = cuda_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); uint32_t foundNonce = cuda_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);

View File

@ -26,17 +26,15 @@ extern "C"
#include "sph/sph_sha2.h" #include "sph/sph_sha2.h"
#include "sph/sph_haval.h" #include "sph/sph_haval.h"
}
#include "miner.h" #include "miner.h"
} #include "cuda_helper.h"
static uint32_t *d_hash[8]; static uint32_t *d_hash[8];
// in cpu-miner.c
// cpu-miner.c
extern int device_map[8]; extern int device_map[8];
extern bool opt_benchmark;
extern void quark_blake512_cpu_init(int thr_id, int threads); extern void quark_blake512_cpu_init(int thr_id, int threads);
extern void quark_blake512_cpu_setBlock_80(void *pdata); extern void quark_blake512_cpu_setBlock_80(void *pdata);
@ -204,20 +202,12 @@ extern "C" int scanhash_x17(int thr_id, uint32_t *pdata,
unsigned long *hashes_done) unsigned long *hashes_done)
{ {
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x0000ff;
const int throughput = 256*256*8; const int throughput = 256*256*8;
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x0000ff;
static bool init[8] = {0,0,0,0,0,0,0,0}; static bool init[8] = {0,0,0,0,0,0,0,0};
uint32_t Htarg = ptarget[7]; uint32_t Htarg = ptarget[7];
if (opt_benchmark) if (opt_benchmark)
((uint32_t*)ptarget)[7] = Htarg = 0x0000ff; ((uint32_t*)ptarget)[7] = Htarg = 0x00FF;
if (!init[thr_id]) if (!init[thr_id])
{ {