mirror of
https://github.com/GOSTSec/ccminer
synced 2025-08-26 05:42:02 +00:00
x15 now works
This commit is contained in:
parent
8e32e5ea09
commit
81d7e0421d
@ -941,7 +941,6 @@ static void *miner_thread(void *userdata)
|
|||||||
case ALGO_X15:
|
case ALGO_X15:
|
||||||
rc = scanhash_x15(thr_id, work.data, work.target,
|
rc = scanhash_x15(thr_id, work.data, work.target,
|
||||||
max_nonce, &hashes_done);
|
max_nonce, &hashes_done);
|
||||||
exit(0);
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
@ -21,16 +21,6 @@ extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int t
|
|||||||
#define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n))))
|
#define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n))))
|
||||||
#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n)))
|
#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n)))
|
||||||
|
|
||||||
#define SWAB32(x) ( __byte_perm(x, x, 0x0123) )
|
|
||||||
|
|
||||||
#if __CUDA_ARCH__ < 350
|
|
||||||
// Kepler (Compute 3.0)
|
|
||||||
#define ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
|
|
||||||
#else
|
|
||||||
// Kepler (Compute 3.5)
|
|
||||||
#define ROTL32(x, n) __funnelshift_l( (x), (x), (n) )
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
static __constant__ uint64_t d_plain_T0[256];
|
static __constant__ uint64_t d_plain_T0[256];
|
||||||
#if !SPH_SMALL_FOOTPRINT_WHIRLPOOL
|
#if !SPH_SMALL_FOOTPRINT_WHIRLPOOL
|
||||||
@ -1239,26 +1229,24 @@ __global__ void x15_whirlpool_gpu_hash_64(int threads, uint32_t startNounce, uin
|
|||||||
{
|
{
|
||||||
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
|
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
|
||||||
uint32_t hashPosition = nounce - startNounce;
|
uint32_t hashPosition = nounce - startNounce;
|
||||||
uint64_t *hash = (g_hash + hashPosition);
|
uint64_t *pHash = &g_hash[hashPosition<<3];
|
||||||
#if NULLTEST
|
|
||||||
for (int i = 0; i < 8; i++)
|
|
||||||
hash[i] = 0;
|
|
||||||
#endif
|
|
||||||
// whirlpool
|
// whirlpool
|
||||||
uint64_t n0, n1, n2, n3, n4, n5, n6, n7;
|
uint64_t n0, n1, n2, n3, n4, n5, n6, n7;
|
||||||
uint64_t h0, h1, h2, h3, h4, h5, h6, h7;
|
uint64_t h0=0, h1=0, h2=0, h3=0, h4=0, h5=0, h6=0, h7=0;
|
||||||
uint64_t state[8];
|
uint64_t state[8];
|
||||||
|
|
||||||
n0 = (hash[0]);
|
#if NULLTEST
|
||||||
n1 = (hash[1]);
|
for (uint8_t i = 0; i < 8; i++)
|
||||||
n2 = (hash[2]);
|
pHash[i] = 0;
|
||||||
n3 = (hash[3]);
|
#endif
|
||||||
n4 = (hash[4]);
|
n0 = pHash[0];
|
||||||
n5 = (hash[5]);
|
n1 = pHash[1];
|
||||||
n6 = (hash[6]);
|
n2 = pHash[2];
|
||||||
n7 = (hash[7]);
|
n3 = pHash[3];
|
||||||
|
n4 = pHash[4];
|
||||||
h0 = h1 = h2 = h3 = h4 = h5 = h6 = h7 = 0;
|
n5 = pHash[5];
|
||||||
|
n6 = pHash[6];
|
||||||
|
n7 = pHash[7];
|
||||||
|
|
||||||
n0 ^= h0;
|
n0 ^= h0;
|
||||||
n1 ^= h1;
|
n1 ^= h1;
|
||||||
@ -1270,7 +1258,7 @@ __global__ void x15_whirlpool_gpu_hash_64(int threads, uint32_t startNounce, uin
|
|||||||
n7 ^= h7;
|
n7 ^= h7;
|
||||||
|
|
||||||
#pragma unroll 10
|
#pragma unroll 10
|
||||||
for (unsigned r = 0; r < 10; r++)
|
for (uint8_t r = 0; r < 10; r++)
|
||||||
{
|
{
|
||||||
uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
||||||
|
|
||||||
@ -1280,14 +1268,14 @@ __global__ void x15_whirlpool_gpu_hash_64(int threads, uint32_t startNounce, uin
|
|||||||
TRANSFER(n, tmp);
|
TRANSFER(n, tmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
state[0] = n0 ^ (hash[0]);
|
state[0] = n0 ^ pHash[0];
|
||||||
state[1] = n1 ^ (hash[1]);
|
state[1] = n1 ^ pHash[1];
|
||||||
state[2] = n2 ^ (hash[2]);
|
state[2] = n2 ^ pHash[2];
|
||||||
state[3] = n3 ^ (hash[3]);
|
state[3] = n3 ^ pHash[3];
|
||||||
state[4] = n4 ^ (hash[4]);
|
state[4] = n4 ^ pHash[4];
|
||||||
state[5] = n5 ^ (hash[5]);
|
state[5] = n5 ^ pHash[5];
|
||||||
state[6] = n6 ^ (hash[6]);
|
state[6] = n6 ^ pHash[6];
|
||||||
state[7] = n7 ^ (hash[7]);
|
state[7] = n7 ^ pHash[7];
|
||||||
|
|
||||||
n0 = 0x80;
|
n0 = 0x80;
|
||||||
n1 = n2 = n3 = n4 = n5 = n6 = 0;
|
n1 = n2 = n3 = n4 = n5 = n6 = 0;
|
||||||
@ -1312,7 +1300,7 @@ __global__ void x15_whirlpool_gpu_hash_64(int threads, uint32_t startNounce, uin
|
|||||||
n7 ^= h7;
|
n7 ^= h7;
|
||||||
|
|
||||||
#pragma unroll 10
|
#pragma unroll 10
|
||||||
for (unsigned r = 0; r < 10; r++)
|
for (uint8_t r = 0; r < 10; r++)
|
||||||
{
|
{
|
||||||
uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
||||||
|
|
||||||
@ -1322,19 +1310,14 @@ __global__ void x15_whirlpool_gpu_hash_64(int threads, uint32_t startNounce, uin
|
|||||||
TRANSFER(n, tmp);
|
TRANSFER(n, tmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
state[0] ^= n0 ^ 0x80;
|
pHash[0] = state[0] ^ (n0 ^ 0x80);
|
||||||
state[1] ^= n1;
|
pHash[1] = state[1] ^ n1;
|
||||||
state[2] ^= n2;
|
pHash[2] = state[2] ^ n2;
|
||||||
state[3] ^= n3;
|
pHash[3] = state[3] ^ n3;
|
||||||
state[4] ^= n4;
|
pHash[4] = state[4] ^ n4;
|
||||||
state[5] ^= n5;
|
pHash[5] = state[5] ^ n5;
|
||||||
state[6] ^= n6;
|
pHash[6] = state[6] ^ n6;
|
||||||
state[7] ^= n7 ^ 0x2000000000000;
|
pHash[7] = state[7] ^ (n7 ^ 0x2000000000000);
|
||||||
|
|
||||||
for (unsigned i = 0; i < 8; i++)
|
|
||||||
hash[i] = state[i];
|
|
||||||
|
|
||||||
// bool result = (hash[3] <= target);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -181,7 +181,7 @@ extern "C" int scanhash_x14(int thr_id, uint32_t *pdata,
|
|||||||
uint32_t Htarg = ptarget[7];
|
uint32_t Htarg = ptarget[7];
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
((uint32_t*)ptarget)[7] = 0xff;
|
((uint32_t*)ptarget)[7] = Htarg = 0xff;
|
||||||
|
|
||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
{
|
{
|
||||||
|
42
x15/x15.cu
42
x15/x15.cu
@ -181,6 +181,15 @@ extern "C" void x15hash(void *output, const void *input)
|
|||||||
memcpy(output, hash, 32);
|
memcpy(output, hash, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if NULLTEST
|
||||||
|
static void print_hash(unsigned char *hash)
|
||||||
|
{
|
||||||
|
for (int i=0; i < 32; i += 4) {
|
||||||
|
printf("%02x%02x%02x%02x ", hash[i], hash[i+1], hash[i+2], hash[i+3]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
extern bool opt_benchmark;
|
extern bool opt_benchmark;
|
||||||
|
|
||||||
extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
|
extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
|
||||||
@ -196,6 +205,11 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
|
|||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
((uint32_t*)ptarget)[7] = Htarg = 0x0000ff;
|
((uint32_t*)ptarget)[7] = Htarg = 0x0000ff;
|
||||||
|
|
||||||
|
#if NULLTEST
|
||||||
|
for (int k=0; k < 20; k++)
|
||||||
|
pdata[k] = 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
{
|
{
|
||||||
cudaSetDevice(device_map[thr_id]);
|
cudaSetDevice(device_map[thr_id]);
|
||||||
@ -223,11 +237,7 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (int k=0; k < 20; k++)
|
for (int k=0; k < 20; k++)
|
||||||
#if NULLTEST
|
|
||||||
endiandata[k] = 0;
|
|
||||||
#else
|
|
||||||
be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
|
be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
|
||||||
#endif
|
|
||||||
|
|
||||||
quark_blake512_cpu_setBlock_80((void*)endiandata);
|
quark_blake512_cpu_setBlock_80((void*)endiandata);
|
||||||
quark_check_cpu_setTarget(ptarget);
|
quark_check_cpu_setTarget(ptarget);
|
||||||
@ -250,36 +260,28 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
|
|||||||
x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||||
x15_whirlpool_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
x15_whirlpool_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||||
|
|
||||||
|
#if NULLTEST
|
||||||
|
uint32_t buf[8]; memset(buf, 0, sizeof buf);
|
||||||
|
cudaMemcpy(buf, d_hash[thr_id], sizeof buf, cudaMemcpyDeviceToHost);
|
||||||
|
cudaThreadSynchronize();
|
||||||
|
print_hash((unsigned char*)buf); printf("\n");
|
||||||
|
#endif
|
||||||
/* Scan with GPU */
|
/* Scan with GPU */
|
||||||
uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||||
|
|
||||||
#if NULLTEST
|
|
||||||
uint32_t buf[16]; memset(buf, 0, sizeof(buf));
|
|
||||||
cudaMemcpy(buf, d_hash[thr_id], 16 * sizeof(uint32_t), cudaMemcpyDeviceToHost);
|
|
||||||
MyStreamSynchronize(NULL, order, thr_id);
|
|
||||||
applog(LOG_NOTICE, "Hash %08x %08x %08x %08x", buf[0], buf[1], buf[2], buf[3]);
|
|
||||||
applog(LOG_NOTICE, "Hash %08x %08x %08x %08x", buf[4], buf[5], buf[6], buf[7]);
|
|
||||||
applog(LOG_NOTICE, "Hash %08x %08x %08x %08x", buf[8], buf[9], buf[10], buf[11]);
|
|
||||||
applog(LOG_NOTICE, "Hash %08x %08x %08x %08x", buf[12], buf[13], buf[14], buf[15]);
|
|
||||||
return 0;
|
|
||||||
#endif
|
|
||||||
if (foundNonce != 0xffffffff)
|
if (foundNonce != 0xffffffff)
|
||||||
{
|
{
|
||||||
/* check now with the CPU to confirm */
|
/* check now with the CPU to confirm */
|
||||||
uint32_t vhash64[8];
|
uint32_t vhash64[8];
|
||||||
be32enc(&endiandata[19], foundNonce);
|
be32enc(&endiandata[19], foundNonce);
|
||||||
x15hash(vhash64, endiandata);
|
x15hash(vhash64, endiandata);
|
||||||
if ((vhash64[7] <= Htarg) /* && fulltest(vhash64, ptarget) */) {
|
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
|
||||||
pdata[19] = foundNonce;
|
pdata[19] = foundNonce;
|
||||||
*hashes_done = foundNonce - first_nonce + 1;
|
*hashes_done = foundNonce - first_nonce + 1;
|
||||||
applog(LOG_INFO, "GPU #%d: result for nonce $%08X is in wanted range, %x <= %x", thr_id, foundNonce, vhash64[7], Htarg);
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
else if (vhash64[7] > Htarg) {
|
else if (vhash64[7] > Htarg) {
|
||||||
applog(LOG_NOTICE, "Hash0 %08x %08x %08x %08x", vhash64[0], vhash64[1], vhash64[2], vhash64[3]);
|
applog(LOG_INFO, "GPU #%d: result for %08x is not in range: %x > %x", thr_id, foundNonce, vhash64[7], Htarg);
|
||||||
applog(LOG_NOTICE, "Hash1 %08x %08x %08x %08x", vhash64[4], vhash64[5], vhash64[6], vhash64[7]);
|
|
||||||
applog(LOG_INFO, "GPU #%d: result for %08x is not in range: %x > %x",
|
|
||||||
thr_id, foundNonce, vhash64[7], Htarg);
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
applog(LOG_INFO, "GPU #%d: result for %08x does not validate on CPU!", thr_id, foundNonce);
|
applog(LOG_INFO, "GPU #%d: result for %08x does not validate on CPU!", thr_id, foundNonce);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user