diff --git a/cpu-miner.c b/cpu-miner.c index 4d97db8..040899e 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -941,7 +941,6 @@ static void *miner_thread(void *userdata) case ALGO_X15: rc = scanhash_x15(thr_id, work.data, work.target, max_nonce, &hashes_done); - exit(0); break; default: diff --git a/x15/cuda_x15_whirlpool.cu b/x15/cuda_x15_whirlpool.cu index 8e24115..35d7896 100644 --- a/x15/cuda_x15_whirlpool.cu +++ b/x15/cuda_x15_whirlpool.cu @@ -21,16 +21,6 @@ extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int t #define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n)))) #define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) -#define SWAB32(x) ( __byte_perm(x, x, 0x0123) ) - -#if __CUDA_ARCH__ < 350 -// Kepler (Compute 3.0) -#define ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n)))) -#else -// Kepler (Compute 3.5) -#define ROTL32(x, n) __funnelshift_l( (x), (x), (n) ) -#endif - #if 0 static __constant__ uint64_t d_plain_T0[256]; #if !SPH_SMALL_FOOTPRINT_WHIRLPOOL @@ -1239,26 +1229,24 @@ __global__ void x15_whirlpool_gpu_hash_64(int threads, uint32_t startNounce, uin { uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread); uint32_t hashPosition = nounce - startNounce; - uint64_t *hash = (g_hash + hashPosition); -#if NULLTEST - for (int i = 0; i < 8; i++) - hash[i] = 0; -#endif + uint64_t *pHash = &g_hash[hashPosition<<3]; // whirlpool uint64_t n0, n1, n2, n3, n4, n5, n6, n7; - uint64_t h0, h1, h2, h3, h4, h5, h6, h7; + uint64_t h0=0, h1=0, h2=0, h3=0, h4=0, h5=0, h6=0, h7=0; uint64_t state[8]; - n0 = (hash[0]); - n1 = (hash[1]); - n2 = (hash[2]); - n3 = (hash[3]); - n4 = (hash[4]); - n5 = (hash[5]); - n6 = (hash[6]); - n7 = (hash[7]); - - h0 = h1 = h2 = h3 = h4 = h5 = h6 = h7 = 0; +#if NULLTEST + for (uint8_t i = 0; i < 8; i++) + pHash[i] = 0; +#endif + n0 = pHash[0]; + n1 = pHash[1]; + n2 = pHash[2]; + n3 = pHash[3]; + n4 = pHash[4]; + n5 = pHash[5]; + n6 = pHash[6]; + n7 = pHash[7]; n0 ^= h0; n1 ^= h1; @@ -1270,7 +1258,7 @@ __global__ void x15_whirlpool_gpu_hash_64(int threads, uint32_t startNounce, uin n7 ^= h7; #pragma unroll 10 - for (unsigned r = 0; r < 10; r++) + for (uint8_t r = 0; r < 10; r++) { uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; @@ -1280,14 +1268,14 @@ __global__ void x15_whirlpool_gpu_hash_64(int threads, uint32_t startNounce, uin TRANSFER(n, tmp); } - state[0] = n0 ^ (hash[0]); - state[1] = n1 ^ (hash[1]); - state[2] = n2 ^ (hash[2]); - state[3] = n3 ^ (hash[3]); - state[4] = n4 ^ (hash[4]); - state[5] = n5 ^ (hash[5]); - state[6] = n6 ^ (hash[6]); - state[7] = n7 ^ (hash[7]); + state[0] = n0 ^ pHash[0]; + state[1] = n1 ^ pHash[1]; + state[2] = n2 ^ pHash[2]; + state[3] = n3 ^ pHash[3]; + state[4] = n4 ^ pHash[4]; + state[5] = n5 ^ pHash[5]; + state[6] = n6 ^ pHash[6]; + state[7] = n7 ^ pHash[7]; n0 = 0x80; n1 = n2 = n3 = n4 = n5 = n6 = 0; @@ -1312,7 +1300,7 @@ __global__ void x15_whirlpool_gpu_hash_64(int threads, uint32_t startNounce, uin n7 ^= h7; #pragma unroll 10 - for (unsigned r = 0; r < 10; r++) + for (uint8_t r = 0; r < 10; r++) { uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; @@ -1322,19 +1310,14 @@ __global__ void x15_whirlpool_gpu_hash_64(int threads, uint32_t startNounce, uin TRANSFER(n, tmp); } - state[0] ^= n0 ^ 0x80; - state[1] ^= n1; - state[2] ^= n2; - state[3] ^= n3; - state[4] ^= n4; - state[5] ^= n5; - state[6] ^= n6; - state[7] ^= n7 ^ 0x2000000000000; - - for (unsigned i = 0; i < 8; i++) - hash[i] = state[i]; - - // bool result = (hash[3] <= target); + pHash[0] = state[0] ^ (n0 ^ 0x80); + pHash[1] = state[1] ^ n1; + pHash[2] = state[2] ^ n2; + pHash[3] = state[3] ^ n3; + pHash[4] = state[4] ^ n4; + pHash[5] = state[5] ^ n5; + pHash[6] = state[6] ^ n6; + pHash[7] = state[7] ^ (n7 ^ 0x2000000000000); } } diff --git a/x15/x14.cu b/x15/x14.cu index a179e5e..3e600f6 100644 --- a/x15/x14.cu +++ b/x15/x14.cu @@ -181,7 +181,7 @@ extern "C" int scanhash_x14(int thr_id, uint32_t *pdata, uint32_t Htarg = ptarget[7]; if (opt_benchmark) - ((uint32_t*)ptarget)[7] = 0xff; + ((uint32_t*)ptarget)[7] = Htarg = 0xff; if (!init[thr_id]) { diff --git a/x15/x15.cu b/x15/x15.cu index 53a1b58..8049f02 100644 --- a/x15/x15.cu +++ b/x15/x15.cu @@ -181,6 +181,15 @@ extern "C" void x15hash(void *output, const void *input) memcpy(output, hash, 32); } +#if NULLTEST +static void print_hash(unsigned char *hash) +{ + for (int i=0; i < 32; i += 4) { + printf("%02x%02x%02x%02x ", hash[i], hash[i+1], hash[i+2], hash[i+3]); + } +} +#endif + extern bool opt_benchmark; extern "C" int scanhash_x15(int thr_id, uint32_t *pdata, @@ -196,6 +205,11 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata, if (opt_benchmark) ((uint32_t*)ptarget)[7] = Htarg = 0x0000ff; +#if NULLTEST + for (int k=0; k < 20; k++) + pdata[k] = 0; +#endif + if (!init[thr_id]) { cudaSetDevice(device_map[thr_id]); @@ -223,11 +237,7 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata, } for (int k=0; k < 20; k++) -#if NULLTEST - endiandata[k] = 0; -#else be32enc(&endiandata[k], ((uint32_t*)pdata)[k]); -#endif quark_blake512_cpu_setBlock_80((void*)endiandata); quark_check_cpu_setTarget(ptarget); @@ -250,36 +260,28 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata, x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); x15_whirlpool_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); +#if NULLTEST + uint32_t buf[8]; memset(buf, 0, sizeof buf); + cudaMemcpy(buf, d_hash[thr_id], sizeof buf, cudaMemcpyDeviceToHost); + cudaThreadSynchronize(); + print_hash((unsigned char*)buf); printf("\n"); +#endif /* Scan with GPU */ uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); -#if NULLTEST - uint32_t buf[16]; memset(buf, 0, sizeof(buf)); - cudaMemcpy(buf, d_hash[thr_id], 16 * sizeof(uint32_t), cudaMemcpyDeviceToHost); - MyStreamSynchronize(NULL, order, thr_id); - applog(LOG_NOTICE, "Hash %08x %08x %08x %08x", buf[0], buf[1], buf[2], buf[3]); - applog(LOG_NOTICE, "Hash %08x %08x %08x %08x", buf[4], buf[5], buf[6], buf[7]); - applog(LOG_NOTICE, "Hash %08x %08x %08x %08x", buf[8], buf[9], buf[10], buf[11]); - applog(LOG_NOTICE, "Hash %08x %08x %08x %08x", buf[12], buf[13], buf[14], buf[15]); - return 0; -#endif if (foundNonce != 0xffffffff) { /* check now with the CPU to confirm */ uint32_t vhash64[8]; be32enc(&endiandata[19], foundNonce); x15hash(vhash64, endiandata); - if ((vhash64[7] <= Htarg) /* && fulltest(vhash64, ptarget) */) { + if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { pdata[19] = foundNonce; *hashes_done = foundNonce - first_nonce + 1; - applog(LOG_INFO, "GPU #%d: result for nonce $%08X is in wanted range, %x <= %x", thr_id, foundNonce, vhash64[7], Htarg); return 1; } else if (vhash64[7] > Htarg) { - applog(LOG_NOTICE, "Hash0 %08x %08x %08x %08x", vhash64[0], vhash64[1], vhash64[2], vhash64[3]); - applog(LOG_NOTICE, "Hash1 %08x %08x %08x %08x", vhash64[4], vhash64[5], vhash64[6], vhash64[7]); - applog(LOG_INFO, "GPU #%d: result for %08x is not in range: %x > %x", - thr_id, foundNonce, vhash64[7], Htarg); + applog(LOG_INFO, "GPU #%d: result for %08x is not in range: %x > %x", thr_id, foundNonce, vhash64[7], Htarg); } else { applog(LOG_INFO, "GPU #%d: result for %08x does not validate on CPU!", thr_id, foundNonce);