From 9c4158aadbb401300f40e2370ff00fa3464a2cd6 Mon Sep 17 00:00:00 2001
From: Tanguy Pruvot
Date: Mon, 2 Mar 2015 14:14:32 +0100
Subject: [PATCH] debug: x11 algo traces for cuda 7 problem

Debug-only instrumentation of the x11 hash chain.

* x11/x11.cu: add a TRACE(algo) macro, compiled only when _DEBUG is
  defined (it expands to an empty block otherwise). When max_nonce == 1
  and pdata[19] <= 1 it copies the first 8 32-bit words of d_hash[thr_id]
  back to a pinned host buffer and prints the first four after each hash
  stage. The buffer comes from cudaMallocHost() and is therefore released
  with cudaFreeHost(). Also use 1U << intensity for the default
  throughput so the shift is done on an unsigned value.
* util.cpp: do_gpu_tests() now runs scanhash_x11() first, with
  max_nonce = 1, and no longer runs the heavy test (commented out).
* api.cpp: api_set_throughput() checks thr_info itself before
  dereferencing it, instead of testing the address of one of its
  elements.
* cuda.cpp: comment only.

---
 api.cpp    |  4 ++--
 cuda.cpp   |  1 +
 util.cpp   |  7 +++++--
 x11/x11.cu | 27 ++++++++++++++++++++++++++-
 4 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/api.cpp b/api.cpp
index 64364e7..99bb9bf 100644
--- a/api.cpp
+++ b/api.cpp
@@ -932,8 +932,8 @@ void *api_thread(void *userdata)
 /* to be able to report the default value set in each algo */
 void api_set_throughput(int thr_id, uint32_t throughput)
 {
-	struct cgpu_info *cgpu = &thr_info[thr_id].gpu;
-	if (cgpu) {
+	if (thr_info) {
+		struct cgpu_info *cgpu = &thr_info[thr_id].gpu;
 		uint32_t ws = throughput;
 		uint8_t i = 0;
 		cgpu->throughput = throughput;
diff --git a/cuda.cpp b/cuda.cpp
index 320eafc..54dcb4c 100644
--- a/cuda.cpp
+++ b/cuda.cpp
@@ -119,6 +119,7 @@ uint32_t device_intensity(int thr_id, const char *func, uint32_t defcount)
 }
 
 // Zeitsynchronisations-Routine von cudaminer mit CPU sleep
+// Note: if you disable all of these calls, CPU usage will hit 100%
 typedef struct { double value[8]; } tsumarray;
 cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id)
 {
diff --git a/util.cpp b/util.cpp
index 9056d16..e2d5211 100644
--- a/util.cpp
+++ b/util.cpp
@@ -1659,12 +1659,15 @@ void do_gpu_tests(void)
 	work_restart[0].restart = 1;
 	tgt[7] = 0xffff;
 
+	memset(buf, 0, sizeof buf);
+	scanhash_x11(0, (uint32_t*)buf, tgt, 1, &done);
+
 	memset(buf, 0, sizeof buf);
 	// buf[0] = 1; buf[64] = 2; // for endian tests
 	scanhash_blake256(0, (uint32_t*)buf, tgt, 1, &done, 14);
 
-	memset(buf, 0, sizeof buf);
-	scanhash_heavy(0, (uint32_t*)buf, tgt, 1, &done, 1, 84); // HEAVYCOIN_BLKHDR_SZ=84
+	//memset(buf, 0, sizeof buf);
+	//scanhash_heavy(0, (uint32_t*)buf, tgt, 1, &done, 1, 84); // HEAVYCOIN_BLKHDR_SZ=84
 
 	free(work_restart);
 	work_restart = NULL;
diff --git a/x11/x11.cu b/x11/x11.cu
index df072b2..61acb7d 100644
--- a/x11/x11.cu
+++ b/x11/x11.cu
@@ -125,6 +125,21 @@ extern "C" void x11hash(void *output, const void *input)
 	memcpy(output, hash, 32);
 }
 
+#ifdef _DEBUG
+#define TRACE(algo) { \
+	if (max_nonce == 1 && pdata[19] <= 1) { \
+		uint32_t* debugbuf = NULL; \
+		cudaMallocHost(&debugbuf, 8*sizeof(uint32_t)); \
+		cudaMemcpy(debugbuf, d_hash[thr_id], 8*sizeof(uint32_t), cudaMemcpyDeviceToHost); \
+		printf("%s %08x %08x %08x %08x...\n", algo, htobe32(debugbuf[0]), htobe32(debugbuf[1]), \
+			htobe32(debugbuf[2]), htobe32(debugbuf[3])); \
+		cudaFreeHost(debugbuf); /* pinned host buffer: pairs with cudaMallocHost */ \
+	} \
+}
+#else
+#define TRACE(algo) {}
+#endif
+
 static bool init[MAX_GPUS] = { 0 };
 
 extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
@@ -133,7 +148,7 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
 {
 	const uint32_t first_nonce = pdata[19];
 	int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 20 : 19;
-	uint32_t throughput = device_intensity(thr_id, __func__, 1 << intensity); // 19=256*256*8;
+	uint32_t throughput = device_intensity(thr_id, __func__, 1U << intensity); // 19=256*256*8;
 	throughput = min(throughput, max_nonce - first_nonce);
 
 	if (opt_benchmark)
@@ -176,15 +191,25 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
 
 		// Hash with CUDA
 		quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+		TRACE("blake :");
 		quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		TRACE("bmw :");
 		quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		TRACE("groestl:");
 		quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		TRACE("skein :");
 		quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		TRACE("jh512 :");
 		quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		TRACE("keccak :");
 		x11_luffaCubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		TRACE("luffa+c:");
 		x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		TRACE("shavite:");
 		x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		TRACE("simd :");
 		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		TRACE("echo X11 =>");
 
 		foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
 		if (foundNonce != UINT32_MAX)