Browse Source

debug: x11 algo traces for cuda 7 problem

2upstream
Tanguy Pruvot 10 years ago
parent
commit
9c4158aadb
  1. 4
      api.cpp
  2. 1
      cuda.cpp
  3. 7
      util.cpp
  4. 27
      x11/x11.cu

4
api.cpp

@ -932,8 +932,8 @@ void *api_thread(void *userdata)
/* to be able to report the default value set in each algo */ /* to be able to report the default value set in each algo */
void api_set_throughput(int thr_id, uint32_t throughput) void api_set_throughput(int thr_id, uint32_t throughput)
{ {
struct cgpu_info *cgpu = &thr_info[thr_id].gpu; if (&thr_info[thr_id]) {
if (cgpu) { struct cgpu_info *cgpu = &thr_info[thr_id].gpu;
uint32_t ws = throughput; uint32_t ws = throughput;
uint8_t i = 0; uint8_t i = 0;
cgpu->throughput = throughput; cgpu->throughput = throughput;

1
cuda.cpp

@ -119,6 +119,7 @@ uint32_t device_intensity(int thr_id, const char *func, uint32_t defcount)
} }
// Zeitsynchronisations-Routine von cudaminer mit CPU sleep // Zeitsynchronisations-Routine von cudaminer mit CPU sleep
// Note: if you disable all of these calls, CPU usage will hit 100%
typedef struct { double value[8]; } tsumarray; typedef struct { double value[8]; } tsumarray;
cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id) cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id)
{ {

7
util.cpp

@ -1659,12 +1659,15 @@ void do_gpu_tests(void)
work_restart[0].restart = 1; work_restart[0].restart = 1;
tgt[7] = 0xffff; tgt[7] = 0xffff;
memset(buf, 0, sizeof buf);
scanhash_x11(0, (uint32_t*)buf, tgt, 1, &done);
memset(buf, 0, sizeof buf); memset(buf, 0, sizeof buf);
// buf[0] = 1; buf[64] = 2; // for endian tests // buf[0] = 1; buf[64] = 2; // for endian tests
scanhash_blake256(0, (uint32_t*)buf, tgt, 1, &done, 14); scanhash_blake256(0, (uint32_t*)buf, tgt, 1, &done, 14);
memset(buf, 0, sizeof buf); //memset(buf, 0, sizeof buf);
scanhash_heavy(0, (uint32_t*)buf, tgt, 1, &done, 1, 84); // HEAVYCOIN_BLKHDR_SZ=84 //scanhash_heavy(0, (uint32_t*)buf, tgt, 1, &done, 1, 84); // HEAVYCOIN_BLKHDR_SZ=84
free(work_restart); free(work_restart);
work_restart = NULL; work_restart = NULL;

27
x11/x11.cu

@ -125,6 +125,21 @@ extern "C" void x11hash(void *output, const void *input)
memcpy(output, hash, 32); memcpy(output, hash, 32);
} }
#ifdef _DEBUG
/*
 * TRACE(algo): debug-only helper that prints the first four 32-bit words
 * (byte-swapped with htobe32) of d_hash[thr_id] after a hashing stage,
 * prefixed with the label `algo`.
 *
 * It expands in place and relies on `max_nonce`, `pdata`, `d_hash` and
 * `thr_id` being in scope at the call site. Output is produced only when
 * max_nonce == 1 and pdata[19] <= 1 (presumably the single-nonce self-test
 * run - confirm against the caller).
 *
 * Eight words are copied from the device into a small stack buffer with a
 * blocking cudaMemcpy, so no pinned host allocation (and no matching
 * cudaFreeHost) is needed. If the copy fails, the CUDA error string is
 * printed instead of uninitialised data.
 */
#define TRACE(algo) do { \
	if (max_nonce == 1 && pdata[19] <= 1) { \
		uint32_t debugbuf[8] = { 0 }; \
		cudaError_t trace_err = cudaMemcpy(debugbuf, d_hash[thr_id], \
			sizeof(debugbuf), cudaMemcpyDeviceToHost); \
		if (trace_err == cudaSuccess) { \
			printf("%s %08x %08x %08x %08x...\n", algo, htobe32(debugbuf[0]), htobe32(debugbuf[1]), \
				htobe32(debugbuf[2]), htobe32(debugbuf[3])); \
		} else { \
			printf("%s trace failed: %s\n", algo, cudaGetErrorString(trace_err)); \
		} \
	} \
} while (0)
#else
/* Release builds: tracing compiles away to an empty statement. */
#define TRACE(algo) do {} while (0)
#endif
static bool init[MAX_GPUS] = { 0 }; static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_x11(int thr_id, uint32_t *pdata, extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
@ -133,7 +148,7 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
{ {
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 20 : 19; int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 20 : 19;
uint32_t throughput = device_intensity(thr_id, __func__, 1 << intensity); // 19=256*256*8; uint32_t throughput = device_intensity(thr_id, __func__, 1U << intensity); // 19=256*256*8;
throughput = min(throughput, max_nonce - first_nonce); throughput = min(throughput, max_nonce - first_nonce);
if (opt_benchmark) if (opt_benchmark)
@ -176,15 +191,25 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
// Hash with CUDA // Hash with CUDA
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
TRACE("blake :");
quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("bmw :");
quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("groestl:");
quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("skein :");
quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("jh512 :");
quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("keccak :");
x11_luffaCubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); x11_luffaCubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("luffa+c:");
x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("shavite:");
x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("simd :");
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("echo X11 =>");
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX) if (foundNonce != UINT32_MAX)

Loading…
Cancel
Save