debug: x11 algo traces for cuda 7 problem

This commit is contained in:
Tanguy Pruvot 2015-03-02 14:14:32 +01:00
parent 7fd594c922
commit 9c4158aadb
4 changed files with 34 additions and 5 deletions

View File

@ -932,8 +932,8 @@ void *api_thread(void *userdata)
/* to be able to report the default value set in each algo */
void api_set_throughput(int thr_id, uint32_t throughput)
{
struct cgpu_info *cgpu = &thr_info[thr_id].gpu;
if (cgpu) {
if (&thr_info[thr_id]) {
struct cgpu_info *cgpu = &thr_info[thr_id].gpu;
uint32_t ws = throughput;
uint8_t i = 0;
cgpu->throughput = throughput;

View File

@ -119,6 +119,7 @@ uint32_t device_intensity(int thr_id, const char *func, uint32_t defcount)
}
// Time synchronization routine from cudaminer, using CPU sleep
// Note: if you disable all of these calls, CPU usage will hit 100%
/* Plain aggregate wrapping a fixed-size array of eight doubles. */
typedef struct {
	double value[8];
} tsumarray;
cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id)
{

View File

@ -1659,12 +1659,15 @@ void do_gpu_tests(void)
work_restart[0].restart = 1;
tgt[7] = 0xffff;
memset(buf, 0, sizeof buf);
scanhash_x11(0, (uint32_t*)buf, tgt, 1, &done);
memset(buf, 0, sizeof buf);
// buf[0] = 1; buf[64] = 2; // for endian tests
scanhash_blake256(0, (uint32_t*)buf, tgt, 1, &done, 14);
memset(buf, 0, sizeof buf);
scanhash_heavy(0, (uint32_t*)buf, tgt, 1, &done, 1, 84); // HEAVYCOIN_BLKHDR_SZ=84
//memset(buf, 0, sizeof buf);
//scanhash_heavy(0, (uint32_t*)buf, tgt, 1, &done, 1, 84); // HEAVYCOIN_BLKHDR_SZ=84
free(work_restart);
work_restart = NULL;

View File

@ -125,6 +125,21 @@ extern "C" void x11hash(void *output, const void *input)
memcpy(output, hash, 32);
}
#ifdef _DEBUG
/*
 * TRACE(algo): debug-only dump of the current intermediate hash.
 *
 * Expands in a scope where `max_nonce`, `pdata`, `thr_id` and `d_hash` are
 * visible. It only fires when max_nonce == 1 and pdata[19] <= 1
 * (NOTE(review): presumably the single-nonce self-test path - confirm
 * against the caller). When it fires, it copies the first 8 uint32_t words
 * of d_hash[thr_id] from the device into a temporary pinned host buffer and
 * prints the first four words, passed through htobe32(), prefixed by `algo`.
 *
 * The temporary buffer comes from cudaMallocHost() and therefore has to be
 * released with cudaFreeHost(); cudaFree() is only valid for device
 * allocations. Nothing is printed if the allocation or the copy fails, so
 * the macro never dereferences a NULL or unfilled buffer.
 *
 * In non-debug builds the macro expands to an empty block.
 */
#define TRACE(algo) { \
	if (max_nonce == 1 && pdata[19] <= 1) { \
		uint32_t* debugbuf = NULL; \
		if (cudaMallocHost(&debugbuf, 8*sizeof(uint32_t)) == cudaSuccess && debugbuf != NULL) { \
			if (cudaMemcpy(debugbuf, d_hash[thr_id], 8*sizeof(uint32_t), cudaMemcpyDeviceToHost) == cudaSuccess) { \
				printf("%s %08x %08x %08x %08x...\n", algo, htobe32(debugbuf[0]), htobe32(debugbuf[1]), \
					htobe32(debugbuf[2]), htobe32(debugbuf[3])); \
			} \
			cudaFreeHost(debugbuf); \
		} \
	} \
}
#else
#define TRACE(algo) {}
#endif
static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
@ -133,7 +148,7 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
{
const uint32_t first_nonce = pdata[19];
int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 20 : 19;
uint32_t throughput = device_intensity(thr_id, __func__, 1 << intensity); // 19=256*256*8;
uint32_t throughput = device_intensity(thr_id, __func__, 1U << intensity); // 19=256*256*8;
throughput = min(throughput, max_nonce - first_nonce);
if (opt_benchmark)
@ -176,15 +191,25 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
// Hash with CUDA
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
TRACE("blake :");
quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("bmw :");
quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("groestl:");
quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("skein :");
quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("jh512 :");
quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("keccak :");
x11_luffaCubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("luffa+c:");
x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("shavite:");
x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("simd :");
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("echo X11 =>");
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX)