debug: x11 algo traces for cuda 7 problem
This commit is contained in:
parent
7fd594c922
commit
9c4158aadb
4
api.cpp
4
api.cpp
@ -932,8 +932,8 @@ void *api_thread(void *userdata)
|
|||||||
/* to be able to report the default value set in each algo */
|
/* to be able to report the default value set in each algo */
|
||||||
void api_set_throughput(int thr_id, uint32_t throughput)
|
void api_set_throughput(int thr_id, uint32_t throughput)
|
||||||
{
|
{
|
||||||
struct cgpu_info *cgpu = &thr_info[thr_id].gpu;
|
if (&thr_info[thr_id]) {
|
||||||
if (cgpu) {
|
struct cgpu_info *cgpu = &thr_info[thr_id].gpu;
|
||||||
uint32_t ws = throughput;
|
uint32_t ws = throughput;
|
||||||
uint8_t i = 0;
|
uint8_t i = 0;
|
||||||
cgpu->throughput = throughput;
|
cgpu->throughput = throughput;
|
||||||
|
1
cuda.cpp
1
cuda.cpp
@ -119,6 +119,7 @@ uint32_t device_intensity(int thr_id, const char *func, uint32_t defcount)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Zeitsynchronisations-Routine von cudaminer mit CPU sleep
|
// Zeitsynchronisations-Routine von cudaminer mit CPU sleep
|
||||||
|
// Note: if you disable all of these calls, CPU usage will hit 100%
|
||||||
typedef struct { double value[8]; } tsumarray;
|
typedef struct { double value[8]; } tsumarray;
|
||||||
cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id)
|
cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id)
|
||||||
{
|
{
|
||||||
|
7
util.cpp
7
util.cpp
@ -1659,12 +1659,15 @@ void do_gpu_tests(void)
|
|||||||
work_restart[0].restart = 1;
|
work_restart[0].restart = 1;
|
||||||
tgt[7] = 0xffff;
|
tgt[7] = 0xffff;
|
||||||
|
|
||||||
|
memset(buf, 0, sizeof buf);
|
||||||
|
scanhash_x11(0, (uint32_t*)buf, tgt, 1, &done);
|
||||||
|
|
||||||
memset(buf, 0, sizeof buf);
|
memset(buf, 0, sizeof buf);
|
||||||
// buf[0] = 1; buf[64] = 2; // for endian tests
|
// buf[0] = 1; buf[64] = 2; // for endian tests
|
||||||
scanhash_blake256(0, (uint32_t*)buf, tgt, 1, &done, 14);
|
scanhash_blake256(0, (uint32_t*)buf, tgt, 1, &done, 14);
|
||||||
|
|
||||||
memset(buf, 0, sizeof buf);
|
//memset(buf, 0, sizeof buf);
|
||||||
scanhash_heavy(0, (uint32_t*)buf, tgt, 1, &done, 1, 84); // HEAVYCOIN_BLKHDR_SZ=84
|
//scanhash_heavy(0, (uint32_t*)buf, tgt, 1, &done, 1, 84); // HEAVYCOIN_BLKHDR_SZ=84
|
||||||
|
|
||||||
free(work_restart);
|
free(work_restart);
|
||||||
work_restart = NULL;
|
work_restart = NULL;
|
||||||
|
27
x11/x11.cu
27
x11/x11.cu
@ -125,6 +125,21 @@ extern "C" void x11hash(void *output, const void *input)
|
|||||||
memcpy(output, hash, 32);
|
memcpy(output, hash, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef _DEBUG
/*
 * TRACE(algo): debug-only dump of the start of the current hash buffer.
 *
 * algo is a C string used as the label printed in front of the values.
 *
 * The macro expands in place and relies on these names being in scope at
 * the expansion site: max_nonce, pdata, d_hash and thr_id.
 *
 * It only does something when max_nonce == 1 and pdata[19] <= 1
 * (presumably the single-nonce self-test call - confirm against the caller).
 * In that case it copies 8 uint32_t words from d_hash[thr_id] to a
 * temporary pinned host buffer and prints the first four, each passed
 * through htobe32().
 *
 * The buffer comes from cudaMallocHost(), so it has to be released with
 * cudaFreeHost(); cudaFree() is only valid for device allocations.
 * Nothing is printed if the allocation or the copy fails, so a CUDA error
 * cannot turn into a NULL dereference inside printf().
 */
#define TRACE(algo) { \
	if (max_nonce == 1 && pdata[19] <= 1) { \
		uint32_t* debugbuf = NULL; \
		if (cudaMallocHost(&debugbuf, 8*sizeof(uint32_t)) == cudaSuccess && debugbuf != NULL) { \
			if (cudaMemcpy(debugbuf, d_hash[thr_id], 8*sizeof(uint32_t), cudaMemcpyDeviceToHost) == cudaSuccess) { \
				printf("%s %08x %08x %08x %08x...\n", algo, htobe32(debugbuf[0]), htobe32(debugbuf[1]), \
					htobe32(debugbuf[2]), htobe32(debugbuf[3])); \
			} \
			cudaFreeHost(debugbuf); \
		} \
	} \
}
#else
/* Release builds: TRACE() expands to an empty block. */
#define TRACE(algo) {}
#endif
|
||||||
|
|
||||||
static bool init[MAX_GPUS] = { 0 };
|
static bool init[MAX_GPUS] = { 0 };
|
||||||
|
|
||||||
extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
|
extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
|
||||||
@ -133,7 +148,7 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
|
|||||||
{
|
{
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 20 : 19;
|
int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 20 : 19;
|
||||||
uint32_t throughput = device_intensity(thr_id, __func__, 1 << intensity); // 19=256*256*8;
|
uint32_t throughput = device_intensity(thr_id, __func__, 1U << intensity); // 19=256*256*8;
|
||||||
throughput = min(throughput, max_nonce - first_nonce);
|
throughput = min(throughput, max_nonce - first_nonce);
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
@ -176,15 +191,25 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
|
|||||||
|
|
||||||
// Hash with CUDA
|
// Hash with CUDA
|
||||||
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||||
|
TRACE("blake :");
|
||||||
quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||||
|
TRACE("bmw :");
|
||||||
quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||||
|
TRACE("groestl:");
|
||||||
quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||||
|
TRACE("skein :");
|
||||||
quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||||
|
TRACE("jh512 :");
|
||||||
quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||||
|
TRACE("keccak :");
|
||||||
x11_luffaCubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
x11_luffaCubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||||
|
TRACE("luffa+c:");
|
||||||
x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||||
|
TRACE("shavite:");
|
||||||
x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||||
|
TRACE("simd :");
|
||||||
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||||
|
TRACE("echo X11 =>");
|
||||||
|
|
||||||
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
|
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
|
||||||
if (foundNonce != UINT32_MAX)
|
if (foundNonce != UINT32_MAX)
|
||||||
|
Loading…
Reference in New Issue
Block a user