xmr: default settings with card attributes
This commit is contained in:
parent
94aa6b8e91
commit
214f392778
@ -128,6 +128,7 @@ int active_gpus;
|
||||
char * device_name[MAX_GPUS];
|
||||
short device_map[MAX_GPUS] = { 0 };
|
||||
long device_sm[MAX_GPUS] = { 0 };
|
||||
short device_mpcount[MAX_GPUS] = { 0 };
|
||||
uint32_t gpus_intensity[MAX_GPUS] = { 0 };
|
||||
uint32_t device_gpu_clocks[MAX_GPUS] = { 0 };
|
||||
uint32_t device_mem_clocks[MAX_GPUS] = { 0 };
|
||||
|
@ -3,8 +3,13 @@
|
||||
|
||||
extern char *device_config[MAX_GPUS]; // -l 32x16
|
||||
|
||||
static __thread uint32_t cn_blocks = 32;
|
||||
static __thread uint32_t cn_threads = 16;
|
||||
static __thread uint32_t cn_blocks;
|
||||
static __thread uint32_t cn_threads;
|
||||
|
||||
// used for gpu intensity on algo init
|
||||
static __thread bool gpu_init_shown = false;
|
||||
#define gpulog_init(p,thr,fmt, ...) if (!gpu_init_shown) \
|
||||
gpulog(p, thr, fmt, ##__VA_ARGS__)
|
||||
|
||||
static uint32_t *d_long_state[MAX_GPUS];
|
||||
static uint64_t *d_ctx_state[MAX_GPUS];
|
||||
@ -26,6 +31,7 @@ extern "C" int scanhash_cryptonight(int thr_id, struct work* work, uint32_t max_
|
||||
uint32_t *nonceptr = (uint32_t*) (&pdata[39]);
|
||||
const uint32_t first_nonce = *nonceptr;
|
||||
uint32_t nonce = first_nonce;
|
||||
int dev_id = device_map[thr_id];
|
||||
|
||||
if(opt_benchmark) {
|
||||
ptarget[7] = 0x00ff;
|
||||
@ -33,19 +39,29 @@ extern "C" int scanhash_cryptonight(int thr_id, struct work* work, uint32_t max_
|
||||
|
||||
if(!init[thr_id])
|
||||
{
|
||||
int mem = cuda_available_memory(thr_id);
|
||||
int mul = device_sm[dev_id] >= 300 ? 4 : 1; // see cryptonight-core.cu
|
||||
cn_threads = device_sm[dev_id] >= 600 ? 16 : 8; // real TPB is x4 on SM3+
|
||||
cn_blocks = device_mpcount[dev_id] * 4;
|
||||
if (cn_blocks*cn_threads*2.2 > mem) cn_blocks = device_mpcount[dev_id] * 2;
|
||||
|
||||
if (!opt_quiet)
|
||||
gpulog_init(LOG_INFO, thr_id, "%s, %d MB available, %hd SMX", device_name[dev_id],
|
||||
mem, device_mpcount[dev_id]);
|
||||
|
||||
if (device_config[thr_id]) {
|
||||
sscanf(device_config[thr_id], "%ux%u", &cn_blocks, &cn_threads);
|
||||
int res = sscanf(device_config[thr_id], "%ux%u", &cn_blocks, &cn_threads);
|
||||
throughput = cuda_default_throughput(thr_id, cn_blocks*cn_threads);
|
||||
gpulog(LOG_INFO, thr_id, "Using %u x %u kernel launch config, %u threads",
|
||||
cn_blocks, cn_threads, throughput);
|
||||
gpulog_init(LOG_INFO, thr_id, "Using %ux%u(x%d) kernel launch config, %u threads",
|
||||
cn_blocks, cn_threads, mul, throughput);
|
||||
} else {
|
||||
throughput = cuda_default_throughput(thr_id, cn_blocks*cn_threads);
|
||||
if (throughput != cn_blocks*cn_threads && cn_threads) {
|
||||
cn_blocks = throughput / cn_threads;
|
||||
throughput = cn_threads * cn_blocks;
|
||||
}
|
||||
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u threads (%ux%u)",
|
||||
throughput2intensity(throughput), throughput, cn_blocks, cn_threads);
|
||||
gpulog_init(LOG_INFO, thr_id, "%u threads (%g) with %u blocks",// of %ux%d",
|
||||
throughput, throughput2intensity(throughput), cn_blocks);//, cn_threads, mul);
|
||||
}
|
||||
|
||||
if(sizeof(size_t) == 4 && throughput > UINT32_MAX / MEMORY) {
|
||||
@ -67,7 +83,7 @@ extern "C" int scanhash_cryptonight(int thr_id, struct work* work, uint32_t max_
|
||||
|
||||
cudaMalloc(&d_long_state[thr_id], alloc);
|
||||
exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
|
||||
cudaMalloc(&d_ctx_state[thr_id], 208 * throughput); // 200 is aligned 8, not 16
|
||||
cudaMalloc(&d_ctx_state[thr_id], 208 * throughput); // 52*4 (200 is not aligned 16)
|
||||
exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
|
||||
cudaMalloc(&d_ctx_key1[thr_id], 40 * sizeof(uint32_t) * throughput);
|
||||
exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
|
||||
@ -80,6 +96,7 @@ extern "C" int scanhash_cryptonight(int thr_id, struct work* work, uint32_t max_
|
||||
cudaMalloc(&d_ctx_b[thr_id], 4 * sizeof(uint32_t) * throughput);
|
||||
exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
|
||||
|
||||
gpu_init_shown = true;
|
||||
init[thr_id] = true;
|
||||
}
|
||||
|
||||
|
9
cuda.cpp
9
cuda.cpp
@ -78,6 +78,7 @@ void cuda_devicenames()
|
||||
cudaGetDeviceProperties(&props, dev_id);
|
||||
|
||||
device_sm[dev_id] = (props.major * 100 + props.minor * 10);
|
||||
device_mpcount[dev_id] = (short) props.multiProcessorCount;
|
||||
|
||||
if (device_name[dev_id]) {
|
||||
free(device_name[dev_id]);
|
||||
@ -105,8 +106,10 @@ void cuda_print_devices()
|
||||
cudaDeviceProp props;
|
||||
cudaGetDeviceProperties(&props, dev_id);
|
||||
if (!opt_n_threads || n < opt_n_threads) {
|
||||
fprintf(stderr, "GPU #%d: SM %d.%d %s @ %.0f MHz (MEM %.0f)\n", dev_id, props.major, props.minor,
|
||||
device_name[dev_id], (double) props.clockRate/1000, (double) props.memoryClockRate/1000);
|
||||
fprintf(stderr, "GPU #%d: SM %d.%d %s @ %.0f MHz (MEM %.0f)\n", dev_id,
|
||||
props.major, props.minor, device_name[dev_id],
|
||||
(double) props.clockRate/1000,
|
||||
(double) props.memoryClockRate/1000);
|
||||
#ifdef USE_WRAPNVML
|
||||
if (opt_debug) nvml_print_device_info(dev_id);
|
||||
#ifdef WIN32
|
||||
@ -224,7 +227,7 @@ int cuda_available_memory(int thr_id)
|
||||
uint64_t tot64 = 0, free64 = 0;
|
||||
// cuda (6.5) one can crash on pascal and dont handle 8GB
|
||||
nvapiMemGetInfo(dev_id, &free64, &tot64);
|
||||
return (int) (free64 / (1024 * 1024));
|
||||
return (int) (free64 / (1024));
|
||||
#else
|
||||
size_t mtotal = 0, mfree = 0;
|
||||
cudaSetDevice(dev_id);
|
||||
|
@ -25,7 +25,7 @@
|
||||
|
||||
extern "C" short device_map[MAX_GPUS];
|
||||
extern "C" long device_sm[MAX_GPUS];
|
||||
|
||||
extern short device_mpcount[MAX_GPUS];
|
||||
extern int cuda_arch[MAX_GPUS];
|
||||
|
||||
// common functions
|
||||
|
2
miner.h
2
miner.h
@ -519,6 +519,7 @@ extern double stratum_diff;
|
||||
//#define MAX_THREADS 32 todo
|
||||
extern char* device_name[MAX_GPUS];
|
||||
extern short device_map[MAX_GPUS];
|
||||
extern short device_mpcount[MAX_GPUS];
|
||||
extern long device_sm[MAX_GPUS];
|
||||
extern uint32_t gpus_intensity[MAX_GPUS];
|
||||
extern int opt_cudaschedule;
|
||||
@ -576,6 +577,7 @@ void cuda_clear_lasterror();
|
||||
extern void format_hashrate(double hashrate, char *output);
|
||||
extern void applog(int prio, const char *fmt, ...);
|
||||
extern void gpulog(int prio, int thr_id, const char *fmt, ...);
|
||||
|
||||
void get_defconfig_path(char *out, size_t bufsize, char *argv0);
|
||||
extern void cbin2hex(char *out, const char *in, size_t len);
|
||||
extern char *bin2hex(const unsigned char *in, size_t len);
|
||||
|
Loading…
x
Reference in New Issue
Block a user