Browse Source

xmr: default settings with card attributes

master
Tanguy Pruvot 8 years ago
parent
commit
214f392778
  1. 1
      ccminer.cpp
  2. 33
      crypto/cryptonight.cu
  3. 9
      cuda.cpp
  4. 2
      cuda_helper.h
  5. 2
      miner.h

1
ccminer.cpp

@ -128,6 +128,7 @@ int active_gpus;
char * device_name[MAX_GPUS]; char * device_name[MAX_GPUS];
short device_map[MAX_GPUS] = { 0 }; short device_map[MAX_GPUS] = { 0 };
long device_sm[MAX_GPUS] = { 0 }; long device_sm[MAX_GPUS] = { 0 };
short device_mpcount[MAX_GPUS] = { 0 };
uint32_t gpus_intensity[MAX_GPUS] = { 0 }; uint32_t gpus_intensity[MAX_GPUS] = { 0 };
uint32_t device_gpu_clocks[MAX_GPUS] = { 0 }; uint32_t device_gpu_clocks[MAX_GPUS] = { 0 };
uint32_t device_mem_clocks[MAX_GPUS] = { 0 }; uint32_t device_mem_clocks[MAX_GPUS] = { 0 };

33
crypto/cryptonight.cu

@ -3,8 +3,13 @@
extern char *device_config[MAX_GPUS]; // -l 32x16 extern char *device_config[MAX_GPUS]; // -l 32x16
static __thread uint32_t cn_blocks = 32; static __thread uint32_t cn_blocks;
static __thread uint32_t cn_threads = 16; static __thread uint32_t cn_threads;
// used for gpu intensity on algo init
static __thread bool gpu_init_shown = false;
#define gpulog_init(p,thr,fmt, ...) if (!gpu_init_shown) \
gpulog(p, thr, fmt, ##__VA_ARGS__)
static uint32_t *d_long_state[MAX_GPUS]; static uint32_t *d_long_state[MAX_GPUS];
static uint64_t *d_ctx_state[MAX_GPUS]; static uint64_t *d_ctx_state[MAX_GPUS];
@ -26,6 +31,7 @@ extern "C" int scanhash_cryptonight(int thr_id, struct work* work, uint32_t max_
uint32_t *nonceptr = (uint32_t*) (&pdata[39]); uint32_t *nonceptr = (uint32_t*) (&pdata[39]);
const uint32_t first_nonce = *nonceptr; const uint32_t first_nonce = *nonceptr;
uint32_t nonce = first_nonce; uint32_t nonce = first_nonce;
int dev_id = device_map[thr_id];
if(opt_benchmark) { if(opt_benchmark) {
ptarget[7] = 0x00ff; ptarget[7] = 0x00ff;
@ -33,19 +39,29 @@ extern "C" int scanhash_cryptonight(int thr_id, struct work* work, uint32_t max_
if(!init[thr_id]) if(!init[thr_id])
{ {
int mem = cuda_available_memory(thr_id);
int mul = device_sm[dev_id] >= 300 ? 4 : 1; // see cryptonight-core.cu
cn_threads = device_sm[dev_id] >= 600 ? 16 : 8; // real TPB is x4 on SM3+
cn_blocks = device_mpcount[dev_id] * 4;
if (cn_blocks*cn_threads*2.2 > mem) cn_blocks = device_mpcount[dev_id] * 2;
if (!opt_quiet)
gpulog_init(LOG_INFO, thr_id, "%s, %d MB available, %hd SMX", device_name[dev_id],
mem, device_mpcount[dev_id]);
if (device_config[thr_id]) { if (device_config[thr_id]) {
sscanf(device_config[thr_id], "%ux%u", &cn_blocks, &cn_threads); int res = sscanf(device_config[thr_id], "%ux%u", &cn_blocks, &cn_threads);
throughput = cuda_default_throughput(thr_id, cn_blocks*cn_threads); throughput = cuda_default_throughput(thr_id, cn_blocks*cn_threads);
gpulog(LOG_INFO, thr_id, "Using %u x %u kernel launch config, %u threads", gpulog_init(LOG_INFO, thr_id, "Using %ux%u(x%d) kernel launch config, %u threads",
cn_blocks, cn_threads, throughput); cn_blocks, cn_threads, mul, throughput);
} else { } else {
throughput = cuda_default_throughput(thr_id, cn_blocks*cn_threads); throughput = cuda_default_throughput(thr_id, cn_blocks*cn_threads);
if (throughput != cn_blocks*cn_threads && cn_threads) { if (throughput != cn_blocks*cn_threads && cn_threads) {
cn_blocks = throughput / cn_threads; cn_blocks = throughput / cn_threads;
throughput = cn_threads * cn_blocks; throughput = cn_threads * cn_blocks;
} }
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u threads (%ux%u)", gpulog_init(LOG_INFO, thr_id, "%u threads (%g) with %u blocks",// of %ux%d",
throughput2intensity(throughput), throughput, cn_blocks, cn_threads); throughput, throughput2intensity(throughput), cn_blocks);//, cn_threads, mul);
} }
if(sizeof(size_t) == 4 && throughput > UINT32_MAX / MEMORY) { if(sizeof(size_t) == 4 && throughput > UINT32_MAX / MEMORY) {
@ -67,7 +83,7 @@ extern "C" int scanhash_cryptonight(int thr_id, struct work* work, uint32_t max_
cudaMalloc(&d_long_state[thr_id], alloc); cudaMalloc(&d_long_state[thr_id], alloc);
exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__); exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
cudaMalloc(&d_ctx_state[thr_id], 208 * throughput); // 200 is aligned 8, not 16 cudaMalloc(&d_ctx_state[thr_id], 208 * throughput); // 52*4 (200 is not aligned 16)
exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__); exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
cudaMalloc(&d_ctx_key1[thr_id], 40 * sizeof(uint32_t) * throughput); cudaMalloc(&d_ctx_key1[thr_id], 40 * sizeof(uint32_t) * throughput);
exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__); exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
@ -80,6 +96,7 @@ extern "C" int scanhash_cryptonight(int thr_id, struct work* work, uint32_t max_
cudaMalloc(&d_ctx_b[thr_id], 4 * sizeof(uint32_t) * throughput); cudaMalloc(&d_ctx_b[thr_id], 4 * sizeof(uint32_t) * throughput);
exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__); exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
gpu_init_shown = true;
init[thr_id] = true; init[thr_id] = true;
} }

9
cuda.cpp

@ -78,6 +78,7 @@ void cuda_devicenames()
cudaGetDeviceProperties(&props, dev_id); cudaGetDeviceProperties(&props, dev_id);
device_sm[dev_id] = (props.major * 100 + props.minor * 10); device_sm[dev_id] = (props.major * 100 + props.minor * 10);
device_mpcount[dev_id] = (short) props.multiProcessorCount;
if (device_name[dev_id]) { if (device_name[dev_id]) {
free(device_name[dev_id]); free(device_name[dev_id]);
@ -105,8 +106,10 @@ void cuda_print_devices()
cudaDeviceProp props; cudaDeviceProp props;
cudaGetDeviceProperties(&props, dev_id); cudaGetDeviceProperties(&props, dev_id);
if (!opt_n_threads || n < opt_n_threads) { if (!opt_n_threads || n < opt_n_threads) {
fprintf(stderr, "GPU #%d: SM %d.%d %s @ %.0f MHz (MEM %.0f)\n", dev_id, props.major, props.minor, fprintf(stderr, "GPU #%d: SM %d.%d %s @ %.0f MHz (MEM %.0f)\n", dev_id,
device_name[dev_id], (double) props.clockRate/1000, (double) props.memoryClockRate/1000); props.major, props.minor, device_name[dev_id],
(double) props.clockRate/1000,
(double) props.memoryClockRate/1000);
#ifdef USE_WRAPNVML #ifdef USE_WRAPNVML
if (opt_debug) nvml_print_device_info(dev_id); if (opt_debug) nvml_print_device_info(dev_id);
#ifdef WIN32 #ifdef WIN32
@ -224,7 +227,7 @@ int cuda_available_memory(int thr_id)
uint64_t tot64 = 0, free64 = 0; uint64_t tot64 = 0, free64 = 0;
// cuda (6.5) one can crash on pascal and dont handle 8GB // cuda (6.5) one can crash on pascal and dont handle 8GB
nvapiMemGetInfo(dev_id, &free64, &tot64); nvapiMemGetInfo(dev_id, &free64, &tot64);
return (int) (free64 / (1024 * 1024)); return (int) (free64 / (1024));
#else #else
size_t mtotal = 0, mfree = 0; size_t mtotal = 0, mfree = 0;
cudaSetDevice(dev_id); cudaSetDevice(dev_id);

2
cuda_helper.h

@ -25,7 +25,7 @@
extern "C" short device_map[MAX_GPUS]; extern "C" short device_map[MAX_GPUS];
extern "C" long device_sm[MAX_GPUS]; extern "C" long device_sm[MAX_GPUS];
extern short device_mpcount[MAX_GPUS];
extern int cuda_arch[MAX_GPUS]; extern int cuda_arch[MAX_GPUS];
// common functions // common functions

2
miner.h

@ -519,6 +519,7 @@ extern double stratum_diff;
//#define MAX_THREADS 32 todo //#define MAX_THREADS 32 todo
extern char* device_name[MAX_GPUS]; extern char* device_name[MAX_GPUS];
extern short device_map[MAX_GPUS]; extern short device_map[MAX_GPUS];
extern short device_mpcount[MAX_GPUS];
extern long device_sm[MAX_GPUS]; extern long device_sm[MAX_GPUS];
extern uint32_t gpus_intensity[MAX_GPUS]; extern uint32_t gpus_intensity[MAX_GPUS];
extern int opt_cudaschedule; extern int opt_cudaschedule;
@ -576,6 +577,7 @@ void cuda_clear_lasterror();
extern void format_hashrate(double hashrate, char *output); extern void format_hashrate(double hashrate, char *output);
extern void applog(int prio, const char *fmt, ...); extern void applog(int prio, const char *fmt, ...);
extern void gpulog(int prio, int thr_id, const char *fmt, ...); extern void gpulog(int prio, int thr_id, const char *fmt, ...);
void get_defconfig_path(char *out, size_t bufsize, char *argv0); void get_defconfig_path(char *out, size_t bufsize, char *argv0);
extern void cbin2hex(char *out, const char *in, size_t len); extern void cbin2hex(char *out, const char *in, size_t len);
extern char *bin2hex(const unsigned char *in, size_t len); extern char *bin2hex(const unsigned char *in, size_t len);

Loading…
Cancel
Save