xmr: default settings with card attributes

8 years ago · 214f392778
5 changed files with 35 additions and 12 deletions
--- a/ccminer.cpp
+++ b/ccminer.cpp
@@ -128,6 +128,7 @@ int active_gpus;
 char * device_name[MAX_GPUS];
 short device_map[MAX_GPUS] = { 0 };
 long  device_sm[MAX_GPUS] = { 0 };
 short device_mpcount[MAX_GPUS] = { 0 };
 uint32_t gpus_intensity[MAX_GPUS] = { 0 };
 uint32_t device_gpu_clocks[MAX_GPUS] = { 0 };
 uint32_t device_mem_clocks[MAX_GPUS] = { 0 };
--- a/crypto/cryptonight.cu
+++ b/crypto/cryptonight.cu
@@ -3,8 +3,13 @@
 extern char *device_config[MAX_GPUS]; // -l 32x16
-static __thread uint32_t cn_blocks  = 32;
+static __thread uint32_t cn_blocks;
-static __thread uint32_t cn_threads = 16;
+static __thread uint32_t cn_threads;
 // used for gpu intensity on algo init
 static __thread bool gpu_init_shown = false;
 #define gpulog_init(p,thr,fmt, ...) if (!gpu_init_shown) \
 	gpulog(p, thr, fmt, ##__VA_ARGS__)
 static uint32_t *d_long_state[MAX_GPUS];
 static uint64_t *d_ctx_state[MAX_GPUS];
@@ -26,6 +31,7 @@ extern "C" int scanhash_cryptonight(int thr_id, struct work* work, uint32_t max_
 	uint32_t *nonceptr = (uint32_t*) (&pdata[39]);
 	const uint32_t first_nonce = *nonceptr;
 	uint32_t nonce = first_nonce;
 	int dev_id = device_map[thr_id];
 	if(opt_benchmark) {
 		ptarget[7] = 0x00ff;
@@ -33,19 +39,29 @@ extern "C" int scanhash_cryptonight(int thr_id, struct work* work, uint32_t max_
 	if(!init[thr_id])
 	{
 		int mem = cuda_available_memory(thr_id);
 		int mul = device_sm[dev_id] >= 300 ? 4 : 1; // see cryptonight-core.cu
 		cn_threads = device_sm[dev_id] >= 600 ? 16 : 8; // real TPB is x4 on SM3+
 		cn_blocks = device_mpcount[dev_id] * 4;
 		if (cn_blocks*cn_threads*2.2 > mem) cn_blocks = device_mpcount[dev_id] * 2;
 		if (!opt_quiet)
 			gpulog_init(LOG_INFO, thr_id, "%s, %d MB available, %hd SMX", device_name[dev_id],
 				mem, device_mpcount[dev_id]);
 		if (device_config[thr_id]) {
-			sscanf(device_config[thr_id], "%ux%u", &cn_blocks, &cn_threads);
+			int res = sscanf(device_config[thr_id], "%ux%u", &cn_blocks, &cn_threads);
 			throughput = cuda_default_throughput(thr_id, cn_blocks*cn_threads);
-			gpulog(LOG_INFO, thr_id, "Using %u x %u kernel launch config, %u threads",
+			gpulog_init(LOG_INFO, thr_id, "Using %ux%u(x%d) kernel launch config, %u threads",
-				cn_blocks, cn_threads, throughput);
+				cn_blocks, cn_threads, mul, throughput);
 		} else {
 			throughput = cuda_default_throughput(thr_id, cn_blocks*cn_threads);
 			if (throughput != cn_blocks*cn_threads && cn_threads) {
 				cn_blocks = throughput / cn_threads;
 				throughput = cn_threads * cn_blocks;
 			}
-			gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u threads (%ux%u)",
+			gpulog_init(LOG_INFO, thr_id, "%u threads (%g) with %u blocks",// of %ux%d",
-				throughput2intensity(throughput), throughput, cn_blocks, cn_threads);
+				throughput, throughput2intensity(throughput), cn_blocks);//, cn_threads, mul);
 		}
 		if(sizeof(size_t) == 4 && throughput > UINT32_MAX / MEMORY) {
@@ -67,7 +83,7 @@ extern "C" int scanhash_cryptonight(int thr_id, struct work* work, uint32_t max_
 		cudaMalloc(&d_long_state[thr_id], alloc);
 		exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
-		cudaMalloc(&d_ctx_state[thr_id], 208 * throughput); // 200 is aligned 8, not 16
+		cudaMalloc(&d_ctx_state[thr_id], 208 * throughput); // 52*4 (200 is not aligned 16)
 		exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
 		cudaMalloc(&d_ctx_key1[thr_id], 40 * sizeof(uint32_t) * throughput);
 		exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
@@ -80,6 +96,7 @@ extern "C" int scanhash_cryptonight(int thr_id, struct work* work, uint32_t max_
 		cudaMalloc(&d_ctx_b[thr_id], 4 * sizeof(uint32_t) * throughput);
 		exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
 		gpu_init_shown = true;
 		init[thr_id] = true;
 	}
--- a/cuda.cpp
+++ b/cuda.cpp
@@ -78,6 +78,7 @@ void cuda_devicenames()
 		cudaGetDeviceProperties(&props, dev_id);
 		device_sm[dev_id] = (props.major * 100 + props.minor * 10);
 		device_mpcount[dev_id] = (short) props.multiProcessorCount;
 		if (device_name[dev_id]) {
 			free(device_name[dev_id]);
@@ -105,8 +106,10 @@ void cuda_print_devices()
 		cudaDeviceProp props;
 		cudaGetDeviceProperties(&props, dev_id);
 		if (!opt_n_threads || n < opt_n_threads) {
-			fprintf(stderr, "GPU #%d: SM %d.%d %s @ %.0f MHz (MEM %.0f)\n", dev_id, props.major, props.minor,
+			fprintf(stderr, "GPU #%d: SM %d.%d %s @ %.0f MHz (MEM %.0f)\n", dev_id,
-				device_name[dev_id], (double) props.clockRate/1000, (double) props.memoryClockRate/1000);
+				props.major, props.minor, device_name[dev_id],
 				(double) props.clockRate/1000,
 				(double) props.memoryClockRate/1000);
 #ifdef USE_WRAPNVML
 			if (opt_debug) nvml_print_device_info(dev_id);
 #ifdef WIN32
@@ -224,7 +227,7 @@ int cuda_available_memory(int thr_id)
 	uint64_t tot64 = 0, free64 = 0;
 	// cuda (6.5) one can crash on pascal and dont handle 8GB
 	nvapiMemGetInfo(dev_id, &free64, &tot64);
-	return (int) (free64 / (1024 * 1024));
+	return (int) (free64 / (1024));
 #else
 	size_t mtotal = 0, mfree = 0;
 	cudaSetDevice(dev_id);
--- a/cuda_helper.h
+++ b/cuda_helper.h
@@ -25,7 +25,7 @@
 extern "C" short device_map[MAX_GPUS];
 extern "C"  long device_sm[MAX_GPUS];
-
+extern short device_mpcount[MAX_GPUS];
 extern int cuda_arch[MAX_GPUS];
 // common functions
--- a/miner.h
+++ b/miner.h
@@ -519,6 +519,7 @@ extern double stratum_diff;
 //#define MAX_THREADS 32 todo
 extern char* device_name[MAX_GPUS];
 extern short device_map[MAX_GPUS];
 extern short device_mpcount[MAX_GPUS];
 extern long  device_sm[MAX_GPUS];
 extern uint32_t gpus_intensity[MAX_GPUS];
 extern int opt_cudaschedule;
@@ -576,6 +577,7 @@ void cuda_clear_lasterror();
 extern void format_hashrate(double hashrate, char *output);
 extern void applog(int prio, const char *fmt, ...);
 extern void gpulog(int prio, int thr_id, const char *fmt, ...);
 void get_defconfig_path(char *out, size_t bufsize, char *argv0);
 extern void cbin2hex(char *out, const char *in, size_t len);
 extern char *bin2hex(const unsigned char *in, size_t len);