From 124dc6ea5777ab19a132ad9039b182ee8bbf5e4a Mon Sep 17 00:00:00 2001
From: Tanguy Pruvot <tanguy.pruvot@gmail.com>
Date: Fri, 14 Nov 2014 19:05:34 +0100
Subject: [PATCH] nvapi: fix mapping of devices

---
 api.cpp     | 16 ++++++++--------
 ccminer.cpp |  5 ++---
 nvml.cpp    | 47 +++++++++++++++++++++++++++++++----------------
 3 files changed, 41 insertions(+), 27 deletions(-)
diff --git a/api.cpp b/api.cpp
index 239eab6..16449ec 100644
--- a/api.cpp
+++ b/api.cpp
@@ -118,8 +118,8 @@ static void gpustatus(int thr_id)
 {
 	if (thr_id >= 0 && thr_id < gpu_threads) {
 		struct cgpu_info *cgpu = &thr_info[thr_id].gpu;
-		char buf[512];
-		char pstate[4];
+		char buf[512]; *buf = '\0';
+		char pstate[8]; *pstate = '\0';
 
 		cgpu->thr_id = thr_id;
 
@@ -148,13 +148,13 @@ static void gpustatus(int thr_id)
 
 		cgpu->khashes = stats_get_speed(thr_id, 0.0) / 1000.0;
 
-		sprintf(pstate, "P%u", cgpu->gpu_pstate);
+		snprintf(pstate, sizeof(pstate), "P%u", cgpu->gpu_pstate);
 		if (cgpu->gpu_pstate == -1)
-			sprintf(pstate, "");
+			*pstate= '\0';
 
-		sprintf(buf, "GPU=%d;TEMP=%.1f;FAN=%d;FREQ=%d;PST=%s;KHS=%.2f;"
-			"HWF=%d;I=%d|",
-			thr_id, cgpu->gpu_temp, cgpu->gpu_fan, 
+		snprintf(buf, sizeof(buf), "GPU=%d;TEMP=%.1f;FAN=%d;FREQ=%d;"
+			"PST=%s;KHS=%.2f;HWF=%d;I=%d|",
+			thr_id, cgpu->gpu_temp, cgpu->gpu_fan,
 			cgpu->gpu_clock, pstate, cgpu->khashes,
 			cgpu->hw_errors, cgpu->intensity);
 
@@ -170,7 +170,7 @@ static void gpustatus(int thr_id)
 */
 static char *getsummary(char *params)
 {
-	char algo[64] = "";
+	char algo[64]; *algo = '\0';
 	time_t ts = time(NULL);
 	double uptime = difftime(ts, startup);
 	double accps = (60.0 * accepted_count) / (uptime ? uptime : 1.0);
diff --git a/ccminer.cpp b/ccminer.cpp
index 27de517..4b7ea59 100644
--- a/ccminer.cpp
+++ b/ccminer.cpp
@@ -997,7 +997,7 @@ static void *miner_thread(void *userdata)
 	 * of the number of CPUs */
 	if (num_processors > 1 && opt_n_threads % num_processors == 0) {
 		if (!opt_quiet)
-			applog(LOG_DEBUG, "Binding thread %d to cpu %d", thr_id,
+			applog(LOG_DEBUG, "Binding thread %d to gpu %d", thr_id,
 					thr_id % num_processors);
 		affine_to_cpu(thr_id, thr_id % num_processors);
 	}
@@ -2064,8 +2064,7 @@ int main(int argc, char *argv[])
 	SetConsoleCtrlHandler((PHANDLER_ROUTINE)ConsoleHandler, TRUE);
 #endif
 
-	if (num_processors == 0)
-	{
+	if (num_processors == 0) {
 		applog(LOG_ERR, "No CUDA devices found! terminating.");
 		exit(1);
 	}
diff --git a/nvml.cpp b/nvml.cpp
index 2829937..7d402bf 100644
--- a/nvml.cpp
+++ b/nvml.cpp
@@ -29,7 +29,11 @@
 #include "nvml.h"
 
 extern wrap_nvml_handle *hnvml;
+extern int num_processors; // gpus
 extern int device_map[8];
+extern char* device_name[8];
+
+static uint32_t device_bus_ids[8] = { 0 };
 
 /*
  * Wrappers to emulate dlopen() on other systems like Windows
@@ -199,7 +203,8 @@ wrap_nvml_handle * wrap_nvml_create()
 
 		if (cudaGetDeviceProperties(&props, i) == cudaSuccess) {
 			int j;
-			for (j=0; j<nvmlh->nvml_gpucount; j++) {
+			device_bus_ids[i] = props.pciBusID;
+			for (j = 0; j<nvmlh->nvml_gpucount; j++) {
 				if ((nvmlh->nvml_pci_domain_id[j] == (uint32_t) props.pciDomainID) &&
 				    (nvmlh->nvml_pci_bus_id[j]    == (uint32_t) props.pciBusID) &&
 				    (nvmlh->nvml_pci_device_id[j] == (uint32_t) props.pciDeviceID)) {
@@ -297,8 +302,8 @@ int wrap_nvml_get_power_usage(wrap_nvml_handle *nvmlh, int cudaindex, unsigned i
 
 	wrap_nvmlReturn_t res = nvmlh->nvmlDeviceGetPowerUsage(nvmlh->devs[gpuindex], milliwatts);
 	if (res != WRAPNVML_SUCCESS) {
-		//if (opt_debug)
-		//	applog(LOG_DEBUG, "nvmlDeviceGetPowerUsage: %s", nvmlh->nvmlErrorString(res));
+		if (opt_debug)
+			applog(LOG_DEBUG, "nvmlDeviceGetPowerUsage: %s", nvmlh->nvmlErrorString(res));
 		return -1;
 	}
 
@@ -314,8 +319,8 @@ int wrap_nvml_get_pstate(wrap_nvml_handle *nvmlh, int cudaindex, int *pstate)
 
 	wrap_nvmlReturn_t res = nvmlh->nvmlDeviceGetPerformanceState(nvmlh->devs[gpuindex], pstate);
 	if (res != WRAPNVML_SUCCESS) {
-		if (opt_debug)
-			applog(LOG_DEBUG, "nvmlDeviceGetPerformanceState: %s", nvmlh->nvmlErrorString(res));
+		//if (opt_debug)
+		//	applog(LOG_DEBUG, "nvmlDeviceGetPerformanceState: %s", nvmlh->nvmlErrorString(res));
 		return -1;
 	}
 
@@ -457,22 +462,32 @@ int wrap_nvapi_init()
 		return -1;
 	}
 
-	for (int i = 0; i < 8; i++) {
-		// to fix
-		nvapi_dev_map[i] = i;
+	for (NvU8 i = 0; i < nvapi_dev_cnt; i++) {
+		NvAPI_ShortString name;
+		nvapi_dev_map[i] = i; // default mapping
+		ret = NvAPI_GPU_GetFullName(phys[i], name);
+		if (ret == NVAPI_OK) {
+			for (int g = 0; g < num_processors; g++) {
+				// TODO: match on device_bus_ids instead; comparing names could be wrong on rigs
+				if (strcmp(device_name[g], name) == 0 && nvapi_dev_map[i] == i) {
+					nvapi_dev_map[i] = g;
+					break;
+				}
+			}
+			if (opt_debug)
+				applog(LOG_DEBUG, "NVAPI dev %d: %s - mapped to CUDA device %d",
+					i, name, nvapi_dev_map[i]);
+		} else {
+			NvAPI_ShortString string;
+			NvAPI_GetErrorMessage(ret, string);
+			applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetFullName: %s", string);
+		}
 	}
+
 #if 0
 	NvAPI_ShortString ver;
 	NvAPI_GetInterfaceVersionString(ver);
 	applog(LOG_DEBUG, "NVAPI Version: %s", ver);
-
-	NvAPI_ShortString name;
-	ret = NvAPI_GPU_GetFullName(phys[devNum], name);
-	if (ret != NVAPI_OK){
-		NvAPI_ShortString string;
-		NvAPI_GetErrorMessage(ret, string);
-		applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetFullName: %s", string);
-	}
 #endif
 
 	return 0;