diff --git a/ccminer.cpp b/ccminer.cpp index 6654c3e..051f70e 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -206,6 +206,7 @@ int opt_n_threads = 0; static double opt_difficulty = 1; // CH bool opt_trust_pool = false; uint16_t opt_vote = 9999; +int num_cpus; int num_processors; int device_map[8] = {0,1,2,3,4,5,6,7}; // CB char *device_name[8]; // CB @@ -986,13 +987,12 @@ static void *miner_thread(void *userdata) drop_policy(); } - /* Cpu affinity only makes sense if the number of threads is a multiple - * of the number of CPUs */ - if (num_processors > 1 && opt_n_threads % num_processors == 0) { + /* Cpu thread affinity */ + if (num_cpus > 1) { if (!opt_quiet) - applog(LOG_DEBUG, "Binding thread %d to gpu %d", thr_id, - thr_id % num_processors); - affine_to_cpu(thr_id, thr_id % num_processors); + applog(LOG_DEBUG, "Binding thread %d to cpu %d", thr_id, + thr_id % num_cpus); + affine_to_cpu(thr_id, thr_id % num_cpus); } while (1) { @@ -1846,12 +1846,13 @@ static void parse_arg(int key, char *arg) break; case 'd': // CB { + int ngpus = cuda_num_devices(); char * pch = strtok (arg,","); opt_n_threads = 0; while (pch != NULL) { if (pch[0] >= '0' && pch[0] <= '9' && pch[1] == '\0') { - if (atoi(pch) < num_processors) + if (atoi(pch) < ngpus) device_map[opt_n_threads++] = atoi(pch); else { applog(LOG_ERR, "Non-existant CUDA device #%d specified in -d option", atoi(pch)); @@ -1859,13 +1860,15 @@ static void parse_arg(int key, char *arg) } } else { int device = cuda_finddevice(pch); - if (device >= 0 && device < num_processors) + if (device >= 0 && device < ngpus) device_map[opt_n_threads++] = device; else { applog(LOG_ERR, "Non-existant CUDA device '%s' specified in -d option", pch); proper_exit(1); } } + // set number of active gpus + num_processors = opt_n_threads; pch = strtok (NULL, ","); } } @@ -2026,6 +2029,25 @@ int main(int argc, char *argv[]) rpc_pass = strdup(""); pthread_mutex_init(&applog_lock, NULL); + + // number of cpus for thread affinity +#if defined(WIN32) + SYSTEM_INFO sysinfo; + GetSystemInfo(&sysinfo); + num_cpus = sysinfo.dwNumberOfProcessors; +#elif defined(_SC_NPROCESSORS_CONF) + num_cpus = sysconf(_SC_NPROCESSORS_CONF); +#elif defined(CTL_HW) && defined(HW_NCPU) + int req[] = { CTL_HW, HW_NCPU }; + size_t len = sizeof(num_cpus); + sysctl(req, 2, &num_cpus, &len, NULL, 0); +#else + num_cpus = 1; +#endif + if (num_cpus < 1) + num_cpus = 1; + + // number of gpus num_processors = cuda_num_devices(); cuda_devicenames(); diff --git a/nvml.cpp b/nvml.cpp index 33263c9..e13f8f2 100644 --- a/nvml.cpp +++ b/nvml.cpp @@ -30,7 +30,6 @@ #include "nvml.h" extern wrap_nvml_handle *hnvml; -extern int num_processors; // gpus static uint32_t device_bus_ids[8] = { 0 }; @@ -531,6 +530,7 @@ int nvapi_getbusid(unsigned int devNum, int *busid) int wrap_nvapi_init() { + int num_gpus = cuda_num_devices(); NvAPI_Status ret = NvAPI_Initialize(); if (!ret == NVAPI_OK){ NvAPI_ShortString string; @@ -549,7 +549,7 @@ int wrap_nvapi_init() return -1; } - for (int g = 0; g < num_processors; g++) { + for (int g = 0; g < num_gpus; g++) { cudaDeviceProp props; if (cudaGetDeviceProperties(&props, g) == cudaSuccess) { device_bus_ids[g] = props.pciBusID; @@ -561,7 +561,7 @@ int wrap_nvapi_init() NvAPI_ShortString name; ret = NvAPI_GPU_GetFullName(phys[i], name); if (ret == NVAPI_OK) { - for (int g = 0; g < num_processors; g++) { + for (int g = 0; g < num_gpus; g++) { NvU32 busId; ret = NvAPI_GPU_GetBusId(phys[i], &busId); if (ret == NVAPI_OK && busId == device_bus_ids[g]) {