diff --git a/Makefile.am b/Makefile.am
index 683e21d..c409f01 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -21,6 +21,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \
crc32.c hefty1.c \
ccminer.cpp pools.cpp util.cpp bench.cpp bignum.cpp \
api.cpp hashlog.cpp nvml.cpp stats.cpp sysinfos.cpp cuda.cpp \
+ nvsettings.cpp \
equi/equi-stratum.cpp equi/equi.cpp equi/blake2/blake2bx.cpp \
equi/equihash.cpp equi/cuda_equi.cu \
heavy/heavy.cu \
diff --git a/ccminer.cpp b/ccminer.cpp
index 85ec68e..cc33d61 100644
--- a/ccminer.cpp
+++ b/ccminer.cpp
@@ -126,6 +126,7 @@ bool opt_trust_pool = false;
uint16_t opt_vote = 9999;
int num_cpus;
int active_gpus;
+bool need_nvsettings = false;
char * device_name[MAX_GPUS];
short device_map[MAX_GPUS] = { 0 };
long device_sm[MAX_GPUS] = { 0 };
@@ -133,6 +134,7 @@ short device_mpcount[MAX_GPUS] = { 0 };
uint32_t gpus_intensity[MAX_GPUS] = { 0 };
uint32_t device_gpu_clocks[MAX_GPUS] = { 0 };
uint32_t device_mem_clocks[MAX_GPUS] = { 0 };
+int32_t device_mem_offsets[MAX_GPUS] = { 0 };
uint32_t device_plimit[MAX_GPUS] = { 0 };
uint8_t device_tlimit[MAX_GPUS] = { 0 };
int8_t device_pstate[MAX_GPUS] = { -1, -1 };
@@ -2058,8 +2060,10 @@ static void *miner_thread(void *userdata)
}
/* conditional mining */
- if (!wanna_mine(thr_id)) {
-
+ if (!wanna_mine(thr_id))
+ {
+ // reset default mem offset before idle..
+ if (need_nvsettings) nvs_reset_clocks(dev_id);
// free gpu resources
algo_free_all(thr_id);
// clear any free error (algo switch)
@@ -2084,7 +2088,11 @@ static void *miner_thread(void *userdata)
sleep(5);
if (!thr_id) pools[cur_pooln].wait_time += 5;
continue;
+ } else {
+ // reapply mem offset if needed
+ if (need_nvsettings) nvs_set_clocks(dev_id);
}
+
pool_on_hold = false;
work_restart[thr_id].restart = 0;
@@ -3164,6 +3172,7 @@ void parse_arg(int key, char *arg)
nvapi_init_settings();
#endif
#endif
+ nvs_init();
cuda_print_devices();
proper_exit(EXIT_CODE_OK);
break;
@@ -3389,7 +3398,11 @@ void parse_arg(int key, char *arg)
int n = 0;
while (pch != NULL && n < MAX_GPUS) {
int dev_id = device_map[n++];
- device_mem_clocks[dev_id] = atoi(pch);
+ if (*pch == '+' || *pch == '-')
+ device_mem_offsets[dev_id] = atoi(pch);
+ else
+ device_mem_clocks[dev_id] = atoi(pch);
+ need_nvsettings = true;
pch = strtok(NULL, ",");
}
}
@@ -4061,6 +4074,13 @@ int main(int argc, char *argv[])
tq_push(thr_info[stratum_thr_id].q, strdup(rpc_url));
}
+#ifdef __linux__
+ if (need_nvsettings) {
+ if (nvs_init() < 0)
+ need_nvsettings = false;
+ }
+#endif
+
#ifdef USE_WRAPNVML
#if defined(__linux__) || defined(_WIN64)
/* nvml is currently not the best choice on Windows (only in x64) */
diff --git a/ccminer.vcxproj b/ccminer.vcxproj
index 1316052..6715336 100644
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@@ -243,6 +243,7 @@
+
@@ -597,4 +598,4 @@
-
+
\ No newline at end of file
diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters
index a6b39c4..12c0072 100644
--- a/ccminer.vcxproj.filters
+++ b/ccminer.vcxproj.filters
@@ -297,6 +297,9 @@
Source Files
+
+ Source Files
+
Source Files\sia
@@ -935,4 +938,4 @@
Ressources
-
\ No newline at end of file
+
diff --git a/nvml.cpp b/nvml.cpp
index 8063d29..daa570c 100644
--- a/nvml.cpp
+++ b/nvml.cpp
@@ -310,6 +310,8 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
nvmlReturn_t rc;
uint32_t gpu_clk = 0, mem_clk = 0;
int n = nvmlh->cuda_nvml_device_id[dev_id];
+ //if (need_nvsettings) /* prefer later than init time */
+ // nvs_set_clocks(dev_id);
if (n < 0 || n >= nvmlh->nvml_gpucount)
return -ENODEV;
@@ -395,6 +397,8 @@ int nvml_reset_clocks(nvml_handle *nvmlh, int dev_id)
nvmlReturn_t rc;
uint32_t gpu_clk = 0, mem_clk = 0;
int n = nvmlh->cuda_nvml_device_id[dev_id];
+ if (need_nvsettings)
+ nvs_reset_clocks(dev_id);
if (n < 0 || n >= nvmlh->nvml_gpucount)
return -ENODEV;
diff --git a/nvml.h b/nvml.h
index bb311a5..de5d16d 100644
--- a/nvml.h
+++ b/nvml.h
@@ -250,3 +250,15 @@ void gpu_led_off(int dev_id);
#define LED_MODE_SHARES 1
#define LED_MODE_MINING 2
+/* ------ nvidia-settings stuff for linux -------------------- */
+
+int nvs_init(); // 0 on success, negative errno style code otherwise
+int nvs_set_clocks(int dev_id); // apply the memory offset configured for dev_id
+void nvs_reset_clocks(int dev_id); // set the memory offset of dev_id back to 0
+
+// nvidia-settings (X) devNum from dev_id (cuda GPU #N)
+int8_t nvs_devnum(int dev_id);
+int nvs_devid(int8_t devNum); // reverse lookup: cuda dev_id from the X devNum
+
+extern bool need_nvsettings; // defined in ccminer.cpp
+
diff --git a/nvsettings.cpp b/nvsettings.cpp
new file mode 100644
index 0000000..61c90d1
--- /dev/null
+++ b/nvsettings.cpp
@@ -0,0 +1,247 @@
+/**
+ * nvidia-settings command line interface for linux - tpruvot 2017
+ *
+ * Notes: need X setup and running, with an opened X session.
+ * init speed could be improved, running multiple threads
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/types.h> // pid_t
+
+#include "miner.h"
+#include "nvml.h"
+#include "cuda_runtime.h"
+
+#ifdef __linux__
+
+#define NVS_PATH "/usr/bin/nvidia-settings"
+
+static int8_t nvs_dev_map[MAX_GPUS] = { 0 }; // indexed by cuda dev_id: X gpu number used in [gpu:N]
+static uint8_t nvs_bus_ids[MAX_GPUS] = { 0 }; // indexed by X gpu number: low byte of its PCIBus value
+static int32_t nvs_clocks_set[MAX_GPUS] = { 0 }; // indexed by X gpu number: offset recorded as applied, 0 if none
+
+extern int32_t device_mem_offsets[MAX_GPUS]; // defined in ccminer.cpp, indexed by cuda dev_id
+
+#if 0 /* disabled: fork/exec variant of nvs_query_int(), more complicated and not better in the end */
+int nvs_query_fork_int(int nvs_id, const char* field)
+{
+ pid_t pid;
+ int pipes[2] = { 0 };
+ if (pipe(pipes) < 0)
+ return -1;
+
+ if ((pid = fork()) == -1) {
+ close(pipes[0]);
+ close(pipes[1]);
+ return -1;
+ } else if (pid == 0) { // child: run nvidia-settings with stdout redirected to the pipe
+ char gpu_field[128] = { 0 };
+ sprintf(gpu_field, "[gpu:%d]/%s", nvs_id, field);
+
+ dup2(pipes[1], STDOUT_FILENO);
+ close(pipes[0]);
+ //close(pipes[1]);
+
+ if (-1 == execl(NVS_PATH, "nvidia-settings", "-q", gpu_field, "-t", NULL)) {
+ exit(-1);
+ }
+ } else { // parent: read the first integer printed by the child
+ int intval = -1;
+ FILE *p = fdopen(pipes[0], "r");
+ close(pipes[1]);
+ if (!p) {
+ applog(LOG_WARNING, "%s: fdopen(%d) failed", __func__, pipes[0]);
+ return -1; // NOTE(review): pipes[0] is left open on this path
+ }
+ int rc = fscanf(p, "%d", &intval); // BUS 0000:2a:00.0 is read 42
+ if (rc > 0) {
+ //applog(LOG_BLUE, "%s res=%d", field, intval);
+ }
+ fclose(p);
+ close(pipes[0]); // NOTE(review): fclose(p) above already closed this descriptor
+ return intval; // NOTE(review): the child is never waited for (no waitpid)
+ }
+ return -1;
+}
+#endif
+
+// Query the integer attribute `field` of X gpu `nvs_id` through the nvidia-settings CLI.
+// stderr is merged into stdout (2>&1) so that error text can be inspected here.
+// Returns the parsed value, or -1 on failure; showerr != 0 also logs "ERROR" output.
+int nvs_query_int(int nvs_id, const char* field, int showerr)
+{
+	char command[256] = { 0 };
+	int intval = -1;
+	snprintf(command, sizeof(command), "%s -t -q '[gpu:%d]/%s' 2>&1", NVS_PATH, nvs_id, field);
+	FILE *fp = popen(command, "r");
+	if (!fp)
+		return -1;
+	if (!showerr) {
+		// quiet mode: any non numeric output (errors included) leaves -1
+		if (fscanf(fp, "%d", &intval) != 1) intval = -1;
+	} else {
+		char msg[512] = { 0 };
+		char buf[64] = { 0 };
+		ssize_t len = 0, maxlen = sizeof(msg) - 1;
+		// rebuild the output word by word; %63s keeps long tokens inside buf[64]
+		while (len < maxlen && fscanf(fp, "%63s", buf) > 0)
+			len += snprintf(&msg[len], maxlen - len, "%s ", buf);
+		if (strstr(msg, "ERROR")) {
+			char *xtra = strstr(msg, "; please run");
+			if (xtra) *xtra = '\0'; // strip noise
+			applog(LOG_INFO, "%s", msg);
+		} else {
+			sscanf(msg, "%d", &intval);
+		}
+	}
+	pclose(fp);
+	return intval;
+}
+
+// Query attribute `field` of X gpu `nvs_id` as text, stored word by word in `output` (maxlen bytes).
+// Returns the stored length (0 if nvidia-settings reported an "ERROR", which is logged), or -1.
+int nvs_query_str(int nvs_id, const char* field, char* output, size_t maxlen)
+{
+	char command[256] = { 0 }, buf[256] = { 0 };
+	size_t len = 0;
+	if (!output || !maxlen) return -1;
+	*output = '\0';
+	snprintf(command, sizeof(command), "%s -t -q '[gpu:%d]/%s' 2>&1", NVS_PATH, nvs_id, field);
+	FILE *fp = popen(command, "r");
+	if (!fp) return -1;
+	while (len < maxlen - 1 && fscanf(fp, "%255s", buf) > 0) { // %255s: stay inside buf[256]
+		int n = snprintf(&output[len], maxlen - len, "%s ", buf);
+		if (n < 0) break;
+		// snprintf returns the untruncated length: clamp to what was really stored
+		len = ((size_t) n >= maxlen - len) ? maxlen - 1 : len + n;
+	}
+	pclose(fp);
+	if (strstr(output, "ERROR")) {
+		char *xtra = strstr(output, "; please run");
+		if (xtra) *xtra = '\0'; // strip noise
+		applog(LOG_INFO, "%s", output);
+		*output = '\0';
+		len = 0;
+	}
+	return (int) len;
+}
+
+// Assign integer `value` to attribute `field` of X gpu `nvs_id` (nvidia-settings -a).
+// stderr is merged into stdout (2>&1) so that failures show up in the scanned text.
+// Returns 0 on success, -1 if the command could not be run or printed "ERROR" (logged).
+int nvs_set_int(int nvs_id, const char* field, int value)
+{
+	char command[256] = { 0 };
+	char msg[512] = { 0 };
+	char buf[64] = { 0 };
+	ssize_t len = 0, maxlen = sizeof(msg) - 1;
+	snprintf(command, sizeof(command), "%s -a '[gpu:%d]/%s=%d' 2>&1", NVS_PATH, nvs_id, field, value);
+	FILE *fp = popen(command, "r");
+	if (!fp)
+		return -1;
+	// collect the output word by word; %63s keeps long tokens inside buf[64]
+	while (len < maxlen && fscanf(fp, "%63s", buf) > 0)
+		len += snprintf(&msg[len], maxlen - len, "%s ", buf);
+	pclose(fp);
+	if (strstr(msg, "ERROR")) {
+		char *xtra = strstr(msg, "; please run");
+		if (xtra) *xtra = '\0'; // strip noise
+		applog(LOG_INFO, "%s", msg);
+		return -1;
+	}
+	return 0;
+}
+
+int8_t nvs_devnum(int dev_id) // X gpu number mapped to cuda dev_id, -1 if dev_id is out of range
+{
+	return (dev_id >= 0 && dev_id < MAX_GPUS) ? nvs_dev_map[dev_id] : (int8_t) -1;
+}
+
+int nvs_devid(int8_t nvs_id) // reverse lookup of nvs_devnum(), 0 when nothing matches
+{
+	int thr = 0;
+	while (thr < opt_n_threads) {
+		const int cuda_id = device_map[thr++ % MAX_GPUS];
+		if (nvs_id == nvs_dev_map[cuda_id]) return cuda_id;
+	}
+	return 0;
+}
+
+// Detect the GPUs known to nvidia-settings and map them to the cuda devices by PCI bus id.
+// Returns 0 on success, -ENOENT if NVS_PATH is missing, -ENODEV if no X gpu answered.
+int nvs_init()
+{
+	struct stat info;
+	struct timeval tv_start, tv_end, diff;
+	int x_devices = 0, n_threads = opt_n_threads;
+	// unmatched devices must stay negative: nvs_set_clocks/nvs_reset_clocks skip d < 0
+	for (int i = 0; i < MAX_GPUS; i++) nvs_dev_map[i] = -1;
+	if (stat(NVS_PATH, &info))
+		return -ENOENT;
+
+	gettimeofday(&tv_start, NULL);
+	for (int d = 0; d < MAX_GPUS; d++) {
+		// this part can be "slow" (100-200ms per device)
+		int res = nvs_query_int(d, "PCIBus", 1);
+		if (res < 0) break; // first failing index ends the enumeration
+		nvs_bus_ids[d] = 0xFFu & res;
+		x_devices++;
+	}
+
+	if (opt_debug) {
+		gettimeofday(&tv_end, NULL);
+		timeval_subtract(&diff, &tv_end, &tv_start);
+		applog(LOG_DEBUG, "nvidia-settings pci bus queries took %.2f ms",
+			(1000.0 * diff.tv_sec) + (0.001 * diff.tv_usec));
+	}
+
+	if (!x_devices)
+		return -ENODEV;
+	if (!n_threads) n_threads = cuda_num_devices();
+	for (int i = 0; i < n_threads; i++) {
+		int dev_id = device_map[i % MAX_GPUS];
+		cudaDeviceProp props;
+		if (cudaGetDeviceProperties(&props, dev_id) != cudaSuccess) continue;
+		for (int8_t d = 0; d < x_devices; d++) {
+			if (nvs_bus_ids[d] != (uint8_t) props.pciBusID) continue;
+			gpulog(LOG_DEBUG, i, "matches X gpu:%d by busId %u",
+				(int) d, (uint) nvs_bus_ids[d]);
+			nvs_dev_map[dev_id] = d;
+			/* char buf[1024] = { 0 };
+			nvs_query_str(d, "GPUCurrentClockFreqsString", buf, sizeof(buf)-1);
+			gpulog(LOG_DEBUG, d, "%s", buf); */
+			break;
+		}
+	}
+	return 0;
+}
+
+// Apply once the mem offset requested for dev_id; returns 0 when applied or nothing to do.
+int nvs_set_clocks(int dev_id)
+{
+	int8_t d = nvs_devnum(dev_id);
+	if (d < 0) return -ENODEV;
+	if (!device_mem_offsets[dev_id] || nvs_clocks_set[d]) return 0;
+	int res = nvs_set_int(d, "GPUMemoryTransferRateOffsetAllPerformanceLevels", device_mem_offsets[dev_id]*2);
+	if (res == 0) nvs_clocks_set[d] = device_mem_offsets[dev_id]*2; // nvs_set_int() is 0 on success
+	return res;
+}
+
+void nvs_reset_clocks(int dev_id) // put the mem offset back to 0 if one was recorded
+{
+	const int8_t xgpu = nvs_devnum(dev_id);
+	const bool applied = (xgpu >= 0) && nvs_clocks_set[xgpu] != 0;
+	if (applied) nvs_set_int(xgpu, "GPUMemoryTransferRateOffsetAllPerformanceLevels", 0);
+	if (applied) nvs_clocks_set[xgpu] = 0;
+}
+
+#else
+int nvs_init() { return -ENOSYS; } // non-linux builds: nvidia-settings support is compiled out
+int nvs_set_clocks(int dev_id) { return -ENOSYS; } // NOTE(review): nvs_devnum()/nvs_devid() from nvml.h have no stub here
+void nvs_reset_clocks(int dev_id) { } // no-op
+#endif