diff --git a/README.txt b/README.txt index 9b4548f..393d766 100644 --- a/README.txt +++ b/README.txt @@ -106,6 +106,7 @@ its command line interface and options. -i, --intensity=N[,N] GPU threads per call 8-25 (2^N + F, default: 0=auto) Decimals and multiple values are allowed for fine tuning + --cuda-schedule Set device threads scheduling mode (default: auto) -f, --diff-factor Divide difficulty by this factor (default 1.0) -m, --diff-multiplier Multiply difficulty by this value (default 1.0) --vote=VOTE block reward vote (for HeavyCoin) @@ -225,6 +226,9 @@ features. >>> RELEASE HISTORY <<< + Under Dev... v1.6.7 + Add --cuda-schedule parameter + Aug. 28th 2015 v1.6.6 Allow to load remote config with curl (-c http://...) Add Lyra2REv2 algo (Vertcoin/Zoom) diff --git a/ccminer.cpp b/ccminer.cpp index 2c9713c..d436ab9 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -206,6 +206,7 @@ uint32_t device_gpu_clocks[MAX_GPUS] = { 0 }; uint32_t device_mem_clocks[MAX_GPUS] = { 0 }; uint32_t device_plimit[MAX_GPUS] = { 0 }; int8_t device_pstate[MAX_GPUS] = { -1 }; +int opt_cudaschedule = -1; static bool opt_keep_clocks = false; // un-linked to cmdline scrypt options (useless) @@ -320,6 +321,7 @@ Options:\n\ (matching 2nd gt640 in the PC)\n\ -i --intensity=N[,N] GPU intensity 8.0-25.0 (default: auto) \n\ Decimals are allowed for fine tuning \n\ + --cuda-schedule Set device threads scheduling mode (default: auto)\n\ -f, --diff-factor Divide difficulty by this factor (default 1.0) \n\ -m, --diff-multiplier Multiply difficulty by this value (default 1.0) \n\ --vote=VOTE block reward vote (for HeavyCoin)\n\ @@ -393,6 +395,7 @@ struct option options[] = { { "cputest", 0, NULL, 1006 }, { "cpu-affinity", 1, NULL, 1020 }, { "cpu-priority", 1, NULL, 1021 }, + { "cuda-schedule", 1, NULL, 1025 }, { "debug", 0, NULL, 'D' }, { "help", 0, NULL, 'h' }, { "intensity", 1, NULL, 'i' }, @@ -2860,6 +2863,9 @@ void parse_arg(int key, char *arg) show_usage_and_exit(1); opt_priority = v; break; + case 1025: // cuda-schedule + opt_cudaschedule = atoi(arg); + break; case 1060: // max-temp d = atof(arg); opt_max_temp = d; @@ -3322,7 +3328,7 @@ int main(int argc, char *argv[]) /* nvml is currently not the best choice on Windows (only in x64) */ hnvml = nvml_create(); if (hnvml) { - bool gpu_reinit = false; + bool gpu_reinit = (opt_cudaschedule >= 0); //false cuda_devicenames(); // refresh gpu vendor name applog(LOG_INFO, "NVML GPU monitoring enabled."); for (int n=0; n < opt_n_threads; n++) { @@ -3332,11 +3338,18 @@ int main(int argc, char *argv[]) gpu_reinit = true; if (nvml_set_clocks(hnvml, device_map[n]) == 1) gpu_reinit = true; - if (gpu_reinit) + if (gpu_reinit) { cuda_reset_device(n, NULL); + } } } #endif + // force reinit to set default device flags + if (opt_cudaschedule >= 0 && !hnvml) { + for (int n=0; n < opt_n_threads; n++) { + cuda_reset_device(n, NULL); + } + } #ifdef WIN32 if (!hnvml && nvapi_init() == 0) applog(LOG_INFO, "NVAPI GPU monitoring enabled."); diff --git a/cuda.cpp b/cuda.cpp index 4dc9885..679e77f 100644 --- a/cuda.cpp +++ b/cuda.cpp @@ -212,6 +212,8 @@ void cuda_reset_device(int thr_id, bool *init) usleep(1000); } cudaDeviceReset(); + if (opt_cudaschedule >= 0) + cudaSetDeviceFlags((unsigned)(opt_cudaschedule & cudaDeviceScheduleMask)); } void cudaReportHardwareFailure(int thr_id, cudaError_t err, const char* func) diff --git a/miner.h b/miner.h index 3491a54..683a024 100644 --- a/miner.h +++ b/miner.h @@ -529,6 +529,7 @@ extern char* device_name[MAX_GPUS]; extern short device_map[MAX_GPUS]; extern long device_sm[MAX_GPUS]; extern uint32_t gpus_intensity[MAX_GPUS]; +extern int opt_cudaschedule; #define CL_N "\x1B[0m" #define CL_RED "\x1B[31m" diff --git a/qubit/luffa.cu b/qubit/luffa.cu index 35547c5..e0960c9 100644 --- a/qubit/luffa.cu +++ b/qubit/luffa.cu @@ -44,6 +44,8 @@ extern "C" int scanhash_luffa(int thr_id, uint32_t *pdata, const uint32_t *ptarg if (!init[thr_id]) { cudaSetDevice(device_map[thr_id]); + if (opt_cudaschedule == -1) // to reduce cpu usage... + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], throughput * 64));