Add a new --cuda-schedule parameter

Accepted values (CUDA device scheduling flags):
0: cudaDeviceScheduleAuto
1: cudaDeviceScheduleSpin
2: cudaDeviceScheduleYield
4: cudaDeviceScheduleBlockingSync

Also use the best one (4, cudaDeviceScheduleBlockingSync) by default for the luffa algo when --cuda-schedule is not given.
This commit is contained in:
Tanguy Pruvot 2015-09-17 23:39:55 +02:00
parent 8f98bde4fb
commit cd64f9b0e7
5 changed files with 24 additions and 2 deletions

View File

@ -106,6 +106,7 @@ its command line interface and options.
-i, --intensity=N[,N] GPU threads per call 8-25 (2^N + F, default: 0=auto) -i, --intensity=N[,N] GPU threads per call 8-25 (2^N + F, default: 0=auto)
Decimals and multiple values are allowed for fine tuning Decimals and multiple values are allowed for fine tuning
--cuda-schedule Set device threads scheduling mode (default: auto)
-f, --diff-factor Divide difficulty by this factor (default 1.0) -f, --diff-factor Divide difficulty by this factor (default 1.0)
-m, --diff-multiplier Multiply difficulty by this value (default 1.0) -m, --diff-multiplier Multiply difficulty by this value (default 1.0)
--vote=VOTE block reward vote (for HeavyCoin) --vote=VOTE block reward vote (for HeavyCoin)
@ -225,6 +226,9 @@ features.
>>> RELEASE HISTORY <<< >>> RELEASE HISTORY <<<
Under Dev... v1.6.7
Add --cuda-schedule parameter
Aug. 28th 2015 v1.6.6 Aug. 28th 2015 v1.6.6
Allow to load remote config with curl (-c http://...) Allow to load remote config with curl (-c http://...)
Add Lyra2REv2 algo (Vertcoin/Zoom) Add Lyra2REv2 algo (Vertcoin/Zoom)

View File

@ -206,6 +206,7 @@ uint32_t device_gpu_clocks[MAX_GPUS] = { 0 };
uint32_t device_mem_clocks[MAX_GPUS] = { 0 }; uint32_t device_mem_clocks[MAX_GPUS] = { 0 };
uint32_t device_plimit[MAX_GPUS] = { 0 }; uint32_t device_plimit[MAX_GPUS] = { 0 };
int8_t device_pstate[MAX_GPUS] = { -1 }; int8_t device_pstate[MAX_GPUS] = { -1 };
int opt_cudaschedule = -1;
static bool opt_keep_clocks = false; static bool opt_keep_clocks = false;
// un-linked to cmdline scrypt options (useless) // un-linked to cmdline scrypt options (useless)
@ -320,6 +321,7 @@ Options:\n\
(matching 2nd gt640 in the PC)\n\ (matching 2nd gt640 in the PC)\n\
-i --intensity=N[,N] GPU intensity 8.0-25.0 (default: auto) \n\ -i --intensity=N[,N] GPU intensity 8.0-25.0 (default: auto) \n\
Decimals are allowed for fine tuning \n\ Decimals are allowed for fine tuning \n\
--cuda-schedule Set device threads scheduling mode (default: auto)\n\
-f, --diff-factor Divide difficulty by this factor (default 1.0) \n\ -f, --diff-factor Divide difficulty by this factor (default 1.0) \n\
-m, --diff-multiplier Multiply difficulty by this value (default 1.0) \n\ -m, --diff-multiplier Multiply difficulty by this value (default 1.0) \n\
--vote=VOTE block reward vote (for HeavyCoin)\n\ --vote=VOTE block reward vote (for HeavyCoin)\n\
@ -393,6 +395,7 @@ struct option options[] = {
{ "cputest", 0, NULL, 1006 }, { "cputest", 0, NULL, 1006 },
{ "cpu-affinity", 1, NULL, 1020 }, { "cpu-affinity", 1, NULL, 1020 },
{ "cpu-priority", 1, NULL, 1021 }, { "cpu-priority", 1, NULL, 1021 },
{ "cuda-schedule", 1, NULL, 1025 },
{ "debug", 0, NULL, 'D' }, { "debug", 0, NULL, 'D' },
{ "help", 0, NULL, 'h' }, { "help", 0, NULL, 'h' },
{ "intensity", 1, NULL, 'i' }, { "intensity", 1, NULL, 'i' },
@ -2860,6 +2863,9 @@ void parse_arg(int key, char *arg)
show_usage_and_exit(1); show_usage_and_exit(1);
opt_priority = v; opt_priority = v;
break; break;
case 1025: // cuda-schedule
opt_cudaschedule = atoi(arg);
break;
case 1060: // max-temp case 1060: // max-temp
d = atof(arg); d = atof(arg);
opt_max_temp = d; opt_max_temp = d;
@ -3322,7 +3328,7 @@ int main(int argc, char *argv[])
/* nvml is currently not the best choice on Windows (only in x64) */ /* nvml is currently not the best choice on Windows (only in x64) */
hnvml = nvml_create(); hnvml = nvml_create();
if (hnvml) { if (hnvml) {
bool gpu_reinit = false; bool gpu_reinit = (opt_cudaschedule >= 0); //false
cuda_devicenames(); // refresh gpu vendor name cuda_devicenames(); // refresh gpu vendor name
applog(LOG_INFO, "NVML GPU monitoring enabled."); applog(LOG_INFO, "NVML GPU monitoring enabled.");
for (int n=0; n < opt_n_threads; n++) { for (int n=0; n < opt_n_threads; n++) {
@ -3332,11 +3338,18 @@ int main(int argc, char *argv[])
gpu_reinit = true; gpu_reinit = true;
if (nvml_set_clocks(hnvml, device_map[n]) == 1) if (nvml_set_clocks(hnvml, device_map[n]) == 1)
gpu_reinit = true; gpu_reinit = true;
if (gpu_reinit) if (gpu_reinit) {
cuda_reset_device(n, NULL); cuda_reset_device(n, NULL);
}
} }
} }
#endif #endif
// force reinit to set default device flags
if (opt_cudaschedule >= 0 && !hnvml) {
for (int n=0; n < opt_n_threads; n++) {
cuda_reset_device(n, NULL);
}
}
#ifdef WIN32 #ifdef WIN32
if (!hnvml && nvapi_init() == 0) if (!hnvml && nvapi_init() == 0)
applog(LOG_INFO, "NVAPI GPU monitoring enabled."); applog(LOG_INFO, "NVAPI GPU monitoring enabled.");

View File

@ -212,6 +212,8 @@ void cuda_reset_device(int thr_id, bool *init)
usleep(1000); usleep(1000);
} }
cudaDeviceReset(); cudaDeviceReset();
if (opt_cudaschedule >= 0)
cudaSetDeviceFlags((unsigned)(opt_cudaschedule & cudaDeviceScheduleMask));
} }
void cudaReportHardwareFailure(int thr_id, cudaError_t err, const char* func) void cudaReportHardwareFailure(int thr_id, cudaError_t err, const char* func)

View File

@ -529,6 +529,7 @@ extern char* device_name[MAX_GPUS];
extern short device_map[MAX_GPUS]; extern short device_map[MAX_GPUS];
extern long device_sm[MAX_GPUS]; extern long device_sm[MAX_GPUS];
extern uint32_t gpus_intensity[MAX_GPUS]; extern uint32_t gpus_intensity[MAX_GPUS];
extern int opt_cudaschedule;
#define CL_N "\x1B[0m" #define CL_N "\x1B[0m"
#define CL_RED "\x1B[31m" #define CL_RED "\x1B[31m"

View File

@ -44,6 +44,8 @@ extern "C" int scanhash_luffa(int thr_id, uint32_t *pdata, const uint32_t *ptarg
if (!init[thr_id]) if (!init[thr_id])
{ {
cudaSetDevice(device_map[thr_id]); cudaSetDevice(device_map[thr_id]);
if (opt_cudaschedule == -1) // to reduce cpu usage...
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], throughput * 64)); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], throughput * 64));