From c7698afb4ac45ee4a1a6fc33855c210e6ec30261 Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Sun, 19 Apr 2015 15:13:16 +0200 Subject: [PATCH] scrypt: add --interactive option also tested batchsize and texturecache but seems useless, they can be tested/tuned directly in kernel variants... Note: -i cudaminer param is already used in ccminer (--intensity) --- README.txt | 3 +++ ccminer.cpp | 29 ++++++++++++++++++++++------- scrypt/salsa_kernel.cu | 9 ++++----- scrypt/salsa_kernel.h | 2 +- 4 files changed, 30 insertions(+), 13 deletions(-) diff --git a/README.txt b/README.txt index 6b10f8d..1a2270f 100644 --- a/README.txt +++ b/README.txt @@ -137,6 +137,9 @@ its command line interface and options. Scrypt specific options: -l, --launch-config gives the launch configuration for each kernel in a comma separated list, one per device. + --interactive comma separated list of flags (0/1) specifying + which of the CUDA device you need to run at inter- + active frame rates (because it drives a display). -L, --lookup-gap Divides the per-hash memory requirement by this factor by storing only every N'th value in the scratchpad. Default is 1. 
diff --git a/ccminer.cpp b/ccminer.cpp index c699211..49c49a6 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -191,8 +191,7 @@ short device_map[MAX_GPUS] = { 0 }; long device_sm[MAX_GPUS] = { 0 }; uint32_t gpus_intensity[MAX_GPUS] = { 0 }; -// un-implemented scrypt options -int device_interactive[MAX_GPUS] = { 0 }; +// un-linked to cmdline scrypt options (useless) int device_batchsize[MAX_GPUS] = { 0 }; int device_texturecache[MAX_GPUS] = { 0 }; int device_singlememory[MAX_GPUS] = { 0 }; @@ -201,6 +200,7 @@ int parallel = 2; // All should be made on GPU char *device_config[MAX_GPUS] = { 0 }; int device_backoff[MAX_GPUS] = { 0 }; int device_lookup_gap[MAX_GPUS] = { 0 }; +int device_interactive[MAX_GPUS] = { 0 }; int opt_nfactor = 0; bool opt_autotune = true; bool abort_flag = false; @@ -375,6 +375,7 @@ static struct option const options[] = { { "no-longpoll", 0, NULL, 1003 }, { "no-stratum", 0, NULL, 1007 }, { "no-autotune", 0, NULL, 1004 }, // scrypt + { "interactive", 1, NULL, 1050 }, // scrypt { "launch-config", 0, NULL, 'l' }, // scrypt { "lookup-gap", 0, NULL, 'L' }, // scrypt { "pass", 1, NULL, 'p' }, @@ -410,6 +411,9 @@ Scrypt specific options:\n\ -L, --lookup-gap Divides the per-hash memory requirement by this factor\n\ by storing only every N'th value in the scratchpad.\n\ Default is 1.\n\ + --interactive comma separated list of flags (0/1) specifying\n\ + which of the CUDA device you need to run at inter-\n\ + active frame rates (because it drives a display).\n\ --no-autotune disable auto-tuning of kernel launch parameters\n\ "; @@ -2309,7 +2313,7 @@ void parse_arg(int key, char *arg) case 'L': /* scrypt --lookup-gap */ { char *pch = strtok(arg,","); - int n = 0, last = 0; + int n = 0, last = atoi(arg); while (pch != NULL) { device_lookup_gap[n++] = last = atoi(pch); pch = strtok(NULL, ","); @@ -2318,6 +2322,18 @@ void parse_arg(int key, char *arg) device_lookup_gap[n++] = last; } break; + case 1050: /* scrypt --interactive */ + { + char *pch = 
strtok(arg,","); + int n = 0, last = atoi(arg); + while (pch != NULL) { + device_interactive[n++] = last = atoi(pch); + pch = strtok(NULL, ","); + } + while (n < MAX_GPUS) + device_interactive[n++] = last; + } + break; case 1005: opt_benchmark = true; want_longpoll = false; @@ -2582,14 +2598,13 @@ int main(int argc, char *argv[]) for (i = 0; i < MAX_GPUS; i++) { device_map[i] = i; device_name[i] = NULL; - // for future use, maybe - device_interactive[i] = -1; - device_batchsize[i] = 1024; + device_config[i] = NULL; device_backoff[i] = is_windows() ? 12 : 2; device_lookup_gap[i] = 1; + device_batchsize[i] = 1024; + device_interactive[i] = -1; device_texturecache[i] = -1; device_singlememory[i] = -1; - device_config[i] = NULL; } // number of gpus diff --git a/scrypt/salsa_kernel.cu b/scrypt/salsa_kernel.cu index f17141a..2eabb4b 100644 --- a/scrypt/salsa_kernel.cu +++ b/scrypt/salsa_kernel.cu @@ -307,9 +307,6 @@ int find_optimal_blockcount(int thr_id, KernelInterface* &kernel, bool &concurre checkCudaErrors(cudaGetDeviceProperties(&props, device_map[thr_id])); concurrent = (props.concurrentKernels > 0); - device_name[thr_id] = strdup(props.name); - applog(LOG_INFO, "GPU #%d: %s with SM %d.%d", device_map[thr_id], props.name, props.major, props.minor); - WARPS_PER_BLOCK = -1; // if not specified, use interactive mode for devices that have the watchdog timer enabled @@ -375,10 +372,12 @@ int find_optimal_blockcount(int thr_id, KernelInterface* &kernel, bool &concurre device_lookup_gap[thr_id] = 1; } - applog(LOG_INFO, "GPU #%d: interactive: %d, tex-cache: %d%s, single-alloc: %d", device_map[thr_id], + if (opt_debug) { + applog(LOG_INFO, "GPU #%d: interactive: %d, tex-cache: %d%s, single-alloc: %d", device_map[thr_id], (device_interactive[thr_id] != 0) ? 1 : 0, (device_texturecache[thr_id] != 0) ? device_texturecache[thr_id] : 0, (device_texturecache[thr_id] != 0) ? "D" : "", (device_singlememory[thr_id] != 0) ? 
1 : 0 ); + } // number of threads collaborating on one work unit (hash) unsigned int THREADS_PER_WU = kernel->threads_per_wu(); @@ -814,7 +813,7 @@ void cuda_scrypt_serialize(int thr_id, int stream) { // if the device can concurrently execute multiple kernels, then we must // wait for the serialization event recorded by the other stream - //if (context_concurrent[thr_id] || device_interactive[thr_id]) + if (context_concurrent[thr_id] || device_interactive[thr_id]) cudaStreamWaitEvent(context_streams[stream][thr_id], context_serialize[(stream+1)&1][thr_id], 0); } diff --git a/scrypt/salsa_kernel.h b/scrypt/salsa_kernel.h index 649aeb8..5e2d7f7 100644 --- a/scrypt/salsa_kernel.h +++ b/scrypt/salsa_kernel.h @@ -13,8 +13,8 @@ // from ccminer.cpp extern short device_map[MAX_GPUS]; -extern int device_interactive[MAX_GPUS]; // cudaminer -i extern int device_batchsize[MAX_GPUS]; // cudaminer -b +extern int device_interactive[MAX_GPUS]; // cudaminer -i extern int device_texturecache[MAX_GPUS]; // cudaminer -C extern int device_singlememory[MAX_GPUS]; // cudaminer -m extern int device_lookup_gap[MAX_GPUS]; // -L