scrypt: add --interactive option

Also tested batchsize and texturecache, but they seem useless as command-line options; they can be tested/tuned directly in the kernel variants. Note: cudaminer's -i parameter is already used in ccminer (--intensity).
10 years ago · c7698afb4a
4 changed files with 30 additions and 13 deletions
--- a/README.txt
+++ b/README.txt
@ -137,6 +137,9 @@ its command line interface and options.
 Scrypt specific options:
  -l, --launch-config   gives the launch configuration for each kernel
                        in a comma separated list, one per device.
      --interactive     comma separated list of flags (0/1) specifying
                        which of the CUDA devices you need to run at inter-
                        active frame rates (because it drives a display).
  -L, --lookup-gap      Divides the per-hash memory requirement by this factor
                        by storing only every N'th value in the scratchpad.
                        Default is 1.
--- a/ccminer.cpp
+++ b/ccminer.cpp
@ -191,8 +191,7 @@ short device_map[MAX_GPUS] = { 0 };
 long  device_sm[MAX_GPUS] = { 0 };
 uint32_t gpus_intensity[MAX_GPUS] = { 0 };
-// un-implemented scrypt options
+// un-linked to cmdline scrypt options (useless)
 int device_interactive[MAX_GPUS] = { 0 };
 int device_batchsize[MAX_GPUS] = { 0 };
 int device_texturecache[MAX_GPUS] = { 0 };
 int device_singlememory[MAX_GPUS] = { 0 };
@ -201,6 +200,7 @@ int parallel = 2; // All should be made on GPU
 char *device_config[MAX_GPUS] = { 0 };
 int device_backoff[MAX_GPUS] = { 0 };
 int device_lookup_gap[MAX_GPUS] = { 0 };
 int device_interactive[MAX_GPUS] = { 0 };
 int opt_nfactor = 0;
 bool opt_autotune = true;
 bool abort_flag = false;
@ -375,6 +375,7 @@ static struct option const options[] = {
 	{ "no-longpoll", 0, NULL, 1003 },
 	{ "no-stratum", 0, NULL, 1007 },
 	{ "no-autotune", 0, NULL, 1004 },  // scrypt
 	{ "interactive", 1, NULL, 1050 },  // scrypt
 	{ "launch-config", 0, NULL, 'l' }, // scrypt
 	{ "lookup-gap", 0, NULL, 'L' },    // scrypt
 	{ "pass", 1, NULL, 'p' },
@ -410,6 +411,9 @@ Scrypt specific options:\n\
  -L, --lookup-gap      Divides the per-hash memory requirement by this factor\n\
                        by storing only every N'th value in the scratchpad.\n\
                        Default is 1.\n\
      --interactive     comma separated list of flags (0/1) specifying\n\
                        which of the CUDA device you need to run at inter-\n\
                        active frame rates (because it drives a display).\n\
      --no-autotune     disable auto-tuning of kernel launch parameters\n\
 ";
@ -2309,7 +2313,7 @@ void parse_arg(int key, char *arg)
 	case 'L': /* scrypt --lookup-gap */
 		{
 			char *pch = strtok(arg,",");
-			int n = 0, last = 0;
+			int n = 0, last = atoi(arg);
 			while (pch != NULL) {
 				device_lookup_gap[n++] = last = atoi(pch);
 				pch = strtok(NULL, ",");
@ -2318,6 +2322,18 @@ void parse_arg(int key, char *arg)
 				device_lookup_gap[n++] = last;
 		}
 		break;
 	case 1050: /* scrypt --interactive */
 		{
 			char *pch = strtok(arg,",");
 			int n = 0, last = atoi(arg);
 			while (pch != NULL) {
 				device_interactive[n++] = last = atoi(pch);
 				pch = strtok(NULL, ",");
 			}
 			while (n < MAX_GPUS)
 				device_interactive[n++] = last;
 		}
 		break;
 	case 1005:
 		opt_benchmark = true;
 		want_longpoll = false;
@ -2582,14 +2598,13 @@ int main(int argc, char *argv[])
 	for (i = 0; i < MAX_GPUS; i++) {
 		device_map[i] = i;
 		device_name[i] = NULL;
-		// for future use, maybe
+		device_config[i] = NULL;
 		device_interactive[i] = -1;
 		device_batchsize[i] = 1024;
 		device_backoff[i] = is_windows() ? 12 : 2;
 		device_lookup_gap[i] = 1;
 		device_batchsize[i] = 1024;
 		device_interactive[i] = -1;
 		device_texturecache[i] = -1;
 		device_singlememory[i] = -1;
 		device_config[i] = NULL;
 	}
 	// number of gpus
--- a/scrypt/salsa_kernel.cu
+++ b/scrypt/salsa_kernel.cu
@ -307,9 +307,6 @@ int find_optimal_blockcount(int thr_id, KernelInterface* &kernel, bool &concurre
 	checkCudaErrors(cudaGetDeviceProperties(&props, device_map[thr_id]));
 	concurrent = (props.concurrentKernels > 0);
 	device_name[thr_id] = strdup(props.name);
 	applog(LOG_INFO, "GPU #%d: %s with SM %d.%d", device_map[thr_id], props.name, props.major, props.minor);
 	WARPS_PER_BLOCK = -1;
 	// if not specified, use interactive mode for devices that have the watchdog timer enabled
@ -375,10 +372,12 @@ int find_optimal_blockcount(int thr_id, KernelInterface* &kernel, bool &concurre
 		device_lookup_gap[thr_id] = 1;
 	}
-	applog(LOG_INFO, "GPU #%d: interactive: %d, tex-cache: %d%s, single-alloc: %d", device_map[thr_id],
+	if (opt_debug) {
 		applog(LOG_INFO, "GPU #%d: interactive: %d, tex-cache: %d%s, single-alloc: %d", device_map[thr_id],
 		   (device_interactive[thr_id]  != 0) ? 1 : 0,
 		   (device_texturecache[thr_id] != 0) ? device_texturecache[thr_id] : 0, (device_texturecache[thr_id] != 0) ? "D" : "",
 		   (device_singlememory[thr_id] != 0) ? 1 : 0 );
 	}
 	// number of threads collaborating on one work unit (hash)
 	unsigned int THREADS_PER_WU = kernel->threads_per_wu();
@ -814,7 +813,7 @@ void cuda_scrypt_serialize(int thr_id, int stream)
 {
 	// if the device can concurrently execute multiple kernels, then we must
 	// wait for the serialization event recorded by the other stream
-	//if (context_concurrent[thr_id] || device_interactive[thr_id])
+	if (context_concurrent[thr_id] || device_interactive[thr_id])
 		cudaStreamWaitEvent(context_streams[stream][thr_id], context_serialize[(stream+1)&1][thr_id], 0);
 }
--- a/scrypt/salsa_kernel.h
+++ b/scrypt/salsa_kernel.h
@ -13,8 +13,8 @@
 // from ccminer.cpp
 extern short device_map[MAX_GPUS];
 extern int device_interactive[MAX_GPUS]; // cudaminer -i
 extern int device_batchsize[MAX_GPUS]; // cudaminer -b
 extern int device_interactive[MAX_GPUS]; // cudaminer -i
 extern int device_texturecache[MAX_GPUS]; // cudaminer -C
 extern int device_singlememory[MAX_GPUS]; // cudaminer -m
 extern int device_lookup_gap[MAX_GPUS]; // -L