scrypt: add --interactive option

Also tested the batchsize and texturecache options, but they seem useless
as command-line parameters: they can be tested/tuned directly in the kernel variants...

Note: the -i cudaminer param is already used in ccminer (--intensity)
This commit is contained in:
Tanguy Pruvot 2015-04-19 15:13:16 +02:00
parent c6a94c9998
commit c7698afb4a
4 changed files with 30 additions and 13 deletions

View File

@ -137,6 +137,9 @@ its command line interface and options.
Scrypt specific options:
-l, --launch-config gives the launch configuration for each kernel
in a comma separated list, one per device.
--interactive comma separated list of flags (0/1) specifying
which of the CUDA devices you need to run at inter-
active frame rates (because it drives a display).
-L, --lookup-gap Divides the per-hash memory requirement by this factor
by storing only every N'th value in the scratchpad.
Default is 1.

View File

@ -191,8 +191,7 @@ short device_map[MAX_GPUS] = { 0 };
long device_sm[MAX_GPUS] = { 0 };
uint32_t gpus_intensity[MAX_GPUS] = { 0 };
// un-implemented scrypt options
int device_interactive[MAX_GPUS] = { 0 };
// un-linked to cmdline scrypt options (useless)
int device_batchsize[MAX_GPUS] = { 0 };
int device_texturecache[MAX_GPUS] = { 0 };
int device_singlememory[MAX_GPUS] = { 0 };
@ -201,6 +200,7 @@ int parallel = 2; // All should be made on GPU
char *device_config[MAX_GPUS] = { 0 };
int device_backoff[MAX_GPUS] = { 0 };
int device_lookup_gap[MAX_GPUS] = { 0 };
int device_interactive[MAX_GPUS] = { 0 };
int opt_nfactor = 0;
bool opt_autotune = true;
bool abort_flag = false;
@ -375,6 +375,7 @@ static struct option const options[] = {
{ "no-longpoll", 0, NULL, 1003 },
{ "no-stratum", 0, NULL, 1007 },
{ "no-autotune", 0, NULL, 1004 }, // scrypt
{ "interactive", 1, NULL, 1050 }, // scrypt
{ "launch-config", 0, NULL, 'l' }, // scrypt
{ "lookup-gap", 0, NULL, 'L' }, // scrypt
{ "pass", 1, NULL, 'p' },
@ -410,6 +411,9 @@ Scrypt specific options:\n\
-L, --lookup-gap Divides the per-hash memory requirement by this factor\n\
by storing only every N'th value in the scratchpad.\n\
Default is 1.\n\
--interactive comma separated list of flags (0/1) specifying\n\
which of the CUDA device you need to run at inter-\n\
active frame rates (because it drives a display).\n\
--no-autotune disable auto-tuning of kernel launch parameters\n\
";
@ -2309,7 +2313,7 @@ void parse_arg(int key, char *arg)
case 'L': /* scrypt --lookup-gap */
{
char *pch = strtok(arg,",");
int n = 0, last = 0;
int n = 0, last = atoi(arg);
while (pch != NULL) {
device_lookup_gap[n++] = last = atoi(pch);
pch = strtok(NULL, ",");
@ -2318,6 +2322,18 @@ void parse_arg(int key, char *arg)
device_lookup_gap[n++] = last;
}
break;
case 1050: /* scrypt --interactive */
{
char *pch = strtok(arg,",");
int n = 0, last = atoi(arg);
while (pch != NULL) {
device_interactive[n++] = last = atoi(pch);
pch = strtok(NULL, ",");
}
while (n < MAX_GPUS)
device_interactive[n++] = last;
}
break;
case 1005:
opt_benchmark = true;
want_longpoll = false;
@ -2582,14 +2598,13 @@ int main(int argc, char *argv[])
for (i = 0; i < MAX_GPUS; i++) {
device_map[i] = i;
device_name[i] = NULL;
// for future use, maybe
device_interactive[i] = -1;
device_batchsize[i] = 1024;
device_config[i] = NULL;
device_backoff[i] = is_windows() ? 12 : 2;
device_lookup_gap[i] = 1;
device_batchsize[i] = 1024;
device_interactive[i] = -1;
device_texturecache[i] = -1;
device_singlememory[i] = -1;
device_config[i] = NULL;
}
// number of gpus

View File

@ -307,9 +307,6 @@ int find_optimal_blockcount(int thr_id, KernelInterface* &kernel, bool &concurre
checkCudaErrors(cudaGetDeviceProperties(&props, device_map[thr_id]));
concurrent = (props.concurrentKernels > 0);
device_name[thr_id] = strdup(props.name);
applog(LOG_INFO, "GPU #%d: %s with SM %d.%d", device_map[thr_id], props.name, props.major, props.minor);
WARPS_PER_BLOCK = -1;
// if not specified, use interactive mode for devices that have the watchdog timer enabled
@ -375,10 +372,12 @@ int find_optimal_blockcount(int thr_id, KernelInterface* &kernel, bool &concurre
device_lookup_gap[thr_id] = 1;
}
applog(LOG_INFO, "GPU #%d: interactive: %d, tex-cache: %d%s, single-alloc: %d", device_map[thr_id],
if (opt_debug) {
applog(LOG_INFO, "GPU #%d: interactive: %d, tex-cache: %d%s, single-alloc: %d", device_map[thr_id],
(device_interactive[thr_id] != 0) ? 1 : 0,
(device_texturecache[thr_id] != 0) ? device_texturecache[thr_id] : 0, (device_texturecache[thr_id] != 0) ? "D" : "",
(device_singlememory[thr_id] != 0) ? 1 : 0 );
}
// number of threads collaborating on one work unit (hash)
unsigned int THREADS_PER_WU = kernel->threads_per_wu();
@ -814,7 +813,7 @@ void cuda_scrypt_serialize(int thr_id, int stream)
{
// if the device can concurrently execute multiple kernels, then we must
// wait for the serialization event recorded by the other stream
//if (context_concurrent[thr_id] || device_interactive[thr_id])
if (context_concurrent[thr_id] || device_interactive[thr_id])
cudaStreamWaitEvent(context_streams[stream][thr_id], context_serialize[(stream+1)&1][thr_id], 0);
}

View File

@ -13,8 +13,8 @@
// from ccminer.cpp
extern short device_map[MAX_GPUS];
extern int device_interactive[MAX_GPUS]; // cudaminer -i
extern int device_batchsize[MAX_GPUS]; // cudaminer -b
extern int device_interactive[MAX_GPUS]; // cudaminer -i
extern int device_texturecache[MAX_GPUS]; // cudaminer -C
extern int device_singlememory[MAX_GPUS]; // cudaminer -m
extern int device_lookup_gap[MAX_GPUS]; // -L