Browse Source

scrypt: add --interactive option

also tested batchsize and texturecache, but they seem useless;
they can be tested/tuned directly in the kernel variants...

Note: the cudaminer -i param is already used in ccminer (--intensity)
master
Tanguy Pruvot 10 years ago
parent
commit
c7698afb4a
  1. 3
      README.txt
  2. 29
      ccminer.cpp
  3. 9
      scrypt/salsa_kernel.cu
  4. 2
      scrypt/salsa_kernel.h

3
README.txt

@ -137,6 +137,9 @@ its command line interface and options.
Scrypt specific options: Scrypt specific options:
-l, --launch-config gives the launch configuration for each kernel -l, --launch-config gives the launch configuration for each kernel
in a comma separated list, one per device. in a comma separated list, one per device.
--interactive comma separated list of flags (0/1) specifying
which of the CUDA device you need to run at inter-
active frame rates (because it drives a display).
-L, --lookup-gap Divides the per-hash memory requirement by this factor -L, --lookup-gap Divides the per-hash memory requirement by this factor
by storing only every N'th value in the scratchpad. by storing only every N'th value in the scratchpad.
Default is 1. Default is 1.

29
ccminer.cpp

@ -191,8 +191,7 @@ short device_map[MAX_GPUS] = { 0 };
long device_sm[MAX_GPUS] = { 0 }; long device_sm[MAX_GPUS] = { 0 };
uint32_t gpus_intensity[MAX_GPUS] = { 0 }; uint32_t gpus_intensity[MAX_GPUS] = { 0 };
// un-implemented scrypt options // un-linked to cmdline scrypt options (useless)
int device_interactive[MAX_GPUS] = { 0 };
int device_batchsize[MAX_GPUS] = { 0 }; int device_batchsize[MAX_GPUS] = { 0 };
int device_texturecache[MAX_GPUS] = { 0 }; int device_texturecache[MAX_GPUS] = { 0 };
int device_singlememory[MAX_GPUS] = { 0 }; int device_singlememory[MAX_GPUS] = { 0 };
@ -201,6 +200,7 @@ int parallel = 2; // All should be made on GPU
char *device_config[MAX_GPUS] = { 0 }; char *device_config[MAX_GPUS] = { 0 };
int device_backoff[MAX_GPUS] = { 0 }; int device_backoff[MAX_GPUS] = { 0 };
int device_lookup_gap[MAX_GPUS] = { 0 }; int device_lookup_gap[MAX_GPUS] = { 0 };
int device_interactive[MAX_GPUS] = { 0 };
int opt_nfactor = 0; int opt_nfactor = 0;
bool opt_autotune = true; bool opt_autotune = true;
bool abort_flag = false; bool abort_flag = false;
@ -375,6 +375,7 @@ static struct option const options[] = {
{ "no-longpoll", 0, NULL, 1003 }, { "no-longpoll", 0, NULL, 1003 },
{ "no-stratum", 0, NULL, 1007 }, { "no-stratum", 0, NULL, 1007 },
{ "no-autotune", 0, NULL, 1004 }, // scrypt { "no-autotune", 0, NULL, 1004 }, // scrypt
{ "interactive", 1, NULL, 1050 }, // scrypt
{ "launch-config", 0, NULL, 'l' }, // scrypt { "launch-config", 0, NULL, 'l' }, // scrypt
{ "lookup-gap", 0, NULL, 'L' }, // scrypt { "lookup-gap", 0, NULL, 'L' }, // scrypt
{ "pass", 1, NULL, 'p' }, { "pass", 1, NULL, 'p' },
@ -410,6 +411,9 @@ Scrypt specific options:\n\
-L, --lookup-gap Divides the per-hash memory requirement by this factor\n\ -L, --lookup-gap Divides the per-hash memory requirement by this factor\n\
by storing only every N'th value in the scratchpad.\n\ by storing only every N'th value in the scratchpad.\n\
Default is 1.\n\ Default is 1.\n\
--interactive comma separated list of flags (0/1) specifying\n\
which of the CUDA device you need to run at inter-\n\
active frame rates (because it drives a display).\n\
--no-autotune disable auto-tuning of kernel launch parameters\n\ --no-autotune disable auto-tuning of kernel launch parameters\n\
"; ";
@ -2309,7 +2313,7 @@ void parse_arg(int key, char *arg)
case 'L': /* scrypt --lookup-gap */ case 'L': /* scrypt --lookup-gap */
{ {
char *pch = strtok(arg,","); char *pch = strtok(arg,",");
int n = 0, last = 0; int n = 0, last = atoi(arg);
while (pch != NULL) { while (pch != NULL) {
device_lookup_gap[n++] = last = atoi(pch); device_lookup_gap[n++] = last = atoi(pch);
pch = strtok(NULL, ","); pch = strtok(NULL, ",");
@ -2318,6 +2322,18 @@ void parse_arg(int key, char *arg)
device_lookup_gap[n++] = last; device_lookup_gap[n++] = last;
} }
break; break;
case 1050: /* scrypt --interactive */
{
char *pch = strtok(arg,",");
int n = 0, last = atoi(arg);
while (pch != NULL) {
device_interactive[n++] = last = atoi(pch);
pch = strtok(NULL, ",");
}
while (n < MAX_GPUS)
device_interactive[n++] = last;
}
break;
case 1005: case 1005:
opt_benchmark = true; opt_benchmark = true;
want_longpoll = false; want_longpoll = false;
@ -2582,14 +2598,13 @@ int main(int argc, char *argv[])
for (i = 0; i < MAX_GPUS; i++) { for (i = 0; i < MAX_GPUS; i++) {
device_map[i] = i; device_map[i] = i;
device_name[i] = NULL; device_name[i] = NULL;
// for future use, maybe device_config[i] = NULL;
device_interactive[i] = -1;
device_batchsize[i] = 1024;
device_backoff[i] = is_windows() ? 12 : 2; device_backoff[i] = is_windows() ? 12 : 2;
device_lookup_gap[i] = 1; device_lookup_gap[i] = 1;
device_batchsize[i] = 1024;
device_interactive[i] = -1;
device_texturecache[i] = -1; device_texturecache[i] = -1;
device_singlememory[i] = -1; device_singlememory[i] = -1;
device_config[i] = NULL;
} }
// number of gpus // number of gpus

9
scrypt/salsa_kernel.cu

@ -307,9 +307,6 @@ int find_optimal_blockcount(int thr_id, KernelInterface* &kernel, bool &concurre
checkCudaErrors(cudaGetDeviceProperties(&props, device_map[thr_id])); checkCudaErrors(cudaGetDeviceProperties(&props, device_map[thr_id]));
concurrent = (props.concurrentKernels > 0); concurrent = (props.concurrentKernels > 0);
device_name[thr_id] = strdup(props.name);
applog(LOG_INFO, "GPU #%d: %s with SM %d.%d", device_map[thr_id], props.name, props.major, props.minor);
WARPS_PER_BLOCK = -1; WARPS_PER_BLOCK = -1;
// if not specified, use interactive mode for devices that have the watchdog timer enabled // if not specified, use interactive mode for devices that have the watchdog timer enabled
@ -375,10 +372,12 @@ int find_optimal_blockcount(int thr_id, KernelInterface* &kernel, bool &concurre
device_lookup_gap[thr_id] = 1; device_lookup_gap[thr_id] = 1;
} }
applog(LOG_INFO, "GPU #%d: interactive: %d, tex-cache: %d%s, single-alloc: %d", device_map[thr_id], if (opt_debug) {
applog(LOG_INFO, "GPU #%d: interactive: %d, tex-cache: %d%s, single-alloc: %d", device_map[thr_id],
(device_interactive[thr_id] != 0) ? 1 : 0, (device_interactive[thr_id] != 0) ? 1 : 0,
(device_texturecache[thr_id] != 0) ? device_texturecache[thr_id] : 0, (device_texturecache[thr_id] != 0) ? "D" : "", (device_texturecache[thr_id] != 0) ? device_texturecache[thr_id] : 0, (device_texturecache[thr_id] != 0) ? "D" : "",
(device_singlememory[thr_id] != 0) ? 1 : 0 ); (device_singlememory[thr_id] != 0) ? 1 : 0 );
}
// number of threads collaborating on one work unit (hash) // number of threads collaborating on one work unit (hash)
unsigned int THREADS_PER_WU = kernel->threads_per_wu(); unsigned int THREADS_PER_WU = kernel->threads_per_wu();
@ -814,7 +813,7 @@ void cuda_scrypt_serialize(int thr_id, int stream)
{ {
// if the device can concurrently execute multiple kernels, then we must // if the device can concurrently execute multiple kernels, then we must
// wait for the serialization event recorded by the other stream // wait for the serialization event recorded by the other stream
//if (context_concurrent[thr_id] || device_interactive[thr_id]) if (context_concurrent[thr_id] || device_interactive[thr_id])
cudaStreamWaitEvent(context_streams[stream][thr_id], context_serialize[(stream+1)&1][thr_id], 0); cudaStreamWaitEvent(context_streams[stream][thr_id], context_serialize[(stream+1)&1][thr_id], 0);
} }

2
scrypt/salsa_kernel.h

@ -13,8 +13,8 @@
// from ccminer.cpp // from ccminer.cpp
extern short device_map[MAX_GPUS]; extern short device_map[MAX_GPUS];
extern int device_interactive[MAX_GPUS]; // cudaminer -i
extern int device_batchsize[MAX_GPUS]; // cudaminer -b extern int device_batchsize[MAX_GPUS]; // cudaminer -b
extern int device_interactive[MAX_GPUS]; // cudaminer -i
extern int device_texturecache[MAX_GPUS]; // cudaminer -C extern int device_texturecache[MAX_GPUS]; // cudaminer -C
extern int device_singlememory[MAX_GPUS]; // cudaminer -m extern int device_singlememory[MAX_GPUS]; // cudaminer -m
extern int device_lookup_gap[MAX_GPUS]; // -L extern int device_lookup_gap[MAX_GPUS]; // -L

Loading…
Cancel
Save