From 39f7d2fa74567773549df6a04358b05b994176cc Mon Sep 17 00:00:00 2001
From: Con Kolivas
Date: Sat, 21 Jul 2012 17:31:06 +1000
Subject: [PATCH] Allow lookup gap and thread concurrency to be passed per
 device and store details in kernel binary filename.

---
Notes from review (not part of the commit message):
 - set_lookup_gap() and set_thread_concurrency() stop at MAX_GPUDEVICES
   values instead of writing past the end of gpus[], and reject negative
   values.
 - The binary filename code in initCl() only touches lookup_gap and
   thread_concurrency inside #ifdef USE_SCRYPT, matching their declaration
   in struct cgpu_info.

 cgminer.c       | 10 +++++++
 driver-opencl.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++
 driver-opencl.h |  4 +++
 miner.h         |  4 +++
 ocl.c           | 44 ++++++++++++++++++++-------------
 ocl.h           |  2 --
 6 files changed, 111 insertions(+), 19 deletions(-)

diff --git a/cgminer.c b/cgminer.c
index f2ea3826..b37814dc 100644
--- a/cgminer.c
+++ b/cgminer.c
@@ -853,6 +853,11 @@ static struct opt_table opt_config_table[] = {
 	OPT_WITH_ARG("--gpu-vddc",
 		     set_gpu_vddc, NULL, NULL,
 		     "Set the GPU voltage in Volts - one value for all or separate by commas for per card"),
+#endif
+#ifdef USE_SCRYPT
+	OPT_WITH_ARG("--lookup-gap",
+		     set_lookup_gap, NULL, NULL,
+		     "Set GPU lookup gap for scrypt mining, comma separated"),
 #endif
 	OPT_WITH_ARG("--intensity|-I",
 		     set_intensity, NULL, NULL,
@@ -999,6 +1004,11 @@ static struct opt_table opt_config_table[] = {
 		     opt_hidden
 #endif
 	),
+#ifdef USE_SCRYPT
+	OPT_WITH_ARG("--thread-concurrency",
+		     set_thread_concurrency, NULL, NULL,
+		     "Set GPU thread concurrency for scrypt mining, comma separated"),
+#endif
 	OPT_WITH_ARG("--url|-o",
 		     set_url, NULL, NULL,
 		     "URL for bitcoin JSON-RPC server"),
diff --git a/driver-opencl.c b/driver-opencl.c
index e44faeff..059a7ece 100644
--- a/driver-opencl.c
+++ b/driver-opencl.c
@@ -127,6 +127,72 @@ char *set_worksize(char *arg)
 	return NULL;
 }
 
+#ifdef USE_SCRYPT
+/* Parse a comma separated list of scrypt lookup gaps, assigned to the GPUs in
+ * device order. A single value is copied to every device. Zero is stored as
+ * is, which initCl treats as unset and replaces with its default.
+ * Returns NULL on success or a static error string on bad input. */
+char *set_lookup_gap(char *arg)
+{
+	int i, val, device = 0;
+	char *nextptr;
+
+	nextptr = strtok(arg, ",");
+	if (nextptr == NULL)
+		return "Invalid parameters for set lookup gap";
+
+	do {
+		/* Never write past the end of gpus[] */
+		if (device >= MAX_GPUDEVICES)
+			return "Too many values passed to set lookup gap";
+		val = atoi(nextptr);
+		/* initCl divides by this, a negative gap gives a bogus buffer size */
+		if (val < 0)
+			return "Invalid value passed to set lookup gap";
+		gpus[device++].lookup_gap = val;
+	} while ((nextptr = strtok(NULL, ",")) != NULL);
+
+	if (device == 1) {
+		for (i = device; i < MAX_GPUDEVICES; i++)
+			gpus[i].lookup_gap = gpus[0].lookup_gap;
+	}
+
+	return NULL;
+}
+
+/* Parse a comma separated list of scrypt thread concurrency values, assigned
+ * to the GPUs in device order. A single value is copied to every device. Zero
+ * is stored as is, which initCl treats as unset and replaces with its default.
+ * Returns NULL on success or a static error string on bad input. */
+char *set_thread_concurrency(char *arg)
+{
+	int i, val, device = 0;
+	char *nextptr;
+
+	nextptr = strtok(arg, ",");
+	if (nextptr == NULL)
+		return "Invalid parameters for set thread concurrency";
+
+	do {
+		/* Never write past the end of gpus[] */
+		if (device >= MAX_GPUDEVICES)
+			return "Too many values passed to set thread concurrency";
+		val = atoi(nextptr);
+		/* initCl multiplies this into a size_t, negatives would wrap */
+		if (val < 0)
+			return "Invalid value passed to set thread concurrency";
+		gpus[device++].thread_concurrency = val;
+	} while ((nextptr = strtok(NULL, ",")) != NULL);
+
+	if (device == 1) {
+		for (i = device; i < MAX_GPUDEVICES; i++)
+			gpus[i].thread_concurrency = gpus[0].thread_concurrency;
+	}
+
+	return NULL;
+}
+#endif
+
 static enum cl_kernels select_kernel(char *arg)
 {
 	if (!strcmp(arg, "diablo"))
diff --git a/driver-opencl.h b/driver-opencl.h
index 600bd854..f09571b9 100644
--- a/driver-opencl.h
+++ b/driver-opencl.h
@@ -18,6 +18,10 @@ extern char *set_temp_target(char *arg);
 extern char *set_intensity(char *arg);
 extern char *set_vector(char *arg);
 extern char *set_worksize(char *arg);
+#ifdef USE_SCRYPT
+extern char *set_lookup_gap(char *arg);
+extern char *set_thread_concurrency(char *arg);
+#endif
 extern char *set_kernel(char *arg);
 void manage_gpu(void);
 extern void pause_dynamic_threads(int gpu);
diff --git a/miner.h b/miner.h
index 5cc683ce..65c8fa14 100644
--- a/miner.h
+++ b/miner.h
@@ -360,6 +360,10 @@ struct cgpu_info {
 
 	size_t work_size;
 	enum cl_kernels kernel;
+#ifdef USE_SCRYPT
+	int lookup_gap;
+	int thread_concurrency;
+#endif
 	struct timeval tv_gpustart;;
 	struct timeval tv_gpuend;
 	double gpu_us_average;
diff --git a/ocl.c b/ocl.c
index 8f70a395..4f21b2bb 100644
--- a/ocl.c
+++ b/ocl.c
@@ -367,6 +367,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	 * compiler to ensure we only load a binary that matches what would
 	 * have otherwise created. The filename is:
 	 * name + kernelname +/- g(offset) + v + vectors + w + work_size + l + sizeof(long) + .bin
+	 * For scrypt the filename is:
+	 * name + kernelname +/- g(offset) + lg + lookup_gap + tc + thread_concurrency + w + work_size + l + sizeof(long) + .bin
 	 */
 	char binaryfilename[255];
 	char filename[255];
@@ -461,6 +463,15 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	clState->wsize = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / clState->vwidth;
 	gpus[gpu].work_size = clState->wsize;
 
+#ifdef USE_SCRYPT
+	if (opt_scrypt) {
+		if (!gpus[gpu].lookup_gap)
+			gpus[gpu].lookup_gap = 2;
+		if (!gpus[gpu].thread_concurrency)
+			gpus[gpu].thread_concurrency = 2048;
+	}
+#endif
+
 	FILE *binaryfile;
 	size_t *binary_sizes;
 	char **binaries;
@@ -485,24 +496,23 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 		return NULL;
 	}
 
-#ifdef USE_SCRYPT
-	if (opt_scrypt) {
-		clState->lookup_gap = 1;
-		clState->thread_concurrency = 6144;
-	}
-#endif
-
 	strcat(binaryfilename, name);
 	if (clState->goffset)
 		strcat(binaryfilename, "g");
-	strcat(binaryfilename, "v");
-	sprintf(numbuf, "%d", clState->vwidth);
-	strcat(binaryfilename, numbuf);
-	strcat(binaryfilename, "w");
-	sprintf(numbuf, "%d", (int)clState->wsize);
+	/* The per-device scrypt fields only exist when built with USE_SCRYPT */
+#ifdef USE_SCRYPT
+	if (opt_scrypt) {
+		sprintf(numbuf, "lg%dtc%d", gpus[gpu].lookup_gap, gpus[gpu].thread_concurrency);
+		strcat(binaryfilename, numbuf);
+	} else
+#endif
+	{
+		sprintf(numbuf, "v%d", clState->vwidth);
+		strcat(binaryfilename, numbuf);
+	}
+	sprintf(numbuf, "w%d", (int)clState->wsize);
 	strcat(binaryfilename, numbuf);
-	strcat(binaryfilename, "l");
-	sprintf(numbuf, "%d", (int)sizeof(long));
+	sprintf(numbuf, "l%d", (int)sizeof(long));
 	strcat(binaryfilename, numbuf);
 	strcat(binaryfilename, ".bin");
 
@@ -566,7 +576,7 @@ build:
 #ifdef USE_SCRYPT
 	if (opt_scrypt)
 		sprintf(CompilerOptions, "-D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D WORKSIZE=%d",
-			(int)clState->lookup_gap, (int)clState->thread_concurrency, (int)clState->wsize);
+			gpus[gpu].lookup_gap, gpus[gpu].thread_concurrency, (int)clState->wsize);
 	else
 #endif
 	{
@@ -753,8 +763,8 @@ built:
 
 #ifdef USE_SCRYPT
 	if (opt_scrypt) {
-		size_t ipt = (1024 / clState->lookup_gap + (1024 % clState->lookup_gap > 0));
-		size_t bufsize = 128 * ipt * clState->thread_concurrency;
+		size_t ipt = (1024 / gpus[gpu].lookup_gap + (1024 % gpus[gpu].lookup_gap > 0));
+		size_t bufsize = 128 * ipt * gpus[gpu].thread_concurrency;
 
 		clState->CLbuffer0 = clCreateBuffer(clState->context, CL_MEM_READ_ONLY, 80, NULL, &status);
 		clState->padbuffer8 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, bufsize, NULL, &status);
diff --git a/ocl.h b/ocl.h
index 56fa9b0d..984e7d62 100644
--- a/ocl.h
+++ b/ocl.h
@@ -22,8 +22,6 @@ typedef struct {
 #ifdef USE_SCRYPT
 	cl_mem CLbuffer0;
 	cl_mem padbuffer8;
-	size_t lookup_gap;
-	size_t thread_concurrency;
 	size_t padbufsize;
 	void * cldata;
 #endif