From 7aeae40af22e6108aab8b68a229eea25a639d650 Mon Sep 17 00:00:00 2001 From: Martin Danielsen Date: Sat, 18 Jan 2014 16:49:15 +0100 Subject: [PATCH] EXPERIMENTAL: A new way of setting intensity; introducing xintensity! All of this is credited to ArGee of RGMiner, who did the initial groundwork for this setting. This new setting allows for a much finer-grained intensity setting and also opens up dual GPU threads on devices previously unable to use them. Note: make sure to use lower thread-concurrency values when you increase GPU threads. Intensity is currently used to spawn GPU threads as a simple 2^value setting. I:13 = 8192 threads I:15 = 32768 threads I:17 = 131072 threads I:18 = 262144 threads I:19 = 524288 threads I:20 = 1048576 threads Notice how the higher settings increase thread count tremendously. Now enter the xintensity setting (Yes, I am a genius with my naming convention!). It is simply a shader multiplier, based on the number of shaders on a card; this should allow the same value to scale with different card models. 6970 with 1536 shaders: xI:64 = 98304 threads R9 280X with 2048 shaders: xI:64 = 131072 threads R9 290 with 2560 shaders: xI:64 = 163840 threads R9 290X with 2816 shaders: xI:64 = 180224 threads 6970 with 1536 shaders: xI:300 = 460800 threads R9 280X with 2048 shaders: xI:300 = 614400 threads R9 290 with 2560 shaders: xI:300 = 768000 threads R9 290X with 2816 shaders: xI:300 = 844800 threads It's now much easier to control thread intensity and it potentially allows for a uniform way of setting the intensity on your system. I'm very interested in constructive feedback, as I do not have access to a lot of different card models. This change has been tested on 6970, R9 290, R9 290X - all with equal or slightly better speeds than the regular intensity setting after a little tuning, but your mileage may vary. Don't fret: if this doesn't work for you, the regular intensity setting is still available. 
Conflicts: driver-opencl.c sgminer.c --- driver-opencl.c | 57 +++++++++++++++++++++++++++++++++++++++++++------ driver-opencl.h | 1 + miner.h | 1 + ocl.c | 11 ++++++++++ ocl.h | 1 + sgminer.c | 3 +++ 6 files changed, 68 insertions(+), 6 deletions(-) diff --git a/driver-opencl.c b/driver-opencl.c index 6624b63e..3cc62d54 100644 --- a/driver-opencl.c +++ b/driver-opencl.c @@ -563,6 +563,7 @@ char *set_intensity(char *arg) return "Invalid value passed to set intensity"; tt = &gpus[device].intensity; *tt = val; + gpus[device].xintensity = 0; // Disable shader based intensity } device++; @@ -578,6 +579,7 @@ char *set_intensity(char *arg) tt = &gpus[device].intensity; *tt = val; + gpus[device].xintensity = 0; // Disable shader based intensity } device++; } @@ -585,12 +587,49 @@ char *set_intensity(char *arg) for (i = device; i < MAX_GPUDEVICES; i++) { gpus[i].dynamic = gpus[0].dynamic; gpus[i].intensity = gpus[0].intensity; + gpus[i].xintensity = 0; // Disable shader based intensity } } return NULL; } +char *set_xintensity(char *arg) +{ + int i, device = 0, val = 0; + char *nextptr; + + nextptr = strtok(arg, ","); + if (nextptr == NULL) + return "Invalid parameters for shader based intensity"; + val = atoi(nextptr); + if (val < 1 || val > 9999) + return "Invalid value passed to set shader intensity"; + + gpus[device].dynamic = false; // Disable dynamic intensity + gpus[device].intensity = 0; // Disable regular intensity + gpus[device].xintensity = val; + device++; + + while (device < MAX_GPUDEVICES && (nextptr = strtok(NULL, ",")) != NULL) { + val = atoi(nextptr); + if (val < 1 || val > 9999) + return "Invalid value passed to set shader based intensity"; + gpus[device].dynamic = false; // Disable dynamic intensity + gpus[device].intensity = 0; // Disable regular intensity + gpus[device].xintensity = val; + device++; + } + if (device == 1) + for (i = device; i < MAX_GPUDEVICES; i++) { + gpus[i].dynamic = gpus[0].dynamic; // Disable dynamic intensity + gpus[i].intensity = gpus[0].intensity; // 
Disable regular intensity + gpus[i].xintensity = gpus[0].xintensity; + } + + return NULL; +} + void print_ndevs(int *ndevs) { opt_log_output = true; @@ -878,13 +917,16 @@ static cl_int queue_scrypt_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_u return status; } -static void set_threads_hashes(unsigned int vectors,int64_t *hashes, size_t *globalThreads, - unsigned int minthreads, __maybe_unused int *intensity) +static void set_threads_hashes(unsigned int vectors, unsigned int compute_shaders, int64_t *hashes, size_t *globalThreads, + unsigned int minthreads, __maybe_unused int *intensity, __maybe_unused int *xintensity) { unsigned int threads = 0; - while (threads < minthreads) { - threads = 1 << *intensity; + if (*xintensity > 0) { + threads = compute_shaders * *xintensity; + } else { + threads = 1 << *intensity; + } if (threads < minthreads) { if (likely(*intensity < MAX_INTENSITY)) (*intensity)++; @@ -1077,7 +1119,10 @@ static void get_opencl_statline_before(char *buf, size_t bufsiz, struct cgpu_inf static void get_opencl_statline(char *buf, size_t bufsiz, struct cgpu_info *gpu) { - tailsprintf(buf, bufsiz, " I:%2d", gpu->intensity); + if (gpu->xintensity > 0) + tailsprintf(buf, bufsiz, " xI:%3d", gpu->xintensity); + else + tailsprintf(buf, bufsiz, " I:%2d", gpu->intensity); } struct opencl_thread_data { @@ -1243,7 +1288,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work, gpu->intervals = 0; } - set_threads_hashes(clState->vwidth, &hashes, globalThreads, localThreads[0], &gpu->intensity); + set_threads_hashes(clState->vwidth, clState->compute_shaders, &hashes, globalThreads, localThreads[0], &gpu->intensity, &gpu->xintensity); if (hashes > gpu->max_hashes) gpu->max_hashes = hashes; diff --git a/driver-opencl.h b/driver-opencl.h index 58dcb0ec..f95ac1fd 100644 --- a/driver-opencl.h +++ b/driver-opencl.h @@ -17,6 +17,7 @@ extern char *set_gpu_vddc(char *arg); extern char *set_temp_overheat(char *arg); 
extern char *set_temp_target(char *arg); extern char *set_intensity(char *arg); +extern char *set_xintensity(char *arg); extern char *set_vector(char *arg); extern char *set_worksize(char *arg); extern char *set_shaders(char *arg); diff --git a/miner.h b/miner.h index 55d55295..decc96f7 100644 --- a/miner.h +++ b/miner.h @@ -464,6 +464,7 @@ struct cgpu_info { int virtual_gpu; int virtual_adl; int intensity; + int xintensity; bool dynamic; cl_uint vwidth; diff --git a/ocl.c b/ocl.c index c7569351..84446a5e 100644 --- a/ocl.c +++ b/ocl.c @@ -369,6 +369,17 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize) return NULL; } applog(LOG_DEBUG, "Max work group size reported %d", (int)(clState->max_work_size)); + + cl_uint compute_units = 0; + status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), (void *)&compute_units, NULL); + if (status != CL_SUCCESS) { + applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_COMPUTE_UNITS", status); + return NULL; + } + // AMD architecture has 64 compute shaders per compute unit. 
+ // Source: http://www.amd.com/us/Documents/GCN_Architecture_whitepaper.pdf + clState->compute_shaders = compute_units * 64; + applog(LOG_DEBUG, "Max shaders calculated %d", (int)(clState->compute_shaders)); status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(cl_ulong), (void *)&cgpu->max_alloc, NULL); if (status != CL_SUCCESS) { diff --git a/ocl.h b/ocl.h index 9b7a3aaf..29e45418 100644 --- a/ocl.h +++ b/ocl.h @@ -29,6 +29,7 @@ typedef struct { cl_uint vwidth; size_t max_work_size; size_t wsize; + size_t compute_shaders; enum cl_kernels chosen_kernel; } _clState; diff --git a/sgminer.c b/sgminer.c index 0212ca09..834e5e89 100644 --- a/sgminer.c +++ b/sgminer.c @@ -1083,6 +1083,9 @@ static struct opt_table opt_config_table[] = { "Intensity of GPU scanning (d or " MIN_INTENSITY_STR " -> " MAX_INTENSITY_STR ",default: d to maintain desktop interactivity)"), + OPT_WITH_ARG("--xintensity|-X", + set_xintensity, NULL, NULL, + "Shader based intensity of GPU scanning (1 to 9999), overrides --intensity|-I."), OPT_WITH_ARG("--kernel-path|-K", opt_set_charp, opt_show_charp, &opt_kernel_path, "Specify a path to where kernel files are"),