Browse Source

Allow different vectors to be set per device.

nfactor-troky
Con Kolivas 13 years ago
parent
commit
deff55c640
  1. 4
      cgminer.c
  2. 40
      device-gpu.c
  3. 2
      device-gpu.h
  4. 5
      miner.h
  5. 32
      ocl.c
  6. 3
      ocl.h

4
cgminer.c

@ -831,8 +831,8 @@ static struct opt_table opt_config_table[] = {
"Username for bitcoin JSON-RPC server"), "Username for bitcoin JSON-RPC server"),
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
OPT_WITH_ARG("--vectors|-v", OPT_WITH_ARG("--vectors|-v",
set_vector, NULL, &opt_vectors, set_vector, NULL, NULL,
"Override detected optimal vector width (1, 2 or 4)"), "Override detected optimal vector (1, 2 or 4) - one value or comma separated list"),
#endif #endif
OPT_WITHOUT_ARG("--verbose", OPT_WITHOUT_ARG("--verbose",
opt_set_bool, &opt_log_output, opt_set_bool, &opt_log_output,

40
device-gpu.c

@ -65,14 +65,32 @@ extern int gpu_fanpercent(int gpu);
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
char *set_vector(const char *arg, int *i) char *set_vector(char *arg)
{ {
char *err = opt_set_intval(arg, i); int i, val = 0, device = 0;
if (err) char *nextptr;
return err;
nextptr = strtok(arg, ",");
if (nextptr == NULL)
return "Invalid parameters for set vector";
val = atoi(nextptr);
if (val != 1 && val != 2 && val != 4)
return "Invalid value passed to set_vector";
gpus[device++].vwidth = val;
while ((nextptr = strtok(NULL, ",")) != NULL) {
val = atoi(nextptr);
if (val != 1 && val != 2 && val != 4)
return "Invalid value passed to set_vector";
gpus[device++].vwidth = val;
}
if (device == 1) {
for (i = device; i < MAX_GPUDEVICES; i++)
gpus[i].vwidth = gpus[0].vwidth;
}
if (*i != 1 && *i != 2 && *i != 4)
return "Valid vectors are 1, 2 or 4";
return NULL; return NULL;
} }
#endif #endif
@ -655,8 +673,8 @@ static _clState *clStates[MAX_GPUDEVICES];
static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads) static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads)
{ {
cl_uint vwidth = clState->preferred_vwidth;
cl_kernel *kernel = &clState->kernel; cl_kernel *kernel = &clState->kernel;
cl_uint vwidth = clState->vwidth;
unsigned int i, num = 0; unsigned int i, num = 0;
cl_int status = 0; cl_int status = 0;
uint *nonces; uint *nonces;
@ -707,8 +725,8 @@ static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint t
static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk, static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk,
__maybe_unused cl_uint threads) __maybe_unused cl_uint threads)
{ {
cl_uint vwidth = clState->preferred_vwidth;
cl_kernel *kernel = &clState->kernel; cl_kernel *kernel = &clState->kernel;
cl_uint vwidth = clState->vwidth;
unsigned int i, num = 0; unsigned int i, num = 0;
cl_int status = 0; cl_int status = 0;
uint *nonces; uint *nonces;
@ -751,8 +769,8 @@ static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk,
static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk, static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk,
__maybe_unused cl_uint threads) __maybe_unused cl_uint threads)
{ {
cl_uint vwidth = clState->preferred_vwidth;
cl_kernel *kernel = &clState->kernel; cl_kernel *kernel = &clState->kernel;
cl_uint vwidth = clState->vwidth;
unsigned int i, num = 0; unsigned int i, num = 0;
cl_int status = 0; cl_int status = 0;
uint *nonces; uint *nonces;
@ -809,8 +827,8 @@ static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk,
static cl_int queue_diablo_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads) static cl_int queue_diablo_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads)
{ {
cl_uint vwidth = clState->preferred_vwidth;
cl_kernel *kernel = &clState->kernel; cl_kernel *kernel = &clState->kernel;
cl_uint vwidth = clState->vwidth;
unsigned int i, num = 0; unsigned int i, num = 0;
cl_int status = 0; cl_int status = 0;
uint *nonces; uint *nonces;
@ -1242,7 +1260,7 @@ static uint64_t opencl_scanhash(struct thr_info *thr, struct work *work,
++gpu->intensity; ++gpu->intensity;
} }
} }
set_threads_hashes(clState->preferred_vwidth, &threads, &hashes, globalThreads, set_threads_hashes(clState->vwidth, &threads, &hashes, globalThreads,
localThreads[0], gpu->intensity); localThreads[0], gpu->intensity);
if (hashes > gpu->max_hashes) if (hashes > gpu->max_hashes)
gpu->max_hashes = hashes; gpu->max_hashes = hashes;

2
device-gpu.h

@ -15,7 +15,7 @@ extern char *set_gpu_vddc(char *arg);
extern char *set_temp_overheat(char *arg); extern char *set_temp_overheat(char *arg);
extern char *set_temp_target(char *arg); extern char *set_temp_target(char *arg);
extern char *set_intensity(char *arg); extern char *set_intensity(char *arg);
extern char *set_vector(const char *arg, int *i); extern char *set_vector(char *arg);
void manage_gpu(void); void manage_gpu(void);
extern void pause_dynamic_threads(int gpu); extern void pause_dynamic_threads(int gpu);

5
miner.h

@ -236,9 +236,13 @@ struct cgpu_info {
struct thr_info *thread; struct thr_info *thread;
unsigned int max_hashes; unsigned int max_hashes;
#ifdef HAVE_OPENCL
int virtual_gpu; int virtual_gpu;
int intensity; int intensity;
bool dynamic; bool dynamic;
cl_uint vwidth;
size_t work_size;
float temp; float temp;
int cutofftemp; int cutofftemp;
@ -255,6 +259,7 @@ struct cgpu_info {
int gpu_memdiff; int gpu_memdiff;
int gpu_powertune; int gpu_powertune;
float gpu_vddc; float gpu_vddc;
#endif
#endif #endif
int last_share_pool; int last_share_pool;
time_t last_share_pool_time; time_t last_share_pool_time;

32
ocl.c

@ -33,7 +33,6 @@
#include "findnonce.h" #include "findnonce.h"
#include "ocl.h" #include "ocl.h"
extern int opt_vectors;
extern int opt_worksize; extern int opt_worksize;
int opt_platform_id; int opt_platform_id;
@ -194,6 +193,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
cl_platform_id platform = NULL; cl_platform_id platform = NULL;
char pbuff[256], vbuff[255]; char pbuff[256], vbuff[255];
cl_platform_id* platforms; cl_platform_id* platforms;
cl_uint preferred_vwidth;
cl_device_id *devices; cl_device_id *devices;
cl_uint numPlatforms; cl_uint numPlatforms;
cl_uint numDevices; cl_uint numDevices;
@ -319,12 +319,12 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
if (!find) if (!find)
clState->hasOpenCL11plus = true; clState->hasOpenCL11plus = true;
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&clState->preferred_vwidth, NULL); status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&preferred_vwidth, NULL);
if (status != CL_SUCCESS) { if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT", status); applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT", status);
return NULL; return NULL;
} }
applog(LOG_DEBUG, "Preferred vector width reported %d", clState->preferred_vwidth); applog(LOG_DEBUG, "Preferred vector width reported %d", preferred_vwidth);
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&clState->max_work_size, NULL); status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&clState->max_work_size, NULL);
if (status != CL_SUCCESS) { if (status != CL_SUCCESS) {
@ -337,22 +337,24 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
* otherwise, and many cards lie about their max so use 256 as max * otherwise, and many cards lie about their max so use 256 as max
* unless explicitly set on the command line. 79x0 cards perform * unless explicitly set on the command line. 79x0 cards perform
* better without vectors */ * better without vectors */
if (clState->preferred_vwidth > 1) { if (preferred_vwidth > 1) {
if (strstr(name, "Tahiti")) if (strstr(name, "Tahiti"))
clState->preferred_vwidth = 1; preferred_vwidth = 1;
else else
clState->preferred_vwidth = 2; preferred_vwidth = 2;
} }
if (opt_vectors) if (gpus[gpu].vwidth)
clState->preferred_vwidth = opt_vectors; clState->vwidth = gpus[gpu].vwidth;
else
clState->vwidth = preferred_vwidth;
if (opt_worksize && opt_worksize <= (int)clState->max_work_size) if (opt_worksize && opt_worksize <= (int)clState->max_work_size)
clState->work_size = opt_worksize; clState->work_size = opt_worksize;
else if (strstr(name, "Tahiti")) else if (strstr(name, "Tahiti"))
clState->work_size = 64; clState->work_size = 64;
else else
clState->work_size = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / clState->work_size = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / clState->vwidth;
clState->preferred_vwidth;
/* Create binary filename based on parameters passed to opencl /* Create binary filename based on parameters passed to opencl
* compiler to ensure we only load a binary that matches what would * compiler to ensure we only load a binary that matches what would
@ -428,7 +430,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
strcat(binaryfilename, name); strcat(binaryfilename, name);
strcat(binaryfilename, "v"); strcat(binaryfilename, "v");
sprintf(numbuf, "%d", clState->preferred_vwidth); sprintf(numbuf, "%d", clState->vwidth);
strcat(binaryfilename, numbuf); strcat(binaryfilename, numbuf);
strcat(binaryfilename, "w"); strcat(binaryfilename, "w");
sprintf(numbuf, "%d", (int)clState->work_size); sprintf(numbuf, "%d", (int)clState->work_size);
@ -496,10 +498,10 @@ build:
char *CompilerOptions = calloc(1, 256); char *CompilerOptions = calloc(1, 256);
sprintf(CompilerOptions, "-D WORKSIZE=%d -D VECTORS%d", sprintf(CompilerOptions, "-D WORKSIZE=%d -D VECTORS%d",
(int)clState->work_size, clState->preferred_vwidth); (int)clState->work_size, clState->vwidth);
applog(LOG_DEBUG, "Setting worksize to %d", clState->work_size); applog(LOG_DEBUG, "Setting worksize to %d", clState->work_size);
if (clState->preferred_vwidth > 1) if (clState->vwidth > 1)
applog(LOG_DEBUG, "Patched source to suit %d vectors", clState->preferred_vwidth); applog(LOG_DEBUG, "Patched source to suit %d vectors", clState->vwidth);
if (clState->hasBitAlign) { if (clState->hasBitAlign) {
strcat(CompilerOptions, " -D BITALIGN"); strcat(CompilerOptions, " -D BITALIGN");
@ -648,7 +650,7 @@ built:
free(binary_sizes); free(binary_sizes);
applog(LOG_INFO, "Initialising kernel %s with%s bitalign, %d vectors and worksize %d", applog(LOG_INFO, "Initialising kernel %s with%s bitalign, %d vectors and worksize %d",
filename, clState->hasBitAlign ? "" : "out", clState->preferred_vwidth, clState->work_size); filename, clState->hasBitAlign ? "" : "out", clState->vwidth, clState->work_size);
if (!prog_built) { if (!prog_built) {
/* create a cl program executable for all the devices specified */ /* create a cl program executable for all the devices specified */

3
ocl.h

@ -21,9 +21,10 @@ typedef struct {
cl_mem outputBuffer; cl_mem outputBuffer;
bool hasBitAlign; bool hasBitAlign;
bool hasOpenCL11plus; bool hasOpenCL11plus;
cl_uint preferred_vwidth; cl_uint vwidth;
size_t max_work_size; size_t max_work_size;
size_t work_size; size_t work_size;
size_t wsize;
enum cl_kernels chosen_kernel; enum cl_kernels chosen_kernel;
} _clState; } _clState;

Loading…
Cancel
Save