Browse Source

Revert "Move the non cl_ variables into the cgpu info struct to allow creating a new cl state on reinit, preserving known GPU variables."

This reverts commit 28880d0dc7.
nfactor-troky
Con Kolivas 13 years ago
parent
commit
b1289a0159
  1. 15
      main.c
  2. 5
      miner.h
  3. 51
      ocl.c
  4. 6
      ocl.h

15
main.c

@@ -2966,7 +2966,6 @@ static void *gpuminer_thread(void *userdata)
uint32_t *res, *blank_res;
double gpu_ms_average = 7;
int gpu = dev_from_id(thr_id);
struct cgpu_info *cgpu = mythr->cgpu;
size_t globalThreads[1];
size_t localThreads[1];
@@ -2978,7 +2977,7 @@ static void *gpuminer_thread(void *userdata)
struct work *work = make_work();
unsigned int threads;
unsigned const int vectors = cgpu->vwidth;
unsigned const int vectors = clState->preferred_vwidth;
unsigned int hashes;
unsigned int hashes_done = 0;
@@ -3015,7 +3014,7 @@ static void *gpuminer_thread(void *userdata)
}
gettimeofday(&tv_start, NULL);
localThreads[0] = cgpu->work_size;
localThreads[0] = clState->work_size;
set_threads_hashes(vectors, &threads, &hashes, &globalThreads[0],
localThreads[0]);
@@ -3029,7 +3028,7 @@ static void *gpuminer_thread(void *userdata)
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
cgpu->status = LIFE_WELL;
mythr->cgpu->status = LIFE_WELL;
if (opt_debug)
applog(LOG_DEBUG, "Popping ping in gpuminer thread");
@@ -3156,7 +3155,7 @@ static void *gpuminer_thread(void *userdata)
}
if (unlikely(!gpu_devices[gpu])) {
applog(LOG_WARNING, "Thread %d being disabled", thr_id);
mythr->rolling = cgpu->rolling = 0;
mythr->rolling = mythr->cgpu->rolling = 0;
if (opt_debug)
applog(LOG_DEBUG, "Popping wakeup ping in gpuminer thread");
@@ -4047,15 +4046,13 @@ int main (int argc, char *argv[])
/* start GPU mining threads */
for (j = 0; j < nDevs * opt_g_threads; j++) {
int gpu = j % nDevs;
struct cgpu_info *cgpu;
gpus[gpu].is_gpu = 1;
gpus[gpu].cpu_gpu = gpu;
thr = &thr_info[i];
thr->id = i;
cgpu = &gpus[gpu];
thr->cgpu = cgpu;
thr->cgpu = &gpus[gpu];
thr->q = tq_new();
if (!thr->q)
@@ -4071,7 +4068,7 @@ int main (int argc, char *argv[])
}
applog(LOG_INFO, "Init GPU thread %i", i);
clStates[i] = initCl(cgpu, name, sizeof(name));
clStates[i] = initCl(gpu, name, sizeof(name));
if (!clStates[i]) {
applog(LOG_ERR, "Failed to init GPU thread %d", i);
gpu_devices[i] = false;

5
miner.h

@@ -152,11 +152,6 @@ struct cgpu_info {
double efficiency;
double utility;
enum alive status;
int hasBitAlign;
unsigned int vwidth;
size_t max_work_size;
size_t work_size;
};
struct thr_info {

51
ocl.c

@@ -267,16 +267,8 @@ void patch_opcodes(char *w, unsigned remaining)
_clState *initCQ(_clState *clState, unsigned int gpu)
{
cl_int status = 0;
cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 };
clState->context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL, &status);
if (status != CL_SUCCESS)
{
applog(LOG_ERR, "Error: Creating Context. (clCreateContextFromType)");
return NULL;
}
/* create a cl program executable for the device specified */
/* create a cl program executable for all the devices specified */
status = clBuildProgram(clState->program, 1, &devices[gpu], NULL, NULL, NULL);
if (status != CL_SUCCESS)
{
@@ -320,9 +312,8 @@ _clState *initCQ(_clState *clState, unsigned int gpu)
return clState;
}
_clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize)
_clState *initCl(unsigned int gpu, char *name, size_t nameSize)
{
unsigned int gpu = cgpu->cpu_gpu;
int patchbfi = 0;
cl_int status = 0;
size_t nDevices;
@@ -367,7 +358,7 @@ _clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize)
}
find = strstr(extensions, camo);
if (find)
cgpu->hasBitAlign = patchbfi = 1;
clState->hasBitAlign = patchbfi = 1;
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&clState->preferred_vwidth, NULL);
if (status != CL_SUCCESS) {
@@ -377,27 +368,26 @@ _clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize)
if (opt_debug)
applog(LOG_DEBUG, "Preferred vector width reported %d", clState->preferred_vwidth);
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&cgpu->max_work_size, NULL);
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&clState->max_work_size, NULL);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_WORK_GROUP_SIZE");
return NULL;
}
if (opt_debug)
applog(LOG_DEBUG, "Max work group size reported %d", cgpu->max_work_size);
applog(LOG_DEBUG, "Max work group size reported %d", clState->max_work_size);
/* For some reason 2 vectors is still better even if the card says
* otherwise, and many cards lie about their max so use 256 as max
* unless explicitly set on the command line */
cgpu->vwidth = clState->preferred_vwidth;
if (clState->preferred_vwidth > 1)
cgpu->vwidth = 2;
clState->preferred_vwidth = 2;
if (opt_vectors)
cgpu->vwidth = opt_vectors;
if (opt_worksize && opt_worksize <= cgpu->max_work_size)
cgpu->work_size = opt_worksize;
clState->preferred_vwidth = opt_vectors;
if (opt_worksize && opt_worksize <= clState->max_work_size)
clState->work_size = opt_worksize;
else
cgpu->work_size = (cgpu->max_work_size <= 256 ? cgpu->max_work_size : 256) /
cgpu->vwidth;
clState->work_size = (clState->max_work_size <= 256 ? clState->max_work_size : 256) /
clState->preferred_vwidth;
/* Create binary filename based on parameters passed to opencl
* compiler to ensure we only load a binary that matches what would
@@ -409,7 +399,7 @@ _clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize)
char filename[16];
if (chosen_kernel == KL_NONE) {
if (cgpu->hasBitAlign)
if (clState->hasBitAlign)
chosen_kernel = KL_PHATK;
else
chosen_kernel = KL_POCLBM;
@@ -452,14 +442,14 @@ _clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize)
}
strcat(binaryfilename, name);
if (cgpu->hasBitAlign)
if (clState->hasBitAlign)
strcat(binaryfilename, "bitalign");
strcat(binaryfilename, "v");
sprintf(numbuf, "%d", cgpu->vwidth);
sprintf(numbuf, "%d", clState->preferred_vwidth);
strcat(binaryfilename, numbuf);
strcat(binaryfilename, "w");
sprintf(numbuf, "%d", (int)cgpu->work_size);
sprintf(numbuf, "%d", (int)clState->work_size);
strcat(binaryfilename, numbuf);
strcat(binaryfilename, "long");
sprintf(numbuf, "%d", (int)sizeof(long));
@@ -515,7 +505,7 @@ build:
memcpy(source, rawsource, pl);
/* Patch the source file with the preferred_vwidth */
if (cgpu->vwidth > 1) {
if (clState->preferred_vwidth > 1) {
char *find = strstr(source, "VECTORSX");
if (unlikely(!find)) {
@@ -523,7 +513,7 @@ build:
return NULL;
}
find += 7; // "VECTORS"
if (cgpu->vwidth == 2)
if (clState->preferred_vwidth == 2)
strncpy(find, "2", 1);
else
strncpy(find, "4", 1);
@@ -532,7 +522,7 @@ build:
}
/* Patch the source file defining BITALIGN */
if (cgpu->hasBitAlign) {
if (clState->hasBitAlign) {
char *find = strstr(source, "BITALIGNX");
if (unlikely(!find)) {
@@ -690,11 +680,8 @@ built:
free(binaries);
free(binary_sizes);
/* We throw everything out now and create the real context we're using in initCQ */
clReleaseContext(clState->context);
applog(LOG_INFO, "Initialising kernel %s with%s BFI_INT patching, %d vectors and worksize %d",
filename, patchbfi ? "" : "out", cgpu->vwidth, cgpu->work_size);
filename, patchbfi ? "" : "out", clState->preferred_vwidth, clState->work_size);
return initCQ(clState, gpu);
}

6
ocl.h

@@ -7,7 +7,6 @@
#else
#include <CL/cl.h>
#endif
#include "miner.h"
typedef struct {
cl_context context;
@@ -15,13 +14,16 @@ typedef struct {
cl_command_queue commandQueue;
cl_program program;
cl_mem outputBuffer;
int hasBitAlign;
cl_uint preferred_vwidth;
size_t max_work_size;
size_t work_size;
} _clState;
extern char *file_contents(const char *filename, int *length);
extern int clDevicesNum();
extern int preinit_devices(void);
extern _clState *initCQ(_clState *clState, unsigned int gpu);
extern _clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize);
extern _clState *initCl(unsigned int gpu, char *name, size_t nameSize);
#endif /* HAVE_OPENCL */
#endif /* __OCL_H__ */

Loading…
Cancel
Save