From b1289a015936a962f3cfa52f4e04c9cf52efecc1 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Mon, 15 Aug 2011 20:26:46 +1000 Subject: [PATCH] Revert "Move the non cl_ variables into the cgpu info struct to allow creating a new cl state on reinit, preserving known GPU variables." This reverts commit 28880d0dc7c601ee4479921502b66e913e38e36d. --- main.c | 15 ++++++--------- miner.h | 5 ----- ocl.c | 51 +++++++++++++++++++-------------------------------- ocl.h | 6 ++++-- 4 files changed, 29 insertions(+), 48 deletions(-) diff --git a/main.c b/main.c index 18374e07..17e1ca93 100644 --- a/main.c +++ b/main.c @@ -2966,7 +2966,6 @@ static void *gpuminer_thread(void *userdata) uint32_t *res, *blank_res; double gpu_ms_average = 7; int gpu = dev_from_id(thr_id); - struct cgpu_info *cgpu = mythr->cgpu; size_t globalThreads[1]; size_t localThreads[1]; @@ -2978,7 +2977,7 @@ static void *gpuminer_thread(void *userdata) struct work *work = make_work(); unsigned int threads; - unsigned const int vectors = cgpu->vwidth; + unsigned const int vectors = clState->preferred_vwidth; unsigned int hashes; unsigned int hashes_done = 0; @@ -3015,7 +3014,7 @@ static void *gpuminer_thread(void *userdata) } gettimeofday(&tv_start, NULL); - localThreads[0] = cgpu->work_size; + localThreads[0] = clState->work_size; set_threads_hashes(vectors, &threads, &hashes, &globalThreads[0], localThreads[0]); @@ -3029,7 +3028,7 @@ static void *gpuminer_thread(void *userdata) if (unlikely(status != CL_SUCCESS)) { applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; } - cgpu->status = LIFE_WELL; + mythr->cgpu->status = LIFE_WELL; if (opt_debug) applog(LOG_DEBUG, "Popping ping in gpuminer thread"); @@ -3156,7 +3155,7 @@ static void *gpuminer_thread(void *userdata) } if (unlikely(!gpu_devices[gpu])) { applog(LOG_WARNING, "Thread %d being disabled", thr_id); - mythr->rolling = cgpu->rolling = 0; + mythr->rolling = mythr->cgpu->rolling = 0; if (opt_debug) applog(LOG_DEBUG, "Popping wakeup ping in gpuminer thread"); @@ -4047,15 +4046,13 @@ int main (int argc, char *argv[]) /* start GPU mining threads */ for (j = 0; j < nDevs * opt_g_threads; j++) { int gpu = j % nDevs; - struct cgpu_info *cgpu; gpus[gpu].is_gpu = 1; gpus[gpu].cpu_gpu = gpu; thr = &thr_info[i]; thr->id = i; - cgpu = &gpus[gpu]; - thr->cgpu = cgpu; + thr->cgpu = &gpus[gpu]; thr->q = tq_new(); if (!thr->q) @@ -4071,7 +4068,7 @@ int main (int argc, char *argv[]) } applog(LOG_INFO, "Init GPU thread %i", i); - clStates[i] = initCl(cgpu, name, sizeof(name)); + clStates[i] = initCl(gpu, name, sizeof(name)); if (!clStates[i]) { applog(LOG_ERR, "Failed to init GPU thread %d", i); gpu_devices[i] = false; diff --git a/miner.h b/miner.h index ac9f6165..4a706511 100644 --- a/miner.h +++ b/miner.h @@ -152,11 +152,6 @@ struct cgpu_info { double efficiency; double utility; enum alive status; - - int hasBitAlign; - unsigned int vwidth; - size_t max_work_size; - size_t work_size; }; struct thr_info { diff --git a/ocl.c b/ocl.c index 21f6bd90..873bfa4c 100644 --- a/ocl.c +++ b/ocl.c @@ -267,16 +267,8 @@ void patch_opcodes(char *w, unsigned remaining) _clState *initCQ(_clState *clState, unsigned int gpu) { cl_int status = 0; - cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; - - clState->context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL, &status); - if (status != CL_SUCCESS) - { - applog(LOG_ERR, "Error: Creating Context. (clCreateContextFromType)"); - return NULL; - } - /* create a cl program executable for the device specified */ + /* create a cl program executable for all the devices specified */ status = clBuildProgram(clState->program, 1, &devices[gpu], NULL, NULL, NULL); if (status != CL_SUCCESS) { @@ -320,9 +312,8 @@ _clState *initCQ(_clState *clState, unsigned int gpu) return clState; } -_clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize) +_clState *initCl(unsigned int gpu, char *name, size_t nameSize) { - unsigned int gpu = cgpu->cpu_gpu; int patchbfi = 0; cl_int status = 0; size_t nDevices; @@ -367,7 +358,7 @@ _clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize) } find = strstr(extensions, camo); if (find) - cgpu->hasBitAlign = patchbfi = 1; + clState->hasBitAlign = patchbfi = 1; status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&clState->preferred_vwidth, NULL); if (status != CL_SUCCESS) { @@ -377,27 +368,26 @@ _clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize) if (opt_debug) applog(LOG_DEBUG, "Preferred vector width reported %d", clState->preferred_vwidth); - status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&cgpu->max_work_size, NULL); + status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&clState->max_work_size, NULL); if (status != CL_SUCCESS) { applog(LOG_ERR, "Error: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_WORK_GROUP_SIZE"); return NULL; } if (opt_debug) - applog(LOG_DEBUG, "Max work group size reported %d", cgpu->max_work_size); + applog(LOG_DEBUG, "Max work group size reported %d", clState->max_work_size); /* For some reason 2 vectors is still better even if the card says * otherwise, and many cards lie about their max so use 256 as max * unless explicitly set on the command line */ - cgpu->vwidth = clState->preferred_vwidth; if (clState->preferred_vwidth > 1) - cgpu->vwidth = 2; + clState->preferred_vwidth = 2; if (opt_vectors) - cgpu->vwidth = opt_vectors; - if (opt_worksize && opt_worksize <= cgpu->max_work_size) - cgpu->work_size = opt_worksize; + clState->preferred_vwidth = opt_vectors; + if (opt_worksize && opt_worksize <= clState->max_work_size) + clState->work_size = opt_worksize; else - cgpu->work_size = (cgpu->max_work_size <= 256 ? cgpu->max_work_size : 256) / - cgpu->vwidth; + clState->work_size = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / + clState->preferred_vwidth; /* Create binary filename based on parameters passed to opencl * compiler to ensure we only load a binary that matches what would @@ -409,7 +399,7 @@ _clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize) char filename[16]; if (chosen_kernel == KL_NONE) { - if (cgpu->hasBitAlign) + if (clState->hasBitAlign) chosen_kernel = KL_PHATK; else chosen_kernel = KL_POCLBM; @@ -452,14 +442,14 @@ _clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize) } strcat(binaryfilename, name); - if (cgpu->hasBitAlign) + if (clState->hasBitAlign) strcat(binaryfilename, "bitalign"); strcat(binaryfilename, "v"); - sprintf(numbuf, "%d", cgpu->vwidth); + sprintf(numbuf, "%d", clState->preferred_vwidth); strcat(binaryfilename, numbuf); strcat(binaryfilename, "w"); - sprintf(numbuf, "%d", (int)cgpu->work_size); + sprintf(numbuf, "%d", (int)clState->work_size); strcat(binaryfilename, numbuf); strcat(binaryfilename, "long"); sprintf(numbuf, "%d", (int)sizeof(long)); @@ -515,7 +505,7 @@ build: memcpy(source, rawsource, pl); /* Patch the source file with the preferred_vwidth */ - if (cgpu->vwidth > 1) { + if (clState->preferred_vwidth > 1) { char *find = strstr(source, "VECTORSX"); if (unlikely(!find)) { @@ -523,7 +513,7 @@ build: return NULL; } find += 7; // "VECTORS" - if (cgpu->vwidth == 2) + if (clState->preferred_vwidth == 2) strncpy(find, "2", 1); else strncpy(find, "4", 1); @@ -532,7 +522,7 @@ build: } /* Patch the source file defining BITALIGN */ - if (cgpu->hasBitAlign) { + if (clState->hasBitAlign) { char *find = strstr(source, "BITALIGNX"); if (unlikely(!find)) { @@ -690,11 +680,8 @@ built: free(binaries); free(binary_sizes); - /* We throw everything out now and create the real context we're using in initCQ */ - clReleaseContext(clState->context); - applog(LOG_INFO, "Initialising kernel %s with%s BFI_INT patching, %d vectors and worksize %d", - filename, patchbfi ? "" : "out", cgpu->vwidth, cgpu->work_size); + filename, patchbfi ? "" : "out", clState->preferred_vwidth, clState->work_size); return initCQ(clState, gpu); } diff --git a/ocl.h b/ocl.h index 2189fd46..a95f9726 100644 --- a/ocl.h +++ b/ocl.h @@ -7,7 +7,6 @@ #else #include #endif -#include "miner.h" typedef struct { cl_context context; @@ -15,13 +14,16 @@ typedef struct { cl_command_queue commandQueue; cl_program program; cl_mem outputBuffer; + int hasBitAlign; cl_uint preferred_vwidth; + size_t max_work_size; + size_t work_size; } _clState; extern char *file_contents(const char *filename, int *length); extern int clDevicesNum(); extern int preinit_devices(void); extern _clState *initCQ(_clState *clState, unsigned int gpu); -extern _clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize); +extern _clState *initCl(unsigned int gpu, char *name, size_t nameSize); #endif /* HAVE_OPENCL */ #endif /* __OCL_H__ */