Browse Source

Merge branch 'ckolivas-master' into merge-ckolivas-master

nfactor-troky
Znort 987 13 years ago
parent
commit
eed634a3ab
  1. 150
      main.c
  2. 6
      miner.h
  3. 279
      ocl.c
  4. 8
      ocl.h

150
main.c

@ -217,6 +217,8 @@ int longpoll_thr_id;
static int stage_thr_id; static int stage_thr_id;
static int watchdog_thr_id; static int watchdog_thr_id;
static int input_thr_id; static int input_thr_id;
static int gpur_thr_id;
static int cpur_thr_id;
static int total_threads; static int total_threads;
struct work_restart *work_restart = NULL; struct work_restart *work_restart = NULL;
@ -3464,7 +3466,6 @@ static void *gpuminer_thread(void *userdata)
uint32_t *res, *blank_res; uint32_t *res, *blank_res;
double gpu_ms_average = 7; double gpu_ms_average = 7;
int gpu = dev_from_id(thr_id); int gpu = dev_from_id(thr_id);
struct cgpu_info *cgpu = mythr->cgpu;
size_t globalThreads[1]; size_t globalThreads[1];
size_t localThreads[1]; size_t localThreads[1];
@ -3476,7 +3477,7 @@ static void *gpuminer_thread(void *userdata)
struct work *work = make_work(); struct work *work = make_work();
unsigned int threads; unsigned int threads;
unsigned const int vectors = cgpu->vwidth; unsigned const int vectors = clState->preferred_vwidth;
unsigned int hashes; unsigned int hashes;
unsigned int hashes_done = 0; unsigned int hashes_done = 0;
@ -3513,7 +3514,7 @@ static void *gpuminer_thread(void *userdata)
} }
gettimeofday(&tv_start, NULL); gettimeofday(&tv_start, NULL);
localThreads[0] = cgpu->work_size; localThreads[0] = clState->work_size;
set_threads_hashes(vectors, &threads, &hashes, &globalThreads[0], set_threads_hashes(vectors, &threads, &hashes, &globalThreads[0],
localThreads[0]); localThreads[0]);
@ -3527,7 +3528,7 @@ static void *gpuminer_thread(void *userdata)
if (unlikely(status != CL_SUCCESS)) if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; } { applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
cgpu->status = LIFE_WELL; mythr->cgpu->status = LIFE_WELL;
if (opt_debug) if (opt_debug)
applog(LOG_DEBUG, "Popping ping in gpuminer thread"); applog(LOG_DEBUG, "Popping ping in gpuminer thread");
@ -3654,7 +3655,7 @@ static void *gpuminer_thread(void *userdata)
} }
if (unlikely(!gpu_devices[gpu])) { if (unlikely(!gpu_devices[gpu])) {
applog(LOG_WARNING, "Thread %d being disabled", thr_id); applog(LOG_WARNING, "Thread %d being disabled", thr_id);
mythr->rolling = cgpu->rolling = 0; mythr->rolling = mythr->cgpu->rolling = 0;
if (opt_debug) if (opt_debug)
applog(LOG_DEBUG, "Popping wakeup ping in gpuminer thread"); applog(LOG_DEBUG, "Popping wakeup ping in gpuminer thread");
@ -3839,6 +3840,7 @@ static void restart_longpoll(void)
static void *reinit_cpu(void *userdata) static void *reinit_cpu(void *userdata)
{ {
pthread_detach(pthread_self());
#if 0 #if 0
struct cgpu_info *cgpu = (struct cgpu_info *)userdata; struct cgpu_info *cgpu = (struct cgpu_info *)userdata;
int cpu = cgpu->cpu_gpu; int cpu = cgpu->cpu_gpu;
@ -3870,16 +3872,34 @@ static void *reinit_cpu(void *userdata)
} }
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
/* We have only one thread that ever re-initialises GPUs, thus if any GPU
* init command fails due to a completely wedged GPU, the thread will never
* return, unable to harm other GPUs. If it does return, it means we only had
* a soft failure and then the reinit_gpu thread is ready to tackle another
* GPU */
static void *reinit_gpu(void *userdata) static void *reinit_gpu(void *userdata)
{ {
struct cgpu_info *cgpu = (struct cgpu_info *)userdata; struct thr_info *mythr = userdata;
int gpu = cgpu->cpu_gpu; struct cgpu_info *cgpu;
struct thr_info *thr; struct thr_info *thr;
struct timeval now; struct timeval now;
_clState *clState; char name[256];
int thr_id; int thr_id;
int gpu;
pthread_detach(pthread_self());
/* Send threads message to stop */ select_cgpu:
cgpu = tq_pop(mythr->q, NULL);
if (!cgpu)
goto out;
if (clDevicesNum() != nDevs) {
applog(LOG_WARNING, "Hardware not reporting same number of active devices, will not attempt to restart GPU");
goto out;
}
gpu = cgpu->cpu_gpu;
gpu_devices[gpu] = false; gpu_devices[gpu] = false;
for (thr_id = 0; thr_id < gpu_threads; thr_id ++) { for (thr_id = 0; thr_id < gpu_threads; thr_id ++) {
@ -3889,9 +3909,16 @@ static void *reinit_gpu(void *userdata)
thr = &thr_info[thr_id]; thr = &thr_info[thr_id];
thr->rolling = thr->cgpu->rolling = 0; thr->rolling = thr->cgpu->rolling = 0;
if (!pthread_cancel(*thr->pth)) { if (!pthread_cancel(*thr->pth)) {
applog(LOG_WARNING, "Thread still exists, killing it off"); applog(LOG_WARNING, "Thread %d still exists, killing it off", thr_id);
} else } else
applog(LOG_WARNING, "Thread no longer exists!"); applog(LOG_WARNING, "Thread %d no longer exists", thr_id);
}
for (thr_id = 0; thr_id < gpu_threads; thr_id ++) {
if (dev_from_id(thr_id) != gpu)
continue;
thr = &thr_info[thr_id];
/* Lose this ram cause we may get stuck here! */ /* Lose this ram cause we may get stuck here! */
//tq_freeze(thr->q); //tq_freeze(thr->q);
@ -3900,13 +3927,16 @@ static void *reinit_gpu(void *userdata)
if (!thr->q) if (!thr->q)
quit(1, "Failed to tq_new in reinit_gpu"); quit(1, "Failed to tq_new in reinit_gpu");
/* Create a new clstate */
applog(LOG_WARNING, "Attempting to create a new clState");
clState = initCQ(clStates[thr_id], gpu);
/* Lose this ram cause we may dereference in the dying thread! */ /* Lose this ram cause we may dereference in the dying thread! */
//free(clState); //free(clState);
applog(LOG_WARNING, "Command successful, attempting to create new thread");
applog(LOG_INFO, "Reinit GPU thread %d", thr_id);
clStates[thr_id] = initCl(gpu, name, sizeof(name));
if (!clStates[thr_id]) {
applog(LOG_ERR, "Failed to reinit GPU thread %d", thr_id);
goto out;
}
applog(LOG_INFO, "initCl() finished. Found %s", name);
if (unlikely(thr_info_create(thr, NULL, gpuminer_thread, thr))) { if (unlikely(thr_info_create(thr, NULL, gpuminer_thread, thr))) {
applog(LOG_ERR, "thread %d create failed", thr_id); applog(LOG_ERR, "thread %d create failed", thr_id);
@ -3918,70 +3948,31 @@ static void *reinit_gpu(void *userdata)
gettimeofday(&now, NULL); gettimeofday(&now, NULL);
get_datestamp(cgpu->init, &now); get_datestamp(cgpu->init, &now);
/* Try to re-enable it */
gpu_devices[gpu] = true; gpu_devices[gpu] = true;
for (thr_id = 0; thr_id < gpu_threads; thr_id ++) {
thr = &thr_info[thr_id];
if (dev_from_id(thr_id) == gpu)
tq_push(thr->q, &ping);
}
return NULL;
}
static void *ping_gputhread(void *userdata)
{
struct cgpu_info *cgpu = (struct cgpu_info *)userdata;
int gpu = cgpu->cpu_gpu;
struct thr_info *thr;
_clState *clState;
int thr_id;
for (thr_id = 0; thr_id < gpu_threads; thr_id ++) { for (thr_id = 0; thr_id < gpu_threads; thr_id ++) {
if (dev_from_id(thr_id) != gpu) if (dev_from_id(thr_id) != gpu)
continue; continue;
thr = &thr_info[thr_id]; thr = &thr_info[thr_id];
clState = clStates[thr_id];
tq_push(thr->q, &ping);
applog(LOG_WARNING, "Attempting to flush command queue of thread %d", thr_id);
clFlush(clState->commandQueue);
clFinish(clState->commandQueue);
tq_push(thr->q, &ping); tq_push(thr->q, &ping);
} }
goto select_cgpu;
out:
return NULL; return NULL;
} }
/*
 * Ping the mining threads of a GPU from a separate helper thread.
 *
 * The actual work is done by ping_gputhread(), which receives cgpu as its
 * userdata argument.  Running it on its own thread keeps the caller from
 * waiting on it (presumably because the OpenCL flush/finish calls it makes
 * can block on an unresponsive device -- confirm against ping_gputhread).
 *
 * cgpu: device whose threads should be pinged; passed through untouched.
 *
 * Failure to create the helper thread is logged and otherwise ignored.
 */
static void ping_gpu(struct cgpu_info *cgpu)
{
	pthread_t ping_thread;

	if (unlikely(pthread_create(&ping_thread, NULL, ping_gputhread, (void *)cgpu))) {
		applog(LOG_ERR, "Failed to create ping thread");
		return;
	}

	/* The handle goes out of scope here and nobody ever joins the helper,
	 * so detach it; otherwise each ping leaks the thread's resources. */
	pthread_detach(ping_thread);
}
#else #else
static void *reinit_gpu(void *userdata) static void *reinit_gpu(void *userdata)
{ {
} }
/* Built without HAVE_OPENCL: there is no GPU to ping, so this is a no-op. */
static void ping_gpu(struct cgpu_info *cgpu)
{
	(void)cgpu;	/* intentionally unused */
}
#endif #endif
static void reinit_device(struct cgpu_info *cgpu) static void reinit_device(struct cgpu_info *cgpu)
{ {
pthread_t resus_thread;
void *reinit;
if (cgpu->is_gpu) if (cgpu->is_gpu)
reinit = reinit_gpu; tq_push(thr_info[gpur_thr_id].q, cgpu);
else else
reinit = reinit_cpu; tq_push(thr_info[cpur_thr_id].q, cgpu);
if (unlikely(pthread_create(&resus_thread, NULL, reinit, (void *)cgpu)))
applog(LOG_ERR, "Failed to create reinit thread");
} }
/* Determine which are the first threads belonging to a device and if they're /* Determine which are the first threads belonging to a device and if they're
@ -4080,7 +4071,7 @@ static void *watchdog_thread(void *userdata)
gpus[gpu].status = LIFE_SICK; gpus[gpu].status = LIFE_SICK;
applog(LOG_ERR, "Thread %d idle for more than 60 seconds, GPU %d declared SICK!", i, gpu); applog(LOG_ERR, "Thread %d idle for more than 60 seconds, GPU %d declared SICK!", i, gpu);
/* Sent it a ping, it might respond */ /* Sent it a ping, it might respond */
ping_gpu(thr->cgpu); tq_push(thr->q, &ping);
} else if (now.tv_sec - thr->last.tv_sec > 300 && gpus[i].status == LIFE_SICK) { } else if (now.tv_sec - thr->last.tv_sec > 300 && gpus[i].status == LIFE_SICK) {
gpus[gpu].status = LIFE_DEAD; gpus[gpu].status = LIFE_DEAD;
applog(LOG_ERR, "Thread %d idle for more than 5 minutes, GPU %d declared DEAD!", i, gpu); applog(LOG_ERR, "Thread %d idle for more than 5 minutes, GPU %d declared DEAD!", i, gpu);
@ -4340,7 +4331,7 @@ out:
int main (int argc, char *argv[]) int main (int argc, char *argv[])
{ {
unsigned int i, j = 0, x, y, pools_active = 0; unsigned int i, x, y, pools_active = 0;
struct sigaction handler; struct sigaction handler;
struct thr_info *thr; struct thr_info *thr;
char name[256]; char name[256];
@ -4568,7 +4559,7 @@ int main (int argc, char *argv[])
mining_threads = opt_n_threads + gpu_threads; mining_threads = opt_n_threads + gpu_threads;
total_threads = mining_threads + 5; total_threads = mining_threads + 7;
work_restart = calloc(total_threads, sizeof(*work_restart)); work_restart = calloc(total_threads, sizeof(*work_restart));
if (!work_restart) if (!work_restart)
quit(1, "Failed to calloc work_restart"); quit(1, "Failed to calloc work_restart");
@ -4649,14 +4640,9 @@ int main (int argc, char *argv[])
quit(0, "No pools active! Exiting."); quit(0, "No pools active! Exiting.");
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
i = 0;
if (nDevs > 0)
preinit_devices();
/* start GPU mining threads */ /* start GPU mining threads */
for (j = 0; j < nDevs * opt_g_threads; j++) { for (i = 0; i < nDevs * opt_g_threads; i++) {
int gpu = j % nDevs; int gpu = i % nDevs;
struct cgpu_info *cgpu; struct cgpu_info *cgpu;
struct timeval now; struct timeval now;
@ -4665,8 +4651,7 @@ int main (int argc, char *argv[])
thr = &thr_info[i]; thr = &thr_info[i];
thr->id = i; thr->id = i;
cgpu = &gpus[gpu]; cgpu = thr->cgpu = &gpus[gpu];
thr->cgpu = cgpu;
thr->q = tq_new(); thr->q = tq_new();
if (!thr->q) if (!thr->q)
@ -4682,11 +4667,10 @@ int main (int argc, char *argv[])
} }
applog(LOG_INFO, "Init GPU thread %i", i); applog(LOG_INFO, "Init GPU thread %i", i);
clStates[i] = initCl(cgpu, name, sizeof(name)); clStates[i] = initCl(gpu, name, sizeof(name));
if (!clStates[i]) { if (!clStates[i]) {
applog(LOG_ERR, "Failed to init GPU thread %d", i); applog(LOG_ERR, "Failed to init GPU thread %d", i);
gpu_devices[i] = false; gpu_devices[i] = false;
strcat(cgpu->init, "Never");
continue; continue;
} }
applog(LOG_INFO, "initCl() finished. Found %s", name); applog(LOG_INFO, "initCl() finished. Found %s", name);
@ -4695,8 +4679,6 @@ int main (int argc, char *argv[])
if (unlikely(thr_info_create(thr, NULL, gpuminer_thread, thr))) if (unlikely(thr_info_create(thr, NULL, gpuminer_thread, thr)))
quit(1, "thread %d create failed", i); quit(1, "thread %d create failed", i);
i++;
} }
applog(LOG_INFO, "%d gpu miner threads started", gpu_threads); applog(LOG_INFO, "%d gpu miner threads started", gpu_threads);
@ -4742,6 +4724,24 @@ int main (int argc, char *argv[])
quit(1, "input thread create failed"); quit(1, "input thread create failed");
pthread_detach(*thr->pth); pthread_detach(*thr->pth);
/* Create reinit cpu thread */
cpur_thr_id = mining_threads + 5;
thr = &thr_info[cpur_thr_id];
thr->q = tq_new();
if (!thr->q)
quit(1, "tq_new failed for cpur_thr_id");
if (thr_info_create(thr, NULL, reinit_cpu, thr))
quit(1, "reinit_cpu thread create failed");
/* Create reinit gpu thread */
gpur_thr_id = mining_threads + 6;
thr = &thr_info[gpur_thr_id];
thr->q = tq_new();
if (!thr->q)
quit(1, "tq_new failed for gpur_thr_id");
if (thr_info_create(thr, NULL, reinit_gpu, thr))
quit(1, "reinit_gpu thread create failed");
/* main loop - simply wait for workio thread to exit */ /* main loop - simply wait for workio thread to exit */
pthread_join(*thr_info[work_thr_id].pth, NULL); pthread_join(*thr_info[work_thr_id].pth, NULL);
applog(LOG_INFO, "workio thread dead, exiting."); applog(LOG_INFO, "workio thread dead, exiting.");

6
miner.h

@ -152,12 +152,6 @@ struct cgpu_info {
double efficiency; double efficiency;
double utility; double utility;
enum alive status; enum alive status;
int hasBitAlign;
unsigned int vwidth;
size_t max_work_size;
size_t work_size;
char init[40]; char init[40];
}; };

279
ocl.c

@ -52,8 +52,6 @@ char *file_contents(const char *filename, int *length)
return (char*)buffer; return (char*)buffer;
} }
static cl_uint numDevices;
int clDevicesNum() { int clDevicesNum() {
cl_int status = 0; cl_int status = 0;
@ -113,95 +111,6 @@ int clDevicesNum() {
return numDevices; return numDevices;
} }
static cl_platform_id platform = NULL;
static cl_device_id *devices;
/*
 * Pick the OpenCL platform and enumerate its GPU devices.
 *
 * Results are stored in the file-scope variables `platform`, `numDevices`
 * and `devices`:
 *   - `platform` becomes the first platform whose vendor string is
 *     "Advanced Micro Devices, Inc.", or the last platform queried when no
 *     such vendor is present;
 *   - `devices` becomes a malloc'ed array of `numDevices` GPU device ids,
 *     which this function never frees on success.
 * The name of every device found is logged at LOG_INFO.
 *
 * Returns 0 on success and -1 on any failure, including "no platform" and
 * "no GPU devices".  A failure while merely reading a device name still
 * returns -1 but leaves the already-fetched device list in place.
 */
int preinit_devices(void)
{
	cl_platform_id *platforms = NULL;
	cl_uint numPlatforms = 0;
	cl_int status;
	cl_uint i;

	status = clGetPlatformIDs(0, NULL, &numPlatforms);
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error: Getting Platforms. (clGetPlatformsIDs)");
		return -1;
	}

	if (numPlatforms > 0) {
		platforms = malloc(numPlatforms * sizeof(*platforms));
		if (!platforms) {
			applog(LOG_ERR, "Error: Failed to allocate platform list");
			return -1;
		}

		status = clGetPlatformIDs(numPlatforms, platforms, NULL);
		if (status != CL_SUCCESS) {
			applog(LOG_ERR, "Error: Getting Platform Ids. (clGetPlatformsIDs)");
			free(platforms);	/* was leaked on this path */
			return -1;
		}

		for (i = 0; i < numPlatforms; ++i) {
			char pbuff[100];

			status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR,
						   sizeof(pbuff), pbuff, NULL);
			if (status != CL_SUCCESS) {
				applog(LOG_ERR, "Error: Getting Platform Info. (clGetPlatformInfo)");
				free(platforms);
				return -1;
			}

			/* Remember every platform as we go so that the last one
			 * is used as a fallback when no AMD platform exists. */
			platform = platforms[i];
			if (!strcmp(pbuff, "Advanced Micro Devices, Inc."))
				break;
		}
		free(platforms);
	}

	if (platform == NULL) {
		/* OpenCL does not set errno, so perror() would append an
		 * unrelated system error string; log through applog instead. */
		applog(LOG_ERR, "NULL platform found!");
		return -1;
	}

	status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error: Getting Device IDs (num)");
		return -1;
	}

	if (numDevices == 0)
		return -1;

	devices = malloc(numDevices * sizeof(*devices));
	if (!devices) {
		applog(LOG_ERR, "Error: Failed to allocate device list");
		numDevices = 0;
		return -1;
	}

	/* Now, get the device list data */
	status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error: Getting Device IDs (list)");
		/* The array contents are indeterminate: drop it and zero the
		 * count so index checks against numDevices reject everything
		 * instead of handing out garbage device ids. */
		free(devices);
		devices = NULL;
		numDevices = 0;
		return -1;
	}

	applog(LOG_INFO, "List of devices:");
	for (i = 0; i < numDevices; i++) {
		char pbuff[100];

		status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
		if (status != CL_SUCCESS) {
			/* The device list itself is valid; keep it for callers
			 * that ignore the return value. */
			applog(LOG_ERR, "Error: Getting Device Info");
			return -1;
		}
		applog(LOG_INFO, "\t%i\t%s", (int)i, pbuff);
	}

	return 0;
}
static int advance(char **area, unsigned *remaining, const char *marker) static int advance(char **area, unsigned *remaining, const char *marker)
{ {
char *find = memmem(*area, *remaining, marker, strlen(marker)); char *find = memmem(*area, *remaining, marker, strlen(marker));
@ -264,80 +173,94 @@ void patch_opcodes(char *w, unsigned remaining)
} }
} }
_clState *initCQ(_clState *clState, unsigned int gpu) _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
{ {
int patchbfi = 0;
cl_int status = 0; cl_int status = 0;
cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; unsigned int i;
clState->context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL, &status); _clState *clState = calloc(1, sizeof(_clState));
cl_uint numPlatforms;
cl_platform_id platform = NULL;
status = clGetPlatformIDs(0, NULL, &numPlatforms);
if (status != CL_SUCCESS) if (status != CL_SUCCESS)
{ {
applog(LOG_ERR, "Error: Creating Context. (clCreateContextFromType)"); applog(LOG_ERR, "Error: Getting Platforms. (clGetPlatformsIDs)");
return NULL; return NULL;
} }
/* create a cl program executable for the device specified */ if (numPlatforms > 0)
status = clBuildProgram(clState->program, 1, &devices[gpu], NULL, NULL, NULL); {
cl_platform_id* platforms = (cl_platform_id *)malloc(numPlatforms*sizeof(cl_platform_id));
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
if (status != CL_SUCCESS) if (status != CL_SUCCESS)
{ {
applog(LOG_ERR, "Error: Building Program (clBuildProgram)"); applog(LOG_ERR, "Error: Getting Platform Ids. (clGetPlatformsIDs)");
size_t logSize;
status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
char *log = malloc(logSize);
status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, logSize, log, NULL);
applog(LOG_INFO, "%s", log);
return NULL; return NULL;
} }
/* get a kernel object handle for a kernel with the given name */ for(i = 0; i < numPlatforms; ++i)
clState->kernel = clCreateKernel(clState->program, "search", &status); {
char pbuff[100];
status = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuff), pbuff, NULL);
if (status != CL_SUCCESS) if (status != CL_SUCCESS)
{ {
applog(LOG_ERR, "Error: Creating Kernel from program. (clCreateKernel)"); applog(LOG_ERR, "Error: Getting Platform Info. (clGetPlatformInfo)");
free(platforms);
return NULL; return NULL;
} }
platform = platforms[i];
///////////////////////////////////////////////////////////////// if (!strcmp(pbuff, "Advanced Micro Devices, Inc."))
// Create an OpenCL command queue
/////////////////////////////////////////////////////////////////
clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu],
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &status);
if (status != CL_SUCCESS) /* Try again without OOE enable */
clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu], 0 , &status);
if (status != CL_SUCCESS)
{ {
applog(LOG_ERR, "Creating Command Queue. (clCreateCommandQueue)"); break;
}
}
free(platforms);
}
if (platform == NULL) {
perror("NULL platform found!\n");
return NULL; return NULL;
} }
clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, BUFFERSIZE, NULL, &status); size_t nDevices;
if (status != CL_SUCCESS) { cl_uint numDevices;
applog(LOG_ERR, "Error: clCreateBuffer (outputBuffer)"); status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
if (status != CL_SUCCESS)
{
applog(LOG_ERR, "Error: Getting Device IDs (num)");
return NULL; return NULL;
} }
return clState; cl_device_id *devices;
} if (numDevices > 0 ) {
devices = (cl_device_id *)malloc(numDevices*sizeof(cl_device_id));
_clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize) /* Now, get the device list data */
{
unsigned int gpu = cgpu->cpu_gpu;
int patchbfi = 0;
cl_int status = 0;
size_t nDevices;
_clState *clState = calloc(1, sizeof(_clState)); status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
if (status != CL_SUCCESS)
{
applog(LOG_ERR, "Error: Getting Device IDs (list)");
return NULL;
}
cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; applog(LOG_INFO, "List of devices:");
clState->context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL, &status); unsigned int i;
for(i=0; i<numDevices; i++) {
char pbuff[100];
status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
if (status != CL_SUCCESS) if (status != CL_SUCCESS)
{ {
applog(LOG_ERR, "Error: Creating Context. (clCreateContextFromType)"); applog(LOG_ERR, "Error: Getting Device Info");
return NULL; return NULL;
} }
applog(LOG_INFO, "\t%i\t%s", i, pbuff);
}
if (gpu < numDevices) { if (gpu < numDevices) {
char pbuff[100]; char pbuff[100];
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_NAME, sizeof(pbuff), pbuff, &nDevices); status = clGetDeviceInfo(devices[gpu], CL_DEVICE_NAME, sizeof(pbuff), pbuff, &nDevices);
@ -354,6 +277,17 @@ _clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize)
return NULL; return NULL;
} }
} else return NULL;
cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 };
clState->context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL, &status);
if (status != CL_SUCCESS)
{
applog(LOG_ERR, "Error: Creating Context. (clCreateContextFromType)");
return NULL;
}
/* Check for BFI INT support. Hopefully people don't mix devices with /* Check for BFI INT support. Hopefully people don't mix devices with
* and without it! */ * and without it! */
char * extensions = malloc(1024); char * extensions = malloc(1024);
@ -367,7 +301,7 @@ _clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize)
} }
find = strstr(extensions, camo); find = strstr(extensions, camo);
if (find) if (find)
cgpu->hasBitAlign = patchbfi = 1; clState->hasBitAlign = patchbfi = 1;
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&clState->preferred_vwidth, NULL); status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&clState->preferred_vwidth, NULL);
if (status != CL_SUCCESS) { if (status != CL_SUCCESS) {
@ -377,27 +311,26 @@ _clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize)
if (opt_debug) if (opt_debug)
applog(LOG_DEBUG, "Preferred vector width reported %d", clState->preferred_vwidth); applog(LOG_DEBUG, "Preferred vector width reported %d", clState->preferred_vwidth);
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&cgpu->max_work_size, NULL); status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&clState->max_work_size, NULL);
if (status != CL_SUCCESS) { if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_WORK_GROUP_SIZE"); applog(LOG_ERR, "Error: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_WORK_GROUP_SIZE");
return NULL; return NULL;
} }
if (opt_debug) if (opt_debug)
applog(LOG_DEBUG, "Max work group size reported %d", cgpu->max_work_size); applog(LOG_DEBUG, "Max work group size reported %d", clState->max_work_size);
/* For some reason 2 vectors is still better even if the card says /* For some reason 2 vectors is still better even if the card says
* otherwise, and many cards lie about their max so use 256 as max * otherwise, and many cards lie about their max so use 256 as max
* unless explicitly set on the command line */ * unless explicitly set on the command line */
cgpu->vwidth = clState->preferred_vwidth;
if (clState->preferred_vwidth > 1) if (clState->preferred_vwidth > 1)
cgpu->vwidth = 2; clState->preferred_vwidth = 2;
if (opt_vectors) if (opt_vectors)
cgpu->vwidth = opt_vectors; clState->preferred_vwidth = opt_vectors;
if (opt_worksize && opt_worksize <= cgpu->max_work_size) if (opt_worksize && opt_worksize <= clState->max_work_size)
cgpu->work_size = opt_worksize; clState->work_size = opt_worksize;
else else
cgpu->work_size = (cgpu->max_work_size <= 256 ? cgpu->max_work_size : 256) / clState->work_size = (clState->max_work_size <= 256 ? clState->max_work_size : 256) /
cgpu->vwidth; clState->preferred_vwidth;
/* Create binary filename based on parameters passed to opencl /* Create binary filename based on parameters passed to opencl
* compiler to ensure we only load a binary that matches what would * compiler to ensure we only load a binary that matches what would
@ -409,7 +342,7 @@ _clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize)
char filename[16]; char filename[16];
if (chosen_kernel == KL_NONE) { if (chosen_kernel == KL_NONE) {
if (cgpu->hasBitAlign) if (clState->hasBitAlign)
chosen_kernel = KL_PHATK; chosen_kernel = KL_PHATK;
else else
chosen_kernel = KL_POCLBM; chosen_kernel = KL_POCLBM;
@ -452,14 +385,14 @@ _clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize)
} }
strcat(binaryfilename, name); strcat(binaryfilename, name);
if (cgpu->hasBitAlign) if (clState->hasBitAlign)
strcat(binaryfilename, "bitalign"); strcat(binaryfilename, "bitalign");
strcat(binaryfilename, "v"); strcat(binaryfilename, "v");
sprintf(numbuf, "%d", cgpu->vwidth); sprintf(numbuf, "%d", clState->preferred_vwidth);
strcat(binaryfilename, numbuf); strcat(binaryfilename, numbuf);
strcat(binaryfilename, "w"); strcat(binaryfilename, "w");
sprintf(numbuf, "%d", (int)cgpu->work_size); sprintf(numbuf, "%d", (int)clState->work_size);
strcat(binaryfilename, numbuf); strcat(binaryfilename, numbuf);
strcat(binaryfilename, "long"); strcat(binaryfilename, "long");
sprintf(numbuf, "%d", (int)sizeof(long)); sprintf(numbuf, "%d", (int)sizeof(long));
@ -515,7 +448,7 @@ build:
memcpy(source, rawsource, pl); memcpy(source, rawsource, pl);
/* Patch the source file with the preferred_vwidth */ /* Patch the source file with the preferred_vwidth */
if (cgpu->vwidth > 1) { if (clState->preferred_vwidth > 1) {
char *find = strstr(source, "VECTORSX"); char *find = strstr(source, "VECTORSX");
if (unlikely(!find)) { if (unlikely(!find)) {
@ -523,7 +456,7 @@ build:
return NULL; return NULL;
} }
find += 7; // "VECTORS" find += 7; // "VECTORS"
if (cgpu->vwidth == 2) if (clState->preferred_vwidth == 2)
strncpy(find, "2", 1); strncpy(find, "2", 1);
else else
strncpy(find, "4", 1); strncpy(find, "4", 1);
@ -532,7 +465,7 @@ build:
} }
/* Patch the source file defining BITALIGN */ /* Patch the source file defining BITALIGN */
if (cgpu->hasBitAlign) { if (clState->hasBitAlign) {
char *find = strstr(source, "BITALIGNX"); char *find = strstr(source, "BITALIGNX");
if (unlikely(!find)) { if (unlikely(!find)) {
@ -690,13 +623,51 @@ built:
free(binaries); free(binaries);
free(binary_sizes); free(binary_sizes);
/* We throw everything out now and create the real context we're using in initCQ */
clReleaseContext(clState->context);
applog(LOG_INFO, "Initialising kernel %s with%s BFI_INT patching, %d vectors and worksize %d", applog(LOG_INFO, "Initialising kernel %s with%s BFI_INT patching, %d vectors and worksize %d",
filename, patchbfi ? "" : "out", cgpu->vwidth, cgpu->work_size); filename, patchbfi ? "" : "out", clState->preferred_vwidth, clState->work_size);
/* create a cl program executable for all the devices specified */
status = clBuildProgram(clState->program, 1, &devices[gpu], NULL, NULL, NULL);
if (status != CL_SUCCESS)
{
applog(LOG_ERR, "Error: Building Program (clBuildProgram)");
size_t logSize;
status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
char *log = malloc(logSize);
status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, logSize, log, NULL);
applog(LOG_INFO, "%s", log);
return NULL;
}
return initCQ(clState, gpu); /* get a kernel object handle for a kernel with the given name */
clState->kernel = clCreateKernel(clState->program, "search", &status);
if (status != CL_SUCCESS)
{
applog(LOG_ERR, "Error: Creating Kernel from program. (clCreateKernel)");
return NULL;
}
/////////////////////////////////////////////////////////////////
// Create an OpenCL command queue
/////////////////////////////////////////////////////////////////
clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu],
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &status);
if (status != CL_SUCCESS) /* Try again without OOE enable */
clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu], 0 , &status);
if (status != CL_SUCCESS)
{
applog(LOG_ERR, "Creating Command Queue. (clCreateCommandQueue)");
return NULL;
}
clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, BUFFERSIZE, NULL, &status);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error: clCreateBuffer (outputBuffer)");
return NULL;
}
return clState;
} }
#endif /* HAVE_OPENCL */ #endif /* HAVE_OPENCL */

8
ocl.h

@ -7,7 +7,6 @@
#else #else
#include <CL/cl.h> #include <CL/cl.h>
#endif #endif
#include "miner.h"
typedef struct { typedef struct {
cl_context context; cl_context context;
@ -15,13 +14,14 @@ typedef struct {
cl_command_queue commandQueue; cl_command_queue commandQueue;
cl_program program; cl_program program;
cl_mem outputBuffer; cl_mem outputBuffer;
int hasBitAlign;
cl_uint preferred_vwidth; cl_uint preferred_vwidth;
size_t max_work_size;
size_t work_size;
} _clState; } _clState;
extern char *file_contents(const char *filename, int *length); extern char *file_contents(const char *filename, int *length);
extern int clDevicesNum(); extern int clDevicesNum();
extern int preinit_devices(void); extern _clState *initCl(unsigned int gpu, char *name, size_t nameSize);
extern _clState *initCQ(_clState *clState, unsigned int gpu);
extern _clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize);
#endif /* HAVE_OPENCL */ #endif /* HAVE_OPENCL */
#endif /* __OCL_H__ */ #endif /* __OCL_H__ */

Loading…
Cancel
Save