From 26546ad5b98ec0170f1c02c1f49699f37b4f061d Mon Sep 17 00:00:00 2001 From: ckolivas Date: Fri, 24 Jun 2011 09:17:09 +1000 Subject: [PATCH] Make the optimisations per-gpu card and update code to work properly with multiple cards. --- cpu-miner.c | 12 ++++---- ocl.c | 84 ++++++++++++++++++++++++++--------------------------- ocl.h | 6 ++-- 3 files changed, 50 insertions(+), 52 deletions(-) diff --git a/cpu-miner.c b/cpu-miner.c index 2f702239..998e8811 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -825,12 +825,12 @@ static void *gpuminer_thread(void *userdata) struct work *work = malloc(sizeof(struct work)); bool need_work = true; unsigned int threads = 1 << (15 + scan_intensity); - unsigned int vectors = preferred_vwidth; + unsigned int vectors = clState->preferred_vwidth; unsigned int hashes_done = threads * vectors; gettimeofday(&tv_start, NULL); globalThreads[0] = threads; - localThreads[0] = max_work_size / vectors; + localThreads[0] = clState->max_work_size / vectors; while (1) { struct timeval tv_end, diff; @@ -1165,7 +1165,7 @@ static void parse_cmdline(int argc, char *argv[]) int main (int argc, char *argv[]) { struct thr_info *thr; - int i; + unsigned int i; char name[32]; #ifdef WIN32 @@ -1177,7 +1177,7 @@ int main (int argc, char *argv[]) nDevs = clDevicesNum(); if (opt_ndevs) { - printf("%i\n", nDevs); + applog(LOG_INFO, "%i", nDevs); return nDevs; } @@ -1258,13 +1258,13 @@ int main (int argc, char *argv[]) if (!thr->q) return 1; - printf("Init GPU %i\n", i); + applog(LOG_INFO, "Init GPU %i", i); clStates[i] = initCl(i, name, sizeof(name)); if (!clStates[i]) { applog(LOG_ERR, "Failed to init GPU %d", i); continue; } - printf("initCl() finished. Found %s\n", name); + applog(LOG_INFO, "initCl() finished. Found %s", name); if (unlikely(pthread_create(&thr->pth, NULL, gpuminer_thread, thr))) { applog(LOG_ERR, "thread %d create failed", i); diff --git a/ocl.c b/ocl.c index 41730262..8f39b9f5 100644 --- a/ocl.c +++ b/ocl.c @@ -14,16 +14,13 @@ #include "findnonce.h" #include "ocl.h" -cl_uint preferred_vwidth = 1; -size_t max_work_size; - char *file_contents(const char *filename, int *length) { FILE *f = fopen(filename, "r"); void *buffer; if (!f) { - fprintf(stderr, "Unable to open %s for reading\n", filename); + applog(LOG_ERR, "Unable to open %s for reading", filename); return NULL; } @@ -102,7 +99,7 @@ void advance(char **area, unsigned *remaining, const char *marker) char *find = memmem(*area, *remaining, marker, strlen(marker)); if (!find) - fprintf(stderr, "Marker \"%s\" not found\n", marker), exit(1); + applog(LOG_ERR, "Marker \"%s\" not found", marker), exit(1); *remaining -= find - *area; *area = find; } @@ -155,7 +152,7 @@ void patch_opcodes(char *w, unsigned remaining) } } -_clState *initCl(int gpu, char *name, size_t nameSize) +_clState *initCl(unsigned int gpu, char *name, size_t nameSize) { bool hasBitAlign = false; cl_int status = 0; @@ -215,7 +212,7 @@ _clState *initCl(int gpu, char *name, size_t nameSize) } cl_device_id *devices; - if(numDevices > 0 ) { + if (numDevices > 0 ) { devices = (cl_device_id *)malloc(numDevices*sizeof(cl_device_id)); /* Now, get the device list data */ @@ -227,7 +224,7 @@ _clState *initCl(int gpu, char *name, size_t nameSize) return NULL; } - printf("List of devices:\n"); + applog(LOG_INFO, "List of devices:"); unsigned int i; for(i=0; i= 0 && gpu < numDevices) { + if (gpu < numDevices) { char pbuff[100]; status = clGetDeviceInfo(devices[gpu], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL); if(status != CL_SUCCESS) @@ -251,7 +248,7 @@ _clState *initCl(int gpu, char *name, size_t nameSize) return NULL; } - printf("Selected %i: %s\n", gpu, pbuff); + applog(LOG_INFO, "Selected %i: %s", gpu, pbuff); strncpy(name, pbuff, nameSize); } else { printf("Invalid GPU %i\n", gpu); @@ -272,36 +269,32 @@ _clState *initCl(int gpu, char *name, size_t nameSize) /* Check for BFI INT support. Hopefully people don't mix devices with * and without it! */ char * extensions = malloc(1024); + const char * camo = "cl_amd_media_ops"; + char *find; - /* This needs to create separate programs for each GPU, but for now - * assume they all have the same capabilities D: */ - for (i = 0; i < numDevices; i++) { - const char * camo = "cl_amd_media_ops"; - char *find; - - status = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, 1024, (void *)extensions, NULL); - if (status != CL_SUCCESS) { - applog(LOG_ERR, "Error: Failed to clGetDeviceInfo when trying to get CL_DEVICE_EXTENSIONS"); - return NULL; - } - find = strstr(extensions, camo); - if (find) - hasBitAlign = true; + status = clGetDeviceInfo(devices[gpu], CL_DEVICE_EXTENSIONS, 1024, (void *)extensions, NULL); + if (status != CL_SUCCESS) { + applog(LOG_ERR, "Error: Failed to clGetDeviceInfo when trying to get CL_DEVICE_EXTENSIONS"); + return NULL; + } + find = strstr(extensions, camo); + if (find) + hasBitAlign = true; - status = clGetDeviceInfo(devices[i], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&preferred_vwidth, NULL); - if (status != CL_SUCCESS) { - applog(LOG_ERR, "Error: Failed to clGetDeviceInfo when trying to get CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT"); - return NULL; - } - applog(LOG_INFO, "Preferred vector width reported %d", preferred_vwidth); + status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&clState->preferred_vwidth, NULL); + if (status != CL_SUCCESS) { + applog(LOG_ERR, "Error: Failed to clGetDeviceInfo when trying to get CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT"); + return NULL; + } + applog(LOG_INFO, "Preferred vector width reported %d", clState->preferred_vwidth); - status = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&max_work_size, NULL); - if (status != CL_SUCCESS) { - applog(LOG_ERR, "Error: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_WORK_GROUP_SIZE"); - return NULL; - } - applog(LOG_INFO, "Max work group size reported %d", max_work_size); + status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&clState->max_work_size, NULL); + if (status != CL_SUCCESS) { + applog(LOG_ERR, "Error: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_WORK_GROUP_SIZE"); + return NULL; } + if (opt_debug) + applog(LOG_DEBUG, "Max work group size reported %d", clState->max_work_size); ///////////////////////////////////////////////////////////////// // Load CL file, build CL program object, create CL kernel object @@ -316,7 +309,7 @@ _clState *initCl(int gpu, char *name, size_t nameSize) size_t sourceSize[] = {(size_t)pl}; /* Patch the source file with the preferred_vwidth */ - if (preferred_vwidth > 1) { + if (clState->preferred_vwidth > 1) { char *find = strstr(source, "VECTORSX"); if (unlikely(!find)) { @@ -324,11 +317,12 @@ _clState *initCl(int gpu, char *name, size_t nameSize) return NULL; } find += 7; // "VECTORS" - if (preferred_vwidth == 2) + if (clState->preferred_vwidth == 2) strncpy(find, "2", 1); else strncpy(find, "4", 1); - applog(LOG_INFO, "Patched source to suit %d vectors", preferred_vwidth); + if (opt_debug) + applog(LOG_DEBUG, "Patched source to suit %d vectors", clState->preferred_vwidth); } /* Patch the source file defining BFI_INT */ @@ -341,9 +335,13 @@ _clState *initCl(int gpu, char *name, size_t nameSize) } find += 7; // "BFI_INT" strncpy(find, " ", 1); - applog(LOG_INFO, "cl_amd_media_ops found, patched source with BFI_INT"); - } else - applog(LOG_INFO, "cl_amd_media_ops not found, will not BFI_INT patch"); + if (opt_debug) + applog(LOG_DEBUG, "cl_amd_media_ops found, patched source with BFI_INT"); + } else if (opt_debug) + applog(LOG_DEBUG, "cl_amd_media_ops not found, will not BFI_INT patch"); + + applog(LOG_INFO, "Initialising kernel with%s BFI_INT patching, %d vectors and %d worksize", + hasBitAlign ? "" : "out", clState->preferred_vwidth, clState->max_work_size); clState->program = clCreateProgramWithSource(clState->context, 1, (const char **)&source, sourceSize, &status); if(status != CL_SUCCESS) diff --git a/ocl.h b/ocl.h index 5c2e9dd5..311fee14 100644 --- a/ocl.h +++ b/ocl.h @@ -12,12 +12,12 @@ typedef struct { cl_command_queue commandQueue; cl_program program; cl_mem outputBuffer; + cl_uint preferred_vwidth; + size_t max_work_size; } _clState; extern char *file_contents(const char *filename, int *length); extern int clDevicesNum(); -extern _clState *initCl(int gpu, char *name, size_t nameSize); -extern cl_uint preferred_vwidth; -extern size_t max_work_size; +extern _clState *initCl(unsigned int gpu, char *name, size_t nameSize); #endif /* __OCL_H__ */