From 55da7b5bb14436b079e5580edba185bcb2fdb2cf Mon Sep 17 00:00:00 2001 From: Wolf Date: Tue, 7 Apr 2015 04:38:44 -0500 Subject: [PATCH 1/2] Removed bitalign/opcode patching code. It's dead, and no one else is gonna do it. --- Makefile.am | 1 - ocl.c | 38 ++---------------- ocl.h | 1 - ocl/build_kernel.c | 46 +--------------------- ocl/build_kernel.h | 3 -- ocl/patch_kernel.c | 97 ---------------------------------------------- ocl/patch_kernel.h | 10 ----- 7 files changed, 6 insertions(+), 190 deletions(-) delete mode 100644 ocl/patch_kernel.c delete mode 100644 ocl/patch_kernel.h diff --git a/Makefile.am b/Makefile.am index 39da01b1..73073459 100644 --- a/Makefile.am +++ b/Makefile.am @@ -44,7 +44,6 @@ sgminer_SOURCES += pool.c pool.h sgminer_SOURCES += algorithm.c algorithm.h sgminer_SOURCES += config_parser.c config_parser.h sgminer_SOURCES += events.c events.h -sgminer_SOURCES += ocl/patch_kernel.c ocl/patch_kernel.h sgminer_SOURCES += ocl/build_kernel.c ocl/build_kernel.h sgminer_SOURCES += ocl/binary_kernel.c ocl/binary_kernel.h diff --git a/ocl.c b/ocl.c index c7244938..65e34e75 100644 --- a/ocl.c +++ b/ocl.c @@ -168,21 +168,6 @@ static float get_opencl_version(cl_device_id device) return version; } -static bool get_opencl_bit_align_support(cl_device_id *device) -{ - char extensions[1024]; - const char * camo = "cl_amd_media_ops"; - char *find; - cl_int status; - - status = clGetDeviceInfo(*device, CL_DEVICE_EXTENSIONS, 1024, (void *)extensions, NULL); - if (status != CL_SUCCESS) { - return false; - } - find = strstr(extensions, camo); - return !!find; -} - static cl_int create_opencl_command_queue(cl_command_queue *command_queue, cl_context *context, cl_device_id *device, cl_command_queue_properties cq_properties) { cl_int status; @@ -262,8 +247,6 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg return NULL; } - clState->hasBitAlign = get_opencl_bit_align_support(&devices[gpu]); - status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&preferred_vwidth, NULL); if (status != CL_SUCCESS) { applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT", status); @@ -544,9 +527,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg build_data->kernel_path = (*opt_kernel_path) ? opt_kernel_path : NULL; build_data->work_size = clState->wsize; - build_data->has_bit_align = clState->hasBitAlign; build_data->opencl_version = get_opencl_version(devices[gpu]); - build_data->patch_bfi = needs_bfi_patch(build_data); strcpy(build_data->binary_filename, filename); build_data->binary_filename[strlen(filename) - 3] = 0x00; // And one NULL terminator, cutting off the .cl suffix. @@ -572,23 +553,13 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg return NULL; } - if (save_opencl_kernel(build_data, clState->program)) { - /* Program needs to be rebuilt, because the binary was patched */ - if (build_data->patch_bfi) { - clReleaseProgram(clState->program); - clState->program = load_opencl_binary_kernel(build_data); - } - } - else { - if (build_data->patch_bfi) - quit(1, "Could not save kernel to file, but it is necessary to apply BFI patch"); - } + // If it doesn't work, oh well, build it again next run + save_opencl_kernel(build_data, clState->program); } // Load kernels - applog(LOG_NOTICE, "Initialising kernel %s with%s bitalign, %spatched BFI, nfactor %d, n %d", - filename, clState->hasBitAlign ? "" : "out", build_data->patch_bfi ? "" : "un", - algorithm->nfactor, algorithm->n); + applog(LOG_NOTICE, "Initialising kernel %s with nfactor %d, n %d", + filename, algorithm->nfactor, algorithm->n); /* get a kernel object handle for a kernel with the given name */ clState->kernel = clCreateKernel(clState->program, "search", &status); @@ -597,7 +568,6 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg return NULL; } - clState->n_extra_kernels = algorithm->n_extra_kernels; if (clState->n_extra_kernels > 0) { unsigned int i; diff --git a/ocl.h b/ocl.h index 502119e0..0950d2c0 100644 --- a/ocl.h +++ b/ocl.h @@ -24,7 +24,6 @@ typedef struct __clState { cl_mem MidstateBuf; cl_mem padbuffer8; unsigned char cldata[80]; - bool hasBitAlign; bool goffset; cl_uint vwidth; size_t max_work_size; diff --git a/ocl/build_kernel.c b/ocl/build_kernel.c index 29a99e18..2e1b7383 100644 --- a/ocl/build_kernel.c +++ b/ocl/build_kernel.c @@ -1,6 +1,5 @@ #include #include "build_kernel.h" -#include "patch_kernel.h" #include "miner.h" static char *file_contents(const char *filename, int *length) @@ -52,6 +51,7 @@ static char *file_contents(const char *filename, int *length) return (char*)buffer; } +// This should NOT be in here! -- Wolf9466 void set_base_compiler_options(build_kernel_data *data) { char buf[255]; @@ -61,12 +61,6 @@ void set_base_compiler_options(build_kernel_data *data) sprintf(buf, "w%dl%d", (int)data->work_size, (int)sizeof(long)); strcat(data->binary_filename, buf); - - if (data->has_bit_align) { - strcat(data->compiler_options, " -D BITALIGN"); - applog(LOG_DEBUG, "cl_amd_media_ops found, setting BITALIGN"); - } else - applog(LOG_DEBUG, "cl_amd_media_ops not found, will not set BITALIGN"); if (data->kernel_path) { strcat(data->compiler_options, " -I \""); @@ -74,38 +68,10 @@ void set_base_compiler_options(build_kernel_data *data) strcat(data->compiler_options, "\""); } - if (data->patch_bfi) { - strcat(data->compiler_options, " -D BFI_INT"); - applog(LOG_DEBUG, "BFI_INT patch requiring device found, patched source with BFI_INT"); - } else - applog(LOG_DEBUG, "BFI_INT patch requiring device not found, will not BFI_INT patch"); - if (data->opencl_version < 1.1) strcat(data->compiler_options, " -D OCL1"); } -bool needs_bfi_patch(build_kernel_data *data) -{ - if (data->has_bit_align && - (data->opencl_version < 1.2) && - (strstr(data->platform, "Cedar") || - strstr(data->platform, "Redwood") || - strstr(data->platform, "Juniper") || - strstr(data->platform, "Cypress" ) || - strstr(data->platform, "Hemlock" ) || - strstr(data->platform, "Caicos" ) || - strstr(data->platform, "Turks" ) || - strstr(data->platform, "Barts" ) || - strstr(data->platform, "Cayman" ) || - strstr(data->platform, "Antilles" ) || - strstr(data->platform, "Wrestler" ) || - strstr(data->platform, "Zacate" ) || - strstr(data->platform, "WinterPark" ))) - return true; - else - return false; -} - cl_program build_opencl_kernel(build_kernel_data *data, const char *filename) { int pl; @@ -198,18 +164,10 @@ bool save_opencl_kernel(build_kernel_data *data, cl_program program) goto out; } - /* Patch the kernel if the hardware supports BFI_INT but it needs to - * be hacked in */ - if (data->patch_bfi) { - if (kernel_bfi_patch(binaries[slot], binary_sizes[slot]) != 0) { - quit(1, "Could not patch BFI_INT, please report this issue."); - } - } - /* Save the binary to be loaded next time */ binaryfile = fopen(data->binary_filename, "wb"); if (!binaryfile) { - /* Not fatal, just means we build it again next time, unless BFI patch is needed */ + /* Not fatal, just means we build it again next time */ applog(LOG_DEBUG, "Unable to create file %s", data->binary_filename); goto out; } else { diff --git a/ocl/build_kernel.h b/ocl/build_kernel.h index 92de074a..89fb8db8 100644 --- a/ocl/build_kernel.h +++ b/ocl/build_kernel.h @@ -23,12 +23,9 @@ typedef struct _build_kernel_data { char sgminer_path[255]; const char *kernel_path; size_t work_size; - bool has_bit_align; - bool patch_bfi; float opencl_version; } build_kernel_data; -bool needs_bfi_patch(build_kernel_data *data); cl_program build_opencl_kernel(build_kernel_data *data, const char *filename); bool save_opencl_kernel(build_kernel_data *data, cl_program program); void set_base_compiler_options(build_kernel_data *data); diff --git a/ocl/patch_kernel.c b/ocl/patch_kernel.c deleted file mode 100644 index 7c72cebc..00000000 --- a/ocl/patch_kernel.c +++ /dev/null @@ -1,97 +0,0 @@ -#include "patch_kernel.h" -#include "logging.h" -#include -#include - -static int advance(char **area, unsigned *remaining, const char *marker) -{ - char *find = (char *)memmem(*area, *remaining, (void *)marker, strlen(marker)); - - if (!find) { - applog(LOG_DEBUG, "Marker \"%s\" not found", marker); - return 0; - } - *remaining -= find - *area; - *area = find; - return 1; -} - -#define OP3_INST_BFE_UINT 4ULL -#define OP3_INST_BFE_INT 5ULL -#define OP3_INST_BFI_INT 6ULL -#define OP3_INST_BIT_ALIGN_INT 12ULL -#define OP3_INST_BYTE_ALIGN_INT 13ULL - -static void patch_opcodes(char *w, unsigned remaining) -{ - uint64_t *opcode = (uint64_t *)w; - int patched = 0; - int count_bfe_int = 0; - int count_bfe_uint = 0; - int count_byte_align = 0; - while (42) { - int clamp = (*opcode >> (32 + 31)) & 0x1; - int dest_rel = (*opcode >> (32 + 28)) & 0x1; - int alu_inst = (*opcode >> (32 + 13)) & 0x1f; - int s2_neg = (*opcode >> (32 + 12)) & 0x1; - int s2_rel = (*opcode >> (32 + 9)) & 0x1; - int pred_sel = (*opcode >> 29) & 0x3; - if (!clamp && !dest_rel && !s2_neg && !s2_rel && !pred_sel) { - if (alu_inst == OP3_INST_BFE_INT) { - count_bfe_int++; - } else if (alu_inst == OP3_INST_BFE_UINT) { - count_bfe_uint++; - } else if (alu_inst == OP3_INST_BYTE_ALIGN_INT) { - count_byte_align++; - // patch this instruction to BFI_INT - *opcode &= 0xfffc1fffffffffffULL; - *opcode |= OP3_INST_BFI_INT << (32 + 13); - patched++; - } - } - if (remaining <= 8) - break; - opcode++; - remaining -= 8; - } - applog(LOG_DEBUG, "Potential OP3 instructions identified: " - "%i BFE_INT, %i BFE_UINT, %i BYTE_ALIGN", - count_bfe_int, count_bfe_uint, count_byte_align); - applog(LOG_DEBUG, "Patched a total of %i BFI_INT instructions", patched); -} - -bool kernel_bfi_patch(char *binary, unsigned binary_size) -{ - unsigned remaining = binary_size; - char *w = binary; - unsigned int start, length; - - /* Find 2nd incidence of .text, and copy the program's - * position and length at a fixed offset from that. Then go - * back and find the 2nd incidence of \x7ELF (rewind by one - * from ELF) and then patch the opcocdes */ - if (!advance(&w, &remaining, ".text")) - return false; - w++; remaining--; - if (!advance(&w, &remaining, ".text")) { - /* 32 bit builds only one ELF */ - w--; remaining++; - } - memcpy(&start, w + 285, 4); - memcpy(&length, w + 289, 4); - w = binary; remaining = binary_size; - if (!advance(&w, &remaining, "ELF")) - return false; - w++; remaining--; - if (!advance(&w, &remaining, "ELF")) { - /* 32 bit builds only one ELF */ - w--; remaining++; - } - w--; remaining++; - w += start; remaining -= start; - applog(LOG_DEBUG, "At %p (%u rem. bytes), to begin patching", - w, remaining); - patch_opcodes(w, length); - - return true; -} diff --git a/ocl/patch_kernel.h b/ocl/patch_kernel.h deleted file mode 100644 index d13b1869..00000000 --- a/ocl/patch_kernel.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef PATCH_KERNEL_H -#define PATCH_KERNEL_H - -#include - -bool kernel_bfi_patch(char *binary, unsigned binary_size); - -#endif /* PATCH_KERNEL_H */ - - From e59c616a7eb25b3d4a7b569c666d18a13508a069 Mon Sep 17 00:00:00 2001 From: Wolf Date: Tue, 7 Apr 2015 04:42:43 -0500 Subject: [PATCH 2/2] When my WhirlpoolX code was checked in, I used C99 features that cause SGMiner to not compile without -std=c99, -std=gnu99, or better. The latter must be used because of other code in SGMiner. Also removed the old, useless -fno-strict-aliasing flag that seems to date back to cpuminer... --- Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.am b/Makefile.am index 73073459..d48ff419 100644 --- a/Makefile.am +++ b/Makefile.am @@ -12,7 +12,7 @@ SUBDIRS = lib submodules ccan sph bin_PROGRAMS = sgminer -sgminer_CPPFLAGS = $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_CPPFLAGS) +sgminer_CPPFLAGS = $(PTHREAD_FLAGS) -std=gnu99 $(JANSSON_CPPFLAGS) sgminer_LDFLAGS = $(PTHREAD_FLAGS) sgminer_LDADD = $(DLOPEN_FLAGS) @LIBCURL_LIBS@ @JANSSON_LIBS@ @PTHREAD_LIBS@ \ @OPENCL_LIBS@ @NCURSES_LIBS@ @PDCURSES_LIBS@ @WS2_LIBS@ \