Browse Source

Merge pull request #414 from wolf9466/master

Removal of old, decrepit bitalign patching code; fix for master not compiling after WhirlpoolX merge
windows
troky 10 years ago
parent
commit
5fe8555c4d
  1. 3
      Makefile.am
  2. 38
      ocl.c
  3. 1
      ocl.h
  4. 46
      ocl/build_kernel.c
  5. 3
      ocl/build_kernel.h
  6. 97
      ocl/patch_kernel.c
  7. 10
      ocl/patch_kernel.h

3
Makefile.am

@ -12,7 +12,7 @@ SUBDIRS = lib submodules ccan sph
bin_PROGRAMS = sgminer bin_PROGRAMS = sgminer
sgminer_CPPFLAGS = $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_CPPFLAGS) sgminer_CPPFLAGS = $(PTHREAD_FLAGS) -std=gnu99 $(JANSSON_CPPFLAGS)
sgminer_LDFLAGS = $(PTHREAD_FLAGS) sgminer_LDFLAGS = $(PTHREAD_FLAGS)
sgminer_LDADD = $(DLOPEN_FLAGS) @LIBCURL_LIBS@ @JANSSON_LIBS@ @PTHREAD_LIBS@ \ sgminer_LDADD = $(DLOPEN_FLAGS) @LIBCURL_LIBS@ @JANSSON_LIBS@ @PTHREAD_LIBS@ \
@OPENCL_LIBS@ @NCURSES_LIBS@ @PDCURSES_LIBS@ @WS2_LIBS@ \ @OPENCL_LIBS@ @NCURSES_LIBS@ @PDCURSES_LIBS@ @WS2_LIBS@ \
@ -44,7 +44,6 @@ sgminer_SOURCES += pool.c pool.h
sgminer_SOURCES += algorithm.c algorithm.h sgminer_SOURCES += algorithm.c algorithm.h
sgminer_SOURCES += config_parser.c config_parser.h sgminer_SOURCES += config_parser.c config_parser.h
sgminer_SOURCES += events.c events.h sgminer_SOURCES += events.c events.h
sgminer_SOURCES += ocl/patch_kernel.c ocl/patch_kernel.h
sgminer_SOURCES += ocl/build_kernel.c ocl/build_kernel.h sgminer_SOURCES += ocl/build_kernel.c ocl/build_kernel.h
sgminer_SOURCES += ocl/binary_kernel.c ocl/binary_kernel.h sgminer_SOURCES += ocl/binary_kernel.c ocl/binary_kernel.h

38
ocl.c

@ -168,21 +168,6 @@ static float get_opencl_version(cl_device_id device)
return version; return version;
} }
/*
 * Report whether the device's CL_DEVICE_EXTENSIONS string contains
 * "cl_amd_media_ops". A failed clGetDeviceInfo query is reported as
 * "not supported".
 *
 * NOTE(review): the extension list is read into a fixed 1024-byte buffer;
 * presumably a longer list makes the query fail and the device is then
 * reported as lacking the extension -- confirm against the OpenCL spec.
 */
static bool get_opencl_bit_align_support(cl_device_id *device)
{
  static const char media_ops_ext[] = "cl_amd_media_ops";
  char ext_list[1024];

  if (clGetDeviceInfo(*device, CL_DEVICE_EXTENSIONS, sizeof(ext_list), (void *)ext_list, NULL) != CL_SUCCESS)
    return false;

  return strstr(ext_list, media_ops_ext) != NULL;
}
static cl_int create_opencl_command_queue(cl_command_queue *command_queue, cl_context *context, cl_device_id *device, cl_command_queue_properties cq_properties) static cl_int create_opencl_command_queue(cl_command_queue *command_queue, cl_context *context, cl_device_id *device, cl_command_queue_properties cq_properties)
{ {
cl_int status; cl_int status;
@ -262,8 +247,6 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
return NULL; return NULL;
} }
clState->hasBitAlign = get_opencl_bit_align_support(&devices[gpu]);
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&preferred_vwidth, NULL); status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&preferred_vwidth, NULL);
if (status != CL_SUCCESS) { if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT", status); applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT", status);
@ -544,9 +527,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
build_data->kernel_path = (*opt_kernel_path) ? opt_kernel_path : NULL; build_data->kernel_path = (*opt_kernel_path) ? opt_kernel_path : NULL;
build_data->work_size = clState->wsize; build_data->work_size = clState->wsize;
build_data->has_bit_align = clState->hasBitAlign;
build_data->opencl_version = get_opencl_version(devices[gpu]); build_data->opencl_version = get_opencl_version(devices[gpu]);
build_data->patch_bfi = needs_bfi_patch(build_data);
strcpy(build_data->binary_filename, filename); strcpy(build_data->binary_filename, filename);
build_data->binary_filename[strlen(filename) - 3] = 0x00; // And one NULL terminator, cutting off the .cl suffix. build_data->binary_filename[strlen(filename) - 3] = 0x00; // And one NULL terminator, cutting off the .cl suffix.
@ -572,23 +553,13 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
return NULL; return NULL;
} }
if (save_opencl_kernel(build_data, clState->program)) { // If it doesn't work, oh well, build it again next run
/* Program needs to be rebuilt, because the binary was patched */ save_opencl_kernel(build_data, clState->program);
if (build_data->patch_bfi) {
clReleaseProgram(clState->program);
clState->program = load_opencl_binary_kernel(build_data);
}
}
else {
if (build_data->patch_bfi)
quit(1, "Could not save kernel to file, but it is necessary to apply BFI patch");
}
} }
// Load kernels // Load kernels
applog(LOG_NOTICE, "Initialising kernel %s with%s bitalign, %spatched BFI, nfactor %d, n %d", applog(LOG_NOTICE, "Initialising kernel %s with nfactor %d, n %d",
filename, clState->hasBitAlign ? "" : "out", build_data->patch_bfi ? "" : "un", filename, algorithm->nfactor, algorithm->n);
algorithm->nfactor, algorithm->n);
/* get a kernel object handle for a kernel with the given name */ /* get a kernel object handle for a kernel with the given name */
clState->kernel = clCreateKernel(clState->program, "search", &status); clState->kernel = clCreateKernel(clState->program, "search", &status);
@ -597,7 +568,6 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
return NULL; return NULL;
} }
clState->n_extra_kernels = algorithm->n_extra_kernels; clState->n_extra_kernels = algorithm->n_extra_kernels;
if (clState->n_extra_kernels > 0) { if (clState->n_extra_kernels > 0) {
unsigned int i; unsigned int i;

1
ocl.h

@ -24,7 +24,6 @@ typedef struct __clState {
cl_mem MidstateBuf; cl_mem MidstateBuf;
cl_mem padbuffer8; cl_mem padbuffer8;
unsigned char cldata[80]; unsigned char cldata[80];
bool hasBitAlign;
bool goffset; bool goffset;
cl_uint vwidth; cl_uint vwidth;
size_t max_work_size; size_t max_work_size;

46
ocl/build_kernel.c

@ -1,6 +1,5 @@
#include <stdio.h> #include <stdio.h>
#include "build_kernel.h" #include "build_kernel.h"
#include "patch_kernel.h"
#include "miner.h" #include "miner.h"
static char *file_contents(const char *filename, int *length) static char *file_contents(const char *filename, int *length)
@ -52,6 +51,7 @@ static char *file_contents(const char *filename, int *length)
return (char*)buffer; return (char*)buffer;
} }
// This should NOT be in here! -- Wolf9466
void set_base_compiler_options(build_kernel_data *data) void set_base_compiler_options(build_kernel_data *data)
{ {
char buf[255]; char buf[255];
@ -61,12 +61,6 @@ void set_base_compiler_options(build_kernel_data *data)
sprintf(buf, "w%dl%d", (int)data->work_size, (int)sizeof(long)); sprintf(buf, "w%dl%d", (int)data->work_size, (int)sizeof(long));
strcat(data->binary_filename, buf); strcat(data->binary_filename, buf);
if (data->has_bit_align) {
strcat(data->compiler_options, " -D BITALIGN");
applog(LOG_DEBUG, "cl_amd_media_ops found, setting BITALIGN");
} else
applog(LOG_DEBUG, "cl_amd_media_ops not found, will not set BITALIGN");
if (data->kernel_path) { if (data->kernel_path) {
strcat(data->compiler_options, " -I \""); strcat(data->compiler_options, " -I \"");
@ -74,38 +68,10 @@ void set_base_compiler_options(build_kernel_data *data)
strcat(data->compiler_options, "\""); strcat(data->compiler_options, "\"");
} }
if (data->patch_bfi) {
strcat(data->compiler_options, " -D BFI_INT");
applog(LOG_DEBUG, "BFI_INT patch requiring device found, patched source with BFI_INT");
} else
applog(LOG_DEBUG, "BFI_INT patch requiring device not found, will not BFI_INT patch");
if (data->opencl_version < 1.1) if (data->opencl_version < 1.1)
strcat(data->compiler_options, " -D OCL1"); strcat(data->compiler_options, " -D OCL1");
} }
/*
 * Decide whether the built binary has to be BFI_INT-patched.
 *
 * Returns true only when all of the following hold:
 *   - data->has_bit_align is set,
 *   - data->opencl_version is below 1.2,
 *   - data->platform contains one of the GPU code names listed below.
 */
bool needs_bfi_patch(build_kernel_data *data)
{
  static const char *const bfi_chips[] = {
    "Cedar", "Redwood", "Juniper", "Cypress", "Hemlock",
    "Caicos", "Turks", "Barts", "Cayman", "Antilles",
    "Wrestler", "Zacate", "WinterPark"
  };
  size_t i;

  if (!data->has_bit_align || !(data->opencl_version < 1.2))
    return false;

  for (i = 0; i < sizeof(bfi_chips) / sizeof(bfi_chips[0]); i++) {
    if (strstr(data->platform, bfi_chips[i]))
      return true;
  }

  return false;
}
cl_program build_opencl_kernel(build_kernel_data *data, const char *filename) cl_program build_opencl_kernel(build_kernel_data *data, const char *filename)
{ {
int pl; int pl;
@ -198,18 +164,10 @@ bool save_opencl_kernel(build_kernel_data *data, cl_program program)
goto out; goto out;
} }
/* Patch the kernel if the hardware supports BFI_INT but it needs to
* be hacked in */
if (data->patch_bfi) {
if (kernel_bfi_patch(binaries[slot], binary_sizes[slot]) != 0) {
quit(1, "Could not patch BFI_INT, please report this issue.");
}
}
/* Save the binary to be loaded next time */ /* Save the binary to be loaded next time */
binaryfile = fopen(data->binary_filename, "wb"); binaryfile = fopen(data->binary_filename, "wb");
if (!binaryfile) { if (!binaryfile) {
/* Not fatal, just means we build it again next time, unless BFI patch is needed */ /* Not fatal, just means we build it again next time */
applog(LOG_DEBUG, "Unable to create file %s", data->binary_filename); applog(LOG_DEBUG, "Unable to create file %s", data->binary_filename);
goto out; goto out;
} else { } else {

3
ocl/build_kernel.h

@ -23,12 +23,9 @@ typedef struct _build_kernel_data {
char sgminer_path[255]; char sgminer_path[255];
const char *kernel_path; const char *kernel_path;
size_t work_size; size_t work_size;
bool has_bit_align;
bool patch_bfi;
float opencl_version; float opencl_version;
} build_kernel_data; } build_kernel_data;
bool needs_bfi_patch(build_kernel_data *data);
cl_program build_opencl_kernel(build_kernel_data *data, const char *filename); cl_program build_opencl_kernel(build_kernel_data *data, const char *filename);
bool save_opencl_kernel(build_kernel_data *data, cl_program program); bool save_opencl_kernel(build_kernel_data *data, cl_program program);
void set_base_compiler_options(build_kernel_data *data); void set_base_compiler_options(build_kernel_data *data);

97
ocl/patch_kernel.c

@ -1,97 +0,0 @@
#include "patch_kernel.h"
#include "logging.h"
#include <string.h>
#include <stdint.h>
/*
 * Move *area forward to the first occurrence of marker inside the next
 * *remaining bytes, reducing *remaining by the distance skipped.
 *
 * Returns 1 when the marker was found. Returns 0, leaving *area and
 * *remaining untouched and logging at debug level, when it was not.
 */
static int advance(char **area, unsigned *remaining, const char *marker)
{
  char *hit = (char *)memmem(*area, *remaining, (void *)marker, strlen(marker));

  if (hit) {
    *remaining -= hit - *area;
    *area = hit;
    return 1;
  }

  applog(LOG_DEBUG, "Marker \"%s\" not found", marker);
  return 0;
}
#define OP3_INST_BFE_UINT 4ULL
#define OP3_INST_BFE_INT 5ULL
#define OP3_INST_BFI_INT 6ULL
#define OP3_INST_BIT_ALIGN_INT 12ULL
#define OP3_INST_BYTE_ALIGN_INT 13ULL
/*
 * Scan a region of 64-bit instruction words and rewrite every
 * BYTE_ALIGN_INT opcode to BFI_INT, in place.
 *
 * w          start of the region to scan (any alignment)
 * remaining  size of the region in bytes
 *
 * A word is only considered when its clamp, dest_rel, s2_neg, s2_rel and
 * pred_sel fields are all zero. BFE_INT and BFE_UINT opcodes are counted for
 * the debug log but left unchanged.
 *
 * Only complete 8-byte words are examined: a trailing fragment shorter than
 * 8 bytes, or an empty region, is never read or written. Words are moved
 * through memcpy so the buffer needs no particular alignment and is never
 * accessed through an incompatible pointer type.
 */
static void patch_opcodes(char *w, unsigned remaining)
{
  int patched = 0;
  int count_bfe_int = 0;
  int count_bfe_uint = 0;
  int count_byte_align = 0;

  for (; remaining >= sizeof(uint64_t); w += sizeof(uint64_t), remaining -= sizeof(uint64_t)) {
    uint64_t opcode;
    int clamp, dest_rel, alu_inst, s2_neg, s2_rel, pred_sel;

    memcpy(&opcode, w, sizeof(opcode));

    clamp = (opcode >> (32 + 31)) & 0x1;
    dest_rel = (opcode >> (32 + 28)) & 0x1;
    alu_inst = (opcode >> (32 + 13)) & 0x1f;
    s2_neg = (opcode >> (32 + 12)) & 0x1;
    s2_rel = (opcode >> (32 + 9)) & 0x1;
    pred_sel = (opcode >> 29) & 0x3;

    if (clamp || dest_rel || s2_neg || s2_rel || pred_sel)
      continue;

    if (alu_inst == OP3_INST_BFE_INT) {
      count_bfe_int++;
    } else if (alu_inst == OP3_INST_BFE_UINT) {
      count_bfe_uint++;
    } else if (alu_inst == OP3_INST_BYTE_ALIGN_INT) {
      count_byte_align++;
      // patch this instruction to BFI_INT: clear the 5-bit opcode field
      // (bits 45..49) and store the new opcode there
      opcode &= 0xfffc1fffffffffffULL;
      opcode |= OP3_INST_BFI_INT << (32 + 13);
      memcpy(w, &opcode, sizeof(opcode));
      patched++;
    }
  }

  applog(LOG_DEBUG, "Potential OP3 instructions identified: "
    "%i BFE_INT, %i BFE_UINT, %i BYTE_ALIGN",
    count_bfe_int, count_bfe_uint, count_byte_align);
  applog(LOG_DEBUG, "Patched a total of %i BFI_INT instructions", patched);
}
/*
 * Patch a compiled kernel binary in place so that its BYTE_ALIGN_INT
 * opcodes become BFI_INT (see patch_opcodes).
 *
 * binary       buffer holding the compiled binary; modified in place
 * binary_size  size of that buffer in bytes
 *
 * Returns true once the opcode region has been located and handed to
 * patch_opcodes. Returns false, without patching anything, when a ".text"
 * or "ELF" marker is missing or when the start/length values read from the
 * binary do not fit inside the buffer.
 */
bool kernel_bfi_patch(char *binary, unsigned binary_size)
{
  /* Fixed offsets, relative to the ".text" marker, of the two 4-byte fields
   * holding the program's position and length. */
  enum { TEXT_START_OFFSET = 285, TEXT_LENGTH_OFFSET = 289, TEXT_FIELD_SIZE = 4 };
  unsigned remaining = binary_size;
  char *w = binary;
  unsigned int start, length;

  /* Find 2nd incidence of .text, and copy the program's
   * position and length at a fixed offset from that. Then go
   * back and find the 2nd incidence of \x7ELF (rewind by one
   * from ELF) and then patch the opcodes */
  if (!advance(&w, &remaining, ".text"))
    return false;
  w++; remaining--;
  if (!advance(&w, &remaining, ".text")) {
    /* 32 bit builds only one ELF */
    w--; remaining++;
  }

  /* Both fields must lie inside the buffer before they are copied out. */
  if (remaining < (unsigned)(TEXT_LENGTH_OFFSET + TEXT_FIELD_SIZE)) {
    applog(LOG_DEBUG, "Binary too short to hold the .text position/length fields");
    return false;
  }
  memcpy(&start, w + TEXT_START_OFFSET, TEXT_FIELD_SIZE);
  memcpy(&length, w + TEXT_LENGTH_OFFSET, TEXT_FIELD_SIZE);

  w = binary; remaining = binary_size;
  if (!advance(&w, &remaining, "ELF"))
    return false;
  w++; remaining--;
  if (!advance(&w, &remaining, "ELF")) {
    /* 32 bit builds only one ELF */
    w--; remaining++;
  }

  /* Rewinding by one byte is only valid if "ELF" was not found at the very
   * start of the buffer. */
  if (w == binary) {
    applog(LOG_DEBUG, "ELF marker at start of binary, nothing to rewind to");
    return false;
  }
  w--; remaining++;

  /* start and length come from the binary itself: refuse values that would
   * take the patch region outside the buffer. */
  if (start > remaining || length > remaining - start) {
    applog(LOG_DEBUG, "Patch region (start %u, length %u) exceeds binary (%u rem. bytes)",
      start, length, remaining);
    return false;
  }
  w += start; remaining -= start;

  applog(LOG_DEBUG, "At %p (%u rem. bytes), to begin patching",
    (void *)w, remaining);
  patch_opcodes(w, length);
  return true;
}

10
ocl/patch_kernel.h

@ -1,10 +0,0 @@
#ifndef PATCH_KERNEL_H
#define PATCH_KERNEL_H
#include <stdbool.h>
/*
 * Patch a compiled kernel binary in place, rewriting BYTE_ALIGN_INT opcodes
 * to BFI_INT.
 *
 * binary       buffer holding the binary; its contents may be modified
 * binary_size  size of the buffer in bytes
 *
 * Returns true when the opcode region was located and processed, false when
 * the binary does not have the expected layout (for example a ".text" or
 * "ELF" marker is missing).
 */
bool kernel_bfi_patch(char *binary, unsigned binary_size);
#endif /* PATCH_KERNEL_H */
Loading…
Cancel
Save