mirror of https://github.com/GOSTSec/sgminer
Jan Berdajs
11 years ago
9 changed files with 518 additions and 394 deletions
@ -0,0 +1,69 @@ |
|||||||
|
#include "binary_kernel.h" |
||||||
|
#include <sys/stat.h> |
||||||
|
|
||||||
|
/*
 * Try to load a previously saved OpenCL program binary.
 *
 * Reads the file named by data->binary_filename, hands the image to
 * clCreateProgramWithBinary() for the single device in data->device and
 * then builds it with clBuildProgram().
 *
 * Returns the built cl_program on success. Returns NULL when the file is
 * missing, cannot be stat'ed, is empty, cannot be read completely, or when
 * the OpenCL create/build step fails (the build log is written to the log
 * in that case). Calls quit() if the image buffer cannot be allocated.
 */
cl_program load_opencl_binary_kernel(build_kernel_data *data)
{
	FILE *binaryfile = NULL;
	unsigned char *binary = NULL;
	const unsigned char *binary_ptr;
	struct stat binary_stat;
	size_t binary_size;
	cl_program program;
	cl_program ret = NULL;
	cl_int status;

	binaryfile = fopen(data->binary_filename, "rb");
	if (!binaryfile) {
		applog(LOG_DEBUG, "No binary found, generating from source");
		goto out;
	}

	if (unlikely(stat(data->binary_filename, &binary_stat))) {
		applog(LOG_DEBUG, "Unable to stat binary, generating from source");
		goto out;
	}
	if (!binary_stat.st_size)
		goto out;

	/* Only one image is ever loaded, so a single buffer is enough. */
	binary_size = binary_stat.st_size;
	binary = (unsigned char *)calloc(binary_size, 1);
	if (unlikely(!binary))
		quit(1, "Unable to calloc binaries");

	if (fread(binary, 1, binary_size, binaryfile) != binary_size) {
		applog(LOG_ERR, "Unable to fread binary");
		goto out;
	}

	binary_ptr = binary;
	program = clCreateProgramWithBinary(data->context, 1, data->device, &binary_size, &binary_ptr, &status, NULL);
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error %d: Loading Binary into cl_program (clCreateProgramWithBinary)", status);
		goto out;
	}

	applog(LOG_DEBUG, "Loaded binary image %s", data->binary_filename);

	/* create a cl program executable for all the devices specified */
	status = clBuildProgram(program, 1, data->device, NULL, NULL, NULL);
	if (status != CL_SUCCESS) {
		size_t log_size = 0;
		char *sz_log = NULL;

		applog(LOG_ERR, "Error %d: Building Program (clBuildProgram)", status);

		/* Only fetch and print the build log if every step succeeds. */
		status = clGetProgramBuildInfo(program, *data->device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
		if (status == CL_SUCCESS)
			sz_log = (char *)malloc(log_size + 1);
		if (sz_log) {
			status = clGetProgramBuildInfo(program, *data->device, CL_PROGRAM_BUILD_LOG, log_size, sz_log, NULL);
			if (status == CL_SUCCESS) {
				sz_log[log_size] = '\0';
				applog(LOG_ERR, "%s", sz_log);
			}
			free(sz_log);
		}
		clReleaseProgram(program);
		goto out;
	}

	ret = program;

out:
	if (binaryfile)
		fclose(binaryfile);
	free(binary);
	return ret;
}
@ -0,0 +1,14 @@ |
|||||||
|
#ifndef BINARY_KERNEL_H |
||||||
|
#define BINARY_KERNEL_H |
||||||
|
|
||||||
|
#ifdef __APPLE_CC__ |
||||||
|
#include <OpenCL/opencl.h> |
||||||
|
#else |
||||||
|
#include <CL/cl.h> |
||||||
|
#endif |
||||||
|
|
||||||
|
#include "build_kernel.h" |
||||||
|
|
||||||
|
/*
 * Load and build the cached program binary named by data->binary_filename
 * for the device in data->device. Returns the built program, or NULL if no
 * usable binary could be loaded.
 */
cl_program load_opencl_binary_kernel(build_kernel_data *data);
||||||
|
|
||||||
|
#endif /* BINARY_KERNEL_H */ |
@ -0,0 +1,233 @@ |
|||||||
|
#include "build_kernel.h" |
||||||
|
#include "patch_kernel.h" |
||||||
|
|
||||||
|
static char *file_contents(const char *filename, int *length) |
||||||
|
{ |
||||||
|
char *fullpath = (char *)alloca(PATH_MAX); |
||||||
|
void *buffer; |
||||||
|
FILE *f; |
||||||
|
|
||||||
|
/* Try in the optional kernel path first, defaults to PREFIX */ |
||||||
|
strcpy(fullpath, opt_kernel_path); |
||||||
|
strcat(fullpath, filename); |
||||||
|
f = fopen(fullpath, "rb"); |
||||||
|
if (!f) { |
||||||
|
/* Then try from the path sgminer was called */ |
||||||
|
strcpy(fullpath, sgminer_path); |
||||||
|
strcat(fullpath, filename); |
||||||
|
f = fopen(fullpath, "rb"); |
||||||
|
} |
||||||
|
if (!f) { |
||||||
|
/* Then from `pwd`/kernel/ */ |
||||||
|
strcpy(fullpath, sgminer_path); |
||||||
|
strcat(fullpath, "kernel/"); |
||||||
|
strcat(fullpath, filename); |
||||||
|
f = fopen(fullpath, "rb"); |
||||||
|
} |
||||||
|
/* Finally try opening it directly */ |
||||||
|
if (!f) |
||||||
|
f = fopen(filename, "rb"); |
||||||
|
|
||||||
|
if (!f) { |
||||||
|
applog(LOG_ERR, "Unable to open %s or %s for reading", |
||||||
|
filename, fullpath); |
||||||
|
return NULL; |
||||||
|
} |
||||||
|
|
||||||
|
fseek(f, 0, SEEK_END); |
||||||
|
*length = ftell(f); |
||||||
|
fseek(f, 0, SEEK_SET); |
||||||
|
|
||||||
|
buffer = malloc(*length+1); |
||||||
|
*length = fread(buffer, 1, *length, f); |
||||||
|
fclose(f); |
||||||
|
((char*)buffer)[*length] = '\0'; |
||||||
|
|
||||||
|
return (char*)buffer; |
||||||
|
} |
||||||
|
|
||||||
|
/*
 * Append text to data->compiler_options, but only if it fits completely
 * (including the terminating NUL). Returns false and logs an error when it
 * does not fit; the existing options are left untouched in that case.
 */
static bool append_compiler_option(build_kernel_data *data, const char *text)
{
	const size_t cap = sizeof(data->compiler_options);
	size_t used = strlen(data->compiler_options);
	size_t len = strlen(text);

	if (len >= cap - used) {
		applog(LOG_ERR, "Compiler options buffer full, dropping \"%s\"", text);
		return false;
	}
	memcpy(data->compiler_options + used, text, len + 1);
	return true;
}

/*
 * (Re)initialise data->compiler_options with the options common to every
 * kernel build:
 *   - include paths for data->sgminer_path, its "kernel" subdirectory and "."
 *   - -D WORKSIZE from data->work_size
 *   - -D BITALIGN when data->has_bit_align is set
 *   - an extra include path for data->kernel_path when it is non-NULL
 *   - -D BFI_INT when data->patch_bfi is set
 *   - -D OCL1 when data->opencl_version is below 1.1
 *
 * All writes are bounded by the size of data->compiler_options; anything
 * that does not fit is reported through applog() instead of overflowing.
 */
void set_base_compiler_options(build_kernel_data *data)
{
	const size_t cap = sizeof(data->compiler_options);
	int n;

	n = snprintf(data->compiler_options, cap,
		     "-I \"%s\" -I \"%skernel\" -I \".\" -D WORKSIZE=%d",
		     data->sgminer_path, data->sgminer_path, (int)data->work_size);
	if (n < 0) {
		data->compiler_options[0] = '\0';
		applog(LOG_ERR, "Unable to format base compiler options");
	} else if ((size_t)n >= cap) {
		applog(LOG_ERR, "Base compiler options truncated, sgminer path too long");
	}

	applog(LOG_DEBUG, "Setting worksize to %d", (int)(data->work_size));

	if (data->has_bit_align) {
		append_compiler_option(data, " -D BITALIGN");
		applog(LOG_DEBUG, "cl_amd_media_ops found, setting BITALIGN");
	} else {
		applog(LOG_DEBUG, "cl_amd_media_ops not found, will not set BITALIGN");
	}

	if (data->kernel_path) {
		/* Build the whole -I "<path>" option first so it is appended
		 * either completely or not at all (no dangling quote). */
		char include_opt[sizeof(data->compiler_options)];

		n = snprintf(include_opt, sizeof(include_opt), " -I \"%s\"", data->kernel_path);
		if (n < 0 || (size_t)n >= sizeof(include_opt))
			applog(LOG_ERR, "Kernel path too long for compiler options");
		else
			append_compiler_option(data, include_opt);
	}

	if (data->patch_bfi) {
		append_compiler_option(data, " -D BFI_INT");
		applog(LOG_DEBUG, "BFI_INT patch requiring device found, patched source with BFI_INT");
	} else {
		applog(LOG_DEBUG, "BFI_INT patch requiring device not found, will not BFI_INT patch");
	}

	if (data->opencl_version < 1.1)
		append_compiler_option(data, " -D OCL1");
}
||||||
|
|
||||||
|
/*
 * Decide whether the compiled binary must be patched to use BFI_INT.
 *
 * Returns true only when all of the following hold:
 *   - data->has_bit_align is set,
 *   - data->opencl_version is below 1.2,
 *   - data->platform contains one of the names listed below.
 */
bool needs_bfi_patch(build_kernel_data *data)
{
	static const char *const bfi_names[] = {
		"Cedar", "Redwood", "Juniper", "Cypress", "Hemlock",
		"Caicos", "Turks", "Barts", "Cayman", "Antilles",
		"Wrestler", "Zacate", "WinterPark",
	};
	size_t idx;

	if (!data->has_bit_align)
		return false;
	if (!(data->opencl_version < 1.2))
		return false;

	for (idx = 0; idx < sizeof(bfi_names) / sizeof(bfi_names[0]); idx++) {
		if (strstr(data->platform, bfi_names[idx]))
			return true;
	}
	return false;
}
||||||
|
|
||||||
|
// TODO: move this out of the generic build code; it is scrypt-specific
|
||||||
|
/*
 * Append the scrypt-specific defines (LOOKUP_GAP, CONCURRENT_THREADS,
 * NFACTOR) to data->compiler_options.
 *
 * The options are formatted straight onto the end of the existing string,
 * bounded by the size of data->compiler_options. If they do not fit, the
 * existing options are left unchanged and an error is logged.
 */
void append_scrypt_compiler_options(build_kernel_data *data, int lookup_gap, unsigned int thread_concurrency, unsigned int nfactor)
{
	const size_t cap = sizeof(data->compiler_options);
	size_t used = strlen(data->compiler_options);
	int n;

	n = snprintf(data->compiler_options + used, cap - used,
		     " -D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%u -D NFACTOR=%u",
		     lookup_gap, thread_concurrency, nfactor);
	if (n < 0 || (size_t)n >= cap - used) {
		/* Discard the partial write so no half-written option remains. */
		data->compiler_options[used] = '\0';
		applog(LOG_ERR, "Compiler options buffer too small for scrypt options");
	}
}
||||||
|
|
||||||
|
/*
 * Compile an OpenCL program from source.
 *
 * The source text is read from data->source_filename (via file_contents())
 * and built for the single device in data->device using
 * data->compiler_options.
 *
 * filename is not used by this function; it is kept so the signature stays
 * unchanged for existing callers.
 *
 * Returns the built cl_program, or NULL if the source cannot be read or the
 * OpenCL create/build step fails. On a build failure the build log is
 * written to the log and the partially created program is released.
 */
cl_program build_opencl_kernel(build_kernel_data *data, const char *filename)
{
	int pl = 0;
	char *source = file_contents(data->source_filename, &pl);
	size_t source_size;
	cl_program program = NULL;
	cl_program ret = NULL;
	cl_int status;

	(void)filename;

	if (!source)
		goto out;

	/* pl is only meaningful once file_contents() has succeeded. */
	source_size = (size_t)pl;

	program = clCreateProgramWithSource(data->context, 1, (const char **)&source, &source_size, &status);
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error %d: Loading source into cl_program (clCreateProgramWithSource)", status);
		program = NULL;
		goto out;
	}

	applog(LOG_DEBUG, "CompilerOptions: %s", data->compiler_options);
	status = clBuildProgram(program, 1, data->device, data->compiler_options, NULL, NULL);

	if (status != CL_SUCCESS) {
		size_t log_size = 0;
		char *sz_log = NULL;

		applog(LOG_ERR, "Error %d: Building Program (clBuildProgram)", status);

		/* Only fetch and print the build log if every step succeeds. */
		status = clGetProgramBuildInfo(program, *data->device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
		if (status == CL_SUCCESS)
			sz_log = (char *)malloc(log_size + 1);
		if (sz_log) {
			status = clGetProgramBuildInfo(program, *data->device, CL_PROGRAM_BUILD_LOG, log_size, sz_log, NULL);
			if (status == CL_SUCCESS) {
				sz_log[log_size] = '\0';
				applog(LOG_ERR, "%s", sz_log);
			}
			free(sz_log);
		}
		goto out;
	}

	ret = program;

out:
	/* Release a program that was created but is not being returned. */
	if (!ret && program)
		clReleaseProgram(program);
	free(source);
	return ret;
}
||||||
|
|
||||||
|
/*
 * Save the compiled binary of program to data->binary_filename so it can be
 * loaded instead of rebuilt next time.
 *
 * Queries the program for its per-device binaries, picks the first slot with
 * a non-zero size, optionally applies the BFI_INT patch (data->patch_bfi)
 * and writes that image to disk.
 *
 * Returns true when the file was written and closed successfully, false
 * otherwise. When built with __APPLE__ defined it always returns false
 * without touching the program. Calls quit() if BFI_INT patching was
 * requested and kernel_bfi_patch() reports failure.
 */
bool save_opencl_kernel(build_kernel_data *data, cl_program program)
{
	cl_uint slot, cpnd = 0;
	size_t *binary_sizes = NULL;
	char **binaries = NULL;
	FILE *binaryfile = NULL;
	cl_int status;
	int close_err;
	bool ret = false;

#ifdef __APPLE__
	/* OSX OpenCL breaks reading off binaries with >1 GPU so always build
	 * from source. */
	goto out;
#endif

	status = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &cpnd, NULL);
	if (unlikely(status != CL_SUCCESS)) {
		applog(LOG_ERR, "Error %d: Getting program info CL_PROGRAM_NUM_DEVICES. (clGetProgramInfo)", status);
		goto out;
	}
	if (unlikely(!cpnd)) {
		applog(LOG_ERR, "OpenCL program has no associated devices");
		goto out;
	}

	/* Size both tables from the device count reported by OpenCL so they
	 * can never be overrun. */
	binary_sizes = (size_t *)calloc(cpnd, sizeof(size_t));
	binaries = (char **)calloc(cpnd, sizeof(char *));
	if (unlikely(!binary_sizes || !binaries)) {
		applog(LOG_ERR, "Unable to calloc binary tables");
		goto out;
	}

	status = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t) * cpnd, binary_sizes, NULL);
	if (unlikely(status != CL_SUCCESS)) {
		applog(LOG_ERR, "Error %d: Getting program info CL_PROGRAM_BINARY_SIZES. (clGetProgramInfo)", status);
		goto out;
	}

	for (slot = 0; slot < cpnd; slot++) {
		if (!binary_sizes[slot])
			continue;
		binaries[slot] = (char *)calloc(binary_sizes[slot], 1);
		if (unlikely(!binaries[slot])) {
			applog(LOG_ERR, "Unable to calloc binary slot %d", (int)slot);
			goto out;
		}
	}

	status = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(char *) * cpnd, binaries, NULL);
	if (unlikely(status != CL_SUCCESS)) {
		applog(LOG_ERR, "Error %d: Getting program info. CL_PROGRAM_BINARIES (clGetProgramInfo)", status);
		goto out;
	}

	/* The actual compiled binary ends up in a RANDOM slot, so iterate over
	 * all the binary slots and find where the real program is. */
	for (slot = 0; slot < cpnd; slot++)
		if (binary_sizes[slot])
			break;

	if (slot == cpnd) {
		applog(LOG_ERR, "OpenCL compiler generated a zero sized binary!");
		goto out;
	}
	applog(LOG_DEBUG, "Binary size found in binary slot %d: %d", (int)slot, (int)(binary_sizes[slot]));

	/* Patch the kernel if the hardware supports BFI_INT but it needs to
	 * be hacked in. kernel_bfi_patch() returns true on success. */
	if (data->patch_bfi) {
		if (!kernel_bfi_patch(binaries[slot], binary_sizes[slot]))
			quit(1, "Could not patch BFI_INT, please report this issue.");
	}

	/* Save the binary to be loaded next time */
	binaryfile = fopen(data->binary_filename, "wb");
	if (!binaryfile) {
		/* Not fatal, just means we build it again next time, unless BFI patch is needed */
		applog(LOG_DEBUG, "Unable to create file %s", data->binary_filename);
		goto out;
	}

	if (unlikely(fwrite(binaries[slot], 1, binary_sizes[slot], binaryfile) != binary_sizes[slot])) {
		applog(LOG_ERR, "Unable to fwrite to binaryfile");
		goto out;
	}

	/* fclose() flushes buffered data, so a failure here means the file may
	 * be incomplete. */
	close_err = fclose(binaryfile);
	binaryfile = NULL;
	if (unlikely(close_err)) {
		applog(LOG_ERR, "Unable to fclose binaryfile");
		goto out;
	}

	ret = true;

out:
	if (binaryfile)
		fclose(binaryfile);
	if (binaries) {
		for (slot = 0; slot < cpnd; slot++)
			free(binaries[slot]);
		free(binaries);
	}
	free(binary_sizes);

	return ret;
}
@ -0,0 +1,31 @@ |
|||||||
|
#ifndef BUILD_KERNEL_H |
||||||
|
#define BUILD_KERNEL_H |
||||||
|
|
||||||
|
#include "ocl.h" |
||||||
|
#include <stdbool.h> |
||||||
|
|
||||||
|
typedef struct _build_kernel_data { |
||||||
|
char source_filename[255]; |
||||||
|
char binary_filename[255]; |
||||||
|
char compiler_options[512]; |
||||||
|
|
||||||
|
cl_context context; |
||||||
|
cl_device_id *device; |
||||||
|
|
||||||
|
// for compiler options
|
||||||
|
char platform[64]; |
||||||
|
char sgminer_path[255]; |
||||||
|
const char *kernel_path; |
||||||
|
size_t work_size; |
||||||
|
bool has_bit_align; |
||||||
|
bool patch_bfi; |
||||||
|
float opencl_version; |
||||||
|
} build_kernel_data; |
||||||
|
|
||||||
|
bool needs_bfi_patch(build_kernel_data *data); |
||||||
|
cl_program build_opencl_kernel(build_kernel_data *data, const char *filename); |
||||||
|
bool save_opencl_kernel(build_kernel_data *data, cl_program program); |
||||||
|
void set_base_compiler_options(build_kernel_data *data); |
||||||
|
void append_scrypt_compiler_options(build_kernel_data *data, int lookup_gap, unsigned int thread_concurrency, unsigned int nfactor); |
||||||
|
|
||||||
|
#endif /* BUILD_KERNEL_H */ |
@ -0,0 +1,97 @@ |
|||||||
|
#include "patch_kernel.h" |
||||||
|
#include "logging.h" |
||||||
|
#include <string.h> |
||||||
|
#include <stdint.h> |
||||||
|
|
||||||
|
static int advance(char **area, unsigned *remaining, const char *marker) |
||||||
|
{ |
||||||
|
char *find = (char *)memmem(*area, *remaining, (void *)marker, strlen(marker)); |
||||||
|
|
||||||
|
if (!find) { |
||||||
|
applog(LOG_DEBUG, "Marker \"%s\" not found", marker); |
||||||
|
return 0; |
||||||
|
} |
||||||
|
*remaining -= find - *area; |
||||||
|
*area = find; |
||||||
|
return 1; |
||||||
|
} |
||||||
|
|
||||||
|
#define OP3_INST_BFE_UINT 4ULL |
||||||
|
#define OP3_INST_BFE_INT 5ULL |
||||||
|
#define OP3_INST_BFI_INT 6ULL |
||||||
|
#define OP3_INST_BIT_ALIGN_INT 12ULL |
||||||
|
#define OP3_INST_BYTE_ALIGN_INT 13ULL |
||||||
|
|
||||||
|
static void patch_opcodes(char *w, unsigned remaining) |
||||||
|
{ |
||||||
|
uint64_t *opcode = (uint64_t *)w; |
||||||
|
int patched = 0; |
||||||
|
int count_bfe_int = 0; |
||||||
|
int count_bfe_uint = 0; |
||||||
|
int count_byte_align = 0; |
||||||
|
while (42) { |
||||||
|
int clamp = (*opcode >> (32 + 31)) & 0x1; |
||||||
|
int dest_rel = (*opcode >> (32 + 28)) & 0x1; |
||||||
|
int alu_inst = (*opcode >> (32 + 13)) & 0x1f; |
||||||
|
int s2_neg = (*opcode >> (32 + 12)) & 0x1; |
||||||
|
int s2_rel = (*opcode >> (32 + 9)) & 0x1; |
||||||
|
int pred_sel = (*opcode >> 29) & 0x3; |
||||||
|
if (!clamp && !dest_rel && !s2_neg && !s2_rel && !pred_sel) { |
||||||
|
if (alu_inst == OP3_INST_BFE_INT) { |
||||||
|
count_bfe_int++; |
||||||
|
} else if (alu_inst == OP3_INST_BFE_UINT) { |
||||||
|
count_bfe_uint++; |
||||||
|
} else if (alu_inst == OP3_INST_BYTE_ALIGN_INT) { |
||||||
|
count_byte_align++; |
||||||
|
// patch this instruction to BFI_INT
|
||||||
|
*opcode &= 0xfffc1fffffffffffULL; |
||||||
|
*opcode |= OP3_INST_BFI_INT << (32 + 13); |
||||||
|
patched++; |
||||||
|
} |
||||||
|
} |
||||||
|
if (remaining <= 8) |
||||||
|
break; |
||||||
|
opcode++; |
||||||
|
remaining -= 8; |
||||||
|
} |
||||||
|
applog(LOG_DEBUG, "Potential OP3 instructions identified: " |
||||||
|
"%i BFE_INT, %i BFE_UINT, %i BYTE_ALIGN", |
||||||
|
count_bfe_int, count_bfe_uint, count_byte_align); |
||||||
|
applog(LOG_DEBUG, "Patched a total of %i BFI_INT instructions", patched); |
||||||
|
} |
||||||
|
|
||||||
|
bool kernel_bfi_patch(char *binary, unsigned binary_size) |
||||||
|
{ |
||||||
|
unsigned remaining = binary_size; |
||||||
|
char *w = binary; |
||||||
|
unsigned int start, length; |
||||||
|
|
||||||
|
/* Find 2nd incidence of .text, and copy the program's
|
||||||
|
* position and length at a fixed offset from that. Then go |
||||||
|
* back and find the 2nd incidence of \x7ELF (rewind by one |
||||||
|
* from ELF) and then patch the opcocdes */ |
||||||
|
if (!advance(&w, &remaining, ".text")) |
||||||
|
return false; |
||||||
|
w++; remaining--; |
||||||
|
if (!advance(&w, &remaining, ".text")) { |
||||||
|
/* 32 bit builds only one ELF */ |
||||||
|
w--; remaining++; |
||||||
|
} |
||||||
|
memcpy(&start, w + 285, 4); |
||||||
|
memcpy(&length, w + 289, 4); |
||||||
|
w = binary; remaining = binary_size; |
||||||
|
if (!advance(&w, &remaining, "ELF")) |
||||||
|
return false; |
||||||
|
w++; remaining--; |
||||||
|
if (!advance(&w, &remaining, "ELF")) { |
||||||
|
/* 32 bit builds only one ELF */ |
||||||
|
w--; remaining++; |
||||||
|
} |
||||||
|
w--; remaining++; |
||||||
|
w += start; remaining -= start; |
||||||
|
applog(LOG_DEBUG, "At %p (%u rem. bytes), to begin patching", |
||||||
|
w, remaining); |
||||||
|
patch_opcodes(w, length); |
||||||
|
|
||||||
|
return true; |
||||||
|
} |
Loading…
Reference in new issue