mirror of https://github.com/GOSTSec/sgminer
Jan Berdajs
11 years ago
9 changed files with 518 additions and 394 deletions
@ -0,0 +1,69 @@
@@ -0,0 +1,69 @@
|
||||
#include "binary_kernel.h" |
||||
#include <sys/stat.h> |
||||
|
||||
/*
 * Try to load a previously saved OpenCL program binary from
 * data->binary_filename.
 *
 * The file is read whole, handed to clCreateProgramWithBinary() for the
 * single device in data->device and then built with clBuildProgram().
 *
 * Returns the built cl_program on success. Returns NULL when the binary is
 * missing, empty, unreadable or rejected by the OpenCL runtime; the messages
 * logged on those paths tell the user the kernel will be generated from
 * source instead. Calls quit() if the read buffer cannot be allocated.
 */
cl_program load_opencl_binary_kernel(build_kernel_data *data)
{
	FILE *binaryfile = NULL;
	unsigned char *binary = NULL;
	struct stat binary_stat;
	size_t binary_size;
	cl_program program;
	cl_program ret = NULL;
	cl_int status;

	binaryfile = fopen(data->binary_filename, "rb");
	if (!binaryfile) {
		applog(LOG_DEBUG, "No binary found, generating from source");
		goto out;
	}

	if (unlikely(stat(data->binary_filename, &binary_stat))) {
		applog(LOG_DEBUG, "Unable to stat binary, generating from source");
		goto out;
	}
	if (!binary_stat.st_size)
		goto out;

	/* Only one binary is ever loaded, so a single buffer is enough; no
	 * per-device pointer table is needed. */
	binary_size = binary_stat.st_size;
	binary = (unsigned char *)calloc(binary_size, 1);
	if (unlikely(!binary)) {
		quit(1, "Unable to calloc binaries");
	}

	if (fread(binary, 1, binary_size, binaryfile) != binary_size) {
		applog(LOG_ERR, "Unable to fread binary");
		goto out;
	}

	program = clCreateProgramWithBinary(data->context, 1, data->device, &binary_size,
					    (const unsigned char **)&binary, &status, NULL);
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error %d: Loading Binary into cl_program (clCreateProgramWithBinary)", status);
		goto out;
	}

	applog(LOG_DEBUG, "Loaded binary image %s", data->binary_filename);

	/* create a cl program executable for all the devices specified */
	status = clBuildProgram(program, 1, data->device, NULL, NULL, NULL);
	if (status != CL_SUCCESS) {
		size_t log_size = 0;
		char *sz_log;

		applog(LOG_ERR, "Error %d: Building Program (clBuildProgram)", status);

		/* If the size query fails log_size stays 0 and an empty log is
		 * printed rather than reading an indeterminate size. */
		if (clGetProgramBuildInfo(program, *data->device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size) != CL_SUCCESS)
			log_size = 0;

		/* calloc so the buffer is a valid empty string even if the
		 * second query writes nothing. */
		sz_log = (char *)calloc(log_size + 1, 1);
		if (sz_log) {
			clGetProgramBuildInfo(program, *data->device, CL_PROGRAM_BUILD_LOG, log_size, sz_log, NULL);
			sz_log[log_size] = '\0';
			applog(LOG_ERR, "%s", sz_log);
			free(sz_log);
		}

		clReleaseProgram(program);
		goto out;
	}

	ret = program;

out:
	if (binaryfile)
		fclose(binaryfile);
	free(binary);
	return ret;
}
@ -0,0 +1,14 @@
@@ -0,0 +1,14 @@
|
||||
#ifndef BINARY_KERNEL_H |
||||
#define BINARY_KERNEL_H |
||||
|
||||
#ifdef __APPLE_CC__ |
||||
#include <OpenCL/opencl.h> |
||||
#else |
||||
#include <CL/cl.h> |
||||
#endif |
||||
|
||||
#include "build_kernel.h" |
||||
|
||||
cl_program load_opencl_binary_kernel(build_kernel_data *data); |
||||
|
||||
#endif /* BINARY_KERNEL_H */ |
@ -0,0 +1,233 @@
@@ -0,0 +1,233 @@
|
||||
#include "build_kernel.h" |
||||
#include "patch_kernel.h" |
||||
|
||||
static char *file_contents(const char *filename, int *length) |
||||
{ |
||||
char *fullpath = (char *)alloca(PATH_MAX); |
||||
void *buffer; |
||||
FILE *f; |
||||
|
||||
/* Try in the optional kernel path first, defaults to PREFIX */ |
||||
strcpy(fullpath, opt_kernel_path); |
||||
strcat(fullpath, filename); |
||||
f = fopen(fullpath, "rb"); |
||||
if (!f) { |
||||
/* Then try from the path sgminer was called */ |
||||
strcpy(fullpath, sgminer_path); |
||||
strcat(fullpath, filename); |
||||
f = fopen(fullpath, "rb"); |
||||
} |
||||
if (!f) { |
||||
/* Then from `pwd`/kernel/ */ |
||||
strcpy(fullpath, sgminer_path); |
||||
strcat(fullpath, "kernel/"); |
||||
strcat(fullpath, filename); |
||||
f = fopen(fullpath, "rb"); |
||||
} |
||||
/* Finally try opening it directly */ |
||||
if (!f) |
||||
f = fopen(filename, "rb"); |
||||
|
||||
if (!f) { |
||||
applog(LOG_ERR, "Unable to open %s or %s for reading", |
||||
filename, fullpath); |
||||
return NULL; |
||||
} |
||||
|
||||
fseek(f, 0, SEEK_END); |
||||
*length = ftell(f); |
||||
fseek(f, 0, SEEK_SET); |
||||
|
||||
buffer = malloc(*length+1); |
||||
*length = fread(buffer, 1, *length, f); |
||||
fclose(f); |
||||
((char*)buffer)[*length] = '\0'; |
||||
|
||||
return (char*)buffer; |
||||
} |
||||
|
||||
/*
 * Append text to data->compiler_options only if it fits completely.
 * Returns false (and logs) when there is not enough room, leaving the
 * existing options unchanged.
 */
static bool base_options_append(build_kernel_data *data, const char *text)
{
	size_t used = strlen(data->compiler_options);
	size_t len = strlen(text);

	if (len >= sizeof(data->compiler_options) - used) {
		applog(LOG_ERR, "Compiler options too long, dropping \"%s\"", text);
		return false;
	}
	memcpy(data->compiler_options + used, text, len + 1);
	return true;
}

/*
 * Fill data->compiler_options with the options shared by every kernel:
 * include paths derived from data->sgminer_path (and data->kernel_path when
 * set), -D WORKSIZE from data->work_size, and the BITALIGN / BFI_INT / OCL1
 * defines selected by has_bit_align, patch_bfi and opencl_version.
 *
 * Every write is bounded by the size of compiler_options. An option that
 * does not fit is dropped whole and reported at LOG_ERR, so a half-written
 * quoted path is never left in the string.
 */
void set_base_compiler_options(build_kernel_data *data)
{
	const size_t cap = sizeof(data->compiler_options);
	size_t used;
	int n;

	n = snprintf(data->compiler_options, cap,
		     "-I \"%s\" -I \"%skernel\" -I \".\" -D WORKSIZE=%d",
		     data->sgminer_path, data->sgminer_path, (int)data->work_size);
	if (n < 0 || (size_t)n >= cap) {
		applog(LOG_ERR, "Base compiler options do not fit in %d bytes", (int)cap);
		data->compiler_options[0] = '\0';
	}

	applog(LOG_DEBUG, "Setting worksize to %d", (int)(data->work_size));

	if (data->has_bit_align) {
		base_options_append(data, " -D BITALIGN");
		applog(LOG_DEBUG, "cl_amd_media_ops found, setting BITALIGN");
	} else
		applog(LOG_DEBUG, "cl_amd_media_ops not found, will not set BITALIGN");

	if (data->kernel_path) {
		/* Format straight into the free tail so the quoted path is
		 * added atomically; roll back on truncation. */
		used = strlen(data->compiler_options);
		n = snprintf(data->compiler_options + used, cap - used,
			     " -I \"%s\"", data->kernel_path);
		if (n < 0 || (size_t)n >= cap - used) {
			data->compiler_options[used] = '\0';
			applog(LOG_ERR, "Compiler options too long, dropping kernel path \"%s\"",
			       data->kernel_path);
		}
	}

	if (data->patch_bfi) {
		base_options_append(data, " -D BFI_INT");
		applog(LOG_DEBUG, "BFI_INT patch requiring device found, patched source with BFI_INT");
	} else
		applog(LOG_DEBUG, "BFI_INT patch requiring device not found, will not BFI_INT patch");

	if (data->opencl_version < 1.1)
		base_options_append(data, " -D OCL1");
}
||||
|
||||
/*
 * Decide whether the BFI_INT binary patch is wanted for this device.
 *
 * Returns true only when data->has_bit_align is set, data->opencl_version
 * is below 1.2, and data->platform contains one of the names listed below
 * as a substring. Returns false otherwise.
 */
bool needs_bfi_patch(build_kernel_data *data)
{
	static const char *const bfi_names[] = {
		"Cedar", "Redwood", "Juniper", "Cypress", "Hemlock",
		"Caicos", "Turks", "Barts", "Cayman", "Antilles",
		"Wrestler", "Zacate", "WinterPark",
	};
	size_t i;

	if (!data->has_bit_align)
		return false;
	if (!(data->opencl_version < 1.2))
		return false;

	for (i = 0; i < sizeof(bfi_names) / sizeof(bfi_names[0]); i++) {
		if (strstr(data->platform, bfi_names[i]))
			return true;
	}
	return false;
}
||||
|
||||
// TODO: move away, specific
|
||||
void append_scrypt_compiler_options(build_kernel_data *data, int lookup_gap, unsigned int thread_concurrency, unsigned int nfactor) |
||||
{ |
||||
char buf[255]; |
||||
sprintf(buf, " -D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D NFACTOR=%d", |
||||
lookup_gap, thread_concurrency, nfactor); |
||||
|
||||
strcat(data->compiler_options, buf); |
||||
} |
||||
|
||||
/*
 * Compile the OpenCL source named by data->source_filename for the single
 * device in data->device, using data->compiler_options.
 *
 * The filename parameter is not used by this function; the source to load
 * is taken from data->source_filename.
 *
 * Returns the built cl_program, or NULL if the source cannot be read, the
 * program object cannot be created, or the build fails. On a build failure
 * the build log is written at LOG_ERR and the program object is released.
 */
cl_program build_opencl_kernel(build_kernel_data *data, const char *filename)
{
	int pl = 0;
	char *source = file_contents(data->source_filename, &pl);
	size_t sourceSize[1];
	cl_int status;
	cl_program program = NULL;
	cl_program ret = NULL;

	(void)filename;

	if (!source)
		goto out;

	/* Only read the length once the load is known to have succeeded. */
	sourceSize[0] = (size_t)pl;

	program = clCreateProgramWithSource(data->context, 1, (const char **)&source, sourceSize, &status);
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error %d: Loading Binary into cl_program (clCreateProgramWithSource)", status);
		goto out;
	}

	applog(LOG_DEBUG, "CompilerOptions: %s", data->compiler_options);
	status = clBuildProgram(program, 1, data->device, data->compiler_options, NULL, NULL);

	if (status != CL_SUCCESS) {
		size_t log_size = 0;
		char *sz_log;

		applog(LOG_ERR, "Error %d: Building Program (clBuildProgram)", status);

		/* If the size query fails log_size stays 0 and an empty log is
		 * printed rather than reading an indeterminate size. */
		if (clGetProgramBuildInfo(program, *data->device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size) != CL_SUCCESS)
			log_size = 0;

		/* calloc so the buffer is a valid empty string even if the
		 * second query writes nothing. */
		sz_log = (char *)calloc(log_size + 1, 1);
		if (sz_log) {
			clGetProgramBuildInfo(program, *data->device, CL_PROGRAM_BUILD_LOG, log_size, sz_log, NULL);
			sz_log[log_size] = '\0';
			applog(LOG_ERR, "%s", sz_log);
			free(sz_log);
		}

		/* The failed program is never returned, so drop it here. */
		clReleaseProgram(program);
		goto out;
	}

	ret = program;
out:
	free(source);
	return ret;
}
||||
|
||||
/*
 * Save the compiled binary of program to data->binary_filename so it can be
 * loaded instead of rebuilt next time.
 *
 * The binaries for all devices attached to the program are fetched; the
 * first slot with a non-zero size is the one written. When data->patch_bfi
 * is set that binary is first run through kernel_bfi_patch(), and quit() is
 * called if the patch fails.
 *
 * Returns true if the file was written completely. Returns false on any
 * OpenCL query failure, if no binary was produced, or if the file cannot be
 * written; a partially written file is removed. On __APPLE__ builds nothing
 * is saved and false is always returned. Calls quit() on allocation failure.
 */
bool save_opencl_kernel(build_kernel_data *data, cl_program program)
{
	cl_uint slot, cpnd = 0;
	size_t *binary_sizes = NULL;
	char **binaries = NULL;
	FILE *binaryfile;
	size_t written;
	cl_int status;
	bool ret = false;

#ifdef __APPLE__
	/* OSX OpenCL breaks reading off binaries with >1 GPU so always build
	 * from source. */
	goto out;
#endif

	status = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &cpnd, NULL);
	if (unlikely(status != CL_SUCCESS)) {
		applog(LOG_ERR, "Error %d: Getting program info CL_PROGRAM_NUM_DEVICES. (clGetProgramInfo)", status);
		goto out;
	}
	if (!cpnd) {
		applog(LOG_ERR, "OpenCL program is not associated with any device");
		goto out;
	}

	/* Size both tables from the reported device count so the queries
	 * below can never write past them. */
	binary_sizes = (size_t *)calloc(cpnd, sizeof(size_t));
	binaries = (char **)calloc(cpnd, sizeof(char *));
	if (unlikely(!binary_sizes || !binaries))
		quit(1, "Unable to calloc binaries");

	status = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t) * cpnd, binary_sizes, NULL);
	if (unlikely(status != CL_SUCCESS)) {
		applog(LOG_ERR, "Error %d: Getting program info CL_PROGRAM_BINARY_SIZES. (clGetProgramInfo)", status);
		goto out;
	}

	for (slot = 0; slot < cpnd; slot++) {
		if (!binary_sizes[slot])
			continue;
		binaries[slot] = (char *)calloc(binary_sizes[slot], 1);
		if (unlikely(!binaries[slot]))
			quit(1, "Unable to calloc binaries");
	}

	status = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(char *) * cpnd, binaries, NULL);
	if (unlikely(status != CL_SUCCESS)) {
		applog(LOG_ERR, "Error %d: Getting program info. CL_PROGRAM_BINARIES (clGetProgramInfo)", status);
		goto out;
	}

	/* The actual compiled binary ends up in a RANDOM slot! Grr, so we have
	 * to iterate over all the binary slots and find where the real program
	 * is. What the heck is this!? */
	for (slot = 0; slot < cpnd; slot++)
		if (binary_sizes[slot])
			break;

	if (slot == cpnd) {
		applog(LOG_ERR, "OpenCL compiler generated a zero sized binary!");
		goto out;
	}
	applog(LOG_DEBUG, "Binary size found in binary slot %d: %d", (int)slot, (int)(binary_sizes[slot]));

	/* Patch the kernel if the hardware supports BFI_INT but it needs to
	 * be hacked in. kernel_bfi_patch() returns true on success. */
	if (data->patch_bfi) {
		if (!kernel_bfi_patch(binaries[slot], binary_sizes[slot])) {
			quit(1, "Could not patch BFI_INT, please report this issue.");
		}
	}

	/* Save the binary to be loaded next time */
	binaryfile = fopen(data->binary_filename, "wb");
	if (!binaryfile) {
		/* Not fatal, just means we build it again next time, unless BFI patch is needed */
		applog(LOG_DEBUG, "Unable to create file %s", data->binary_filename);
		goto out;
	}

	written = fwrite(binaries[slot], 1, binary_sizes[slot], binaryfile);
	/* fclose() flushes, so its result is part of "did the write work". */
	if (unlikely(fclose(binaryfile) != 0 || written != binary_sizes[slot])) {
		applog(LOG_ERR, "Unable to fwrite to binaryfile");
		/* Do not leave a truncated binary to be picked up next run. */
		remove(data->binary_filename);
		goto out;
	}

	ret = true;
out:
	if (binaries) {
		for (slot = 0; slot < cpnd; slot++)
			free(binaries[slot]);
		free(binaries);
	}
	free(binary_sizes);

	return ret;
}
@ -0,0 +1,31 @@
@@ -0,0 +1,31 @@
|
||||
#ifndef BUILD_KERNEL_H |
||||
#define BUILD_KERNEL_H |
||||
|
||||
#include "ocl.h" |
||||
#include <stdbool.h> |
||||
|
||||
typedef struct _build_kernel_data { |
||||
char source_filename[255]; |
||||
char binary_filename[255]; |
||||
char compiler_options[512]; |
||||
|
||||
cl_context context; |
||||
cl_device_id *device; |
||||
|
||||
// for compiler options
|
||||
char platform[64]; |
||||
char sgminer_path[255]; |
||||
const char *kernel_path; |
||||
size_t work_size; |
||||
bool has_bit_align; |
||||
bool patch_bfi; |
||||
float opencl_version; |
||||
} build_kernel_data; |
||||
|
||||
bool needs_bfi_patch(build_kernel_data *data); |
||||
cl_program build_opencl_kernel(build_kernel_data *data, const char *filename); |
||||
bool save_opencl_kernel(build_kernel_data *data, cl_program program); |
||||
void set_base_compiler_options(build_kernel_data *data); |
||||
void append_scrypt_compiler_options(build_kernel_data *data, int lookup_gap, unsigned int thread_concurrency, unsigned int nfactor); |
||||
|
||||
#endif /* BUILD_KERNEL_H */ |
@ -0,0 +1,97 @@
@@ -0,0 +1,97 @@
|
||||
#include "patch_kernel.h" |
||||
#include "logging.h" |
||||
#include <string.h> |
||||
#include <stdint.h> |
||||
|
||||
static int advance(char **area, unsigned *remaining, const char *marker) |
||||
{ |
||||
char *find = (char *)memmem(*area, *remaining, (void *)marker, strlen(marker)); |
||||
|
||||
if (!find) { |
||||
applog(LOG_DEBUG, "Marker \"%s\" not found", marker); |
||||
return 0; |
||||
} |
||||
*remaining -= find - *area; |
||||
*area = find; |
||||
return 1; |
||||
} |
||||
|
||||
#define OP3_INST_BFE_UINT 4ULL |
||||
#define OP3_INST_BFE_INT 5ULL |
||||
#define OP3_INST_BFI_INT 6ULL |
||||
#define OP3_INST_BIT_ALIGN_INT 12ULL |
||||
#define OP3_INST_BYTE_ALIGN_INT 13ULL |
||||
|
||||
static void patch_opcodes(char *w, unsigned remaining) |
||||
{ |
||||
uint64_t *opcode = (uint64_t *)w; |
||||
int patched = 0; |
||||
int count_bfe_int = 0; |
||||
int count_bfe_uint = 0; |
||||
int count_byte_align = 0; |
||||
while (42) { |
||||
int clamp = (*opcode >> (32 + 31)) & 0x1; |
||||
int dest_rel = (*opcode >> (32 + 28)) & 0x1; |
||||
int alu_inst = (*opcode >> (32 + 13)) & 0x1f; |
||||
int s2_neg = (*opcode >> (32 + 12)) & 0x1; |
||||
int s2_rel = (*opcode >> (32 + 9)) & 0x1; |
||||
int pred_sel = (*opcode >> 29) & 0x3; |
||||
if (!clamp && !dest_rel && !s2_neg && !s2_rel && !pred_sel) { |
||||
if (alu_inst == OP3_INST_BFE_INT) { |
||||
count_bfe_int++; |
||||
} else if (alu_inst == OP3_INST_BFE_UINT) { |
||||
count_bfe_uint++; |
||||
} else if (alu_inst == OP3_INST_BYTE_ALIGN_INT) { |
||||
count_byte_align++; |
||||
// patch this instruction to BFI_INT
|
||||
*opcode &= 0xfffc1fffffffffffULL; |
||||
*opcode |= OP3_INST_BFI_INT << (32 + 13); |
||||
patched++; |
||||
} |
||||
} |
||||
if (remaining <= 8) |
||||
break; |
||||
opcode++; |
||||
remaining -= 8; |
||||
} |
||||
applog(LOG_DEBUG, "Potential OP3 instructions identified: " |
||||
"%i BFE_INT, %i BFE_UINT, %i BYTE_ALIGN", |
||||
count_bfe_int, count_bfe_uint, count_byte_align); |
||||
applog(LOG_DEBUG, "Patched a total of %i BFI_INT instructions", patched); |
||||
} |
||||
|
||||
bool kernel_bfi_patch(char *binary, unsigned binary_size) |
||||
{ |
||||
unsigned remaining = binary_size; |
||||
char *w = binary; |
||||
unsigned int start, length; |
||||
|
||||
/* Find 2nd incidence of .text, and copy the program's
|
||||
* position and length at a fixed offset from that. Then go |
||||
* back and find the 2nd incidence of \x7ELF (rewind by one |
||||
* from ELF) and then patch the opcocdes */ |
||||
if (!advance(&w, &remaining, ".text")) |
||||
return false; |
||||
w++; remaining--; |
||||
if (!advance(&w, &remaining, ".text")) { |
||||
/* 32 bit builds only one ELF */ |
||||
w--; remaining++; |
||||
} |
||||
memcpy(&start, w + 285, 4); |
||||
memcpy(&length, w + 289, 4); |
||||
w = binary; remaining = binary_size; |
||||
if (!advance(&w, &remaining, "ELF")) |
||||
return false; |
||||
w++; remaining--; |
||||
if (!advance(&w, &remaining, "ELF")) { |
||||
/* 32 bit builds only one ELF */ |
||||
w--; remaining++; |
||||
} |
||||
w--; remaining++; |
||||
w += start; remaining -= start; |
||||
applog(LOG_DEBUG, "At %p (%u rem. bytes), to begin patching", |
||||
w, remaining); |
||||
patch_opcodes(w, length); |
||||
|
||||
return true; |
||||
} |
Loading…
Reference in new issue