|
|
|
@ -1,3 +1,4 @@
@@ -1,3 +1,4 @@
|
|
|
|
|
#define _GNU_SOURCE |
|
|
|
|
#include <signal.h> |
|
|
|
|
#include <stdlib.h> |
|
|
|
|
#include <string.h> |
|
|
|
@ -93,6 +94,63 @@ int clDevicesNum() {
@@ -93,6 +94,63 @@ int clDevicesNum() {
|
|
|
|
|
return numDevices; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/*
 * Reposition a scan cursor onto the next occurrence of a marker string.
 *
 * area:      in/out, current position in the buffer; on return it points at
 *            the first byte of the match (not past it, so searching again
 *            for the same marker matches the same spot).
 * remaining: in/out, number of bytes left starting at *area; reduced by the
 *            number of bytes that were skipped to reach the match.
 * marker:    NUL-terminated byte sequence to look for.
 *
 * If the marker does not occur within the remaining bytes, a message is
 * written to stderr and the process exits with status 1.
 */
void advance(char **area, unsigned *remaining, const char *marker)
{
	const size_t marker_len = strlen(marker);
	char *hit = memmem(*area, *remaining, marker, marker_len);

	if (hit == NULL) {
		fprintf(stderr, "Marker \"%s\" not found\n", marker);
		exit(1);
	}

	/* Account for the bytes skipped before moving the cursor. */
	*remaining -= hit - *area;
	*area = hit;
}
|
|
|
|
|
|
|
|
|
#define OP3_INST_BFE_UINT 4UL |
|
|
|
|
#define OP3_INST_BFE_INT 5UL |
|
|
|
|
#define OP3_INST_BFI_INT 6UL |
|
|
|
|
#define OP3_INST_BIT_ALIGN_INT 12UL |
|
|
|
|
#define OP3_INST_BYTE_ALIGN_INT 13UL |
|
|
|
|
|
|
|
|
|
void patch_opcodes(char *w, unsigned remaining) |
|
|
|
|
{ |
|
|
|
|
uint64_t *opcode = (uint64_t *)w; |
|
|
|
|
int patched = 0; |
|
|
|
|
int count_bfe_int = 0; |
|
|
|
|
int count_bfe_uint = 0; |
|
|
|
|
int count_byte_align = 0; |
|
|
|
|
while (42) |
|
|
|
|
{ |
|
|
|
|
int clamp = (*opcode >> (32 + 31)) & 0x1; |
|
|
|
|
int dest_rel = (*opcode >> (32 + 28)) & 0x1; |
|
|
|
|
int alu_inst = (*opcode >> (32 + 13)) & 0x1f; |
|
|
|
|
int s2_neg = (*opcode >> (32 + 12)) & 0x1; |
|
|
|
|
int s2_rel = (*opcode >> (32 + 9)) & 0x1; |
|
|
|
|
int pred_sel = (*opcode >> 29) & 0x3; |
|
|
|
|
if (!clamp && !dest_rel && !s2_neg && !s2_rel && !pred_sel) { |
|
|
|
|
if (alu_inst == OP3_INST_BFE_INT) { |
|
|
|
|
count_bfe_int++; |
|
|
|
|
} else if (alu_inst == OP3_INST_BFE_UINT) { |
|
|
|
|
count_bfe_uint++; |
|
|
|
|
} else if (alu_inst == OP3_INST_BYTE_ALIGN_INT) { |
|
|
|
|
count_byte_align++; |
|
|
|
|
// patch this instruction to BFI_INT
|
|
|
|
|
*opcode &= 0xfffc1fffffffffffUL; |
|
|
|
|
*opcode |= OP3_INST_BFI_INT << (32 + 13); |
|
|
|
|
patched++; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
if (remaining <= 8) { |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
opcode++; |
|
|
|
|
remaining -= 8; |
|
|
|
|
} |
|
|
|
|
if (opt_debug) { |
|
|
|
|
printf("Potential OP3 instructions identified: " |
|
|
|
|
"%i BFE_INT, %i BFE_UINT, %i BYTE_ALIGN\n", |
|
|
|
|
count_bfe_int, count_bfe_uint, count_byte_align); |
|
|
|
|
printf("Patched a total of %i BFI_INT instructions\n", patched); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
_clState *initCl(int gpu, char *name, size_t nameSize) { |
|
|
|
|
cl_int status = 0; |
|
|
|
|
|
|
|
|
@ -165,7 +223,7 @@ _clState *initCl(int gpu, char *name, size_t nameSize) {
@@ -165,7 +223,7 @@ _clState *initCl(int gpu, char *name, size_t nameSize) {
|
|
|
|
|
|
|
|
|
|
printf("List of devices:\n"); |
|
|
|
|
|
|
|
|
|
int i; |
|
|
|
|
unsigned int i; |
|
|
|
|
for(i=0; i<numDevices; i++) { |
|
|
|
|
char pbuff[100]; |
|
|
|
|
status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL); |
|
|
|
@ -236,6 +294,82 @@ _clState *initCl(int gpu, char *name, size_t nameSize) {
@@ -236,6 +294,82 @@ _clState *initCl(int gpu, char *name, size_t nameSize) {
|
|
|
|
|
return NULL; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
size_t nDevices; |
|
|
|
|
size_t * binary_sizes; |
|
|
|
|
char ** binaries; |
|
|
|
|
unsigned int i; |
|
|
|
|
int err; |
|
|
|
|
|
|
|
|
|
/* figure out number of devices and the sizes of the binary for each device. */ |
|
|
|
|
err = clGetProgramInfo( clState->program, CL_PROGRAM_NUM_DEVICES, sizeof(nDevices), &nDevices, NULL ); |
|
|
|
|
binary_sizes = (size_t *)malloc( sizeof(size_t)*nDevices ); |
|
|
|
|
err = clGetProgramInfo( clState->program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t)*nDevices, binary_sizes, NULL ); |
|
|
|
|
|
|
|
|
|
/* copy over all of the generated binaries. */ |
|
|
|
|
binaries = (char **)malloc( sizeof(char *)*nDevices ); |
|
|
|
|
for( i = 0; i < nDevices; i++ ) { |
|
|
|
|
printf("binary size %d : %d\n", i, binary_sizes[i]); |
|
|
|
|
if( binary_sizes[i] != 0 ) |
|
|
|
|
binaries[i] = (char *)malloc( sizeof(char)*binary_sizes[i] ); |
|
|
|
|
else |
|
|
|
|
binaries[i] = NULL; |
|
|
|
|
} |
|
|
|
|
err = clGetProgramInfo( clState->program, CL_PROGRAM_BINARIES, sizeof(char *)*nDevices, binaries, NULL ); |
|
|
|
|
// all the code should be within the first 83000 bytes or so, but scan
|
|
|
|
|
// a bit more for headroom
|
|
|
|
|
unsigned bytes_to_scan = 93000; |
|
|
|
|
for (i = 0; i < nDevices; i++) { |
|
|
|
|
if (!binaries[i]) |
|
|
|
|
continue; |
|
|
|
|
|
|
|
|
|
unsigned remaining = bytes_to_scan; |
|
|
|
|
char *w = binaries[i]; |
|
|
|
|
int j; |
|
|
|
|
|
|
|
|
|
if (opt_debug) |
|
|
|
|
printf("At %p (%u rem. bytes), searching outer elf marker\n", w, remaining); |
|
|
|
|
advance(&w, &remaining, "ELF"); |
|
|
|
|
if (opt_debug) |
|
|
|
|
printf("At %p (%u rem. bytes), searching inner elf marker\n", w, remaining); |
|
|
|
|
advance(&w, &remaining, "ELF"); |
|
|
|
|
if (opt_debug) |
|
|
|
|
printf("At %p (%u rem. bytes), searching first .text marker\n", w, remaining); |
|
|
|
|
advance(&w, &remaining, ".text"); |
|
|
|
|
if (opt_debug) |
|
|
|
|
printf("At %p (%u rem. bytes), searching second .text marker\n", w, remaining); |
|
|
|
|
advance(&w, &remaining, ".text"); |
|
|
|
|
// now we are pointing to the first opcode
|
|
|
|
|
patch_opcodes(w, remaining); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
status = clReleaseProgram(clState->program); |
|
|
|
|
if(status != CL_SUCCESS) |
|
|
|
|
{ |
|
|
|
|
printf("Error: Releasing program. (clReleaseProgram)\n"); |
|
|
|
|
return NULL; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
clState->program = clCreateProgramWithBinary(clState->context, numDevices, &devices[gpu], binary_sizes, binaries, &status, NULL); |
|
|
|
|
if(status != CL_SUCCESS) |
|
|
|
|
{ |
|
|
|
|
printf("Error: Loading Binary into cl_program (clCreateProgramWithBinary)\n"); |
|
|
|
|
return NULL; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/* create a cl program executable for all the devices specified */ |
|
|
|
|
status = clBuildProgram(clState->program, 1, &devices[gpu], NULL, NULL, NULL); |
|
|
|
|
if(status != CL_SUCCESS) |
|
|
|
|
{ |
|
|
|
|
printf("Error: Building Program (clBuildProgram)\n"); |
|
|
|
|
size_t logSize; |
|
|
|
|
status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize); |
|
|
|
|
|
|
|
|
|
char *log = malloc(logSize); |
|
|
|
|
status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, logSize, log, NULL); |
|
|
|
|
printf("%s\n", log); |
|
|
|
|
return NULL; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/* get a kernel object handle for a kernel with the given name */ |
|
|
|
|
clState->kernel = clCreateKernel(clState->program, "oclminer", &status); |
|
|
|
|
if(status != CL_SUCCESS) |
|
|
|
|