mirror of
https://github.com/GOSTSec/sgminer
synced 2025-01-11 07:17:58 +00:00
First BFI_INT patch changes.
This commit is contained in:
parent
910e6943b2
commit
c548dea848
136
ocl.c
136
ocl.c
@ -1,3 +1,4 @@
|
||||
#define _GNU_SOURCE
|
||||
#include <signal.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
@ -93,6 +94,63 @@ int clDevicesNum() {
|
||||
return numDevices;
|
||||
}
|
||||
|
||||
/*
 * Move *area forward to the first occurrence of `marker` found within the
 * next *remaining bytes, and reduce *remaining by the number of bytes
 * skipped.
 *
 * On return *area points AT the first byte of the marker, not past it.
 * Calling this again with the same marker therefore matches the same
 * occurrence unless the caller steps *area forward in between.
 *
 * If the marker is not found, a diagnostic is written to stderr and the
 * process terminates with exit status 1.
 *
 * memmem() is a GNU extension; the file defines _GNU_SOURCE for it.
 */
void advance(char **area, unsigned *remaining, const char *marker)
{
	char *start = *area;
	size_t marker_len = strlen(marker);
	char *hit = memmem(start, *remaining, marker, marker_len);

	if (hit == NULL) {
		fprintf(stderr, "Marker \"%s\" not found\n", marker);
		exit(1);
	}

	*area = hit;
	*remaining -= hit - start;
}
|
||||
|
||||
/*
 * Instruction numbers matched against (and written into) the 5-bit
 * instruction field that patch_opcodes() extracts from bits 45..49 of each
 * 64-bit opcode word.
 *
 * They are unsigned long long so that shifting one of them into the upper
 * half of the word (<< (32 + 13)) is well defined even on targets where
 * unsigned long is only 32 bits wide.
 */
#define OP3_INST_BFE_UINT	4ULL
#define OP3_INST_BFE_INT	5ULL
#define OP3_INST_BFI_INT	6ULL
#define OP3_INST_BIT_ALIGN_INT	12ULL
#define OP3_INST_BYTE_ALIGN_INT	13ULL
|
||||
|
||||
void patch_opcodes(char *w, unsigned remaining)
|
||||
{
|
||||
uint64_t *opcode = (uint64_t *)w;
|
||||
int patched = 0;
|
||||
int count_bfe_int = 0;
|
||||
int count_bfe_uint = 0;
|
||||
int count_byte_align = 0;
|
||||
while (42)
|
||||
{
|
||||
int clamp = (*opcode >> (32 + 31)) & 0x1;
|
||||
int dest_rel = (*opcode >> (32 + 28)) & 0x1;
|
||||
int alu_inst = (*opcode >> (32 + 13)) & 0x1f;
|
||||
int s2_neg = (*opcode >> (32 + 12)) & 0x1;
|
||||
int s2_rel = (*opcode >> (32 + 9)) & 0x1;
|
||||
int pred_sel = (*opcode >> 29) & 0x3;
|
||||
if (!clamp && !dest_rel && !s2_neg && !s2_rel && !pred_sel) {
|
||||
if (alu_inst == OP3_INST_BFE_INT) {
|
||||
count_bfe_int++;
|
||||
} else if (alu_inst == OP3_INST_BFE_UINT) {
|
||||
count_bfe_uint++;
|
||||
} else if (alu_inst == OP3_INST_BYTE_ALIGN_INT) {
|
||||
count_byte_align++;
|
||||
// patch this instruction to BFI_INT
|
||||
*opcode &= 0xfffc1fffffffffffUL;
|
||||
*opcode |= OP3_INST_BFI_INT << (32 + 13);
|
||||
patched++;
|
||||
}
|
||||
}
|
||||
if (remaining <= 8) {
|
||||
break;
|
||||
}
|
||||
opcode++;
|
||||
remaining -= 8;
|
||||
}
|
||||
if (opt_debug) {
|
||||
printf("Potential OP3 instructions identified: "
|
||||
"%i BFE_INT, %i BFE_UINT, %i BYTE_ALIGN\n",
|
||||
count_bfe_int, count_bfe_uint, count_byte_align);
|
||||
printf("Patched a total of %i BFI_INT instructions\n", patched);
|
||||
}
|
||||
}
|
||||
|
||||
_clState *initCl(int gpu, char *name, size_t nameSize) {
|
||||
cl_int status = 0;
|
||||
|
||||
@ -165,7 +223,7 @@ _clState *initCl(int gpu, char *name, size_t nameSize) {
|
||||
|
||||
printf("List of devices:\n");
|
||||
|
||||
int i;
|
||||
unsigned int i;
|
||||
for(i=0; i<numDevices; i++) {
|
||||
char pbuff[100];
|
||||
status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
|
||||
@ -236,6 +294,82 @@ _clState *initCl(int gpu, char *name, size_t nameSize) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
size_t nDevices;
|
||||
size_t * binary_sizes;
|
||||
char ** binaries;
|
||||
unsigned int i;
|
||||
int err;
|
||||
|
||||
/* figure out number of devices and the sizes of the binary for each device. */
|
||||
err = clGetProgramInfo( clState->program, CL_PROGRAM_NUM_DEVICES, sizeof(nDevices), &nDevices, NULL );
|
||||
binary_sizes = (size_t *)malloc( sizeof(size_t)*nDevices );
|
||||
err = clGetProgramInfo( clState->program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t)*nDevices, binary_sizes, NULL );
|
||||
|
||||
/* copy over all of the generated binaries. */
|
||||
binaries = (char **)malloc( sizeof(char *)*nDevices );
|
||||
for( i = 0; i < nDevices; i++ ) {
|
||||
printf("binary size %d : %d\n", i, binary_sizes[i]);
|
||||
if( binary_sizes[i] != 0 )
|
||||
binaries[i] = (char *)malloc( sizeof(char)*binary_sizes[i] );
|
||||
else
|
||||
binaries[i] = NULL;
|
||||
}
|
||||
err = clGetProgramInfo( clState->program, CL_PROGRAM_BINARIES, sizeof(char *)*nDevices, binaries, NULL );
|
||||
// all the code should be within the first 83000 bytes or so, but scan
|
||||
// a bit more for headroom
|
||||
unsigned bytes_to_scan = 93000;
|
||||
for (i = 0; i < nDevices; i++) {
|
||||
if (!binaries[i])
|
||||
continue;
|
||||
|
||||
unsigned remaining = bytes_to_scan;
|
||||
char *w = binaries[i];
|
||||
int j;
|
||||
|
||||
if (opt_debug)
|
||||
printf("At %p (%u rem. bytes), searching outer elf marker\n", w, remaining);
|
||||
advance(&w, &remaining, "ELF");
|
||||
if (opt_debug)
|
||||
printf("At %p (%u rem. bytes), searching inner elf marker\n", w, remaining);
|
||||
advance(&w, &remaining, "ELF");
|
||||
if (opt_debug)
|
||||
printf("At %p (%u rem. bytes), searching first .text marker\n", w, remaining);
|
||||
advance(&w, &remaining, ".text");
|
||||
if (opt_debug)
|
||||
printf("At %p (%u rem. bytes), searching second .text marker\n", w, remaining);
|
||||
advance(&w, &remaining, ".text");
|
||||
// now we are pointing to the first opcode
|
||||
patch_opcodes(w, remaining);
|
||||
}
|
||||
|
||||
status = clReleaseProgram(clState->program);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
printf("Error: Releasing program. (clReleaseProgram)\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
clState->program = clCreateProgramWithBinary(clState->context, numDevices, &devices[gpu], binary_sizes, binaries, &status, NULL);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
printf("Error: Loading Binary into cl_program (clCreateProgramWithBinary)\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* create a cl program executable for all the devices specified */
|
||||
status = clBuildProgram(clState->program, 1, &devices[gpu], NULL, NULL, NULL);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
printf("Error: Building Program (clBuildProgram)\n");
|
||||
size_t logSize;
|
||||
status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
|
||||
|
||||
char *log = malloc(logSize);
|
||||
status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, logSize, log, NULL);
|
||||
printf("%s\n", log);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* get a kernel object handle for a kernel with the given name */
|
||||
clState->kernel = clCreateKernel(clState->program, "oclminer", &status);
|
||||
if(status != CL_SUCCESS)
|
||||
|
14
oclminer.cl
14
oclminer.cl
@ -1,4 +1,16 @@
|
||||
#define rotr(x, n) rotate(x, (uint)(32 - n))
|
||||
typedef uint z;

/* Define BITALIGN to use the cl_amd_media_ops builtins below; leave it
 * undefined to fall back to the portable expressions in the #else branch. */
#define BITALIGN

/* Every macro argument is parenthesised so that compound expressions such
 * as Ma(x ^ y, b, c) expand with the intended precedence. */
#ifdef BITALIGN
#pragma OPENCL EXTENSION cl_amd_media_ops : enable
#define rotr(a, b) amd_bitalign((z)(a), (z)(a), (z)(b))
/* NOTE(review): Ch and Ma use amd_bytealign as a placeholder. The host side
 * (patch_opcodes in ocl.c) rewrites BYTE_ALIGN_INT opcodes to BFI_INT in the
 * compiled binary; these forms presumably only match the #else definitions
 * after that patch has been applied - confirm before running unpatched. */
#define Ch(a, b, c) amd_bytealign((a), (b), (c))
#define Ma(a, b, c) amd_bytealign((b), ((a) | (c)), ((c) & (a)))
#else
#define rotr(a, b) rotate((z)(a), (z)(32 - (b)))
#define Ch(a, b, c) ((c) ^ ((a) & ((b) ^ (c))))
#define Ma(a, b, c) (((b) & (c)) | ((a) & ((b) | (c))))
#endif

/* Kernel attribute fixing the required work-group size at 128x1x1. */
#define WGS __attribute__((reqd_work_group_size(128, 1, 1)))
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user