mirror of
https://github.com/GOSTSec/sgminer
synced 2025-01-11 15:27:53 +00:00
Actually get first BFI_INT patch working.
This commit is contained in:
parent
a40003ab62
commit
91e5cef3a5
@ -814,7 +814,6 @@ static void *gpuminer_thread(void *userdata)
|
||||
BUFFERSIZE, res, 0, NULL, NULL);
|
||||
if (unlikely(status != CL_SUCCESS))
|
||||
{ applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)"); goto out;}
|
||||
|
||||
for (i = 0; i < 128; i++) {
|
||||
int found = false;
|
||||
|
||||
|
69
ocl.c
69
ocl.c
@ -125,29 +125,29 @@ void patch_opcodes(char *w, unsigned remaining)
|
||||
int s2_rel = (*opcode >> (32 + 9)) & 0x1;
|
||||
int pred_sel = (*opcode >> 29) & 0x3;
|
||||
if (!clamp && !dest_rel && !s2_neg && !s2_rel && !pred_sel) {
|
||||
if (alu_inst == OP3_INST_BFE_INT) {
|
||||
count_bfe_int++;
|
||||
} else if (alu_inst == OP3_INST_BFE_UINT) {
|
||||
count_bfe_uint++;
|
||||
} else if (alu_inst == OP3_INST_BYTE_ALIGN_INT) {
|
||||
count_byte_align++;
|
||||
// patch this instruction to BFI_INT
|
||||
*opcode &= 0xfffc1fffffffffffUL;
|
||||
*opcode |= OP3_INST_BFI_INT << (32 + 13);
|
||||
patched++;
|
||||
}
|
||||
if (alu_inst == OP3_INST_BFE_INT) {
|
||||
count_bfe_int++;
|
||||
} else if (alu_inst == OP3_INST_BFE_UINT) {
|
||||
count_bfe_uint++;
|
||||
} else if (alu_inst == OP3_INST_BYTE_ALIGN_INT) {
|
||||
count_byte_align++;
|
||||
// patch this instruction to BFI_INT
|
||||
*opcode &= 0xfffc1fffffffffffUL;
|
||||
*opcode |= OP3_INST_BFI_INT << (32 + 13);
|
||||
patched++;
|
||||
}
|
||||
}
|
||||
if (remaining <= 8) {
|
||||
break;
|
||||
break;
|
||||
}
|
||||
opcode++;
|
||||
remaining -= 8;
|
||||
}
|
||||
if (opt_debug) {
|
||||
printf("Potential OP3 instructions identified: "
|
||||
"%i BFE_INT, %i BFE_UINT, %i BYTE_ALIGN\n",
|
||||
applog(LOG_DEBUG, "Potential OP3 instructions identified: "
|
||||
"%i BFE_INT, %i BFE_UINT, %i BYTE_ALIGN",
|
||||
count_bfe_int, count_bfe_uint, count_byte_align);
|
||||
printf("Patched a total of %i BFI_INT instructions\n", patched);
|
||||
applog(LOG_DEBUG, "Patched a total of %i BFI_INT instructions", patched);
|
||||
}
|
||||
}
|
||||
|
||||
@ -316,31 +316,34 @@ _clState *initCl(int gpu, char *name, size_t nameSize) {
|
||||
}
|
||||
err = clGetProgramInfo( clState->program, CL_PROGRAM_BINARIES, sizeof(char *)*nDevices, binaries, NULL );
|
||||
|
||||
#if 0
|
||||
for (i = 0; i < nDevices; i++) {
|
||||
if (!binaries[i])
|
||||
continue;
|
||||
|
||||
unsigned remaining = binary_sizes[i];
|
||||
char *w = binaries[i];
|
||||
const int ati_cal_markers = 17;
|
||||
int j;
|
||||
for (j = 0; j < ati_cal_markers; j++) {
|
||||
if (opt_debug)
|
||||
printf("At %p (%u rem. bytes), searching ATI CAL marker %i\n",
|
||||
w, remaining, j);
|
||||
advance(&w, &remaining, "ATI CAL");
|
||||
if (remaining < 1)
|
||||
fprintf(stderr, "Only %u rem. bytes\n", remaining), exit(1);
|
||||
w++; remaining--;
|
||||
}
|
||||
if (remaining < 11)
|
||||
fprintf(stderr, "Only %u rem. bytes\n", remaining), exit(1);
|
||||
w += 11; remaining -= 11;
|
||||
patch_opcodes(w, remaining);
|
||||
exit (0);
|
||||
unsigned int start, length;
|
||||
|
||||
/* Find 2nd incidence of .text, and copy the program's
|
||||
* position and length at a fixed offset from that. Then go
|
||||
* back and find the 2nd incidence of \x7fELF (rewind by one
|
||||
* from ELF) and then patch the opcodes */
|
||||
advance(&w, &remaining, ".text");
|
||||
w++; remaining--;
|
||||
advance(&w, &remaining, ".text");
|
||||
memcpy(&start, w + 285, 4);
|
||||
memcpy(&length, w + 289, 4);
|
||||
w = binaries[i]; remaining = binary_sizes[i];
|
||||
advance(&w, &remaining, "ELF");
|
||||
w++; remaining--;
|
||||
advance(&w, &remaining, "ELF");
|
||||
w--; remaining++;
|
||||
w += start; remaining -= start;
|
||||
if (opt_debug)
|
||||
printf("At %p (%u rem. bytes), to begin patching\n",
|
||||
w, remaining);
|
||||
patch_opcodes(w, length);
|
||||
}
|
||||
#endif
|
||||
status = clReleaseProgram(clState->program);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
|
@ -1,5 +1,7 @@
|
||||
typedef uint z;
|
||||
|
||||
#define BITALIGN
|
||||
|
||||
#ifdef BITALIGN
|
||||
#pragma OPENCL EXTENSION cl_amd_media_ops : enable
|
||||
#define rotr(a, b) amd_bitalign((z)a, (z)a, (z)b)
|
||||
|
Loading…
Reference in New Issue
Block a user