Browse Source

Actually get first BFI_INT patch working.

nfactor-troky
Con Kolivas 14 years ago
parent
commit
91e5cef3a5
  1. 1
      cpu-miner.c
  2. 69
      ocl.c
  3. 2
      oclminer.cl

1
cpu-miner.c

@ -814,7 +814,6 @@ static void *gpuminer_thread(void *userdata)
BUFFERSIZE, res, 0, NULL, NULL); BUFFERSIZE, res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)"); goto out;} { applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)"); goto out;}
for (i = 0; i < 128; i++) { for (i = 0; i < 128; i++) {
int found = false; int found = false;

69
ocl.c

@ -125,29 +125,29 @@ void patch_opcodes(char *w, unsigned remaining)
int s2_rel = (*opcode >> (32 + 9)) & 0x1; int s2_rel = (*opcode >> (32 + 9)) & 0x1;
int pred_sel = (*opcode >> 29) & 0x3; int pred_sel = (*opcode >> 29) & 0x3;
if (!clamp && !dest_rel && !s2_neg && !s2_rel && !pred_sel) { if (!clamp && !dest_rel && !s2_neg && !s2_rel && !pred_sel) {
if (alu_inst == OP3_INST_BFE_INT) { if (alu_inst == OP3_INST_BFE_INT) {
count_bfe_int++; count_bfe_int++;
} else if (alu_inst == OP3_INST_BFE_UINT) { } else if (alu_inst == OP3_INST_BFE_UINT) {
count_bfe_uint++; count_bfe_uint++;
} else if (alu_inst == OP3_INST_BYTE_ALIGN_INT) { } else if (alu_inst == OP3_INST_BYTE_ALIGN_INT) {
count_byte_align++; count_byte_align++;
// patch this instruction to BFI_INT // patch this instruction to BFI_INT
*opcode &= 0xfffc1fffffffffffUL; *opcode &= 0xfffc1fffffffffffUL;
*opcode |= OP3_INST_BFI_INT << (32 + 13); *opcode |= OP3_INST_BFI_INT << (32 + 13);
patched++; patched++;
} }
} }
if (remaining <= 8) { if (remaining <= 8) {
break; break;
} }
opcode++; opcode++;
remaining -= 8; remaining -= 8;
} }
if (opt_debug) { if (opt_debug) {
printf("Potential OP3 instructions identified: " applog(LOG_DEBUG, "Potential OP3 instructions identified: "
"%i BFE_INT, %i BFE_UINT, %i BYTE_ALIGN\n", "%i BFE_INT, %i BFE_UINT, %i BYTE_ALIGN",
count_bfe_int, count_bfe_uint, count_byte_align); count_bfe_int, count_bfe_uint, count_byte_align);
printf("Patched a total of %i BFI_INT instructions\n", patched); applog(LOG_DEBUG, "Patched a total of %i BFI_INT instructions", patched);
} }
} }
@ -316,31 +316,34 @@ _clState *initCl(int gpu, char *name, size_t nameSize) {
} }
err = clGetProgramInfo( clState->program, CL_PROGRAM_BINARIES, sizeof(char *)*nDevices, binaries, NULL ); err = clGetProgramInfo( clState->program, CL_PROGRAM_BINARIES, sizeof(char *)*nDevices, binaries, NULL );
#if 0
for (i = 0; i < nDevices; i++) { for (i = 0; i < nDevices; i++) {
if (!binaries[i]) if (!binaries[i])
continue; continue;
unsigned remaining = binary_sizes[i]; unsigned remaining = binary_sizes[i];
char *w = binaries[i]; char *w = binaries[i];
const int ati_cal_markers = 17; unsigned int start, length;
int j;
for (j = 0; j < ati_cal_markers; j++) { /* Find 2nd incidence of .text, and copy the program's
if (opt_debug) * position and length at a fixed offset from that. Then go
printf("At %p (%u rem. bytes), searching ATI CAL marker %i\n", * back and find the 2nd incidence of \x7fELF (rewind by one
w, remaining, j); * from ELF) and then patch the opcodes */
advance(&w, &remaining, "ATI CAL"); advance(&w, &remaining, ".text");
if (remaining < 1) w++; remaining--;
fprintf(stderr, "Only %u rem. bytes\n", remaining), exit(1); advance(&w, &remaining, ".text");
w++; remaining--; memcpy(&start, w + 285, 4);
} memcpy(&length, w + 289, 4);
if (remaining < 11) w = binaries[i]; remaining = binary_sizes[i];
fprintf(stderr, "Only %u rem. bytes\n", remaining), exit(1); advance(&w, &remaining, "ELF");
w += 11; remaining -= 11; w++; remaining--;
patch_opcodes(w, remaining); advance(&w, &remaining, "ELF");
exit (0); w--; remaining++;
w += start; remaining -= start;
if (opt_debug)
printf("At %p (%u rem. bytes), to begin patching\n",
w, remaining);
patch_opcodes(w, length);
} }
#endif
status = clReleaseProgram(clState->program); status = clReleaseProgram(clState->program);
if(status != CL_SUCCESS) if(status != CL_SUCCESS)
{ {

2
oclminer.cl

@ -1,5 +1,7 @@
typedef uint z; typedef uint z;
#define BITALIGN
#ifdef BITALIGN #ifdef BITALIGN
#pragma OPENCL EXTENSION cl_amd_media_ops : enable #pragma OPENCL EXTENSION cl_amd_media_ops : enable
#define rotr(a, b) amd_bitalign((z)a, (z)a, (z)b) #define rotr(a, b) amd_bitalign((z)a, (z)a, (z)b)

Loading…
Cancel
Save