From 91e5cef3a58927131ce2aa603e47ba85c8e10a2f Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Wed, 22 Jun 2011 00:13:46 +1000 Subject: [PATCH] Actually get first BFI_INT patch working. --- cpu-miner.c | 1 - ocl.c | 69 ++++++++++++++++++++++++++++------------------------- oclminer.cl | 2 ++ 3 files changed, 38 insertions(+), 34 deletions(-) diff --git a/cpu-miner.c b/cpu-miner.c index 4e8c8cdb..16c6a9e8 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -814,7 +814,6 @@ static void *gpuminer_thread(void *userdata) BUFFERSIZE, res, 0, NULL, NULL); if (unlikely(status != CL_SUCCESS)) { applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)"); goto out;} - for (i = 0; i < 128; i++) { int found = false; diff --git a/ocl.c b/ocl.c index 264adc40..02e0d60d 100644 --- a/ocl.c +++ b/ocl.c @@ -125,29 +125,29 @@ void patch_opcodes(char *w, unsigned remaining) int s2_rel = (*opcode >> (32 + 9)) & 0x1; int pred_sel = (*opcode >> 29) & 0x3; if (!clamp && !dest_rel && !s2_neg && !s2_rel && !pred_sel) { - if (alu_inst == OP3_INST_BFE_INT) { - count_bfe_int++; - } else if (alu_inst == OP3_INST_BFE_UINT) { - count_bfe_uint++; - } else if (alu_inst == OP3_INST_BYTE_ALIGN_INT) { - count_byte_align++; - // patch this instruction to BFI_INT - *opcode &= 0xfffc1fffffffffffUL; - *opcode |= OP3_INST_BFI_INT << (32 + 13); - patched++; - } + if (alu_inst == OP3_INST_BFE_INT) { + count_bfe_int++; + } else if (alu_inst == OP3_INST_BFE_UINT) { + count_bfe_uint++; + } else if (alu_inst == OP3_INST_BYTE_ALIGN_INT) { + count_byte_align++; + // patch this instruction to BFI_INT + *opcode &= 0xfffc1fffffffffffUL; + *opcode |= OP3_INST_BFI_INT << (32 + 13); + patched++; + } } if (remaining <= 8) { - break; + break; } opcode++; remaining -= 8; } if (opt_debug) { - printf("Potential OP3 instructions identified: " - "%i BFE_INT, %i BFE_UINT, %i BYTE_ALIGN\n", + applog(LOG_DEBUG, "Potential OP3 instructions identified: " + "%i BFE_INT, %i BFE_UINT, %i BYTE_ALIGN", count_bfe_int, count_bfe_uint, count_byte_align); - printf("Patched a total of %i BFI_INT instructions\n", patched); + applog(LOG_DEBUG, "Patched a total of %i BFI_INT instructions", patched); } } @@ -316,31 +316,34 @@ _clState *initCl(int gpu, char *name, size_t nameSize) { } err = clGetProgramInfo( clState->program, CL_PROGRAM_BINARIES, sizeof(char *)*nDevices, binaries, NULL ); -#if 0 for (i = 0; i < nDevices; i++) { if (!binaries[i]) continue; unsigned remaining = binary_sizes[i]; char *w = binaries[i]; - const int ati_cal_markers = 17; - int j; - for (j = 0; j < ati_cal_markers; j++) { - if (opt_debug) - printf("At %p (%u rem. bytes), searching ATI CAL marker %i\n", - w, remaining, j); - advance(&w, &remaining, "ATI CAL"); - if (remaining < 1) - fprintf(stderr, "Only %u rem. bytes\n", remaining), exit(1); - w++; remaining--; - } - if (remaining < 11) - fprintf(stderr, "Only %u rem. bytes\n", remaining), exit(1); - w += 11; remaining -= 11; - patch_opcodes(w, remaining); - exit (0); + unsigned int start, length; + + /* Find 2nd incidence of .text, and copy the program's + * position and length at a fixed offset from that. Then go + * back and find the 2nd incidence of \x7ELF (rewind by one + * from ELF) and then patch the opcocdes */ + advance(&w, &remaining, ".text"); + w++; remaining--; + advance(&w, &remaining, ".text"); + memcpy(&start, w + 285, 4); + memcpy(&length, w + 289, 4); + w = binaries[i]; remaining = binary_sizes[i]; + advance(&w, &remaining, "ELF"); + w++; remaining--; + advance(&w, &remaining, "ELF"); + w--; remaining++; + w += start; remaining -= start; + if (opt_debug) + printf("At %p (%u rem. bytes), to begin patching\n", + w, remaining); + patch_opcodes(w, length); } -#endif status = clReleaseProgram(clState->program); if(status != CL_SUCCESS) { diff --git a/oclminer.cl b/oclminer.cl index d0650ac2..40550ca5 100644 --- a/oclminer.cl +++ b/oclminer.cl @@ -1,5 +1,7 @@ typedef uint z; +#define BITALIGN + #ifdef BITALIGN #pragma OPENCL EXTENSION cl_amd_media_ops : enable #define rotr(a, b) amd_bitalign((z)a, (z)a, (z)b)