|
|
|
@ -379,44 +379,35 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
@@ -379,44 +379,35 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
|
|
|
|
|
|
|
|
|
|
/* copy over all of the generated binaries. */ |
|
|
|
|
binaries = (char **)malloc( sizeof(char *)*nDevices ); |
|
|
|
|
for( i = 0; i < nDevices; i++ ) { |
|
|
|
|
if (opt_debug) |
|
|
|
|
applog(LOG_DEBUG, "binary size %d : %d", i, binary_sizes[i]); |
|
|
|
|
if( binary_sizes[i] != 0 ) |
|
|
|
|
binaries[i] = (char *)malloc( sizeof(char)*binary_sizes[i] ); |
|
|
|
|
else |
|
|
|
|
binaries[i] = NULL; |
|
|
|
|
} |
|
|
|
|
if (opt_debug) |
|
|
|
|
applog(LOG_DEBUG, "binary size %d : %d", gpu, binary_sizes[gpu]); |
|
|
|
|
binaries[gpu] = (char *)malloc( sizeof(char)*binary_sizes[i] ); |
|
|
|
|
err = clGetProgramInfo( clState->program, CL_PROGRAM_BINARIES, sizeof(char *)*nDevices, binaries, NULL ); |
|
|
|
|
|
|
|
|
|
for (i = 0; i < nDevices; i++) { |
|
|
|
|
if (!binaries[i]) |
|
|
|
|
continue; |
|
|
|
|
|
|
|
|
|
unsigned remaining = binary_sizes[i]; |
|
|
|
|
char *w = binaries[i]; |
|
|
|
|
unsigned int start, length; |
|
|
|
|
|
|
|
|
|
/* Find 2nd incidence of .text, and copy the program's
|
|
|
|
|
* position and length at a fixed offset from that. Then go |
|
|
|
|
* back and find the 2nd incidence of \x7ELF (rewind by one |
|
|
|
|
* from ELF) and then patch the opcocdes */ |
|
|
|
|
advance(&w, &remaining, ".text"); |
|
|
|
|
w++; remaining--; |
|
|
|
|
advance(&w, &remaining, ".text"); |
|
|
|
|
memcpy(&start, w + 285, 4); |
|
|
|
|
memcpy(&length, w + 289, 4); |
|
|
|
|
w = binaries[i]; remaining = binary_sizes[i]; |
|
|
|
|
advance(&w, &remaining, "ELF"); |
|
|
|
|
w++; remaining--; |
|
|
|
|
advance(&w, &remaining, "ELF"); |
|
|
|
|
w--; remaining++; |
|
|
|
|
w += start; remaining -= start; |
|
|
|
|
if (opt_debug) |
|
|
|
|
applog(LOG_DEBUG, "At %p (%u rem. bytes), to begin patching", |
|
|
|
|
w, remaining); |
|
|
|
|
patch_opcodes(w, length); |
|
|
|
|
} |
|
|
|
|
unsigned remaining = binary_sizes[gpu]; |
|
|
|
|
char *w = binaries[gpu]; |
|
|
|
|
unsigned int start, length; |
|
|
|
|
|
|
|
|
|
/* Find 2nd incidence of .text, and copy the program's
|
|
|
|
|
* position and length at a fixed offset from that. Then go |
|
|
|
|
* back and find the 2nd incidence of \x7ELF (rewind by one |
|
|
|
|
* from ELF) and then patch the opcocdes */ |
|
|
|
|
advance(&w, &remaining, ".text"); |
|
|
|
|
w++; remaining--; |
|
|
|
|
advance(&w, &remaining, ".text"); |
|
|
|
|
memcpy(&start, w + 285, 4); |
|
|
|
|
memcpy(&length, w + 289, 4); |
|
|
|
|
w = binaries[gpu]; remaining = binary_sizes[gpu]; |
|
|
|
|
advance(&w, &remaining, "ELF"); |
|
|
|
|
w++; remaining--; |
|
|
|
|
advance(&w, &remaining, "ELF"); |
|
|
|
|
w--; remaining++; |
|
|
|
|
w += start; remaining -= start; |
|
|
|
|
if (opt_debug) |
|
|
|
|
applog(LOG_DEBUG, "At %p (%u rem. bytes), to begin patching", |
|
|
|
|
w, remaining); |
|
|
|
|
patch_opcodes(w, length); |
|
|
|
|
|
|
|
|
|
status = clReleaseProgram(clState->program); |
|
|
|
|
if(status != CL_SUCCESS) |
|
|
|
|
{ |
|
|
|
|