1
0
mirror of https://github.com/GOSTSec/sgminer synced 2025-01-11 15:27:53 +00:00

Build binaries from the generated kernel and save them under unique filenames.

Try to load this cached binary if it matches on next kernel instantiation.
This speeds up start-up dramatically, and each kernel configuration gets its own uniquely named kernel binary.
This commit is contained in:
Con Kolivas 2011-06-30 10:36:19 +10:00
parent 973b2199e1
commit 4d73057772

172
ocl.c
View File

@ -10,6 +10,8 @@
#include <time.h> #include <time.h>
#include <sys/time.h> #include <sys/time.h>
#include <pthread.h> #include <pthread.h>
#include <sys/stat.h>
#include <unistd.h>
#include "findnonce.h" #include "findnonce.h"
#include "ocl.h" #include "ocl.h"
@ -307,30 +309,6 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
if (clState->max_work_size > 512) if (clState->max_work_size > 512)
clState->max_work_size = 512; clState->max_work_size = 512;
/////////////////////////////////////////////////////////////////
// Load CL file, build CL program object, create CL kernel object
/////////////////////////////////////////////////////////////////
/* Load a different kernel depending on whether it supports
* cl_amd_media_ops or not */
char filename[10];
if (clState->hasBitAlign)
strcpy(filename, "phatk.cl");
else
strcpy(filename, "poclbm.cl");
int pl;
char *source, *rawsource = file_contents(filename, &pl);
size_t sourceSize[] = {(size_t)pl};
source = malloc(pl);
retry:
if (!source) {
applog(LOG_ERR, "Unable to malloc source");
return NULL;
}
memcpy(source, rawsource, pl);
/* For some reason 2 vectors is still better even if the card says /* For some reason 2 vectors is still better even if the card says
* otherwise */ * otherwise */
if (clState->preferred_vwidth > 1) if (clState->preferred_vwidth > 1)
@ -342,6 +320,107 @@ retry:
else else
clState->work_size = clState->max_work_size / clState->preferred_vwidth; clState->work_size = clState->max_work_size / clState->preferred_vwidth;
/* Create binary filename based on parameters passed to opencl
* compiler to ensure we only load a binary that matches what would
* have otherwise been created. The filename is:
* kernelname +/- bitalign + v + vectors + w + work_size + long + sizeof(long) + .bin
*/
char binaryfilename[255];
char numbuf[10];
char filename[10];
FILE *binaryfile;
size_t *binary_sizes;
char **binaries;
size_t nDevices = 1;
int pl;
char *source, *rawsource;
size_t sourceSize[] = {(size_t)pl};
source = malloc(pl);
if (!source) {
applog(LOG_ERR, "Unable to malloc source");
return NULL;
}
if (clState->hasBitAlign)
strcpy(filename, "phatk.cl");
else
strcpy(filename, "poclbm.cl");
rawsource = file_contents(filename, &pl);
binary_sizes = (size_t *)malloc(sizeof(size_t)*nDevices);
if (unlikely(!binary_sizes)) {
applog(LOG_ERR, "Unable to malloc binary_sizes");
return NULL;
}
binaries = (char **)malloc(sizeof(char *)*nDevices);
if (unlikely(!binaries)) {
applog(LOG_ERR, "Unable to malloc binaries");
return NULL;
}
if (clState->hasBitAlign) {
strcpy(binaryfilename, "phatk");
strcat(binaryfilename, "bitalign");
} else
strcpy(binaryfilename, "poclbm");
strcat(binaryfilename, "v");
sprintf(numbuf, "%d", clState->preferred_vwidth);
strcat(binaryfilename, numbuf);
strcat(binaryfilename, "w");
sprintf(numbuf, "%d", (int)clState->work_size);
strcat(binaryfilename, numbuf);
strcat(binaryfilename, "long");
sprintf(numbuf, "%d", (int)sizeof(long));
strcat(binaryfilename, numbuf);
strcat(binaryfilename, ".bin");
binaryfile = fopen(binaryfilename, "r");
if (!binaryfile) {
if (opt_debug)
applog(LOG_DEBUG, "No binary found, generating from source");
} else {
struct stat binary_stat;
if (unlikely(stat(binaryfilename, &binary_stat))) {
if (opt_debug)
applog(LOG_DEBUG, "Unable to stat binary, generating from source");
fclose(binaryfile);
goto build;
}
binary_sizes[gpu] = binary_stat.st_size;
binaries[gpu] = (char *)malloc(binary_sizes[gpu]);
if (unlikely(!binaries[gpu])) {
applog(LOG_ERR, "Unable to malloc binaries");
fclose(binaryfile);
return NULL;
}
if (fread(binaries[gpu], 1, binary_sizes[gpu], binaryfile) != binary_sizes[gpu]) {
applog(LOG_ERR, "Unable to fread binaries[gpu]");
fclose(binaryfile);
return NULL;
}
fclose(binaryfile);
clState->program = clCreateProgramWithBinary(clState->context, 1, &devices[gpu], &binary_sizes[gpu], (const unsigned char **)&binaries[gpu], &status, NULL);
if (status != CL_SUCCESS)
{
applog(LOG_ERR, "Error: Loading Binary into cl_program (clCreateProgramWithBinary)");
return NULL;
}
if (opt_debug)
applog(LOG_DEBUG, "Loaded binary image %s", binaryfilename);
goto built;
}
/////////////////////////////////////////////////////////////////
// Load CL file, build CL program object, create CL kernel object
/////////////////////////////////////////////////////////////////
build:
memcpy(source, rawsource, pl);
/* Patch the source file with the preferred_vwidth */ /* Patch the source file with the preferred_vwidth */
if (clState->preferred_vwidth > 1) { if (clState->preferred_vwidth > 1) {
char *find = strstr(source, "VECTORSX"); char *find = strstr(source, "VECTORSX");
@ -411,22 +490,24 @@ retry:
/* Patch the kernel if the hardware supports BFI_INT */ /* Patch the kernel if the hardware supports BFI_INT */
if (patchbfi) { if (patchbfi) {
size_t nDevices; /* figure out the size of the binary for each device. */
size_t * binary_sizes; status = clGetProgramInfo( clState->program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t)*nDevices, binary_sizes, NULL );
char ** binaries; if (unlikely(status != CL_SUCCESS))
int err; {
applog(LOG_ERR, "Error: Getting program info. (clGetPlatformInfo)");
/* figure out number of devices and the sizes of the binary for each device. */ return NULL;
err = clGetProgramInfo( clState->program, CL_PROGRAM_NUM_DEVICES, sizeof(nDevices), &nDevices, NULL ); }
binary_sizes = (size_t *)malloc( sizeof(size_t)*nDevices );
err = clGetProgramInfo( clState->program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t)*nDevices, binary_sizes, NULL );
/* copy over all of the generated binaries. */ /* copy over all of the generated binaries. */
binaries = (char **)malloc( sizeof(char *)*nDevices );
if (opt_debug) if (opt_debug)
applog(LOG_DEBUG, "binary size %d : %d", gpu, binary_sizes[gpu]); applog(LOG_DEBUG, "binary size %d : %d", gpu, binary_sizes[gpu]);
binaries[gpu] = (char *)malloc( sizeof(char)*binary_sizes[gpu] ); binaries[gpu] = (char *)malloc( sizeof(char)*binary_sizes[gpu] );
err = clGetProgramInfo( clState->program, CL_PROGRAM_BINARIES, sizeof(char *)*nDevices, binaries, NULL ); status = clGetProgramInfo( clState->program, CL_PROGRAM_BINARIES, sizeof(char *)*nDevices, binaries, NULL );
if (unlikely(status != CL_SUCCESS))
{
applog(LOG_ERR, "Error: Getting program info. (clGetPlatformInfo)");
return NULL;
}
unsigned remaining = binary_sizes[gpu]; unsigned remaining = binary_sizes[gpu];
char *w = binaries[gpu]; char *w = binaries[gpu];
@ -437,7 +518,7 @@ retry:
* back and find the 2nd incidence of \x7ELF (rewind by one * back and find the 2nd incidence of \x7ELF (rewind by one
* from ELF) and then patch the opcodes */ * from ELF) and then patch the opcodes */
if (!advance(&w, &remaining, ".text")) if (!advance(&w, &remaining, ".text"))
{patchbfi = 0; goto retry;} {patchbfi = 0; goto build;}
w++; remaining--; w++; remaining--;
if (!advance(&w, &remaining, ".text")) { if (!advance(&w, &remaining, ".text")) {
/* 32 bit builds only one ELF */ /* 32 bit builds only one ELF */
@ -447,7 +528,7 @@ retry:
memcpy(&length, w + 289, 4); memcpy(&length, w + 289, 4);
w = binaries[gpu]; remaining = binary_sizes[gpu]; w = binaries[gpu]; remaining = binary_sizes[gpu];
if (!advance(&w, &remaining, "ELF")) if (!advance(&w, &remaining, "ELF"))
{patchbfi = 0; goto retry;} {patchbfi = 0; goto build;}
w++; remaining--; w++; remaining--;
if (!advance(&w, &remaining, "ELF")) { if (!advance(&w, &remaining, "ELF")) {
/* 32 bit builds only one ELF */ /* 32 bit builds only one ELF */
@ -478,6 +559,23 @@ retry:
free(source); free(source);
free(rawsource); free(rawsource);
/* Save the binary to be loaded next time */
binaryfile = fopen(binaryfilename, "w");
if (!binaryfile) {
/* Not a fatal problem, just means we build it again next time */
if (opt_debug)
applog(LOG_DEBUG, "Unable to create file %s", binaryfilename);
} else {
if (unlikely(fwrite(binaries[gpu], 1, binary_sizes[gpu], binaryfile) != binary_sizes[gpu])) {
applog(LOG_ERR, "Unable to fwrite to binaryfile");
return NULL;
}
fclose(binaryfile);
}
built:
free(binaries);
free(binary_sizes);
applog(LOG_INFO, "Initialising kernel %s with%s BFI_INT patching, %d vectors and worksize %d", applog(LOG_INFO, "Initialising kernel %s with%s BFI_INT patching, %d vectors and worksize %d",
filename, patchbfi ? "" : "out", clState->preferred_vwidth, clState->work_size); filename, patchbfi ? "" : "out", clState->preferred_vwidth, clState->work_size);