|
|
|
@ -10,8 +10,6 @@
@@ -10,8 +10,6 @@
|
|
|
|
|
#include <time.h> |
|
|
|
|
#include <sys/time.h> |
|
|
|
|
#include <pthread.h> |
|
|
|
|
#include <sys/stat.h> |
|
|
|
|
#include <unistd.h> |
|
|
|
|
|
|
|
|
|
#include "findnonce.h" |
|
|
|
|
#include "ocl.h" |
|
|
|
@ -309,116 +307,41 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
@@ -309,116 +307,41 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
|
|
|
|
|
if (clState->max_work_size > 512) |
|
|
|
|
clState->max_work_size = 512; |
|
|
|
|
|
|
|
|
|
/* For some reason 2 vectors is still better even if the card says
|
|
|
|
|
* otherwise */ |
|
|
|
|
if (clState->preferred_vwidth > 1) |
|
|
|
|
clState->preferred_vwidth = 2; |
|
|
|
|
if (opt_vectors) |
|
|
|
|
clState->preferred_vwidth = opt_vectors; |
|
|
|
|
if (opt_worksize && opt_worksize <= clState->max_work_size) |
|
|
|
|
clState->work_size = opt_worksize; |
|
|
|
|
else |
|
|
|
|
clState->work_size = clState->max_work_size / clState->preferred_vwidth; |
|
|
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
|
// Load CL file, build CL program object, create CL kernel object
|
|
|
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
/* Create binary filename based on parameters passed to opencl
|
|
|
|
|
* compiler to ensure we only load a binary that matches what would |
|
|
|
|
* have otherwise been created. The filename is: |
|
|
|
|
* kernelname +/- bitalign + v + vectors + w + work_size + long + sizeof(long) + .bin |
|
|
|
|
*/ |
|
|
|
|
char binaryfilename[255]; |
|
|
|
|
char numbuf[10]; |
|
|
|
|
/* Load a different kernel depending on whether it supports
|
|
|
|
|
* cl_amd_media_ops or not */ |
|
|
|
|
char filename[10]; |
|
|
|
|
|
|
|
|
|
if (clState->hasBitAlign) |
|
|
|
|
strcpy(filename, "phatk.cl"); |
|
|
|
|
else |
|
|
|
|
strcpy(filename, "poclbm.cl"); |
|
|
|
|
FILE *binaryfile; |
|
|
|
|
size_t *binary_sizes; |
|
|
|
|
char **binaries; |
|
|
|
|
size_t nDevices = 1; |
|
|
|
|
|
|
|
|
|
int pl; |
|
|
|
|
char *source, *rawsource = file_contents(filename, &pl); |
|
|
|
|
size_t sourceSize[] = {(size_t)pl}; |
|
|
|
|
|
|
|
|
|
source = malloc(pl); |
|
|
|
|
retry: |
|
|
|
|
if (!source) { |
|
|
|
|
applog(LOG_ERR, "Unable to malloc source"); |
|
|
|
|
return NULL; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
binary_sizes = (size_t *)malloc(sizeof(size_t)*nDevices); |
|
|
|
|
if (unlikely(!binary_sizes)) { |
|
|
|
|
applog(LOG_ERR, "Unable to malloc binary_sizes"); |
|
|
|
|
return NULL; |
|
|
|
|
} |
|
|
|
|
binaries = (char **)malloc(sizeof(char *)*nDevices); |
|
|
|
|
if (unlikely(!binaries)) { |
|
|
|
|
applog(LOG_ERR, "Unable to malloc binaries"); |
|
|
|
|
return NULL; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (clState->hasBitAlign) { |
|
|
|
|
strcpy(binaryfilename, "phatk"); |
|
|
|
|
strcat(binaryfilename, "bitalign"); |
|
|
|
|
} else |
|
|
|
|
strcpy(binaryfilename, "poclbm"); |
|
|
|
|
strcat(binaryfilename, "v"); |
|
|
|
|
sprintf(numbuf, "%d", clState->preferred_vwidth); |
|
|
|
|
strcat(binaryfilename, numbuf); |
|
|
|
|
strcat(binaryfilename, "w"); |
|
|
|
|
sprintf(numbuf, "%d", (int)clState->work_size); |
|
|
|
|
strcat(binaryfilename, numbuf); |
|
|
|
|
strcat(binaryfilename, "long"); |
|
|
|
|
sprintf(numbuf, "%d", (int)sizeof(long)); |
|
|
|
|
strcat(binaryfilename, numbuf); |
|
|
|
|
strcat(binaryfilename, ".bin"); |
|
|
|
|
|
|
|
|
|
binaryfile = fopen(binaryfilename, "r"); |
|
|
|
|
if (!binaryfile) { |
|
|
|
|
if (opt_debug) |
|
|
|
|
applog(LOG_DEBUG, "No binary found, generating from source"); |
|
|
|
|
} else { |
|
|
|
|
struct stat binary_stat; |
|
|
|
|
|
|
|
|
|
if (unlikely(stat(binaryfilename, &binary_stat))) { |
|
|
|
|
if (opt_debug) |
|
|
|
|
applog(LOG_DEBUG, "Unable to stat binary, generating from source"); |
|
|
|
|
fclose(binaryfile); |
|
|
|
|
goto build; |
|
|
|
|
} |
|
|
|
|
binary_sizes[gpu] = binary_stat.st_size; |
|
|
|
|
binaries[gpu] = (char *)malloc(binary_sizes[gpu]); |
|
|
|
|
if (unlikely(!binaries[gpu])) { |
|
|
|
|
applog(LOG_ERR, "Unable to malloc binaries"); |
|
|
|
|
fclose(binaryfile); |
|
|
|
|
return NULL; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (fread(binaries[gpu], 1, binary_sizes[gpu], binaryfile) != binary_sizes[gpu]) { |
|
|
|
|
applog(LOG_ERR, "Unable to fread binaries[gpu]"); |
|
|
|
|
fclose(binaryfile); |
|
|
|
|
return NULL; |
|
|
|
|
} |
|
|
|
|
fclose(binaryfile); |
|
|
|
|
|
|
|
|
|
clState->program = clCreateProgramWithBinary(clState->context, 1, &devices[gpu], &binary_sizes[gpu], (const unsigned char **)&binaries[gpu], &status, NULL); |
|
|
|
|
if (status != CL_SUCCESS) |
|
|
|
|
{ |
|
|
|
|
applog(LOG_ERR, "Error: Loading Binary into cl_program (clCreateProgramWithBinary)"); |
|
|
|
|
return NULL; |
|
|
|
|
} |
|
|
|
|
if (opt_debug) |
|
|
|
|
applog(LOG_DEBUG, "Loaded binary image %s", binaryfilename); |
|
|
|
|
goto built; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
|
// Load CL file, build CL program object, create CL kernel object
|
|
|
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
build: |
|
|
|
|
memcpy(source, rawsource, pl); |
|
|
|
|
|
|
|
|
|
/* For some reason 2 vectors is still better even if the card says
|
|
|
|
|
* otherwise */ |
|
|
|
|
if (clState->preferred_vwidth > 1) |
|
|
|
|
clState->preferred_vwidth = 2; |
|
|
|
|
if (opt_vectors) |
|
|
|
|
clState->preferred_vwidth = opt_vectors; |
|
|
|
|
if (opt_worksize && opt_worksize <= clState->max_work_size) |
|
|
|
|
clState->work_size = opt_worksize; |
|
|
|
|
else |
|
|
|
|
clState->work_size = clState->max_work_size / clState->preferred_vwidth; |
|
|
|
|
|
|
|
|
|
/* Patch the source file with the preferred_vwidth */ |
|
|
|
|
if (clState->preferred_vwidth > 1) { |
|
|
|
|
char *find = strstr(source, "VECTORSX"); |
|
|
|
@ -488,24 +411,22 @@ build:
@@ -488,24 +411,22 @@ build:
|
|
|
|
|
|
|
|
|
|
/* Patch the kernel if the hardware supports BFI_INT */ |
|
|
|
|
if (patchbfi) { |
|
|
|
|
/* figure out the size of the binary for each device. */ |
|
|
|
|
status = clGetProgramInfo( clState->program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t)*nDevices, binary_sizes, NULL ); |
|
|
|
|
if (unlikely(status != CL_SUCCESS)) |
|
|
|
|
{ |
|
|
|
|
applog(LOG_ERR, "Error: Getting program info. (clGetPlatformInfo)"); |
|
|
|
|
return NULL; |
|
|
|
|
} |
|
|
|
|
size_t nDevices; |
|
|
|
|
size_t * binary_sizes; |
|
|
|
|
char ** binaries; |
|
|
|
|
int err; |
|
|
|
|
|
|
|
|
|
/* figure out number of devices and the sizes of the binary for each device. */ |
|
|
|
|
err = clGetProgramInfo( clState->program, CL_PROGRAM_NUM_DEVICES, sizeof(nDevices), &nDevices, NULL ); |
|
|
|
|
binary_sizes = (size_t *)malloc( sizeof(size_t)*nDevices ); |
|
|
|
|
err = clGetProgramInfo( clState->program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t)*nDevices, binary_sizes, NULL ); |
|
|
|
|
|
|
|
|
|
/* copy over all of the generated binaries. */ |
|
|
|
|
binaries = (char **)malloc( sizeof(char *)*nDevices ); |
|
|
|
|
if (opt_debug) |
|
|
|
|
applog(LOG_DEBUG, "binary size %d : %d", gpu, binary_sizes[gpu]); |
|
|
|
|
binaries[gpu] = (char *)malloc( sizeof(char)*binary_sizes[gpu] ); |
|
|
|
|
status = clGetProgramInfo( clState->program, CL_PROGRAM_BINARIES, sizeof(char *)*nDevices, binaries, NULL ); |
|
|
|
|
if (unlikely(status != CL_SUCCESS)) |
|
|
|
|
{ |
|
|
|
|
applog(LOG_ERR, "Error: Getting program info. (clGetPlatformInfo)"); |
|
|
|
|
return NULL; |
|
|
|
|
} |
|
|
|
|
err = clGetProgramInfo( clState->program, CL_PROGRAM_BINARIES, sizeof(char *)*nDevices, binaries, NULL ); |
|
|
|
|
|
|
|
|
|
unsigned remaining = binary_sizes[gpu]; |
|
|
|
|
char *w = binaries[gpu]; |
|
|
|
@ -516,7 +437,7 @@ build:
@@ -516,7 +437,7 @@ build:
|
|
|
|
|
* back and find the 2nd incidence of \x7ELF (rewind by one |
|
|
|
|
* from ELF) and then patch the opcodes */ |
|
|
|
|
if (!advance(&w, &remaining, ".text")) |
|
|
|
|
{patchbfi = 0; goto build;} |
|
|
|
|
{patchbfi = 0; goto retry;} |
|
|
|
|
w++; remaining--; |
|
|
|
|
if (!advance(&w, &remaining, ".text")) { |
|
|
|
|
/* 32 bit builds only one ELF */ |
|
|
|
@ -526,7 +447,7 @@ build:
@@ -526,7 +447,7 @@ build:
|
|
|
|
|
memcpy(&length, w + 289, 4); |
|
|
|
|
w = binaries[gpu]; remaining = binary_sizes[gpu]; |
|
|
|
|
if (!advance(&w, &remaining, "ELF")) |
|
|
|
|
{patchbfi = 0; goto build;} |
|
|
|
|
{patchbfi = 0; goto retry;} |
|
|
|
|
w++; remaining--; |
|
|
|
|
if (!advance(&w, &remaining, "ELF")) { |
|
|
|
|
/* 32 bit builds only one ELF */ |
|
|
|
@ -557,23 +478,6 @@ build:
@@ -557,23 +478,6 @@ build:
|
|
|
|
|
free(source); |
|
|
|
|
free(rawsource); |
|
|
|
|
|
|
|
|
|
/* Save the binary to be loaded next time */ |
|
|
|
|
binaryfile = fopen(binaryfilename, "w"); |
|
|
|
|
if (!binaryfile) { |
|
|
|
|
/* Not a fatal problem, just means we build it again next time */ |
|
|
|
|
if (opt_debug) |
|
|
|
|
applog(LOG_DEBUG, "Unable to create file %s", binaryfilename); |
|
|
|
|
} else { |
|
|
|
|
if (unlikely(fwrite(binaries[gpu], 1, binary_sizes[gpu], binaryfile) != binary_sizes[gpu])) { |
|
|
|
|
applog(LOG_ERR, "Unable to fwrite to binaryfile"); |
|
|
|
|
return NULL; |
|
|
|
|
} |
|
|
|
|
fclose(binaryfile); |
|
|
|
|
} |
|
|
|
|
built: |
|
|
|
|
free(binaries); |
|
|
|
|
free(binary_sizes); |
|
|
|
|
|
|
|
|
|
applog(LOG_INFO, "Initialising kernel %s with%s BFI_INT patching, %d vectors and worksize %d", |
|
|
|
|
filename, patchbfi ? "" : "out", clState->preferred_vwidth, clState->work_size); |
|
|
|
|
|
|
|
|
|