mirror of
https://github.com/GOSTSec/sgminer
synced 2025-02-02 10:04:33 +00:00
Patch bitalign separately from bfi_int.
Recover from failing to patch for bfi int.
This commit is contained in:
parent
948b514cf2
commit
623b9b9fd8
97
ocl.c
97
ocl.c
@ -97,14 +97,18 @@ int clDevicesNum() {
|
|||||||
return numDevices;
|
return numDevices;
|
||||||
}
|
}
|
||||||
|
|
||||||
void advance(char **area, unsigned *remaining, const char *marker)
|
static int advance(char **area, unsigned *remaining, const char *marker)
|
||||||
{
|
{
|
||||||
char *find = memmem(*area, *remaining, marker, strlen(marker));
|
char *find = memmem(*area, *remaining, marker, strlen(marker));
|
||||||
|
|
||||||
if (!find)
|
if (!find) {
|
||||||
applog(LOG_ERR, "Marker \"%s\" not found", marker), exit(1);
|
if (opt_debug)
|
||||||
|
applog(LOG_DEBUG, "Marker \"%s\" not found", marker);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
*remaining -= find - *area;
|
*remaining -= find - *area;
|
||||||
*area = find;
|
*area = find;
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define OP3_INST_BFE_UINT 4ULL
|
#define OP3_INST_BFE_UINT 4ULL
|
||||||
@ -158,6 +162,7 @@ void patch_opcodes(char *w, unsigned remaining)
|
|||||||
_clState *initCl(unsigned int gpu, char *name, size_t nameSize)
|
_clState *initCl(unsigned int gpu, char *name, size_t nameSize)
|
||||||
{
|
{
|
||||||
bool hasBitAlign = false;
|
bool hasBitAlign = false;
|
||||||
|
bool patchbfi = false;
|
||||||
cl_int status = 0;
|
cl_int status = 0;
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
|
|
||||||
@ -167,39 +172,39 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
|
|||||||
cl_platform_id platform = NULL;
|
cl_platform_id platform = NULL;
|
||||||
status = clGetPlatformIDs(0, NULL, &numPlatforms);
|
status = clGetPlatformIDs(0, NULL, &numPlatforms);
|
||||||
if(status != CL_SUCCESS)
|
if(status != CL_SUCCESS)
|
||||||
{
|
{
|
||||||
applog(LOG_ERR, "Error: Getting Platforms. (clGetPlatformsIDs)");
|
applog(LOG_ERR, "Error: Getting Platforms. (clGetPlatformsIDs)");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(numPlatforms > 0)
|
if(numPlatforms > 0)
|
||||||
{
|
{
|
||||||
cl_platform_id* platforms = (cl_platform_id *)malloc(numPlatforms*sizeof(cl_platform_id));
|
cl_platform_id* platforms = (cl_platform_id *)malloc(numPlatforms*sizeof(cl_platform_id));
|
||||||
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
|
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
|
||||||
if(status != CL_SUCCESS)
|
if(status != CL_SUCCESS)
|
||||||
{
|
{
|
||||||
applog(LOG_ERR, "Error: Getting Platform Ids. (clGetPlatformsIDs)");
|
applog(LOG_ERR, "Error: Getting Platform Ids. (clGetPlatformsIDs)");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
for(i = 0; i < numPlatforms; ++i)
|
for(i = 0; i < numPlatforms; ++i)
|
||||||
{
|
{
|
||||||
char pbuff[100];
|
char pbuff[100];
|
||||||
status = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuff), pbuff, NULL);
|
status = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuff), pbuff, NULL);
|
||||||
if(status != CL_SUCCESS)
|
if(status != CL_SUCCESS)
|
||||||
{
|
{
|
||||||
applog(LOG_ERR, "Error: Getting Platform Info. (clGetPlatformInfo)");
|
applog(LOG_ERR, "Error: Getting Platform Info. (clGetPlatformInfo)");
|
||||||
free(platforms);
|
free(platforms);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
platform = platforms[i];
|
platform = platforms[i];
|
||||||
if(!strcmp(pbuff, "Advanced Micro Devices, Inc."))
|
if(!strcmp(pbuff, "Advanced Micro Devices, Inc."))
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
free(platforms);
|
free(platforms);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(platform == NULL) {
|
if(platform == NULL) {
|
||||||
perror("NULL platform found!\n");
|
perror("NULL platform found!\n");
|
||||||
@ -282,7 +287,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
|
|||||||
}
|
}
|
||||||
find = strstr(extensions, camo);
|
find = strstr(extensions, camo);
|
||||||
if (find)
|
if (find)
|
||||||
hasBitAlign = true;
|
hasBitAlign = patchbfi = true;
|
||||||
|
|
||||||
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&clState->preferred_vwidth, NULL);
|
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&clState->preferred_vwidth, NULL);
|
||||||
if (status != CL_SUCCESS) {
|
if (status != CL_SUCCESS) {
|
||||||
@ -309,8 +314,15 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
|
|||||||
char *filename = "poclbm.cl";
|
char *filename = "poclbm.cl";
|
||||||
|
|
||||||
int pl;
|
int pl;
|
||||||
char *source = file_contents(filename, &pl);
|
char *source, *rawsource = file_contents(filename, &pl);
|
||||||
size_t sourceSize[] = {(size_t)pl};
|
size_t sourceSize[] = {(size_t)pl};
|
||||||
|
source = malloc(pl);
|
||||||
|
retry:
|
||||||
|
if (!source) {
|
||||||
|
applog(LOG_ERR, "Unable to malloc source");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
memcpy(source, rawsource, pl);
|
||||||
|
|
||||||
if (opt_vectors)
|
if (opt_vectors)
|
||||||
clState->preferred_vwidth = opt_vectors;
|
clState->preferred_vwidth = opt_vectors;
|
||||||
@ -336,8 +348,22 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
|
|||||||
applog(LOG_DEBUG, "Patched source to suit %d vectors", clState->preferred_vwidth);
|
applog(LOG_DEBUG, "Patched source to suit %d vectors", clState->preferred_vwidth);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Patch the source file defining BFI_INT */
|
/* Patch the source file defining BITALIGN */
|
||||||
if (hasBitAlign == true) {
|
if (hasBitAlign == true) {
|
||||||
|
char *find = strstr(source, "BITALIGNX");
|
||||||
|
|
||||||
|
if (unlikely(!find)) {
|
||||||
|
applog(LOG_ERR, "Unable to find BITALIGNX in source");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
find += 8; // "BITALIGN"
|
||||||
|
strncpy(find, " ", 1);
|
||||||
|
if (opt_debug)
|
||||||
|
applog(LOG_DEBUG, "cl_amd_media_ops found, patched source with BITALIGN");
|
||||||
|
} else if (opt_debug)
|
||||||
|
applog(LOG_DEBUG, "cl_amd_media_ops not found, will not BITALIGN patch");
|
||||||
|
|
||||||
|
if (patchbfi == true) {
|
||||||
char *find = strstr(source, "BFI_INTX");
|
char *find = strstr(source, "BFI_INTX");
|
||||||
|
|
||||||
if (unlikely(!find)) {
|
if (unlikely(!find)) {
|
||||||
@ -351,20 +377,17 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
|
|||||||
} else if (opt_debug)
|
} else if (opt_debug)
|
||||||
applog(LOG_DEBUG, "cl_amd_media_ops not found, will not BFI_INT patch");
|
applog(LOG_DEBUG, "cl_amd_media_ops not found, will not BFI_INT patch");
|
||||||
|
|
||||||
applog(LOG_INFO, "Initialising kernel with%s BFI_INT patching, %d vectors and worksize %d",
|
|
||||||
hasBitAlign ? "" : "out", clState->preferred_vwidth, clState->work_size);
|
|
||||||
|
|
||||||
clState->program = clCreateProgramWithSource(clState->context, 1, (const char **)&source, sourceSize, &status);
|
clState->program = clCreateProgramWithSource(clState->context, 1, (const char **)&source, sourceSize, &status);
|
||||||
if(status != CL_SUCCESS)
|
if (status != CL_SUCCESS)
|
||||||
{
|
{
|
||||||
applog(LOG_ERR, "Error: Loading Binary into cl_program (clCreateProgramWithSource)");
|
applog(LOG_ERR, "Error: Loading Binary into cl_program (clCreateProgramWithSource)");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* create a cl program executable for all the devices specified */
|
/* create a cl program executable for all the devices specified */
|
||||||
status = clBuildProgram(clState->program, 1, &devices[gpu], NULL, NULL, NULL);
|
status = clBuildProgram(clState->program, 1, &devices[gpu], NULL, NULL, NULL);
|
||||||
if(status != CL_SUCCESS)
|
if (status != CL_SUCCESS)
|
||||||
{
|
{
|
||||||
applog(LOG_ERR, "Error: Building Program (clBuildProgram)");
|
applog(LOG_ERR, "Error: Building Program (clBuildProgram)");
|
||||||
size_t logSize;
|
size_t logSize;
|
||||||
status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
|
status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
|
||||||
@ -376,7 +399,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Patch the kernel if the hardware supports BFI_INT */
|
/* Patch the kernel if the hardware supports BFI_INT */
|
||||||
if (hasBitAlign == true) {
|
if (patchbfi == true) {
|
||||||
size_t nDevices;
|
size_t nDevices;
|
||||||
size_t * binary_sizes;
|
size_t * binary_sizes;
|
||||||
char ** binaries;
|
char ** binaries;
|
||||||
@ -402,15 +425,19 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
|
|||||||
* position and length at a fixed offset from that. Then go
|
* position and length at a fixed offset from that. Then go
|
||||||
* back and find the 2nd incidence of \x7ELF (rewind by one
|
* back and find the 2nd incidence of \x7ELF (rewind by one
|
||||||
* from ELF) and then patch the opcocdes */
|
* from ELF) and then patch the opcocdes */
|
||||||
advance(&w, &remaining, ".text");
|
if (!advance(&w, &remaining, ".text"))
|
||||||
|
{patchbfi = false; goto retry;}
|
||||||
w++; remaining--;
|
w++; remaining--;
|
||||||
advance(&w, &remaining, ".text");
|
if (!advance(&w, &remaining, ".text"))
|
||||||
|
{patchbfi = false; goto retry;}
|
||||||
memcpy(&start, w + 285, 4);
|
memcpy(&start, w + 285, 4);
|
||||||
memcpy(&length, w + 289, 4);
|
memcpy(&length, w + 289, 4);
|
||||||
w = binaries[gpu]; remaining = binary_sizes[gpu];
|
w = binaries[gpu]; remaining = binary_sizes[gpu];
|
||||||
advance(&w, &remaining, "ELF");
|
if (!advance(&w, &remaining, "ELF"))
|
||||||
|
{patchbfi = false; goto retry;}
|
||||||
w++; remaining--;
|
w++; remaining--;
|
||||||
advance(&w, &remaining, "ELF");
|
if (!advance(&w, &remaining, "ELF"))
|
||||||
|
{patchbfi = false; goto retry;}
|
||||||
w--; remaining++;
|
w--; remaining++;
|
||||||
w += start; remaining -= start;
|
w += start; remaining -= start;
|
||||||
if (opt_debug)
|
if (opt_debug)
|
||||||
@ -427,16 +454,22 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
|
|||||||
|
|
||||||
clState->program = clCreateProgramWithBinary(clState->context, 1, &devices[gpu], &binary_sizes[gpu], (const unsigned char **)&binaries[gpu], &status, NULL);
|
clState->program = clCreateProgramWithBinary(clState->context, 1, &devices[gpu], &binary_sizes[gpu], (const unsigned char **)&binaries[gpu], &status, NULL);
|
||||||
if(status != CL_SUCCESS)
|
if(status != CL_SUCCESS)
|
||||||
{
|
{
|
||||||
applog(LOG_ERR, "Error: Loading Binary into cl_program (clCreateProgramWithBinary)");
|
applog(LOG_ERR, "Error: Loading Binary into cl_program (clCreateProgramWithBinary)");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
free(source);
|
||||||
|
free(rawsource);
|
||||||
|
|
||||||
|
applog(LOG_INFO, "Initialising kernel with%s BFI_INT patching, %d vectors and worksize %d",
|
||||||
|
patchbfi ? "" : "out", clState->preferred_vwidth, clState->work_size);
|
||||||
|
|
||||||
/* create a cl program executable for all the devices specified */
|
/* create a cl program executable for all the devices specified */
|
||||||
status = clBuildProgram(clState->program, 1, &devices[gpu], NULL, NULL, NULL);
|
status = clBuildProgram(clState->program, 1, &devices[gpu], NULL, NULL, NULL);
|
||||||
if(status != CL_SUCCESS)
|
if(status != CL_SUCCESS)
|
||||||
{
|
{
|
||||||
applog(LOG_ERR, "Error: Building Program (clBuildProgram)");
|
applog(LOG_ERR, "Error: Building Program (clBuildProgram)");
|
||||||
size_t logSize;
|
size_t logSize;
|
||||||
status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
|
status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
|
||||||
|
@ -33,10 +33,9 @@ __constant uint K[64] = {
|
|||||||
// primitives.
|
// primitives.
|
||||||
|
|
||||||
#define BFI_INTX
|
#define BFI_INTX
|
||||||
|
#define BITALIGNX
|
||||||
|
|
||||||
#ifdef BFI_INT
|
#ifdef BFI_INT
|
||||||
|
|
||||||
#define BITALIGN
|
|
||||||
// Well, slight problem... It turns out BFI_INT isn't actually exposed to
|
// Well, slight problem... It turns out BFI_INT isn't actually exposed to
|
||||||
// OpenCL (or CAL IL for that matter) in any way. However, there is
|
// OpenCL (or CAL IL for that matter) in any way. However, there is
|
||||||
// a similar instruction, BYTE_ALIGN_INT, which is exposed to OpenCL via
|
// a similar instruction, BYTE_ALIGN_INT, which is exposed to OpenCL via
|
||||||
|
Loading…
x
Reference in New Issue
Block a user