|
|
@ -52,8 +52,6 @@ char *file_contents(const char *filename, int *length) |
|
|
|
return (char*)buffer; |
|
|
|
return (char*)buffer; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
static cl_uint numDevices; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int clDevicesNum() { |
|
|
|
int clDevicesNum() { |
|
|
|
cl_int status = 0; |
|
|
|
cl_int status = 0; |
|
|
|
|
|
|
|
|
|
|
@ -113,95 +111,6 @@ int clDevicesNum() { |
|
|
|
return numDevices; |
|
|
|
return numDevices; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
static cl_platform_id platform = NULL; |
|
|
|
|
|
|
|
static cl_device_id *devices; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int preinit_devices(void) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
cl_int status; |
|
|
|
|
|
|
|
cl_uint numPlatforms; |
|
|
|
|
|
|
|
int i; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
status = clGetPlatformIDs(0, NULL, &numPlatforms); |
|
|
|
|
|
|
|
if (status != CL_SUCCESS) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
applog(LOG_ERR, "Error: Getting Platforms. (clGetPlatformsIDs)"); |
|
|
|
|
|
|
|
return -1; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (numPlatforms > 0) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
cl_platform_id* platforms = (cl_platform_id *)malloc(numPlatforms*sizeof(cl_platform_id)); |
|
|
|
|
|
|
|
status = clGetPlatformIDs(numPlatforms, platforms, NULL); |
|
|
|
|
|
|
|
if (status != CL_SUCCESS) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
applog(LOG_ERR, "Error: Getting Platform Ids. (clGetPlatformsIDs)"); |
|
|
|
|
|
|
|
return -1; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for(i = 0; i < numPlatforms; ++i) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
char pbuff[100]; |
|
|
|
|
|
|
|
status = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuff), pbuff, NULL); |
|
|
|
|
|
|
|
if (status != CL_SUCCESS) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
applog(LOG_ERR, "Error: Getting Platform Info. (clGetPlatformInfo)"); |
|
|
|
|
|
|
|
free(platforms); |
|
|
|
|
|
|
|
return -1; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
platform = platforms[i]; |
|
|
|
|
|
|
|
if (!strcmp(pbuff, "Advanced Micro Devices, Inc.")) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
break; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
free(platforms); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (platform == NULL) { |
|
|
|
|
|
|
|
perror("NULL platform found!\n"); |
|
|
|
|
|
|
|
return -1; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices); |
|
|
|
|
|
|
|
if (status != CL_SUCCESS) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
applog(LOG_ERR, "Error: Getting Device IDs (num)"); |
|
|
|
|
|
|
|
return -1; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (numDevices > 0 ) { |
|
|
|
|
|
|
|
devices = (cl_device_id *)malloc(numDevices*sizeof(cl_device_id)); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Now, get the device list data */ |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL); |
|
|
|
|
|
|
|
if (status != CL_SUCCESS) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
applog(LOG_ERR, "Error: Getting Device IDs (list)"); |
|
|
|
|
|
|
|
return -1; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
applog(LOG_INFO, "List of devices:"); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
unsigned int i; |
|
|
|
|
|
|
|
for(i=0; i<numDevices; i++) { |
|
|
|
|
|
|
|
char pbuff[100]; |
|
|
|
|
|
|
|
status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL); |
|
|
|
|
|
|
|
if (status != CL_SUCCESS) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
applog(LOG_ERR, "Error: Getting Device Info"); |
|
|
|
|
|
|
|
return -1; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
applog(LOG_INFO, "\t%i\t%s", i, pbuff); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
} else return -1; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return 0; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static int advance(char **area, unsigned *remaining, const char *marker) |
|
|
|
static int advance(char **area, unsigned *remaining, const char *marker) |
|
|
|
{ |
|
|
|
{ |
|
|
|
char *find = memmem(*area, *remaining, marker, strlen(marker)); |
|
|
|
char *find = memmem(*area, *remaining, marker, strlen(marker)); |
|
|
@ -264,80 +173,94 @@ void patch_opcodes(char *w, unsigned remaining) |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
_clState *initCQ(_clState *clState, unsigned int gpu) |
|
|
|
_clState *initCl(unsigned int gpu, char *name, size_t nameSize) |
|
|
|
{ |
|
|
|
{ |
|
|
|
|
|
|
|
int patchbfi = 0; |
|
|
|
cl_int status = 0; |
|
|
|
cl_int status = 0; |
|
|
|
cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; |
|
|
|
unsigned int i; |
|
|
|
|
|
|
|
|
|
|
|
clState->context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL, &status); |
|
|
|
_clState *clState = calloc(1, sizeof(_clState)); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cl_uint numPlatforms; |
|
|
|
|
|
|
|
cl_platform_id platform = NULL; |
|
|
|
|
|
|
|
status = clGetPlatformIDs(0, NULL, &numPlatforms); |
|
|
|
if (status != CL_SUCCESS) |
|
|
|
if (status != CL_SUCCESS) |
|
|
|
{ |
|
|
|
{ |
|
|
|
applog(LOG_ERR, "Error: Creating Context. (clCreateContextFromType)"); |
|
|
|
applog(LOG_ERR, "Error: Getting Platforms. (clGetPlatformsIDs)"); |
|
|
|
return NULL; |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/* create a cl program executable for the device specified */ |
|
|
|
if (numPlatforms > 0) |
|
|
|
status = clBuildProgram(clState->program, 1, &devices[gpu], NULL, NULL, NULL); |
|
|
|
{ |
|
|
|
|
|
|
|
cl_platform_id* platforms = (cl_platform_id *)malloc(numPlatforms*sizeof(cl_platform_id)); |
|
|
|
|
|
|
|
status = clGetPlatformIDs(numPlatforms, platforms, NULL); |
|
|
|
if (status != CL_SUCCESS) |
|
|
|
if (status != CL_SUCCESS) |
|
|
|
{ |
|
|
|
{ |
|
|
|
applog(LOG_ERR, "Error: Building Program (clBuildProgram)"); |
|
|
|
applog(LOG_ERR, "Error: Getting Platform Ids. (clGetPlatformsIDs)"); |
|
|
|
size_t logSize; |
|
|
|
|
|
|
|
status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
char *log = malloc(logSize); |
|
|
|
|
|
|
|
status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, logSize, log, NULL); |
|
|
|
|
|
|
|
applog(LOG_INFO, "%s", log); |
|
|
|
|
|
|
|
return NULL; |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/* get a kernel object handle for a kernel with the given name */ |
|
|
|
for(i = 0; i < numPlatforms; ++i) |
|
|
|
clState->kernel = clCreateKernel(clState->program, "search", &status); |
|
|
|
{ |
|
|
|
|
|
|
|
char pbuff[100]; |
|
|
|
|
|
|
|
status = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuff), pbuff, NULL); |
|
|
|
if (status != CL_SUCCESS) |
|
|
|
if (status != CL_SUCCESS) |
|
|
|
{ |
|
|
|
{ |
|
|
|
applog(LOG_ERR, "Error: Creating Kernel from program. (clCreateKernel)"); |
|
|
|
applog(LOG_ERR, "Error: Getting Platform Info. (clGetPlatformInfo)"); |
|
|
|
|
|
|
|
free(platforms); |
|
|
|
return NULL; |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
platform = platforms[i]; |
|
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
if (!strcmp(pbuff, "Advanced Micro Devices, Inc.")) |
|
|
|
// Create an OpenCL command queue
|
|
|
|
|
|
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu], |
|
|
|
|
|
|
|
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &status); |
|
|
|
|
|
|
|
if (status != CL_SUCCESS) /* Try again without OOE enable */ |
|
|
|
|
|
|
|
clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu], 0 , &status); |
|
|
|
|
|
|
|
if (status != CL_SUCCESS) |
|
|
|
|
|
|
|
{ |
|
|
|
{ |
|
|
|
applog(LOG_ERR, "Creating Command Queue. (clCreateCommandQueue)"); |
|
|
|
break; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
free(platforms); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (platform == NULL) { |
|
|
|
|
|
|
|
perror("NULL platform found!\n"); |
|
|
|
return NULL; |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, BUFFERSIZE, NULL, &status); |
|
|
|
size_t nDevices; |
|
|
|
if (status != CL_SUCCESS) { |
|
|
|
cl_uint numDevices; |
|
|
|
applog(LOG_ERR, "Error: clCreateBuffer (outputBuffer)"); |
|
|
|
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices); |
|
|
|
|
|
|
|
if (status != CL_SUCCESS) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
applog(LOG_ERR, "Error: Getting Device IDs (num)"); |
|
|
|
return NULL; |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
return clState; |
|
|
|
cl_device_id *devices; |
|
|
|
} |
|
|
|
if (numDevices > 0 ) { |
|
|
|
|
|
|
|
devices = (cl_device_id *)malloc(numDevices*sizeof(cl_device_id)); |
|
|
|
|
|
|
|
|
|
|
|
_clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize) |
|
|
|
/* Now, get the device list data */ |
|
|
|
{ |
|
|
|
|
|
|
|
unsigned int gpu = cgpu->cpu_gpu; |
|
|
|
|
|
|
|
int patchbfi = 0; |
|
|
|
|
|
|
|
cl_int status = 0; |
|
|
|
|
|
|
|
size_t nDevices; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
_clState *clState = calloc(1, sizeof(_clState)); |
|
|
|
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL); |
|
|
|
|
|
|
|
if (status != CL_SUCCESS) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
applog(LOG_ERR, "Error: Getting Device IDs (list)"); |
|
|
|
|
|
|
|
return NULL; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; |
|
|
|
applog(LOG_INFO, "List of devices:"); |
|
|
|
|
|
|
|
|
|
|
|
clState->context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL, &status); |
|
|
|
unsigned int i; |
|
|
|
|
|
|
|
for(i=0; i<numDevices; i++) { |
|
|
|
|
|
|
|
char pbuff[100]; |
|
|
|
|
|
|
|
status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL); |
|
|
|
if (status != CL_SUCCESS) |
|
|
|
if (status != CL_SUCCESS) |
|
|
|
{ |
|
|
|
{ |
|
|
|
applog(LOG_ERR, "Error: Creating Context. (clCreateContextFromType)"); |
|
|
|
applog(LOG_ERR, "Error: Getting Device Info"); |
|
|
|
return NULL; |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
applog(LOG_INFO, "\t%i\t%s", i, pbuff); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if (gpu < numDevices) { |
|
|
|
if (gpu < numDevices) { |
|
|
|
char pbuff[100]; |
|
|
|
char pbuff[100]; |
|
|
|
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_NAME, sizeof(pbuff), pbuff, &nDevices); |
|
|
|
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_NAME, sizeof(pbuff), pbuff, &nDevices); |
|
|
@ -354,6 +277,17 @@ _clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize) |
|
|
|
return NULL; |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
} else return NULL; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
clState->context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL, &status); |
|
|
|
|
|
|
|
if (status != CL_SUCCESS) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
applog(LOG_ERR, "Error: Creating Context. (clCreateContextFromType)"); |
|
|
|
|
|
|
|
return NULL; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/* Check for BFI INT support. Hopefully people don't mix devices with
|
|
|
|
/* Check for BFI INT support. Hopefully people don't mix devices with
|
|
|
|
* and without it! */ |
|
|
|
* and without it! */ |
|
|
|
char * extensions = malloc(1024); |
|
|
|
char * extensions = malloc(1024); |
|
|
@ -367,7 +301,7 @@ _clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize) |
|
|
|
} |
|
|
|
} |
|
|
|
find = strstr(extensions, camo); |
|
|
|
find = strstr(extensions, camo); |
|
|
|
if (find) |
|
|
|
if (find) |
|
|
|
cgpu->hasBitAlign = patchbfi = 1; |
|
|
|
clState->hasBitAlign = patchbfi = 1; |
|
|
|
|
|
|
|
|
|
|
|
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&clState->preferred_vwidth, NULL); |
|
|
|
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&clState->preferred_vwidth, NULL); |
|
|
|
if (status != CL_SUCCESS) { |
|
|
|
if (status != CL_SUCCESS) { |
|
|
@ -377,27 +311,26 @@ _clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize) |
|
|
|
if (opt_debug) |
|
|
|
if (opt_debug) |
|
|
|
applog(LOG_DEBUG, "Preferred vector width reported %d", clState->preferred_vwidth); |
|
|
|
applog(LOG_DEBUG, "Preferred vector width reported %d", clState->preferred_vwidth); |
|
|
|
|
|
|
|
|
|
|
|
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&cgpu->max_work_size, NULL); |
|
|
|
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&clState->max_work_size, NULL); |
|
|
|
if (status != CL_SUCCESS) { |
|
|
|
if (status != CL_SUCCESS) { |
|
|
|
applog(LOG_ERR, "Error: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_WORK_GROUP_SIZE"); |
|
|
|
applog(LOG_ERR, "Error: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_WORK_GROUP_SIZE"); |
|
|
|
return NULL; |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
} |
|
|
|
if (opt_debug) |
|
|
|
if (opt_debug) |
|
|
|
applog(LOG_DEBUG, "Max work group size reported %d", cgpu->max_work_size); |
|
|
|
applog(LOG_DEBUG, "Max work group size reported %d", clState->max_work_size); |
|
|
|
|
|
|
|
|
|
|
|
/* For some reason 2 vectors is still better even if the card says
|
|
|
|
/* For some reason 2 vectors is still better even if the card says
|
|
|
|
* otherwise, and many cards lie about their max so use 256 as max |
|
|
|
* otherwise, and many cards lie about their max so use 256 as max |
|
|
|
* unless explicitly set on the command line */ |
|
|
|
* unless explicitly set on the command line */ |
|
|
|
cgpu->vwidth = clState->preferred_vwidth; |
|
|
|
|
|
|
|
if (clState->preferred_vwidth > 1) |
|
|
|
if (clState->preferred_vwidth > 1) |
|
|
|
cgpu->vwidth = 2; |
|
|
|
clState->preferred_vwidth = 2; |
|
|
|
if (opt_vectors) |
|
|
|
if (opt_vectors) |
|
|
|
cgpu->vwidth = opt_vectors; |
|
|
|
clState->preferred_vwidth = opt_vectors; |
|
|
|
if (opt_worksize && opt_worksize <= cgpu->max_work_size) |
|
|
|
if (opt_worksize && opt_worksize <= clState->max_work_size) |
|
|
|
cgpu->work_size = opt_worksize; |
|
|
|
clState->work_size = opt_worksize; |
|
|
|
else |
|
|
|
else |
|
|
|
cgpu->work_size = (cgpu->max_work_size <= 256 ? cgpu->max_work_size : 256) / |
|
|
|
clState->work_size = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / |
|
|
|
cgpu->vwidth; |
|
|
|
clState->preferred_vwidth; |
|
|
|
|
|
|
|
|
|
|
|
/* Create binary filename based on parameters passed to opencl
|
|
|
|
/* Create binary filename based on parameters passed to opencl
|
|
|
|
* compiler to ensure we only load a binary that matches what would |
|
|
|
* compiler to ensure we only load a binary that matches what would |
|
|
@ -409,7 +342,7 @@ _clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize) |
|
|
|
char filename[16]; |
|
|
|
char filename[16]; |
|
|
|
|
|
|
|
|
|
|
|
if (chosen_kernel == KL_NONE) { |
|
|
|
if (chosen_kernel == KL_NONE) { |
|
|
|
if (cgpu->hasBitAlign) |
|
|
|
if (clState->hasBitAlign) |
|
|
|
chosen_kernel = KL_PHATK; |
|
|
|
chosen_kernel = KL_PHATK; |
|
|
|
else |
|
|
|
else |
|
|
|
chosen_kernel = KL_POCLBM; |
|
|
|
chosen_kernel = KL_POCLBM; |
|
|
@ -452,14 +385,14 @@ _clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
strcat(binaryfilename, name); |
|
|
|
strcat(binaryfilename, name); |
|
|
|
if (cgpu->hasBitAlign) |
|
|
|
if (clState->hasBitAlign) |
|
|
|
strcat(binaryfilename, "bitalign"); |
|
|
|
strcat(binaryfilename, "bitalign"); |
|
|
|
|
|
|
|
|
|
|
|
strcat(binaryfilename, "v"); |
|
|
|
strcat(binaryfilename, "v"); |
|
|
|
sprintf(numbuf, "%d", cgpu->vwidth); |
|
|
|
sprintf(numbuf, "%d", clState->preferred_vwidth); |
|
|
|
strcat(binaryfilename, numbuf); |
|
|
|
strcat(binaryfilename, numbuf); |
|
|
|
strcat(binaryfilename, "w"); |
|
|
|
strcat(binaryfilename, "w"); |
|
|
|
sprintf(numbuf, "%d", (int)cgpu->work_size); |
|
|
|
sprintf(numbuf, "%d", (int)clState->work_size); |
|
|
|
strcat(binaryfilename, numbuf); |
|
|
|
strcat(binaryfilename, numbuf); |
|
|
|
strcat(binaryfilename, "long"); |
|
|
|
strcat(binaryfilename, "long"); |
|
|
|
sprintf(numbuf, "%d", (int)sizeof(long)); |
|
|
|
sprintf(numbuf, "%d", (int)sizeof(long)); |
|
|
@ -515,7 +448,7 @@ build: |
|
|
|
memcpy(source, rawsource, pl); |
|
|
|
memcpy(source, rawsource, pl); |
|
|
|
|
|
|
|
|
|
|
|
/* Patch the source file with the preferred_vwidth */ |
|
|
|
/* Patch the source file with the preferred_vwidth */ |
|
|
|
if (cgpu->vwidth > 1) { |
|
|
|
if (clState->preferred_vwidth > 1) { |
|
|
|
char *find = strstr(source, "VECTORSX"); |
|
|
|
char *find = strstr(source, "VECTORSX"); |
|
|
|
|
|
|
|
|
|
|
|
if (unlikely(!find)) { |
|
|
|
if (unlikely(!find)) { |
|
|
@ -523,7 +456,7 @@ build: |
|
|
|
return NULL; |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
} |
|
|
|
find += 7; // "VECTORS"
|
|
|
|
find += 7; // "VECTORS"
|
|
|
|
if (cgpu->vwidth == 2) |
|
|
|
if (clState->preferred_vwidth == 2) |
|
|
|
strncpy(find, "2", 1); |
|
|
|
strncpy(find, "2", 1); |
|
|
|
else |
|
|
|
else |
|
|
|
strncpy(find, "4", 1); |
|
|
|
strncpy(find, "4", 1); |
|
|
@ -532,7 +465,7 @@ build: |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/* Patch the source file defining BITALIGN */ |
|
|
|
/* Patch the source file defining BITALIGN */ |
|
|
|
if (cgpu->hasBitAlign) { |
|
|
|
if (clState->hasBitAlign) { |
|
|
|
char *find = strstr(source, "BITALIGNX"); |
|
|
|
char *find = strstr(source, "BITALIGNX"); |
|
|
|
|
|
|
|
|
|
|
|
if (unlikely(!find)) { |
|
|
|
if (unlikely(!find)) { |
|
|
@ -690,13 +623,51 @@ built: |
|
|
|
free(binaries); |
|
|
|
free(binaries); |
|
|
|
free(binary_sizes); |
|
|
|
free(binary_sizes); |
|
|
|
|
|
|
|
|
|
|
|
/* We throw everything out now and create the real context we're using in initCQ */ |
|
|
|
|
|
|
|
clReleaseContext(clState->context); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
applog(LOG_INFO, "Initialising kernel %s with%s BFI_INT patching, %d vectors and worksize %d", |
|
|
|
applog(LOG_INFO, "Initialising kernel %s with%s BFI_INT patching, %d vectors and worksize %d", |
|
|
|
filename, patchbfi ? "" : "out", cgpu->vwidth, cgpu->work_size); |
|
|
|
filename, patchbfi ? "" : "out", clState->preferred_vwidth, clState->work_size); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* create a cl program executable for all the devices specified */ |
|
|
|
|
|
|
|
status = clBuildProgram(clState->program, 1, &devices[gpu], NULL, NULL, NULL); |
|
|
|
|
|
|
|
if (status != CL_SUCCESS) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
applog(LOG_ERR, "Error: Building Program (clBuildProgram)"); |
|
|
|
|
|
|
|
size_t logSize; |
|
|
|
|
|
|
|
status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
char *log = malloc(logSize); |
|
|
|
|
|
|
|
status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, logSize, log, NULL); |
|
|
|
|
|
|
|
applog(LOG_INFO, "%s", log); |
|
|
|
|
|
|
|
return NULL; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
return initCQ(clState, gpu); |
|
|
|
/* get a kernel object handle for a kernel with the given name */ |
|
|
|
|
|
|
|
clState->kernel = clCreateKernel(clState->program, "search", &status); |
|
|
|
|
|
|
|
if (status != CL_SUCCESS) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
applog(LOG_ERR, "Error: Creating Kernel from program. (clCreateKernel)"); |
|
|
|
|
|
|
|
return NULL; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
// Create an OpenCL command queue
|
|
|
|
|
|
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu], |
|
|
|
|
|
|
|
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &status); |
|
|
|
|
|
|
|
if (status != CL_SUCCESS) /* Try again without OOE enable */ |
|
|
|
|
|
|
|
clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu], 0 , &status); |
|
|
|
|
|
|
|
if (status != CL_SUCCESS) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
applog(LOG_ERR, "Creating Command Queue. (clCreateCommandQueue)"); |
|
|
|
|
|
|
|
return NULL; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, BUFFERSIZE, NULL, &status); |
|
|
|
|
|
|
|
if (status != CL_SUCCESS) { |
|
|
|
|
|
|
|
applog(LOG_ERR, "Error: clCreateBuffer (outputBuffer)"); |
|
|
|
|
|
|
|
return NULL; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return clState; |
|
|
|
} |
|
|
|
} |
|
|
|
#endif /* HAVE_OPENCL */ |
|
|
|
#endif /* HAVE_OPENCL */ |
|
|
|
|
|
|
|
|
|
|
|