Browse Source

Make the optimisations per GPU card and update the code to work properly with multiple cards.

nfactor-troky
ckolivas 14 years ago committed by Con Kolivas
parent
commit
26546ad5b9
  1. 12
      cpu-miner.c
  2. 50
      ocl.c
  3. 6
      ocl.h

12
cpu-miner.c

@ -825,12 +825,12 @@ static void *gpuminer_thread(void *userdata)
struct work *work = malloc(sizeof(struct work)); struct work *work = malloc(sizeof(struct work));
bool need_work = true; bool need_work = true;
unsigned int threads = 1 << (15 + scan_intensity); unsigned int threads = 1 << (15 + scan_intensity);
unsigned int vectors = preferred_vwidth; unsigned int vectors = clState->preferred_vwidth;
unsigned int hashes_done = threads * vectors; unsigned int hashes_done = threads * vectors;
gettimeofday(&tv_start, NULL); gettimeofday(&tv_start, NULL);
globalThreads[0] = threads; globalThreads[0] = threads;
localThreads[0] = max_work_size / vectors; localThreads[0] = clState->max_work_size / vectors;
while (1) { while (1) {
struct timeval tv_end, diff; struct timeval tv_end, diff;
@ -1165,7 +1165,7 @@ static void parse_cmdline(int argc, char *argv[])
int main (int argc, char *argv[]) int main (int argc, char *argv[])
{ {
struct thr_info *thr; struct thr_info *thr;
int i; unsigned int i;
char name[32]; char name[32];
#ifdef WIN32 #ifdef WIN32
@ -1177,7 +1177,7 @@ int main (int argc, char *argv[])
nDevs = clDevicesNum(); nDevs = clDevicesNum();
if (opt_ndevs) { if (opt_ndevs) {
printf("%i\n", nDevs); applog(LOG_INFO, "%i", nDevs);
return nDevs; return nDevs;
} }
@ -1258,13 +1258,13 @@ int main (int argc, char *argv[])
if (!thr->q) if (!thr->q)
return 1; return 1;
printf("Init GPU %i\n", i); applog(LOG_INFO, "Init GPU %i", i);
clStates[i] = initCl(i, name, sizeof(name)); clStates[i] = initCl(i, name, sizeof(name));
if (!clStates[i]) { if (!clStates[i]) {
applog(LOG_ERR, "Failed to init GPU %d", i); applog(LOG_ERR, "Failed to init GPU %d", i);
continue; continue;
} }
printf("initCl() finished. Found %s\n", name); applog(LOG_INFO, "initCl() finished. Found %s", name);
if (unlikely(pthread_create(&thr->pth, NULL, gpuminer_thread, thr))) { if (unlikely(pthread_create(&thr->pth, NULL, gpuminer_thread, thr))) {
applog(LOG_ERR, "thread %d create failed", i); applog(LOG_ERR, "thread %d create failed", i);

50
ocl.c

@ -14,16 +14,13 @@
#include "findnonce.h" #include "findnonce.h"
#include "ocl.h" #include "ocl.h"
cl_uint preferred_vwidth = 1;
size_t max_work_size;
char *file_contents(const char *filename, int *length) char *file_contents(const char *filename, int *length)
{ {
FILE *f = fopen(filename, "r"); FILE *f = fopen(filename, "r");
void *buffer; void *buffer;
if (!f) { if (!f) {
fprintf(stderr, "Unable to open %s for reading\n", filename); applog(LOG_ERR, "Unable to open %s for reading", filename);
return NULL; return NULL;
} }
@ -102,7 +99,7 @@ void advance(char **area, unsigned *remaining, const char *marker)
char *find = memmem(*area, *remaining, marker, strlen(marker)); char *find = memmem(*area, *remaining, marker, strlen(marker));
if (!find) if (!find)
fprintf(stderr, "Marker \"%s\" not found\n", marker), exit(1); applog(LOG_ERR, "Marker \"%s\" not found", marker), exit(1);
*remaining -= find - *area; *remaining -= find - *area;
*area = find; *area = find;
} }
@ -155,7 +152,7 @@ void patch_opcodes(char *w, unsigned remaining)
} }
} }
_clState *initCl(int gpu, char *name, size_t nameSize) _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
{ {
bool hasBitAlign = false; bool hasBitAlign = false;
cl_int status = 0; cl_int status = 0;
@ -227,7 +224,7 @@ _clState *initCl(int gpu, char *name, size_t nameSize)
return NULL; return NULL;
} }
printf("List of devices:\n"); applog(LOG_INFO, "List of devices:");
unsigned int i; unsigned int i;
for(i=0; i<numDevices; i++) { for(i=0; i<numDevices; i++) {
@ -239,10 +236,10 @@ _clState *initCl(int gpu, char *name, size_t nameSize)
return NULL; return NULL;
} }
printf("\t%i\t%s\n", i, pbuff); applog(LOG_INFO, "\t%i\t%s", i, pbuff);
} }
if (gpu >= 0 && gpu < numDevices) { if (gpu < numDevices) {
char pbuff[100]; char pbuff[100];
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL); status = clGetDeviceInfo(devices[gpu], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
if(status != CL_SUCCESS) if(status != CL_SUCCESS)
@ -251,7 +248,7 @@ _clState *initCl(int gpu, char *name, size_t nameSize)
return NULL; return NULL;
} }
printf("Selected %i: %s\n", gpu, pbuff); applog(LOG_INFO, "Selected %i: %s", gpu, pbuff);
strncpy(name, pbuff, nameSize); strncpy(name, pbuff, nameSize);
} else { } else {
printf("Invalid GPU %i\n", gpu); printf("Invalid GPU %i\n", gpu);
@ -272,14 +269,10 @@ _clState *initCl(int gpu, char *name, size_t nameSize)
/* Check for BFI INT support. Hopefully people don't mix devices with /* Check for BFI INT support. Hopefully people don't mix devices with
* and without it! */ * and without it! */
char * extensions = malloc(1024); char * extensions = malloc(1024);
/* This needs to create separate programs for each GPU, but for now
* assume they all have the same capabilities D: */
for (i = 0; i < numDevices; i++) {
const char * camo = "cl_amd_media_ops"; const char * camo = "cl_amd_media_ops";
char *find; char *find;
status = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, 1024, (void *)extensions, NULL); status = clGetDeviceInfo(devices[gpu], CL_DEVICE_EXTENSIONS, 1024, (void *)extensions, NULL);
if (status != CL_SUCCESS) { if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error: Failed to clGetDeviceInfo when trying to get CL_DEVICE_EXTENSIONS"); applog(LOG_ERR, "Error: Failed to clGetDeviceInfo when trying to get CL_DEVICE_EXTENSIONS");
return NULL; return NULL;
@ -288,20 +281,20 @@ _clState *initCl(int gpu, char *name, size_t nameSize)
if (find) if (find)
hasBitAlign = true; hasBitAlign = true;
status = clGetDeviceInfo(devices[i], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&preferred_vwidth, NULL); status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&clState->preferred_vwidth, NULL);
if (status != CL_SUCCESS) { if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error: Failed to clGetDeviceInfo when trying to get CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT"); applog(LOG_ERR, "Error: Failed to clGetDeviceInfo when trying to get CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT");
return NULL; return NULL;
} }
applog(LOG_INFO, "Preferred vector width reported %d", preferred_vwidth); applog(LOG_INFO, "Preferred vector width reported %d", clState->preferred_vwidth);
status = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&max_work_size, NULL); status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&clState->max_work_size, NULL);
if (status != CL_SUCCESS) { if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_WORK_GROUP_SIZE"); applog(LOG_ERR, "Error: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_WORK_GROUP_SIZE");
return NULL; return NULL;
} }
applog(LOG_INFO, "Max work group size reported %d", max_work_size); if (opt_debug)
} applog(LOG_DEBUG, "Max work group size reported %d", clState->max_work_size);
///////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////
// Load CL file, build CL program object, create CL kernel object // Load CL file, build CL program object, create CL kernel object
@ -316,7 +309,7 @@ _clState *initCl(int gpu, char *name, size_t nameSize)
size_t sourceSize[] = {(size_t)pl}; size_t sourceSize[] = {(size_t)pl};
/* Patch the source file with the preferred_vwidth */ /* Patch the source file with the preferred_vwidth */
if (preferred_vwidth > 1) { if (clState->preferred_vwidth > 1) {
char *find = strstr(source, "VECTORSX"); char *find = strstr(source, "VECTORSX");
if (unlikely(!find)) { if (unlikely(!find)) {
@ -324,11 +317,12 @@ _clState *initCl(int gpu, char *name, size_t nameSize)
return NULL; return NULL;
} }
find += 7; // "VECTORS" find += 7; // "VECTORS"
if (preferred_vwidth == 2) if (clState->preferred_vwidth == 2)
strncpy(find, "2", 1); strncpy(find, "2", 1);
else else
strncpy(find, "4", 1); strncpy(find, "4", 1);
applog(LOG_INFO, "Patched source to suit %d vectors", preferred_vwidth); if (opt_debug)
applog(LOG_DEBUG, "Patched source to suit %d vectors", clState->preferred_vwidth);
} }
/* Patch the source file defining BFI_INT */ /* Patch the source file defining BFI_INT */
@ -341,9 +335,13 @@ _clState *initCl(int gpu, char *name, size_t nameSize)
} }
find += 7; // "BFI_INT" find += 7; // "BFI_INT"
strncpy(find, " ", 1); strncpy(find, " ", 1);
applog(LOG_INFO, "cl_amd_media_ops found, patched source with BFI_INT"); if (opt_debug)
} else applog(LOG_DEBUG, "cl_amd_media_ops found, patched source with BFI_INT");
applog(LOG_INFO, "cl_amd_media_ops not found, will not BFI_INT patch"); } else if (opt_debug)
applog(LOG_DEBUG, "cl_amd_media_ops not found, will not BFI_INT patch");
applog(LOG_INFO, "Initialising kernel with%s BFI_INT patching, %d vectors and %d worksize",
hasBitAlign ? "" : "out", clState->preferred_vwidth, clState->max_work_size);
clState->program = clCreateProgramWithSource(clState->context, 1, (const char **)&source, sourceSize, &status); clState->program = clCreateProgramWithSource(clState->context, 1, (const char **)&source, sourceSize, &status);
if(status != CL_SUCCESS) if(status != CL_SUCCESS)

6
ocl.h

@ -12,12 +12,12 @@ typedef struct {
cl_command_queue commandQueue; cl_command_queue commandQueue;
cl_program program; cl_program program;
cl_mem outputBuffer; cl_mem outputBuffer;
cl_uint preferred_vwidth;
size_t max_work_size;
} _clState; } _clState;
extern char *file_contents(const char *filename, int *length); extern char *file_contents(const char *filename, int *length);
extern int clDevicesNum(); extern int clDevicesNum();
extern _clState *initCl(int gpu, char *name, size_t nameSize); extern _clState *initCl(unsigned int gpu, char *name, size_t nameSize);
extern cl_uint preferred_vwidth;
extern size_t max_work_size;
#endif /* __OCL_H__ */ #endif /* __OCL_H__ */

Loading…
Cancel
Save