From eea05c05b811d836a28df4dd63b25d9e51371c42 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Fri, 15 Jul 2011 13:04:25 +1000 Subject: [PATCH] Update kernel with a shorter output path, and use 4k output buffer to match OS page sizes. --- findnonce.h | 4 +-- ocl.c | 4 +-- phatk110711.cl => phatk110714.cl | 50 ++++++-------------------------- 3 files changed, 13 insertions(+), 45 deletions(-) rename phatk110711.cl => phatk110714.cl (88%) diff --git a/findnonce.h b/findnonce.h index e526aab3..3ca7c8bd 100644 --- a/findnonce.h +++ b/findnonce.h @@ -4,8 +4,8 @@ #include "config.h" #define MAXTHREADS (0xFFFFFFFEULL) -/* Maximum worksize 512 * maximum vectors 4 plus one flag entry */ -#define MAXBUFFERS (4 * 512) +/* Maximum worksize 4k to match page size */ +#define MAXBUFFERS (4095) #define BUFFERSIZE (sizeof(uint32_t) * (MAXBUFFERS + 1)) #ifdef HAVE_OPENCL diff --git a/ocl.c b/ocl.c index 9db9df81..b3ccaeaa 100644 --- a/ocl.c +++ b/ocl.c @@ -341,7 +341,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize) char numbuf[10]; char filename[15]; if (clState->hasBitAlign) - strcpy(filename, "phatk110711.cl"); + strcpy(filename, "phatk110714.cl"); else strcpy(filename, "poclbm.cl"); FILE *binaryfile; @@ -370,7 +370,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize) strcpy(binaryfilename, name); if (clState->hasBitAlign) { - strcat(binaryfilename, "phatk110711"); + strcat(binaryfilename, "phatk110714"); strcat(binaryfilename, "bitalign"); } else strcat(binaryfilename, "poclbm"); diff --git a/phatk110711.cl b/phatk110714.cl similarity index 88% rename from phatk110711.cl rename to phatk110714.cl index 14069c93..e6ba9f96 100644 --- a/phatk110711.cl +++ b/phatk110714.cl @@ -3,6 +3,7 @@ // 2011-07-11: further modified by Diapolo and still public-domain // -ck version to be compatible with cgminer +// 2011-07-14: shorter code #define VECTORSX @@ -84,7 +85,6 @@ __kernel void search( const uint state0, const uint state1, const uint state2, c { u W[124]; u Vals[8]; - uint it = get_local_id(0); Vals[1] = B1; Vals[2] = C1; @@ -380,70 +380,38 @@ __kernel void search( const uint state0, const uint state1, const uint state2, c // Round 124 Vals[7] += Vals[3] + P4(124) + P3(124) + P2(124) + P1(124) + s1(124) + ch(124); -#define MAXBUFFERS (4 * 512) +#define MAXBUFFERS (4095) +#define NFLAG (0xFFFUL) #if defined(VECTORS4) || defined(VECTORS2) if (Vals[7].x == -H[7]) { - // Unlikely event there is something here already ! - if (output[it]) { - for (it = 0; it < MAXBUFFERS; it++) { - if (!output[it]) - break; - } - } - output[it] = W[3].x; + output[W[3].x & NFLAG] = W[3].x; output[MAXBUFFERS] = 1; } if (Vals[7].y == -H[7]) { - it += 512; - if (output[it]) { - for (it = 0; it < MAXBUFFERS; it++) { - if (!output[it]) - break; - } - } - output[it] = W[3].y; + output[W[3].y & NFLAG] = W[3].y; output[MAXBUFFERS] = 1; } #ifdef VECTORS4 if (Vals[7].z == -H[7]) { - it += 1024; - if (output[it]) { - for (it = 0; it < MAXBUFFERS; it++) { - if (!output[it]) - break; - } - } - output[it] = W[3].z; + output[W[3].z & NFLAG] = W[3].z; output[MAXBUFFERS] = 1; } if (Vals[7].w == -H[7]) { - it += 1536; - if (output[it]) { - for (it = 0; it < MAXBUFFERS; it++) { - if (!output[it]) - break; - } - } - output[it] = W[3].w; + output[W[3].w & NFLAG] = W[3].w; output[MAXBUFFERS] = 1; } #endif #else if (Vals[7] == -H[7]) { - if (output[it]) { - for (it = 0; it < MAXBUFFERS; it++) { - if (!output[it]) - break; - } - } - output[it] = W[3]; + output[W[3] & NFLAG] = W[3]; output[MAXBUFFERS] = 1; } #endif + } \ No newline at end of file