Browse Source

Update kernel with a shorter output path, and use 4k output buffer to match OS page sizes.

nfactor-troky
Con Kolivas 13 years ago
parent
commit
eea05c05b8
  1. 4
      findnonce.h
  2. 4
      ocl.c
  3. 50
      phatk110714.cl

4
findnonce.h

@@ -4,8 +4,8 @@
#include "config.h"
#define MAXTHREADS (0xFFFFFFFEULL)
/* Maximum worksize 512 * maximum vectors 4 plus one flag entry */
#define MAXBUFFERS (4 * 512)
/* Maximum worksize 4k to match page size */
#define MAXBUFFERS (4095)
#define BUFFERSIZE (sizeof(uint32_t) * (MAXBUFFERS + 1))
#ifdef HAVE_OPENCL

4
ocl.c

@@ -341,7 +341,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
char numbuf[10];
char filename[15];
if (clState->hasBitAlign)
strcpy(filename, "phatk110711.cl");
strcpy(filename, "phatk110714.cl");
else
strcpy(filename, "poclbm.cl");
FILE *binaryfile;
@@ -370,7 +370,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
strcpy(binaryfilename, name);
if (clState->hasBitAlign) {
strcat(binaryfilename, "phatk110711");
strcat(binaryfilename, "phatk110714");
strcat(binaryfilename, "bitalign");
} else
strcat(binaryfilename, "poclbm");

50
phatk110711.cl → phatk110714.cl

@@ -3,6 +3,7 @@
// 2011-07-11: further modified by Diapolo and still public-domain
// -ck version to be compatible with cgminer
// 2011-07-14: shorter code
#define VECTORSX
@@ -84,7 +85,6 @@ __kernel void search( const uint state0, const uint state1, const uint state2, c
{
u W[124];
u Vals[8];
uint it = get_local_id(0);
Vals[1] = B1;
Vals[2] = C1;
@@ -380,70 +380,38 @@ __kernel void search( const uint state0, const uint state1, const uint state2, c
// Round 124
Vals[7] += Vals[3] + P4(124) + P3(124) + P2(124) + P1(124) + s1(124) + ch(124);
#define MAXBUFFERS (4 * 512)
#define MAXBUFFERS (4095)
#define NFLAG (0xFFFUL)
#if defined(VECTORS4) || defined(VECTORS2)
if (Vals[7].x == -H[7])
{
// Unlikely event there is something here already !
if (output[it]) {
for (it = 0; it < MAXBUFFERS; it++) {
if (!output[it])
break;
}
}
output[it] = W[3].x;
output[W[3].x & NFLAG] = W[3].x;
output[MAXBUFFERS] = 1;
}
if (Vals[7].y == -H[7])
{
it += 512;
if (output[it]) {
for (it = 0; it < MAXBUFFERS; it++) {
if (!output[it])
break;
}
}
output[it] = W[3].y;
output[W[3].y & NFLAG] = W[3].y;
output[MAXBUFFERS] = 1;
}
#ifdef VECTORS4
if (Vals[7].z == -H[7])
{
it += 1024;
if (output[it]) {
for (it = 0; it < MAXBUFFERS; it++) {
if (!output[it])
break;
}
}
output[it] = W[3].z;
output[W[3].z & NFLAG] = W[3].z;
output[MAXBUFFERS] = 1;
}
if (Vals[7].w == -H[7])
{
it += 1536;
if (output[it]) {
for (it = 0; it < MAXBUFFERS; it++) {
if (!output[it])
break;
}
}
output[it] = W[3].w;
output[W[3].w & NFLAG] = W[3].w;
output[MAXBUFFERS] = 1;
}
#endif
#else
if (Vals[7] == -H[7])
{
if (output[it]) {
for (it = 0; it < MAXBUFFERS; it++) {
if (!output[it])
break;
}
}
output[it] = W[3];
output[W[3] & NFLAG] = W[3];
output[MAXBUFFERS] = 1;
}
#endif
}
Loading…
Cancel
Save