Browse Source

Update the kernel with a shorter output path, and use a 4k-entry output buffer (4096 uint32_t slots, including the flag entry) to match OS page sizes.

nfactor-troky
Con Kolivas 14 years ago
parent
commit
eea05c05b8
  1. 4
      findnonce.h
  2. 4
      ocl.c
  3. 50
      phatk110714.cl

4
findnonce.h

@ -4,8 +4,8 @@
#include "config.h" #include "config.h"
#define MAXTHREADS (0xFFFFFFFEULL) #define MAXTHREADS (0xFFFFFFFEULL)
/* Maximum worksize 512 * maximum vectors 4 plus one flag entry */ /* Maximum worksize 4k to match page size */
#define MAXBUFFERS (4 * 512) #define MAXBUFFERS (4095)
#define BUFFERSIZE (sizeof(uint32_t) * (MAXBUFFERS + 1)) #define BUFFERSIZE (sizeof(uint32_t) * (MAXBUFFERS + 1))
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL

4
ocl.c

@ -341,7 +341,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
char numbuf[10]; char numbuf[10];
char filename[15]; char filename[15];
if (clState->hasBitAlign) if (clState->hasBitAlign)
strcpy(filename, "phatk110711.cl"); strcpy(filename, "phatk110714.cl");
else else
strcpy(filename, "poclbm.cl"); strcpy(filename, "poclbm.cl");
FILE *binaryfile; FILE *binaryfile;
@ -370,7 +370,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
strcpy(binaryfilename, name); strcpy(binaryfilename, name);
if (clState->hasBitAlign) { if (clState->hasBitAlign) {
strcat(binaryfilename, "phatk110711"); strcat(binaryfilename, "phatk110714");
strcat(binaryfilename, "bitalign"); strcat(binaryfilename, "bitalign");
} else } else
strcat(binaryfilename, "poclbm"); strcat(binaryfilename, "poclbm");

50
phatk110711.cl → phatk110714.cl

@ -3,6 +3,7 @@
// 2011-07-11: further modified by Diapolo and still public-domain // 2011-07-11: further modified by Diapolo and still public-domain
// -ck version to be compatible with cgminer // -ck version to be compatible with cgminer
// 2011-07-14: shorter code
#define VECTORSX #define VECTORSX
@ -84,7 +85,6 @@ __kernel void search( const uint state0, const uint state1, const uint state2, c
{ {
u W[124]; u W[124];
u Vals[8]; u Vals[8];
uint it = get_local_id(0);
Vals[1] = B1; Vals[1] = B1;
Vals[2] = C1; Vals[2] = C1;
@ -380,70 +380,38 @@ __kernel void search( const uint state0, const uint state1, const uint state2, c
// Round 124 // Round 124
Vals[7] += Vals[3] + P4(124) + P3(124) + P2(124) + P1(124) + s1(124) + ch(124); Vals[7] += Vals[3] + P4(124) + P3(124) + P2(124) + P1(124) + s1(124) + ch(124);
#define MAXBUFFERS (4 * 512) #define MAXBUFFERS (4095)
#define NFLAG (0xFFFUL)
#if defined(VECTORS4) || defined(VECTORS2) #if defined(VECTORS4) || defined(VECTORS2)
if (Vals[7].x == -H[7]) if (Vals[7].x == -H[7])
{ {
// Unlikely event there is something here already ! output[W[3].x & NFLAG] = W[3].x;
if (output[it]) {
for (it = 0; it < MAXBUFFERS; it++) {
if (!output[it])
break;
}
}
output[it] = W[3].x;
output[MAXBUFFERS] = 1; output[MAXBUFFERS] = 1;
} }
if (Vals[7].y == -H[7]) if (Vals[7].y == -H[7])
{ {
it += 512; output[W[3].y & NFLAG] = W[3].y;
if (output[it]) {
for (it = 0; it < MAXBUFFERS; it++) {
if (!output[it])
break;
}
}
output[it] = W[3].y;
output[MAXBUFFERS] = 1; output[MAXBUFFERS] = 1;
} }
#ifdef VECTORS4 #ifdef VECTORS4
if (Vals[7].z == -H[7]) if (Vals[7].z == -H[7])
{ {
it += 1024; output[W[3].z & NFLAG] = W[3].z;
if (output[it]) {
for (it = 0; it < MAXBUFFERS; it++) {
if (!output[it])
break;
}
}
output[it] = W[3].z;
output[MAXBUFFERS] = 1; output[MAXBUFFERS] = 1;
} }
if (Vals[7].w == -H[7]) if (Vals[7].w == -H[7])
{ {
it += 1536; output[W[3].w & NFLAG] = W[3].w;
if (output[it]) {
for (it = 0; it < MAXBUFFERS; it++) {
if (!output[it])
break;
}
}
output[it] = W[3].w;
output[MAXBUFFERS] = 1; output[MAXBUFFERS] = 1;
} }
#endif #endif
#else #else
if (Vals[7] == -H[7]) if (Vals[7] == -H[7])
{ {
if (output[it]) { output[W[3] & NFLAG] = W[3];
for (it = 0; it < MAXBUFFERS; it++) {
if (!output[it])
break;
}
}
output[it] = W[3];
output[MAXBUFFERS] = 1; output[MAXBUFFERS] = 1;
} }
#endif #endif
} }
Loading…
Cancel
Save