Browse Source

Use 256 output slots for kernels to allow 1 for each worksize.

nfactor-troky
Con Kolivas 13 years ago
parent
commit
1b5c676de7
  1. 4
      diablo120328.cl
  2. 4
      diakgcn120427.cl
  3. 6
      findnonce.h
  4. 4
      phatk120223.cl
  5. 4
      poclbm120327.cl
  6. 4
      scrypt120713.cl

4
diablo120328.cl

@@ -1242,8 +1242,8 @@ void search(
ZA[924] = (ZCh(ZA[922], ZA[920], ZA[918]) + ZA[923]) + ZR26(ZA[922]); ZA[924] = (ZCh(ZA[922], ZA[920], ZA[918]) + ZA[923]) + ZR26(ZA[922]);
#define FOUND (0x80) #define FOUND (0x800)
#define NFLAG (0x7F) #define NFLAG (0x7FF)
#if defined(VECTORS4) #if defined(VECTORS4)
bool result = any(ZA[924] == 0x136032EDU); bool result = any(ZA[924] == 0x136032EDU);

4
diakgcn120427.cl

@@ -571,8 +571,8 @@ __kernel
V[7] += V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]); V[7] += V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
#define FOUND (0x80) #define FOUND (0x800)
#define NFLAG (0x7F) #define NFLAG (0x7FF)
#ifdef VECTORS4 #ifdef VECTORS4
if ((V[7].x == 0x136032edU) ^ (V[7].y == 0x136032edU) ^ (V[7].z == 0x136032edU) ^ (V[7].w == 0x136032edU)) if ((V[7].x == 0x136032edU) ^ (V[7].y == 0x136032edU) ^ (V[7].z == 0x136032edU) ^ (V[7].w == 0x136032edU))

6
findnonce.h

@@ -4,10 +4,10 @@
#include "config.h" #include "config.h"
#define MAXTHREADS (0xFFFFFFFEULL) #define MAXTHREADS (0xFFFFFFFEULL)
#define MAXBUFFERS (0xFF) #define MAXBUFFERS (0xFFF)
#define BUFFERSIZE (sizeof(uint32_t) * MAXBUFFERS) #define BUFFERSIZE (sizeof(uint32_t) * MAXBUFFERS)
#define FOUND (0x80) #define FOUND (0x800)
/* #define NFLAG (0x7F) Just for reference */ /* #define NFLAG (0x7FF) Just for reference */
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data); extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data);

4
phatk120223.cl

@@ -387,8 +387,8 @@ void search( const uint state0, const uint state1, const uint state2, const uint
W[117] += W[108] + Vals[3] + Vals[7] + P2(124) + P1(124) + Ch((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64),Vals[1],Vals[2]) - W[117] += W[108] + Vals[3] + Vals[7] + P2(124) + P1(124) + Ch((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64),Vals[1],Vals[2]) -
(-(K[60] + H[7]) - S1((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64))); (-(K[60] + H[7]) - S1((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64)));
#define FOUND (0x80) #define FOUND (0x800)
#define NFLAG (0x7F) #define NFLAG (0x7FF)
#ifdef VECTORS4 #ifdef VECTORS4
bool result = W[117].x & W[117].y & W[117].z & W[117].w; bool result = W[117].x & W[117].y & W[117].z & W[117].w;

4
poclbm120327.cl

@@ -1311,8 +1311,8 @@ Vals[1]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
Vals[1]+=K[59]; Vals[1]+=K[59];
Vals[1]+=Vals[5]; Vals[1]+=Vals[5];
#define FOUND (0x80) #define FOUND (0x800)
#define NFLAG (0x7F) #define NFLAG (0x7FF)
#if defined(VECTORS2) || defined(VECTORS4) #if defined(VECTORS2) || defined(VECTORS4)
Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);

4
scrypt120713.cl

@@ -685,8 +685,8 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
unshittify(X); unshittify(X);
} }
#define FOUND (0x80) #define FOUND (0x800)
#define NFLAG (0x7F) #define NFLAG (0x7FF)
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search(__global const uint4 * restrict input, __kernel void search(__global const uint4 * restrict input,

Loading…
Cancel
Save