Browse Source

First working port of the diakgcn kernel.

nfactor-troky
ckolivas 12 years ago committed by Con Kolivas
parent
commit
033913ca8e
  1. 175
      device-gpu.c
  2. 78
      diakgcn120208.cl
  3. 10
      findnonce.c
  4. 4
      miner.h

175
device-gpu.c

@ -673,39 +673,42 @@ void manage_gpu(void) @@ -673,39 +673,42 @@ void manage_gpu(void)
#ifdef HAVE_OPENCL
static _clState *clStates[MAX_GPUDEVICES];
#define CL_SET_BLKARG(blkvar) status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->blkvar)
#define CL_SET_ARG(var) status |= clSetKernelArg(*kernel, num++, sizeof(var), (void *)&var)
#define CL_SET_VARG(args, var) status |= clSetKernelArg(*kernel, num++, args * sizeof(uint), (void *)var)
static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk)
{
cl_kernel *kernel = &clState->kernel;
cl_int status = 0;
int num = 0;
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_a);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_b);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_c);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_d);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_e);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_f);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_g);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_h);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_b);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_c);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_d);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_f);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_g);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_h);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->nonce);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fW0);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fW1);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fW2);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fW3);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fW15);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fW01r);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fcty_e);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fcty_e2);
status |= clSetKernelArg(*kernel, num++, sizeof(clState->outputBuffer),
(void *)&clState->outputBuffer);
CL_SET_BLKARG(ctx_a);
CL_SET_BLKARG(ctx_b);
CL_SET_BLKARG(ctx_c);
CL_SET_BLKARG(ctx_d);
CL_SET_BLKARG(ctx_e);
CL_SET_BLKARG(ctx_f);
CL_SET_BLKARG(ctx_g);
CL_SET_BLKARG(ctx_h);
CL_SET_BLKARG(cty_b);
CL_SET_BLKARG(cty_c);
CL_SET_BLKARG(cty_d);
CL_SET_BLKARG(cty_f);
CL_SET_BLKARG(cty_g);
CL_SET_BLKARG(cty_h);
CL_SET_BLKARG(nonce);
CL_SET_BLKARG(fW0);
CL_SET_BLKARG(fW1);
CL_SET_BLKARG(fW2);
CL_SET_BLKARG(fW3);
CL_SET_BLKARG(fW15);
CL_SET_BLKARG(fW01r);
CL_SET_BLKARG(fcty_e);
CL_SET_BLKARG(fcty_e2);
CL_SET_ARG(clState->outputBuffer);
return status;
}
@ -718,75 +721,87 @@ static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk) @@ -718,75 +721,87 @@ static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk)
int i, num = 0;
uint *nonces;
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_a);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_b);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_c);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_d);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_e);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_f);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_g);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_h);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_b);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_c);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_d);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_f);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_g);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_h);
CL_SET_BLKARG(ctx_a);
CL_SET_BLKARG(ctx_b);
CL_SET_BLKARG(ctx_c);
CL_SET_BLKARG(ctx_d);
CL_SET_BLKARG(ctx_e);
CL_SET_BLKARG(ctx_f);
CL_SET_BLKARG(ctx_g);
CL_SET_BLKARG(ctx_h);
CL_SET_BLKARG(cty_b);
CL_SET_BLKARG(cty_c);
CL_SET_BLKARG(cty_d);
CL_SET_BLKARG(cty_f);
CL_SET_BLKARG(cty_g);
CL_SET_BLKARG(cty_h);
nonces = alloca(sizeof(uint) * vwidth);
for (i = 0; i < vwidth; i++)
nonces[i] = blk->nonce + i;
status |= clSetKernelArg(*kernel, num++, vwidth * sizeof(uint), (void *)nonces);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->W16);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->W17);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->PreVal4_2);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->PreVal0);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->PreW18);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->PreW19);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->PreW31);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->PreW32);
CL_SET_BLKARG(W16);
CL_SET_BLKARG(W17);
CL_SET_BLKARG(PreVal4_2);
CL_SET_BLKARG(PreVal0);
CL_SET_BLKARG(PreW18);
CL_SET_BLKARG(PreW19);
CL_SET_BLKARG(PreW31);
CL_SET_BLKARG(PreW32);
status |= clSetKernelArg(*kernel, num++, sizeof(clState->outputBuffer),
(void *)&clState->outputBuffer);
CL_SET_ARG(clState->outputBuffer);
return status;
}
static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk)
{
cl_uint vwidth = clState->preferred_vwidth;
cl_kernel *kernel = &clState->kernel;
cl_int status = 0;
int num = 0;
int i, num = 0;
uint *nonces;
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_a);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_b);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_c);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_d);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_e);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_f);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_g);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_h);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_b);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_c);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_d);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_f);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_g);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_h);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->nonce);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fW0);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fW1);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fW2);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fW3);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fW15);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fW01r);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fcty_e);
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fcty_e2);
status |= clSetKernelArg(*kernel, num++, sizeof(clState->outputBuffer),
(void *)&clState->outputBuffer);
nonces = alloca(sizeof(uint) * vwidth);
for (i = 0; i < vwidth; i++)
nonces[i] = blk->nonce + i;
CL_SET_VARG(vwidth, nonces);
CL_SET_BLKARG(PreVal4);
CL_SET_BLKARG(cty_h);
CL_SET_BLKARG(cty_d);
CL_SET_BLKARG(PreVal0);
CL_SET_BLKARG(cty_b);
CL_SET_BLKARG(cty_c);
CL_SET_BLKARG(cty_f);
CL_SET_BLKARG(cty_g);
CL_SET_BLKARG(C1addK5);
CL_SET_BLKARG(B1addK6);
CL_SET_BLKARG(PreVal0addK7);
CL_SET_BLKARG(W16addK16);
CL_SET_BLKARG(W17addK17);
CL_SET_BLKARG(PreW18);
CL_SET_BLKARG(PreW19);
CL_SET_BLKARG(W16);
CL_SET_BLKARG(W17);
CL_SET_BLKARG(PreW31);
CL_SET_BLKARG(PreW32);
CL_SET_BLKARG(ctx_a);
CL_SET_BLKARG(ctx_b);
CL_SET_BLKARG(ctx_c);
CL_SET_BLKARG(ctx_d);
CL_SET_BLKARG(ctx_e);
CL_SET_BLKARG(ctx_f);
CL_SET_BLKARG(ctx_g);
CL_SET_BLKARG(ctx_h);
CL_SET_BLKARG(A0);
CL_SET_BLKARG(B0);
CL_SET_ARG(clState->outputBuffer);
return status;
}

78
diakgcn120208.cl

@ -3,9 +3,7 @@ @@ -3,9 +3,7 @@
// Parts and / or ideas for this kernel are based upon the public-domain poclbm project, the phatk kernel by Phateus and the DiabloMiner kernel by DiabloD3.
// The kernel was rewritten by me (Diapolo) and is still public-domain!
#ifdef VECTORS8
typedef uint8 u;
#elif defined VECTORS4
#if defined VECTORS4
typedef uint4 u;
#elif defined VECTORS2
typedef uint2 u;
@ -31,9 +29,7 @@ @@ -31,9 +29,7 @@
#ifdef GOFFSET
typedef uint uu;
#else
#ifdef VECTORS8
typedef uint8 uu;
#elif defined VECTORS4
#if defined VECTORS4
typedef uint4 uu;
#elif defined VECTORS2
typedef uint2 uu;
@ -67,29 +63,23 @@ __kernel @@ -67,29 +63,23 @@ __kernel
u W[17];
u V[8];
#ifdef VECTORS8
#if defined VECTORS4
#ifdef GOFFSET
u nonce = ((uint)get_global_id(0) << 3) + (u)(0, 1, 2, 3, 4, 5, 6, 7);
u nonce = base + (get_global_id(0)<<2) + (uint4)(0, 1, 2, 3);
#else
u nonce = ((uint)get_group_id(0) * (uint)WORKSIZExVECSIZE) + ((uint)get_local_id(0) * 8U) + base;
#endif
#elif defined VECTORS4
#ifdef GOFFSET
u nonce = ((uint)get_global_id(0) << 2) + (u)(0, 1, 2, 3);
#else
u nonce = ((uint)get_group_id(0) * (uint)WORKSIZExVECSIZE) + ((uint)get_local_id(0) * 4U) + base;
u nonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKSIZE * 4u);
#endif
#elif defined VECTORS2
#ifdef GOFFSET
u nonce = ((uint)get_global_id(0) << 1) + (u)(0, 1);
u nonce = base + (get_global_id(0)<<1) + (uint2)(0, 1);
#else
u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) * 2U) + ((uint)get_local_id(0) * 2U) + base;
u nonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u);
#endif
#else
#ifdef GOFFSET
u nonce = (uint)get_global_id(0);
u nonce = base + get_global_id(0);
#else
u nonce = ((uint)get_group_id(0) * (uint)WORKSIZExVECSIZE) + (uint)get_local_id(0) + base;
u nonce = base + get_local_id(0) + get_group_id(0) * (WORKSIZE);
#endif
#endif
@ -589,22 +579,40 @@ __kernel @@ -589,22 +579,40 @@ __kernel
V[7] += V[3] + W[10] + ch(124) + rot26(V[0]);
#ifdef VECTORS8
u result = (u)(((V[7].s0 == 0x136032ed) * nonce.s0), ((V[7].s1 == 0x136032ed) * nonce.s1), ((V[7].s2 == 0x136032ed) * nonce.s2), ((V[7].s3 == 0x136032ed) * nonce.s3),
((V[7].s4 == 0x136032ed) * nonce.s4), ((V[7].s5 == 0x136032ed) * nonce.s5), ((V[7].s6 == 0x136032ed) * nonce.s6), ((V[7].s7 == 0x136032ed) * nonce.s7));
output[0 + (upsample(result.s0, result.s1) > 0)] = upsample(result.s0, result.s1);
output[2 + (upsample(result.s2, result.s3) > 1)] = upsample(result.s2, result.s3);
output[4 + (upsample(result.s4, result.s5) > 0)] = upsample(result.s4, result.s5);
output[6 + (upsample(result.s6, result.s7) > 1)] = upsample(result.s6, result.s7);
#elif defined VECTORS4
u result = (u)(((V[7].x == 0x136032ed) * nonce.x), ((V[7].y == 0x136032ed) * nonce.y), ((V[7].z == 0x136032ed) * nonce.z), ((V[7].w == 0x136032ed) * nonce.w));
output[0 + (upsample(result.x, result.y) > 0)] = upsample(result.x, result.y);
output[2 + (upsample(result.z, result.w) > 1)] = upsample(result.z, result.w);
#elif defined VECTORS2
u result = (u)(((V[7].x == 0x136032ed) * nonce.x), ((V[7].y == 0x136032ed) * nonce.y));
output[upsample(result.x, result.y) > 0] = upsample(result.x, result.y);
#define FOUND (0x80)
#define NFLAG (0x7F)
#ifdef VECTORS4
V[7] ^= 0x136032ed;
bool result = V[7].x & V[7].y & V[7].z & V[7].w;
if (!result) {
if (!V[7].x)
output[FOUND] = output[NFLAG & W[3].x] = W[3].x;
if (!V[7].y)
output[FOUND] = output[NFLAG & W[3].y] = W[3].y;
if (!V[7].z)
output[FOUND] = output[NFLAG & W[3].z] = W[3].z;
if (!V[7].w)
output[FOUND] = output[NFLAG & W[3].w] = W[3].w;
}
#else
u result = (V[7] == 0x136032ed) * nonce;
output[result != 0] = result;
#ifdef VECTORS2
V[7] ^= 0x136032ed;
bool result = V[7].x & V[7].y;
if (!result) {
if (!V[7].x)
output[FOUND] = output[NFLAG & W[3].x] = W[3].x;
if (!V[7].y)
output[FOUND] = output[NFLAG & W[3].y] = W[3].y;
}
#else
if (V[7] == 0x136032ed)
output[FOUND] = output[NFLAG & W[3]] = W[3];
#endif
#endif
}

10
findnonce.c

@ -115,7 +115,15 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) { @@ -115,7 +115,15 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) {
blk->PreVal4addT1 = blk->PreVal4 + blk->T1;
blk->T1substate0 = state[0] - blk->T1;
blk->T1substate0 = blk->ctx_a - blk->T1;
blk->B1addK6 = blk->cty_b + 0x923f82a4;
blk->PreVal0addK7 = blk->PreVal0 + 0xab1c5ed5;
blk->W16addK16 = blk->W16 + 0xe49b69c1;
blk->W17addK17 = blk->W17 + 0xefbe4786;
blk->A0 = blk->ctx_a + 0x98c7e2a2;
blk->B0 = blk->ctx_a + 0xfc08884d;
}
#define P(t) (W[(t)&0xF] = W[(t-16)&0xF] + (rotate(W[(t-15)&0xF], 25) ^ rotate(W[(t-15)&0xF], 14) ^ (W[(t-15)&0xF] >> 3)) + W[(t-7)&0xF] + (rotate(W[(t-2)&0xF], 15) ^ rotate(W[(t-2)&0xF], 13) ^ (W[(t-2)&0xF] >> 10)))

4
miner.h

@ -610,6 +610,10 @@ typedef struct { @@ -610,6 +610,10 @@ typedef struct {
cl_uint PreW19;
cl_uint PreW31;
cl_uint PreW32;
/* For diakgcn */
cl_uint B1addK6, PreVal0addK7, W16addK16, W17addK17;
cl_uint A0, B0;
} dev_blk_ctx;
#else
typedef struct {

Loading…
Cancel
Save