mirror of
https://github.com/GOSTSec/sgminer
synced 2025-02-02 01:44:23 +00:00
First working port of the diakgcn kernel.
This commit is contained in:
parent
a6c6866a0d
commit
53d33c753f
165
device-gpu.c
165
device-gpu.c
@ -673,39 +673,42 @@ void manage_gpu(void)
|
||||
#ifdef HAVE_OPENCL
|
||||
static _clState *clStates[MAX_GPUDEVICES];
|
||||
|
||||
#define CL_SET_BLKARG(blkvar) status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->blkvar)
|
||||
#define CL_SET_ARG(var) status |= clSetKernelArg(*kernel, num++, sizeof(var), (void *)&var)
|
||||
#define CL_SET_VARG(args, var) status |= clSetKernelArg(*kernel, num++, args * sizeof(uint), (void *)var)
|
||||
|
||||
static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk)
|
||||
{
|
||||
cl_kernel *kernel = &clState->kernel;
|
||||
cl_int status = 0;
|
||||
int num = 0;
|
||||
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_a);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_b);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_c);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_d);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_e);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_f);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_g);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_h);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_b);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_c);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_d);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_f);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_g);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_h);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->nonce);
|
||||
CL_SET_BLKARG(ctx_a);
|
||||
CL_SET_BLKARG(ctx_b);
|
||||
CL_SET_BLKARG(ctx_c);
|
||||
CL_SET_BLKARG(ctx_d);
|
||||
CL_SET_BLKARG(ctx_e);
|
||||
CL_SET_BLKARG(ctx_f);
|
||||
CL_SET_BLKARG(ctx_g);
|
||||
CL_SET_BLKARG(ctx_h);
|
||||
CL_SET_BLKARG(cty_b);
|
||||
CL_SET_BLKARG(cty_c);
|
||||
CL_SET_BLKARG(cty_d);
|
||||
CL_SET_BLKARG(cty_f);
|
||||
CL_SET_BLKARG(cty_g);
|
||||
CL_SET_BLKARG(cty_h);
|
||||
CL_SET_BLKARG(nonce);
|
||||
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fW0);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fW1);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fW2);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fW3);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fW15);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fW01r);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fcty_e);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fcty_e2);
|
||||
CL_SET_BLKARG(fW0);
|
||||
CL_SET_BLKARG(fW1);
|
||||
CL_SET_BLKARG(fW2);
|
||||
CL_SET_BLKARG(fW3);
|
||||
CL_SET_BLKARG(fW15);
|
||||
CL_SET_BLKARG(fW01r);
|
||||
CL_SET_BLKARG(fcty_e);
|
||||
CL_SET_BLKARG(fcty_e2);
|
||||
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(clState->outputBuffer),
|
||||
(void *)&clState->outputBuffer);
|
||||
CL_SET_ARG(clState->outputBuffer);
|
||||
|
||||
return status;
|
||||
}
|
||||
@ -718,75 +721,87 @@ static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk)
|
||||
int i, num = 0;
|
||||
uint *nonces;
|
||||
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_a);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_b);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_c);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_d);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_e);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_f);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_g);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_h);
|
||||
CL_SET_BLKARG(ctx_a);
|
||||
CL_SET_BLKARG(ctx_b);
|
||||
CL_SET_BLKARG(ctx_c);
|
||||
CL_SET_BLKARG(ctx_d);
|
||||
CL_SET_BLKARG(ctx_e);
|
||||
CL_SET_BLKARG(ctx_f);
|
||||
CL_SET_BLKARG(ctx_g);
|
||||
CL_SET_BLKARG(ctx_h);
|
||||
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_b);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_c);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_d);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_f);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_g);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_h);
|
||||
CL_SET_BLKARG(cty_b);
|
||||
CL_SET_BLKARG(cty_c);
|
||||
CL_SET_BLKARG(cty_d);
|
||||
CL_SET_BLKARG(cty_f);
|
||||
CL_SET_BLKARG(cty_g);
|
||||
CL_SET_BLKARG(cty_h);
|
||||
|
||||
nonces = alloca(sizeof(uint) * vwidth);
|
||||
for (i = 0; i < vwidth; i++)
|
||||
nonces[i] = blk->nonce + i;
|
||||
status |= clSetKernelArg(*kernel, num++, vwidth * sizeof(uint), (void *)nonces);
|
||||
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->W16);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->W17);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->PreVal4_2);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->PreVal0);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->PreW18);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->PreW19);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->PreW31);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->PreW32);
|
||||
CL_SET_BLKARG(W16);
|
||||
CL_SET_BLKARG(W17);
|
||||
CL_SET_BLKARG(PreVal4_2);
|
||||
CL_SET_BLKARG(PreVal0);
|
||||
CL_SET_BLKARG(PreW18);
|
||||
CL_SET_BLKARG(PreW19);
|
||||
CL_SET_BLKARG(PreW31);
|
||||
CL_SET_BLKARG(PreW32);
|
||||
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(clState->outputBuffer),
|
||||
(void *)&clState->outputBuffer);
|
||||
CL_SET_ARG(clState->outputBuffer);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk)
|
||||
{
|
||||
cl_uint vwidth = clState->preferred_vwidth;
|
||||
cl_kernel *kernel = &clState->kernel;
|
||||
cl_int status = 0;
|
||||
int num = 0;
|
||||
int i, num = 0;
|
||||
uint *nonces;
|
||||
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_a);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_b);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_c);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_d);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_e);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_f);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_g);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_h);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_b);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_c);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_d);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_f);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_g);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_h);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->nonce);
|
||||
nonces = alloca(sizeof(uint) * vwidth);
|
||||
for (i = 0; i < vwidth; i++)
|
||||
nonces[i] = blk->nonce + i;
|
||||
CL_SET_VARG(vwidth, nonces);
|
||||
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fW0);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fW1);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fW2);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fW3);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fW15);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fW01r);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fcty_e);
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->fcty_e2);
|
||||
CL_SET_BLKARG(PreVal4);
|
||||
CL_SET_BLKARG(cty_h);
|
||||
CL_SET_BLKARG(cty_d);
|
||||
CL_SET_BLKARG(PreVal0);
|
||||
CL_SET_BLKARG(cty_b);
|
||||
CL_SET_BLKARG(cty_c);
|
||||
CL_SET_BLKARG(cty_f);
|
||||
CL_SET_BLKARG(cty_g);
|
||||
CL_SET_BLKARG(C1addK5);
|
||||
CL_SET_BLKARG(B1addK6);
|
||||
CL_SET_BLKARG(PreVal0addK7);
|
||||
CL_SET_BLKARG(W16addK16);
|
||||
CL_SET_BLKARG(W17addK17);
|
||||
CL_SET_BLKARG(PreW18);
|
||||
CL_SET_BLKARG(PreW19);
|
||||
CL_SET_BLKARG(W16);
|
||||
CL_SET_BLKARG(W17);
|
||||
CL_SET_BLKARG(PreW31);
|
||||
CL_SET_BLKARG(PreW32);
|
||||
|
||||
status |= clSetKernelArg(*kernel, num++, sizeof(clState->outputBuffer),
|
||||
(void *)&clState->outputBuffer);
|
||||
CL_SET_BLKARG(ctx_a);
|
||||
CL_SET_BLKARG(ctx_b);
|
||||
CL_SET_BLKARG(ctx_c);
|
||||
CL_SET_BLKARG(ctx_d);
|
||||
CL_SET_BLKARG(ctx_e);
|
||||
CL_SET_BLKARG(ctx_f);
|
||||
CL_SET_BLKARG(ctx_g);
|
||||
CL_SET_BLKARG(ctx_h);
|
||||
|
||||
CL_SET_BLKARG(A0);
|
||||
CL_SET_BLKARG(B0);
|
||||
|
||||
CL_SET_ARG(clState->outputBuffer);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
@ -3,9 +3,7 @@
|
||||
// Parts and / or ideas for this kernel are based upon the public-domain poclbm project, the phatk kernel by Phateus and the DiabloMiner kernel by DiabloD3.
|
||||
// The kernel was rewritten by me (Diapolo) and is still public-domain!
|
||||
|
||||
#ifdef VECTORS8
|
||||
typedef uint8 u;
|
||||
#elif defined VECTORS4
|
||||
#if defined VECTORS4
|
||||
typedef uint4 u;
|
||||
#elif defined VECTORS2
|
||||
typedef uint2 u;
|
||||
@ -31,9 +29,7 @@
|
||||
#ifdef GOFFSET
|
||||
typedef uint uu;
|
||||
#else
|
||||
#ifdef VECTORS8
|
||||
typedef uint8 uu;
|
||||
#elif defined VECTORS4
|
||||
#if defined VECTORS4
|
||||
typedef uint4 uu;
|
||||
#elif defined VECTORS2
|
||||
typedef uint2 uu;
|
||||
@ -67,29 +63,23 @@ __kernel
|
||||
u W[17];
|
||||
u V[8];
|
||||
|
||||
#ifdef VECTORS8
|
||||
#if defined VECTORS4
|
||||
#ifdef GOFFSET
|
||||
u nonce = ((uint)get_global_id(0) << 3) + (u)(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
u nonce = base + (get_global_id(0)<<2) + (uint4)(0, 1, 2, 3);
|
||||
#else
|
||||
u nonce = ((uint)get_group_id(0) * (uint)WORKSIZExVECSIZE) + ((uint)get_local_id(0) * 8U) + base;
|
||||
#endif
|
||||
#elif defined VECTORS4
|
||||
#ifdef GOFFSET
|
||||
u nonce = ((uint)get_global_id(0) << 2) + (u)(0, 1, 2, 3);
|
||||
#else
|
||||
u nonce = ((uint)get_group_id(0) * (uint)WORKSIZExVECSIZE) + ((uint)get_local_id(0) * 4U) + base;
|
||||
u nonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKSIZE * 4u);
|
||||
#endif
|
||||
#elif defined VECTORS2
|
||||
#ifdef GOFFSET
|
||||
u nonce = ((uint)get_global_id(0) << 1) + (u)(0, 1);
|
||||
u nonce = base + (get_global_id(0)<<1) + (uint2)(0, 1);
|
||||
#else
|
||||
u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) * 2U) + ((uint)get_local_id(0) * 2U) + base;
|
||||
u nonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u);
|
||||
#endif
|
||||
#else
|
||||
#ifdef GOFFSET
|
||||
u nonce = (uint)get_global_id(0);
|
||||
u nonce = base + get_global_id(0);
|
||||
#else
|
||||
u nonce = ((uint)get_group_id(0) * (uint)WORKSIZExVECSIZE) + (uint)get_local_id(0) + base;
|
||||
u nonce = base + get_local_id(0) + get_group_id(0) * (WORKSIZE);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -589,22 +579,40 @@ __kernel
|
||||
|
||||
V[7] += V[3] + W[10] + ch(124) + rot26(V[0]);
|
||||
|
||||
#ifdef VECTORS8
|
||||
u result = (u)(((V[7].s0 == 0x136032ed) * nonce.s0), ((V[7].s1 == 0x136032ed) * nonce.s1), ((V[7].s2 == 0x136032ed) * nonce.s2), ((V[7].s3 == 0x136032ed) * nonce.s3),
|
||||
((V[7].s4 == 0x136032ed) * nonce.s4), ((V[7].s5 == 0x136032ed) * nonce.s5), ((V[7].s6 == 0x136032ed) * nonce.s6), ((V[7].s7 == 0x136032ed) * nonce.s7));
|
||||
output[0 + (upsample(result.s0, result.s1) > 0)] = upsample(result.s0, result.s1);
|
||||
output[2 + (upsample(result.s2, result.s3) > 1)] = upsample(result.s2, result.s3);
|
||||
output[4 + (upsample(result.s4, result.s5) > 0)] = upsample(result.s4, result.s5);
|
||||
output[6 + (upsample(result.s6, result.s7) > 1)] = upsample(result.s6, result.s7);
|
||||
#elif defined VECTORS4
|
||||
u result = (u)(((V[7].x == 0x136032ed) * nonce.x), ((V[7].y == 0x136032ed) * nonce.y), ((V[7].z == 0x136032ed) * nonce.z), ((V[7].w == 0x136032ed) * nonce.w));
|
||||
output[0 + (upsample(result.x, result.y) > 0)] = upsample(result.x, result.y);
|
||||
output[2 + (upsample(result.z, result.w) > 1)] = upsample(result.z, result.w);
|
||||
#elif defined VECTORS2
|
||||
u result = (u)(((V[7].x == 0x136032ed) * nonce.x), ((V[7].y == 0x136032ed) * nonce.y));
|
||||
output[upsample(result.x, result.y) > 0] = upsample(result.x, result.y);
|
||||
|
||||
#define FOUND (0x80)
|
||||
#define NFLAG (0x7F)
|
||||
|
||||
#ifdef VECTORS4
|
||||
V[7] ^= 0x136032ed;
|
||||
|
||||
bool result = V[7].x & V[7].y & V[7].z & V[7].w;
|
||||
|
||||
if (!result) {
|
||||
if (!V[7].x)
|
||||
output[FOUND] = output[NFLAG & W[3].x] = W[3].x;
|
||||
if (!V[7].y)
|
||||
output[FOUND] = output[NFLAG & W[3].y] = W[3].y;
|
||||
if (!V[7].z)
|
||||
output[FOUND] = output[NFLAG & W[3].z] = W[3].z;
|
||||
if (!V[7].w)
|
||||
output[FOUND] = output[NFLAG & W[3].w] = W[3].w;
|
||||
}
|
||||
#else
|
||||
u result = (V[7] == 0x136032ed) * nonce;
|
||||
output[result != 0] = result;
|
||||
#ifdef VECTORS2
|
||||
V[7] ^= 0x136032ed;
|
||||
|
||||
bool result = V[7].x & V[7].y;
|
||||
|
||||
if (!result) {
|
||||
if (!V[7].x)
|
||||
output[FOUND] = output[NFLAG & W[3].x] = W[3].x;
|
||||
if (!V[7].y)
|
||||
output[FOUND] = output[NFLAG & W[3].y] = W[3].y;
|
||||
}
|
||||
#else
|
||||
if (V[7] == 0x136032ed)
|
||||
output[FOUND] = output[NFLAG & W[3]] = W[3];
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
10
findnonce.c
10
findnonce.c
@ -115,7 +115,15 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) {
|
||||
|
||||
|
||||
blk->PreVal4addT1 = blk->PreVal4 + blk->T1;
|
||||
blk->T1substate0 = state[0] - blk->T1;
|
||||
blk->T1substate0 = blk->ctx_a - blk->T1;
|
||||
|
||||
blk->B1addK6 = blk->cty_b + 0x923f82a4;
|
||||
blk->PreVal0addK7 = blk->PreVal0 + 0xab1c5ed5;
|
||||
blk->W16addK16 = blk->W16 + 0xe49b69c1;
|
||||
blk->W17addK17 = blk->W17 + 0xefbe4786;
|
||||
|
||||
blk->A0 = blk->ctx_a + 0x98c7e2a2;
|
||||
blk->B0 = blk->ctx_a + 0xfc08884d;
|
||||
}
|
||||
|
||||
#define P(t) (W[(t)&0xF] = W[(t-16)&0xF] + (rotate(W[(t-15)&0xF], 25) ^ rotate(W[(t-15)&0xF], 14) ^ (W[(t-15)&0xF] >> 3)) + W[(t-7)&0xF] + (rotate(W[(t-2)&0xF], 15) ^ rotate(W[(t-2)&0xF], 13) ^ (W[(t-2)&0xF] >> 10)))
|
||||
|
Loading…
x
Reference in New Issue
Block a user