Browse Source

Merge branch 'diakgcn' of https://github.com/Diapolo/cgminer into diakgcn

nfactor-troky
Con Kolivas 13 years ago
parent
commit
c462ba5bc9
  1. 2
      device-gpu.c
  2. 2
      diakgcn120208.cl
  3. 10
      findnonce.c
  4. 13
      ocl.c
  5. 1
      ocl.h

2
device-gpu.c

@@ -740,7 +740,7 @@ static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk)
nonces = alloca(sizeof(uint) * vwidth);
for (i = 0; i < vwidth; i++)
nonces[i] = blk->nonce + i;
status |= clSetKernelArg(*kernel, num++, vwidth * sizeof(uint), (void *)nonces);
CL_SET_VARG(vwidth, nonces);
CL_SET_BLKARG(W16);
CL_SET_BLKARG(W17);

2
diakgcn120208.cl

@@ -57,7 +57,7 @@ __kernel
const uint state0, const uint state1, const uint state2, const uint state3,
const uint state4, const uint state5, const uint state6, const uint state7,
const uint state0A, const uint state0B,
__global int * output)
__global uint * output)
{
u W[17];
u V[8];

10
findnonce.c

@@ -66,9 +66,6 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) {
blk->cty_a = A;
blk->cty_b = B;
blk->cty_c = C;
blk->C1addK5 = C + SHA256_K[5];
blk->cty_d = D;
blk->D1A = D + 0xb956c25b;
@@ -93,12 +90,12 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) {
blk->W16 = blk->fW0 = data[0] + (rotr(data[1], 7) ^ rotr(data[1], 18) ^ (data[1] >> 3));
blk->W17 = blk->fW1 = data[1] + (rotr(data[2], 7) ^ rotr(data[2], 18) ^ (data[2] >> 3)) + 0x01100000;
blk->PreVal4 = blk->fcty_e = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + 0xe9b5dba5;
blk->PreVal4 = blk->fcty_e = blk->ctx_e + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + 0xe9b5dba5;
blk->T1 = blk->fcty_e2 = (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
blk->PreVal4_2 = blk->PreVal4 + blk->T1;
blk->PreVal0 = blk->PreVal4 + state[0];
blk->PreVal0 = blk->PreVal4 + blk->ctx_a;
blk->PreW31 = 0x00000280 + (rotr(blk->W16, 7) ^ rotr(blk->W16, 18) ^ (blk->W16 >> 3));
blk->PreW32 = blk->W16 + ((rotr(blk->W17, 7) ^ rotr(blk->W17, 18) ^ (blk->W17 >> 3)));
blk->PreW32 = blk->W16 + (rotr(blk->W17, 7) ^ rotr(blk->W17, 18) ^ (blk->W17 >> 3));
blk->PreW18 = data[2] + (rotr(blk->W16, 17) ^ rotr(blk->W16, 19) ^ (blk->W16 >> 10));
blk->PreW19 = 0x11002000 + (rotr(blk->W17, 17) ^ rotr(blk->W17, 19) ^ (blk->W17 >> 10));
@@ -117,6 +114,7 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) {
blk->PreVal4addT1 = blk->PreVal4 + blk->T1;
blk->T1substate0 = blk->ctx_a - blk->T1;
blk->C1addK5 = blk->cty_c + SHA256_K[5];
blk->B1addK6 = blk->cty_b + SHA256_K[6];
blk->PreVal0addK7 = blk->PreVal0 + SHA256_K[7];
blk->W16addK16 = blk->W16 + SHA256_K[16];

13
ocl.c

@@ -302,6 +302,19 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
find = strstr(extensions, camo);
if (find)
clState->hasBitAlign = true;
/* Check for OpenCL >= 1.1 support, needed for global offset parameter usage. */
char * devoclver = malloc(1024);
const char * ocl10 = "OpenCL 1.0";
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_VERSION, 1024, (void *)devoclver, NULL);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error: Failed to clGetDeviceInfo when trying to get CL_DEVICE_VERSION");
return NULL;
}
find = strstr(devoclver, ocl10);
if !(find)
clState->hasOpenCL11plus = true;
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&clState->preferred_vwidth, NULL);
if (status != CL_SUCCESS) {

1
ocl.h

@@ -18,6 +18,7 @@ typedef struct {
cl_program program;
cl_mem outputBuffer;
bool hasBitAlign;
bool hasOpenCL11plus;
cl_uint preferred_vwidth;
size_t max_work_size;
size_t work_size;

Loading…
Cancel
Save