diff --git a/findnonce.c b/findnonce.c index 660971c1..229b3e6d 100644 --- a/findnonce.c +++ b/findnonce.c @@ -55,7 +55,7 @@ inline uint32_t ByteReverse(uint32_t value) void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) { cl_uint A, B, C, D, E, F, G, H; - + A = state[0]; B = state[1]; C = state[2]; @@ -96,9 +96,19 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) { blk->merkle = data[0]; blk->ntime = data[1]; blk->nbits = data[2]; - + blk->W16 = blk->fW0 = data[0] + (rotr(data[1], 7) ^ rotr(data[1], 18) ^ (data[1] >> 3)); blk->W17 = blk->fW1 = data[1] + (rotr(data[2], 7) ^ rotr(data[2], 18) ^ (data[2] >> 3)) + 0x01100000; + blk->PreVal4 = blk->fcty_e = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + 0xe9b5dba5; + blk->T1 = blk->fcty_e2 = (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G))); + blk->PreVal4_2 = blk->PreVal4 + blk->T1; + blk->PreVal0 = blk->PreVal4 + state[0]; + blk->PreW31 = 0x00000280 + (rotr(blk->W16, 7) ^ rotr(blk->W16, 18) ^ (blk->W16 >> 3)); + blk->PreW32 = blk->W16 + ((rotr(blk->W17, 7) ^ rotr(blk->W17, 18) ^ (blk->W17 >> 3))); + blk->PreW18 = data[2] + (rotr(blk->W16, 17) ^ rotr(blk->W16, 19) ^ (blk->W16 >> 10)); + blk->PreW19 = 0x11002000 + (rotr(blk->W17, 17) ^ rotr(blk->W17, 19) ^ (blk->W17 >> 10)); + + blk->W2 = data[2]; blk->W2A = blk->W2 + (rotr(blk->W16, 19) ^ rotr(blk->W16, 17) ^ (blk->W16 >> 10)); @@ -109,9 +119,7 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) { blk->fW15 = 0x00000280 + (rotr(blk->fW0, 7) ^ rotr(blk->fW0, 18) ^ (blk->fW0 >> 3)); blk->fW01r = blk->fW0 + (rotr(blk->fW1, 7) ^ rotr(blk->fW1, 18) ^ (blk->fW1 >> 3)); - blk->PreVal4 = blk->fcty_e = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + 0xe9b5dba5; - blk->T1 = blk->fcty_e2 = (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G))); - + blk->PreVal4addT1 = blk->PreVal4 + blk->T1; blk->T1substate0 = state[0] - blk->T1; } diff --git a/main.c b/main.c index 8f2a3aba..7f1165ac 100644 --- a/main.c +++ b/main.c @@ -3411,7 +3411,7 @@ static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk) static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk) { - cl_kernel *kernel = &clState->kernel; + cl_kernel *kernel = &clState->kernel; cl_int status = 0; int num = 0; @@ -3423,21 +3423,27 @@ static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk) status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_f); status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_g); status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->ctx_h); + status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_b); status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_c); - status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->C1addK5); - status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->D1A); + status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_d); status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_f); status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_g); status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->cty_h); - status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->nonce); + + uint nonces[2]; + nonces[0] = blk->nonce; + nonces[1] = (blk->nonce)+1; + status |= clSetKernelArg(*kernel, num++, 2 * sizeof(uint), (void *)nonces); - status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->W2A); status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->W16); status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->W17); - status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->W17_2); - status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->PreVal4addT1); - status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->T1substate0); + status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->PreVal4_2); + status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->PreVal0); + status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->PreW18); + status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->PreW19); + status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->PreW31); + status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->PreW32); status |= clSetKernelArg(*kernel, num++, sizeof(clState->outputBuffer), (void *)&clState->outputBuffer); diff --git a/miner.h b/miner.h index 402ef9f2..910693ce 100644 --- a/miner.h +++ b/miner.h @@ -288,6 +288,12 @@ typedef struct { cl_uint PreVal4; cl_uint T1; cl_uint C1addK5; cl_uint D1A; cl_uint W2A; cl_uint W17_2; cl_uint PreVal4addT1; cl_uint T1substate0; + cl_uint PreVal4_2; + cl_uint PreVal0; + cl_uint PreW18; + cl_uint PreW19; + cl_uint PreW31; + cl_uint PreW32; } dev_blk_ctx; #else typedef struct { diff --git a/ocl.c b/ocl.c index 45f70955..15c2517b 100644 --- a/ocl.c +++ b/ocl.c @@ -355,8 +355,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize) break; case KL_NONE: /* Shouldn't happen */ case KL_PHATK: - strcpy(filename, "phatk110722.cl"); - strcpy(binaryfilename, "phatk110722"); + strcpy(filename, "phatk2_2.cl"); + strcpy(binaryfilename, "phatk2_2"); break; } @@ -501,7 +501,14 @@ build: } /* create a cl program executable for all the devices specified */ - status = clBuildProgram(clState->program, 1, &devices[gpu], NULL, NULL, NULL); + char CompilerOptions[256]; + sprintf(CompilerOptions, "%s%i", "-DWORKSIZE=", clState->work_size); + //int n = 1000; + //while(n--) + // printf("%s", CompilerOptions); + //return 1; + status = clBuildProgram(clState->program, 1, &devices[gpu], CompilerOptions , NULL, NULL); + if (status != CL_SUCCESS) { applog(LOG_ERR, "Error: Building Program (clBuildProgram)");