mirror of
https://github.com/GOSTSec/sgminer
synced 2025-01-15 00:59:59 +00:00
Move phatk kernel to offset vector based nonce bases as well.
This commit is contained in:
parent
5c0214dcc1
commit
71cbe8cd72
@ -819,7 +819,7 @@ static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk,
|
|||||||
|
|
||||||
nonces = alloca(sizeof(uint) * vwidth);
|
nonces = alloca(sizeof(uint) * vwidth);
|
||||||
for (i = 0; i < vwidth; i++)
|
for (i = 0; i < vwidth; i++)
|
||||||
nonces[i] = blk->nonce + i;
|
nonces[i] = blk->nonce + (i * threads);
|
||||||
CL_SET_VARG(vwidth, nonces);
|
CL_SET_VARG(vwidth, nonces);
|
||||||
|
|
||||||
CL_SET_BLKARG(W16);
|
CL_SET_BLKARG(W16);
|
||||||
|
@ -168,8 +168,8 @@ void search( const uint state0, const uint state1, const uint state2, const uint
|
|||||||
{
|
{
|
||||||
|
|
||||||
|
|
||||||
u W[124];
|
u W[132];
|
||||||
u Vals[8];
|
u *Vals=&W[124];
|
||||||
|
|
||||||
//Dummy Variable to prevent compiler from reordering between rounds
|
//Dummy Variable to prevent compiler from reordering between rounds
|
||||||
u t1;
|
u t1;
|
||||||
@ -186,23 +186,11 @@ void search( const uint state0, const uint state1, const uint state2, const uint
|
|||||||
W[16] = W16;
|
W[16] = W16;
|
||||||
W[17] = W17;
|
W[17] = W17;
|
||||||
|
|
||||||
#ifdef VECTORS4
|
W[3] = base + (uint)get_global_id(0);
|
||||||
//Less dependencies to get both the local id and group id and then add them
|
|
||||||
W[3] = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKSIZE * 4u);
|
|
||||||
uint r = rot(W[3].x,25u)^rot(W[3].x,14u)^((W[3].x)>>3U);
|
|
||||||
//Since only the 2 LSBs differ between the nonces, we can save an instruction by flipping the 4 bits in W18 rather than the 1 bit in W3
|
|
||||||
W[18] = PreW18 + (u){r, r ^ 0x2004000U, r ^ 0x4008000U, r ^ 0x600C000U};
|
|
||||||
#elif defined VECTORS2
|
|
||||||
W[3] = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u);
|
|
||||||
uint r = rot(W[3].x,25u)^rot(W[3].x,14u)^((W[3].x)>>3U);
|
|
||||||
W[18] = PreW18 + (u){r, r ^ 0x2004000U};
|
|
||||||
#else
|
|
||||||
W[3] = base + get_local_id(0) + get_group_id(0) * (WORKSIZE);
|
|
||||||
u r = rot(W[3],25u)^rot(W[3],14u)^((W[3])>>3U);
|
u r = rot(W[3],25u)^rot(W[3],14u)^((W[3])>>3U);
|
||||||
W[18] = PreW18 + r;
|
W[18] = PreW18 + r;
|
||||||
#endif
|
|
||||||
//the order of the W calcs and Rounds is like this because the compiler needs help finding how to order the instructions
|
|
||||||
|
|
||||||
|
//the order of the W calcs and Rounds is like this because the compiler needs help finding how to order the instructions
|
||||||
|
|
||||||
|
|
||||||
Vals[4] = PreVal4 + W[3];
|
Vals[4] = PreVal4 + W[3];
|
||||||
|
Loading…
x
Reference in New Issue
Block a user