mirror of https://github.com/GOSTSec/sgminer
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
184 lines
4.9 KiB
184 lines
4.9 KiB
/*
 * Lyra2 kernel implementation.
 *
 * ==========================(LICENSE BEGIN)============================
 * Copyright (c) 2014 djm34
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * ===========================(LICENSE END)=============================
 *
 * @author djm34
 */
|
|
|
|
|
|
|
/*
 * 64-bit rotation and byte-shuffle helpers built on the OpenCL built-ins
 * rotate() and as_<type>().
 *
 * ROTL64(x, n): rotate x left by n bits.
 * ROTR64(x, n): rotate x right by n bits, expressed as a left rotation
 *               by (64 - n).
 * SWAP32(x):    exchange the two 32-bit halves of x.
 * SWAP24(x):    byte shuffle .s34567012 of x viewed as a uchar8.
 * SWAP16(x):    byte shuffle .s23456701 of x viewed as a uchar8.
 *
 * The n argument is parenthesized so that an expression such as
 * ROTR64(v, a + b) rotates by 64 - (a + b) instead of 64 - a + b.
 */
#define ROTL64(x,n) rotate((x),(ulong)(n))
#define ROTR64(x,n) rotate((x),(ulong)(64-(n)))
#define SWAP32(x) as_ulong(as_uint2(x).s10)
#define SWAP24(x) as_ulong(as_uchar8(x).s34567012)
#define SWAP16(x) as_ulong(as_uchar8(x).s23456701)
|
|
|
/*
 * Mixing step on four 64-bit lanes a, b, c, d (all modified in place).
 *
 * Performs four add / xor / rotate stages with right-rotation amounts
 * 32 (done as a half swap via SWAP32), 24, 16 and 63.
 * Every argument is evaluated several times and must be a plain lvalue.
 */
#define G(a,b,c,d) \
	do { \
		a += b; \
		d ^= a; \
		d = SWAP32(d); \
		c += d; \
		b ^= c; \
		b = ROTR64(b,24); \
		a += b; \
		d ^= a; \
		d = ROTR64(d,16); \
		c += d; \
		b ^= c; \
		b = ROTR64(b, 63); \
	} while (0)
|
|
|
/*
 * Variant of the G mixing step that performs the first rotation with
 * ROTR64(d, 32) rather than SWAP32(d); the remaining stages rotate right
 * by 24, 16 and 63. Operates in place on the four 64-bit lanes a, b, c, d.
 * Every argument is evaluated several times and must be a plain lvalue.
 */
#define G_old(a,b,c,d) \
	do { \
		a += b; \
		d ^= a; \
		d = ROTR64(d, 32); \
		c += d; \
		b ^= c; \
		b = ROTR64(b, 24); \
		a += b; \
		d ^= a; \
		d = ROTR64(d, 16); \
		c += d; \
		b ^= c; \
		b = ROTR64(b, 63); \
	} while (0)
|
|
|
|
|
/*
 * One round of the Blake2b compression function.
 *
 * s must index at least four 4-component 64-bit vectors (s[0]..s[3]).
 * Viewing s[r].{x,y,z,w} as row r of a 4x4 matrix, G is applied first to
 * each of the four columns and then to each of the four diagonals.
 */
#define round_lyra(s) \
	do { \
		/* columns */ \
		G(s[0].x, s[1].x, s[2].x, s[3].x); \
		G(s[0].y, s[1].y, s[2].y, s[3].y); \
		G(s[0].z, s[1].z, s[2].z, s[3].z); \
		G(s[0].w, s[1].w, s[2].w, s[3].w); \
		/* diagonals */ \
		G(s[0].x, s[1].y, s[2].z, s[3].w); \
		G(s[0].y, s[1].z, s[2].w, s[3].x); \
		G(s[0].z, s[1].w, s[2].x, s[3].y); \
		G(s[0].w, s[1].x, s[2].y, s[3].z); \
	} while(0)
|
|
|
|
|
|
|
/*
 * Duplexes the four blocks that start at DMatrix offset 0 and stores the
 * results, in reverse block order, in the four blocks that start at offset
 * 4 * memshift.
 *
 * For block i (0..3):
 *   - three ulong4 values are read from DMatrix[i * memshift ...],
 *   - they are XORed into state[0..2],
 *   - round_lyra() is applied to state,
 *   - (values read ^ updated state[0..2]) is written to
 *     DMatrix[(7 - i) * memshift ...].
 *
 * state:   sponge state, updated in place; state[0..3] are touched by
 *          round_lyra, only state[0..2] absorb input.
 * DMatrix: global-memory matrix, read and written.
 *
 * memshift is not defined in this part of the file; it is used here as the
 * distance, in ulong4 elements, between consecutive blocks.
 */
void reduceDuplexf(ulong4* state ,__global ulong4* DMatrix)
{
	ulong4 in_blk[3];
	const uint read_base = 0;
	const uint write_last = (memshift * 3 + memshift * 4);

	for (int col = 0; col < 4; col++)
	{
		const uint src = read_base + col*memshift;
		const uint dst = write_last - col*memshift;

		/* Absorb the source block into the state. */
		for (int k = 0; k < 3; k++) {
			in_blk[k] = DMatrix[k + src];
			state[k] ^= in_blk[k];
		}

		round_lyra(state);

		/* Store source block XOR permuted state into the mirrored block. */
		for (int k = 0; k < 3; k++)
			DMatrix[k + dst] = in_blk[k] ^ state[k];
	}
}
|
|
|
|
|
|
|
/*
 * Duplexes row rowIn together with row rowInOut, updating rowInOut and
 * rowOut of DMatrix as well as the sponge state.
 *
 * A row starts at offset memshift * 4 * row and holds four blocks spaced
 * memshift ulong4 elements apart; three ulong4 values per block are used.
 *
 * For each block i (0..3):
 *   1. state[0..2] ^= rowIn[i] + rowInOut[i] (component-wise 64-bit add).
 *   2. round_lyra(state).
 *   3. The loaded rowInOut block is XORed with the first 12 64-bit words of
 *      state rotated by one word (word 0 takes state word 11, word w takes
 *      state word w - 1).
 *   4. If rowInOut != rowOut: that block is stored back to rowInOut[i] and
 *      state[0..2] is XORed into rowOut[i] in memory.
 *      Otherwise: the block is additionally XORed with state[0..2] and
 *      stored to rowInOut[i].
 *
 * state:   sponge state, updated in place (state[0..3] touched).
 * DMatrix: global-memory matrix, read and written.
 *
 * memshift is defined outside this part of the file.
 */
void reduceDuplexRowf(uint rowIn,uint rowInOut,uint rowOut,ulong4 * state, __global ulong4 * DMatrix)
{
	const uint in_base    = (memshift * 4 * rowIn);
	const uint inout_base = (memshift * 4 * rowInOut);
	const uint out_base   = (memshift * 4 * rowOut);

	for (int col = 0; col < 4; col++)
	{
		const uint in_off    = in_base    + col*memshift;
		const uint inout_off = inout_base + col*memshift;
		const uint out_off   = out_base   + col*memshift;
		ulong4 inout_blk[3];

		/* Absorb rowIn + rowInOut into the state, keeping the rowInOut block. */
		for (int k = 0; k < 3; k++) {
			ulong4 in_val = DMatrix[k + in_off];

			inout_blk[k] = DMatrix[k + inout_off];
			in_val += inout_blk[k];
			state[k] ^= in_val;
		}

		round_lyra(state);

		/* XOR in the state rotated by one 64-bit word (12 words total). */
		ulong *inout_words = (ulong*)inout_blk;
		ulong *state_words = (ulong*)state;

		inout_words[0] ^= state_words[11];
		for (int w = 1; w < 12; w++)
			inout_words[w] ^= state_words[w - 1];

		if (rowInOut == rowOut) {
			/* Same row: fold the state into the block before the single store. */
			for (int k = 0; k < 3; k++)
				DMatrix[k + inout_off] = inout_blk[k] ^ state[k];
		}
		else {
			for (int k = 0; k < 3; k++)
				DMatrix[k + inout_off] = inout_blk[k];
			for (int k = 0; k < 3; k++)
				DMatrix[k + out_off] ^= state[k];
		}
	}
}
|
|
|
|
|
|
|
|
|
/*
 * Setup-time row duplexing: combines rows rowIn and rowInOut, writes a new
 * row rowOut in reverse block order and updates rowInOut.
 *
 * A row starts at offset memshift * 4 * row and holds four blocks spaced
 * memshift ulong4 elements apart; three ulong4 values per block are used.
 *
 * For each block i (0..3):
 *   1. state[0..2] ^= rowIn[i] + rowInOut[i] (component-wise 64-bit add).
 *   2. round_lyra(state).
 *   3. rowOut[3 - i] = rowIn[i] ^ state[0..2].
 *   4. rowInOut[i] = loaded rowInOut[i] XOR the first 12 64-bit words of
 *      state rotated by one word (word 0 takes state word 11, word w takes
 *      state word w - 1).
 *
 * state:   sponge state, updated in place (state[0..3] touched).
 * DMatrix: global-memory matrix, read and written.
 *
 * memshift is defined outside this part of the file.
 */
void reduceDuplexRowSetupf(uint rowIn, uint rowInOut, uint rowOut, ulong4 *state, __global ulong4* DMatrix) {
	const uint in_base    = (memshift * 4 * rowIn);
	const uint inout_base = (memshift * 4 * rowInOut);
	const uint out_last   = (memshift * 3 + memshift * 4 * rowOut);

	for (int col = 0; col < 4; col++)
	{
		const uint in_off    = in_base    + col*memshift;
		const uint inout_off = inout_base + col*memshift;
		const uint out_off   = out_last   - col*memshift;
		ulong4 in_blk[3], inout_blk[3];

		/* Load both input blocks and absorb their sum into the state. */
		for (int k = 0; k < 3; k++) {
			in_blk[k]    = DMatrix[k + in_off];
			inout_blk[k] = DMatrix[k + inout_off];
			state[k] ^= in_blk[k] + inout_blk[k];
		}

		round_lyra(state);

		/* Output row block (mirrored position): rowIn block XOR state. */
		for (int k = 0; k < 3; k++)
			DMatrix[k + out_off] = in_blk[k] ^ state[k];

		/* rowInOut block XOR state rotated by one 64-bit word. */
		ulong *inout_words = (ulong*)inout_blk;
		ulong *state_words = (ulong*)state;

		inout_words[0] ^= state_words[11];
		for (int w = 1; w < 12; w++)
			inout_words[w] ^= state_words[w - 1];

		for (int k = 0; k < 3; k++)
			DMatrix[k + inout_off] = inout_blk[k];
	}
}
|
|
|
|