mirror of https://github.com/GOSTSec/sgminer
Con Kolivas
13 years ago
5 changed files with 4388 additions and 0 deletions
@ -0,0 +1,587 @@ |
|||||||
|
// DiaKGCN 27-04-2012 - OpenCL kernel by Diapolo
//
// Parts and/or ideas for this kernel are based upon the public-domain poclbm project, the phatk kernel by Phateus and the DiabloMiner kernel by DiabloD3.
// The kernel was rewritten by me (Diapolo) and is still public-domain!
||||||
|
|
||||||
|
#ifdef VECTORS4 |
||||||
|
typedef uint4 u; |
||||||
|
#elif defined VECTORS2 |
||||||
|
typedef uint2 u; |
||||||
|
#else |
||||||
|
typedef uint u; |
||||||
|
#endif |
||||||
|
|
||||||
|
// Three-input bitwise helpers used by every round of the kernel.
//
//   ch(x, y, z): for each bit, yields the bit of y where x is 1 and the bit of z
//                where x is 0 (plain and bitselect variants).
//   ma(x, y, z): for each bit, yields the value held by at least two of the
//                three operands (plain and bitselect variants).
//
// Variant selection:
//   BITALIGN + BFI_INT : both helpers are routed through amd_bytealign().
//                        NOTE(review): presumably the host rewrites the resulting
//                        instruction into BFI_INT after compilation; the expression
//                        as written is not a bit-select on its own - confirm
//                        against the host-side code.
//   BITALIGN only      : uses the OpenCL built-in bitselect().
//   neither            : uses plain bitwise operators.
//
// Every macro parameter is parenthesized so that arguments containing
// lower-precedence operators (|, ^, &) are not regrouped after expansion.
#ifdef BITALIGN
	#pragma OPENCL EXTENSION cl_amd_media_ops : enable
	#ifdef BFI_INT
		#define ch(x, y, z) amd_bytealign((x), (y), (z))
		#define ma(x, y, z) amd_bytealign((z) ^ (x), (y), (x))
	#else
		#define ch(x, y, z) bitselect((z), (y), (x))
		// Where x and y agree the result is x, otherwise it is z.
		#define ma(x, y, z) bitselect((x), (z), (x) ^ (y))
	#endif
#else
	#define ch(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
	#define ma(x, y, z) (((x) & (z)) | ((y) & ((x) | (z))))
#endif
||||||
|
|
||||||
|
// Rotate/shift mixing helpers. Each XORs three terms derived from n, using the
// OpenCL built-in rotate(), which rotates left by the given bit count:
//   rotr15(n): rotate by 15, rotate by 13, logical shift right by 10
//   rotr25(n): rotate by 25, rotate by 14, logical shift right by 3
//   rotr26(n): rotate by 26, rotate by 21, rotate by 7
//   rotr30(n): rotate by 30, rotate by 19, rotate by 10
// For 32-bit lanes a left rotate by k equals a right rotate by 32 - k.
// n is parenthesized in every use so that an argument such as (a ^ b) is
// shifted as a whole rather than being regrouped by operator precedence.
#define rotr15(n) (rotate((n), 15U) ^ rotate((n), 13U) ^ ((n) >> 10U))
#define rotr25(n) (rotate((n), 25U) ^ rotate((n), 14U) ^ ((n) >> 3U))
#define rotr26(n) (rotate((n), 26U) ^ rotate((n), 21U) ^ rotate((n), 7U))
#define rotr30(n) (rotate((n), 30U) ^ rotate((n), 19U) ^ rotate((n), 10U))
||||||
|
|
||||||
|
__kernel |
||||||
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
||||||
|
void search( |
||||||
|
#ifndef GOFFSET |
||||||
|
const u base, |
||||||
|
#endif |
||||||
|
const uint PreVal0, const uint PreVal4, |
||||||
|
const uint H1, const uint D1A, const uint B1, const uint C1, |
||||||
|
const uint F1, const uint G1, const uint C1addK5, const uint B1addK6, const uint PreVal0addK7, |
||||||
|
const uint W16addK16, const uint W17addK17, |
||||||
|
const uint PreW18, const uint PreW19, |
||||||
|
const uint W16, const uint W17, |
||||||
|
const uint PreW31, const uint PreW32, |
||||||
|
const uint state0, const uint state1, const uint state2, const uint state3, |
||||||
|
const uint state4, const uint state5, const uint state6, const uint state7, |
||||||
|
const uint state0A, const uint state0B, |
||||||
|
const uint state1A, const uint state2A, const uint state3A, const uint state4A, |
||||||
|
const uint state5A, const uint state6A, const uint state7A, |
||||||
|
__global uint * output) |
||||||
|
{ |
||||||
|
u V[8]; |
||||||
|
u W[16]; |
||||||
|
|
||||||
|
#ifdef VECTORS4 |
||||||
|
const u nonce = (uint)(get_local_id(0)) * 4U + (uint)(get_group_id(0)) * (uint)(WORKVEC) + base; |
||||||
|
#elif defined VECTORS2 |
||||||
|
const u nonce = (uint)(get_local_id(0)) * 2U + (uint)(get_group_id(0)) * (uint)(WORKVEC) + base; |
||||||
|
#else |
||||||
|
#ifdef GOFFSET |
||||||
|
const u nonce = (uint)(get_global_id(0)); |
||||||
|
#else |
||||||
|
const u nonce = (uint)(get_local_id(0)) + (uint)(get_group_id(0)) * (uint)(WORKSIZE) + base; |
||||||
|
#endif |
||||||
|
#endif |
||||||
|
|
||||||
|
V[0] = PreVal0 + nonce; |
||||||
|
V[1] = B1; |
||||||
|
V[2] = C1; |
||||||
|
V[3] = D1A; |
||||||
|
V[4] = PreVal4 + nonce; |
||||||
|
V[5] = F1; |
||||||
|
V[6] = G1; |
||||||
|
V[7] = H1; |
||||||
|
|
||||||
|
V[7] += V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]); |
||||||
|
V[3] = V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); |
||||||
|
|
||||||
|
V[6] += C1addK5 + ch(V[7], V[0], V[1]) + rotr26(V[7]); |
||||||
|
V[2] = C1addK5 + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); |
||||||
|
|
||||||
|
V[5] += B1addK6 + ch(V[6], V[7], V[0]) + rotr26(V[6]); |
||||||
|
V[1] = B1addK6 + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); |
||||||
|
|
||||||
|
V[4] += PreVal0addK7 + nonce + ch(V[5], V[6], V[7]) + rotr26(V[5]); |
||||||
|
V[0] = PreVal0addK7 + nonce + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); |
||||||
|
|
||||||
|
V[3] += 0xd807aa98U + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]); |
||||||
|
V[7] = 0xd807aa98U + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); |
||||||
|
|
||||||
|
V[2] += 0x12835b01U + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]); |
||||||
|
V[6] = 0x12835b01U + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); |
||||||
|
|
||||||
|
V[1] += 0x243185beU + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]); |
||||||
|
V[5] = 0x243185beU + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); |
||||||
|
|
||||||
|
V[0] += 0x550c7dc3U + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]); |
||||||
|
V[4] = 0x550c7dc3U + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); |
||||||
|
|
||||||
|
V[7] += 0x72be5d74U + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]); |
||||||
|
V[3] = 0x72be5d74U + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); |
||||||
|
|
||||||
|
V[6] += 0x80deb1feU + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]); |
||||||
|
V[2] = 0x80deb1feU + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); |
||||||
|
|
||||||
|
V[5] += 0x9bdc06a7U + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]); |
||||||
|
V[1] = 0x9bdc06a7U + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); |
||||||
|
|
||||||
|
V[4] += 0xc19bf3f4U + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]); |
||||||
|
V[0] = 0xc19bf3f4U + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); |
||||||
|
|
||||||
|
V[3] += W16addK16 + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]); |
||||||
|
V[7] = W16addK16 + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); |
||||||
|
|
||||||
|
V[2] += W17addK17 + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]); |
||||||
|
V[6] = W17addK17 + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); |
||||||
|
|
||||||
|
//---------------------------------------------------------------------------------- |
||||||
|
|
||||||
|
#ifdef VECTORS4 |
||||||
|
W[0] = PreW18 + (u)(rotr25(nonce.x), rotr25(nonce.x) ^ 0x2004000U, rotr25(nonce.x) ^ 0x4008000U, rotr25(nonce.x) ^ 0x600c000U); |
||||||
|
#elif defined VECTORS2 |
||||||
|
W[0] = PreW18 + (u)(rotr25(nonce.x), rotr25(nonce.x) ^ 0x2004000U); |
||||||
|
#else |
||||||
|
W[0] = PreW18 + rotr25(nonce); |
||||||
|
#endif |
||||||
|
W[1] = PreW19 + nonce; |
||||||
|
W[2] = 0x80000000U + rotr15(W[0]); |
||||||
|
W[3] = rotr15(W[1]); |
||||||
|
W[4] = 0x00000280U + rotr15(W[2]); |
||||||
|
W[5] = W16 + rotr15(W[3]); |
||||||
|
W[6] = W17 + rotr15(W[4]); |
||||||
|
W[7] = W[0] + rotr15(W[5]); |
||||||
|
W[8] = W[1] + rotr15(W[6]); |
||||||
|
W[9] = W[2] + rotr15(W[7]); |
||||||
|
W[10] = W[3] + rotr15(W[8]); |
||||||
|
W[11] = W[4] + rotr15(W[9]); |
||||||
|
W[12] = W[5] + 0x00a00055U + rotr15(W[10]); |
||||||
|
W[13] = W[6] + PreW31 + rotr15(W[11]); |
||||||
|
W[14] = W[7] + PreW32 + rotr15(W[12]); |
||||||
|
W[15] = W[8] + W17 + rotr15(W[13]) + rotr25(W[0]); |
||||||
|
|
||||||
|
V[1] += 0x0fc19dc6U + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + W[0]; |
||||||
|
V[5] = 0x0fc19dc6U + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + W[0] + rotr30(V[6]) + ma(V[7], V[0], V[6]); |
||||||
|
|
||||||
|
V[0] += 0x240ca1ccU + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]); |
||||||
|
V[4] = 0x240ca1ccU + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); |
||||||
|
|
||||||
|
V[7] += 0x2de92c6fU + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]); |
||||||
|
V[3] = 0x2de92c6fU + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); |
||||||
|
|
||||||
|
V[6] += 0x4a7484aaU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]); |
||||||
|
V[2] = 0x4a7484aaU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); |
||||||
|
|
||||||
|
V[5] += 0x5cb0a9dcU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]); |
||||||
|
V[1] = 0x5cb0a9dcU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); |
||||||
|
|
||||||
|
V[4] += 0x76f988daU + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]); |
||||||
|
V[0] = 0x76f988daU + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); |
||||||
|
|
||||||
|
V[3] += 0x983e5152U + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]); |
||||||
|
V[7] = 0x983e5152U + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); |
||||||
|
|
||||||
|
V[2] += 0xa831c66dU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]); |
||||||
|
V[6] = 0xa831c66dU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); |
||||||
|
|
||||||
|
V[1] += 0xb00327c8U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]); |
||||||
|
V[5] = 0xb00327c8U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); |
||||||
|
|
||||||
|
V[0] += 0xbf597fc7U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]); |
||||||
|
V[4] = 0xbf597fc7U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); |
||||||
|
|
||||||
|
V[7] += 0xc6e00bf3U + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]); |
||||||
|
V[3] = 0xc6e00bf3U + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); |
||||||
|
|
||||||
|
V[6] += 0xd5a79147U + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]); |
||||||
|
V[2] = 0xd5a79147U + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); |
||||||
|
|
||||||
|
V[5] += 0x06ca6351U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]); |
||||||
|
V[1] = 0x06ca6351U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); |
||||||
|
|
||||||
|
V[4] += 0x14292967U + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]); |
||||||
|
V[0] = 0x14292967U + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); |
||||||
|
|
||||||
|
V[3] += 0x27b70a85U + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]); |
||||||
|
V[7] = 0x27b70a85U + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); |
||||||
|
|
||||||
|
V[2] += 0x2e1b2138U + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]); |
||||||
|
V[6] = 0x2e1b2138U + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); |
||||||
|
|
||||||
|
//---------------------------------------------------------------------------------- |
||||||
|
|
||||||
|
W[0] = W[0] + W[9] + rotr15(W[14]) + rotr25( W[1]); |
||||||
|
W[1] = W[1] + W[10] + rotr15(W[15]) + rotr25( W[2]); |
||||||
|
W[2] = W[2] + W[11] + rotr15( W[0]) + rotr25( W[3]); |
||||||
|
W[3] = W[3] + W[12] + rotr15( W[1]) + rotr25( W[4]); |
||||||
|
W[4] = W[4] + W[13] + rotr15( W[2]) + rotr25( W[5]); |
||||||
|
W[5] = W[5] + W[14] + rotr15( W[3]) + rotr25( W[6]); |
||||||
|
W[6] = W[6] + W[15] + rotr15( W[4]) + rotr25( W[7]); |
||||||
|
W[7] = W[7] + W[0] + rotr15( W[5]) + rotr25( W[8]); |
||||||
|
W[8] = W[8] + W[1] + rotr15( W[6]) + rotr25( W[9]); |
||||||
|
W[9] = W[9] + W[2] + rotr15( W[7]) + rotr25(W[10]); |
||||||
|
W[10] = W[10] + W[3] + rotr15( W[8]) + rotr25(W[11]); |
||||||
|
W[11] = W[11] + W[4] + rotr15( W[9]) + rotr25(W[12]); |
||||||
|
W[12] = W[12] + W[5] + rotr15(W[10]) + rotr25(W[13]); |
||||||
|
W[13] = W[13] + W[6] + rotr15(W[11]) + rotr25(W[14]); |
||||||
|
W[14] = W[14] + W[7] + rotr15(W[12]) + rotr25(W[15]); |
||||||
|
W[15] = W[15] + W[8] + rotr15(W[13]) + rotr25( W[0]); |
||||||
|
|
||||||
|
V[1] += 0x4d2c6dfcU + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]); |
||||||
|
V[5] = 0x4d2c6dfcU + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); |
||||||
|
|
||||||
|
V[0] += 0x53380d13U + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]); |
||||||
|
V[4] = 0x53380d13U + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); |
||||||
|
|
||||||
|
V[7] += 0x650a7354U + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]); |
||||||
|
V[3] = 0x650a7354U + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); |
||||||
|
|
||||||
|
V[6] += 0x766a0abbU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]); |
||||||
|
V[2] = 0x766a0abbU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); |
||||||
|
|
||||||
|
V[5] += 0x81c2c92eU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]); |
||||||
|
V[1] = 0x81c2c92eU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); |
||||||
|
|
||||||
|
V[4] += 0x92722c85U + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]); |
||||||
|
V[0] = 0x92722c85U + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); |
||||||
|
|
||||||
|
V[3] += 0xa2bfe8a1U + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]); |
||||||
|
V[7] = 0xa2bfe8a1U + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); |
||||||
|
|
||||||
|
V[2] += 0xa81a664bU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]); |
||||||
|
V[6] = 0xa81a664bU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); |
||||||
|
|
||||||
|
V[1] += 0xc24b8b70U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]); |
||||||
|
V[5] = 0xc24b8b70U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); |
||||||
|
|
||||||
|
V[0] += 0xc76c51a3U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]); |
||||||
|
V[4] = 0xc76c51a3U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); |
||||||
|
|
||||||
|
V[7] += 0xd192e819U + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]); |
||||||
|
V[3] = 0xd192e819U + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); |
||||||
|
|
||||||
|
V[6] += 0xd6990624U + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]); |
||||||
|
V[2] = 0xd6990624U + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); |
||||||
|
|
||||||
|
V[5] += 0xf40e3585U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]); |
||||||
|
V[1] = 0xf40e3585U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); |
||||||
|
|
||||||
|
V[4] += 0x106aa070U + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]); |
||||||
|
V[0] = 0x106aa070U + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); |
||||||
|
|
||||||
|
V[3] += 0x19a4c116U + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]); |
||||||
|
V[7] = 0x19a4c116U + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); |
||||||
|
|
||||||
|
V[2] += 0x1e376c08U + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]); |
||||||
|
V[6] = 0x1e376c08U + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); |
||||||
|
|
||||||
|
//---------------------------------------------------------------------------------- |
||||||
|
|
||||||
|
W[0] = W[0] + W[9] + rotr15(W[14]) + rotr25( W[1]); |
||||||
|
W[1] = W[1] + W[10] + rotr15(W[15]) + rotr25( W[2]); |
||||||
|
W[2] = W[2] + W[11] + rotr15( W[0]) + rotr25( W[3]); |
||||||
|
W[3] = W[3] + W[12] + rotr15( W[1]) + rotr25( W[4]); |
||||||
|
W[4] = W[4] + W[13] + rotr15( W[2]) + rotr25( W[5]); |
||||||
|
W[5] = W[5] + W[14] + rotr15( W[3]) + rotr25( W[6]); |
||||||
|
W[6] = W[6] + W[15] + rotr15( W[4]) + rotr25( W[7]); |
||||||
|
W[7] = W[7] + W[0] + rotr15( W[5]) + rotr25( W[8]); |
||||||
|
W[8] = W[8] + W[1] + rotr15( W[6]) + rotr25( W[9]); |
||||||
|
W[9] = W[9] + W[2] + rotr15( W[7]) + rotr25(W[10]); |
||||||
|
W[10] = W[10] + W[3] + rotr15( W[8]) + rotr25(W[11]); |
||||||
|
W[11] = W[11] + W[4] + rotr15( W[9]) + rotr25(W[12]); |
||||||
|
W[12] = W[12] + W[5] + rotr15(W[10]) + rotr25(W[13]); |
||||||
|
W[13] = W[13] + W[6] + rotr15(W[11]) + rotr25(W[14]); |
||||||
|
|
||||||
|
V[1] += 0x2748774cU + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]); |
||||||
|
V[5] = 0x2748774cU + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); |
||||||
|
|
||||||
|
V[0] += 0x34b0bcb5U + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]); |
||||||
|
V[4] = 0x34b0bcb5U + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); |
||||||
|
|
||||||
|
V[7] += 0x391c0cb3U + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]); |
||||||
|
V[3] = 0x391c0cb3U + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); |
||||||
|
|
||||||
|
V[6] += 0x4ed8aa4aU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]); |
||||||
|
V[2] = 0x4ed8aa4aU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); |
||||||
|
|
||||||
|
V[5] += 0x5b9cca4fU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]); |
||||||
|
V[1] = 0x5b9cca4fU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); |
||||||
|
|
||||||
|
V[4] += 0x682e6ff3U + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]); |
||||||
|
V[0] = 0x682e6ff3U + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); |
||||||
|
|
||||||
|
V[3] += 0x748f82eeU + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]); |
||||||
|
V[7] = 0x748f82eeU + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); |
||||||
|
|
||||||
|
V[2] += 0x78a5636fU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]); |
||||||
|
V[6] = 0x78a5636fU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); |
||||||
|
|
||||||
|
V[1] += 0x84c87814U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]); |
||||||
|
V[5] = 0x84c87814U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); |
||||||
|
|
||||||
|
V[0] += 0x8cc70208U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]); |
||||||
|
V[4] = 0x8cc70208U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); |
||||||
|
|
||||||
|
V[7] += 0x90befffaU + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]); |
||||||
|
V[3] = 0x90befffaU + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); |
||||||
|
|
||||||
|
V[6] += 0xa4506cebU + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]); |
||||||
|
V[2] = 0xa4506cebU + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); |
||||||
|
|
||||||
|
V[5] += 0xbef9a3f7U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]); |
||||||
|
V[1] = 0xbef9a3f7U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); |
||||||
|
|
||||||
|
V[4] += 0xc67178f2U + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]); |
||||||
|
V[0] = 0xc67178f2U + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); |
||||||
|
|
||||||
|
//---------------------------------------------------------------------------------- |
||||||
|
|
||||||
|
W[0] = state0 + V[0] + rotr25(state1 + V[1]); |
||||||
|
W[1] = state1 + V[1] + 0x00a00000U + rotr25(state2 + V[2]); |
||||||
|
W[2] = state2 + V[2] + rotr15(W[0]) + rotr25(state3 + V[3]); |
||||||
|
W[3] = state3 + V[3] + rotr15(W[1]) + rotr25(state4 + V[4]); |
||||||
|
W[4] = state4 + V[4] + rotr15(W[2]) + rotr25(state5 + V[5]); |
||||||
|
W[5] = state5 + V[5] + rotr15(W[3]) + rotr25(state6 + V[6]); |
||||||
|
W[6] = state6 + V[6] + 0x00000100U + rotr15(W[4]) + rotr25(state7 + V[7]); |
||||||
|
W[7] = state7 + V[7] + W[0] + 0x11002000U + rotr15(W[5]); |
||||||
|
W[8] = W[1] + 0x80000000U + rotr15(W[6]); |
||||||
|
W[9] = W[2] + rotr15(W[7]); |
||||||
|
W[10] = W[3] + rotr15(W[8]); |
||||||
|
W[11] = W[4] + rotr15(W[9]); |
||||||
|
W[12] = W[5] + rotr15(W[10]); |
||||||
|
W[13] = W[6] + rotr15(W[11]); |
||||||
|
W[14] = W[7] + 0x00400022U + rotr15(W[12]); |
||||||
|
W[15] = W[8] + 0x00000100U + rotr15(W[13]) + rotr25(W[0]); |
||||||
|
|
||||||
|
// 0x71374491U + 0x1f83d9abU + state1 |
||||||
|
const u state1AaddV1 = state1A + V[1]; |
||||||
|
// 0xb5c0fbcfU + 0x9b05688cU + state2 |
||||||
|
const u state2AaddV2 = state2A + V[2]; |
||||||
|
// 0x510e527fU + 0xe9b5dba5U + state3 |
||||||
|
const u state3AaddV3 = state3A + V[3]; |
||||||
|
// 0x3956c25bU + state4 |
||||||
|
const u state4AaddV4 = state4A + V[4]; |
||||||
|
// 0x59f111f1U + state5 |
||||||
|
const u state5AaddV5 = state5A + V[5]; |
||||||
|
// 0x923f82a4U + state6 |
||||||
|
const u state6AaddV6 = state6A + V[6]; |
||||||
|
// 0xab1c5ed5U + state7 |
||||||
|
const u state7AaddV7 = state7A + V[7]; |
||||||
|
|
||||||
|
// 0x98c7e2a2U + state0 |
||||||
|
V[3] = state0A + V[0]; |
||||||
|
// 0xfc08884dU + state0 |
||||||
|
V[7] = state0B + V[0]; |
||||||
|
V[0] = 0x6a09e667U; |
||||||
|
V[1] = 0xbb67ae85U; |
||||||
|
V[2] = 0x3c6ef372U; |
||||||
|
V[4] = 0x510e527fU; |
||||||
|
V[5] = 0x9b05688cU; |
||||||
|
V[6] = 0x1f83d9abU; |
||||||
|
|
||||||
|
V[2] += state1AaddV1 + ch(V[3], V[4], V[5]) + rotr26(V[3]); |
||||||
|
V[6] = state1AaddV1 + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); |
||||||
|
|
||||||
|
V[1] += state2AaddV2 + ch(V[2], V[3], V[4]) + rotr26(V[2]); |
||||||
|
V[5] = state2AaddV2 + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); |
||||||
|
|
||||||
|
V[0] += state3AaddV3 + ch(V[1], V[2], V[3]) + rotr26(V[1]); |
||||||
|
V[4] = state3AaddV3 + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); |
||||||
|
|
||||||
|
V[7] += state4AaddV4 + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]); |
||||||
|
V[3] = state4AaddV4 + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); |
||||||
|
|
||||||
|
V[6] += state5AaddV5 + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]); |
||||||
|
V[2] = state5AaddV5 + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); |
||||||
|
|
||||||
|
V[5] += state6AaddV6 + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]); |
||||||
|
V[1] = state6AaddV6 + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); |
||||||
|
|
||||||
|
V[4] += state7AaddV7 + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]); |
||||||
|
V[0] = state7AaddV7 + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); |
||||||
|
|
||||||
|
V[3] += 0x5807aa98U + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]); |
||||||
|
V[7] = 0x5807aa98U + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); |
||||||
|
|
||||||
|
V[2] += 0x12835b01U + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]); |
||||||
|
V[6] = 0x12835b01U + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); |
||||||
|
|
||||||
|
V[1] += 0x243185beU + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]); |
||||||
|
V[5] = 0x243185beU + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); |
||||||
|
|
||||||
|
V[0] += 0x550c7dc3U + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]); |
||||||
|
V[4] = 0x550c7dc3U + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); |
||||||
|
|
||||||
|
V[7] += 0x72be5d74U + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]); |
||||||
|
V[3] = 0x72be5d74U + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); |
||||||
|
|
||||||
|
V[6] += 0x80deb1feU + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]); |
||||||
|
V[2] = 0x80deb1feU + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); |
||||||
|
|
||||||
|
V[5] += 0x9bdc06a7U + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]); |
||||||
|
V[1] = 0x9bdc06a7U + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); |
||||||
|
|
||||||
|
V[4] += 0xc19bf274U + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]); |
||||||
|
V[0] = 0xc19bf274U + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); |
||||||
|
|
||||||
|
V[3] += 0xe49b69c1U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]); |
||||||
|
V[7] = 0xe49b69c1U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); |
||||||
|
|
||||||
|
V[2] += 0xefbe4786U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]); |
||||||
|
V[6] = 0xefbe4786U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); |
||||||
|
|
||||||
|
V[1] += 0x0fc19dc6U + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]); |
||||||
|
V[5] = 0x0fc19dc6U + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); |
||||||
|
|
||||||
|
V[0] += 0x240ca1ccU + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]); |
||||||
|
V[4] = 0x240ca1ccU + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); |
||||||
|
|
||||||
|
V[7] += 0x2de92c6fU + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]); |
||||||
|
V[3] = 0x2de92c6fU + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); |
||||||
|
|
||||||
|
V[6] += 0x4a7484aaU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]); |
||||||
|
V[2] = 0x4a7484aaU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); |
||||||
|
|
||||||
|
V[5] += 0x5cb0a9dcU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]); |
||||||
|
V[1] = 0x5cb0a9dcU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); |
||||||
|
|
||||||
|
V[4] += 0x76f988daU + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]); |
||||||
|
V[0] = 0x76f988daU + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); |
||||||
|
|
||||||
|
V[3] += 0x983e5152U + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]); |
||||||
|
V[7] = 0x983e5152U + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); |
||||||
|
|
||||||
|
V[2] += 0xa831c66dU + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]); |
||||||
|
V[6] = 0xa831c66dU + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); |
||||||
|
|
||||||
|
V[1] += 0xb00327c8U + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]); |
||||||
|
V[5] = 0xb00327c8U + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); |
||||||
|
|
||||||
|
V[0] += 0xbf597fc7U + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]); |
||||||
|
V[4] = 0xbf597fc7U + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); |
||||||
|
|
||||||
|
V[7] += 0xc6e00bf3U + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]); |
||||||
|
V[3] = 0xc6e00bf3U + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); |
||||||
|
|
||||||
|
V[6] += 0xd5a79147U + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]); |
||||||
|
V[2] = 0xd5a79147U + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); |
||||||
|
|
||||||
|
V[5] += 0x06ca6351U + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]); |
||||||
|
V[1] = 0x06ca6351U + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); |
||||||
|
|
||||||
|
V[4] += 0x14292967U + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]); |
||||||
|
V[0] = 0x14292967U + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); |
||||||
|
|
||||||
|
//---------------------------------------------------------------------------------- |
||||||
|
|
||||||
|
W[0] = W[0] + W[9] + rotr15(W[14]) + rotr25( W[1]); |
||||||
|
W[1] = W[1] + W[10] + rotr15(W[15]) + rotr25( W[2]); |
||||||
|
W[2] = W[2] + W[11] + rotr15( W[0]) + rotr25( W[3]); |
||||||
|
W[3] = W[3] + W[12] + rotr15( W[1]) + rotr25( W[4]); |
||||||
|
W[4] = W[4] + W[13] + rotr15( W[2]) + rotr25( W[5]); |
||||||
|
W[5] = W[5] + W[14] + rotr15( W[3]) + rotr25( W[6]); |
||||||
|
W[6] = W[6] + W[15] + rotr15( W[4]) + rotr25( W[7]); |
||||||
|
W[7] = W[7] + W[0] + rotr15( W[5]) + rotr25( W[8]); |
||||||
|
W[8] = W[8] + W[1] + rotr15( W[6]) + rotr25( W[9]); |
||||||
|
W[9] = W[9] + W[2] + rotr15( W[7]) + rotr25(W[10]); |
||||||
|
W[10] = W[10] + W[3] + rotr15( W[8]) + rotr25(W[11]); |
||||||
|
W[11] = W[11] + W[4] + rotr15( W[9]) + rotr25(W[12]); |
||||||
|
W[12] = W[12] + W[5] + rotr15(W[10]) + rotr25(W[13]); |
||||||
|
W[13] = W[13] + W[6] + rotr15(W[11]) + rotr25(W[14]); |
||||||
|
W[14] = W[14] + W[7] + rotr15(W[12]) + rotr25(W[15]); |
||||||
|
W[15] = W[15] + W[8] + rotr15(W[13]) + rotr25( W[0]); |
||||||
|
|
||||||
|
V[3] += 0x27b70a85U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]); |
||||||
|
V[7] = 0x27b70a85U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); |
||||||
|
|
||||||
|
V[2] += 0x2e1b2138U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]); |
||||||
|
V[6] = 0x2e1b2138U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); |
||||||
|
|
||||||
|
V[1] += 0x4d2c6dfcU + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]); |
||||||
|
V[5] = 0x4d2c6dfcU + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); |
||||||
|
|
||||||
|
V[0] += 0x53380d13U + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]); |
||||||
|
V[4] = 0x53380d13U + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); |
||||||
|
|
||||||
|
V[7] += 0x650a7354U + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]); |
||||||
|
V[3] = 0x650a7354U + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); |
||||||
|
|
||||||
|
V[6] += 0x766a0abbU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]); |
||||||
|
V[2] = 0x766a0abbU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); |
||||||
|
|
||||||
|
V[5] += 0x81c2c92eU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]); |
||||||
|
V[1] = 0x81c2c92eU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); |
||||||
|
|
||||||
|
V[4] += 0x92722c85U + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]); |
||||||
|
V[0] = 0x92722c85U + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); |
||||||
|
|
||||||
|
V[3] += 0xa2bfe8a1U + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]); |
||||||
|
V[7] = 0xa2bfe8a1U + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); |
||||||
|
|
||||||
|
V[2] += 0xa81a664bU + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]); |
||||||
|
V[6] = 0xa81a664bU + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); |
||||||
|
|
||||||
|
V[1] += 0xc24b8b70U + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]); |
||||||
|
V[5] = 0xc24b8b70U + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); |
||||||
|
|
||||||
|
V[0] += 0xc76c51a3U + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]); |
||||||
|
V[4] = 0xc76c51a3U + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); |
||||||
|
|
||||||
|
V[7] += 0xd192e819U + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]); |
||||||
|
V[3] = 0xd192e819U + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); |
||||||
|
|
||||||
|
V[6] += 0xd6990624U + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]); |
||||||
|
V[2] = 0xd6990624U + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); |
||||||
|
|
||||||
|
V[5] += 0xf40e3585U + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]); |
||||||
|
V[1] = 0xf40e3585U + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); |
||||||
|
|
||||||
|
V[4] += 0x106aa070U + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]); |
||||||
|
V[0] = 0x106aa070U + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); |
||||||
|
|
||||||
|
//---------------------------------------------------------------------------------- |
||||||
|
|
||||||
|
W[0] = W[0] + W[9] + rotr15(W[14]) + rotr25( W[1]); |
||||||
|
W[1] = W[1] + W[10] + rotr15(W[15]) + rotr25( W[2]); |
||||||
|
W[2] = W[2] + W[11] + rotr15( W[0]) + rotr25( W[3]); |
||||||
|
W[3] = W[3] + W[12] + rotr15( W[1]) + rotr25( W[4]); |
||||||
|
W[4] = W[4] + W[13] + rotr15( W[2]) + rotr25( W[5]); |
||||||
|
W[5] = W[5] + W[14] + rotr15( W[3]) + rotr25( W[6]); |
||||||
|
W[6] = W[6] + W[15] + rotr15( W[4]) + rotr25( W[7]); |
||||||
|
W[7] = W[7] + W[0] + rotr15( W[5]) + rotr25( W[8]); |
||||||
|
W[8] = W[8] + W[1] + rotr15( W[6]) + rotr25( W[9]); |
||||||
|
W[9] = W[9] + W[2] + rotr15( W[7]) + rotr25(W[10]); |
||||||
|
W[10] = W[10] + W[3] + rotr15( W[8]) + rotr25(W[11]); |
||||||
|
W[11] = W[11] + W[4] + rotr15( W[9]) + rotr25(W[12]); |
||||||
|
W[12] = W[12] + W[5] + rotr15(W[10]) + rotr25(W[13]); |
||||||
|
|
||||||
|
V[3] += 0x19a4c116U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]); |
||||||
|
V[7] = 0x19a4c116U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); |
||||||
|
|
||||||
|
V[2] += 0x1e376c08U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]); |
||||||
|
V[6] = 0x1e376c08U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); |
||||||
|
|
||||||
|
V[1] += 0x2748774cU + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]); |
||||||
|
V[5] = 0x2748774cU + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); |
||||||
|
|
||||||
|
V[0] += 0x34b0bcb5U + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]); |
||||||
|
V[4] = 0x34b0bcb5U + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); |
||||||
|
|
||||||
|
V[7] += 0x391c0cb3U + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]); |
||||||
|
V[3] = 0x391c0cb3U + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); |
||||||
|
|
||||||
|
V[6] += 0x4ed8aa4aU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]); |
||||||
|
V[2] = 0x4ed8aa4aU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); |
||||||
|
|
||||||
|
V[5] += 0x5b9cca4fU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]); |
||||||
|
V[1] = 0x5b9cca4fU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); |
||||||
|
|
||||||
|
V[4] += 0x682e6ff3U + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]); |
||||||
|
V[0] = 0x682e6ff3U + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); |
||||||
|
|
||||||
|
V[3] += 0x748f82eeU + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]); |
||||||
|
V[7] = 0x748f82eeU + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); |
||||||
|
|
||||||
|
V[2] += 0x78a5636fU + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]); |
||||||
|
|
||||||
|
V[1] += 0x84c87814U + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]); |
||||||
|
|
||||||
|
V[0] += 0x8cc70208U + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]); |
||||||
|
|
||||||
|
V[7] += V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]); |
||||||
|
|
||||||
|
#define FOUND (0x800) |
||||||
|
#define NFLAG (0x7FF) |
||||||
|
|
||||||
|
#ifdef VECTORS4 |
||||||
|
if ((V[7].x == 0x136032edU) ^ (V[7].y == 0x136032edU) ^ (V[7].z == 0x136032edU) ^ (V[7].w == 0x136032edU)) |
||||||
|
output[FOUND] = output[NFLAG & nonce.x] = (V[7].x == 0x136032edU) ? nonce.x : ((V[7].y == 0x136032edU) ? nonce.y : ((V[7].z == 0x136032edU) ? nonce.z : nonce.w)); |
||||||
|
#elif defined VECTORS2 |
||||||
|
if ((V[7].x == 0x136032edU) + (V[7].y == 0x136032edU)) |
||||||
|
output[FOUND] = output[NFLAG & nonce.x] = (V[7].x == 0x136032edU) ? nonce.x : nonce.y; |
||||||
|
#else |
||||||
|
if (V[7] == 0x136032edU) |
||||||
|
output[FOUND] = output[NFLAG & nonce] = nonce; |
||||||
|
#endif |
||||||
|
} |
@ -0,0 +1,417 @@ |
|||||||
|
// This file is taken and modified from the public-domain poclbm project, and |
||||||
|
// I have therefore decided to keep it public-domain. |
||||||
|
// Modified version copyright 2011-2012 Con Kolivas |
||||||
|
|
||||||
|
#ifdef VECTORS4 |
||||||
|
typedef uint4 u; |
||||||
|
#elif defined VECTORS2 |
||||||
|
typedef uint2 u; |
||||||
|
#else |
||||||
|
typedef uint u; |
||||||
|
#endif |
||||||
|
|
||||||
|
// Round constants, indexed by the round macros as K[(n) % 64].
// Four constants per row; row r holds K[4r] .. K[4r + 3].
__constant uint K[64] = {
	0x428a2f98U, 0x71374491U, 0xb5c0fbcfU, 0xe9b5dba5U,
	0x3956c25bU, 0x59f111f1U, 0x923f82a4U, 0xab1c5ed5U,
	0xd807aa98U, 0x12835b01U, 0x243185beU, 0x550c7dc3U,
	0x72be5d74U, 0x80deb1feU, 0x9bdc06a7U, 0xc19bf174U,
	0xe49b69c1U, 0xefbe4786U, 0x0fc19dc6U, 0x240ca1ccU,
	0x2de92c6fU, 0x4a7484aaU, 0x5cb0a9dcU, 0x76f988daU,
	0x983e5152U, 0xa831c66dU, 0xb00327c8U, 0xbf597fc7U,
	0xc6e00bf3U, 0xd5a79147U, 0x06ca6351U, 0x14292967U,
	0x27b70a85U, 0x2e1b2138U, 0x4d2c6dfcU, 0x53380d13U,
	0x650a7354U, 0x766a0abbU, 0x81c2c92eU, 0x92722c85U,
	0xa2bfe8a1U, 0xa81a664bU, 0xc24b8b70U, 0xc76c51a3U,
	0xd192e819U, 0xd6990624U, 0xf40e3585U, 0x106aa070U,
	0x19a4c116U, 0x1e376c08U, 0x2748774cU, 0x34b0bcb5U,
	0x391c0cb3U, 0x4ed8aa4aU, 0x5b9cca4fU, 0x682e6ff3U,
	0x748f82eeU, 0x78a5636fU, 0x84c87814U, 0x8cc70208U,
	0x90befffaU, 0xa4506cebU, 0xbef9a3f7U, 0xc67178f2U
};
||||||
|
|
||||||
|
// Message words that are compile-time constants, read by t1C() and the
// P1C..P4C macros. Entries 0..63 are used with round indices 0..63 and
// entries 64..127 with round indices 64..127. Only four entries are non-zero:
// [4], [15], [72] and [79]. Sixteen entries per row.
__constant uint ConstW[128] = {
	/*   0 */ 0, 0, 0, 0, 0x80000000U, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0x00000280U,
	/*  16 */ 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
	/*  32 */ 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
	/*  48 */ 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,

	/*  64 */ 0, 0, 0, 0, 0, 0, 0, 0,  0x80000000U, 0, 0, 0, 0, 0, 0, 0x00000100U,
	/*  80 */ 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
	/*  96 */ 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
	/* 112 */ 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0
};
||||||
|
|
||||||
|
// Initial values loaded into Vals[0..7] before the second pass of rounds
// (round indices 64 and up) in search().
__constant uint H[8] = {
	0x6a09e667U, 0xbb67ae85U, 0x3c6ef372U, 0xa54ff53aU,
	0x510e527fU, 0x9b05688cU, 0x1f83d9abU, 0x5be0cd19U
};
||||||
|
|
||||||
|
|
||||||
|
// Bit-manipulation primitives, selected by the BITALIGN / BFI_INT build
// defines:
//   rot(x, y)    rotate x left by y bits
//   Ch(x, y, z)  bitwise select: bits of y where x is set, bits of z elsewhere
//   Ma(x, y, z)  bitwise majority of the three operands
//   rotr(x, y)   rotate right (not defined in the BITALIGN + BFI_INT build)
// Every macro parameter is parenthesized so that arguments containing
// low-precedence operators (|, ^, ?:) expand correctly.
#ifdef BITALIGN
#pragma OPENCL EXTENSION cl_amd_media_ops : enable
#define rot(x, y) amd_bitalign((x), (x), (uint)(32 - (y)))

// This part is not from the stock poclbm kernel. It's part of an optimization
// added in the Phoenix Miner.

// Some AMD devices have a BFI_INT opcode, which behaves exactly like the
// SHA-256 Ch function, but provides it in exactly one instruction. If
// detected, use it for Ch. Otherwise, construct Ch out of simpler logical
// primitives.

#ifdef BFI_INT
// Well, slight problem... It turns out BFI_INT isn't actually exposed to
// OpenCL (or CAL IL for that matter) in any way. However, there is
// a similar instruction, BYTE_ALIGN_INT, which is exposed to OpenCL via
// amd_bytealign, takes the same inputs, and provides the same output.
// We can use that as a placeholder for BFI_INT and have the application
// patch it after compilation.

// This is the BFI_INT function
#define Ch(x, y, z) amd_bytealign((x), (y), (z))
// Ma can also be implemented in terms of BFI_INT...
#define Ma(z, x, y) amd_bytealign((z) ^ (x), (y), (x))
#else // BFI_INT
// Later SDKs optimise this to BFI INT without patching and GCN
// actually fails if manually patched with BFI_INT

#define Ch(x, y, z) bitselect((u)(z), (u)(y), (u)(x))
#define Ma(x, y, z) bitselect((u)(x), (u)(y), (u)(z) ^ (u)(x))
#define rotr(x, y) amd_bitalign((u)(x), (u)(x), (u)(y))
#endif
#else // BITALIGN
#define Ch(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
#define Ma(x, y, z) (((x) & (z)) | ((y) & ((x) | (z))))
#define rot(x, y) rotate((u)(x), (u)(y))
#define rotr(x, y) rotate((u)(x), (u)(32 - (y)))
#endif
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Per-round building blocks. The eight working variables live in Vals[] and
// are addressed relative to the round number n: slot (k + 128 - (n)) % 8,
// so the variables "rotate" from round to round without moving any data.
//
// NOTE(review): t1, t1W, t1C and t2 expand to bare sums without enclosing
// parentheses; they are only used as operands of + / += / =. Keep it that way.

// Mixing functions on a value
#define S0(x) (rot(x, 30u)^rot(x, 19u)^rot(x,10u))
#define S1(x) (rot(x, 26u)^rot(x, 21u)^rot(x, 7u))

// The same functions applied to the round-n working variables
#define s0(n) (S0(Vals[(0 + 128 - (n)) % 8]))
#define s1(n) (S1(Vals[(4 + 128 - (n)) % 8]))

// Ch and Ma over the round-n working variables
#define ch(n) Ch(Vals[(4 + 128 - (n)) % 8],Vals[(5 + 128 - (n)) % 8],Vals[(6 + 128 - (n)) % 8])
#define maj(n) Ma(Vals[(1 + 128 - (n)) % 8],Vals[(2 + 128 - (n)) % 8],Vals[(0 + 128 - (n)) % 8])

// t1 when W[n] has already been computed
#define t1(n) K[(n) % 64] + Vals[(7 + 128 - (n)) % 8] + W[(n)] + s1(n) + ch(n)

// t1 that computes (and stores) W[n] on the fly through the W(n) macro
#define t1W(n) K[(n) % 64] + Vals[(7 + 128 - (n)) % 8] + W(n) + s1(n) + ch(n)

// t1 for rounds whose message word is the constant ConstW[n]
// (the compiler optimizes out the zeros)
#define t1C(n) (K[(n) % 64]+ ConstW[(n)]) + Vals[(7 + 128 - (n)) % 8] + s1(n) + ch(n)

// t2
#define t2(n) maj(n) + s0(n)
||||||
|
|
||||||
|
// Rotate-left written with plain shifts; used by P1C/P2C on ConstW entries.
// Valid for 32-bit unsigned x and 0 < n < 32. Both parameters are
// parenthesized so expression arguments expand correctly.
#define rotC(x,n) (((x) << (n)) | ((x) >> (32 - (n))))
||||||
|
|
||||||
|
// W calculation used for a SHA round: computes message-schedule word n from
// words n-16, n-7, n-15 and n-2, stores it in W[n], and yields the stored
// value. The argument is evaluated several times; pass side-effect-free
// expressions only.
#define W(n) (W[(n)] = P4(n) + P3(n) + P2(n) + P1(n))

// Partial W calculations (used at the beginning, where only some values are nonzero)
#define P1(n) ((rot(W[(n)-2],15u)^rot(W[(n)-2],13u)^((W[(n)-2])>>10U)))
#define P2(n) ((rot(W[(n)-15],25u)^rot(W[(n)-15],14u)^((W[(n)-15])>>3U)))

// The same two mixing functions applied directly to a value
#define p1(x) ((rot(x,15u)^rot(x,13u)^((x)>>10U)))
#define p2(x) ((rot(x,25u)^rot(x,14u)^((x)>>3U)))

// Plain taps into the schedule; n is parenthesized so that any index
// expression (not just sums) selects the intended word.
#define P3(n) W[(n)-7]
#define P4(n) W[(n)-16]
||||||
|
|
||||||
|
|
||||||
|
// Partial calculations for constant W values: same shape as P1..P4 but the
// words are read from the ConstW table, and rotC is used for the rotations.
#define P1C(n) ((rotC(ConstW[(n)-2],15)^rotC(ConstW[(n)-2],13)^((ConstW[(n)-2])>>10U)))
#define P2C(n) ((rotC(ConstW[(n)-15],25)^rotC(ConstW[(n)-15],14)^((ConstW[(n)-15])>>3U)))
// Index argument is parenthesized so any index expression selects the intended entry.
#define P3C(x) ConstW[(x)-7]
#define P4C(x) ConstW[(x)-16]
||||||
|
|
||||||
|
// The compiler is stupid... these dummy assignments to t1 exist only to stop
// it from (de)optimizing the order of the rounds.
#define Barrier1(n) t1 = t1C((n+1))
#define Barrier2(n) t1 = t1C((n))
#define Barrier3(n) t1 = t1C((n))

// One round each. All three variants add t1 into the "d" slot and then
// overwrite the "h" slot with t1 + t2. They expand to several statements and
// end in ';', so use them only as stand-alone statements.

// Round that also computes W[n] (via t1W)
#define sharoundW(n) \
	Barrier1(n); \
	Vals[(3 + 128 - (n)) % 8] += t1W(n); \
	Vals[(7 + 128 - (n)) % 8] = t1W(n) + t2(n);

// Round that reads an already computed W[n]
#define sharound(n) \
	Barrier2(n); \
	Vals[(3 + 128 - (n)) % 8] += t1(n); \
	Vals[(7 + 128 - (n)) % 8] = t1(n) + t2(n);

// Round whose message word is the constant ConstW[n]
#define sharoundC(n) \
	Barrier3(n); \
	Vals[(3 + 128 - (n)) % 8] += t1C(n); \
	Vals[(7 + 128 - (n)) % 8] = t1C(n) + t2(n);

// WORKSIZE is not defined in this file (a "#define WORKSIZE 256" used to sit
// here, commented out); presumably it is supplied through the build options.
#define MAXBUFFERS (4095)
||||||
|
|
||||||
|
__kernel |
||||||
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
||||||
|
void search( const uint state0, const uint state1, const uint state2, const uint state3, |
||||||
|
const uint state4, const uint state5, const uint state6, const uint state7, |
||||||
|
const uint B1, const uint C1, const uint D1, |
||||||
|
const uint F1, const uint G1, const uint H1, |
||||||
|
const u base, |
||||||
|
const uint W16, const uint W17, |
||||||
|
const uint PreVal4, const uint PreVal0, |
||||||
|
const uint PreW18, const uint PreW19, |
||||||
|
const uint PreW31, const uint PreW32, |
||||||
|
|
||||||
|
__global uint * output) |
||||||
|
{ |
||||||
|
|
||||||
|
|
||||||
|
u W[124]; |
||||||
|
u Vals[8]; |
||||||
|
|
||||||
|
//Dummy Variable to prevent compiler from reordering between rounds |
||||||
|
u t1; |
||||||
|
|
||||||
|
//Vals[0]=state0; |
||||||
|
Vals[1]=B1; |
||||||
|
Vals[2]=C1; |
||||||
|
Vals[3]=D1; |
||||||
|
//Vals[4]=PreVal4; |
||||||
|
Vals[5]=F1; |
||||||
|
Vals[6]=G1; |
||||||
|
Vals[7]=H1; |
||||||
|
|
||||||
|
W[16] = W16; |
||||||
|
W[17] = W17; |
||||||
|
|
||||||
|
#ifdef VECTORS4 |
||||||
|
//Less dependencies to get both the local id and group id and then add them |
||||||
|
W[3] = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKSIZE * 4u); |
||||||
|
uint r = rot(W[3].x,25u)^rot(W[3].x,14u)^((W[3].x)>>3U); |
||||||
|
//Since only the 2 LSB is opposite between the nonces, we can save an instruction by flipping the 4 bits in W18 rather than the 1 bit in W3 |
||||||
|
W[18] = PreW18 + (u){r, r ^ 0x2004000U, r ^ 0x4008000U, r ^ 0x600C000U}; |
||||||
|
#elif defined VECTORS2 |
||||||
|
W[3] = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u); |
||||||
|
uint r = rot(W[3].x,25u)^rot(W[3].x,14u)^((W[3].x)>>3U); |
||||||
|
W[18] = PreW18 + (u){r, r ^ 0x2004000U}; |
||||||
|
#else |
||||||
|
W[3] = base + get_local_id(0) + get_group_id(0) * (WORKSIZE); |
||||||
|
u r = rot(W[3],25u)^rot(W[3],14u)^((W[3])>>3U); |
||||||
|
W[18] = PreW18 + r; |
||||||
|
#endif |
||||||
|
//the order of the W calcs and Rounds is like this because the compiler needs help finding how to order the instructions |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Vals[4] = PreVal4 + W[3]; |
||||||
|
Vals[0] = PreVal0 + W[3]; |
||||||
|
|
||||||
|
sharoundC(4); |
||||||
|
W[19] = PreW19 + W[3]; |
||||||
|
sharoundC(5); |
||||||
|
W[20] = P4C(20) + P1(20); |
||||||
|
sharoundC(6); |
||||||
|
W[21] = P1(21); |
||||||
|
sharoundC(7); |
||||||
|
W[22] = P3C(22) + P1(22); |
||||||
|
sharoundC(8); |
||||||
|
W[23] = W[16] + P1(23); |
||||||
|
sharoundC(9); |
||||||
|
W[24] = W[17] + P1(24); |
||||||
|
sharoundC(10); |
||||||
|
W[25] = P1(25) + P3(25); |
||||||
|
W[26] = P1(26) + P3(26); |
||||||
|
sharoundC(11); |
||||||
|
W[27] = P1(27) + P3(27); |
||||||
|
W[28] = P1(28) + P3(28); |
||||||
|
sharoundC(12); |
||||||
|
W[29] = P1(29) + P3(29); |
||||||
|
sharoundC(13); |
||||||
|
W[30] = P1(30) + P2C(30) + P3(30); |
||||||
|
W[31] = PreW31 + (P1(31) + P3(31)); |
||||||
|
sharoundC(14); |
||||||
|
W[32] = PreW32 + (P1(32) + P3(32)); |
||||||
|
sharoundC(15); |
||||||
|
sharound(16); |
||||||
|
sharound(17); |
||||||
|
sharound(18); |
||||||
|
sharound(19); |
||||||
|
sharound(20); |
||||||
|
sharound(21); |
||||||
|
sharound(22); |
||||||
|
sharound(23); |
||||||
|
sharound(24); |
||||||
|
sharound(25); |
||||||
|
sharound(26); |
||||||
|
sharound(27); |
||||||
|
sharound(28); |
||||||
|
sharound(29); |
||||||
|
sharound(30); |
||||||
|
sharound(31); |
||||||
|
sharound(32); |
||||||
|
sharoundW(33); |
||||||
|
sharoundW(34); |
||||||
|
sharoundW(35); |
||||||
|
sharoundW(36); |
||||||
|
sharoundW(37); |
||||||
|
sharoundW(38); |
||||||
|
sharoundW(39); |
||||||
|
sharoundW(40); |
||||||
|
sharoundW(41); |
||||||
|
sharoundW(42); |
||||||
|
sharoundW(43); |
||||||
|
sharoundW(44); |
||||||
|
sharoundW(45); |
||||||
|
sharoundW(46); |
||||||
|
sharoundW(47); |
||||||
|
sharoundW(48); |
||||||
|
sharoundW(49); |
||||||
|
sharoundW(50); |
||||||
|
sharoundW(51); |
||||||
|
sharoundW(52); |
||||||
|
sharoundW(53); |
||||||
|
sharoundW(54); |
||||||
|
sharoundW(55); |
||||||
|
sharoundW(56); |
||||||
|
sharoundW(57); |
||||||
|
sharoundW(58); |
||||||
|
sharoundW(59); |
||||||
|
sharoundW(60); |
||||||
|
sharoundW(61); |
||||||
|
sharoundW(62); |
||||||
|
sharoundW(63); |
||||||
|
|
||||||
|
W[64]=state0+Vals[0]; |
||||||
|
W[65]=state1+Vals[1]; |
||||||
|
W[66]=state2+Vals[2]; |
||||||
|
W[67]=state3+Vals[3]; |
||||||
|
W[68]=state4+Vals[4]; |
||||||
|
W[69]=state5+Vals[5]; |
||||||
|
W[70]=state6+Vals[6]; |
||||||
|
W[71]=state7+Vals[7]; |
||||||
|
|
||||||
|
Vals[0]=H[0]; |
||||||
|
Vals[1]=H[1]; |
||||||
|
Vals[2]=H[2]; |
||||||
|
Vals[3]=H[3]; |
||||||
|
Vals[4]=H[4]; |
||||||
|
Vals[5]=H[5]; |
||||||
|
Vals[6]=H[6]; |
||||||
|
Vals[7]=H[7]; |
||||||
|
|
||||||
|
//sharound(64 + 0); |
||||||
|
const u Temp = (0xb0edbdd0U + K[0]) + W[64]; |
||||||
|
Vals[7] = Temp + 0x08909ae5U; |
||||||
|
Vals[3] = 0xa54ff53aU + Temp; |
||||||
|
|
||||||
|
#define P124(n) P2(n) + P1(n) + P4(n) |
||||||
|
|
||||||
|
|
||||||
|
W[64 + 16] = + P2(64 + 16) + P4(64 + 16); |
||||||
|
sharound(64 + 1); |
||||||
|
W[64 + 17] = P1C(64 + 17) + P2(64 + 17) + P4(64 + 17); |
||||||
|
sharound(64 + 2); |
||||||
|
W[64 + 18] = P124(64 + 18); |
||||||
|
sharound(64 + 3); |
||||||
|
W[64 + 19] = P124(64 + 19); |
||||||
|
sharound(64 + 4); |
||||||
|
W[64 + 20] = P124(64 + 20); |
||||||
|
sharound(64 + 5); |
||||||
|
W[64 + 21] = P124(64 + 21); |
||||||
|
sharound(64 + 6); |
||||||
|
W[64 + 22] = P4(64 + 22) + P3C(64 + 22) + P2(64 + 22) + P1(64 + 22); |
||||||
|
sharound(64 + 7); |
||||||
|
W[64 + 23] = P4(64 + 23) + P3(64 + 23) + P2C(64 + 23) + P1(64 + 23); |
||||||
|
sharoundC(64 + 8); |
||||||
|
W[64 + 24] = P1(64 + 24) + P4C(64 + 24) + P3(64 + 24); |
||||||
|
sharoundC(64 + 9); |
||||||
|
W[64 + 25] = P3(64 + 25) + P1(64 + 25); |
||||||
|
sharoundC(64 + 10); |
||||||
|
W[64 + 26] = P3(64 + 26) + P1(64 + 26); |
||||||
|
sharoundC(64 + 11); |
||||||
|
W[64 + 27] = P3(64 + 27) + P1(64 + 27); |
||||||
|
sharoundC(64 + 12); |
||||||
|
W[64 + 28] = P3(64 + 28) + P1(64 + 28); |
||||||
|
sharoundC(64 + 13); |
||||||
|
W[64 + 29] = P1(64 + 29) + P3(64 + 29); |
||||||
|
W[64 + 30] = P3(64 + 30) + P2C(64 + 30) + P1(64 + 30); |
||||||
|
sharoundC(64 + 14); |
||||||
|
W[64 + 31] = P4C(64 + 31) + P3(64 + 31) + P2(64 + 31) + P1(64 + 31); |
||||||
|
sharoundC(64 + 15); |
||||||
|
sharound(64 + 16); |
||||||
|
sharound(64 + 17); |
||||||
|
sharound(64 + 18); |
||||||
|
sharound(64 + 19); |
||||||
|
sharound(64 + 20); |
||||||
|
sharound(64 + 21); |
||||||
|
sharound(64 + 22); |
||||||
|
sharound(64 + 23); |
||||||
|
sharound(64 + 24); |
||||||
|
sharound(64 + 25); |
||||||
|
sharound(64 + 26); |
||||||
|
sharound(64 + 27); |
||||||
|
sharound(64 + 28); |
||||||
|
sharound(64 + 29); |
||||||
|
sharound(64 + 30); |
||||||
|
sharound(64 + 31); |
||||||
|
sharoundW(64 + 32); |
||||||
|
sharoundW(64 + 33); |
||||||
|
sharoundW(64 + 34); |
||||||
|
sharoundW(64 + 35); |
||||||
|
sharoundW(64 + 36); |
||||||
|
sharoundW(64 + 37); |
||||||
|
sharoundW(64 + 38); |
||||||
|
sharoundW(64 + 39); |
||||||
|
sharoundW(64 + 40); |
||||||
|
sharoundW(64 + 41); |
||||||
|
sharoundW(64 + 42); |
||||||
|
sharoundW(64 + 43); |
||||||
|
sharoundW(64 + 44); |
||||||
|
sharoundW(64 + 45); |
||||||
|
sharoundW(64 + 46); |
||||||
|
sharoundW(64 + 47); |
||||||
|
sharoundW(64 + 48); |
||||||
|
sharoundW(64 + 49); |
||||||
|
sharoundW(64 + 50); |
||||||
|
sharoundW(64 + 51); |
||||||
|
sharoundW(64 + 52); |
||||||
|
sharoundW(64 + 53); |
||||||
|
sharoundW(64 + 54); |
||||||
|
sharoundW(64 + 55); |
||||||
|
sharoundW(64 + 56); |
||||||
|
sharoundW(64 + 57); |
||||||
|
sharoundW(64 + 58); |
||||||
|
|
||||||
|
W[117] += W[108] + Vals[3] + Vals[7] + P2(124) + P1(124) + Ch((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64),Vals[1],Vals[2]) - |
||||||
|
(-(K[60] + H[7]) - S1((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64))); |
||||||
|
|
||||||
|
#define FOUND (0x800) |
||||||
|
#define NFLAG (0x7FF) |
||||||
|
|
||||||
|
#ifdef VECTORS4 |
||||||
|
bool result = W[117].x & W[117].y & W[117].z & W[117].w; |
||||||
|
if (!result) { |
||||||
|
if (!W[117].x) |
||||||
|
output[FOUND] = output[NFLAG & W[3].x] = W[3].x; |
||||||
|
if (!W[117].y) |
||||||
|
output[FOUND] = output[NFLAG & W[3].y] = W[3].y; |
||||||
|
if (!W[117].z) |
||||||
|
output[FOUND] = output[NFLAG & W[3].z] = W[3].z; |
||||||
|
if (!W[117].w) |
||||||
|
output[FOUND] = output[NFLAG & W[3].w] = W[3].w; |
||||||
|
} |
||||||
|
#elif defined VECTORS2 |
||||||
|
bool result = W[117].x & W[117].y; |
||||||
|
if (!result) { |
||||||
|
if (!W[117].x) |
||||||
|
output[FOUND] = output[NFLAG & W[3].x] = W[3].x; |
||||||
|
if (!W[117].y) |
||||||
|
output[FOUND] = output[NFLAG & W[3].y] = W[3].y; |
||||||
|
} |
||||||
|
#else |
||||||
|
if (!W[117]) |
||||||
|
output[FOUND] = output[NFLAG & W[3]] = W[3]; |
||||||
|
#endif |
||||||
|
} |
@ -0,0 +1,757 @@ |
|||||||
|
// Bit helpers built on the OpenCL rotate() / bitselect() builtins.
// Macro parameters are parenthesized wherever they meet an operator, so
// expression arguments (e.g. a | b) expand correctly.
#define rotl(x,y) rotate((x),(y))
// Ch: bits of y where x is set, bits of z elsewhere
#define Ch(x,y,z) bitselect((z),(y),(x))
// Maj: bitwise majority of x, y, z
#define Maj(x,y,z) Ch(((x)^(z)),(y),(z))

// Reverse the byte order of each 32-bit word
#define EndianSwap(n) (rotl((n)&0x00FF00FF,24U)|rotl((n)&0xFF00FF00,8U))

// Round mixing functions (Tr*) and message-schedule mixing functions (Wr*)
#define Tr2(x) (rotl((x), 30U) ^ rotl((x), 19U) ^ rotl((x), 10U))
#define Tr1(x) (rotl((x), 26U) ^ rotl((x), 21U) ^ rotl((x), 7U))
#define Wr2(x) (rotl((x), 25U) ^ rotl((x), 14U) ^ ((x)>>3U))
#define Wr1(x) (rotl((x), 15U) ^ rotl((x), 13U) ^ ((x)>>10U))

// One round on working variables a..h with k = message word + round constant.
// Updates h and d in place. Expands to three statements ending in ';', so use
// it only as a stand-alone statement. k is appended to an additive chain, so
// pass only sums or single terms.
#define RND(a, b, c, d, e, f, g, h, k) \
	h += Tr1(e) + Ch(e, f, g) + k; \
	d += h; \
	h += Tr2(a) + Maj(a, b, c);
||||||
|
|
||||||
|
// Runs all 64 rounds (fully unrolled) over the sixteen 32-bit words held in
// block0..block3, starting from the state in *state0 (first four words) and
// *state1 (last four words), and adds the resulting working variables back
// into *state0 / *state1.
void SHA256(uint4*restrict state0,uint4*restrict state1, const uint4 block0, const uint4 block1, const uint4 block2, const uint4 block3)
{
	// Working variables a..h, seeded from the incoming state
	uint a = (*state0).x, b = (*state0).y, c = (*state0).z, d = (*state0).w;
	uint e = (*state1).x, f = (*state1).y, g = (*state1).z, h = (*state1).w;

	// Rolling 16-word message window: w0..w3 = block0, w4..w7 = block1,
	// w8..w11 = block2, w12..w15 = block3
	uint w0  = block0.x, w1  = block0.y, w2  = block0.z, w3  = block0.w;
	uint w4  = block1.x, w5  = block1.y, w6  = block1.z, w7  = block1.w;
	uint w8  = block2.x, w9  = block2.y, w10 = block2.z, w11 = block2.w;
	uint w12 = block3.x, w13 = block3.y, w14 = block3.z, w15 = block3.w;

	// Rounds 0..15 consume the block words as they are
	RND(a,b,c,d,e,f,g,h, w0+0x428a2f98U);
	RND(h,a,b,c,d,e,f,g, w1+0x71374491U);
	RND(g,h,a,b,c,d,e,f, w2+0xb5c0fbcfU);
	RND(f,g,h,a,b,c,d,e, w3+0xe9b5dba5U);
	RND(e,f,g,h,a,b,c,d, w4+0x3956c25bU);
	RND(d,e,f,g,h,a,b,c, w5+0x59f111f1U);
	RND(c,d,e,f,g,h,a,b, w6+0x923f82a4U);
	RND(b,c,d,e,f,g,h,a, w7+0xab1c5ed5U);
	RND(a,b,c,d,e,f,g,h, w8+0xd807aa98U);
	RND(h,a,b,c,d,e,f,g, w9+0x12835b01U);
	RND(g,h,a,b,c,d,e,f, w10+0x243185beU);
	RND(f,g,h,a,b,c,d,e, w11+0x550c7dc3U);
	RND(e,f,g,h,a,b,c,d, w12+0x72be5d74U);
	RND(d,e,f,g,h,a,b,c, w13+0x80deb1feU);
	RND(c,d,e,f,g,h,a,b, w14+0x9bdc06a7U);
	RND(b,c,d,e,f,g,h,a, w15+0xc19bf174U);

// Advance one window slot in place: w(t) = w(t-16) + Wr1(w(t-2)) + w(t-7) + Wr2(w(t-15))
#define SHA256_SCHED(w, wm2, wm7, wm15) w += Wr1(wm2) + wm7 + Wr2(wm15)

// Sixteen rounds: each window slot is advanced and then fed to its round.
// k0..k15 are the round constants for the pass.
#define SHA256_PASS(k0, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11, k12, k13, k14, k15) \
	SHA256_SCHED(w0,  w14, w9,  w1);  RND(a,b,c,d,e,f,g,h, w0+k0) \
	SHA256_SCHED(w1,  w15, w10, w2);  RND(h,a,b,c,d,e,f,g, w1+k1) \
	SHA256_SCHED(w2,  w0,  w11, w3);  RND(g,h,a,b,c,d,e,f, w2+k2) \
	SHA256_SCHED(w3,  w1,  w12, w4);  RND(f,g,h,a,b,c,d,e, w3+k3) \
	SHA256_SCHED(w4,  w2,  w13, w5);  RND(e,f,g,h,a,b,c,d, w4+k4) \
	SHA256_SCHED(w5,  w3,  w14, w6);  RND(d,e,f,g,h,a,b,c, w5+k5) \
	SHA256_SCHED(w6,  w4,  w15, w7);  RND(c,d,e,f,g,h,a,b, w6+k6) \
	SHA256_SCHED(w7,  w5,  w0,  w8);  RND(b,c,d,e,f,g,h,a, w7+k7) \
	SHA256_SCHED(w8,  w6,  w1,  w9);  RND(a,b,c,d,e,f,g,h, w8+k8) \
	SHA256_SCHED(w9,  w7,  w2,  w10); RND(h,a,b,c,d,e,f,g, w9+k9) \
	SHA256_SCHED(w10, w8,  w3,  w11); RND(g,h,a,b,c,d,e,f, w10+k10) \
	SHA256_SCHED(w11, w9,  w4,  w12); RND(f,g,h,a,b,c,d,e, w11+k11) \
	SHA256_SCHED(w12, w10, w5,  w13); RND(e,f,g,h,a,b,c,d, w12+k12) \
	SHA256_SCHED(w13, w11, w6,  w14); RND(d,e,f,g,h,a,b,c, w13+k13) \
	SHA256_SCHED(w14, w12, w7,  w15); RND(c,d,e,f,g,h,a,b, w14+k14) \
	SHA256_SCHED(w15, w13, w8,  w0);  RND(b,c,d,e,f,g,h,a, w15+k15)

	// Rounds 16..31
	SHA256_PASS(0xe49b69c1U, 0xefbe4786U, 0x0fc19dc6U, 0x240ca1ccU,
	            0x2de92c6fU, 0x4a7484aaU, 0x5cb0a9dcU, 0x76f988daU,
	            0x983e5152U, 0xa831c66dU, 0xb00327c8U, 0xbf597fc7U,
	            0xc6e00bf3U, 0xd5a79147U, 0x06ca6351U, 0x14292967U)

	// Rounds 32..47
	SHA256_PASS(0x27b70a85U, 0x2e1b2138U, 0x4d2c6dfcU, 0x53380d13U,
	            0x650a7354U, 0x766a0abbU, 0x81c2c92eU, 0x92722c85U,
	            0xa2bfe8a1U, 0xa81a664bU, 0xc24b8b70U, 0xc76c51a3U,
	            0xd192e819U, 0xd6990624U, 0xf40e3585U, 0x106aa070U)

	// Rounds 48..63
	SHA256_PASS(0x19a4c116U, 0x1e376c08U, 0x2748774cU, 0x34b0bcb5U,
	            0x391c0cb3U, 0x4ed8aa4aU, 0x5b9cca4fU, 0x682e6ff3U,
	            0x748f82eeU, 0x78a5636fU, 0x84c87814U, 0x8cc70208U,
	            0x90befffaU, 0xa4506cebU, 0xbef9a3f7U, 0xc67178f2U)

#undef SHA256_PASS
#undef SHA256_SCHED

	// Feed-forward: add the final working variables into the caller's state
	*state0 += (uint4)(a, b, c, d);
	*state1 += (uint4)(e, f, g, h);
}
||||||
|
|
||||||
|
/*
 * SHA256_fresh - hash one 64-byte block starting from the SHA-256 initial
 * hash values and store the resulting state.
 *
 * state0/state1: output only. Every component is written before it is read,
 *                so the incoming contents are ignored. On return they hold
 *                the working variables plus the SHA-256 initial hash values
 *                (A..D in state0, E..H in state1).
 * block0..block3: the sixteen 32-bit message words, four per vector.
 *
 * The first four rounds are not expressed through RND; they are written out
 * with constants that presumably fold the round constants together with the
 * fixed initial state (TODO confirm against a reference implementation).
 */
void SHA256_fresh(uint4*restrict state0,uint4*restrict state1, const uint4 block0, const uint4 block1, const uint4 block2, const uint4 block3)
{
#define A (*state0).x
#define B (*state0).y
#define C (*state0).z
#define D (*state0).w
#define E (*state1).x
#define F (*state1).y
#define G (*state1).z
#define H (*state1).w

/*
 * Sixteen message-schedule updates, each immediately followed by its round.
 * Word i (W[0].x is word 0 ... W[3].w is word 15) is advanced in place as
 *     W[i] += Wr1(W[(i+14)%16]) + W[(i+9)%16] + Wr2(W[(i+1)%16])
 * and k0..k15 are the round constants for the sixteen rounds.
 */
#define FRESH_EXPAND16(k0,k1,k2,k3,k4,k5,k6,k7,k8,k9,k10,k11,k12,k13,k14,k15) \
	W[0].x += Wr1(W[3].z) + W[2].y + Wr2(W[0].y); \
	RND(A,B,C,D,E,F,G,H, W[0].x+k0); \
	W[0].y += Wr1(W[3].w) + W[2].z + Wr2(W[0].z); \
	RND(H,A,B,C,D,E,F,G, W[0].y+k1); \
	W[0].z += Wr1(W[0].x) + W[2].w + Wr2(W[0].w); \
	RND(G,H,A,B,C,D,E,F, W[0].z+k2); \
	W[0].w += Wr1(W[0].y) + W[3].x + Wr2(W[1].x); \
	RND(F,G,H,A,B,C,D,E, W[0].w+k3); \
	W[1].x += Wr1(W[0].z) + W[3].y + Wr2(W[1].y); \
	RND(E,F,G,H,A,B,C,D, W[1].x+k4); \
	W[1].y += Wr1(W[0].w) + W[3].z + Wr2(W[1].z); \
	RND(D,E,F,G,H,A,B,C, W[1].y+k5); \
	W[1].z += Wr1(W[1].x) + W[3].w + Wr2(W[1].w); \
	RND(C,D,E,F,G,H,A,B, W[1].z+k6); \
	W[1].w += Wr1(W[1].y) + W[0].x + Wr2(W[2].x); \
	RND(B,C,D,E,F,G,H,A, W[1].w+k7); \
	W[2].x += Wr1(W[1].z) + W[0].y + Wr2(W[2].y); \
	RND(A,B,C,D,E,F,G,H, W[2].x+k8); \
	W[2].y += Wr1(W[1].w) + W[0].z + Wr2(W[2].z); \
	RND(H,A,B,C,D,E,F,G, W[2].y+k9); \
	W[2].z += Wr1(W[2].x) + W[0].w + Wr2(W[2].w); \
	RND(G,H,A,B,C,D,E,F, W[2].z+k10); \
	W[2].w += Wr1(W[2].y) + W[1].x + Wr2(W[3].x); \
	RND(F,G,H,A,B,C,D,E, W[2].w+k11); \
	W[3].x += Wr1(W[2].z) + W[1].y + Wr2(W[3].y); \
	RND(E,F,G,H,A,B,C,D, W[3].x+k12); \
	W[3].y += Wr1(W[2].w) + W[1].z + Wr2(W[3].z); \
	RND(D,E,F,G,H,A,B,C, W[3].y+k13); \
	W[3].z += Wr1(W[3].x) + W[1].w + Wr2(W[3].w); \
	RND(C,D,E,F,G,H,A,B, W[3].z+k14); \
	W[3].w += Wr1(W[3].y) + W[2].x + Wr2(W[0].x); \
	RND(B,C,D,E,F,G,H,A, W[3].w+k15)

	/* Local copy of the sixteen message words; updated in place below. */
	uint4 W[4];
	W[0] = block0;
	W[1] = block1;
	W[2] = block2;
	W[3] = block3;

	/* Rounds 0-3: written out with precomputed constants (see header). */
	D=0x98c7e2a2U+W[0].x;
	H=0xfc08884dU+W[0].x;

	C=0xcd2a11aeU+Tr1(D)+Ch(D,0x510e527fU,0x9b05688cU)+W[0].y;
	G=0xC3910C8EU+C+Tr2(H)+Ch(H,0xfb6feee7U,0x2a01a605U);

	B=0x0c2e12e0U+Tr1(C)+Ch(C,D,0x510e527fU)+W[0].z;
	F=0x4498517BU+B+Tr2(G)+Maj(G,H,0x6a09e667U);

	A=0xa4ce148bU+Tr1(B)+Ch(B,C,D)+W[0].w;
	E=0x95F61999U+A+Tr2(F)+Maj(F,G,H);

	/* Rounds 4-15: message words are used unmodified. */
	RND(E,F,G,H,A,B,C,D, W[1].x+0x3956c25bU);
	RND(D,E,F,G,H,A,B,C, W[1].y+0x59f111f1U);
	RND(C,D,E,F,G,H,A,B, W[1].z+0x923f82a4U);
	RND(B,C,D,E,F,G,H,A, W[1].w+0xab1c5ed5U);

	RND(A,B,C,D,E,F,G,H, W[2].x+0xd807aa98U);
	RND(H,A,B,C,D,E,F,G, W[2].y+0x12835b01U);
	RND(G,H,A,B,C,D,E,F, W[2].z+0x243185beU);
	RND(F,G,H,A,B,C,D,E, W[2].w+0x550c7dc3U);

	RND(E,F,G,H,A,B,C,D, W[3].x+0x72be5d74U);
	RND(D,E,F,G,H,A,B,C, W[3].y+0x80deb1feU);
	RND(C,D,E,F,G,H,A,B, W[3].z+0x9bdc06a7U);
	RND(B,C,D,E,F,G,H,A, W[3].w+0xc19bf174U);

	/* Rounds 16-31. */
	FRESH_EXPAND16(0xe49b69c1U, 0xefbe4786U, 0x0fc19dc6U, 0x240ca1ccU,
	               0x2de92c6fU, 0x4a7484aaU, 0x5cb0a9dcU, 0x76f988daU,
	               0x983e5152U, 0xa831c66dU, 0xb00327c8U, 0xbf597fc7U,
	               0xc6e00bf3U, 0xd5a79147U, 0x06ca6351U, 0x14292967U);

	/* Rounds 32-47. */
	FRESH_EXPAND16(0x27b70a85U, 0x2e1b2138U, 0x4d2c6dfcU, 0x53380d13U,
	               0x650a7354U, 0x766a0abbU, 0x81c2c92eU, 0x92722c85U,
	               0xa2bfe8a1U, 0xa81a664bU, 0xc24b8b70U, 0xc76c51a3U,
	               0xd192e819U, 0xd6990624U, 0xf40e3585U, 0x106aa070U);

	/* Rounds 48-63. */
	FRESH_EXPAND16(0x19a4c116U, 0x1e376c08U, 0x2748774cU, 0x34b0bcb5U,
	               0x391c0cb3U, 0x4ed8aa4aU, 0x5b9cca4fU, 0x682e6ff3U,
	               0x748f82eeU, 0x78a5636fU, 0x84c87814U, 0x8cc70208U,
	               0x90befffaU, 0xa4506cebU, 0xbef9a3f7U, 0xc67178f2U);

#undef FRESH_EXPAND16
#undef A
#undef B
#undef C
#undef D
#undef E
#undef F
#undef G
#undef H

	/* Feed-forward: add the SHA-256 initial hash values to the result. */
	*state0 += (uint4)(0x6A09E667U,0xBB67AE85U,0x3C6EF372U,0xA54FF53AU);
	*state1 += (uint4)(0x510E527FU,0x9B05688CU,0x1F83D9ABU,0x5BE0CD19U);
}
||||||
|
|
||||||
|
/*
 * Per-round addends consumed by SHA256_fixed(): entry i is passed as the
 * last argument of round i, in place of a "message word + round constant"
 * sum. Entries 0, 1 and 15 equal the SHA-256 round constants plus 1,
 * 0x80000000 and 0x620 respectively, and entries 2..14 equal the plain
 * round constants, i.e. the block being hashed is
 * {1, 0x80000000, 0, ..., 0, 0x620}. Entries 16..63 presumably hold the
 * expanded schedule of that block plus the round constants (TODO confirm).
 */
__constant uint fixedW[64] =
{
	/*  0.. 3 */ 0x428a2f99, 0xf1374491, 0xb5c0fbcf, 0xe9b5dba5,
	/*  4.. 7 */ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
	/*  8..11 */ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
	/* 12..15 */ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf794,
	/* 16..19 */ 0xf59b89c2, 0x73924787, 0x23c6886e, 0xa42ca65c,
	/* 20..23 */ 0x15ed3627, 0x4d6edcbf, 0xe28217fc, 0xef02488f,
	/* 24..27 */ 0xb707775c, 0x0468c23f, 0xe7e72b4c, 0x49e1f1a2,
	/* 28..31 */ 0x4b99c816, 0x926d1570, 0xaa0fc072, 0xadb36e2c,
	/* 32..35 */ 0xad87a3ea, 0xbcb1d3a3, 0x7b993186, 0x562b9420,
	/* 36..39 */ 0xbff3ca0c, 0xda4b0c23, 0x6cd8711a, 0x8f337caa,
	/* 40..43 */ 0xc91b1417, 0xc359dce1, 0xa83253a7, 0x3b13c12d,
	/* 44..47 */ 0x9d3d725d, 0xd9031a84, 0xb1a03340, 0x16f58012,
	/* 48..51 */ 0xe64fb6a2, 0xe84d923a, 0xe93a5730, 0x09837686,
	/* 52..55 */ 0x078ff753, 0x29833341, 0xd5de0b7e, 0x6948ccf4,
	/* 56..59 */ 0xe0a1adbe, 0x7c728e11, 0x511c78e4, 0x315b45bd,
	/* 60..63 */ 0xfca71413, 0xea28f96a, 0x79703128, 0x4e1ef848,
};
||||||
|
|
||||||
|
/*
 * SHA256_fixed - run the 64 compression rounds using the precomputed
 * per-round addends in fixedW[] instead of a message block, then add the
 * result into the caller's state.
 *
 * state0/state1: in/out; hold A..D and E..H of the running hash state.
 */
void SHA256_fixed(uint4*restrict state0,uint4*restrict state1)
{
	/* The rounds run on private copies; the originals receive the sum. */
	uint4 work0 = *state0;
	uint4 work1 = *state1;

#define A work0.x
#define B work0.y
#define C work0.z
#define D work0.w
#define E work1.x
#define F work1.y
#define G work1.z
#define H work1.w

/* Eight consecutive rounds starting at fixedW[base]; the register
 * rotation returns to A..H order after every eight rounds. */
#define FIXED_ROUNDS8(base) \
	RND(A,B,C,D,E,F,G,H, fixedW[(base)+0]); \
	RND(H,A,B,C,D,E,F,G, fixedW[(base)+1]); \
	RND(G,H,A,B,C,D,E,F, fixedW[(base)+2]); \
	RND(F,G,H,A,B,C,D,E, fixedW[(base)+3]); \
	RND(E,F,G,H,A,B,C,D, fixedW[(base)+4]); \
	RND(D,E,F,G,H,A,B,C, fixedW[(base)+5]); \
	RND(C,D,E,F,G,H,A,B, fixedW[(base)+6]); \
	RND(B,C,D,E,F,G,H,A, fixedW[(base)+7])

	FIXED_ROUNDS8(0);
	FIXED_ROUNDS8(8);
	FIXED_ROUNDS8(16);
	FIXED_ROUNDS8(24);
	FIXED_ROUNDS8(32);
	FIXED_ROUNDS8(40);
	FIXED_ROUNDS8(48);
	FIXED_ROUNDS8(56);

#undef FIXED_ROUNDS8
#undef A
#undef B
#undef C
#undef D
#undef E
#undef F
#undef G
#undef H

	/* Feed-forward into the caller's state. */
	*state0 += work0;
	*state1 += work1;
}
||||||
|
|
||||||
|
/*
 * Rearrange and byte-swap the eight vectors of B in place.
 *
 * B[0..3] and B[4..7] are processed independently as two groups of four.
 * Within a group, output vector k takes its .x from input (k+1)%4, its .y
 * from (k+2)%4, its .z from (k+3)%4 and its .w from k; each output vector
 * is then passed through EndianSwap(). unshittify() applies the reverse
 * lane mapping.
 */
void shittify(uint4 B[8])
{
#pragma unroll
	for(uint base=0; base<8; base+=4)
	{
		/* Gather all four outputs before any input is overwritten. */
		const uint4 r0 = (uint4)(B[base+1].x,B[base+2].y,B[base+3].z,B[base+0].w);
		const uint4 r1 = (uint4)(B[base+2].x,B[base+3].y,B[base+0].z,B[base+1].w);
		const uint4 r2 = (uint4)(B[base+3].x,B[base+0].y,B[base+1].z,B[base+2].w);
		const uint4 r3 = (uint4)(B[base+0].x,B[base+1].y,B[base+2].z,B[base+3].w);

		B[base+0] = EndianSwap(r0);
		B[base+1] = EndianSwap(r1);
		B[base+2] = EndianSwap(r2);
		B[base+3] = EndianSwap(r3);
	}
}
||||||
|
|
||||||
|
/*
 * Reverse lane rearrangement for shittify(), also byte-swapping each vector.
 *
 * B[0..3] and B[4..7] are processed independently as two groups of four.
 * Within a group, output vector k takes its .x from input (k+3)%4, its .y
 * from (k+2)%4, its .z from (k+1)%4 and its .w from k; each output vector
 * is then passed through EndianSwap().
 */
void unshittify(uint4 B[8])
{
#pragma unroll
	for(uint base=0; base<8; base+=4)
	{
		/* Gather all four outputs before any input is overwritten. */
		const uint4 r0 = (uint4)(B[base+3].x,B[base+2].y,B[base+1].z,B[base+0].w);
		const uint4 r1 = (uint4)(B[base+0].x,B[base+3].y,B[base+2].z,B[base+1].w);
		const uint4 r2 = (uint4)(B[base+1].x,B[base+0].y,B[base+3].z,B[base+2].w);
		const uint4 r3 = (uint4)(B[base+2].x,B[base+1].y,B[base+0].z,B[base+3].w);

		B[base+0] = EndianSwap(r0);
		B[base+1] = EndianSwap(r1);
		B[base+2] = EndianSwap(r2);
		B[base+3] = EndianSwap(r3);
	}
}
||||||
|
|
||||||
|
/*
 * Mix the eight vectors of B in place, in two passes:
 *   1. B[0..3] ^= B[4..7]; a copy is run through four iterations of the
 *      mixing body below and the result is added into B[0..3].
 *   2. B[4..7] ^= (new) B[0..3]; a copy is mixed the same way and the
 *      result is added into B[4..7].
 * The mixing body uses rotl() with amounts 7, 9, 13 and 18, first on whole
 * vectors and then on lane-rotated (.wxyz / .zwxy) operands.
 */
void salsa(uint4 B[8])
{
/* One iteration of the mixing body, operating on the local array `mix`. */
#define SALSA_MIX_STEP \
	mix[0] ^= rotl(mix[3]     +mix[2]     , 7U); \
	mix[1] ^= rotl(mix[0]     +mix[3]     , 9U); \
	mix[2] ^= rotl(mix[1]     +mix[0]     ,13U); \
	mix[3] ^= rotl(mix[2]     +mix[1]     ,18U); \
	mix[2] ^= rotl(mix[3].wxyz+mix[0].zwxy, 7U); \
	mix[1] ^= rotl(mix[2].wxyz+mix[3].zwxy, 9U); \
	mix[0] ^= rotl(mix[1].wxyz+mix[2].zwxy,13U); \
	mix[3] ^= rotl(mix[0].wxyz+mix[1].zwxy,18U);

	uint4 mix[4];

	/* Pass 1: fold the upper half into the lower half and copy it. */
#pragma unroll
	for(uint k=0; k<4; ++k)
	{
		B[k] ^= B[k+4];
		mix[k] = B[k];
	}

#pragma unroll
	for(uint r=0; r<4; ++r)
	{
		SALSA_MIX_STEP
	}

	/* Finish pass 1 (add the mixed copy), then start pass 2 by folding
	 * the updated lower half into the upper half and copying that. */
#pragma unroll
	for(uint k=0; k<4; ++k)
	{
		B[k] += mix[k];
		B[k+4] ^= B[k];
		mix[k] = B[k+4];
	}

#pragma unroll
	for(uint r=0; r<4; ++r)
	{
		SALSA_MIX_STEP
	}

	/* Finish pass 2. */
#pragma unroll
	for(uint k=0; k<4; ++k)
		B[k+4] += mix[k];

#undef SALSA_MIX_STEP
}
||||||
|
|
||||||
|
/*
 * Coord(x,y,z): flat index of element (x,y,z) in an array whose fastest
 * dimension has extent <x>SIZE and whose next dimension has extent <y>SIZE.
 * The extents are obtained by pasting "SIZE" onto the names of the first two
 * arguments, so the arguments must be plain identifiers and the matching
 * <name>SIZE identifiers must be in scope where the macro is used.
 * The expansion and every argument are parenthesized so the macro is safe
 * inside larger expressions (e.g. 2*Coord(a,b,c)).
 */
#define Coord(x,y,z) ((x)+(y)*(x ## SIZE)+(z)*(y ## SIZE)*(x ## SIZE))
/* CO: index for the in-scope variables z, x, y: z + x*zSIZE + y*xSIZE*zSIZE. */
#define CO Coord(z,x,y)
||||||
|
|
||||||
|
void scrypt_core(uint4 X[8], __global uint4*restrict lookup) |
||||||
|
{ |
||||||
|
shittify(X); |
||||||
|
const uint zSIZE = 8; |
||||||
|
const uint ySIZE = (1024/LOOKUP_GAP+(1024%LOOKUP_GAP>0)); |
||||||
|
const uint xSIZE = CONCURRENT_THREADS; |
||||||
|
uint x = get_global_id(0)%xSIZE; |
||||||
|
|
||||||
|
for(uint y=0; y<1024/LOOKUP_GAP; ++y) |
||||||
|
{ |
||||||
|
#pragma unroll |
||||||
|
for(uint z=0; z<zSIZE; ++z) |
||||||
|
lookup[CO] = X[z]; |
||||||
|
for(uint i=0; i<LOOKUP_GAP; ++i) |
||||||
|
salsa(X); |
||||||
|
} |
||||||
|
#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8) |
||||||
|
{ |
||||||
|
uint y = (1024/LOOKUP_GAP); |
||||||
|
#pragma unroll |
||||||
|
for(uint z=0; z<zSIZE; ++z) |
||||||
|
lookup[CO] = X[z]; |
||||||
|
for(uint i=0; i<1024%LOOKUP_GAP; ++i) |
||||||
|
salsa(X); |
||||||
|
} |
||||||
|
#endif |
||||||
|
for (uint i=0; i<1024; ++i) |
||||||
|
{ |
||||||
|
uint4 V[8]; |
||||||
|
uint j = X[7].x & 0x3FF; |
||||||
|
uint y = (j/LOOKUP_GAP); |
||||||
|
#pragma unroll |
||||||
|
for(uint z=0; z<zSIZE; ++z) |
||||||
|
V[z] = lookup[CO]; |
||||||
|
|
||||||
|
#if (LOOKUP_GAP == 1) |
||||||
|
#elif (LOOKUP_GAP == 2) |
||||||
|
if (j&1) |
||||||
|
salsa(V); |
||||||
|
#else |
||||||
|
uint val = j%LOOKUP_GAP; |
||||||
|
for (uint z=0; z<val; ++z) |
||||||
|
salsa(V); |
||||||
|
#endif |
||||||
|
|
||||||
|
#pragma unroll |
||||||
|
for(uint z=0; z<zSIZE; ++z) |
||||||
|
X[z] ^= V[z]; |
||||||
|
salsa(X); |
||||||
|
} |
||||||
|
unshittify(X); |
||||||
|
} |
||||||
|
|
||||||
|
#define FOUND (0x800) |
||||||
|
#define NFLAG (0x7FF) |
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
||||||
|
__kernel void search(__global const uint4 * restrict input, |
||||||
|
__global uint*restrict output, __global uint4*restrict padcache, |
||||||
|
const uint4 midstate0, const uint4 midstate16, const uint target) |
||||||
|
{ |
||||||
|
uint gid = get_global_id(0); |
||||||
|
uint4 X[8]; |
||||||
|
uint4 tstate0, tstate1, ostate0, ostate1, tmp0, tmp1; |
||||||
|
uint4 data = (uint4)(input[4].x,input[4].y,input[4].z,gid); |
||||||
|
uint4 pad0 = midstate0, pad1 = midstate16; |
||||||
|
|
||||||
|
SHA256(&pad0,&pad1, data, (uint4)(0x80000000U,0,0,0), (uint4)(0,0,0,0), (uint4)(0,0,0,0x280)); |
||||||
|
SHA256_fresh(&ostate0,&ostate1, pad0^0x5C5C5C5CU, pad1^0x5C5C5C5CU, 0x5C5C5C5CU, 0x5C5C5C5CU); |
||||||
|
SHA256_fresh(&tstate0,&tstate1, pad0^0x36363636U, pad1^0x36363636U, 0x36363636U, 0x36363636U); |
||||||
|
|
||||||
|
tmp0 = tstate0; |
||||||
|
tmp1 = tstate1; |
||||||
|
SHA256(&tstate0, &tstate1, input[0],input[1],input[2],input[3]); |
||||||
|
|
||||||
|
#pragma unroll |
||||||
|
for (uint i=0; i<4; i++) |
||||||
|
{ |
||||||
|
pad0 = tstate0; |
||||||
|
pad1 = tstate1; |
||||||
|
X[i*2 ] = ostate0; |
||||||
|
X[i*2+1] = ostate1; |
||||||
|
|
||||||
|
SHA256(&pad0,&pad1, data, (uint4)(i+1,0x80000000U,0,0), (uint4)(0,0,0,0), (uint4)(0,0,0,0x4a0U)); |
||||||
|
SHA256(X+i*2,X+i*2+1, pad0, pad1, (uint4)(0x80000000U, 0U, 0U, 0U), (uint4)(0U, 0U, 0U, 0x300U)); |
||||||
|
} |
||||||
|
scrypt_core(X,padcache); |
||||||
|
SHA256(&tmp0,&tmp1, X[0], X[1], X[2], X[3]); |
||||||
|
SHA256(&tmp0,&tmp1, X[4], X[5], X[6], X[7]); |
||||||
|
SHA256_fixed(&tmp0,&tmp1); |
||||||
|
SHA256(&ostate0,&ostate1, tmp0, tmp1, (uint4)(0x80000000U, 0U, 0U, 0U), (uint4)(0U, 0U, 0U, 0x300U)); |
||||||
|
|
||||||
|
bool found = (EndianSwap(ostate1.w) <= target); |
||||||
|
if (found) |
||||||
|
output[FOUND] = output[NFLAG & gid] = gid; |
||||||
|
} |
||||||
|
|
||||||
|
/*- |
||||||
|
* Copyright 2009 Colin Percival, 2011 ArtForz, 2011 pooler, 2012 mtrlt, |
||||||
|
* 2012 Con Kolivas. |
||||||
|
* All rights reserved. |
||||||
|
* |
||||||
|
* Redistribution and use in source and binary forms, with or without |
||||||
|
* modification, are permitted provided that the following conditions |
||||||
|
* are met: |
||||||
|
* 1. Redistributions of source code must retain the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer. |
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer in the |
||||||
|
* documentation and/or other materials provided with the distribution. |
||||||
|
* |
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
||||||
|
* SUCH DAMAGE. |
||||||
|
* |
||||||
|
* This file was originally written by Colin Percival as part of the Tarsnap |
||||||
|
* online backup system. |
||||||
|
*/ |
Loading…
Reference in new issue