From 83dde50f9dc332cf74ba2c1d83b8bf0311f8a40d Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Sat, 18 Feb 2012 15:17:50 +1100 Subject: [PATCH] Updates to diakgcn kernel courtesy of Philip Kaufmann --- device-gpu.c | 10 +- diakgcn120216.cl | 753 ++++++++++++++++++++++++----------------------- findnonce.c | 11 +- miner.h | 1 + 4 files changed, 409 insertions(+), 366 deletions(-) diff --git a/device-gpu.c b/device-gpu.c index f2994e99..b9de952d 100644 --- a/device-gpu.c +++ b/device-gpu.c @@ -761,10 +761,10 @@ static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk) nonces[i] = blk->nonce + i; CL_SET_VARG(vwidth, nonces); + CL_SET_BLKARG(PreVal0); CL_SET_BLKARG(PreVal4_2); CL_SET_BLKARG(cty_h); CL_SET_BLKARG(D1A); - CL_SET_BLKARG(PreVal0); CL_SET_BLKARG(cty_b); CL_SET_BLKARG(cty_c); CL_SET_BLKARG(cty_f); @@ -793,6 +793,14 @@ static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk) CL_SET_BLKARG(zeroA); CL_SET_BLKARG(zeroB); + CL_SET_BLKARG(oneA); + CL_SET_BLKARG(twoA); + CL_SET_BLKARG(threeA); + CL_SET_BLKARG(fourA); + CL_SET_BLKARG(fiveA); + CL_SET_BLKARG(sixA); + CL_SET_BLKARG(sevenA); + CL_SET_ARG(clState->outputBuffer); return status; diff --git a/diakgcn120216.cl b/diakgcn120216.cl index 510c779b..e96cee92 100644 --- a/diakgcn120216.cl +++ b/diakgcn120216.cl @@ -1,4 +1,4 @@ -// DiaKGCN 09-02-2012 - OpenCL kernel by Diapolo +// DiaKGCN 16-02-2012 - OpenCL kernel by Diapolo // // Parts and / or ideas for this kernel are based upon the public-domain poclbm project, the phatk kernel by Phateus and the DiabloMiner kernel by DiabloD3. // The kernel was rewritten by me (Diapolo) and is still public-domain! @@ -16,34 +16,23 @@ #ifdef BITALIGN #pragma OPENCL EXTENSION cl_amd_media_ops : enable #ifdef BFI_INT - #define Ch(x, y, z) amd_bytealign(x, y, z) - #define Ma(x, y, z) amd_bytealign(z ^ x, y, x) + #define ch(x, y, z) amd_bytealign(x, y, z) + #define ma(x, y, z) amd_bytealign(z ^ x, y, x) #else - #define Ch(x, y, z) bitselect(z, y, x) + #define ch(x, y, z) bitselect(z, y, x) #if defined(VECTORS2) || defined(VECTORS4) || defined(VECTORS8) - // GCN - VEC2 or VEC4 - #define Ma(z, x, y) bitselect(z, y, z ^ x) + // GCN - VEC2 or VEC4 or VEC8 + #define ma(z, x, y) bitselect(z, y, z ^ x) #else // GCN - no VEC - #define Ma(z, x, y) Ch(z ^ x, y, x) + #define ma(z, x, y) ch(z ^ x, y, x) #endif #endif #else //BITALIGN - #define Ch(x, y, z) (z ^ (x & (y ^ z))) - #define Ma(x, y, z) ((x & z) | (y & (x | z))) + #define ch(x, y, z) (z ^ (x & (y ^ z))) + #define ma(x, y, z) ((x & z) | (y & (x | z))) #endif -#ifdef GOFFSET - // make sure kernel parameter "base" is not used, if GOFFSET is defined - #define BASE -#else - // make sure kernel parameter "base" is used, if GOFFSET is not defined - #define BASE const u base, -#endif - -#define ch(n) Ch(V[(4 + 128 - n) % 8], V[(5 + 128 - n) % 8], V[(6 + 128 - n) % 8]) -#define ma(n) Ma(V[(1 + 128 - n) % 8], V[(2 + 128 - n) % 8], V[(0 + 128 - n) % 8]) - #define rotr15(n) (rotate(n, 15U) ^ rotate(n, 13U) ^ (n >> 10U)) #define rotr25(n) (rotate(n, 25U) ^ rotate(n, 14U) ^ (n >> 3U)) #define rotr26(n) (rotate(n, 26U) ^ rotate(n, 21U) ^ rotate(n, 7U)) @@ -51,9 +40,12 @@ __kernel __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) - void search( BASE - const uint PreVal4, - const uint H1, const uint D1A, const uint PreVal0, const uint B1, const uint C1, + void search( + #ifndef GOFFSET + const u base, + #endif + const uint PreVal0, const uint PreVal4, + const uint H1, const uint D1A, const uint B1, const uint C1, const uint F1, const uint G1, const uint C1addK5, const uint B1addK6, const uint PreVal0addK7, const uint W16addK16, const uint W17addK17, const uint PreW18, const uint PreW19, @@ -62,82 +54,83 @@ __kernel const uint state0, const uint state1, const uint state2, const uint state3, const uint state4, const uint state5, const uint state6, const uint state7, const uint state0A, const uint state0B, + const uint state1A, const uint state2A, const uint state3A, const uint state4A, + const uint state5A, const uint state6A, const uint state7A, __global uint * output) { - u W[17]; + u W[16]; u V[8]; #ifdef VECTORS8 #ifdef GOFFSET - u nonce = ((uint)get_global_id(0) << 3) + (u)(0, 1, 2, 3, 4, 5, 6, 7); + const u nonce = ((uint)get_global_id(0) << 3) + (u)(0, 1, 2, 3, 4, 5, 6, 7); #else - u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 3) + ((uint)get_local_id(0) << 3) + base; + const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 3) + ((uint)get_local_id(0) << 3) + base; #endif #elif defined VECTORS4 #ifdef GOFFSET - u nonce = ((uint)get_global_id(0) << 2) + (u)(0, 1, 2, 3); + const u nonce = ((uint)get_global_id(0) << 2) + (u)(0, 1, 2, 3); #else - u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 2) + ((uint)get_local_id(0) << 2) + base; + const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 2) + ((uint)get_local_id(0) << 2) + base; #endif #elif defined VECTORS2 #ifdef GOFFSET - u nonce = ((uint)get_global_id(0) << 1) + (u)(0, 1); + const u nonce = ((uint)get_global_id(0) << 1) + (u)(0, 1); #else - u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 1) + ((uint)get_local_id(0) << 1) + base; + const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 1) + ((uint)get_local_id(0) << 1) + base; #endif #else #ifdef GOFFSET - u nonce = (uint)get_global_id(0); + const u nonce = (uint)get_global_id(0); #else - u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0)) + (uint)get_local_id(0) + base; + const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0)) + (uint)get_local_id(0) + base; #endif #endif + V[0] = PreVal0 + nonce; V[4] = PreVal4 + nonce; - V[7] = H1 + (V[3] = D1A + Ch((PreVal0 + nonce), B1, C1) + rotr26(PreVal0 + nonce)); - V[3] += rotr30(V[4]) + Ma(F1, G1, V[4]); + V[7] = H1 + (V[3] = D1A + ch(V[0], B1, C1) + rotr26(V[0])); + V[3] += rotr30(V[4]) + ma(F1, G1, V[4]); - V[6] = G1 + (V[2] = C1addK5 + Ch(V[7], (PreVal0 + nonce), B1) + rotr26(V[7])); - V[2] += rotr30(V[3]) + Ma(V[4], F1, V[3]); + V[6] = G1 + (V[2] = C1addK5 + ch(V[7], V[0], B1) + rotr26(V[7])); + V[2] += rotr30(V[3]) + ma(V[4], F1, V[3]); - V[5] = F1 + (V[1] = B1addK6 + Ch(V[6], V[7], (PreVal0 + nonce)) + rotr26(V[6])); - V[1] += rotr30(V[2]) + Ma(V[3], V[4], V[2]); + V[5] = F1 + (V[1] = B1addK6 + ch(V[6], V[7], V[0]) + rotr26(V[6])); + V[1] += rotr30(V[2]) + ma(V[3], V[4], V[2]); - V[4] += nonce + PreVal0addK7 + Ch(V[5], V[6], V[7]) + rotr26(V[5]); - V[0] = nonce + PreVal0addK7 + Ch(V[5], V[6], V[7]) + rotr26(V[5]) +rotr30(V[1]) + Ma(V[2], V[3], V[1]); + V[4] = V[4] + (V[0] = PreVal0addK7 + nonce + ch(V[5], V[6], V[7]) + rotr26(V[5])); + V[0] += rotr30(V[1]) + ma(V[2], V[3], V[1]); - V[3] += 0xd807aa98 + V[7] + Ch(V[4], V[5], V[6]) + rotr26(V[4]); - V[7] = 0xd807aa98 + V[7] + Ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + Ma(V[1], V[2], V[0]); + V[3] += 0xd807aa98 + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = 0xd807aa98 + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); - V[2] += 0x12835b01 + V[6] + Ch(V[3], V[4], V[5]) + rotr26(V[3]); - V[6] = 0x12835b01 + V[6] + Ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + Ma(V[0], V[1], V[7]); + V[2] += 0x12835b01 + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]); + V[6] = 0x12835b01 + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); - V[1] += 0x243185be + V[5] + Ch(V[2], V[3], V[4]) + rotr26(V[2]); - V[5] = 0x243185be + V[5] + Ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + Ma(V[7], V[0], V[6]); + V[1] += 0x243185be + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]); + V[5] = 0x243185be + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); - V[0] += 0x550c7dc3 + V[4] + Ch(V[1], V[2], V[3]) + rotr26(V[1]); - V[4] = 0x550c7dc3 + V[4] + Ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + Ma(V[6], V[7], V[5]); + V[0] += 0x550c7dc3 + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + V[4] = 0x550c7dc3 + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); -//--------------- ch() + ma() replaced above --------------- + V[7] += 0x72be5d74 + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = 0x72be5d74 + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - V[7] += 0x72be5d74 + V[3] + ch(12) + rotr26(V[0]); - V[3] = 0x72be5d74 + V[3] + ch(12) + rotr26(V[0]) + rotr30(V[4]) + ma(12); + V[6] += 0x80deb1fe + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = 0x80deb1fe + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); - V[6] += 0x80deb1fe + V[2] + ch(13) + rotr26(V[7]); - V[2] = 0x80deb1fe + V[2] + ch(13) + rotr26(V[7]) + rotr30(V[3]) + ma(13); + V[5] += 0x9bdc06a7 + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]); + V[1] = 0x9bdc06a7 + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); - V[5] += 0x9bdc06a7 + V[1] + ch(14) + rotr26(V[6]); - V[1] = 0x9bdc06a7 + V[1] + ch(14) + rotr26(V[6]) + rotr30(V[2]) + ma(14); + V[4] += 0xc19bf3f4 + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = 0xc19bf3f4 + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); - V[4] += 0xc19bf3f4 + V[0] + ch(15) + rotr26(V[5]); - V[0] = 0xc19bf3f4 + V[0] + ch(15) + rotr26(V[5]) + rotr30(V[1]) + ma(15); + V[3] += W16addK16 + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = W16addK16 + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); - V[3] += W16addK16 + V[7] + ch(16) + rotr26(V[4]); - V[7] = W16addK16 + V[7] + ch(16) + rotr26(V[4]) + rotr30(V[0]) + ma(16); - - V[2] += W17addK17 + V[6] + ch(17) + rotr26(V[3]); - V[6] = W17addK17 + V[6] + ch(17) + rotr26(V[3]) + rotr30(V[7]) + ma(17); + V[2] += W17addK17 + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]); + V[6] = W17addK17 + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); //---------------------------------------------------------------------------------- @@ -162,184 +155,184 @@ __kernel W[9] = W[2] + rotr15(W[7]); W[10] = W[3] + rotr15(W[8]); W[11] = W[4] + rotr15(W[9]); - W[12] = 0x00a00055 + W[5] + rotr15(W[10]); - W[13] = PreW31 + W[6] + rotr15(W[11]); - W[14] = PreW32 + W[7] + rotr15(W[12]); - W[15] = W17 + W[8] + rotr15(W[13]) + rotr25(W[0]); - W[16] = W[0] + W[9] + rotr15(W[14]) + rotr25(W[1]); + W[12] = W[5] + 0x00a00055 + rotr15(W[10]); + W[13] = W[6] + PreW31 + rotr15(W[11]); + W[14] = W[7] + PreW32 + rotr15(W[12]); + W[15] = W[8] + W17 + rotr15(W[13]) + rotr25(W[0]); - V[1] += 0x0fc19dc6 + V[5] + W[0] + ch(18) + rotr26(V[2]); - V[5] = 0x0fc19dc6 + V[5] + W[0] + ch(18) + rotr26(V[2]) + rotr30(V[6]) + ma(18); + V[1] += 0x0fc19dc6 + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]); + V[5] = 0x0fc19dc6 + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); - V[0] += 0x240ca1cc + V[4] + W[1] + ch(19) + rotr26(V[1]); - V[4] = 0x240ca1cc + V[4] + W[1] + ch(19) + rotr26(V[1]) + rotr30(V[5]) + ma(19); + V[0] += 0x240ca1cc + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + V[4] = 0x240ca1cc + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); - V[7] += 0x2de92c6f + V[3] + W[2] + ch(20) + rotr26(V[0]); - V[3] = 0x2de92c6f + V[3] + W[2] + ch(20) + rotr26(V[0]) + rotr30(V[4]) + ma(20); + V[7] += 0x2de92c6f + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = 0x2de92c6f + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - V[6] += 0x4a7484aa + V[2] + W[3] + ch(21) + rotr26(V[7]); - V[2] = 0x4a7484aa + V[2] + W[3] + ch(21) + rotr26(V[7]) + rotr30(V[3]) + ma(21); + V[6] += 0x4a7484aa + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = 0x4a7484aa + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); - V[5] += 0x5cb0a9dc + V[1] + W[4] + ch(22) + rotr26(V[6]); - V[1] = 0x5cb0a9dc + V[1] + W[4] + ch(22) + rotr26(V[6]) + rotr30(V[2]) + ma(22); + V[5] += 0x5cb0a9dc + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]); + V[1] = 0x5cb0a9dc + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); - V[4] += 0x76f988da + V[0] + W[5] + ch(23) + rotr26(V[5]); - V[0] = 0x76f988da + V[0] + W[5] + ch(23) + rotr26(V[5]) + rotr30(V[1]) + ma(23); + V[4] += 0x76f988da + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = 0x76f988da + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); - V[3] += 0x983e5152 + V[7] + W[6] + ch(24) + rotr26(V[4]); - V[7] = 0x983e5152 + V[7] + W[6] + ch(24) + rotr26(V[4]) + rotr30(V[0]) + ma(24); + V[3] += 0x983e5152 + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = 0x983e5152 + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); - V[2] += 0xa831c66d + V[6] + W[7] + ch(25) + rotr26(V[3]); - V[6] = 0xa831c66d + V[6] + W[7] + ch(25) + rotr26(V[3]) + rotr30(V[7]) + ma(25); + V[2] += 0xa831c66d + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]); + V[6] = 0xa831c66d + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); - V[1] += 0xb00327c8 + V[5] + W[8] + ch(26) + rotr26(V[2]); - V[5] = 0xb00327c8 + V[5] + W[8] + ch(26) + rotr26(V[2]) + rotr30(V[6]) + ma(26); + V[1] += 0xb00327c8 + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]); + V[5] = 0xb00327c8 + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); - V[0] += 0xbf597fc7 + V[4] + W[9] + ch(27) + rotr26(V[1]); - V[4] = 0xbf597fc7 + V[4] + W[9] + ch(27) + rotr26(V[1]) + rotr30(V[5]) + ma(27); + V[0] += 0xbf597fc7 + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + V[4] = 0xbf597fc7 + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); - V[7] += 0xc6e00bf3 + V[3] + W[10] + ch(28) + rotr26(V[0]); - V[3] = 0xc6e00bf3 + V[3] + W[10] + ch(28) + rotr26(V[0]) + rotr30(V[4]) + ma(28); + V[7] += 0xc6e00bf3 + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = 0xc6e00bf3 + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - V[6] += 0xd5a79147 + V[2] + W[11] + ch(29) + rotr26(V[7]); - V[2] = 0xd5a79147 + V[2] + W[11] + ch(29) + rotr26(V[7]) + rotr30(V[3]) + ma(29); + V[6] += 0xd5a79147 + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = 0xd5a79147 + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); - V[5] += 0x06ca6351 + V[1] + W[12] + ch(30) + rotr26(V[6]); - V[1] = 0x06ca6351 + V[1] + W[12] + ch(30) + rotr26(V[6]) + rotr30(V[2]) + ma(30); + V[5] += 0x06ca6351 + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]); + V[1] = 0x06ca6351 + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); - V[4] += 0x14292967 + V[0] + W[13] + ch(31) + rotr26(V[5]); - V[0] = 0x14292967 + V[0] + W[13] + ch(31) + rotr26(V[5]) + rotr30(V[1]) + ma(31); + V[4] += 0x14292967 + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = 0x14292967 + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); - V[3] += 0x27b70a85 + V[7] + W[14] + ch(32) + rotr26(V[4]); - V[7] = 0x27b70a85 + V[7] + W[14] + ch(32) + rotr26(V[4]) + rotr30(V[0]) + ma(32); + V[3] += 0x27b70a85 + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = 0x27b70a85 + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); - V[2] += 0x2e1b2138 + V[6] + W[15] + ch(33) + rotr26(V[3]); - V[6] = 0x2e1b2138 + V[6] + W[15] + ch(33) + rotr26(V[3]) + rotr30(V[7]) + ma(33); - - V[1] += 0x4d2c6dfc + V[5] + W[16] + ch(34) + rotr26(V[2]); - V[5] = 0x4d2c6dfc + V[5] + W[16] + ch(34) + rotr26(V[2]) + rotr30(V[6]) + ma(34); + V[2] += 0x2e1b2138 + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]); + V[6] = 0x2e1b2138 + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); //---------------------------------------------------------------------------------- - W[0] = W[1] + W[10] + rotr15(W[15]) + rotr25( W[2]); - W[1] = W[2] + W[11] + rotr15(W[16]) + rotr25( W[3]); - W[2] = W[3] + W[12] + rotr15( W[0]) + rotr25( W[4]); - W[3] = W[4] + W[13] + rotr15( W[1]) + rotr25( W[5]); - W[4] = W[5] + W[14] + rotr15( W[2]) + rotr25( W[6]); - W[5] = W[6] + W[15] + rotr15( W[3]) + rotr25( W[7]); - W[6] = W[7] + W[16] + rotr15( W[4]) + rotr25( W[8]); - W[7] = W[8] + W[0] + rotr15( W[5]) + rotr25( W[9]); - W[8] = W[9] + W[1] + rotr15( W[6]) + rotr25(W[10]); - W[9] = W[10] + W[2] + rotr15( W[7]) + rotr25(W[11]); - W[10] = W[11] + W[3] + rotr15( W[8]) + rotr25(W[12]); - W[11] = W[12] + W[4] + rotr15( W[9]) + rotr25(W[13]); - W[12] = W[13] + W[5] + rotr15(W[10]) + rotr25(W[14]); - W[13] = W[14] + W[6] + rotr15(W[11]) + rotr25(W[15]); - W[14] = W[15] + W[7] + rotr15(W[12]) + rotr25(W[16]); - W[15] = W[16] + W[8] + rotr15(W[13]) + rotr25( W[0]); - W[16] = W[0] + W[9] + rotr15(W[14]) + rotr25( W[1]); + W[0] = W[0] + W[9] + rotr15(W[14]) + rotr25( W[1]); + W[1] = W[1] + W[10] + rotr15(W[15]) + rotr25( W[2]); + W[2] = W[2] + W[11] + rotr15( W[0]) + rotr25( W[3]); + W[3] = W[3] + W[12] + rotr15( W[1]) + rotr25( W[4]); + W[4] = W[4] + W[13] + rotr15( W[2]) + rotr25( W[5]); + W[5] = W[5] + W[14] + rotr15( W[3]) + rotr25( W[6]); + W[6] = W[6] + W[15] + rotr15( W[4]) + rotr25( W[7]); + W[7] = W[7] + W[0] + rotr15( W[5]) + rotr25( W[8]); + W[8] = W[8] + W[1] + rotr15( W[6]) + rotr25( W[9]); + W[9] = W[9] + W[2] + rotr15( W[7]) + rotr25(W[10]); + W[10] = W[10] + W[3] + rotr15( W[8]) + rotr25(W[11]); + W[11] = W[11] + W[4] + rotr15( W[9]) + rotr25(W[12]); + W[12] = W[12] + W[5] + rotr15(W[10]) + rotr25(W[13]); + W[13] = W[13] + W[6] + rotr15(W[11]) + rotr25(W[14]); + W[14] = W[14] + W[7] + rotr15(W[12]) + rotr25(W[15]); + W[15] = W[15] + W[8] + rotr15(W[13]) + rotr25( W[0]); - V[0] += 0x53380d13 + V[4] + W[0] + ch(35) + rotr26(V[1]); - V[4] = 0x53380d13 + V[4] + W[0] + ch(35) + rotr26(V[1]) + rotr30(V[5]) + ma(35); + V[1] += 0x4d2c6dfc + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]); + V[5] = 0x4d2c6dfc + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); - V[7] += 0x650a7354 + V[3] + W[1] + ch(36) + rotr26(V[0]); - V[3] = 0x650a7354 + V[3] + W[1] + ch(36) + rotr26(V[0]) + rotr30(V[4]) + ma(36); + V[0] += 0x53380d13 + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + V[4] = 0x53380d13 + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); - V[6] += 0x766a0abb + V[2] + W[2] + ch(37) + rotr26(V[7]); - V[2] = 0x766a0abb + V[2] + W[2] + ch(37) + rotr26(V[7]) + rotr30(V[3]) + ma(37); + V[7] += 0x650a7354 + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = 0x650a7354 + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - V[5] += 0x81c2c92e + V[1] + W[3] + ch(38) + rotr26(V[6]); - V[1] = 0x81c2c92e + V[1] + W[3] + ch(38) + rotr26(V[6]) + rotr30(V[2]) + ma(38); + V[6] += 0x766a0abb + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = 0x766a0abb + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); - V[4] += 0x92722c85 + V[0] + W[4] + ch(39) + rotr26(V[5]); - V[0] = 0x92722c85 + V[0] + W[4] + ch(39) + rotr26(V[5]) + rotr30(V[1]) + ma(39); + V[5] += 0x81c2c92e + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]); + V[1] = 0x81c2c92e + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); - V[3] += 0xa2bfe8a1 + V[7] + W[5] + ch(40) + rotr26(V[4]); - V[7] = 0xa2bfe8a1 + V[7] + W[5] + ch(40) + rotr26(V[4]) + rotr30(V[0]) + ma(40); + V[4] += 0x92722c85 + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = 0x92722c85 + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); - V[2] += 0xa81a664b + V[6] + W[6] + ch(41) + rotr26(V[3]); - V[6] = 0xa81a664b + V[6] + W[6] + ch(41) + rotr26(V[3]) + rotr30(V[7]) + ma(41); + V[3] += 0xa2bfe8a1 + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = 0xa2bfe8a1 + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); - V[1] += 0xc24b8b70 + V[5] + W[7] + ch(42) + rotr26(V[2]); - V[5] = 0xc24b8b70 + V[5] + W[7] + ch(42) + rotr26(V[2]) + rotr30(V[6]) + ma(42); + V[2] += 0xa81a664b + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]); + V[6] = 0xa81a664b + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); - V[0] += 0xc76c51a3 + V[4] + W[8] + ch(43) + rotr26(V[1]); - V[4] = 0xc76c51a3 + V[4] + W[8] + ch(43) + rotr26(V[1]) + rotr30(V[5]) + ma(43); + V[1] += 0xc24b8b70 + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]); + V[5] = 0xc24b8b70 + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); - V[7] += 0xd192e819 + V[3] + W[9] + ch(44) + rotr26(V[0]); - V[3] = 0xd192e819 + V[3] + W[9] + ch(44) + rotr26(V[0]) + rotr30(V[4]) + ma(44); + V[0] += 0xc76c51a3 + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + V[4] = 0xc76c51a3 + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); - V[6] += 0xd6990624 + V[2] + W[10] + ch(45) + rotr26(V[7]); - V[2] = 0xd6990624 + V[2] + W[10] + ch(45) + rotr26(V[7]) + rotr30(V[3]) + ma(45); + V[7] += 0xd192e819 + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = 0xd192e819 + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - V[5] += 0xf40e3585 + V[1] + W[11] + ch(46) + rotr26(V[6]); - V[1] = 0xf40e3585 + V[1] + W[11] + ch(46) + rotr26(V[6]) + rotr30(V[2]) + ma(46); + V[6] += 0xd6990624 + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = 0xd6990624 + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); - V[4] += 0x106aa070 + V[0] + W[12] + ch(47) + rotr26(V[5]); - V[0] = 0x106aa070 + V[0] + W[12] + ch(47) + rotr26(V[5]) + rotr30(V[1]) + ma(47); + V[5] += 0xf40e3585 + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]); + V[1] = 0xf40e3585 + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); - V[3] += 0x19a4c116 + V[7] + W[13] + ch(48) + rotr26(V[4]); - V[7] = 0x19a4c116 + V[7] + W[13] + ch(48) + rotr26(V[4]) + rotr30(V[0]) + ma(48); + V[4] += 0x106aa070 + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = 0x106aa070 + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); - V[2] += 0x1e376c08 + V[6] + W[14] + ch(49) + rotr26(V[3]); - V[6] = 0x1e376c08 + V[6] + W[14] + ch(49) + rotr26(V[3]) + rotr30(V[7]) + ma(49); + V[3] += 0x19a4c116 + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = 0x19a4c116 + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); - V[1] += 0x2748774c + V[5] + W[15] + ch(50) + rotr26(V[2]); - V[5] = 0x2748774c + V[5] + W[15] + ch(50) + rotr26(V[2]) + rotr30(V[6]) + ma(50); - - V[0] += 0x34b0bcb5 + V[4] + W[16] + ch(51) + rotr26(V[1]); - V[4] = 0x34b0bcb5 + V[4] + W[16] + ch(51) + rotr26(V[1]) + rotr30(V[5]) + ma(51); + V[2] += 0x1e376c08 + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]); + V[6] = 0x1e376c08 + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); //---------------------------------------------------------------------------------- - W[0] = W[1] + W[10] + rotr15(W[15]) + rotr25( W[2]); - W[1] = W[2] + W[11] + rotr15(W[16]) + rotr25( W[3]); - W[2] = W[3] + W[12] + rotr15( W[0]) + rotr25( W[4]); - W[3] = W[4] + W[13] + rotr15( W[1]) + rotr25( W[5]); - W[4] = W[5] + W[14] + rotr15( W[2]) + rotr25( W[6]); - W[5] = W[6] + W[15] + rotr15( W[3]) + rotr25( W[7]); - W[6] = W[7] + W[16] + rotr15( W[4]) + rotr25( W[8]); - W[7] = W[8] + W[0] + rotr15( W[5]) + rotr25( W[9]); - W[8] = W[9] + W[1] + rotr15( W[6]) + rotr25(W[10]); - W[9] = W[10] + W[2] + rotr15( W[7]) + rotr25(W[11]); - W[10] = W[11] + W[3] + rotr15( W[8]) + rotr25(W[12]); - W[11] = W[12] + W[4] + rotr15( W[9]) + rotr25(W[13]); + W[0] = W[0] + W[9] + rotr15(W[14]) + rotr25( W[1]); + W[1] = W[1] + W[10] + rotr15(W[15]) + rotr25( W[2]); + W[2] = W[2] + W[11] + rotr15( W[0]) + rotr25( W[3]); + W[3] = W[3] + W[12] + rotr15( W[1]) + rotr25( W[4]); + W[4] = W[4] + W[13] + rotr15( W[2]) + rotr25( W[5]); + W[5] = W[5] + W[14] + rotr15( W[3]) + rotr25( W[6]); + W[6] = W[6] + W[15] + rotr15( W[4]) + rotr25( W[7]); + W[7] = W[7] + W[0] + rotr15( W[5]) + rotr25( W[8]); + W[8] = W[8] + W[1] + rotr15( W[6]) + rotr25( W[9]); + W[9] = W[9] + W[2] + rotr15( W[7]) + rotr25(W[10]); + W[10] = W[10] + W[3] + rotr15( W[8]) + rotr25(W[11]); + W[11] = W[11] + W[4] + rotr15( W[9]) + rotr25(W[12]); + W[12] = W[12] + W[5] + rotr15(W[10]) + rotr25(W[13]); + W[13] = W[13] + W[6] + rotr15(W[11]) + rotr25(W[14]); - V[7] += 0x391c0cb3 + V[3] + W[0] + ch(52) + rotr26(V[0]); - V[3] = 0x391c0cb3 + V[3] + W[0] + ch(52) + rotr26(V[0]) + rotr30(V[4]) + ma(52); + V[1] += 0x2748774c + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]); + V[5] = 0x2748774c + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); - V[6] += 0x4ed8aa4a + V[2] + W[1] + ch(53) + rotr26(V[7]); - V[2] = 0x4ed8aa4a + V[2] + W[1] + ch(53) + rotr26(V[7]) + rotr30(V[3]) + ma(53); + V[0] += 0x34b0bcb5 + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + V[4] = 0x34b0bcb5 + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); - V[5] += 0x5b9cca4f + V[1] + W[2] + ch(54) + rotr26(V[6]); - V[1] = 0x5b9cca4f + V[1] + W[2] + ch(54) + rotr26(V[6]) + rotr30(V[2]) + ma(54); + V[7] += 0x391c0cb3 + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = 0x391c0cb3 + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - V[4] += 0x682e6ff3 + V[0] + W[3] + ch(55) + rotr26(V[5]); - V[0] = 0x682e6ff3 + V[0] + W[3] + ch(55) + rotr26(V[5]) + rotr30(V[1]) + ma(55); + V[6] += 0x4ed8aa4a + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = 0x4ed8aa4a + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); - V[3] += 0x748f82ee + V[7] + W[4] + ch(56) + rotr26(V[4]); - V[7] = 0x748f82ee + V[7] + W[4] + ch(56) + rotr26(V[4]) + rotr30(V[0]) + ma(56); + V[5] += 0x5b9cca4f + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]); + V[1] = 0x5b9cca4f + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); - V[2] += 0x78a5636f + V[6] + W[5] + ch(57) + rotr26(V[3]); - V[6] = 0x78a5636f + V[6] + W[5] + ch(57) + rotr26(V[3]) + rotr30(V[7]) + ma(57); + V[4] += 0x682e6ff3 + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = 0x682e6ff3 + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); - V[1] += 0x84c87814 + V[5] + W[6] + ch(58) + rotr26(V[2]); - V[5] = 0x84c87814 + V[5] + W[6] + ch(58) + rotr26(V[2]) + rotr30(V[6]) + ma(58); + V[3] += 0x748f82ee + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = 0x748f82ee + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); - V[0] += 0x8cc70208 + V[4] + W[7] + ch(59) + rotr26(V[1]); - V[4] = 0x8cc70208 + V[4] + W[7] + ch(59) + rotr26(V[1]) + rotr30(V[5]) + ma(59); + V[2] += 0x78a5636f + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]); + V[6] = 0x78a5636f + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); - V[7] += 0x90befffa + V[3] + W[8] + ch(60) + rotr26(V[0]); - V[3] = 0x90befffa + V[3] + W[8] + ch(60) + rotr26(V[0]) + rotr30(V[4]) + ma(60); + V[1] += 0x84c87814 + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]); + V[5] = 0x84c87814 + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); - V[6] += 0xa4506ceb + V[2] + W[9] + ch(61) + rotr26(V[7]); - V[2] = 0xa4506ceb + V[2] + W[9] + ch(61) + rotr26(V[7]) + rotr30(V[3]) + ma(61); + V[0] += 0x8cc70208 + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + V[4] = 0x8cc70208 + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); - V[5] += 0xbef9a3f7 + V[1] + W[10] + ch(62) + rotr26(V[6]); - V[1] = 0xbef9a3f7 + V[1] + W[10] + ch(62) + rotr26(V[6]) + rotr30(V[2]) + ma(62); + V[7] += 0x90befffa + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = 0x90befffa + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - V[4] += 0xc67178f2 + V[0] + W[11] + ch(63) + rotr26(V[5]); - V[0] = 0xc67178f2 + V[0] + W[11] + ch(63) + rotr26(V[5]) + rotr30(V[1]) + ma(63); + V[6] += 0xa4506ceb + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = 0xa4506ceb + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); + + V[5] += 0xbef9a3f7 + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]); + V[1] = 0xbef9a3f7 + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); + + V[4] += 0xc67178f2 + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = 0xc67178f2 + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); //---------------------------------------------------------------------------------- @@ -353,247 +346,283 @@ __kernel W[7] = state7 + V[7]; // 0x98c7e2a2 + W[0] - u state0AaddV0 = state0A + V[0]; + const u state0AaddV0 = state0A + V[0]; // 0xfc08884d + W[0] - u state0BaddV0 = state0B + V[0]; + const u state0BaddV0 = state0B + V[0]; - V[2] = 0x3c6ef372 + (V[6] = 0x90bb1e3c + W[1] + Ch(state0AaddV0, 0x510e527fU, 0x9b05688cU) + rotr26(state0AaddV0)); - V[6] += rotr30(state0BaddV0) + Ma(0x6a09e667U, 0xbb67ae85U, state0BaddV0); + // 0x90bb1e3c + W[1] + const u state1AaddV1 = state1A + V[1]; + // 0x50c6645b + W[2] + const u state2AaddV2 = state2A + V[2]; + // 0x3ac42e24 + W[3] + const u state3AaddV3 = state3A + V[3]; + // 0x3956c25b + W[4] + const u state4AaddV4 = state4A + V[4]; + // 0x59f111f1 + W[5] + const u state5AaddV5 = state5A + V[5]; + // 0x923f82a4 + W[6] + const u state6AaddV6 = state6A + V[6]; + // 0xab1c5ed5 + W[7] + const u state7AaddV7 = state7A + V[7]; + + V[2] = 0x3c6ef372 + (V[6] = state1AaddV1 + ch(state0AaddV0, 0x510e527fU, 0x9b05688cU) + rotr26(state0AaddV0)); + V[6] += rotr30(state0BaddV0) + ma(0x6a09e667U, 0xbb67ae85U, state0BaddV0); - V[1] = 0xbb67ae85 + (V[5] = 0x50c6645b + W[2] + Ch(V[2], state0AaddV0, 0x510e527fU) + rotr26(V[2])); - V[5] += rotr30(V[6]) + Ma(state0BaddV0, 0x6a09e667U, V[6]); + V[1] = 0xbb67ae85 + (V[5] = state2AaddV2 + ch(V[2], state0AaddV0, 0x510e527fU) + rotr26(V[2])); + V[5] += rotr30(V[6]) + ma(state0BaddV0, 0x6a09e667U, V[6]); - V[0] = 0x6a09e667 + (V[4] = 0x3ac42e24 + W[3] + Ch(V[1], V[2], state0AaddV0) + rotr26(V[1])); - V[4] += rotr30(V[5]) + Ma(V[6], state0BaddV0, V[5]); + V[0] = 0x6a09e667 + (V[4] = state3AaddV3 + ch(V[1], V[2], state0AaddV0) + rotr26(V[1])); + V[4] += rotr30(V[5]) + ma(V[6], state0BaddV0, V[5]); - V[7] = (state0BaddV0) + (V[3] = 0x3956c25b + state0AaddV0 + W[4] + Ch(V[0], V[1], V[2]) + rotr26(V[0])); - V[3] += rotr30(V[4]) + Ma(V[5], V[6], V[4]); + V[7] = state0BaddV0 + (V[3] = state4AaddV4 + state0AaddV0 + ch(V[0], V[1], V[2]) + rotr26(V[0])); + V[3] += rotr30(V[4]) + ma(V[5], V[6], V[4]); -//--------------- ch() + ma() replaced above --------------- + V[6] += (V[2] += state5AaddV5 + ch(V[7], V[0], V[1]) + rotr26(V[7])); + V[2] += rotr30(V[3]) + ma(V[4], V[5], V[3]); - V[6] += 0x59f111f1 + V[2] + W[5] + ch(69) + rotr26(V[7]); - V[2] = 0x59f111f1 + V[2] + W[5] + ch(69) + rotr26(V[7]) + rotr30(V[3]) + ma(69); + V[5] += (V[1] += state6AaddV6 + ch(V[6], V[7], V[0]) + rotr26(V[6])); + V[1] += rotr30(V[2]) + ma(V[3], V[4], V[2]); - V[5] += 0x923f82a4 + V[1] + W[6] + ch(70) + rotr26(V[6]); - V[1] = 0x923f82a4 + V[1] + W[6] + ch(70) + rotr26(V[6]) + rotr30(V[2]) + ma(70); + V[4] += (V[0] += state7AaddV7 + ch(V[5], V[6], V[7]) + rotr26(V[5])); + V[0] += rotr30(V[1]) + ma(V[2], V[3], V[1]); - V[4] += 0xab1c5ed5 + V[0] + W[7] + ch(71) + rotr26(V[5]); - V[0] = 0xab1c5ed5 + V[0] + W[7] + ch(71) + rotr26(V[5]) + rotr30(V[1]) + ma(71); + V[3] += 0x5807aa98 + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = 0x5807aa98 + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); - V[3] += 0x5807aa98 + V[7] + ch(72) + rotr26(V[4]); - V[7] = 0x5807aa98 + V[7] + ch(72) + rotr26(V[4]) + rotr30(V[0]) + ma(72); + V[2] += 0x12835b01 + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]); + V[6] = 0x12835b01 + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); - V[2] += 0x12835b01 + V[6] + ch(73) + rotr26(V[3]); - V[6] = 0x12835b01 + V[6] + ch(73) + rotr26(V[3]) + rotr30(V[7]) + ma(73); + V[1] += 0x243185be + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]); + V[5] = 0x243185be + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); - V[1] += 0x243185be + V[5] + ch(74) + rotr26(V[2]); - V[5] = 0x243185be + V[5] + ch(74) + rotr26(V[2]) + rotr30(V[6]) + ma(74); + V[0] += 0x550c7dc3 + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + V[4] = 0x550c7dc3 + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); - V[0] += 0x550c7dc3 + V[4] + ch(75) + rotr26(V[1]); - V[4] = 0x550c7dc3 + V[4] + ch(75) + rotr26(V[1]) + rotr30(V[5]) + ma(75); + V[7] += 0x72be5d74 + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = 0x72be5d74 + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - V[7] += 0x72be5d74 + V[3] + ch(76) + rotr26(V[0]); - V[3] = 0x72be5d74 + V[3] + ch(76) + rotr26(V[0]) + rotr30(V[4]) + ma(76); + V[6] += 0x80deb1fe + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = 0x80deb1fe + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); - V[6] += 0x80deb1fe + V[2] + ch(77) + rotr26(V[7]); - V[2] = 0x80deb1fe + V[2] + ch(77) + rotr26(V[7]) + rotr30(V[3]) + ma(77); + V[5] += 0x9bdc06a7 + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]); + V[1] = 0x9bdc06a7 + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); - V[5] += 0x9bdc06a7 + V[1] + ch(78) + rotr26(V[6]); - V[1] = 0x9bdc06a7 + V[1] + ch(78) + rotr26(V[6]) + rotr30(V[2]) + ma(78); - - V[4] += 0xc19bf274 + V[0] + ch(79) + rotr26(V[5]); - V[0] = 0xc19bf274 + V[0] + ch(79) + rotr26(V[5]) + rotr30(V[1]) + ma(79); + V[4] += 0xc19bf274 + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = 0xc19bf274 + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); //---------------------------------------------------------------------------------- W[0] = W[0] + rotr25(W[1]); - W[1] = 0x00a00000 + W[1] + rotr25(W[2]); + W[1] = W[1] + 0x00a00000 + rotr25(W[2]); W[2] = W[2] + rotr15(W[0]) + rotr25(W[3]); W[3] = W[3] + rotr15(W[1]) + rotr25(W[4]); W[4] = W[4] + rotr15(W[2]) + rotr25(W[5]); W[5] = W[5] + rotr15(W[3]) + rotr25(W[6]); - W[6] = 0x00000100 + W[6] + rotr15(W[4]) + rotr25(W[7]); - W[7] = 0x11002000 + W[7] + W[0] + rotr15(W[5]); - W[8] = 0x80000000 + W[1] + rotr15(W[6]); + W[6] = W[6] + 0x00000100 + rotr15(W[4]) + rotr25(W[7]); + W[7] = W[7] + W[0] + 0x11002000 + rotr15(W[5]); + W[8] = W[1] + 0x80000000 + rotr15(W[6]); W[9] = W[2] + rotr15(W[7]); W[10] = W[3] + rotr15(W[8]); W[11] = W[4] + rotr15(W[9]); W[12] = W[5] + rotr15(W[10]); W[13] = W[6] + rotr15(W[11]); - W[14] = 0x00400022 + W[7] + rotr15( W[12]); - W[15] = 0x00000100 + W[8] + rotr15( W[13]) + rotr25(W[0]); - W[16] = W[0] + W[9] + rotr15( W[14]) + rotr25(W[1]); + W[14] = W[7] + 0x00400022 + rotr15(W[12]); + W[15] = W[8] + 0x00000100 + rotr15(W[13]) + rotr25(W[0]); - V[3] += 0xe49b69c1 + V[7] + W[0] + ch(80) + rotr26(V[4]); - V[7] = 0xe49b69c1 + V[7] + W[0] + ch(80) + rotr26(V[4]) + rotr30(V[0]) + ma(80); + V[3] += 0xe49b69c1 + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = 0xe49b69c1 + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); - V[2] += 0xefbe4786 + V[6] + W[1] + ch(81) + rotr26(V[3]); - V[6] = 0xefbe4786 + V[6] + W[1] + ch(81) + rotr26(V[3]) + rotr30(V[7]) + ma(81); + V[2] += 0xefbe4786 + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]); + V[6] = 0xefbe4786 + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); - V[1] += 0x0fc19dc6 + V[5] + W[2] + ch(82) + rotr26(V[2]); - V[5] = 0x0fc19dc6 + V[5] + W[2] + ch(82) + rotr26(V[2]) + rotr30(V[6]) + ma(82); + V[1] += 0x0fc19dc6 + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]); + V[5] = 0x0fc19dc6 + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); - V[0] += 0x240ca1cc + V[4] + W[3] + ch(83) + rotr26(V[1]); - V[4] = 0x240ca1cc + V[4] + W[3] + ch(83) + rotr26(V[1]) + rotr30(V[5]) + ma(83); + V[0] += 0x240ca1cc + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + V[4] = 0x240ca1cc + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); - V[7] += 0x2de92c6f + V[3] + W[4] + ch(84) + rotr26(V[0]); - V[3] = 0x2de92c6f + V[3] + W[4] + ch(84) + rotr26(V[0]) + rotr30(V[4]) + ma(84); + V[7] += 0x2de92c6f + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = 0x2de92c6f + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - V[6] += 0x4a7484aa + V[2] + W[5] + ch(85) + rotr26(V[7]); - V[2] = 0x4a7484aa + V[2] + W[5] + ch(85) + rotr26(V[7]) + rotr30(V[3]) + ma(85); + V[6] += 0x4a7484aa + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = 0x4a7484aa + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); - V[5] += 0x5cb0a9dc + V[1] + W[6] + ch(86) + rotr26(V[6]); - V[1] = 0x5cb0a9dc + V[1] + W[6] + ch(86) + rotr26(V[6]) + rotr30(V[2]) + ma(86); + V[5] += 0x5cb0a9dc + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]); + V[1] = 0x5cb0a9dc + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); - V[4] += 0x76f988da + V[0] + W[7] + ch(87) + rotr26(V[5]); - V[0] = 0x76f988da + V[0] + W[7] + ch(87) + rotr26(V[5]) + rotr30(V[1]) + ma(87); + V[4] += 0x76f988da + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = 0x76f988da + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); - V[3] += 0x983e5152 + V[7] + W[8] + ch(88) + rotr26(V[4]); - V[7] = 0x983e5152 + V[7] + W[8] + ch(88) + rotr26(V[4]) + rotr30(V[0]) + ma(88); + V[3] += 0x983e5152 + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = 0x983e5152 + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); - V[2] += 0xa831c66d + V[6] + W[9] + ch(89) + rotr26(V[3]); - V[6] = 0xa831c66d + V[6] + W[9] + ch(89) + rotr26(V[3]) + rotr30(V[7]) + ma(89); + V[2] += 0xa831c66d + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]); + V[6] = 0xa831c66d + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); - V[1] += 0xb00327c8 + V[5] + W[10] + ch(90) + rotr26(V[2]); - V[5] = 0xb00327c8 + V[5] + W[10] + ch(90) + rotr26(V[2]) + rotr30(V[6]) + ma(90); + V[1] += 0xb00327c8 + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]); + V[5] = 0xb00327c8 + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); - V[0] += 0xbf597fc7 + V[4] + W[11] + ch(91) + rotr26(V[1]); - V[4] = 0xbf597fc7 + V[4] + W[11] + ch(91) + rotr26(V[1]) + rotr30(V[5]) + ma(91); + V[0] += 0xbf597fc7 + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + V[4] = 0xbf597fc7 + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); - V[7] += 0xc6e00bf3 + V[3] + W[12] + ch(92) + rotr26(V[0]); - V[3] = 0xc6e00bf3 + V[3] + W[12] + ch(92) + rotr26(V[0]) + rotr30(V[4]) + ma(92); + V[7] += 0xc6e00bf3 + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = 0xc6e00bf3 + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - V[6] += 0xd5a79147 + V[2] + W[13] + ch(93) + rotr26(V[7]); - V[2] = 0xd5a79147 + V[2] + W[13] + ch(93) + rotr26(V[7]) + rotr30(V[3]) + ma(93); + V[6] += 0xd5a79147 + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = 0xd5a79147 + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); - V[5] += 0x06ca6351 + V[1] + W[14] + ch(94) + rotr26(V[6]); - V[1] = 0x06ca6351 + V[1] + W[14] + ch(94) + rotr26(V[6]) + rotr30(V[2]) + ma(94); + V[5] += 0x06ca6351 + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]); + V[1] = 0x06ca6351 + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); - V[4] += 0x14292967 + V[0] + W[15] + ch(95) + rotr26(V[5]); - V[0] = 0x14292967 + V[0] + W[15] + ch(95) + rotr26(V[5]) + rotr30(V[1]) + ma(95); - - V[3] += 0x27b70a85 + V[7] + W[16] + ch(96) + rotr26(V[4]); - V[7] = 0x27b70a85 + V[7] + W[16] + ch(96) + rotr26(V[4]) + rotr30(V[0]) + ma(96); + V[4] += 0x14292967 + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = 0x14292967 + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); //---------------------------------------------------------------------------------- - W[0] = W[1] + W[10] + rotr15(W[15]) + rotr25( W[2]); - W[1] = W[2] + W[11] + rotr15(W[16]) + rotr25( W[3]); - W[2] = W[3] + W[12] + rotr15( W[0]) + rotr25( W[4]); - W[3] = W[4] + W[13] + rotr15( W[1]) + rotr25( W[5]); - W[4] = W[5] + W[14] + rotr15( W[2]) + rotr25( W[6]); - W[5] = W[6] + W[15] + rotr15( W[3]) + rotr25( W[7]); - W[6] = W[7] + W[16] + rotr15( W[4]) + rotr25( W[8]); - W[7] = W[8] + W[0] + rotr15( W[5]) + rotr25( W[9]); - W[8] = W[9] + W[1] + rotr15( W[6]) + rotr25(W[10]); - W[9] = W[10] + W[2] + rotr15( W[7]) + rotr25(W[11]); - W[10] = W[11] + W[3] + rotr15( W[8]) + rotr25(W[12]); - W[11] = W[12] + W[4] + rotr15( W[9]) + rotr25(W[13]); - W[12] = W[13] + W[5] + rotr15(W[10]) + rotr25(W[14]); - W[13] = W[14] + W[6] + rotr15(W[11]) + rotr25(W[15]); - W[14] = W[15] + W[7] + rotr15(W[12]) + rotr25(W[16]); - W[15] = W[16] + W[8] + rotr15(W[13]) + rotr25( W[0]); - W[16] = W[0] + W[9] + rotr15(W[14]) + rotr25( W[1]); + W[0] = W[0] + W[9] + rotr15(W[14]) + rotr25( W[1]); + W[1] = W[1] + W[10] + rotr15(W[15]) + rotr25( W[2]); + W[2] = W[2] + W[11] + rotr15( W[0]) + rotr25( W[3]); + W[3] = W[3] + W[12] + rotr15( W[1]) + rotr25( W[4]); + W[4] = W[4] + W[13] + rotr15( W[2]) + rotr25( W[5]); + W[5] = W[5] + W[14] + rotr15( W[3]) + rotr25( W[6]); + W[6] = W[6] + W[15] + rotr15( W[4]) + rotr25( W[7]); + W[7] = W[7] + W[0] + rotr15( W[5]) + rotr25( W[8]); + W[8] = W[8] + W[1] + rotr15( W[6]) + rotr25( W[9]); + W[9] = W[9] + W[2] + rotr15( W[7]) + rotr25(W[10]); + W[10] = W[10] + W[3] + rotr15( W[8]) + rotr25(W[11]); + W[11] = W[11] + W[4] + rotr15( W[9]) + rotr25(W[12]); + W[12] = W[12] + W[5] + rotr15(W[10]) + rotr25(W[13]); + W[13] = W[13] + W[6] + rotr15(W[11]) + rotr25(W[14]); + W[14] = W[14] + W[7] + rotr15(W[12]) + rotr25(W[15]); + W[15] = W[15] + W[8] + rotr15(W[13]) + rotr25( W[0]); - V[2] += 0x2e1b2138 + V[6] + W[0] + ch(97) + rotr26(V[3]); - V[6] = 0x2e1b2138 + V[6] + W[0] + ch(97) + rotr26(V[3]) + rotr30(V[7]) + ma(97); + V[3] += 0x27b70a85 + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = 0x27b70a85 + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); - V[1] += 0x4d2c6dfc + V[5] + W[1] + ch(98) + rotr26(V[2]); - V[5] = 0x4d2c6dfc + V[5] + W[1] + ch(98) + rotr26(V[2]) + rotr30(V[6]) + ma(98); + V[2] += 0x2e1b2138 + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]); + V[6] = 0x2e1b2138 + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); - V[0] += 0x53380d13 + V[4] + W[2] + ch(99) + rotr26(V[1]); - V[4] = 0x53380d13 + V[4] + W[2] + ch(99) + rotr26(V[1]) + rotr30(V[5]) + ma(99); + V[1] += 0x4d2c6dfc + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]); + V[5] = 0x4d2c6dfc + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); - V[7] += 0x650a7354 + V[3] + W[3] + ch(100) + rotr26(V[0]); - V[3] = 0x650a7354 + V[3] + W[3] + ch(100) + rotr26(V[0]) + rotr30(V[4]) + ma(100); + V[0] += 0x53380d13 + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + V[4] = 0x53380d13 + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); - V[6] += 0x766a0abb + V[2] + W[4] + ch(101) + rotr26(V[7]); - V[2] = 0x766a0abb + V[2] + W[4] + ch(101) + rotr26(V[7]) + rotr30(V[3]) + ma(101); + V[7] += 0x650a7354 + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = 0x650a7354 + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - V[5] += 0x81c2c92e + V[1] + W[5] + ch(102) + rotr26(V[6]); - V[1] = 0x81c2c92e + V[1] + W[5] + ch(102) + rotr26(V[6]) + rotr30(V[2]) + ma(102); + V[6] += 0x766a0abb + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = 0x766a0abb + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); - V[4] += 0x92722c85 + V[0] + W[6] + ch(103) + rotr26(V[5]); - V[0] = 0x92722c85 + V[0] + W[6] + ch(103) + rotr26(V[5]) + rotr30(V[1]) + ma(103); + V[5] += 0x81c2c92e + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]); + V[1] = 0x81c2c92e + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); - V[3] += 0xa2bfe8a1 + V[7] + W[7] + ch(104) + rotr26(V[4]); - V[7] = 0xa2bfe8a1 + V[7] + W[7] + ch(104) + rotr26(V[4]) + rotr30(V[0]) + ma(104); + V[4] += 0x92722c85 + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = 0x92722c85 + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); - V[2] += 0xa81a664b + V[6] + W[8] + ch(105) + rotr26(V[3]); - V[6] = 0xa81a664b + V[6] + W[8] + ch(105) + rotr26(V[3]) + rotr30(V[7]) + ma(105); + V[3] += 0xa2bfe8a1 + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = 0xa2bfe8a1 + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); - V[1] += 0xc24b8b70 + V[5] + W[9] + ch(106) + rotr26(V[2]); - V[5] = 0xc24b8b70 + V[5] + W[9] + ch(106) + rotr26(V[2]) + rotr30(V[6]) + ma(106); + V[2] += 0xa81a664b + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]); + V[6] = 0xa81a664b + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); - V[0] += 0xc76c51a3 + V[4] + W[10] + ch(107) + rotr26(V[1]); - V[4] = 0xc76c51a3 + V[4] + W[10] + ch(107) + rotr26(V[1]) + rotr30(V[5]) + ma(107); + V[1] += 0xc24b8b70 + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]); + V[5] = 0xc24b8b70 + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); - V[7] += 0xd192e819 + V[3] + W[11] + ch(108) + rotr26(V[0]); - V[3] = 0xd192e819 + V[3] + W[11] + ch(108) + rotr26(V[0]) + rotr30(V[4]) + ma(108); + V[0] += 0xc76c51a3 + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + V[4] = 0xc76c51a3 + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); - V[6] += 0xd6990624 + V[2] + W[12] + ch(109) + rotr26(V[7]); - V[2] = 0xd6990624 + V[2] + W[12] + ch(109) + rotr26(V[7]) + rotr30(V[3]) + ma(109); + V[7] += 0xd192e819 + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = 0xd192e819 + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - V[5] += 0xf40e3585 + V[1] + W[13] + ch(110) + rotr26(V[6]); - V[1] = 0xf40e3585 + V[1] + W[13] + ch(110) + rotr26(V[6]) + rotr30(V[2]) + ma(110); + V[6] += 0xd6990624 + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = 0xd6990624 + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); - V[4] += 0x106aa070 + V[0] + W[14] + ch(111) + rotr26(V[5]); - V[0] = 0x106aa070 + V[0] + W[14] + ch(111) + rotr26(V[5]) + rotr30(V[1]) + ma(111); + V[5] += 0xf40e3585 + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]); + V[1] = 0xf40e3585 + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); - V[3] += 0x19a4c116 + V[7] + W[15] + ch(112) + rotr26(V[4]); - V[7] = 0x19a4c116 + V[7] + W[15] + ch(112) + rotr26(V[4]) + rotr30(V[0]) + ma(112); - - V[2] += 0x1e376c08 + V[6] + W[16] + ch(113) + rotr26(V[3]); - V[6] = 0x1e376c08 + V[6] + W[16] + ch(113) + rotr26(V[3]) + rotr30(V[7]) + ma(113); + V[4] += 0x106aa070 + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = 0x106aa070 + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); //---------------------------------------------------------------------------------- - W[0] = W[1] + W[10] + rotr15(W[15]) + rotr25( W[2]); - W[1] = W[2] + W[11] + rotr15(W[16]) + rotr25( W[3]); - W[2] = W[3] + W[12] + rotr15( W[0]) + rotr25( W[4]); - W[3] = W[4] + W[13] + rotr15( W[1]) + rotr25( W[5]); - W[4] = W[5] + W[14] + rotr15( W[2]) + rotr25( W[6]); - W[5] = W[6] + W[15] + rotr15( W[3]) + rotr25( W[7]); - W[6] = W[7] + W[16] + rotr15( W[4]) + rotr25( W[8]); - W[7] = W[8] + W[0] + rotr15( W[5]) + rotr25( W[9]); - W[8] = W[9] + W[1] + rotr15( W[6]) + rotr25(W[10]); - W[9] = W[10] + W[2] + rotr15( W[7]) + rotr25(W[11]); - W[10] = W[11] + W[3] + rotr15( W[8]) + rotr25(W[12]); + W[0] = W[0] + W[9] + rotr15(W[14]) + rotr25( W[1]); + W[1] = W[1] + W[10] + rotr15(W[15]) + rotr25( W[2]); + W[2] = W[2] + W[11] + rotr15( W[0]) + rotr25( W[3]); + W[3] = W[3] + W[12] + rotr15( W[1]) + rotr25( W[4]); + W[4] = W[4] + W[13] + rotr15( W[2]) + rotr25( W[5]); + W[5] = W[5] + W[14] + rotr15( W[3]) + rotr25( W[6]); + W[6] = W[6] + W[15] + rotr15( W[4]) + rotr25( W[7]); + W[7] = W[7] + W[0] + rotr15( W[5]) + rotr25( W[8]); + W[8] = W[8] + W[1] + rotr15( W[6]) + rotr25( W[9]); + W[9] = W[9] + W[2] + rotr15( W[7]) + rotr25(W[10]); + W[10] = W[10] + W[3] + rotr15( W[8]) + rotr25(W[11]); + W[11] = W[11] + W[4] + rotr15( W[9]) + rotr25(W[12]); + W[12] = W[12] + W[5] + rotr15(W[10]) + rotr25(W[13]); - V[1] += 0x2748774c + V[5] + W[0] + ch(114) + rotr26(V[2]); - V[5] = 0x2748774c + V[5] + W[0] + ch(114) + rotr26(V[2]) + rotr30(V[6]) + ma(114); + V[3] += 0x19a4c116 + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = 0x19a4c116 + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); - V[0] += 0x34b0bcb5 + V[4] + W[1] + ch(115) + rotr26(V[1]); - V[4] = 0x34b0bcb5 + V[4] + W[1] + ch(115) + rotr26(V[1]) + rotr30(V[5]) + ma(115); + V[2] += 0x1e376c08 + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]); + V[6] = 0x1e376c08 + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); - V[7] += 0x391c0cb3 + V[3] + W[2] + ch(116) + rotr26(V[0]); - V[3] = 0x391c0cb3 + V[3] + W[2] + ch(116) + rotr26(V[0]) + rotr30(V[4]) + ma(116); + V[1] += 0x2748774c + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]); + V[5] = 0x2748774c + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); - V[6] += 0x4ed8aa4a + V[2] + W[3] + ch(117) + rotr26(V[7]); - V[2] = 0x4ed8aa4a + V[2] + W[3] + ch(117) + rotr26(V[7]) + rotr30(V[3]) + ma(117); + V[0] += 0x34b0bcb5 + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + V[4] = 0x34b0bcb5 + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); - V[5] += 0x5b9cca4f + V[1] + W[4] + ch(118) + rotr26(V[6]); - V[1] = 0x5b9cca4f + V[1] + W[4] + ch(118) + rotr26(V[6]) + rotr30(V[2]) + ma(118); + V[7] += 0x391c0cb3 + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = 0x391c0cb3 + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - V[4] += 0x682e6ff3 + V[0] + W[5] + ch(119) + rotr26(V[5]); - V[0] = 0x682e6ff3 + V[0] + W[5] + ch(119) + rotr26(V[5]) + rotr30(V[1]) + ma(119); + V[6] += 0x4ed8aa4a + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = 0x4ed8aa4a + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); - V[3] += 0x748f82ee + V[7] + W[6] + ch(120) + rotr26(V[4]); - V[7] = 0x748f82ee + V[7] + W[6] + ch(120) + rotr26(V[4]) + rotr30(V[0]) + ma(120); + V[5] += 0x5b9cca4f + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]); + V[1] = 0x5b9cca4f + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); - V[2] += 0x78a5636f + V[6] + W[7] + ch(121) + rotr26(V[3]); + V[4] += 0x682e6ff3 + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = 0x682e6ff3 + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); - V[1] += 0x84c87814 + V[5] + W[8] + ch(122) + rotr26(V[2]); + V[3] += 0x748f82ee + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = 0x748f82ee + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); - V[0] += 0x8cc70208 + V[4] + W[9] + ch(123) + rotr26(V[1]); + V[2] += 0x78a5636f + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]); - V[7] += V[3] + W[10] + ch(124) + rotr26(V[0]); + V[1] += 0x84c87814 + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]); + + V[0] += 0x8cc70208 + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + + V[7] += V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]); #define FOUND (0x80) #define NFLAG (0x7F) -#ifdef VECTORS4 +#ifdef VECTORS8 + V[7] ^= 0x136032ed; + + bool result = V[7].s0 & V[7].s1 & V[7].s2 & V[7].s3 & V[7].s4 & V[7].s5 & V[7].s6 & V[7].s7; + + if (!result) { + if (!V[7].s0) + output[FOUND] = output[NFLAG & nonce.s0] = nonce.s0; + if (!V[7].s1) + output[FOUND] = output[NFLAG & nonce.s1] = nonce.s1; + if (!V[7].s2) + output[FOUND] = output[NFLAG & nonce.s2] = nonce.s2; + if (!V[7].s3) + output[FOUND] = output[NFLAG & nonce.s3] = nonce.s3; + if (!V[7].s4) + output[FOUND] = output[NFLAG & nonce.s4] = nonce.s4; + if (!V[7].s5) + output[FOUND] = output[NFLAG & nonce.s5] = nonce.s5; + if (!V[7].s6) + output[FOUND] = output[NFLAG & nonce.s6] = nonce.s6; + if (!V[7].s7) + output[FOUND] = output[NFLAG & nonce.s7] = nonce.s7; + } +#elif defined VECTORS4 V[7] ^= 0x136032ed; bool result = V[7].x & V[7].y & V[7].z & V[7].w; @@ -608,21 +637,19 @@ __kernel if (!V[7].w) output[FOUND] = output[NFLAG & nonce.w] = nonce.w; } +#elif defined VECTORS2 + V[7] ^= 0x136032ed; + + bool result = V[7].x & V[7].y; + + if (!result) { + if (!V[7].x) + output[FOUND] = output[NFLAG & nonce.x] = nonce.x; + if (!V[7].y) + output[FOUND] = output[NFLAG & nonce.y] = nonce.y; + } #else - #ifdef VECTORS2 - V[7] ^= 0x136032ed; - - bool result = V[7].x & V[7].y; - - if (!result) { - if (!V[7].x) - output[FOUND] = output[NFLAG & nonce.x] = nonce.x; - if (!V[7].y) - output[FOUND] = output[NFLAG & nonce.y] = nonce.y; - } - #else - if (V[7] == 0x136032ed) - output[FOUND] = output[NFLAG & nonce] = nonce; - #endif + if (V[7] == 0x136032ed) + output[FOUND] = output[NFLAG & nonce] = nonce; #endif } diff --git a/findnonce.c b/findnonce.c index 61bc82bb..037af0f2 100644 --- a/findnonce.c +++ b/findnonce.c @@ -120,6 +120,13 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) { blk->zeroA = blk->ctx_a + 0x98c7e2a2; blk->zeroB = blk->ctx_a + 0xfc08884d; + blk->oneA = blk->ctx_b + 0x90bb1e3c; + blk->twoA = blk->ctx_c + 0x50c6645b; + blk->threeA = blk->ctx_d + 0x3ac42e24; + blk->fourA = blk->ctx_e + SHA256_K[4]; + blk->fiveA = blk->ctx_f + SHA256_K[5]; + blk->sixA = blk->ctx_g + SHA256_K[6]; + blk->sevenA = blk->ctx_h + SHA256_K[7]; } #define P(t) (W[(t)&0xF] = W[(t-16)&0xF] + (rotate(W[(t-15)&0xF], 25) ^ rotate(W[(t-15)&0xF], 14) ^ (W[(t-15)&0xF] >> 3)) + W[(t-7)&0xF] + (rotate(W[(t-2)&0xF], 15) ^ rotate(W[(t-2)&0xF], 13) ^ (W[(t-2)&0xF] >> 10))) @@ -178,7 +185,7 @@ static void send_nonce(struct pc_data *pcd, cl_uint nonce) E = blk->cty_e; F = blk->cty_f; G = blk->cty_g; H = blk->cty_h; W[0] = blk->merkle; W[1] = blk->ntime; - W[2] = blk->nbits; W[3] = nonce;; + W[2] = blk->nbits; W[3] = nonce; W[4] = 0x80000000; W[5] = 0x00000000; W[6] = 0x00000000; W[7] = 0x00000000; W[8] = 0x00000000; W[9] = 0x00000000; W[10] = 0x00000000; W[11] = 0x00000000; W[12] = 0x00000000; W[13] = 0x00000000; W[14] = 0x00000000; W[15] = 0x00000280; @@ -202,7 +209,7 @@ static void send_nonce(struct pc_data *pcd, cl_uint nonce) FR(32); FR(40); FR(48); PFR(56); - if (likely(H == 0xA41F32E7)) { + if (likely(H == 0xa41f32e7)) { if (unlikely(submit_nonce(thr, work, nonce) == false)) applog(LOG_ERR, "Failed to submit work, exiting"); } else { diff --git a/miner.h b/miner.h index 6e6dad57..68ce55b3 100644 --- a/miner.h +++ b/miner.h @@ -507,6 +507,7 @@ typedef struct { /* For diakgcn */ cl_uint B1addK6, PreVal0addK7, W16addK16, W17addK17; cl_uint zeroA, zeroB; + cl_uint oneA, twoA, threeA, fourA, fiveA, sixA, sevenA; } dev_blk_ctx; #else typedef struct {