From bff58c3bed937bd027e46907acd1eab7327e838b Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Tue, 24 Jul 2012 20:55:34 +1000 Subject: [PATCH] Add back in new cl files. --- diablo120724.cl | 1274 +++++++++++++++++++++++++++++++++++++++++++ diakgcn120724.cl | 587 ++++++++++++++++++++ phatk120724.cl | 417 ++++++++++++++ poclbm120724.cl | 1353 ++++++++++++++++++++++++++++++++++++++++++++++ scrypt120724.cl | 757 ++++++++++++++++++++++++++ 5 files changed, 4388 insertions(+) create mode 100644 diablo120724.cl create mode 100644 diakgcn120724.cl create mode 100644 phatk120724.cl create mode 100644 poclbm120724.cl create mode 100644 scrypt120724.cl diff --git a/diablo120724.cl b/diablo120724.cl new file mode 100644 index 00000000..4b64c300 --- /dev/null +++ b/diablo120724.cl @@ -0,0 +1,1274 @@ +/* + * DiabloMiner - OpenCL miner for BitCoin + * Copyright (C) 2010, 2011, 2012 Patrick McFarland + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifdef VECTORS4 + typedef uint4 z; +#elif defined(VECTORS2) + typedef uint2 z; +#else + typedef uint z; +#endif + +#ifdef BITALIGN +#pragma OPENCL EXTENSION cl_amd_media_ops : enable +#define Zrotr(a, b) amd_bitalign((z)a, (z)a, (z)(32 - b)) +#else +#define Zrotr(a, b) rotate((z)a, (z)b) +#endif + +#ifdef BFI_INT +#define ZCh(a, b, c) amd_bytealign(a, b, c) +#define ZMa(a, b, c) amd_bytealign((c ^ a), (b), (a)) +#else +#define ZCh(a, b, c) bitselect((z)c, (z)b, (z)a) +#define ZMa(a, b, c) bitselect((z)a, (z)b, (z)c ^ (z)a) +#endif + +#define ZR25(n) ((Zrotr((n), 25) ^ Zrotr((n), 14) ^ ((n) >> 3U))) +#define ZR15(n) ((Zrotr((n), 15) ^ Zrotr((n), 13) ^ ((n) >> 10U))) +#define ZR26(n) ((Zrotr((n), 26) ^ Zrotr((n), 21) ^ Zrotr((n), 7))) +#define ZR30(n) ((Zrotr((n), 30) ^ Zrotr((n), 19) ^ Zrotr((n), 10))) + +__kernel +__attribute__((vec_type_hint(z))) +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +void search( +#ifndef GOFFSET + const z base, +#endif + const uint PreVal4_state0, const uint PreVal4_state0_k7, + const uint PreVal4_T1, + const uint W18, const uint W19, + const uint W16, const uint W17, + const uint W16_plus_K16, const uint W17_plus_K17, + const uint W31, const uint W32, + const uint d1, const uint b1, const uint c1, + const uint h1, const uint f1, const uint g1, + const uint c1_plus_k5, const uint b1_plus_k6, + const uint state0, const uint state1, const uint state2, const uint state3, + const uint state4, const uint state5, const uint state6, const uint state7, + __global uint * output) +{ + + z ZA[930]; + +#ifdef GOFFSET + const z Znonce = (uint)(get_global_id(0)); +#else + const z Znonce = base + (uint)(get_global_id(0)); +#endif + + ZA[15] = Znonce + PreVal4_state0; + + ZA[16] = (ZCh(ZA[15], b1, c1) + d1) + ZR26(ZA[15]); + ZA[26] = Znonce + PreVal4_T1; + + ZA[27] = ZMa(f1, g1, ZA[26]) + ZR30(ZA[26]); + ZA[17] = ZA[16] + h1; + + ZA[19] = (ZCh(ZA[17], ZA[15], b1) + c1_plus_k5) + ZR26(ZA[17]); + ZA[28] = ZA[27] + ZA[16]; + + ZA[548] = 
ZMa(ZA[26], f1, ZA[28]) + ZR30(ZA[28]); + ZA[20] = ZA[19] + g1; + + ZA[22] = (ZCh(ZA[20], ZA[17], ZA[15]) + b1_plus_k6) + ZR26(ZA[20]); + ZA[29] = ZA[548] + ZA[19]; + + ZA[549] = ZMa(ZA[28], ZA[26], ZA[29]) + ZR30(ZA[29]); + ZA[23] = ZA[22] + f1; + + ZA[24] = ZCh(ZA[23], ZA[20], ZA[17]) + ZR26(ZA[23]); + ZA[180] = Znonce + PreVal4_state0_k7; + ZA[30] = ZA[549] + ZA[22]; + + ZA[31] = ZMa(ZA[29], ZA[28], ZA[30]) + ZR30(ZA[30]); + ZA[181] = ZA[180] + ZA[24]; + + ZA[182] = ZA[181] + ZA[26]; + ZA[183] = ZA[181] + ZA[31]; + ZA[18] = ZA[17] + 0xd807aa98U; + + ZA[186] = (ZCh(ZA[182], ZA[23], ZA[20]) + ZA[18]) + ZR26(ZA[182]); + ZA[184] = ZMa(ZA[30], ZA[29], ZA[183]) + ZR30(ZA[183]); + + ZA[187] = ZA[186] + ZA[28]; + ZA[188] = ZA[186] + ZA[184]; + ZA[21] = ZA[20] + 0x12835b01U; + + ZA[191] = (ZCh(ZA[187], ZA[182], ZA[23]) + ZA[21]) + ZR26(ZA[187]); + ZA[189] = ZMa(ZA[183], ZA[30], ZA[188]) + ZR30(ZA[188]); + + ZA[192] = ZA[191] + ZA[29]; + ZA[193] = ZA[191] + ZA[189]; + ZA[25] = ZA[23] + 0x243185beU; + + ZA[196] = (ZCh(ZA[192], ZA[187], ZA[182]) + ZA[25]) + ZR26(ZA[192]); + ZA[194] = ZMa(ZA[188], ZA[183], ZA[193]) + ZR30(ZA[193]); + + ZA[197] = ZA[196] + ZA[30]; + ZA[198] = ZA[196] + ZA[194]; + ZA[185] = ZA[182] + 0x550c7dc3U; + + ZA[201] = (ZCh(ZA[197], ZA[192], ZA[187]) + ZA[185]) + ZR26(ZA[197]); + ZA[199] = ZMa(ZA[193], ZA[188], ZA[198]) + ZR30(ZA[198]); + + ZA[202] = ZA[201] + ZA[183]; + ZA[203] = ZA[201] + ZA[199]; + ZA[190] = ZA[187] + 0x72be5d74U; + + ZA[206] = (ZCh(ZA[202], ZA[197], ZA[192]) + ZA[190]) + ZR26(ZA[202]); + ZA[204] = ZMa(ZA[198], ZA[193], ZA[203]) + ZR30(ZA[203]); + + ZA[207] = ZA[206] + ZA[188]; + ZA[208] = ZA[206] + ZA[204]; + ZA[195] = ZA[192] + 0x80deb1feU; + + ZA[211] = (ZCh(ZA[207], ZA[202], ZA[197]) + ZA[195]) + ZR26(ZA[207]); + ZA[209] = ZMa(ZA[203], ZA[198], ZA[208]) + ZR30(ZA[208]); + + ZA[212] = ZA[193] + ZA[211]; + ZA[213] = ZA[211] + ZA[209]; + ZA[200] = ZA[197] + 0x9bdc06a7U; + + ZA[216] = (ZCh(ZA[212], ZA[207], ZA[202]) + ZA[200]) + 
ZR26(ZA[212]); + ZA[214] = ZMa(ZA[208], ZA[203], ZA[213]) + ZR30(ZA[213]); + + ZA[217] = ZA[198] + ZA[216]; + ZA[218] = ZA[216] + ZA[214]; + ZA[205] = ZA[202] + 0xc19bf3f4U; + + ZA[220] = (ZCh(ZA[217], ZA[212], ZA[207]) + ZA[205]) + ZR26(ZA[217]); + ZA[219] = ZMa(ZA[213], ZA[208], ZA[218]) + ZR30(ZA[218]); + + ZA[222] = ZA[203] + ZA[220]; + ZA[223] = ZA[220] + ZA[219]; + ZA[210] = ZA[207] + W16_plus_K16; + + ZA[226] = (ZCh(ZA[222], ZA[217], ZA[212]) + ZA[210]) + ZR26(ZA[222]); + ZA[225] = ZMa(ZA[218], ZA[213], ZA[223]) + ZR30(ZA[223]); + + ZA[0] = ZR25(Znonce) + W18; + ZA[228] = ZA[226] + ZA[225]; + ZA[227] = ZA[208] + ZA[226]; + ZA[215] = ZA[212] + W17_plus_K17; + + ZA[231] = (ZCh(ZA[227], ZA[222], ZA[217]) + ZA[215]) + ZR26(ZA[227]); + ZA[229] = ZMa(ZA[223], ZA[218], ZA[228]) + ZR30(ZA[228]); + ZA[1] = ZA[0] + 0x0fc19dc6U; + + ZA[232] = ZA[213] + ZA[231]; + ZA[233] = ZA[231] + ZA[229]; + ZA[221] = ZA[217] + ZA[1]; + ZA[32] = Znonce + W19; + + ZA[236] = (ZCh(ZA[232], ZA[227], ZA[222]) + ZA[221]) + ZR26(ZA[232]); + ZA[234] = ZMa(ZA[228], ZA[223], ZA[233]) + ZR30(ZA[233]); + ZA[33] = ZA[32] + 0x240ca1ccU; + + ZA[3] = ZR15(ZA[0]) + 0x80000000U; + ZA[238] = ZA[236] + ZA[234]; + ZA[237] = ZA[218] + ZA[236]; + ZA[224] = ZA[222] + ZA[33]; + + ZA[241] = (ZCh(ZA[237], ZA[232], ZA[227]) + ZA[224]) + ZR26(ZA[237]); + ZA[239] = ZMa(ZA[233], ZA[228], ZA[238]) + ZR30(ZA[238]); + ZA[4] = ZA[3] + 0x2de92c6fU; + + ZA[35] = ZR15(ZA[32]); + ZA[243] = ZA[241] + ZA[239]; + ZA[242] = ZA[223] + ZA[241]; + ZA[230] = ZA[227] + ZA[4]; + + ZA[246] = (ZCh(ZA[242], ZA[237], ZA[232]) + ZA[230]) + ZR26(ZA[242]); + ZA[244] = ZMa(ZA[238], ZA[233], ZA[243]) + ZR30(ZA[243]); + ZA[36] = ZA[35] + 0x4a7484aaU; + + ZA[7] = ZR15(ZA[3]) + 0x00000280U; + ZA[248] = ZA[246] + ZA[244]; + ZA[247] = ZA[228] + ZA[246]; + ZA[235] = ZA[232] + ZA[36]; + + ZA[251] = (ZCh(ZA[247], ZA[242], ZA[237]) + ZA[235]) + ZR26(ZA[247]); + ZA[249] = ZMa(ZA[243], ZA[238], ZA[248]) + ZR30(ZA[248]); + ZA[8] = ZA[7] + 0x5cb0a9dcU; 
+ + ZA[38] = ZR15(ZA[35]) + W16; + ZA[253] = ZA[251] + ZA[249]; + ZA[252] = ZA[233] + ZA[251]; + ZA[240] = ZA[237] + ZA[8]; + + ZA[256] = (ZCh(ZA[252], ZA[247], ZA[242]) + ZA[240]) + ZR26(ZA[252]); + ZA[254] = ZMa(ZA[248], ZA[243], ZA[253]) + ZR30(ZA[253]); + ZA[40] = ZA[38] + 0x76f988daU; + + ZA[10] = ZR15(ZA[7]) + W17; + ZA[258] = ZA[256] + ZA[254]; + ZA[257] = ZA[238] + ZA[256]; + ZA[245] = ZA[242] + ZA[40]; + + ZA[261] = (ZCh(ZA[257], ZA[252], ZA[247]) + ZA[245]) + ZR26(ZA[257]); + ZA[259] = ZMa(ZA[253], ZA[248], ZA[258]) + ZR30(ZA[258]); + ZA[13] = ZA[10] + 0x983e5152U; + + ZA[43] = ZR15(ZA[38]) + ZA[0]; + ZA[263] = ZA[261] + ZA[259]; + ZA[262] = ZA[243] + ZA[261]; + ZA[250] = ZA[247] + ZA[13]; + + ZA[266] = (ZCh(ZA[262], ZA[257], ZA[252]) + ZA[250]) + ZR26(ZA[262]); + ZA[264] = ZMa(ZA[258], ZA[253], ZA[263]) + ZR30(ZA[263]); + ZA[11] = ZR15(ZA[10]); + ZA[45] = ZA[43] + 0xa831c66dU; + + ZA[52] = ZA[11] + ZA[32]; + ZA[267] = ZA[248] + ZA[266]; + ZA[255] = ZA[252] + ZA[45]; + ZA[268] = ZA[266] + ZA[264]; + + ZA[271] = (ZCh(ZA[267], ZA[262], ZA[257]) + ZA[255]) + ZR26(ZA[267]); + ZA[269] = ZMa(ZA[263], ZA[258], ZA[268]) + ZR30(ZA[268]); + ZA[54] = ZA[52] + 0xb00327c8U; + + ZA[48] = ZR15(ZA[43]) + ZA[3]; + ZA[273] = ZA[271] + ZA[269]; + ZA[272] = ZA[253] + ZA[271]; + ZA[260] = ZA[257] + ZA[54]; + + ZA[276] = (ZCh(ZA[272], ZA[267], ZA[262]) + ZA[260]) + ZR26(ZA[272]); + ZA[274] = ZMa(ZA[268], ZA[263], ZA[273]) + ZR30(ZA[273]); + ZA[49] = ZA[48] + 0xbf597fc7U; + + ZA[61] = ZR15(ZA[52]) + ZA[35]; + ZA[278] = ZA[276] + ZA[274]; + ZA[277] = ZA[258] + ZA[276]; + ZA[265] = ZA[262] + ZA[49]; + + ZA[281] = (ZCh(ZA[277], ZA[272], ZA[267]) + ZA[265]) + ZR26(ZA[277]); + ZA[279] = ZMa(ZA[273], ZA[268], ZA[278]) + ZR30(ZA[278]); + ZA[62] = ZA[61] + 0xc6e00bf3U; + + ZA[53] = ZR15(ZA[48]) + ZA[7]; + ZA[283] = ZA[281] + ZA[279]; + ZA[282] = ZA[263] + ZA[281]; + ZA[270] = ZA[267] + ZA[62]; + + ZA[286] = (ZCh(ZA[282], ZA[277], ZA[272]) + ZA[270]) + ZR26(ZA[282]); + ZA[284] = 
ZMa(ZA[278], ZA[273], ZA[283]) + ZR30(ZA[283]); + ZA[39] = ZA[38] + 0x00A00055U; + ZA[55] = ZA[53] + 0xd5a79147U; + + ZA[66] = ZR15(ZA[61]) + ZA[39]; + ZA[288] = ZA[286] + ZA[284]; + ZA[287] = ZA[268] + ZA[286]; + ZA[275] = ZA[272] + ZA[55]; + + ZA[291] = (ZCh(ZA[287], ZA[282], ZA[277]) + ZA[275]) + ZR26(ZA[287]); + ZA[289] = ZMa(ZA[283], ZA[278], ZA[288]) + ZR30(ZA[288]); + ZA[12] = ZA[10] + W31; + ZA[68] = ZA[66] + 0x06ca6351U; + + ZA[67] = ZR15(ZA[53]) + ZA[12]; + ZA[293] = ZA[291] + ZA[289]; + ZA[292] = ZA[273] + ZA[291]; + ZA[280] = ZA[277] + ZA[68]; + + ZA[296] = (ZCh(ZA[292], ZA[287], ZA[282]) + ZA[280]) + ZR26(ZA[292]); + ZA[294] = ZMa(ZA[288], ZA[283], ZA[293]) + ZR30(ZA[293]); + ZA[2] = ZR25(ZA[0]); + ZA[69] = ZA[67] + 0x14292967U; + ZA[44] = ZA[43] + W32; + + ZA[75] = ZR15(ZA[66]) + ZA[44]; + ZA[298] = ZA[296] + ZA[294]; + ZA[297] = ZA[278] + ZA[296]; + ZA[285] = ZA[282] + ZA[69]; + ZA[5] = ZA[2] + W17; + + ZA[301] = (ZCh(ZA[297], ZA[292], ZA[287]) + ZA[285]) + ZR26(ZA[297]); + ZA[299] = ZMa(ZA[293], ZA[288], ZA[298]) + ZR30(ZA[298]); + ZA[56] = ZA[52] + ZA[5]; + ZA[76] = ZA[75] + 0x27b70a85U; + + ZA[34] = ZR25(ZA[32]) + ZA[0]; + ZA[70] = ZR15(ZA[67]) + ZA[56]; + ZA[302] = ZA[283] + ZA[301]; + ZA[303] = ZA[301] + ZA[299]; + ZA[290] = ZA[287] + ZA[76]; + + ZA[306] = (ZCh(ZA[302], ZA[297], ZA[292]) + ZA[290]) + ZR26(ZA[302]); + ZA[304] = ZMa(ZA[298], ZA[293], ZA[303]) + ZR30(ZA[303]); + ZA[6] = ZR25(ZA[3]); + ZA[77] = ZA[70] + 0x2e1b2138U; + ZA[50] = ZA[34] + ZA[48]; + + ZA[78] = ZR15(ZA[75]) + ZA[50]; + ZA[308] = ZA[306] + ZA[304]; + ZA[307] = ZA[288] + ZA[306]; + ZA[295] = ZA[292] + ZA[77]; + ZA[41] = ZA[32] + ZA[6]; + + ZA[311] = (ZCh(ZA[307], ZA[302], ZA[297]) + ZA[295]) + ZR26(ZA[307]); + ZA[309] = ZMa(ZA[303], ZA[298], ZA[308]) + ZR30(ZA[308]); + ZA[63] = ZA[41] + ZA[61]; + ZA[85] = ZA[78] + 0x4d2c6dfcU; + + ZA[37] = ZR25(ZA[35]) + ZA[3]; + ZA[79] = ZR15(ZA[70]) + ZA[63]; + ZA[312] = ZA[293] + ZA[311]; + ZA[313] = ZA[311] + ZA[309]; + ZA[300] = 
ZA[297] + ZA[85]; + + ZA[316] = (ZCh(ZA[312], ZA[307], ZA[302]) + ZA[300]) + ZR26(ZA[312]); + ZA[314] = ZMa(ZA[308], ZA[303], ZA[313]) + ZR30(ZA[313]); + ZA[9] = ZR25(ZA[7]); + ZA[86] = ZA[79] + 0x53380d13U; + ZA[57] = ZA[37] + ZA[53]; + + ZA[87] = ZR15(ZA[78]) + ZA[57]; + ZA[318] = ZA[316] + ZA[314]; + ZA[317] = ZA[298] + ZA[316]; + ZA[305] = ZA[302] + ZA[86]; + ZA[46] = ZA[35] + ZA[9]; + + ZA[321] = (ZCh(ZA[317], ZA[312], ZA[307]) + ZA[305]) + ZR26(ZA[317]); + ZA[319] = ZMa(ZA[313], ZA[308], ZA[318]) + ZR30(ZA[318]); + ZA[71] = ZA[46] + ZA[66]; + ZA[92] = ZA[87] + 0x650a7354U; + + ZA[42] = ZR25(ZA[38]) + ZA[7]; + ZA[88] = ZR15(ZA[79]) + ZA[71]; + ZA[322] = ZA[303] + ZA[321]; + ZA[323] = ZA[321] + ZA[319]; + ZA[310] = ZA[307] + ZA[92]; + + ZA[326] = (ZCh(ZA[322], ZA[317], ZA[312]) + ZA[310]) + ZR26(ZA[322]); + ZA[324] = ZMa(ZA[318], ZA[313], ZA[323]) + ZR30(ZA[323]); + ZA[14] = ZR25(ZA[10]); + ZA[93] = ZA[88] + 0x766a0abbU; + ZA[72] = ZA[42] + ZA[67]; + + ZA[94] = ZR15(ZA[87]) + ZA[72]; + ZA[328] = ZA[326] + ZA[324]; + ZA[327] = ZA[308] + ZA[326]; + ZA[315] = ZA[312] + ZA[93]; + ZA[51] = ZA[38] + ZA[14]; + + ZA[331] = (ZCh(ZA[327], ZA[322], ZA[317]) + ZA[315]) + ZR26(ZA[327]); + ZA[329] = ZMa(ZA[323], ZA[318], ZA[328]) + ZR30(ZA[328]); + ZA[80] = ZA[51] + ZA[75]; + ZA[100] = ZA[94] + 0x81c2c92eU; + + ZA[47] = ZR25(ZA[43]) + ZA[10]; + ZA[95] = ZR15(ZA[88]) + ZA[80]; + ZA[332] = ZA[313] + ZA[331]; + ZA[333] = ZA[331] + ZA[329]; + ZA[320] = ZA[317] + ZA[100]; + + ZA[336] = (ZCh(ZA[332], ZA[327], ZA[322]) + ZA[320]) + ZR26(ZA[332]); + ZA[334] = ZMa(ZA[328], ZA[323], ZA[333]) + ZR30(ZA[333]); + ZA[81] = ZA[47] + ZA[70]; + ZA[101] = ZA[95] + 0x92722c85U; + + ZA[58] = ZR25(ZA[52]) + ZA[43]; + ZA[102] = ZR15(ZA[94]) + ZA[81]; + ZA[337] = ZA[318] + ZA[336]; + ZA[338] = ZA[336] + ZA[334]; + ZA[325] = ZA[322] + ZA[101]; + + ZA[341] = (ZCh(ZA[337], ZA[332], ZA[327]) + ZA[325]) + ZR26(ZA[337]); + ZA[339] = ZMa(ZA[333], ZA[328], ZA[338]) + ZR30(ZA[338]); + ZA[89] = ZA[58] + 
ZA[78]; + ZA[108] = ZA[102] + 0xa2bfe8a1U; + + ZA[59] = ZR25(ZA[48]) + ZA[52]; + ZA[103] = ZR15(ZA[95]) + ZA[89]; + ZA[342] = ZA[323] + ZA[341]; + ZA[343] = ZA[341] + ZA[339]; + ZA[330] = ZA[327] + ZA[108]; + + ZA[346] = (ZCh(ZA[342], ZA[337], ZA[332]) + ZA[330]) + ZR26(ZA[342]); + ZA[344] = ZMa(ZA[338], ZA[333], ZA[343]) + ZR30(ZA[343]); + ZA[90] = ZA[59] + ZA[79]; + ZA[109] = ZA[103] + 0xa81a664bU; + + ZA[64] = ZR25(ZA[61]) + ZA[48]; + ZA[110] = ZR15(ZA[102]) + ZA[90]; + ZA[347] = ZA[328] + ZA[346]; + ZA[348] = ZA[346] + ZA[344]; + ZA[335] = ZA[332] + ZA[109]; + + ZA[351] = (ZCh(ZA[347], ZA[342], ZA[337]) + ZA[335]) + ZR26(ZA[347]); + ZA[349] = ZMa(ZA[343], ZA[338], ZA[348]) + ZR30(ZA[348]); + ZA[60] = ZR25(ZA[53]); + ZA[116] = ZA[110] + 0xc24b8b70U; + ZA[96] = ZA[87] + ZA[64]; + + ZA[111] = ZR15(ZA[103]) + ZA[96]; + ZA[353] = ZA[351] + ZA[349]; + ZA[352] = ZA[333] + ZA[351]; + ZA[340] = ZA[337] + ZA[116]; + ZA[65] = ZA[60] + ZA[61]; + + ZA[356] = (ZCh(ZA[352], ZA[347], ZA[342]) + ZA[340]) + ZR26(ZA[352]); + ZA[354] = ZMa(ZA[348], ZA[343], ZA[353]) + ZR30(ZA[353]); + ZA[97] = ZA[88] + ZA[65]; + ZA[117] = ZA[111] + 0xc76c51a3U; + + ZA[73] = ZR25(ZA[66]) + ZA[53]; + ZA[118] = ZR15(ZA[110]) + ZA[97]; + ZA[357] = ZA[338] + ZA[356]; + ZA[358] = ZA[356] + ZA[354]; + ZA[345] = ZA[342] + ZA[117]; + + ZA[361] = (ZCh(ZA[357], ZA[352], ZA[347]) + ZA[345]) + ZR26(ZA[357]); + ZA[359] = ZMa(ZA[353], ZA[348], ZA[358]) + ZR30(ZA[358]); + ZA[104] = ZA[73] + ZA[94]; + ZA[124] = ZA[118] + 0xd192e819U; + + ZA[74] = ZR25(ZA[67]) + ZA[66]; + ZA[119] = ZR15(ZA[111]) + ZA[104]; + ZA[362] = ZA[343] + ZA[361]; + ZA[363] = ZA[361] + ZA[359]; + ZA[350] = ZA[347] + ZA[124]; + + ZA[366] = (ZCh(ZA[362], ZA[357], ZA[352]) + ZA[350]) + ZR26(ZA[362]); + ZA[364] = ZMa(ZA[358], ZA[353], ZA[363]) + ZR30(ZA[363]); + ZA[105] = ZA[74] + ZA[95]; + ZA[125] = ZA[119] + 0xd6990624U; + + ZA[82] = ZR25(ZA[75]) + ZA[67]; + ZA[126] = ZR15(ZA[118]) + ZA[105]; + ZA[367] = ZA[348] + ZA[366]; + ZA[368] = ZA[366] + 
ZA[364]; + ZA[355] = ZA[352] + ZA[125]; + + ZA[371] = (ZCh(ZA[367], ZA[362], ZA[357]) + ZA[355]) + ZR26(ZA[367]); + ZA[369] = ZMa(ZA[363], ZA[358], ZA[368]) + ZR30(ZA[368]); + ZA[112] = ZA[102] + ZA[82]; + ZA[132] = ZA[126] + 0xf40e3585U; + + ZA[83] = ZR25(ZA[70]) + ZA[75]; + ZA[127] = ZR15(ZA[119]) + ZA[112]; + ZA[372] = ZA[353] + ZA[371]; + ZA[373] = ZA[371] + ZA[369]; + ZA[360] = ZA[357] + ZA[132]; + + ZA[376] = (ZCh(ZA[372], ZA[367], ZA[362]) + ZA[360]) + ZR26(ZA[372]); + ZA[374] = ZMa(ZA[368], ZA[363], ZA[373]) + ZR30(ZA[373]); + ZA[113] = ZA[103] + ZA[83]; + ZA[133] = ZA[127] + 0x106aa070U; + + ZA[84] = ZR25(ZA[78]) + ZA[70]; + ZA[134] = ZR15(ZA[126]) + ZA[113]; + ZA[377] = ZA[358] + ZA[376]; + ZA[378] = ZA[376] + ZA[374]; + ZA[365] = ZA[362] + ZA[133]; + + ZA[381] = (ZCh(ZA[377], ZA[372], ZA[367]) + ZA[365]) + ZR26(ZA[377]); + ZA[379] = ZMa(ZA[373], ZA[368], ZA[378]) + ZR30(ZA[378]); + ZA[120] = ZA[110] + ZA[84]; + ZA[140] = ZA[134] + 0x19a4c116U; + + ZA[91] = ZR25(ZA[79]) + ZA[78]; + ZA[135] = ZR15(ZA[127]) + ZA[120]; + ZA[382] = ZA[363] + ZA[381]; + ZA[383] = ZA[381] + ZA[379]; + ZA[370] = ZA[367] + ZA[140]; + + ZA[386] = (ZCh(ZA[382], ZA[377], ZA[372]) + ZA[370]) + ZR26(ZA[382]); + ZA[384] = ZMa(ZA[378], ZA[373], ZA[383]) + ZR30(ZA[383]); + ZA[121] = ZA[111] + ZA[91]; + ZA[141] = ZA[135] + 0x1e376c08U; + + ZA[98] = ZR25(ZA[87]) + ZA[79]; + ZA[142] = ZR15(ZA[134]) + ZA[121]; + ZA[387] = ZA[368] + ZA[386]; + ZA[388] = ZA[386] + ZA[384]; + ZA[375] = ZA[372] + ZA[141]; + + ZA[391] = (ZCh(ZA[387], ZA[382], ZA[377]) + ZA[375]) + ZR26(ZA[387]); + ZA[389] = ZMa(ZA[383], ZA[378], ZA[388]) + ZR30(ZA[388]); + ZA[128] = ZA[118] + ZA[98]; + ZA[147] = ZA[142] + 0x2748774cU; + + ZA[99] = ZR25(ZA[88]) + ZA[87]; + ZA[143] = ZR15(ZA[135]) + ZA[128]; + ZA[392] = ZA[373] + ZA[391]; + ZA[393] = ZA[391] + ZA[389]; + ZA[380] = ZA[377] + ZA[147]; + + ZA[396] = (ZCh(ZA[392], ZA[387], ZA[382]) + ZA[380]) + ZR26(ZA[392]); + ZA[394] = ZMa(ZA[388], ZA[383], ZA[393]) + ZR30(ZA[393]); 
+ ZA[129] = ZA[119] + ZA[99]; + ZA[148] = ZA[143] + 0x34b0bcb5U; + + ZA[106] = ZR25(ZA[94]) + ZA[88]; + ZA[149] = ZR15(ZA[142]) + ZA[129]; + ZA[397] = ZA[378] + ZA[396]; + ZA[398] = ZA[396] + ZA[394]; + ZA[385] = ZA[382] + ZA[148]; + + ZA[401] = (ZCh(ZA[397], ZA[392], ZA[387]) + ZA[385]) + ZR26(ZA[397]); + ZA[399] = ZMa(ZA[393], ZA[388], ZA[398]) + ZR30(ZA[398]); + ZA[136] = ZA[126] + ZA[106]; + ZA[153] = ZA[149] + 0x391c0cb3U; + + ZA[107] = ZR25(ZA[95]) + ZA[94]; + ZA[150] = ZR15(ZA[143]) + ZA[136]; + ZA[402] = ZA[383] + ZA[401]; + ZA[403] = ZA[401] + ZA[399]; + ZA[390] = ZA[387] + ZA[153]; + + ZA[406] = (ZCh(ZA[402], ZA[397], ZA[392]) + ZA[390]) + ZR26(ZA[402]); + ZA[404] = ZMa(ZA[398], ZA[393], ZA[403]) + ZR30(ZA[403]); + ZA[137] = ZA[127] + ZA[107]; + ZA[154] = ZA[150] + 0x4ed8aa4aU; + + ZA[114] = ZR25(ZA[102]) + ZA[95]; + ZA[155] = ZR15(ZA[149]) + ZA[137]; + ZA[407] = ZA[388] + ZA[406]; + ZA[408] = ZA[406] + ZA[404]; + ZA[395] = ZA[392] + ZA[154]; + + ZA[411] = (ZCh(ZA[407], ZA[402], ZA[397]) + ZA[395]) + ZR26(ZA[407]); + ZA[409] = ZMa(ZA[403], ZA[398], ZA[408]) + ZR30(ZA[408]); + ZA[144] = ZA[134] + ZA[114]; + ZA[159] = ZA[155] + 0x5b9cca4fU; + + ZA[115] = ZR25(ZA[103]) + ZA[102]; + ZA[156] = ZR15(ZA[150]) + ZA[144]; + ZA[412] = ZA[393] + ZA[411]; + ZA[413] = ZA[411] + ZA[409]; + ZA[400] = ZA[397] + ZA[159]; + + ZA[416] = (ZCh(ZA[412], ZA[407], ZA[402]) + ZA[400]) + ZR26(ZA[412]); + ZA[414] = ZMa(ZA[408], ZA[403], ZA[413]) + ZR30(ZA[413]); + ZA[145] = ZA[135] + ZA[115]; + ZA[160] = ZA[156] + 0x682e6ff3U; + + ZA[122] = ZR25(ZA[110]) + ZA[103]; + ZA[161] = ZR15(ZA[155]) + ZA[145]; + ZA[417] = ZA[398] + ZA[416]; + ZA[418] = ZA[416] + ZA[414]; + ZA[405] = ZA[402] + ZA[160]; + + ZA[421] = (ZCh(ZA[417], ZA[412], ZA[407]) + ZA[405]) + ZR26(ZA[417]); + ZA[419] = ZMa(ZA[413], ZA[408], ZA[418]) + ZR30(ZA[418]); + ZA[151] = ZA[142] + ZA[122]; + ZA[165] = ZA[161] + 0x748f82eeU; + + ZA[123] = ZR25(ZA[111]) + ZA[110]; + ZA[162] = ZR15(ZA[156]) + ZA[151]; + ZA[422] = 
ZA[403] + ZA[421]; + ZA[423] = ZA[421] + ZA[419]; + ZA[410] = ZA[407] + ZA[165]; + + ZA[426] = (ZCh(ZA[422], ZA[417], ZA[412]) + ZA[410]) + ZR26(ZA[422]); + ZA[424] = ZMa(ZA[418], ZA[413], ZA[423]) + ZR30(ZA[423]); + ZA[152] = ZA[143] + ZA[123]; + ZA[166] = ZA[162] + 0x78a5636fU; + + ZA[130] = ZR25(ZA[118]) + ZA[111]; + ZA[167] = ZR15(ZA[161]) + ZA[152]; + ZA[427] = ZA[408] + ZA[426]; + ZA[428] = ZA[426] + ZA[424]; + ZA[415] = ZA[412] + ZA[166]; + + ZA[431] = (ZCh(ZA[427], ZA[422], ZA[417]) + ZA[415]) + ZR26(ZA[427]); + ZA[429] = ZMa(ZA[423], ZA[418], ZA[428]) + ZR30(ZA[428]); + ZA[157] = ZA[149] + ZA[130]; + ZA[170] = ZA[167] + 0x84c87814U; + + ZA[131] = ZR25(ZA[119]) + ZA[118]; + ZA[168] = ZR15(ZA[162]) + ZA[157]; + ZA[432] = ZA[413] + ZA[431]; + ZA[433] = ZA[431] + ZA[429]; + ZA[420] = ZA[417] + ZA[170]; + + ZA[436] = (ZCh(ZA[432], ZA[427], ZA[422]) + ZA[420]) + ZR26(ZA[432]); + ZA[434] = ZMa(ZA[428], ZA[423], ZA[433]) + ZR30(ZA[433]); + ZA[158] = ZA[150] + ZA[131]; + ZA[171] = ZA[168] + 0x8cc70208U; + + ZA[138] = ZR25(ZA[126]) + ZA[119]; + ZA[172] = ZR15(ZA[167]) + ZA[158]; + ZA[437] = ZA[418] + ZA[436]; + ZA[438] = ZA[436] + ZA[434]; + ZA[425] = ZA[422] + ZA[171]; + + ZA[441] = (ZCh(ZA[437], ZA[432], ZA[427]) + ZA[425]) + ZR26(ZA[437]); + ZA[439] = ZMa(ZA[433], ZA[428], ZA[438]) + ZR30(ZA[438]); + ZA[163] = ZA[155] + ZA[138]; + ZA[174] = ZA[172] + 0x90befffaU; + + ZA[139] = ZR25(ZA[127]) + ZA[126]; + ZA[173] = ZR15(ZA[168]) + ZA[163]; + ZA[442] = ZA[423] + ZA[441]; + ZA[443] = ZA[441] + ZA[439]; + ZA[430] = ZA[427] + ZA[174]; + + ZA[445] = (ZCh(ZA[442], ZA[437], ZA[432]) + ZA[430]) + ZR26(ZA[442]); + ZA[444] = ZMa(ZA[438], ZA[433], ZA[443]) + ZR30(ZA[443]); + ZA[164] = ZA[156] + ZA[139]; + ZA[175] = ZA[173] + 0xa4506cebU; + + ZA[146] = ZR25(ZA[134]) + ZA[127]; + ZA[176] = ZR15(ZA[172]) + ZA[164]; + ZA[446] = ZA[428] + ZA[445]; + ZA[447] = ZA[445] + ZA[444]; + ZA[435] = ZA[432] + ZA[175]; + + ZA[449] = (ZCh(ZA[446], ZA[442], ZA[437]) + ZA[435]) + ZR26(ZA[446]); 
+ ZA[448] = ZMa(ZA[443], ZA[438], ZA[447]) + ZR30(ZA[447]); + ZA[169] = ZA[161] + ZA[146]; + ZA[178] = ZA[176] + 0xbef9a3f7U; + + ZA[177] = ZR15(ZA[173]) + ZA[169]; + ZA[451] = ZA[449] + ZA[448]; + ZA[450] = ZA[433] + ZA[449]; + ZA[440] = ZA[437] + ZA[178]; + + ZA[453] = (ZCh(ZA[450], ZA[446], ZA[442]) + ZA[440]) + ZR26(ZA[450]); + ZA[452] = ZMa(ZA[447], ZA[443], ZA[451]) + ZR30(ZA[451]); + ZA[179] = ZA[177] + 0xc67178f2U; + + ZA[454] = ZA[438] + ZA[453]; + ZA[494] = ZA[442] + ZA[179]; + ZA[455] = ZA[453] + ZA[452]; + + ZA[457] = (ZCh(ZA[454], ZA[450], ZA[446]) + ZA[494]) + ZR26(ZA[454]); + ZA[456] = ZMa(ZA[451], ZA[447], ZA[455]) + ZR30(ZA[455]); + + ZA[459] = ZA[457] + ZA[456]; + + ZA[461] = ZA[455] + state1; + ZA[460] = ZA[459] + state0; + + ZA[495] = ZA[460] + 0x98c7e2a2U; + ZA[469] = ZA[461] + 0x90bb1e3cU; + + ZA[498] = (ZCh(ZA[495], 0x510e527fU, 0x9b05688cU) + ZA[469]) + ZR26(ZA[495]); + ZA[462] = ZA[451] + state2; + + ZA[496] = ZA[460] + 0xfc08884dU; + ZA[506] = ZA[498] + 0x3c6ef372U; + ZA[470] = ZA[462] + 0x50c6645bU; + + ZA[507] = (ZCh(ZA[506], ZA[495], 0x510e527fU) + ZA[470]) + ZR26(ZA[506]); + ZA[500] = ZMa(0x6a09e667U, 0xbb67ae85U, ZA[496]) + ZR30(ZA[496]); + ZA[463] = ZA[447] + state3; + + ZA[458] = ZA[443] + ZA[457]; + ZA[499] = ZA[498] + ZA[500]; + ZA[508] = ZA[507] + 0xbb67ae85U; + ZA[473] = ZA[463] + 0x3ac42e24U; + + ZA[510] = (ZCh(ZA[508], ZA[506], ZA[495]) + ZA[473]) + ZR26(ZA[508]); + ZA[928] = ZMa(ZA[496], 0x6a09e667U, ZA[499]) + ZR30(ZA[499]); + ZA[464] = ZA[458] + state4; + + ZA[476] = ZA[464] + ZA[460] + 0xd21ea4fdU; + ZA[511] = ZA[510] + 0x6a09e667U; + ZA[509] = ZA[928] + ZA[507]; + ZA[465] = ZA[454] + state5; + + ZA[514] = (ZCh(ZA[511], ZA[508], ZA[506]) + ZA[476]) + ZR26(ZA[511]); + ZA[512] = ZMa(ZA[499], ZA[496], ZA[509]) + ZR30(ZA[509]); + ZA[478] = ZA[465] + 0x59f111f1U; + + ZA[519] = ZA[506] + ZA[478]; + ZA[516] = ZA[496] + ZA[514]; + ZA[513] = ZA[510] + ZA[512]; + ZA[466] = ZA[450] + state6; + + ZA[520] = (ZCh(ZA[516], ZA[511], 
ZA[508]) + ZA[519]) + ZR26(ZA[516]); + ZA[515] = ZMa(ZA[509], ZA[499], ZA[513]) + ZR30(ZA[513]); + ZA[480] = ZA[466] + 0x923f82a4U; + + ZA[524] = ZA[508] + ZA[480]; + ZA[521] = ZA[499] + ZA[520]; + ZA[517] = ZA[514] + ZA[515]; + ZA[467] = ZA[446] + state7; + + ZA[525] = (ZCh(ZA[521], ZA[516], ZA[511]) + ZA[524]) + ZR26(ZA[521]); + ZA[522] = ZMa(ZA[513], ZA[509], ZA[517]) + ZR30(ZA[517]); + ZA[484] = ZA[467] + 0xab1c5ed5U; + + ZA[529] = ZA[511] + ZA[484]; + ZA[526] = ZA[509] + ZA[525]; + ZA[523] = ZA[520] + ZA[522]; + + ZA[530] = (ZCh(ZA[526], ZA[521], ZA[516]) + ZA[529]) + ZR26(ZA[526]); + ZA[550] = ZMa(ZA[517], ZA[513], ZA[523]) + ZR30(ZA[523]); + + ZA[531] = ZA[513] + ZA[530]; + ZA[533] = ZA[516] + 0x5807aa98U; + ZA[527] = ZA[550] + ZA[525]; + + ZA[534] = (ZCh(ZA[531], ZA[526], ZA[521]) + ZA[533]) + ZR26(ZA[531]); + ZA[551] = ZMa(ZA[523], ZA[517], ZA[527]) + ZR30(ZA[527]); + + ZA[535] = ZA[517] + ZA[534]; + ZA[538] = ZA[521] + 0x12835b01U; + ZA[532] = ZA[551] + ZA[530]; + + ZA[539] = (ZCh(ZA[535], ZA[531], ZA[526]) + ZA[538]) + ZR26(ZA[535]); + ZA[552] = ZMa(ZA[527], ZA[523], ZA[532]) + ZR30(ZA[532]); + + ZA[540] = ZA[523] + ZA[539]; + ZA[542] = ZA[526] + 0x243185beU; + ZA[536] = ZA[552] + ZA[534]; + + ZA[543] = (ZCh(ZA[540], ZA[535], ZA[531]) + ZA[542]) + ZR26(ZA[540]); + ZA[553] = ZMa(ZA[532], ZA[527], ZA[536]) + ZR30(ZA[536]); + + ZA[544] = ZA[527] + ZA[543]; + ZA[555] = ZA[531] + 0x550c7dc3U; + ZA[541] = ZA[553] + ZA[539]; + + ZA[558] = (ZCh(ZA[544], ZA[540], ZA[535]) + ZA[555]) + ZR26(ZA[544]); + ZA[547] = ZMa(ZA[536], ZA[532], ZA[541]) + ZR30(ZA[541]); + + ZA[559] = ZA[532] + ZA[558]; + ZA[556] = ZA[535] + 0x72be5d74U; + ZA[545] = ZA[547] + ZA[543]; + + ZA[562] = (ZCh(ZA[559], ZA[544], ZA[540]) + ZA[556]) + ZR26(ZA[559]); + ZA[561] = ZMa(ZA[541], ZA[536], ZA[545]) + ZR30(ZA[545]); + + ZA[563] = ZA[536] + ZA[562]; + ZA[560] = ZA[561] + ZA[558]; + ZA[557] = ZA[540] + 0x80deb1feU; + + ZA[568] = (ZCh(ZA[563], ZA[559], ZA[544]) + ZA[557]) + ZR26(ZA[563]); + 
ZA[564] = ZMa(ZA[545], ZA[541], ZA[560]) + ZR30(ZA[560]); + + ZA[569] = ZA[541] + ZA[568]; + ZA[572] = ZA[544] + 0x9bdc06a7U; + ZA[565] = ZA[562] + ZA[564]; + + ZA[574] = (ZCh(ZA[569], ZA[563], ZA[559]) + ZA[572]) + ZR26(ZA[569]); + ZA[570] = ZMa(ZA[560], ZA[545], ZA[565]) + ZR30(ZA[565]); + ZA[468] = ZR25(ZA[461]); + + ZA[497] = ZA[468] + ZA[460]; + ZA[575] = ZA[545] + ZA[574]; + ZA[571] = ZA[568] + ZA[570]; + ZA[573] = ZA[559] + 0xc19bf274U; + + ZA[578] = (ZCh(ZA[575], ZA[569], ZA[563]) + ZA[573]) + ZR26(ZA[575]); + ZA[576] = ZMa(ZA[565], ZA[560], ZA[571]) + ZR30(ZA[571]); + ZA[929] = ZR25(ZA[462]); + ZA[503] = ZA[497] + 0xe49b69c1U; + + ZA[471] = ZA[929] + ZA[461] + 0x00a00000U; + ZA[582] = ZA[563] + ZA[503]; + ZA[579] = ZA[560] + ZA[578]; + ZA[577] = ZA[574] + ZA[576]; + + ZA[583] = (ZCh(ZA[579], ZA[575], ZA[569]) + ZA[582]) + ZR26(ZA[579]); + ZA[580] = ZMa(ZA[571], ZA[565], ZA[577]) + ZR30(ZA[577]); + ZA[488] = ZA[471] + 0xefbe4786U; + + ZA[472] = ZR25(ZA[463]) + ZA[462]; + ZA[587] = ZA[569] + ZA[488]; + ZA[584] = ZA[565] + ZA[583]; + ZA[581] = ZA[578] + ZA[580]; + + ZA[588] = (ZCh(ZA[584], ZA[579], ZA[575]) + ZA[587]) + ZR26(ZA[584]); + ZA[586] = ZMa(ZA[577], ZA[571], ZA[581]) + ZR30(ZA[581]); + ZA[501] = ZR15(ZA[497]) + ZA[472]; + ZA[475] = ZR15(ZA[471]); + ZA[926] = ZA[575] + 0x0fc19dc6U; + + ZA[474] = ZA[475] + ZA[463] + ZR25(ZA[464]); + ZA[927] = ZA[926] + ZA[501]; + ZA[589] = ZA[571] + ZA[588]; + ZA[585] = ZA[583] + ZA[586]; + + ZA[592] = (ZCh(ZA[589], ZA[584], ZA[579]) + ZA[927]) + ZR26(ZA[589]); + ZA[590] = ZMa(ZA[581], ZA[577], ZA[585]) + ZR30(ZA[585]); + ZA[477] = ZR25(ZA[465]) + ZA[464]; + ZA[489] = ZA[474] + 0x240ca1ccU; + + ZA[518] = ZR15(ZA[501]) + ZA[477]; + ZA[479] = ZR25(ZA[466]); + ZA[596] = ZA[579] + ZA[489]; + ZA[593] = ZA[577] + ZA[592]; + ZA[591] = ZA[588] + ZA[590]; + + ZA[597] = (ZCh(ZA[593], ZA[589], ZA[584]) + ZA[596]) + ZR26(ZA[593]); + ZA[594] = ZMa(ZA[585], ZA[581], ZA[591]) + ZR30(ZA[591]); + ZA[481] = ZA[479] + ZA[465]; + ZA[601] 
= ZA[518] + 0x2de92c6fU; + + ZA[482] = ZR15(ZA[474]) + ZA[481]; + ZA[602] = ZA[584] + ZA[601]; + ZA[598] = ZA[581] + ZA[597]; + ZA[595] = ZA[592] + ZA[594]; + + ZA[632] = (ZCh(ZA[598], ZA[593], ZA[589]) + ZA[602]) + ZR26(ZA[598]); + ZA[599] = ZMa(ZA[591], ZA[585], ZA[595]) + ZR30(ZA[595]); + ZA[483] = ZA[466] + 0x00000100U + ZR25(ZA[467]); + ZA[490] = ZA[482] + 0x4a7484aaU; + + ZA[528] = ZR15(ZA[518]) + ZA[483]; + ZA[736] = ZA[585] + ZA[632]; + ZA[605] = ZA[589] + ZA[490]; + ZA[600] = ZA[597] + ZA[599]; + ZA[485] = ZA[467] + 0x11002000U; + + ZA[738] = (ZCh(ZA[736], ZA[598], ZA[593]) + ZA[605]) + ZR26(ZA[736]); + ZA[744] = ZMa(ZA[595], ZA[591], ZA[600]) + ZR30(ZA[600]); + ZA[487] = ZR15(ZA[482]) + ZA[485]; + ZA[603] = ZA[528] + 0x5cb0a9dcU; + + ZA[502] = ZA[497] + ZA[487]; + ZA[739] = ZA[591] + ZA[738]; + ZA[604] = ZA[593] + ZA[603]; + ZA[737] = ZA[744] + ZA[632]; + + ZA[741] = (ZCh(ZA[739], ZA[736], ZA[598]) + ZA[604]) + ZR26(ZA[739]); + ZA[745] = ZMa(ZA[600], ZA[595], ZA[737]) + ZR30(ZA[737]); + ZA[486] = ZA[471] + 0x80000000U; + ZA[606] = ZA[502] + 0x76f988daU; + + ZA[537] = ZR15(ZA[528]) + ZA[486]; + ZA[742] = ZA[595] + ZA[741]; + ZA[613] = ZA[598] + ZA[606]; + ZA[740] = ZA[745] + ZA[738]; + + ZA[747] = (ZCh(ZA[742], ZA[739], ZA[736]) + ZA[613]) + ZR26(ZA[742]); + ZA[746] = ZMa(ZA[737], ZA[600], ZA[740]) + ZR30(ZA[740]); + ZA[607] = ZA[537] + 0x983e5152U; + + ZA[546] = ZR15(ZA[502]) + ZA[501]; + ZA[751] = ZA[736] + ZA[607]; + ZA[748] = ZA[600] + ZA[747]; + ZA[743] = ZA[746] + ZA[741]; + + ZA[752] = (ZCh(ZA[748], ZA[742], ZA[739]) + ZA[751]) + ZR26(ZA[748]); + ZA[749] = ZMa(ZA[740], ZA[737], ZA[743]) + ZR30(ZA[743]); + ZA[608] = ZA[546] + 0xa831c66dU; + + ZA[554] = ZR15(ZA[537]) + ZA[474]; + ZA[756] = ZA[739] + ZA[608]; + ZA[753] = ZA[737] + ZA[752]; + ZA[750] = ZA[747] + ZA[749]; + + ZA[757] = (ZCh(ZA[753], ZA[748], ZA[742]) + ZA[756]) + ZR26(ZA[753]); + ZA[754] = ZMa(ZA[743], ZA[740], ZA[750]) + ZR30(ZA[750]); + ZA[609] = ZA[554] + 0xb00327c8U; + + ZA[566] = 
ZR15(ZA[546]) + ZA[518]; + ZA[761] = ZA[742] + ZA[609]; + ZA[758] = ZA[740] + ZA[757]; + ZA[755] = ZA[752] + ZA[754]; + + ZA[762] = (ZCh(ZA[758], ZA[753], ZA[748]) + ZA[761]) + ZR26(ZA[758]); + ZA[759] = ZMa(ZA[750], ZA[743], ZA[755]) + ZR30(ZA[755]); + ZA[610] = ZA[566] + 0xbf597fc7U; + + ZA[567] = ZR15(ZA[554]) + ZA[482]; + ZA[766] = ZA[748] + ZA[610]; + ZA[763] = ZA[743] + ZA[762]; + ZA[760] = ZA[757] + ZA[759]; + + ZA[767] = (ZCh(ZA[763], ZA[758], ZA[753]) + ZA[766]) + ZR26(ZA[763]); + ZA[764] = ZMa(ZA[755], ZA[750], ZA[760]) + ZR30(ZA[760]); + ZA[611] = ZA[567] + 0xc6e00bf3U; + + ZA[614] = ZR15(ZA[566]) + ZA[528]; + ZA[771] = ZA[753] + ZA[611]; + ZA[768] = ZA[750] + ZA[767]; + ZA[765] = ZA[762] + ZA[764]; + + ZA[772] = (ZCh(ZA[768], ZA[763], ZA[758]) + ZA[771]) + ZR26(ZA[768]); + ZA[769] = ZMa(ZA[760], ZA[755], ZA[765]) + ZR30(ZA[765]); + ZA[612] = ZA[502] + 0x00400022U; + ZA[615] = ZA[614] + 0xd5a79147U; + + ZA[616] = ZR15(ZA[567]) + ZA[612]; + ZA[504] = ZR25(ZA[497]) + 0x00000100U; + ZA[776] = ZA[758] + ZA[615]; + ZA[773] = ZA[755] + ZA[772]; + ZA[770] = ZA[767] + ZA[769]; + + ZA[777] = (ZCh(ZA[773], ZA[768], ZA[763]) + ZA[776]) + ZR26(ZA[773]); + ZA[774] = ZMa(ZA[765], ZA[760], ZA[770]) + ZR30(ZA[770]); + ZA[492] = ZR25(ZA[471]); + ZA[618] = ZA[537] + ZA[504]; + ZA[617] = ZA[616] + 0x06ca6351U; + + ZA[619] = ZR15(ZA[614]) + ZA[618]; + ZA[781] = ZA[763] + ZA[617]; + ZA[778] = ZA[760] + ZA[777]; + ZA[775] = ZA[772] + ZA[774]; + ZA[505] = ZA[492] + ZA[497]; + + ZA[782] = (ZCh(ZA[778], ZA[773], ZA[768]) + ZA[781]) + ZR26(ZA[778]); + ZA[779] = ZMa(ZA[770], ZA[765], ZA[775]) + ZR30(ZA[775]); + ZA[621] = ZA[505] + ZA[546]; + ZA[620] = ZA[619] + 0x14292967U; + + ZA[622] = ZR15(ZA[616]) + ZA[621]; + ZA[625] = ZR25(ZA[501]); + ZA[786] = ZA[768] + ZA[620]; + ZA[783] = ZA[765] + ZA[782]; + ZA[624] = ZA[554] + ZA[471]; + ZA[780] = ZA[777] + ZA[779]; + + ZA[787] = (ZCh(ZA[783], ZA[778], ZA[773]) + ZA[786]) + ZR26(ZA[783]); + ZA[784] = ZMa(ZA[775], ZA[770], ZA[780]) + 
ZR30(ZA[780]); + ZA[493] = ZR25(ZA[474]); + ZA[626] = ZA[625] + ZA[624]; + ZA[623] = ZA[622] + 0x27b70a85U; + + ZA[627] = ZR15(ZA[619]) + ZA[626]; + ZA[791] = ZA[773] + ZA[623]; + ZA[788] = ZA[770] + ZA[787]; + ZA[785] = ZA[782] + ZA[784]; + ZA[629] = ZA[493] + ZA[501]; + + ZA[792] = (ZCh(ZA[788], ZA[783], ZA[778]) + ZA[791]) + ZR26(ZA[788]); + ZA[789] = ZMa(ZA[780], ZA[775], ZA[785]) + ZR30(ZA[785]); + ZA[630] = ZA[566] + ZA[629]; + ZA[628] = ZA[627] + 0x2e1b2138U; + + ZA[634] = ZR25(ZA[518]) + ZA[474]; + ZA[631] = ZR15(ZA[622]) + ZA[630]; + ZA[796] = ZA[778] + ZA[628]; + ZA[793] = ZA[775] + ZA[792]; + ZA[790] = ZA[787] + ZA[789]; + + ZA[797] = (ZCh(ZA[793], ZA[788], ZA[783]) + ZA[796]) + ZR26(ZA[793]); + ZA[794] = ZMa(ZA[785], ZA[780], ZA[790]) + ZR30(ZA[790]); + ZA[491] = ZR25(ZA[482]); + ZA[635] = ZA[567] + ZA[634]; + ZA[633] = ZA[631] + 0x4d2c6dfcU; + + ZA[636] = ZR15(ZA[627]) + ZA[635]; + ZA[801] = ZA[783] + ZA[633]; + ZA[798] = ZA[780] + ZA[797]; + ZA[795] = ZA[792] + ZA[794]; + ZA[638] = ZA[491] + ZA[518]; + + ZA[802] = (ZCh(ZA[798], ZA[793], ZA[788]) + ZA[801]) + ZR26(ZA[798]); + ZA[799] = ZMa(ZA[790], ZA[785], ZA[795]) + ZR30(ZA[795]); + ZA[639] = ZA[638] + ZA[614]; + ZA[637] = ZA[636] + 0x53380d13U; + + ZA[642] = ZR25(ZA[528]) + ZA[482]; + ZA[640] = ZR15(ZA[631]) + ZA[639]; + ZA[806] = ZA[788] + ZA[637]; + ZA[803] = ZA[785] + ZA[802]; + ZA[800] = ZA[797] + ZA[799]; + + ZA[807] = (ZCh(ZA[803], ZA[798], ZA[793]) + ZA[806]) + ZR26(ZA[803]); + ZA[804] = ZMa(ZA[795], ZA[790], ZA[800]) + ZR30(ZA[800]); + ZA[643] = ZA[616] + ZA[642]; + ZA[641] = ZA[640] + 0x650a7354U; + + ZA[646] = ZR25(ZA[502]) + ZA[528]; + ZA[644] = ZR15(ZA[636]) + ZA[643]; + ZA[811] = ZA[793] + ZA[641]; + ZA[808] = ZA[790] + ZA[807]; + ZA[805] = ZA[802] + ZA[804]; + + ZA[812] = (ZCh(ZA[808], ZA[803], ZA[798]) + ZA[811]) + ZR26(ZA[808]); + ZA[809] = ZMa(ZA[800], ZA[795], ZA[805]) + ZR30(ZA[805]); + ZA[647] = ZA[619] + ZA[646]; + ZA[645] = ZA[644] + 0x766a0abbU; + + ZA[650] = ZR25(ZA[537]) + 
ZA[502]; + ZA[648] = ZR15(ZA[640]) + ZA[647]; + ZA[816] = ZA[798] + ZA[645]; + ZA[813] = ZA[795] + ZA[812]; + ZA[810] = ZA[807] + ZA[809]; + + ZA[817] = (ZCh(ZA[813], ZA[808], ZA[803]) + ZA[816]) + ZR26(ZA[813]); + ZA[814] = ZMa(ZA[805], ZA[800], ZA[810]) + ZR30(ZA[810]); + ZA[925] = ZA[622] + ZA[650]; + ZA[649] = ZA[648] + 0x81c2c92eU; + + ZA[653] = ZR25(ZA[546]) + ZA[537]; + ZA[651] = ZR15(ZA[644]) + ZA[925]; + ZA[821] = ZA[803] + ZA[649]; + ZA[818] = ZA[800] + ZA[817]; + ZA[815] = ZA[812] + ZA[814]; + + ZA[822] = (ZCh(ZA[818], ZA[813], ZA[808]) + ZA[821]) + ZR26(ZA[818]); + ZA[819] = ZMa(ZA[810], ZA[805], ZA[815]) + ZR30(ZA[815]); + ZA[654] = ZA[627] + ZA[653]; + ZA[652] = ZA[651] + 0x92722c85U; + + ZA[657] = ZR25(ZA[554]) + ZA[546]; + ZA[655] = ZR15(ZA[648]) + ZA[654]; + ZA[826] = ZA[808] + ZA[652]; + ZA[823] = ZA[805] + ZA[822]; + ZA[820] = ZA[817] + ZA[819]; + + ZA[827] = (ZCh(ZA[823], ZA[818], ZA[813]) + ZA[826]) + ZR26(ZA[823]); + ZA[824] = ZMa(ZA[815], ZA[810], ZA[820]) + ZR30(ZA[820]); + ZA[658] = ZA[631] + ZA[657]; + ZA[656] = ZA[655] + 0xa2bfe8a1U; + + ZA[661] = ZR25(ZA[566]) + ZA[554]; + ZA[659] = ZR15(ZA[651]) + ZA[658]; + ZA[831] = ZA[813] + ZA[656]; + ZA[828] = ZA[810] + ZA[827]; + ZA[825] = ZA[822] + ZA[824]; + + ZA[832] = (ZCh(ZA[828], ZA[823], ZA[818]) + ZA[831]) + ZR26(ZA[828]); + ZA[829] = ZMa(ZA[820], ZA[815], ZA[825]) + ZR30(ZA[825]); + ZA[662] = ZA[636] + ZA[661]; + ZA[660] = ZA[659] + 0xa81a664bU; + + ZA[665] = ZR25(ZA[567]) + ZA[566]; + ZA[663] = ZR15(ZA[655]) + ZA[662]; + ZA[836] = ZA[818] + ZA[660]; + ZA[833] = ZA[815] + ZA[832]; + ZA[830] = ZA[827] + ZA[829]; + + ZA[837] = (ZCh(ZA[833], ZA[828], ZA[823]) + ZA[836]) + ZR26(ZA[833]); + ZA[834] = ZMa(ZA[825], ZA[820], ZA[830]) + ZR30(ZA[830]); + ZA[666] = ZA[640] + ZA[665]; + ZA[664] = ZA[663] + 0xc24b8b70U; + + ZA[669] = ZR25(ZA[614]) + ZA[567]; + ZA[667] = ZR15(ZA[659]) + ZA[666]; + ZA[841] = ZA[823] + ZA[664]; + ZA[838] = ZA[820] + ZA[837]; + ZA[835] = ZA[832] + ZA[834]; + + ZA[842] = 
(ZCh(ZA[838], ZA[833], ZA[828]) + ZA[841]) + ZR26(ZA[838]); + ZA[839] = ZMa(ZA[830], ZA[825], ZA[835]) + ZR30(ZA[835]); + ZA[670] = ZA[644] + ZA[669]; + ZA[668] = ZA[667] + 0xc76c51a3U; + + ZA[677] = ZR25(ZA[616]) + ZA[614]; + ZA[671] = ZR15(ZA[663]) + ZA[670]; + ZA[846] = ZA[828] + ZA[668]; + ZA[843] = ZA[825] + ZA[842]; + ZA[840] = ZA[837] + ZA[839]; + + ZA[847] = (ZCh(ZA[843], ZA[838], ZA[833]) + ZA[846]) + ZR26(ZA[843]); + ZA[844] = ZMa(ZA[835], ZA[830], ZA[840]) + ZR30(ZA[840]); + ZA[678] = ZA[648] + ZA[677]; + ZA[676] = ZA[671] + 0xd192e819U; + + ZA[682] = ZR25(ZA[619]) + ZA[616]; + ZA[679] = ZR15(ZA[667]) + ZA[678]; + ZA[851] = ZA[833] + ZA[676]; + ZA[848] = ZA[830] + ZA[847]; + ZA[845] = ZA[842] + ZA[844]; + + ZA[852] = (ZCh(ZA[848], ZA[843], ZA[838]) + ZA[851]) + ZR26(ZA[848]); + ZA[849] = ZMa(ZA[840], ZA[835], ZA[845]) + ZR30(ZA[845]); + ZA[683] = ZA[651] + ZA[682]; + ZA[680] = ZA[679] + 0xd6990624U; + + ZA[686] = ZR25(ZA[622]) + ZA[619]; + ZA[684] = ZR15(ZA[671]) + ZA[683]; + ZA[856] = ZA[838] + ZA[680]; + ZA[853] = ZA[835] + ZA[852]; + ZA[850] = ZA[847] + ZA[849]; + + ZA[857] = (ZCh(ZA[853], ZA[848], ZA[843]) + ZA[856]) + ZR26(ZA[853]); + ZA[854] = ZMa(ZA[845], ZA[840], ZA[850]) + ZR30(ZA[850]); + ZA[687] = ZA[655] + ZA[686]; + ZA[685] = ZA[684] + 0xf40e3585U; + + ZA[690] = ZR25(ZA[627]) + ZA[622]; + ZA[688] = ZR15(ZA[679]) + ZA[687]; + ZA[861] = ZA[843] + ZA[685]; + ZA[858] = ZA[840] + ZA[857]; + ZA[855] = ZA[852] + ZA[854]; + + ZA[862] = (ZCh(ZA[858], ZA[853], ZA[848]) + ZA[861]) + ZR26(ZA[858]); + ZA[859] = ZMa(ZA[850], ZA[845], ZA[855]) + ZR30(ZA[855]); + ZA[691] = ZA[659] + ZA[690]; + ZA[689] = ZA[688] + 0x106aa070U; + + ZA[694] = ZR25(ZA[631]) + ZA[627]; + ZA[692] = ZR15(ZA[684]) + ZA[691]; + ZA[866] = ZA[848] + ZA[689]; + ZA[863] = ZA[845] + ZA[862]; + ZA[860] = ZA[857] + ZA[859]; + + ZA[867] = (ZCh(ZA[863], ZA[858], ZA[853]) + ZA[866]) + ZR26(ZA[863]); + ZA[864] = ZMa(ZA[855], ZA[850], ZA[860]) + ZR30(ZA[860]); + ZA[695] = ZA[663] + ZA[694]; + 
ZA[693] = ZA[692] + 0x19a4c116U; + + ZA[698] = ZR25(ZA[636]) + ZA[631]; + ZA[696] = ZR15(ZA[688]) + ZA[695]; + ZA[871] = ZA[853] + ZA[693]; + ZA[868] = ZA[850] + ZA[867]; + ZA[865] = ZA[862] + ZA[864]; + + ZA[873] = (ZCh(ZA[868], ZA[863], ZA[858]) + ZA[871]) + ZR26(ZA[868]); + ZA[869] = ZMa(ZA[860], ZA[855], ZA[865]) + ZR30(ZA[865]); + ZA[699] = ZA[667] + ZA[698]; + ZA[697] = ZA[696] + 0x1e376c08U; + + ZA[702] = ZR25(ZA[640]) + ZA[636]; + ZA[700] = ZR15(ZA[692]) + ZA[699]; + ZA[877] = ZA[858] + ZA[697]; + ZA[874] = ZA[855] + ZA[873]; + ZA[870] = ZA[867] + ZA[869]; + + ZA[878] = (ZCh(ZA[874], ZA[868], ZA[863]) + ZA[877]) + ZR26(ZA[874]); + ZA[875] = ZMa(ZA[865], ZA[860], ZA[870]) + ZR30(ZA[870]); + ZA[703] = ZA[671] + ZA[702]; + ZA[701] = ZA[700] + 0x2748774cU; + + ZA[706] = ZR25(ZA[644]) + ZA[640]; + ZA[704] = ZR15(ZA[696]) + ZA[703]; + ZA[882] = ZA[863] + ZA[701]; + ZA[879] = ZA[860] + ZA[878]; + ZA[876] = ZA[873] + ZA[875]; + + ZA[883] = (ZCh(ZA[879], ZA[874], ZA[868]) + ZA[882]) + ZR26(ZA[879]); + ZA[880] = ZMa(ZA[870], ZA[865], ZA[876]) + ZR30(ZA[876]); + ZA[707] = ZA[679] + ZA[706]; + ZA[705] = ZA[704] + 0x34b0bcb5U; + + ZA[710] = ZR25(ZA[648]) + ZA[644]; + ZA[708] = ZR15(ZA[700]) + ZA[707]; + ZA[887] = ZA[868] + ZA[705]; + ZA[884] = ZA[865] + ZA[883]; + ZA[881] = ZA[878] + ZA[880]; + + ZA[888] = (ZCh(ZA[884], ZA[879], ZA[874]) + ZA[887]) + ZR26(ZA[884]); + ZA[885] = ZMa(ZA[876], ZA[870], ZA[881]) + ZR30(ZA[881]); + ZA[711] = ZA[684] + ZA[710]; + ZA[709] = ZA[708] + 0x391c0cb3U; + + ZA[714] = ZR25(ZA[651]) + ZA[648]; + ZA[712] = ZR15(ZA[704]) + ZA[711]; + ZA[892] = ZA[874] + ZA[709]; + ZA[889] = ZA[870] + ZA[888]; + ZA[886] = ZA[883] + ZA[885]; + + ZA[893] = (ZCh(ZA[889], ZA[884], ZA[879]) + ZA[892]) + ZR26(ZA[889]); + ZA[890] = ZMa(ZA[881], ZA[876], ZA[886]) + ZR30(ZA[886]); + ZA[715] = ZA[688] + ZA[714]; + ZA[713] = ZA[712] + 0x4ed8aa4aU; + + ZA[718] = ZR25(ZA[655]) + ZA[651]; + ZA[716] = ZR15(ZA[708]) + ZA[715]; + ZA[897] = ZA[879] + ZA[713]; + ZA[894] = 
ZA[876] + ZA[893]; + ZA[891] = ZA[888] + ZA[890]; + + ZA[898] = (ZCh(ZA[894], ZA[889], ZA[884]) + ZA[897]) + ZR26(ZA[894]); + ZA[895] = ZMa(ZA[886], ZA[881], ZA[891]) + ZR30(ZA[891]); + ZA[719] = ZA[692] + ZA[718]; + ZA[717] = ZA[716] + 0x5b9cca4fU; + + ZA[722] = ZR25(ZA[659]) + ZA[655]; + ZA[720] = ZR15(ZA[712]) + ZA[719]; + ZA[902] = ZA[884] + ZA[717]; + ZA[899] = ZA[881] + ZA[898]; + ZA[896] = ZA[893] + ZA[895]; + + ZA[903] = (ZCh(ZA[899], ZA[894], ZA[889]) + ZA[902]) + ZR26(ZA[899]); + ZA[900] = ZMa(ZA[891], ZA[886], ZA[896]) + ZR30(ZA[896]); + ZA[723] = ZA[696] + ZA[722]; + ZA[721] = ZA[720] + 0x682e6ff3U; + + ZA[672] = ZR25(ZA[663]) + ZA[659]; + ZA[724] = ZR15(ZA[716]) + ZA[723]; + ZA[907] = ZA[889] + ZA[721]; + ZA[904] = ZA[886] + ZA[903]; + ZA[901] = ZA[898] + ZA[900]; + + ZA[908] = (ZCh(ZA[904], ZA[899], ZA[894]) + ZA[907]) + ZR26(ZA[904]); + ZA[905] = ZMa(ZA[896], ZA[891], ZA[901]) + ZR30(ZA[901]); + ZA[673] = ZR25(ZA[667]) + ZA[663]; + ZA[726] = ZA[700] + ZA[672]; + ZA[725] = ZA[724] + 0x748f82eeU; + + ZA[727] = ZR15(ZA[720]) + ZA[726]; + ZA[912] = ZA[894] + ZA[725]; + ZA[909] = ZA[891] + ZA[908]; + ZA[906] = ZA[903] + ZA[905]; + ZA[675] = ZA[667] + 0x8cc70208U; + ZA[729] = ZA[704] + ZA[673]; + + ZA[913] = (ZCh(ZA[909], ZA[904], ZA[899]) + ZA[912]) + ZR26(ZA[909]); + ZA[910] = ZMa(ZA[901], ZA[896], ZA[906]) + ZR30(ZA[906]); + ZA[674] = ZR25(ZA[671]) + ZA[675]; + ZA[730] = ZR15(ZA[724]) + ZA[729]; + ZA[728] = ZA[727] + 0x78a5636fU; + + ZA[681] = ZR25(ZA[679]) + ZA[671]; + ZA[917] = ZA[899] + ZA[901] + ZA[728]; + ZA[914] = ZA[896] + ZA[913]; + ZA[911] = ZA[908] + ZA[910]; + ZA[732] = ZA[708] + ZA[674]; + ZA[731] = ZA[730] + 0x84c87814U; + + ZA[918] = (ZCh(ZA[914], ZA[909], ZA[904]) + ZA[917]) + ZR26(ZA[914]); + ZA[915] = ZMa(ZA[906], ZA[901], ZA[911]) + ZR30(ZA[911]); + ZA[733] = ZR15(ZA[727]) + ZA[732]; + ZA[919] = ZA[906] + ZA[904] + ZA[731]; + ZA[734] = ZA[712] + ZA[681]; + + ZA[920] = (ZCh(ZA[918], ZA[914], ZA[909]) + ZA[919]) + ZR26(ZA[918]); + 
ZA[735] = ZR15(ZA[730]) + ZA[734]; + ZA[921] = ZA[911] + ZA[909] + ZA[733]; + ZA[916] = ZA[913] + ZA[915]; + + ZA[922] = (ZCh(ZA[920], ZA[918], ZA[914]) + ZA[921]) + ZR26(ZA[920]); + ZA[923] = ZA[916] + ZA[914] + ZA[735]; + + ZA[924] = (ZCh(ZA[922], ZA[920], ZA[918]) + ZA[923]) + ZR26(ZA[922]); + +#define FOUND (0x800) +#define NFLAG (0x7FF) + +#if defined(VECTORS4) + bool result = any(ZA[924] == 0x136032EDU); + + if (result) { + if (ZA[924].x == 0x136032EDU) + output[FOUND] = output[NFLAG & Znonce.x] = Znonce.x; + if (ZA[924].y == 0x136032EDU) + output[FOUND] = output[NFLAG & Znonce.y] = Znonce.y; + if (ZA[924].z == 0x136032EDU) + output[FOUND] = output[NFLAG & Znonce.z] = Znonce.z; + if (ZA[924].w == 0x136032EDU) + output[FOUND] = output[NFLAG & Znonce.w] = Znonce.w; + } +#elif defined(VECTORS2) + bool result = any(ZA[924] == 0x136032EDU); + + if (result) { + if (ZA[924].x == 0x136032EDU) + output[FOUND] = output[NFLAG & Znonce.x] = Znonce.x; + if (ZA[924].y == 0x136032EDU) + output[FOUND] = output[NFLAG & Znonce.y] = Znonce.y; + } +#else + if (ZA[924] == 0x136032EDU) + output[FOUND] = output[NFLAG & Znonce] = Znonce; +#endif +} diff --git a/diakgcn120724.cl b/diakgcn120724.cl new file mode 100644 index 00000000..7dd73fb9 --- /dev/null +++ b/diakgcn120724.cl @@ -0,0 +1,587 @@ +// DiaKGCN 27-04-2012 - OpenCL kernel by Diapolo +// +// Parts and / or ideas for this kernel are based upon the public-domain poclbm project, the phatk kernel by Phateus and the DiabloMiner kernel by DiabloD3. +// The kernel was rewritten by me (Diapolo) and is still public-domain! 
+/* u is the per-work-item working type: uint4 when VECTORS4 is defined, uint2 when VECTORS2 is defined, plain uint otherwise. */ +#ifdef VECTORS4 + typedef uint4 u; +#elif defined VECTORS2 + typedef uint2 u; +#else + typedef uint u; +#endif +/* ch(x, y, z) takes the bits of y where x is set and the bits of z elsewhere; ma() is the bitwise majority of its three arguments (argument order does not matter for a majority). BITALIGN enables cl_amd_media_ops; without BFI_INT the bitselect() forms are used, and without BITALIGN the plain AND/OR/XOR forms. NOTE(review): the BFI_INT variants spell both macros as amd_bytealign, presumably as placeholders the host patches into BFI_INT after compilation -- confirm against the host code. Macro arguments are expanded unparenthesised. */ +#ifdef BITALIGN + #pragma OPENCL EXTENSION cl_amd_media_ops : enable + #ifdef BFI_INT + #define ch(x, y, z) amd_bytealign(x, y, z) + #define ma(x, y, z) amd_bytealign(z ^ x, y, x) + #else + #define ch(x, y, z) bitselect(z, y, x) + #define ma(z, x, y) bitselect(z, y, z ^ x) + #endif +#else + #define ch(x, y, z) (z ^ (x & (y ^ z))) + #define ma(x, y, z) ((x & z) | (y & (x | z))) +#endif +/* rotr15 and rotr25 XOR two rotate() terms with a right shift of n; rotr26 and rotr30 XOR three rotate() terms. The number in each macro name is the first rotate() count. NOTE(review): OpenCL rotate() rotates left, so these appear to be SHA-256 sigma1, sigma0, Sigma1 and Sigma0 respectively (left by 15 equals right by 17) -- confirm. */ +#define rotr15(n) (rotate(n, 15U) ^ rotate(n, 13U) ^ (n >> 10U)) +#define rotr25(n) (rotate(n, 25U) ^ rotate(n, 14U) ^ (n >> 3U)) +#define rotr26(n) (rotate(n, 26U) ^ rotate(n, 21U) ^ rotate(n, 7U)) +#define rotr30(n) (rotate(n, 30U) ^ rotate(n, 19U) ^ rotate(n, 10U)) +/* search: kernel entry point; declares a required work-group size of (WORKSIZE, 1, 1). The base parameter exists only when GOFFSET is not defined. NOTE(review): the VECTORS4 and VECTORS2 nonce expressions use base unconditionally, so GOFFSET looks usable only with the scalar build -- confirm against the host's build options. */ +__kernel + __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) + void search( +#ifndef GOFFSET + const u base, +#endif + const uint PreVal0, const uint PreVal4, + const uint H1, const uint D1A, const uint B1, const uint C1, + const uint F1, const uint G1, const uint C1addK5, const uint B1addK6, const uint PreVal0addK7, + const uint W16addK16, const uint W17addK17, + const uint PreW18, const uint PreW19, + const uint W16, const uint W17, + const uint PreW31, const uint PreW32, + const uint state0, const uint state1, const uint state2, const uint state3, + const uint state4, const uint state5, const uint state6, const uint state7, + const uint state0A, const uint state0B, + const uint state1A, const uint state2A, const uint state3A, const uint state4A, + const uint state5A, const uint state6A, const uint state7A, + __global uint * output) +{ + u V[8]; + u W[16]; + +#ifdef VECTORS4 + const u nonce = (uint)(get_local_id(0)) * 4U + (uint)(get_group_id(0)) * (uint)(WORKVEC) + base; +#elif defined VECTORS2 + const u nonce = (uint)(get_local_id(0)) * 2U + (uint)(get_group_id(0)) * (uint)(WORKVEC) + base; +#else + #ifdef GOFFSET + const u nonce = (uint)(get_global_id(0)); + #else + const u nonce = 
(uint)(get_local_id(0)) + (uint)(get_group_id(0)) * (uint)(WORKSIZE) + base; + #endif +#endif + + V[0] = PreVal0 + nonce; + V[1] = B1; + V[2] = C1; + V[3] = D1A; + V[4] = PreVal4 + nonce; + V[5] = F1; + V[6] = G1; + V[7] = H1; + + V[7] += V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); + + V[6] += C1addK5 + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = C1addK5 + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); + + V[5] += B1addK6 + ch(V[6], V[7], V[0]) + rotr26(V[6]); + V[1] = B1addK6 + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); + + V[4] += PreVal0addK7 + nonce + ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = PreVal0addK7 + nonce + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); + + V[3] += 0xd807aa98U + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = 0xd807aa98U + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); + + V[2] += 0x12835b01U + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]); + V[6] = 0x12835b01U + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); + + V[1] += 0x243185beU + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]); + V[5] = 0x243185beU + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); + + V[0] += 0x550c7dc3U + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + V[4] = 0x550c7dc3U + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); + + V[7] += 0x72be5d74U + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = 0x72be5d74U + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); + + V[6] += 0x80deb1feU + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = 0x80deb1feU + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); + + V[5] += 0x9bdc06a7U + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]); + V[1] = 0x9bdc06a7U 
+ V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); + + V[4] += 0xc19bf3f4U + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = 0xc19bf3f4U + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); + + V[3] += W16addK16 + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = W16addK16 + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); + + V[2] += W17addK17 + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]); + V[6] = W17addK17 + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); + +//---------------------------------------------------------------------------------- + +#ifdef VECTORS4 + W[0] = PreW18 + (u)(rotr25(nonce.x), rotr25(nonce.x) ^ 0x2004000U, rotr25(nonce.x) ^ 0x4008000U, rotr25(nonce.x) ^ 0x600c000U); +#elif defined VECTORS2 + W[0] = PreW18 + (u)(rotr25(nonce.x), rotr25(nonce.x) ^ 0x2004000U); +#else + W[0] = PreW18 + rotr25(nonce); +#endif + W[1] = PreW19 + nonce; + W[2] = 0x80000000U + rotr15(W[0]); + W[3] = rotr15(W[1]); + W[4] = 0x00000280U + rotr15(W[2]); + W[5] = W16 + rotr15(W[3]); + W[6] = W17 + rotr15(W[4]); + W[7] = W[0] + rotr15(W[5]); + W[8] = W[1] + rotr15(W[6]); + W[9] = W[2] + rotr15(W[7]); + W[10] = W[3] + rotr15(W[8]); + W[11] = W[4] + rotr15(W[9]); + W[12] = W[5] + 0x00a00055U + rotr15(W[10]); + W[13] = W[6] + PreW31 + rotr15(W[11]); + W[14] = W[7] + PreW32 + rotr15(W[12]); + W[15] = W[8] + W17 + rotr15(W[13]) + rotr25(W[0]); + + V[1] += 0x0fc19dc6U + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + W[0]; + V[5] = 0x0fc19dc6U + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + W[0] + rotr30(V[6]) + ma(V[7], V[0], V[6]); + + V[0] += 0x240ca1ccU + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + V[4] = 0x240ca1ccU + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); + + V[7] += 0x2de92c6fU + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = 0x2de92c6fU + V[3] + W[2] + ch(V[0], V[1], 
V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); + + V[6] += 0x4a7484aaU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = 0x4a7484aaU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); + + V[5] += 0x5cb0a9dcU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]); + V[1] = 0x5cb0a9dcU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); + + V[4] += 0x76f988daU + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = 0x76f988daU + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); + + V[3] += 0x983e5152U + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = 0x983e5152U + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); + + V[2] += 0xa831c66dU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]); + V[6] = 0xa831c66dU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); + + V[1] += 0xb00327c8U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]); + V[5] = 0xb00327c8U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); + + V[0] += 0xbf597fc7U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + V[4] = 0xbf597fc7U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); + + V[7] += 0xc6e00bf3U + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = 0xc6e00bf3U + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); + + V[6] += 0xd5a79147U + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = 0xd5a79147U + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); + + V[5] += 0x06ca6351U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]); + V[1] = 0x06ca6351U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); + + V[4] += 0x14292967U + V[0] + W[13] + 
ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = 0x14292967U + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); + + V[3] += 0x27b70a85U + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = 0x27b70a85U + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); + + V[2] += 0x2e1b2138U + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]); + V[6] = 0x2e1b2138U + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); + +//---------------------------------------------------------------------------------- + + W[0] = W[0] + W[9] + rotr15(W[14]) + rotr25( W[1]); + W[1] = W[1] + W[10] + rotr15(W[15]) + rotr25( W[2]); + W[2] = W[2] + W[11] + rotr15( W[0]) + rotr25( W[3]); + W[3] = W[3] + W[12] + rotr15( W[1]) + rotr25( W[4]); + W[4] = W[4] + W[13] + rotr15( W[2]) + rotr25( W[5]); + W[5] = W[5] + W[14] + rotr15( W[3]) + rotr25( W[6]); + W[6] = W[6] + W[15] + rotr15( W[4]) + rotr25( W[7]); + W[7] = W[7] + W[0] + rotr15( W[5]) + rotr25( W[8]); + W[8] = W[8] + W[1] + rotr15( W[6]) + rotr25( W[9]); + W[9] = W[9] + W[2] + rotr15( W[7]) + rotr25(W[10]); + W[10] = W[10] + W[3] + rotr15( W[8]) + rotr25(W[11]); + W[11] = W[11] + W[4] + rotr15( W[9]) + rotr25(W[12]); + W[12] = W[12] + W[5] + rotr15(W[10]) + rotr25(W[13]); + W[13] = W[13] + W[6] + rotr15(W[11]) + rotr25(W[14]); + W[14] = W[14] + W[7] + rotr15(W[12]) + rotr25(W[15]); + W[15] = W[15] + W[8] + rotr15(W[13]) + rotr25( W[0]); + + V[1] += 0x4d2c6dfcU + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]); + V[5] = 0x4d2c6dfcU + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); + + V[0] += 0x53380d13U + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + V[4] = 0x53380d13U + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); + + V[7] += 0x650a7354U + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = 0x650a7354U + V[3] + W[2] + 
ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); + + V[6] += 0x766a0abbU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = 0x766a0abbU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); + + V[5] += 0x81c2c92eU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]); + V[1] = 0x81c2c92eU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); + + V[4] += 0x92722c85U + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = 0x92722c85U + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); + + V[3] += 0xa2bfe8a1U + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = 0xa2bfe8a1U + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); + + V[2] += 0xa81a664bU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]); + V[6] = 0xa81a664bU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); + + V[1] += 0xc24b8b70U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]); + V[5] = 0xc24b8b70U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); + + V[0] += 0xc76c51a3U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + V[4] = 0xc76c51a3U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); + + V[7] += 0xd192e819U + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = 0xd192e819U + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); + + V[6] += 0xd6990624U + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = 0xd6990624U + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); + + V[5] += 0xf40e3585U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]); + V[1] = 0xf40e3585U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); + + V[4] += 0x106aa070U + 
V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = 0x106aa070U + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); + + V[3] += 0x19a4c116U + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = 0x19a4c116U + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); + + V[2] += 0x1e376c08U + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]); + V[6] = 0x1e376c08U + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); + +//---------------------------------------------------------------------------------- + + W[0] = W[0] + W[9] + rotr15(W[14]) + rotr25( W[1]); + W[1] = W[1] + W[10] + rotr15(W[15]) + rotr25( W[2]); + W[2] = W[2] + W[11] + rotr15( W[0]) + rotr25( W[3]); + W[3] = W[3] + W[12] + rotr15( W[1]) + rotr25( W[4]); + W[4] = W[4] + W[13] + rotr15( W[2]) + rotr25( W[5]); + W[5] = W[5] + W[14] + rotr15( W[3]) + rotr25( W[6]); + W[6] = W[6] + W[15] + rotr15( W[4]) + rotr25( W[7]); + W[7] = W[7] + W[0] + rotr15( W[5]) + rotr25( W[8]); + W[8] = W[8] + W[1] + rotr15( W[6]) + rotr25( W[9]); + W[9] = W[9] + W[2] + rotr15( W[7]) + rotr25(W[10]); + W[10] = W[10] + W[3] + rotr15( W[8]) + rotr25(W[11]); + W[11] = W[11] + W[4] + rotr15( W[9]) + rotr25(W[12]); + W[12] = W[12] + W[5] + rotr15(W[10]) + rotr25(W[13]); + W[13] = W[13] + W[6] + rotr15(W[11]) + rotr25(W[14]); + + V[1] += 0x2748774cU + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]); + V[5] = 0x2748774cU + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); + + V[0] += 0x34b0bcb5U + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + V[4] = 0x34b0bcb5U + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); + + V[7] += 0x391c0cb3U + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = 0x391c0cb3U + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); + + V[6] += 0x4ed8aa4aU + 
V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = 0x4ed8aa4aU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); + + V[5] += 0x5b9cca4fU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]); + V[1] = 0x5b9cca4fU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); + + V[4] += 0x682e6ff3U + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = 0x682e6ff3U + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); + + V[3] += 0x748f82eeU + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = 0x748f82eeU + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); + + V[2] += 0x78a5636fU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]); + V[6] = 0x78a5636fU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); + + V[1] += 0x84c87814U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]); + V[5] = 0x84c87814U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); + + V[0] += 0x8cc70208U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + V[4] = 0x8cc70208U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); + + V[7] += 0x90befffaU + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = 0x90befffaU + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); + + V[6] += 0xa4506cebU + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = 0xa4506cebU + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); + + V[5] += 0xbef9a3f7U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]); + V[1] = 0xbef9a3f7U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); + + V[4] += 0xc67178f2U + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = 0xc67178f2U + V[0] + W[13] + ch(V[5], 
V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); + +//---------------------------------------------------------------------------------- + + W[0] = state0 + V[0] + rotr25(state1 + V[1]); + W[1] = state1 + V[1] + 0x00a00000U + rotr25(state2 + V[2]); + W[2] = state2 + V[2] + rotr15(W[0]) + rotr25(state3 + V[3]); + W[3] = state3 + V[3] + rotr15(W[1]) + rotr25(state4 + V[4]); + W[4] = state4 + V[4] + rotr15(W[2]) + rotr25(state5 + V[5]); + W[5] = state5 + V[5] + rotr15(W[3]) + rotr25(state6 + V[6]); + W[6] = state6 + V[6] + 0x00000100U + rotr15(W[4]) + rotr25(state7 + V[7]); + W[7] = state7 + V[7] + W[0] + 0x11002000U + rotr15(W[5]); + W[8] = W[1] + 0x80000000U + rotr15(W[6]); + W[9] = W[2] + rotr15(W[7]); + W[10] = W[3] + rotr15(W[8]); + W[11] = W[4] + rotr15(W[9]); + W[12] = W[5] + rotr15(W[10]); + W[13] = W[6] + rotr15(W[11]); + W[14] = W[7] + 0x00400022U + rotr15(W[12]); + W[15] = W[8] + 0x00000100U + rotr15(W[13]) + rotr25(W[0]); + + // 0x71374491U + 0x1f83d9abU + state1 + const u state1AaddV1 = state1A + V[1]; + // 0xb5c0fbcfU + 0x9b05688cU + state2 + const u state2AaddV2 = state2A + V[2]; + // 0x510e527fU + 0xe9b5dba5U + state3 + const u state3AaddV3 = state3A + V[3]; + // 0x3956c25bU + state4 + const u state4AaddV4 = state4A + V[4]; + // 0x59f111f1U + state5 + const u state5AaddV5 = state5A + V[5]; + // 0x923f82a4U + state6 + const u state6AaddV6 = state6A + V[6]; + // 0xab1c5ed5U + state7 + const u state7AaddV7 = state7A + V[7]; + + // 0x98c7e2a2U + state0 + V[3] = state0A + V[0]; + // 0xfc08884dU + state0 + V[7] = state0B + V[0]; + V[0] = 0x6a09e667U; + V[1] = 0xbb67ae85U; + V[2] = 0x3c6ef372U; + V[4] = 0x510e527fU; + V[5] = 0x9b05688cU; + V[6] = 0x1f83d9abU; + + V[2] += state1AaddV1 + ch(V[3], V[4], V[5]) + rotr26(V[3]); + V[6] = state1AaddV1 + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); + + V[1] += state2AaddV2 + ch(V[2], V[3], V[4]) + rotr26(V[2]); + V[5] = state2AaddV2 + ch(V[2], V[3], V[4]) + rotr26(V[2]) + 
rotr30(V[6]) + ma(V[7], V[0], V[6]); + + V[0] += state3AaddV3 + ch(V[1], V[2], V[3]) + rotr26(V[1]); + V[4] = state3AaddV3 + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); + + V[7] += state4AaddV4 + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = state4AaddV4 + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); + + V[6] += state5AaddV5 + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = state5AaddV5 + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); + + V[5] += state6AaddV6 + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]); + V[1] = state6AaddV6 + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); + + V[4] += state7AaddV7 + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = state7AaddV7 + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); + + V[3] += 0x5807aa98U + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = 0x5807aa98U + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); + + V[2] += 0x12835b01U + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]); + V[6] = 0x12835b01U + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); + + V[1] += 0x243185beU + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]); + V[5] = 0x243185beU + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); + + V[0] += 0x550c7dc3U + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + V[4] = 0x550c7dc3U + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); + + V[7] += 0x72be5d74U + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = 0x72be5d74U + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); + + V[6] += 0x80deb1feU + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = 0x80deb1feU + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); + + V[5] += 0x9bdc06a7U + V[1] + ch(V[6], 
V[7], V[0]) + rotr26(V[6]); + V[1] = 0x9bdc06a7U + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); + + V[4] += 0xc19bf274U + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = 0xc19bf274U + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); + + V[3] += 0xe49b69c1U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = 0xe49b69c1U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); + + V[2] += 0xefbe4786U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]); + V[6] = 0xefbe4786U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); + + V[1] += 0x0fc19dc6U + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]); + V[5] = 0x0fc19dc6U + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); + + V[0] += 0x240ca1ccU + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + V[4] = 0x240ca1ccU + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); + + V[7] += 0x2de92c6fU + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = 0x2de92c6fU + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); + + V[6] += 0x4a7484aaU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = 0x4a7484aaU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); + + V[5] += 0x5cb0a9dcU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]); + V[1] = 0x5cb0a9dcU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); + + V[4] += 0x76f988daU + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = 0x76f988daU + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); + + V[3] += 0x983e5152U + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = 0x983e5152U + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], 
V[2], V[0]); + + V[2] += 0xa831c66dU + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]); + V[6] = 0xa831c66dU + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); + + V[1] += 0xb00327c8U + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]); + V[5] = 0xb00327c8U + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); + + V[0] += 0xbf597fc7U + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + V[4] = 0xbf597fc7U + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); + + V[7] += 0xc6e00bf3U + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = 0xc6e00bf3U + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); + + V[6] += 0xd5a79147U + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = 0xd5a79147U + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); + + V[5] += 0x06ca6351U + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]); + V[1] = 0x06ca6351U + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); + + V[4] += 0x14292967U + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = 0x14292967U + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); + +//---------------------------------------------------------------------------------- + + W[0] = W[0] + W[9] + rotr15(W[14]) + rotr25( W[1]); + W[1] = W[1] + W[10] + rotr15(W[15]) + rotr25( W[2]); + W[2] = W[2] + W[11] + rotr15( W[0]) + rotr25( W[3]); + W[3] = W[3] + W[12] + rotr15( W[1]) + rotr25( W[4]); + W[4] = W[4] + W[13] + rotr15( W[2]) + rotr25( W[5]); + W[5] = W[5] + W[14] + rotr15( W[3]) + rotr25( W[6]); + W[6] = W[6] + W[15] + rotr15( W[4]) + rotr25( W[7]); + W[7] = W[7] + W[0] + rotr15( W[5]) + rotr25( W[8]); + W[8] = W[8] + W[1] + rotr15( W[6]) + rotr25( W[9]); + W[9] = W[9] + W[2] + rotr15( W[7]) + rotr25(W[10]); + W[10] = W[10] 
+ W[3] + rotr15( W[8]) + rotr25(W[11]); + W[11] = W[11] + W[4] + rotr15( W[9]) + rotr25(W[12]); + W[12] = W[12] + W[5] + rotr15(W[10]) + rotr25(W[13]); + W[13] = W[13] + W[6] + rotr15(W[11]) + rotr25(W[14]); + W[14] = W[14] + W[7] + rotr15(W[12]) + rotr25(W[15]); + W[15] = W[15] + W[8] + rotr15(W[13]) + rotr25( W[0]); + + V[3] += 0x27b70a85U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = 0x27b70a85U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); + + V[2] += 0x2e1b2138U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]); + V[6] = 0x2e1b2138U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); + + V[1] += 0x4d2c6dfcU + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]); + V[5] = 0x4d2c6dfcU + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); + + V[0] += 0x53380d13U + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + V[4] = 0x53380d13U + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); + + V[7] += 0x650a7354U + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = 0x650a7354U + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); + + V[6] += 0x766a0abbU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = 0x766a0abbU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); + + V[5] += 0x81c2c92eU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]); + V[1] = 0x81c2c92eU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); + + V[4] += 0x92722c85U + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = 0x92722c85U + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); + + V[3] += 0xa2bfe8a1U + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = 0xa2bfe8a1U + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + 
ma(V[1], V[2], V[0]); + + V[2] += 0xa81a664bU + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]); + V[6] = 0xa81a664bU + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); + + V[1] += 0xc24b8b70U + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]); + V[5] = 0xc24b8b70U + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); + + V[0] += 0xc76c51a3U + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + V[4] = 0xc76c51a3U + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); + + V[7] += 0xd192e819U + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = 0xd192e819U + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); + + V[6] += 0xd6990624U + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = 0xd6990624U + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); + + V[5] += 0xf40e3585U + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]); + V[1] = 0xf40e3585U + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); + + V[4] += 0x106aa070U + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = 0x106aa070U + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); + +//---------------------------------------------------------------------------------- + + W[0] = W[0] + W[9] + rotr15(W[14]) + rotr25( W[1]); + W[1] = W[1] + W[10] + rotr15(W[15]) + rotr25( W[2]); + W[2] = W[2] + W[11] + rotr15( W[0]) + rotr25( W[3]); + W[3] = W[3] + W[12] + rotr15( W[1]) + rotr25( W[4]); + W[4] = W[4] + W[13] + rotr15( W[2]) + rotr25( W[5]); + W[5] = W[5] + W[14] + rotr15( W[3]) + rotr25( W[6]); + W[6] = W[6] + W[15] + rotr15( W[4]) + rotr25( W[7]); + W[7] = W[7] + W[0] + rotr15( W[5]) + rotr25( W[8]); + W[8] = W[8] + W[1] + rotr15( W[6]) + rotr25( W[9]); + W[9] = W[9] + W[2] + rotr15( W[7]) + rotr25(W[10]); + 
W[10] = W[10] + W[3] + rotr15( W[8]) + rotr25(W[11]); + W[11] = W[11] + W[4] + rotr15( W[9]) + rotr25(W[12]); + W[12] = W[12] + W[5] + rotr15(W[10]) + rotr25(W[13]); + + V[3] += 0x19a4c116U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = 0x19a4c116U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); + + V[2] += 0x1e376c08U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]); + V[6] = 0x1e376c08U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); + + V[1] += 0x2748774cU + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]); + V[5] = 0x2748774cU + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); + + V[0] += 0x34b0bcb5U + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + V[4] = 0x34b0bcb5U + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); + + V[7] += 0x391c0cb3U + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + V[3] = 0x391c0cb3U + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); + + V[6] += 0x4ed8aa4aU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]); + V[2] = 0x4ed8aa4aU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); + + V[5] += 0x5b9cca4fU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]); + V[1] = 0x5b9cca4fU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); + + V[4] += 0x682e6ff3U + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]); + V[0] = 0x682e6ff3U + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); + + V[3] += 0x748f82eeU + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]); + V[7] = 0x748f82eeU + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); + + V[2] += 0x78a5636fU + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]); + + V[1] += 0x84c87814U + V[5] + W[10] + ch(V[2], V[3], 
V[4]) + rotr26(V[2]); + + V[0] += 0x8cc70208U + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]); + + V[7] += V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]); + +#define FOUND (0x800) +#define NFLAG (0x7FF) + +#ifdef VECTORS4 + if ((V[7].x == 0x136032edU) ^ (V[7].y == 0x136032edU) ^ (V[7].z == 0x136032edU) ^ (V[7].w == 0x136032edU)) + output[FOUND] = output[NFLAG & nonce.x] = (V[7].x == 0x136032edU) ? nonce.x : ((V[7].y == 0x136032edU) ? nonce.y : ((V[7].z == 0x136032edU) ? nonce.z : nonce.w)); +#elif defined VECTORS2 + if ((V[7].x == 0x136032edU) + (V[7].y == 0x136032edU)) + output[FOUND] = output[NFLAG & nonce.x] = (V[7].x == 0x136032edU) ? nonce.x : nonce.y; +#else + if (V[7] == 0x136032edU) + output[FOUND] = output[NFLAG & nonce] = nonce; +#endif +} diff --git a/phatk120724.cl b/phatk120724.cl new file mode 100644 index 00000000..0f604436 --- /dev/null +++ b/phatk120724.cl @@ -0,0 +1,417 @@ +// This file is taken and modified from the public-domain poclbm project, and +// I have therefore decided to keep it public-domain. 
+// Modified version copyright 2011-2012 Con Kolivas +// u is the lane type every work-item computes on: uint, uint2 or uint4 +#ifdef VECTORS4 + typedef uint4 u; +#elif defined VECTORS2 + typedef uint2 u; +#else + typedef uint u; +#endif +// SHA-256 round constants (FIPS 180-4) +__constant uint K[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; +// Nonce-independent message words, read through t1C/P1C..P4C: entries 0-63 are indexed by the first pass, 64-127 by the second +__constant uint ConstW[128] = { +0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x80000000U, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000280U, +0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, +// entries 64-127 +0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x80000000U, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000100U, 
+0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, +0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 +}; +// SHA-256 initial hash value (FIPS 180-4) +__constant uint H[8] = { + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 +}; + + +#ifdef BITALIGN + #pragma OPENCL EXTENSION cl_amd_media_ops : enable + #define rot(x, y) amd_bitalign(x, x, (uint)(32 - y)) + +// This part is not from the stock poclbm kernel. It's part of an optimization +// added in the Phoenix Miner. + +// Some AMD devices have a BFI_INT opcode, which behaves exactly like the +// SHA-256 Ch function, but provides it in exactly one instruction. If +// detected, use it for Ch. Otherwise, construct Ch out of simpler logical +// primitives. + + #ifdef BFI_INT + // Well, slight problem... It turns out BFI_INT isn't actually exposed to + // OpenCL (or CAL IL for that matter) in any way. However, there is + // a similar instruction, BYTE_ALIGN_INT, which is exposed to OpenCL via + // amd_bytealign, takes the same inputs, and provides the same output. + // We can use that as a placeholder for BFI_INT and have the application + // patch it after compilation. + + // This is the BFI_INT function + #define Ch(x, y, z) amd_bytealign(x,y,z) + // Ma can also be implemented in terms of BFI_INT...
+ #define Ma(z, x, y) amd_bytealign(z^x,y,x) + #else // BFI_INT + // Later SDKs optimise this to BFI INT without patching and GCN + // actually fails if manually patched with BFI_INT + + #define Ch(x, y, z) bitselect((u)z, (u)y, (u)x) + #define Ma(x, y, z) bitselect((u)x, (u)y, (u)z ^ (u)x) + #define rotr(x, y) amd_bitalign((u)x, (u)x, (u)y) + #endif +#else // BITALIGN + #define Ch(x, y, z) (z ^ (x & (y ^ z))) + #define Ma(x, y, z) ((x & z) | (y & (x | z))) + #define rot(x, y) rotate((u)x, (u)y) + #define rotr(x, y) rotate((u)x, (u)(32-y)) +#endif + +// Vals[] is indexed as (k + 128 - n) % 8, so round n addresses the working variables by rotation instead of copying them + +//Various intermediate calculations for each SHA round +#define s0(n) (S0(Vals[(0 + 128 - (n)) % 8])) +#define S0(n) (rot(n, 30u)^rot(n, 19u)^rot(n,10u)) + +#define s1(n) (S1(Vals[(4 + 128 - (n)) % 8])) +#define S1(n) (rot(n, 26u)^rot(n, 21u)^rot(n, 7u)) + +#define ch(n) Ch(Vals[(4 + 128 - (n)) % 8],Vals[(5 + 128 - (n)) % 8],Vals[(6 + 128 - (n)) % 8]) +#define maj(n) Ma(Vals[(1 + 128 - (n)) % 8],Vals[(2 + 128 - (n)) % 8],Vals[(0 + 128 - (n)) % 8]) + +//t1 calc when W is already calculated +#define t1(n) K[(n) % 64] + Vals[(7 + 128 - (n)) % 8] + W[(n)] + s1(n) + ch(n) + +//t1 calc which calculates W +#define t1W(n) K[(n) % 64] + Vals[(7 + 128 - (n)) % 8] + W(n) + s1(n) + ch(n) + +//Used for constant W Values (the compiler optimizes out zeros) +#define t1C(n) (K[(n) % 64]+ ConstW[(n)]) + Vals[(7 + 128 - (n)) % 8] + s1(n) + ch(n) + +//t2 Calc +#define t2(n) maj(n) + s0(n) +//Left rotate by n built from plain shifts; P1C/P2C apply it to ConstW entries +#define rotC(x,n) (x<<n | x >> (32-n)) + +//W calculation used for SHA round +#define W(n) (W[n] = P4(n) + P3(n) + P2(n) + P1(n)) + + + +//Partial W calculations (used for the beginning where only some values are nonzero) +#define P1(n) ((rot(W[(n)-2],15u)^rot(W[(n)-2],13u)^((W[(n)-2])>>10U))) +#define P2(n) ((rot(W[(n)-15],25u)^rot(W[(n)-15],14u)^((W[(n)-15])>>3U))) + + +#define p1(x) ((rot(x,15u)^rot(x,13u)^((x)>>10U))) +#define p2(x) ((rot(x,25u)^rot(x,14u)^((x)>>3U))) + + +#define P3(n) W[n-7] +#define P4(n) W[n-16] + + +//Partial Calcs for constant W
values +#define P1C(n) ((rotC(ConstW[(n)-2],15)^rotC(ConstW[(n)-2],13)^((ConstW[(n)-2])>>10U))) +#define P2C(n) ((rotC(ConstW[(n)-15],25)^rotC(ConstW[(n)-15],14)^((ConstW[(n)-15])>>3U))) +#define P3C(x) ConstW[x-7] +#define P4C(x) ConstW[x-16] + +//SHA round with built in W calc (round n writes Vals[(3-n) mod 8] and Vals[(7-n) mod 8]; t1W(n) is expanded twice, so W[n] is assigned twice with the same value) +#define sharoundW(n) Barrier1(n); Vals[(3 + 128 - (n)) % 8] += t1W(n); Vals[(7 + 128 - (n)) % 8] = t1W(n) + t2(n); + +//SHA round without W calc (W[n] must already hold its final value) +#define sharound(n) Barrier2(n); Vals[(3 + 128 - (n)) % 8] += t1(n); Vals[(7 + 128 - (n)) % 8] = t1(n) + t2(n); + +//SHA round for constant W values (the message word comes from ConstW[n] instead of W[n]) +#define sharoundC(n) Barrier3(n); Vals[(3 + 128 - (n)) % 8] += t1C(n); Vals[(7 + 128 - (n)) % 8] = t1C(n) + t2(n); + +//The compiler is stupid... I put this in there only to stop the compiler from (de)optimizing the order; each Barrier only assigns the otherwise unused dummy t1 +#define Barrier1(n) t1 = t1C((n+1)) +#define Barrier2(n) t1 = t1C((n)) +#define Barrier3(n) t1 = t1C((n)) + +//#define WORKSIZE 256 (left disabled: WORKSIZE is not defined in this file and is presumably supplied through the build options) +#define MAXBUFFERS (4095) +// search: each work-item derives its nonce lane(s) in W[3], runs two chained SHA-256 passes (rounds 0-3 of the first appear to be precomputed by the caller into PreVal0/PreVal4) and stores W[3] into output[] for every lane whose final word W[117] is zero +__kernel + __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +void search( const uint state0, const uint state1, const uint state2, const uint state3, + const uint state4, const uint state5, const uint state6, const uint state7, + const uint B1, const uint C1, const uint D1, + const uint F1, const uint G1, const uint H1, + const u base, + const uint W16, const uint W17, + const uint PreVal4, const uint PreVal0, + const uint PreW18, const uint PreW19, + const uint PreW31, const uint PreW32, + + __global uint * output) +{ + // W holds the message-schedule words (first pass below index 64, second pass from 64 up); Vals holds the eight working variables + + u W[124]; + u Vals[8]; + +//Dummy Variable to prevent compiler from reordering between rounds + u t1; + + //Vals[0]=state0; + Vals[1]=B1; + Vals[2]=C1; + Vals[3]=D1; + //Vals[4]=PreVal4; + Vals[5]=F1; + Vals[6]=G1; + Vals[7]=H1; + + W[16] = W16; + W[17] = W17; + +#ifdef VECTORS4 + //Less dependencies to get both the local id and group id and then add them + W[3] = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKSIZE * 4u); + uint r =
rot(W[3].x,25u)^rot(W[3].x,14u)^((W[3].x)>>3U); + //Since only the 2 LSB is opposite between the nonces, we can save an instruction by flipping the 4 bits in W18 rather than the 1 bit in W3 (assumes the lanes of base differ only in their two low bits - TODO confirm against the host code) + W[18] = PreW18 + (u){r, r ^ 0x2004000U, r ^ 0x4008000U, r ^ 0x600C000U}; +#elif defined VECTORS2 + W[3] = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u); + uint r = rot(W[3].x,25u)^rot(W[3].x,14u)^((W[3].x)>>3U); + W[18] = PreW18 + (u){r, r ^ 0x2004000U}; +#else + W[3] = base + get_local_id(0) + get_group_id(0) * (WORKSIZE); + u r = rot(W[3],25u)^rot(W[3],14u)^((W[3])>>3U); + W[18] = PreW18 + r; +#endif + //the order of the W calcs and Rounds is like this because the compiler needs help finding how to order the instructions + + // Rounds 0-3 are not executed here: Vals[4] and Vals[0] start from the PreVal4/PreVal0 arguments plus the nonce W[3], and the first round run is 4 + + Vals[4] = PreVal4 + W[3]; + Vals[0] = PreVal0 + W[3]; + + sharoundC(4); + W[19] = PreW19 + W[3]; + sharoundC(5); + W[20] = P4C(20) + P1(20); + sharoundC(6); + W[21] = P1(21); + sharoundC(7); + W[22] = P3C(22) + P1(22); + sharoundC(8); + W[23] = W[16] + P1(23); + sharoundC(9); + W[24] = W[17] + P1(24); + sharoundC(10); + W[25] = P1(25) + P3(25); + W[26] = P1(26) + P3(26); + sharoundC(11); + W[27] = P1(27) + P3(27); + W[28] = P1(28) + P3(28); + sharoundC(12); + W[29] = P1(29) + P3(29); + sharoundC(13); + W[30] = P1(30) + P2C(30) + P3(30); + W[31] = PreW31 + (P1(31) + P3(31)); + sharoundC(14); + W[32] = PreW32 + (P1(32) + P3(32)); + sharoundC(15); + sharound(16); + sharound(17); + sharound(18); + sharound(19); + sharound(20); + sharound(21); + sharound(22); + sharound(23); + sharound(24); + sharound(25); + sharound(26); + sharound(27); + sharound(28); + sharound(29); + sharound(30); + sharound(31); + sharound(32); + sharoundW(33); + sharoundW(34); + sharoundW(35); + sharoundW(36); + sharoundW(37); + sharoundW(38); + sharoundW(39); + sharoundW(40); + sharoundW(41); + sharoundW(42); + sharoundW(43); + sharoundW(44); + sharoundW(45); + sharoundW(46); + sharoundW(47); + sharoundW(48); + sharoundW(49); + sharoundW(50); +
sharoundW(51); + sharoundW(52); + sharoundW(53); + sharoundW(54); + sharoundW(55); + sharoundW(56); + sharoundW(57); + sharoundW(58); + sharoundW(59); + sharoundW(60); + sharoundW(61); + sharoundW(62); + sharoundW(63); + // The first-pass result (state word + working variable) becomes message words 64-71 of the second pass + W[64]=state0+Vals[0]; + W[65]=state1+Vals[1]; + W[66]=state2+Vals[2]; + W[67]=state3+Vals[3]; + W[68]=state4+Vals[4]; + W[69]=state5+Vals[5]; + W[70]=state6+Vals[6]; + W[71]=state7+Vals[7]; + // Restart the working variables from H[] for the second pass + Vals[0]=H[0]; + Vals[1]=H[1]; + Vals[2]=H[2]; + Vals[3]=H[3]; + Vals[4]=H[4]; + Vals[5]=H[5]; + Vals[6]=H[6]; + Vals[7]=H[7]; + + //sharound(64 + 0); written out by hand below - W[64] is the only non-constant input and the literals presumably fold the H[]-only terms (0xa54ff53aU equals H[3]) + const u Temp = (0xb0edbdd0U + K[0]) + W[64]; + Vals[7] = Temp + 0x08909ae5U; + Vals[3] = 0xa54ff53aU + Temp; + +#define P124(n) P2(n) + P1(n) + P4(n) + + + W[64 + 16] = + P2(64 + 16) + P4(64 + 16); + sharound(64 + 1); + W[64 + 17] = P1C(64 + 17) + P2(64 + 17) + P4(64 + 17); + sharound(64 + 2); + W[64 + 18] = P124(64 + 18); + sharound(64 + 3); + W[64 + 19] = P124(64 + 19); + sharound(64 + 4); + W[64 + 20] = P124(64 + 20); + sharound(64 + 5); + W[64 + 21] = P124(64 + 21); + sharound(64 + 6); + W[64 + 22] = P4(64 + 22) + P3C(64 + 22) + P2(64 + 22) + P1(64 + 22); + sharound(64 + 7); + W[64 + 23] = P4(64 + 23) + P3(64 + 23) + P2C(64 + 23) + P1(64 + 23); + sharoundC(64 + 8); + W[64 + 24] = P1(64 + 24) + P4C(64 + 24) + P3(64 + 24); + sharoundC(64 + 9); + W[64 + 25] = P3(64 + 25) + P1(64 + 25); + sharoundC(64 + 10); + W[64 + 26] = P3(64 + 26) + P1(64 + 26); + sharoundC(64 + 11); + W[64 + 27] = P3(64 + 27) + P1(64 + 27); + sharoundC(64 + 12); + W[64 + 28] = P3(64 + 28) + P1(64 + 28); + sharoundC(64 + 13); + W[64 + 29] = P1(64 + 29) + P3(64 + 29); + W[64 + 30] = P3(64 + 30) + P2C(64 + 30) + P1(64 + 30); + sharoundC(64 + 14); + W[64 + 31] = P4C(64 + 31) + P3(64 + 31) + P2(64 + 31) + P1(64 + 31); + sharoundC(64 + 15); + sharound(64 + 16); + sharound(64 + 17); + sharound(64 + 18); + sharound(64 + 19); + sharound(64 + 20); + sharound(64 + 21); + sharound(64 + 22); + sharound(64 + 23); + sharound(64 + 24); + sharound(64 +
25); + sharound(64 + 26); + sharound(64 + 27); + sharound(64 + 28); + sharound(64 + 29); + sharound(64 + 30); + sharound(64 + 31); + sharoundW(64 + 32); + sharoundW(64 + 33); + sharoundW(64 + 34); + sharoundW(64 + 35); + sharoundW(64 + 36); + sharoundW(64 + 37); + sharoundW(64 + 38); + sharoundW(64 + 39); + sharoundW(64 + 40); + sharoundW(64 + 41); + sharoundW(64 + 42); + sharoundW(64 + 43); + sharoundW(64 + 44); + sharoundW(64 + 45); + sharoundW(64 + 46); + sharoundW(64 + 47); + sharoundW(64 + 48); + sharoundW(64 + 49); + sharoundW(64 + 50); + sharoundW(64 + 51); + sharoundW(64 + 52); + sharoundW(64 + 53); + sharoundW(64 + 54); + sharoundW(64 + 55); + sharoundW(64 + 56); + sharoundW(64 + 57); + sharoundW(64 + 58); + // No further sharound calls: the rest is collapsed into one sum accumulated in W[117], which also computes W[123] in place via W(59+64) + W[117] += W[108] + Vals[3] + Vals[7] + P2(124) + P1(124) + Ch((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64),Vals[1],Vals[2]) - + (-(K[60] + H[7]) - S1((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64))); + // A lane whose W[117] is zero has its nonce W[3] stored twice: at output[FOUND] and at output[nonce & NFLAG] +#define FOUND (0x800) +#define NFLAG (0x7FF) + +#ifdef VECTORS4 + bool result = W[117].x & W[117].y & W[117].z & W[117].w; + if (!result) { + if (!W[117].x) + output[FOUND] = output[NFLAG & W[3].x] = W[3].x; + if (!W[117].y) + output[FOUND] = output[NFLAG & W[3].y] = W[3].y; + if (!W[117].z) + output[FOUND] = output[NFLAG & W[3].z] = W[3].z; + if (!W[117].w) + output[FOUND] = output[NFLAG & W[3].w] = W[3].w; + } +#elif defined VECTORS2 + bool result = W[117].x & W[117].y; + if (!result) { + if (!W[117].x) + output[FOUND] = output[NFLAG & W[3].x] = W[3].x; + if (!W[117].y) + output[FOUND] = output[NFLAG & W[3].y] = W[3].y; + } +#else + if (!W[117]) + output[FOUND] = output[NFLAG & W[3]] = W[3]; +#endif +} diff --git a/poclbm120724.cl b/poclbm120724.cl new file mode 100644 index 00000000..3e8b9943 --- /dev/null +++ b/poclbm120724.cl @@ -0,0 +1,1353 @@ +// -ck modified kernel taken from Phoenix taken from poclbm, with aspects of +// phatk and others.
+// Modified version copyright 2011-2012 Con Kolivas + +// This file is taken and modified from the public-domain poclbm project, and +// we have therefore decided to keep it public-domain in Phoenix. + +#ifdef VECTORS4 + typedef uint4 u; +#elif defined VECTORS2 + typedef uint2 u; +#else + typedef uint u; +#endif + +__constant uint K[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + + +// This part is not from the stock poclbm kernel. It's part of an optimization +// added in the Phoenix Miner. + +// Some AMD devices have a BFI_INT opcode, which behaves exactly like the +// SHA-256 ch function, but provides it in exactly one instruction. If +// detected, use it for ch. Otherwise, construct ch out of simpler logical +// primitives. + +#ifdef BITALIGN + #pragma OPENCL EXTENSION cl_amd_media_ops : enable + #define rotr(x, y) amd_bitalign((u)x, (u)x, (u)y) +#else + #define rotr(x, y) rotate((u)x, (u)(32 - y)) +#endif +#ifdef BFI_INT + // Well, slight problem... It turns out BFI_INT isn't actually exposed to + // OpenCL (or CAL IL for that matter) in any way. However, there is + // a similar instruction, BYTE_ALIGN_INT, which is exposed to OpenCL via + // amd_bytealign, takes the same inputs, and provides the same output. 
+ // We can use that as a placeholder for BFI_INT and have the application + // patch it after compilation. + + // This is the BFI_INT function + #define ch(x, y, z) amd_bytealign(x, y, z) + + // Ma can also be implemented in terms of BFI_INT... + #define Ma(x, y, z) amd_bytealign( (z^x), (y), (x) ) + + // AMD's KernelAnalyzer throws errors compiling the kernel if we use + // amd_bytealign on constants with vectors enabled, so we use this to avoid + // problems. (this is used 4 times, and likely optimized out by the compiler.) + #define Ma2(x, y, z) bitselect((u)x, (u)y, (u)z ^ (u)x) +#else // BFI_INT + //GCN actually fails if manually patched with BFI_INT + + #define ch(x, y, z) bitselect((u)z, (u)y, (u)x) + #define Ma(x, y, z) bitselect((u)x, (u)y, (u)z ^ (u)x) + #define Ma2(x, y, z) Ma(x, y, z) +#endif + + +__kernel +__attribute__((vec_type_hint(u))) +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +void search(const uint state0, const uint state1, const uint state2, const uint state3, + const uint state4, const uint state5, const uint state6, const uint state7, + const uint b1, const uint c1, + const uint f1, const uint g1, const uint h1, +#ifndef GOFFSET + const u base, +#endif + const uint fw0, const uint fw1, const uint fw2, const uint fw3, const uint fw15, const uint fw01r, + const uint D1A, const uint C1addK5, const uint B1addK6, + const uint W16addK16, const uint W17addK17, + const uint PreVal4addT1, const uint Preval0, + __global uint * output) +{ + u Vals[24]; + u *W = &Vals[8]; + +#ifdef GOFFSET + const u nonce = (uint)(get_global_id(0)); +#else + const u nonce = base + (uint)(get_global_id(0)); +#endif + +Vals[5]=Preval0; +Vals[5]+=nonce; + +Vals[0]=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); +Vals[0]+=ch(Vals[5],b1,c1); +Vals[0]+=D1A; + +Vals[2]=Vals[0]; +Vals[2]+=h1; + +Vals[1]=PreVal4addT1; +Vals[1]+=nonce; +Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); + +Vals[6]=C1addK5; 
+Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); +Vals[6]+=ch(Vals[2],Vals[5],b1); + +Vals[3]=Vals[6]; +Vals[3]+=g1; +Vals[0]+=Ma2(g1,Vals[1],f1); +Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); +Vals[6]+=Ma2(f1,Vals[0],Vals[1]); + +Vals[7]=B1addK6; +Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); +Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); + +Vals[4]=Vals[7]; +Vals[4]+=f1; + +Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); +Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); + +Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); +Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); +Vals[5]+=K[7]; +Vals[1]+=Vals[5]; +Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); +Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); + +Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); +Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); +Vals[2]+=K[8]; +Vals[0]+=Vals[2]; +Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); +Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); + +Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); +Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); +Vals[3]+=K[9]; +Vals[6]+=Vals[3]; +Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); +Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]); + +Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); +Vals[4]+=ch(Vals[6],Vals[0],Vals[1]); +Vals[4]+=K[10]; +Vals[7]+=Vals[4]; +Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); +Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]); + +Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); +Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); +Vals[1]+=K[11]; +Vals[5]+=Vals[1]; +Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); +Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); + +Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); +Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); +Vals[0]+=K[12]; +Vals[2]+=Vals[0]; +Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); +Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); + 
+Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); +Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); +Vals[6]+=K[13]; +Vals[3]+=Vals[6]; +Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); +Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); + +Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); +Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); +Vals[7]+=K[14]; +Vals[4]+=Vals[7]; +Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); +Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); + +Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); +Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); +Vals[5]+=0xC19BF3F4U; +Vals[1]+=Vals[5]; +Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); +Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); + +Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); +Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); +Vals[2]+=W16addK16; +Vals[0]+=Vals[2]; +Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); +Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); + +Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); +Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); +Vals[3]+=W17addK17; +Vals[6]+=Vals[3]; +Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); +Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]); + +W[2]=(rotr(nonce,7)^rotr(nonce,18)^(nonce>>3U)); +W[2]+=fw2; +Vals[4]+=W[2]; +Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); +Vals[4]+=ch(Vals[6],Vals[0],Vals[1]); +Vals[4]+=K[18]; +Vals[7]+=Vals[4]; +Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); +Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]); + +W[3]=nonce; +W[3]+=fw3; +Vals[1]+=W[3]; +Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); +Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); +Vals[1]+=K[19]; +Vals[5]+=Vals[1]; +Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); +Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); + +W[4]=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U)); +W[4]+=0x80000000U; +Vals[0]+=W[4]; +Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); 
+Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); +Vals[0]+=K[20]; +Vals[2]+=Vals[0]; +Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); +Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); + +W[5]=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U)); +Vals[6]+=W[5]; +Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); +Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); +Vals[6]+=K[21]; +Vals[3]+=Vals[6]; +Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); +Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); + +W[6]=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U)); +W[6]+=0x00000280U; +Vals[7]+=W[6]; +Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); +Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); +Vals[7]+=K[22]; +Vals[4]+=Vals[7]; +Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); +Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); + +W[7]=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U)); +W[7]+=fw0; +Vals[5]+=W[7]; +Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); +Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); +Vals[5]+=K[23]; +Vals[1]+=Vals[5]; +Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); +Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); + +W[8]=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U)); +W[8]+=fw1; +Vals[2]+=W[8]; +Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); +Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); +Vals[2]+=K[24]; +Vals[0]+=Vals[2]; +Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); +Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); + +W[9]=W[2]; +W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U)); +Vals[3]+=W[9]; +Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); +Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); +Vals[3]+=K[25]; +Vals[6]+=Vals[3]; +Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); +Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]); + +W[10]=W[3]; +W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U)); +Vals[4]+=W[10]; +Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); +Vals[4]+=ch(Vals[6],Vals[0],Vals[1]); +Vals[4]+=K[26]; +Vals[7]+=Vals[4]; 
+Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); +Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]); + +W[11]=W[4]; +W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U)); +Vals[1]+=W[11]; +Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); +Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); +Vals[1]+=K[27]; +Vals[5]+=Vals[1]; +Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); +Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); + +W[12]=W[5]; +W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U)); +Vals[0]+=W[12]; +Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); +Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); +Vals[0]+=K[28]; +Vals[2]+=Vals[0]; +Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); +Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); + +W[13]=W[6]; +W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U)); +Vals[6]+=W[13]; +Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); +Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); +Vals[6]+=K[29]; +Vals[3]+=Vals[6]; +Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); +Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); + +W[14]=0x00a00055U; +W[14]+=W[7]; +W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U)); +Vals[7]+=W[14]; +Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); +Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); +Vals[7]+=K[30]; +Vals[4]+=Vals[7]; +Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); +Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); + +W[15]=fw15; +W[15]+=W[8]; +W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U)); +Vals[5]+=W[15]; +Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); +Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); +Vals[5]+=K[31]; +Vals[1]+=Vals[5]; +Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); +Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); + +W[0]=fw01r; +W[0]+=W[9]; +W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U)); +Vals[2]+=W[0]; +Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); +Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); +Vals[2]+=K[32]; +Vals[0]+=Vals[2]; 
+Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); +Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); + +W[1]=fw1; +W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U)); +W[1]+=W[10]; +W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U)); +Vals[3]+=W[1]; +Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); +Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); +Vals[3]+=K[33]; +Vals[6]+=Vals[3]; +Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); +Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]); + +W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U)); +W[2]+=W[11]; +W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U)); +Vals[4]+=W[2]; +Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); +Vals[4]+=ch(Vals[6],Vals[0],Vals[1]); +Vals[4]+=K[34]; +Vals[7]+=Vals[4]; +Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); +Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]); + +W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U)); +W[3]+=W[12]; +W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U)); +Vals[1]+=W[3]; +Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); +Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); +Vals[1]+=K[35]; +Vals[5]+=Vals[1]; +Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); +Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); + +W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U)); +W[4]+=W[13]; +W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U)); +Vals[0]+=W[4]; +Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); +Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); +Vals[0]+=K[36]; +Vals[2]+=Vals[0]; +Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); +Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); + +W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U)); +W[5]+=W[14]; +W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U)); +Vals[6]+=W[5]; +Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); +Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); +Vals[6]+=K[37]; +Vals[3]+=Vals[6]; +Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); +Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); + 
+W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U)); +W[6]+=W[15]; +W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U)); +Vals[7]+=W[6]; +Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); +Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); +Vals[7]+=K[38]; +Vals[4]+=Vals[7]; +Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); +Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); + +W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U)); +W[7]+=W[0]; +W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U)); +Vals[5]+=W[7]; +Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); +Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); +Vals[5]+=K[39]; +Vals[1]+=Vals[5]; +Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); +Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); + +W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U)); +W[8]+=W[1]; +W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U)); +Vals[2]+=W[8]; +Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); +Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); +Vals[2]+=K[40]; +Vals[0]+=Vals[2]; +Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); +Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); + +W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U)); +W[9]+=W[2]; +W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U)); +Vals[3]+=W[9]; +Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); +Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); +Vals[3]+=K[41]; +Vals[6]+=Vals[3]; +Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); +Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]); + +W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U)); +W[10]+=W[3]; +W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U)); +Vals[4]+=W[10]; +Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); +Vals[4]+=ch(Vals[6],Vals[0],Vals[1]); +Vals[4]+=K[42]; +Vals[7]+=Vals[4]; +Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); +Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]); + +W[11]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U)); +W[11]+=W[4]; +W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U)); +Vals[1]+=W[11]; 
+Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); +Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); +Vals[1]+=K[43]; +Vals[5]+=Vals[1]; +Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); +Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); + +W[12]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U)); +W[12]+=W[5]; +W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U)); +Vals[0]+=W[12]; +Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); +Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); +Vals[0]+=K[44]; +Vals[2]+=Vals[0]; +Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); +Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); + +W[13]+=(rotr(W[14],7)^rotr(W[14],18)^(W[14]>>3U)); +W[13]+=W[6]; +W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U)); +Vals[6]+=W[13]; +Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); +Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); +Vals[6]+=K[45]; +Vals[3]+=Vals[6]; +Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); +Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); + +W[14]+=(rotr(W[15],7)^rotr(W[15],18)^(W[15]>>3U)); +W[14]+=W[7]; +W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U)); +Vals[7]+=W[14]; +Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); +Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); +Vals[7]+=K[46]; +Vals[4]+=Vals[7]; +Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); +Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); + +W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U)); +W[15]+=W[8]; +W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U)); +Vals[5]+=W[15]; +Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); +Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); +Vals[5]+=K[47]; +Vals[1]+=Vals[5]; +Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); +Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); + +W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U)); +W[0]+=W[9]; +W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U)); +Vals[2]+=W[0]; +Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); +Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); 
+Vals[2]+=K[48]; +Vals[0]+=Vals[2]; +Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); +Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); + +W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U)); +W[1]+=W[10]; +W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U)); +Vals[3]+=W[1]; +Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); +Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); +Vals[3]+=K[49]; +Vals[6]+=Vals[3]; +Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); +Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]); + +W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U)); +W[2]+=W[11]; +W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U)); +Vals[4]+=W[2]; +Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); +Vals[4]+=ch(Vals[6],Vals[0],Vals[1]); +Vals[4]+=K[50]; +Vals[7]+=Vals[4]; +Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); +Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]); + +W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U)); +W[3]+=W[12]; +W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U)); +Vals[1]+=W[3]; +Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); +Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); +Vals[1]+=K[51]; +Vals[5]+=Vals[1]; +Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); +Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); + +W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U)); +W[4]+=W[13]; +W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U)); +Vals[0]+=W[4]; +Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); +Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); +Vals[0]+=K[52]; +Vals[2]+=Vals[0]; +Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); +Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); + +W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U)); +W[5]+=W[14]; +W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U)); +Vals[6]+=W[5]; +Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); +Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); +Vals[6]+=K[53]; +Vals[3]+=Vals[6]; +Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); +Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); + 
+W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U)); +W[6]+=W[15]; +W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U)); +Vals[7]+=W[6]; +Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); +Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); +Vals[7]+=K[54]; +Vals[4]+=Vals[7]; +Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); +Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); + +W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U)); +W[7]+=W[0]; +W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U)); +Vals[5]+=W[7]; +Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); +Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); +Vals[5]+=K[55]; +Vals[1]+=Vals[5]; +Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); +Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); + +W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U)); +W[8]+=W[1]; +W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U)); +Vals[2]+=W[8]; +Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); +Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); +Vals[2]+=K[56]; +Vals[0]+=Vals[2]; +Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); +Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); + +W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U)); +W[9]+=W[2]; +W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U)); +Vals[3]+=W[9]; +Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); +Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); +Vals[3]+=K[57]; +Vals[6]+=Vals[3]; +Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); +Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]); + +W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U)); +W[10]+=W[3]; +W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U)); +Vals[4]+=W[10]; +Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); +Vals[4]+=ch(Vals[6],Vals[0],Vals[1]); +Vals[4]+=K[58]; +Vals[7]+=Vals[4]; +Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); +Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]); + +W[11]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U)); +W[11]+=W[4]; +W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U)); +Vals[1]+=W[11]; 
+Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); +Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); +Vals[1]+=K[59]; +Vals[5]+=Vals[1]; +Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); +Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); + +W[12]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U)); +W[12]+=W[5]; +W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U)); +Vals[0]+=W[12]; +Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); +Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); +Vals[0]+=K[60]; +Vals[2]+=Vals[0]; +Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); +Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); + +W[13]+=(rotr(W[14],7)^rotr(W[14],18)^(W[14]>>3U)); +W[13]+=W[6]; +W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U)); +Vals[6]+=W[13]; +Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); +Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); +Vals[6]+=K[61]; +Vals[3]+=Vals[6]; +Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); +Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); + +Vals[7]+=W[14]; +Vals[7]+=(rotr(W[15],7)^rotr(W[15],18)^(W[15]>>3U)); +Vals[7]+=W[7]; +Vals[7]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U)); +Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); +Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); +Vals[7]+=K[62]; +Vals[4]+=Vals[7]; +Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); +Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); + +Vals[5]+=W[15]; +Vals[5]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U)); +Vals[5]+=W[8]; +Vals[5]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U)); +Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); +Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); +Vals[5]+=K[63]; +Vals[1]+=Vals[5]; +Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); +Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); + +Vals[5]+=state0; + +W[7]=state7; +W[7]+=Vals[2]; + +Vals[2]=0xF377ED68U; +Vals[2]+=Vals[5]; + +W[3]=state3; +W[3]+=Vals[0]; + +Vals[0]=0xa54ff53aU; +Vals[0]+=Vals[2]; +Vals[2]+=0x08909ae5U; + +W[6]=state6; +W[6]+=Vals[3]; + 
+Vals[3]=0x90BB1E3CU; +Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); +Vals[3]+=(0x9b05688cU^(Vals[0]&0xca0b3af3U)); + +Vals[7]+=state1; +Vals[3]+=Vals[7]; + +W[2]=state2; +W[2]+=Vals[6]; + +Vals[6]=0x3c6ef372U; +Vals[6]+=Vals[3]; +Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); +Vals[3]+=Ma2(0xbb67ae85U,Vals[2],0x6a09e667U); + +W[5]=state5; +W[5]+=Vals[4]; + +Vals[4]=0x50C6645BU; +Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); +Vals[4]+=ch(Vals[6],Vals[0],0x510e527fU); +Vals[4]+=W[2]; + +W[1]=Vals[7]; +Vals[7]=0xbb67ae85U; +Vals[7]+=Vals[4]; +Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); +Vals[4]+=Ma2(0x6a09e667U,Vals[3],Vals[2]); + +W[4]=state4; +W[4]+=Vals[1]; + +Vals[1]=0x3AC42E24U; +Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); +Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); +Vals[1]+=W[3]; + +W[0]=Vals[5]; + +Vals[5]=Vals[1]; +Vals[5]+=0x6a09e667U; + +Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); +Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); + +Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); +Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); +Vals[0]+=K[4]; +Vals[0]+=W[4]; +Vals[2]+=Vals[0]; +Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); +Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); + +Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); +Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); +Vals[6]+=K[5]; +Vals[6]+=W[5]; +Vals[3]+=Vals[6]; +Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); +Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); + +Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); +Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); +Vals[7]+=K[6]; +Vals[7]+=W[6]; +Vals[4]+=Vals[7]; +Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); +Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); + +Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); +Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); +Vals[5]+=K[7]; +Vals[5]+=W[7]; +Vals[1]+=Vals[5]; 
+Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); +Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); + +Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); +Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); +Vals[2]+=0x5807AA98U; +Vals[0]+=Vals[2]; +Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); +Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); + +Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); +Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); +Vals[3]+=K[9]; +Vals[6]+=Vals[3]; +Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); +Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]); + +Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); +Vals[4]+=ch(Vals[6],Vals[0],Vals[1]); +Vals[4]+=K[10]; +Vals[7]+=Vals[4]; +Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); +Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]); + +Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); +Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); +Vals[1]+=K[11]; +Vals[5]+=Vals[1]; +Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); +Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); + +Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); +Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); +Vals[0]+=K[12]; +Vals[2]+=Vals[0]; +Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); +Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); + +Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); +Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); +Vals[6]+=K[13]; +Vals[3]+=Vals[6]; +Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); +Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); + +Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); +Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); +Vals[7]+=K[14]; +Vals[4]+=Vals[7]; +Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); +Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); + +Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); +Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); +Vals[5]+=0xC19BF274U; +Vals[1]+=Vals[5]; 
+Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); +Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); + +W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U)); +Vals[2]+=W[0]; +Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); +Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); +Vals[2]+=K[16]; +Vals[0]+=Vals[2]; +Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); +Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); + +W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U)); +W[1]+=0x00a00000U; +Vals[3]+=W[1]; +Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); +Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); +Vals[3]+=K[17]; +Vals[6]+=Vals[3]; +Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); +Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]); + +W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U)); +W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U)); +Vals[4]+=W[2]; +Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); +Vals[4]+=ch(Vals[6],Vals[0],Vals[1]); +Vals[4]+=K[18]; +Vals[7]+=Vals[4]; +Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); +Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]); + +W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U)); +W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U)); +Vals[1]+=W[3]; +Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); +Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); +Vals[1]+=K[19]; +Vals[5]+=Vals[1]; +Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); +Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); + +W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U)); +W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U)); +Vals[0]+=W[4]; +Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); +Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); +Vals[0]+=K[20]; +Vals[2]+=Vals[0]; +Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); +Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); + +W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U)); +W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U)); +Vals[6]+=W[5]; +Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); 
+Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); +Vals[6]+=K[21]; +Vals[3]+=Vals[6]; +Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); +Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); + +W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U)); +W[6]+=0x00000100U; +W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U)); +Vals[7]+=W[6]; +Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); +Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); +Vals[7]+=K[22]; +Vals[4]+=Vals[7]; +Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); +Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); + +W[7]+=0x11002000U; +W[7]+=W[0]; +W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U)); +Vals[5]+=W[7]; +Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); +Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); +Vals[5]+=K[23]; +Vals[1]+=Vals[5]; +Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); +Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); + +W[8]=0x80000000U; +W[8]+=W[1]; +W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U)); +Vals[2]+=W[8]; +Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); +Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); +Vals[2]+=K[24]; +Vals[0]+=Vals[2]; +Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); +Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); + +W[9]=W[2]; +W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U)); +Vals[3]+=W[9]; +Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); +Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); +Vals[3]+=K[25]; +Vals[6]+=Vals[3]; +Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); +Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]); + +W[10]=W[3]; +W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U)); +Vals[4]+=W[10]; +Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); +Vals[4]+=ch(Vals[6],Vals[0],Vals[1]); +Vals[4]+=K[26]; +Vals[7]+=Vals[4]; +Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); +Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]); + +W[11]=W[4]; +W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U)); +Vals[1]+=W[11]; 
+Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); +Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); +Vals[1]+=K[27]; +Vals[5]+=Vals[1]; +Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); +Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); + +W[12]=W[5]; +W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U)); +Vals[0]+=W[12]; +Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); +Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); +Vals[0]+=K[28]; +Vals[2]+=Vals[0]; +Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); +Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); + +W[13]=W[6]; +W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U)); +Vals[6]+=W[13]; +Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); +Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); +Vals[6]+=K[29]; +Vals[3]+=Vals[6]; +Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); +Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); + +W[14]=0x00400022U; +W[14]+=W[7]; +W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U)); +Vals[7]+=W[14]; +Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); +Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); +Vals[7]+=K[30]; +Vals[4]+=Vals[7]; +Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); +Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); + +W[15]=0x00000100U; +W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U)); +W[15]+=W[8]; +W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U)); +Vals[5]+=W[15]; +Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); +Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); +Vals[5]+=K[31]; +Vals[1]+=Vals[5]; +Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); +Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); + +W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U)); +W[0]+=W[9]; +W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U)); +Vals[2]+=W[0]; +Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); +Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); +Vals[2]+=K[32]; +Vals[0]+=Vals[2]; +Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); 
+Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); + +W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U)); +W[1]+=W[10]; +W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U)); +Vals[3]+=W[1]; +Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); +Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); +Vals[3]+=K[33]; +Vals[6]+=Vals[3]; +Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); +Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]); + +W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U)); +W[2]+=W[11]; +W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U)); +Vals[4]+=W[2]; +Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); +Vals[4]+=ch(Vals[6],Vals[0],Vals[1]); +Vals[4]+=K[34]; +Vals[7]+=Vals[4]; +Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); +Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]); + +W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U)); +W[3]+=W[12]; +W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U)); +Vals[1]+=W[3]; +Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); +Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); +Vals[1]+=K[35]; +Vals[5]+=Vals[1]; +Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); +Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); + +W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U)); +W[4]+=W[13]; +W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U)); +Vals[0]+=W[4]; +Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); +Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); +Vals[0]+=K[36]; +Vals[2]+=Vals[0]; +Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); +Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); + +W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U)); +W[5]+=W[14]; +W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U)); +Vals[6]+=W[5]; +Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); +Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); +Vals[6]+=K[37]; +Vals[3]+=Vals[6]; +Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); +Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); + +W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U)); +W[6]+=W[15]; 
+W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U)); +Vals[7]+=W[6]; +Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); +Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); +Vals[7]+=K[38]; +Vals[4]+=Vals[7]; +Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); +Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); + +W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U)); +W[7]+=W[0]; +W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U)); +Vals[5]+=W[7]; +Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); +Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); +Vals[5]+=K[39]; +Vals[1]+=Vals[5]; +Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); +Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); + +W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U)); +W[8]+=W[1]; +W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U)); +Vals[2]+=W[8]; +Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); +Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); +Vals[2]+=K[40]; +Vals[0]+=Vals[2]; +Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); +Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); + +W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U)); +W[9]+=W[2]; +W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U)); +Vals[3]+=W[9]; +Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); +Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); +Vals[3]+=K[41]; +Vals[6]+=Vals[3]; +Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); +Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]); + +W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U)); +W[10]+=W[3]; +W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U)); +Vals[4]+=W[10]; +Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); +Vals[4]+=ch(Vals[6],Vals[0],Vals[1]); +Vals[4]+=K[42]; +Vals[7]+=Vals[4]; +Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); +Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]); + +W[11]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U)); +W[11]+=W[4]; +W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U)); +Vals[1]+=W[11]; +Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); 
+Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); +Vals[1]+=K[43]; +Vals[5]+=Vals[1]; +Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); +Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); + +W[12]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U)); +W[12]+=W[5]; +W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U)); +Vals[0]+=W[12]; +Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); +Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); +Vals[0]+=K[44]; +Vals[2]+=Vals[0]; +Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); +Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); + +W[13]+=(rotr(W[14],7)^rotr(W[14],18)^(W[14]>>3U)); +W[13]+=W[6]; +W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U)); +Vals[6]+=W[13]; +Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); +Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); +Vals[6]+=K[45]; +Vals[3]+=Vals[6]; +Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); +Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); + +W[14]+=(rotr(W[15],7)^rotr(W[15],18)^(W[15]>>3U)); +W[14]+=W[7]; +W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U)); +Vals[7]+=W[14]; +Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); +Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); +Vals[7]+=K[46]; +Vals[4]+=Vals[7]; +Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); +Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); + +W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U)); +W[15]+=W[8]; +W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U)); +Vals[5]+=W[15]; +Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); +Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); +Vals[5]+=K[47]; +Vals[1]+=Vals[5]; +Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); +Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); + +W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U)); +W[0]+=W[9]; +W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U)); +Vals[2]+=W[0]; +Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); +Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); +Vals[2]+=K[48]; +Vals[0]+=Vals[2]; 
+Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); +Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); + +W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U)); +W[1]+=W[10]; +W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U)); +Vals[3]+=W[1]; +Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); +Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); +Vals[3]+=K[49]; +Vals[6]+=Vals[3]; +Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); +Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]); + +W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U)); +W[2]+=W[11]; +W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U)); +Vals[4]+=W[2]; +Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); +Vals[4]+=ch(Vals[6],Vals[0],Vals[1]); +Vals[4]+=K[50]; +Vals[7]+=Vals[4]; +Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); +Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]); + +W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U)); +W[3]+=W[12]; +W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U)); +Vals[1]+=W[3]; +Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); +Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); +Vals[1]+=K[51]; +Vals[5]+=Vals[1]; +Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); +Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); + +W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U)); +W[4]+=W[13]; +W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U)); +Vals[0]+=W[4]; +Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); +Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); +Vals[0]+=K[52]; +Vals[2]+=Vals[0]; +Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); +Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); + +W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U)); +W[5]+=W[14]; +W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U)); +Vals[6]+=W[5]; +Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); +Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); +Vals[6]+=K[53]; +Vals[3]+=Vals[6]; +Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); +Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); + 
+W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U)); +W[6]+=W[15]; +W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U)); +Vals[7]+=W[6]; +Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); +Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); +Vals[7]+=K[54]; +Vals[4]+=Vals[7]; +Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); +Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); + +W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U)); +W[7]+=W[0]; +W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U)); +Vals[5]+=W[7]; +Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); +Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); +Vals[5]+=K[55]; +Vals[1]+=Vals[5]; +Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); +Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); + +W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U)); +W[8]+=W[1]; +W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U)); +Vals[2]+=W[8]; +Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); +Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); +Vals[2]+=K[56]; +Vals[0]+=Vals[2]; + +W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U)); +W[9]+=W[2]; +W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U)); +Vals[3]+=W[9]; +Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); +Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); +Vals[3]+=K[57]; +Vals[3]+=Vals[6]; + +W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U)); +W[10]+=W[3]; +W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U)); +Vals[4]+=W[10]; +Vals[4]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); +Vals[4]+=ch(Vals[3],Vals[0],Vals[1]); +Vals[4]+=K[58]; +Vals[4]+=Vals[7]; +Vals[1]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); +Vals[1]+=ch(Vals[4],Vals[3],Vals[0]); +Vals[1]+=W[11]; +Vals[1]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U)); +Vals[1]+=W[4]; +Vals[1]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U)); +Vals[1]+=K[59]; +Vals[1]+=Vals[5]; + +#define FOUND (0x800) +#define NFLAG (0x7FF) + +#if defined(VECTORS2) || defined(VECTORS4) + Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); + 
Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); + Vals[2]+=W[12]; + Vals[2]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U)); + Vals[2]+=W[5]; + Vals[2]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U)); + Vals[2]+=Vals[0]; + Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); + Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); + + if (any(Vals[2] == 0x136032edU)) { + if (Vals[2].x == 0x136032edU) + output[FOUND] = output[NFLAG & nonce.x] = nonce.x; + if (Vals[2].y == 0x136032edU) + output[FOUND] = output[NFLAG & nonce.y] = nonce.y; +#if defined(VECTORS4) + if (Vals[2].z == 0x136032edU) + output[FOUND] = output[NFLAG & nonce.z] = nonce.z; + if (Vals[2].w == 0x136032edU) + output[FOUND] = output[NFLAG & nonce.w] = nonce.w; +#endif + } +#else + if ((Vals[2]+ + Ma(Vals[6],Vals[5],Vals[7])+ + (rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22))+ + W[12]+ + (rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U))+ + W[5]+ + (rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U))+ + Vals[0]+ + (rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25))+ + ch(Vals[1],Vals[4],Vals[3])) == 0x136032edU) + output[FOUND] = output[NFLAG & nonce] = nonce; +#endif +} diff --git a/scrypt120724.cl b/scrypt120724.cl new file mode 100644 index 00000000..d38f6a54 --- /dev/null +++ b/scrypt120724.cl @@ -0,0 +1,757 @@ +#define rotl(x,y) rotate(x,y) /* rotl: left rotation of x by y bits, via the OpenCL rotate() built-in */ +#define Ch(x,y,z) bitselect(z,y,x) /* Ch: bitwise choose - where a bit of x is 1 take the bit of y, otherwise the bit of z */ +#define Maj(x,y,z) Ch((x^z),y,z) /* Maj: bitwise majority of x, y, z; where x and z agree the result is z, where they differ y decides */ + +#define EndianSwap(n) (rotl(n&0x00FF00FF,24U)|rotl(n&0xFF00FF00,8U)) /* EndianSwap: reverse the four bytes of a 32-bit value. NOTE(review): n is not parenthesized, so pass only simple operands */ + +#define Tr2(x) (rotl(x, 30U) ^ rotl(x, 19U) ^ rotl(x, 10U)) /* Tr2: SHA-256 big Sigma0 - right rotations by 2, 13, 22 written as left rotations */ +#define Tr1(x) (rotl(x, 26U) ^ rotl(x, 21U) ^ rotl(x, 7U)) /* Tr1: SHA-256 big Sigma1 - right rotations by 6, 11, 25 written as left rotations */ +#define Wr2(x) (rotl(x, 25U) ^ rotl(x, 14U) ^ (x>>3U)) /* Wr2: SHA-256 small sigma0 (message schedule) - rotr 7, rotr 18, shift right 3 */ +#define Wr1(x) (rotl(x, 15U) ^ rotl(x, 13U) ^ (x>>10U)) /* Wr1: SHA-256 small sigma1 (message schedule) - rotr 17, rotr 19, shift right 10 */ + +#define RND(a, b, c, d, e, f, g, h, k) /* RND: one SHA-256 round over working variables a..h; k is the caller-supplied sum of message word and round constant; only d and h are modified. Expands to three statements, so do not use it as the unbraced body of an if/for */ \ + h += Tr1(e) + Ch(e, f, g) + k; \ + d += h; \ + h += Tr2(a) + Maj(a, b, c); + +void SHA256(uint4*restrict state0,uint4*restrict state1, const uint4 block0, const uint4 block1, const uint4 block2, const uint4 block3) +{ + uint4 S0 = *state0; + uint4 S1 = 
*state1; + +#define A S0.x +#define B S0.y +#define C S0.z +#define D S0.w +#define E S1.x +#define F S1.y +#define G S1.z +#define H S1.w + + uint4 W[4]; + + W[ 0].x = block0.x; + RND(A,B,C,D,E,F,G,H, W[0].x+0x428a2f98U); + W[ 0].y = block0.y; + RND(H,A,B,C,D,E,F,G, W[0].y+0x71374491U); + W[ 0].z = block0.z; + RND(G,H,A,B,C,D,E,F, W[0].z+0xb5c0fbcfU); + W[ 0].w = block0.w; + RND(F,G,H,A,B,C,D,E, W[0].w+0xe9b5dba5U); + + W[ 1].x = block1.x; + RND(E,F,G,H,A,B,C,D, W[1].x+0x3956c25bU); + W[ 1].y = block1.y; + RND(D,E,F,G,H,A,B,C, W[1].y+0x59f111f1U); + W[ 1].z = block1.z; + RND(C,D,E,F,G,H,A,B, W[1].z+0x923f82a4U); + W[ 1].w = block1.w; + RND(B,C,D,E,F,G,H,A, W[1].w+0xab1c5ed5U); + + W[ 2].x = block2.x; + RND(A,B,C,D,E,F,G,H, W[2].x+0xd807aa98U); + W[ 2].y = block2.y; + RND(H,A,B,C,D,E,F,G, W[2].y+0x12835b01U); + W[ 2].z = block2.z; + RND(G,H,A,B,C,D,E,F, W[2].z+0x243185beU); + W[ 2].w = block2.w; + RND(F,G,H,A,B,C,D,E, W[2].w+0x550c7dc3U); + + W[ 3].x = block3.x; + RND(E,F,G,H,A,B,C,D, W[3].x+0x72be5d74U); + W[ 3].y = block3.y; + RND(D,E,F,G,H,A,B,C, W[3].y+0x80deb1feU); + W[ 3].z = block3.z; + RND(C,D,E,F,G,H,A,B, W[3].z+0x9bdc06a7U); + W[ 3].w = block3.w; + RND(B,C,D,E,F,G,H,A, W[3].w+0xc19bf174U); + + W[ 0].x += Wr1(W[ 3].z) + W[ 2].y + Wr2(W[ 0].y); + RND(A,B,C,D,E,F,G,H, W[0].x+0xe49b69c1U); + + W[ 0].y += Wr1(W[ 3].w) + W[ 2].z + Wr2(W[ 0].z); + RND(H,A,B,C,D,E,F,G, W[0].y+0xefbe4786U); + + W[ 0].z += Wr1(W[ 0].x) + W[ 2].w + Wr2(W[ 0].w); + RND(G,H,A,B,C,D,E,F, W[0].z+0x0fc19dc6U); + + W[ 0].w += Wr1(W[ 0].y) + W[ 3].x + Wr2(W[ 1].x); + RND(F,G,H,A,B,C,D,E, W[0].w+0x240ca1ccU); + + W[ 1].x += Wr1(W[ 0].z) + W[ 3].y + Wr2(W[ 1].y); + RND(E,F,G,H,A,B,C,D, W[1].x+0x2de92c6fU); + + W[ 1].y += Wr1(W[ 0].w) + W[ 3].z + Wr2(W[ 1].z); + RND(D,E,F,G,H,A,B,C, W[1].y+0x4a7484aaU); + + W[ 1].z += Wr1(W[ 1].x) + W[ 3].w + Wr2(W[ 1].w); + RND(C,D,E,F,G,H,A,B, W[1].z+0x5cb0a9dcU); + + W[ 1].w += Wr1(W[ 1].y) + W[ 0].x + Wr2(W[ 2].x); + RND(B,C,D,E,F,G,H,A, 
W[1].w+0x76f988daU); + + W[ 2].x += Wr1(W[ 1].z) + W[ 0].y + Wr2(W[ 2].y); + RND(A,B,C,D,E,F,G,H, W[2].x+0x983e5152U); + + W[ 2].y += Wr1(W[ 1].w) + W[ 0].z + Wr2(W[ 2].z); + RND(H,A,B,C,D,E,F,G, W[2].y+0xa831c66dU); + + W[ 2].z += Wr1(W[ 2].x) + W[ 0].w + Wr2(W[ 2].w); + RND(G,H,A,B,C,D,E,F, W[2].z+0xb00327c8U); + + W[ 2].w += Wr1(W[ 2].y) + W[ 1].x + Wr2(W[ 3].x); + RND(F,G,H,A,B,C,D,E, W[2].w+0xbf597fc7U); + + W[ 3].x += Wr1(W[ 2].z) + W[ 1].y + Wr2(W[ 3].y); + RND(E,F,G,H,A,B,C,D, W[3].x+0xc6e00bf3U); + + W[ 3].y += Wr1(W[ 2].w) + W[ 1].z + Wr2(W[ 3].z); + RND(D,E,F,G,H,A,B,C, W[3].y+0xd5a79147U); + + W[ 3].z += Wr1(W[ 3].x) + W[ 1].w + Wr2(W[ 3].w); + RND(C,D,E,F,G,H,A,B, W[3].z+0x06ca6351U); + + W[ 3].w += Wr1(W[ 3].y) + W[ 2].x + Wr2(W[ 0].x); + RND(B,C,D,E,F,G,H,A, W[3].w+0x14292967U); + + W[ 0].x += Wr1(W[ 3].z) + W[ 2].y + Wr2(W[ 0].y); + RND(A,B,C,D,E,F,G,H, W[0].x+0x27b70a85U); + + W[ 0].y += Wr1(W[ 3].w) + W[ 2].z + Wr2(W[ 0].z); + RND(H,A,B,C,D,E,F,G, W[0].y+0x2e1b2138U); + + W[ 0].z += Wr1(W[ 0].x) + W[ 2].w + Wr2(W[ 0].w); + RND(G,H,A,B,C,D,E,F, W[0].z+0x4d2c6dfcU); + + W[ 0].w += Wr1(W[ 0].y) + W[ 3].x + Wr2(W[ 1].x); + RND(F,G,H,A,B,C,D,E, W[0].w+0x53380d13U); + + W[ 1].x += Wr1(W[ 0].z) + W[ 3].y + Wr2(W[ 1].y); + RND(E,F,G,H,A,B,C,D, W[1].x+0x650a7354U); + + W[ 1].y += Wr1(W[ 0].w) + W[ 3].z + Wr2(W[ 1].z); + RND(D,E,F,G,H,A,B,C, W[1].y+0x766a0abbU); + + W[ 1].z += Wr1(W[ 1].x) + W[ 3].w + Wr2(W[ 1].w); + RND(C,D,E,F,G,H,A,B, W[1].z+0x81c2c92eU); + + W[ 1].w += Wr1(W[ 1].y) + W[ 0].x + Wr2(W[ 2].x); + RND(B,C,D,E,F,G,H,A, W[1].w+0x92722c85U); + + W[ 2].x += Wr1(W[ 1].z) + W[ 0].y + Wr2(W[ 2].y); + RND(A,B,C,D,E,F,G,H, W[2].x+0xa2bfe8a1U); + + W[ 2].y += Wr1(W[ 1].w) + W[ 0].z + Wr2(W[ 2].z); + RND(H,A,B,C,D,E,F,G, W[2].y+0xa81a664bU); + + W[ 2].z += Wr1(W[ 2].x) + W[ 0].w + Wr2(W[ 2].w); + RND(G,H,A,B,C,D,E,F, W[2].z+0xc24b8b70U); + + W[ 2].w += Wr1(W[ 2].y) + W[ 1].x + Wr2(W[ 3].x); + RND(F,G,H,A,B,C,D,E, W[2].w+0xc76c51a3U); + + W[ 3].x += 
Wr1(W[ 2].z) + W[ 1].y + Wr2(W[ 3].y); + RND(E,F,G,H,A,B,C,D, W[3].x+0xd192e819U); + + W[ 3].y += Wr1(W[ 2].w) + W[ 1].z + Wr2(W[ 3].z); + RND(D,E,F,G,H,A,B,C, W[3].y+0xd6990624U); + + W[ 3].z += Wr1(W[ 3].x) + W[ 1].w + Wr2(W[ 3].w); + RND(C,D,E,F,G,H,A,B, W[3].z+0xf40e3585U); + + W[ 3].w += Wr1(W[ 3].y) + W[ 2].x + Wr2(W[ 0].x); + RND(B,C,D,E,F,G,H,A, W[3].w+0x106aa070U); + + W[ 0].x += Wr1(W[ 3].z) + W[ 2].y + Wr2(W[ 0].y); + RND(A,B,C,D,E,F,G,H, W[0].x+0x19a4c116U); + + W[ 0].y += Wr1(W[ 3].w) + W[ 2].z + Wr2(W[ 0].z); + RND(H,A,B,C,D,E,F,G, W[0].y+0x1e376c08U); + + W[ 0].z += Wr1(W[ 0].x) + W[ 2].w + Wr2(W[ 0].w); + RND(G,H,A,B,C,D,E,F, W[0].z+0x2748774cU); + + W[ 0].w += Wr1(W[ 0].y) + W[ 3].x + Wr2(W[ 1].x); + RND(F,G,H,A,B,C,D,E, W[0].w+0x34b0bcb5U); + + W[ 1].x += Wr1(W[ 0].z) + W[ 3].y + Wr2(W[ 1].y); + RND(E,F,G,H,A,B,C,D, W[1].x+0x391c0cb3U); + + W[ 1].y += Wr1(W[ 0].w) + W[ 3].z + Wr2(W[ 1].z); + RND(D,E,F,G,H,A,B,C, W[1].y+0x4ed8aa4aU); + + W[ 1].z += Wr1(W[ 1].x) + W[ 3].w + Wr2(W[ 1].w); + RND(C,D,E,F,G,H,A,B, W[1].z+0x5b9cca4fU); + + W[ 1].w += Wr1(W[ 1].y) + W[ 0].x + Wr2(W[ 2].x); + RND(B,C,D,E,F,G,H,A, W[1].w+0x682e6ff3U); + + W[ 2].x += Wr1(W[ 1].z) + W[ 0].y + Wr2(W[ 2].y); + RND(A,B,C,D,E,F,G,H, W[2].x+0x748f82eeU); + + W[ 2].y += Wr1(W[ 1].w) + W[ 0].z + Wr2(W[ 2].z); + RND(H,A,B,C,D,E,F,G, W[2].y+0x78a5636fU); + + W[ 2].z += Wr1(W[ 2].x) + W[ 0].w + Wr2(W[ 2].w); + RND(G,H,A,B,C,D,E,F, W[2].z+0x84c87814U); + + W[ 2].w += Wr1(W[ 2].y) + W[ 1].x + Wr2(W[ 3].x); + RND(F,G,H,A,B,C,D,E, W[2].w+0x8cc70208U); + + W[ 3].x += Wr1(W[ 2].z) + W[ 1].y + Wr2(W[ 3].y); + RND(E,F,G,H,A,B,C,D, W[3].x+0x90befffaU); + + W[ 3].y += Wr1(W[ 2].w) + W[ 1].z + Wr2(W[ 3].z); + RND(D,E,F,G,H,A,B,C, W[3].y+0xa4506cebU); + + W[ 3].z += Wr1(W[ 3].x) + W[ 1].w + Wr2(W[ 3].w); + RND(C,D,E,F,G,H,A,B, W[3].z+0xbef9a3f7U); + + W[ 3].w += Wr1(W[ 3].y) + W[ 2].x + Wr2(W[ 0].x); + RND(B,C,D,E,F,G,H,A, W[3].w+0xc67178f2U); + +#undef A +#undef B +#undef C +#undef D +#undef E 
+#undef F +#undef G +#undef H + + *state0 += S0; + *state1 += S1; +} +/* SHA256_fresh: same round sequence as SHA256(), but the incoming contents of *state0 / *state1 are never read: A..H alias the output vectors directly and each is assigned before it is first used. NOTE(review): the literals in the first four rounds look like the SHA-256 initial hash values pre-folded with the round constants - confirm. */ +void SHA256_fresh(uint4*restrict state0,uint4*restrict state1, const uint4 block0, const uint4 block1, const uint4 block2, const uint4 block3) +{ +#define A (*state0).x +#define B (*state0).y +#define C (*state0).z +#define D (*state0).w +#define E (*state1).x +#define F (*state1).y +#define G (*state1).z +#define H (*state1).w + + uint4 W[4]; +/* Rounds 0-3 are written out by hand with literal constants instead of RND. */ + W[0].x = block0.x; + D=0x98c7e2a2U+W[0].x; + H=0xfc08884dU+W[0].x; + + W[0].y = block0.y; + C=0xcd2a11aeU+Tr1(D)+Ch(D,0x510e527fU,0x9b05688cU)+W[0].y; + G=0xC3910C8EU+C+Tr2(H)+Ch(H,0xfb6feee7U,0x2a01a605U); + + W[0].z = block0.z; + B=0x0c2e12e0U+Tr1(C)+Ch(C,D,0x510e527fU)+W[0].z; + F=0x4498517BU+B+Tr2(G)+Maj(G,H,0x6a09e667U); + + W[0].w = block0.w; + A=0xa4ce148bU+Tr1(B)+Ch(B,C,D)+W[0].w; + E=0x95F61999U+A+Tr2(F)+Maj(F,G,H); + + W[1].x = block1.x; + RND(E,F,G,H,A,B,C,D, W[1].x+0x3956c25bU); + W[1].y = block1.y; + RND(D,E,F,G,H,A,B,C, W[1].y+0x59f111f1U); + W[1].z = block1.z; + RND(C,D,E,F,G,H,A,B, W[1].z+0x923f82a4U); + W[1].w = block1.w; + RND(B,C,D,E,F,G,H,A, W[1].w+0xab1c5ed5U); + + W[2].x = block2.x; + RND(A,B,C,D,E,F,G,H, W[2].x+0xd807aa98U); + W[2].y = block2.y; + RND(H,A,B,C,D,E,F,G, W[2].y+0x12835b01U); + W[2].z = block2.z; + RND(G,H,A,B,C,D,E,F, W[2].z+0x243185beU); + W[2].w = block2.w; + RND(F,G,H,A,B,C,D,E, W[2].w+0x550c7dc3U); + + W[3].x = block3.x; + RND(E,F,G,H,A,B,C,D, W[3].x+0x72be5d74U); + W[3].y = block3.y; + RND(D,E,F,G,H,A,B,C, W[3].y+0x80deb1feU); + W[3].z = block3.z; + RND(C,D,E,F,G,H,A,B, W[3].z+0x9bdc06a7U); + W[3].w = block3.w; + RND(B,C,D,E,F,G,H,A, W[3].w+0xc19bf174U); +/* From round 16 on, each schedule word is updated in place in W[] just before its round. */ + W[0].x += Wr1(W[3].z) + W[2].y + Wr2(W[0].y); + RND(A,B,C,D,E,F,G,H, W[0].x+0xe49b69c1U); + + W[0].y += Wr1(W[3].w) + W[2].z + Wr2(W[0].z); + RND(H,A,B,C,D,E,F,G, W[0].y+0xefbe4786U); + + W[0].z += Wr1(W[0].x) + W[2].w + Wr2(W[0].w); + RND(G,H,A,B,C,D,E,F, W[0].z+0x0fc19dc6U); + + W[0].w += Wr1(W[0].y) + W[3].x + Wr2(W[1].x); + RND(F,G,H,A,B,C,D,E,
W[0].w+0x240ca1ccU); + + W[1].x += Wr1(W[0].z) + W[3].y + Wr2(W[1].y); + RND(E,F,G,H,A,B,C,D, W[1].x+0x2de92c6fU); + + W[1].y += Wr1(W[0].w) + W[3].z + Wr2(W[1].z); + RND(D,E,F,G,H,A,B,C, W[1].y+0x4a7484aaU); + + W[1].z += Wr1(W[1].x) + W[3].w + Wr2(W[1].w); + RND(C,D,E,F,G,H,A,B, W[1].z+0x5cb0a9dcU); + + W[1].w += Wr1(W[1].y) + W[0].x + Wr2(W[2].x); + RND(B,C,D,E,F,G,H,A, W[1].w+0x76f988daU); + + W[2].x += Wr1(W[1].z) + W[0].y + Wr2(W[2].y); + RND(A,B,C,D,E,F,G,H, W[2].x+0x983e5152U); + + W[2].y += Wr1(W[1].w) + W[0].z + Wr2(W[2].z); + RND(H,A,B,C,D,E,F,G, W[2].y+0xa831c66dU); + + W[2].z += Wr1(W[2].x) + W[0].w + Wr2(W[2].w); + RND(G,H,A,B,C,D,E,F, W[2].z+0xb00327c8U); + + W[2].w += Wr1(W[2].y) + W[1].x + Wr2(W[3].x); + RND(F,G,H,A,B,C,D,E, W[2].w+0xbf597fc7U); + + W[3].x += Wr1(W[2].z) + W[1].y + Wr2(W[3].y); + RND(E,F,G,H,A,B,C,D, W[3].x+0xc6e00bf3U); + + W[3].y += Wr1(W[2].w) + W[1].z + Wr2(W[3].z); + RND(D,E,F,G,H,A,B,C, W[3].y+0xd5a79147U); + + W[3].z += Wr1(W[3].x) + W[1].w + Wr2(W[3].w); + RND(C,D,E,F,G,H,A,B, W[3].z+0x06ca6351U); + + W[3].w += Wr1(W[3].y) + W[2].x + Wr2(W[0].x); + RND(B,C,D,E,F,G,H,A, W[3].w+0x14292967U); + + W[0].x += Wr1(W[3].z) + W[2].y + Wr2(W[0].y); + RND(A,B,C,D,E,F,G,H, W[0].x+0x27b70a85U); + + W[0].y += Wr1(W[3].w) + W[2].z + Wr2(W[0].z); + RND(H,A,B,C,D,E,F,G, W[0].y+0x2e1b2138U); + + W[0].z += Wr1(W[0].x) + W[2].w + Wr2(W[0].w); + RND(G,H,A,B,C,D,E,F, W[0].z+0x4d2c6dfcU); + + W[0].w += Wr1(W[0].y) + W[3].x + Wr2(W[1].x); + RND(F,G,H,A,B,C,D,E, W[0].w+0x53380d13U); + + W[1].x += Wr1(W[0].z) + W[3].y + Wr2(W[1].y); + RND(E,F,G,H,A,B,C,D, W[1].x+0x650a7354U); + + W[1].y += Wr1(W[0].w) + W[3].z + Wr2(W[1].z); + RND(D,E,F,G,H,A,B,C, W[1].y+0x766a0abbU); + + W[1].z += Wr1(W[1].x) + W[3].w + Wr2(W[1].w); + RND(C,D,E,F,G,H,A,B, W[1].z+0x81c2c92eU); + + W[1].w += Wr1(W[1].y) + W[0].x + Wr2(W[2].x); + RND(B,C,D,E,F,G,H,A, W[1].w+0x92722c85U); + + W[2].x += Wr1(W[1].z) + W[0].y + Wr2(W[2].y); + RND(A,B,C,D,E,F,G,H, W[2].x+0xa2bfe8a1U); + + 
W[2].y += Wr1(W[1].w) + W[0].z + Wr2(W[2].z); + RND(H,A,B,C,D,E,F,G, W[2].y+0xa81a664bU); + + W[2].z += Wr1(W[2].x) + W[0].w + Wr2(W[2].w); + RND(G,H,A,B,C,D,E,F, W[2].z+0xc24b8b70U); + + W[2].w += Wr1(W[2].y) + W[1].x + Wr2(W[3].x); + RND(F,G,H,A,B,C,D,E, W[2].w+0xc76c51a3U); + + W[3].x += Wr1(W[2].z) + W[1].y + Wr2(W[3].y); + RND(E,F,G,H,A,B,C,D, W[3].x+0xd192e819U); + + W[3].y += Wr1(W[2].w) + W[1].z + Wr2(W[3].z); + RND(D,E,F,G,H,A,B,C, W[3].y+0xd6990624U); + + W[3].z += Wr1(W[3].x) + W[1].w + Wr2(W[3].w); + RND(C,D,E,F,G,H,A,B, W[3].z+0xf40e3585U); + + W[3].w += Wr1(W[3].y) + W[2].x + Wr2(W[0].x); + RND(B,C,D,E,F,G,H,A, W[3].w+0x106aa070U); + + W[0].x += Wr1(W[3].z) + W[2].y + Wr2(W[0].y); + RND(A,B,C,D,E,F,G,H, W[0].x+0x19a4c116U); + + W[0].y += Wr1(W[3].w) + W[2].z + Wr2(W[0].z); + RND(H,A,B,C,D,E,F,G, W[0].y+0x1e376c08U); + + W[0].z += Wr1(W[0].x) + W[2].w + Wr2(W[0].w); + RND(G,H,A,B,C,D,E,F, W[0].z+0x2748774cU); + + W[0].w += Wr1(W[0].y) + W[3].x + Wr2(W[1].x); + RND(F,G,H,A,B,C,D,E, W[0].w+0x34b0bcb5U); + + W[1].x += Wr1(W[0].z) + W[3].y + Wr2(W[1].y); + RND(E,F,G,H,A,B,C,D, W[1].x+0x391c0cb3U); + + W[1].y += Wr1(W[0].w) + W[3].z + Wr2(W[1].z); + RND(D,E,F,G,H,A,B,C, W[1].y+0x4ed8aa4aU); + + W[1].z += Wr1(W[1].x) + W[3].w + Wr2(W[1].w); + RND(C,D,E,F,G,H,A,B, W[1].z+0x5b9cca4fU); + + W[1].w += Wr1(W[1].y) + W[0].x + Wr2(W[2].x); + RND(B,C,D,E,F,G,H,A, W[1].w+0x682e6ff3U); + + W[2].x += Wr1(W[1].z) + W[0].y + Wr2(W[2].y); + RND(A,B,C,D,E,F,G,H, W[2].x+0x748f82eeU); + + W[2].y += Wr1(W[1].w) + W[0].z + Wr2(W[2].z); + RND(H,A,B,C,D,E,F,G, W[2].y+0x78a5636fU); + + W[2].z += Wr1(W[2].x) + W[0].w + Wr2(W[2].w); + RND(G,H,A,B,C,D,E,F, W[2].z+0x84c87814U); + + W[2].w += Wr1(W[2].y) + W[1].x + Wr2(W[3].x); + RND(F,G,H,A,B,C,D,E, W[2].w+0x8cc70208U); + + W[3].x += Wr1(W[2].z) + W[1].y + Wr2(W[3].y); + RND(E,F,G,H,A,B,C,D, W[3].x+0x90befffaU); + + W[3].y += Wr1(W[2].w) + W[1].z + Wr2(W[3].z); + RND(D,E,F,G,H,A,B,C, W[3].y+0xa4506cebU); + + W[3].z += Wr1(W[3].x) + 
W[1].w + Wr2(W[3].w); + RND(C,D,E,F,G,H,A,B, W[3].z+0xbef9a3f7U); + + W[3].w += Wr1(W[3].y) + W[2].x + Wr2(W[0].x); + RND(B,C,D,E,F,G,H,A, W[3].w+0xc67178f2U); + +#undef A +#undef B +#undef C +#undef D +#undef E +#undef F +#undef G +#undef H +/* Finish by adding eight fixed words to the round output held in *state0 / *state1. NOTE(review): these literals match the SHA-256 initial hash values - confirm. */ + *state0 += (uint4)(0x6A09E667U,0xBB67AE85U,0x3C6EF372U,0xA54FF53AU); + *state1 += (uint4)(0x510E527FU,0x9B05688CU,0x1F83D9ABU,0x5BE0CD19U); +} +/* fixedW: the 64 per-round addends consumed by SHA256_fixed(). NOTE(review): entries 0-15 equal the round constants used in SHA256() except at indices 0, 1 and 15, so this looks like constant+message-word sums precomputed for one constant message block - confirm. */ +__constant uint fixedW[64] = +{ + 0x428a2f99,0xf1374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5, + 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf794, + 0xf59b89c2,0x73924787,0x23c6886e,0xa42ca65c,0x15ed3627,0x4d6edcbf,0xe28217fc,0xef02488f, + 0xb707775c,0x0468c23f,0xe7e72b4c,0x49e1f1a2,0x4b99c816,0x926d1570,0xaa0fc072,0xadb36e2c, + 0xad87a3ea,0xbcb1d3a3,0x7b993186,0x562b9420,0xbff3ca0c,0xda4b0c23,0x6cd8711a,0x8f337caa, + 0xc91b1417,0xc359dce1,0xa83253a7,0x3b13c12d,0x9d3d725d,0xd9031a84,0xb1a03340,0x16f58012, + 0xe64fb6a2,0xe84d923a,0xe93a5730,0x09837686,0x078ff753,0x29833341,0xd5de0b7e,0x6948ccf4, + 0xe0a1adbe,0x7c728e11,0x511c78e4,0x315b45bd,0xfca71413,0xea28f96a,0x79703128,0x4e1ef848, +}; +/* SHA256_fixed: applies 64 RND rounds to a private copy (S0,S1) of the state using the fixedW[] addends (there is no message argument), then adds the result back into *state0 / *state1. */ +void SHA256_fixed(uint4*restrict state0,uint4*restrict state1) +{ + uint4 S0 = *state0; + uint4 S1 = *state1; + +#define A S0.x +#define B S0.y +#define C S0.z +#define D S0.w +#define E S1.x +#define F S1.y +#define G S1.z +#define H S1.w + + RND(A,B,C,D,E,F,G,H, fixedW[0]); + RND(H,A,B,C,D,E,F,G, fixedW[1]); + RND(G,H,A,B,C,D,E,F, fixedW[2]); + RND(F,G,H,A,B,C,D,E, fixedW[3]); + RND(E,F,G,H,A,B,C,D, fixedW[4]); + RND(D,E,F,G,H,A,B,C, fixedW[5]); + RND(C,D,E,F,G,H,A,B, fixedW[6]); + RND(B,C,D,E,F,G,H,A, fixedW[7]); + RND(A,B,C,D,E,F,G,H, fixedW[8]); + RND(H,A,B,C,D,E,F,G, fixedW[9]); + RND(G,H,A,B,C,D,E,F, fixedW[10]); + RND(F,G,H,A,B,C,D,E, fixedW[11]); + RND(E,F,G,H,A,B,C,D, fixedW[12]); + RND(D,E,F,G,H,A,B,C, fixedW[13]); + RND(C,D,E,F,G,H,A,B, fixedW[14]); + RND(B,C,D,E,F,G,H,A, fixedW[15]); + RND(A,B,C,D,E,F,G,H,
fixedW[16]); + RND(H,A,B,C,D,E,F,G, fixedW[17]); + RND(G,H,A,B,C,D,E,F, fixedW[18]); + RND(F,G,H,A,B,C,D,E, fixedW[19]); + RND(E,F,G,H,A,B,C,D, fixedW[20]); + RND(D,E,F,G,H,A,B,C, fixedW[21]); + RND(C,D,E,F,G,H,A,B, fixedW[22]); + RND(B,C,D,E,F,G,H,A, fixedW[23]); + RND(A,B,C,D,E,F,G,H, fixedW[24]); + RND(H,A,B,C,D,E,F,G, fixedW[25]); + RND(G,H,A,B,C,D,E,F, fixedW[26]); + RND(F,G,H,A,B,C,D,E, fixedW[27]); + RND(E,F,G,H,A,B,C,D, fixedW[28]); + RND(D,E,F,G,H,A,B,C, fixedW[29]); + RND(C,D,E,F,G,H,A,B, fixedW[30]); + RND(B,C,D,E,F,G,H,A, fixedW[31]); + RND(A,B,C,D,E,F,G,H, fixedW[32]); + RND(H,A,B,C,D,E,F,G, fixedW[33]); + RND(G,H,A,B,C,D,E,F, fixedW[34]); + RND(F,G,H,A,B,C,D,E, fixedW[35]); + RND(E,F,G,H,A,B,C,D, fixedW[36]); + RND(D,E,F,G,H,A,B,C, fixedW[37]); + RND(C,D,E,F,G,H,A,B, fixedW[38]); + RND(B,C,D,E,F,G,H,A, fixedW[39]); + RND(A,B,C,D,E,F,G,H, fixedW[40]); + RND(H,A,B,C,D,E,F,G, fixedW[41]); + RND(G,H,A,B,C,D,E,F, fixedW[42]); + RND(F,G,H,A,B,C,D,E, fixedW[43]); + RND(E,F,G,H,A,B,C,D, fixedW[44]); + RND(D,E,F,G,H,A,B,C, fixedW[45]); + RND(C,D,E,F,G,H,A,B, fixedW[46]); + RND(B,C,D,E,F,G,H,A, fixedW[47]); + RND(A,B,C,D,E,F,G,H, fixedW[48]); + RND(H,A,B,C,D,E,F,G, fixedW[49]); + RND(G,H,A,B,C,D,E,F, fixedW[50]); + RND(F,G,H,A,B,C,D,E, fixedW[51]); + RND(E,F,G,H,A,B,C,D, fixedW[52]); + RND(D,E,F,G,H,A,B,C, fixedW[53]); + RND(C,D,E,F,G,H,A,B, fixedW[54]); + RND(B,C,D,E,F,G,H,A, fixedW[55]); + RND(A,B,C,D,E,F,G,H, fixedW[56]); + RND(H,A,B,C,D,E,F,G, fixedW[57]); + RND(G,H,A,B,C,D,E,F, fixedW[58]); + RND(F,G,H,A,B,C,D,E, fixedW[59]); + RND(E,F,G,H,A,B,C,D, fixedW[60]); + RND(D,E,F,G,H,A,B,C, fixedW[61]); + RND(C,D,E,F,G,H,A,B, fixedW[62]); + RND(B,C,D,E,F,G,H,A, fixedW[63]); + +#undef A +#undef B +#undef C +#undef D +#undef E +#undef F +#undef G +#undef H + *state0 += S0; + *state1 += S1; +} +/* shittify: rewrites B in place, one half at a time (B[0..3], then B[4..7]): each new vector gathers one lane from each of the four vectors along a diagonal, and every 32-bit word is byte-swapped with EndianSwap. NOTE(review): presumably this is the lane layout salsa() expects - confirm. */ +void shittify(uint4 B[8]) +{ + uint4 tmp[4]; + tmp[0] = (uint4)(B[1].x,B[2].y,B[3].z,B[0].w); + tmp[1] = (uint4)(B[2].x,B[3].y,B[0].z,B[1].w); + tmp[2] =
(uint4)(B[3].x,B[0].y,B[1].z,B[2].w); + tmp[3] = (uint4)(B[0].x,B[1].y,B[2].z,B[3].w); + +#pragma unroll + for(uint i=0; i<4; ++i) + B[i] = EndianSwap(tmp[i]); + + tmp[0] = (uint4)(B[5].x,B[6].y,B[7].z,B[4].w); + tmp[1] = (uint4)(B[6].x,B[7].y,B[4].z,B[5].w); + tmp[2] = (uint4)(B[7].x,B[4].y,B[5].z,B[6].w); + tmp[3] = (uint4)(B[4].x,B[5].y,B[6].z,B[7].w); + +#pragma unroll + for(uint i=0; i<4; ++i) + B[i+4] = EndianSwap(tmp[i]); +} +/* unshittify: exact inverse of shittify(). Each half of B is gathered along the opposite diagonal and EndianSwap (which is its own inverse) is applied again. */ +void unshittify(uint4 B[8]) +{ + uint4 tmp[4]; + tmp[0] = (uint4)(B[3].x,B[2].y,B[1].z,B[0].w); + tmp[1] = (uint4)(B[0].x,B[3].y,B[2].z,B[1].w); + tmp[2] = (uint4)(B[1].x,B[0].y,B[3].z,B[2].w); + tmp[3] = (uint4)(B[2].x,B[1].y,B[0].z,B[3].w); + +#pragma unroll + for(uint i=0; i<4; ++i) + B[i] = EndianSwap(tmp[i]); + + tmp[0] = (uint4)(B[7].x,B[6].y,B[5].z,B[4].w); + tmp[1] = (uint4)(B[4].x,B[7].y,B[6].z,B[5].w); + tmp[2] = (uint4)(B[5].x,B[4].y,B[7].z,B[6].w); + tmp[3] = (uint4)(B[6].x,B[5].y,B[4].z,B[7].w); + +#pragma unroll + for(uint i=0; i<4; ++i) + B[i+4] = EndianSwap(tmp[i]); +} +/* salsa: mixes B in place in two passes. Pass 1 xors B[4..7] into B[0..3], runs four iterations of eight add-rotate-xor steps on a working copy w, and adds w into B[0..3]. Pass 2 xors the updated B[0..3] into B[4..7], repeats the same four iterations, and adds w into B[4..7]. NOTE(review): four double-rounds with rotations 7/9/13/18 looks like Salsa20/8 as used by scrypt's BlockMix - confirm. */ +void salsa(uint4 B[8]) +{ + uint4 w[4]; +/* Pass 1 input: w[i] = B[i] ^ B[i+4], also stored back into B[i]. */ +#pragma unroll + for(uint i=0; i<4; ++i) + w[i] = (B[i]^=B[i+4]); + +#pragma unroll + for(uint i=0; i<4; ++i) + { + w[0] ^= rotl(w[3] +w[2] , 7U); + w[1] ^= rotl(w[0] +w[3] , 9U); + w[2] ^= rotl(w[1] +w[0] ,13U); + w[3] ^= rotl(w[2] +w[1] ,18U); + w[2] ^= rotl(w[3].wxyz+w[0].zwxy, 7U); + w[1] ^= rotl(w[2].wxyz+w[3].zwxy, 9U); + w[0] ^= rotl(w[1].wxyz+w[2].zwxy,13U); + w[3] ^= rotl(w[0].wxyz+w[1].zwxy,18U); + } +/* One expression per vector: B[i] += w[i] completes pass 1, then B[i+4] ^= B[i] becomes the pass 2 input w[i]. */ +#pragma unroll + for(uint i=0; i<4; ++i) + w[i] = (B[i+4]^=(B[i]+=w[i])); + +#pragma unroll + for(uint i=0; i<4; ++i) + { + w[0] ^= rotl(w[3] +w[2] , 7U); + w[1] ^= rotl(w[0] +w[3] , 9U); + w[2] ^= rotl(w[1] +w[0] ,13U); + w[3] ^= rotl(w[2] +w[1] ,18U); + w[2] ^= rotl(w[3].wxyz+w[0].zwxy, 7U); + w[1] ^= rotl(w[2].wxyz+w[3].zwxy, 9U); + w[0] ^= rotl(w[1].wxyz+w[2].zwxy,13U); + w[3] ^= rotl(w[0].wxyz+w[1].zwxy,18U); + } + +#pragma unroll + for(uint i=0; i<4; ++i) + B[i+4] += w[i]; +} + +#define
Coord(x,y,z) x+y*(x ## SIZE)+z*(y ## SIZE)*(x ## SIZE) +#define CO Coord(z,x,y) + +void scrypt_core(uint4 X[8], __global uint4*restrict lookup) +{ + shittify(X); + const uint zSIZE = 8; + const uint ySIZE = (1024/LOOKUP_GAP+(1024%LOOKUP_GAP>0)); + const uint xSIZE = CONCURRENT_THREADS; + uint x = get_global_id(0)%xSIZE; + + for(uint y=0; y<1024/LOOKUP_GAP; ++y) + { +#pragma unroll + for(uint z=0; z