diff --git a/configure.ac b/configure.ac index 75f680df..8cf840bf 100644 --- a/configure.ac +++ b/configure.ac @@ -389,11 +389,11 @@ fi AC_DEFINE_UNQUOTED([CGMINER_PREFIX], ["$prefix/bin"], [Path to cgminer install]) -AC_DEFINE_UNQUOTED([PHATK_KERNNAME], ["phatk120223"], [Filename for phatk kernel]) -AC_DEFINE_UNQUOTED([POCLBM_KERNNAME], ["poclbm120327"], [Filename for poclbm kernel]) -AC_DEFINE_UNQUOTED([DIAKGCN_KERNNAME], ["diakgcn120427"], [Filename for diakgcn kernel]) -AC_DEFINE_UNQUOTED([DIABLO_KERNNAME], ["diablo120328"], [Filename for diablo kernel]) -AC_DEFINE_UNQUOTED([SCRYPT_KERNNAME], ["scrypt120713"], [Filename for scrypt kernel]) +AC_DEFINE_UNQUOTED([PHATK_KERNNAME], ["phatk120724"], [Filename for phatk kernel]) +AC_DEFINE_UNQUOTED([POCLBM_KERNNAME], ["poclbm120724"], [Filename for poclbm kernel]) +AC_DEFINE_UNQUOTED([DIAKGCN_KERNNAME], ["diakgcn120724"], [Filename for diakgcn kernel]) +AC_DEFINE_UNQUOTED([DIABLO_KERNNAME], ["diablo120724"], [Filename for diablo kernel]) +AC_DEFINE_UNQUOTED([SCRYPT_KERNNAME], ["scrypt120724"], [Filename for scrypt kernel]) AC_SUBST(OPENCL_LIBS) diff --git a/diablo120328.cl b/diablo120328.cl deleted file mode 100644 index 4b64c300..00000000 --- a/diablo120328.cl +++ /dev/null @@ -1,1274 +0,0 @@ -/* - * DiabloMiner - OpenCL miner for BitCoin - * Copyright (C) 2010, 2011, 2012 Patrick McFarland - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more detail). - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#ifdef VECTORS4 - typedef uint4 z; -#elif defined(VECTORS2) - typedef uint2 z; -#else - typedef uint z; -#endif - -#ifdef BITALIGN -#pragma OPENCL EXTENSION cl_amd_media_ops : enable -#define Zrotr(a, b) amd_bitalign((z)a, (z)a, (z)(32 - b)) -#else -#define Zrotr(a, b) rotate((z)a, (z)b) -#endif - -#ifdef BFI_INT -#define ZCh(a, b, c) amd_bytealign(a, b, c) -#define ZMa(a, b, c) amd_bytealign((c ^ a), (b), (a)) -#else -#define ZCh(a, b, c) bitselect((z)c, (z)b, (z)a) -#define ZMa(a, b, c) bitselect((z)a, (z)b, (z)c ^ (z)a) -#endif - -#define ZR25(n) ((Zrotr((n), 25) ^ Zrotr((n), 14) ^ ((n) >> 3U))) -#define ZR15(n) ((Zrotr((n), 15) ^ Zrotr((n), 13) ^ ((n) >> 10U))) -#define ZR26(n) ((Zrotr((n), 26) ^ Zrotr((n), 21) ^ Zrotr((n), 7))) -#define ZR30(n) ((Zrotr((n), 30) ^ Zrotr((n), 19) ^ Zrotr((n), 10))) - -__kernel -__attribute__((vec_type_hint(z))) -__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) -void search( -#ifndef GOFFSET - const z base, -#endif - const uint PreVal4_state0, const uint PreVal4_state0_k7, - const uint PreVal4_T1, - const uint W18, const uint W19, - const uint W16, const uint W17, - const uint W16_plus_K16, const uint W17_plus_K17, - const uint W31, const uint W32, - const uint d1, const uint b1, const uint c1, - const uint h1, const uint f1, const uint g1, - const uint c1_plus_k5, const uint b1_plus_k6, - const uint state0, const uint state1, const uint state2, const uint state3, - const uint state4, const uint state5, const uint state6, const uint state7, - __global uint * output) -{ - - z ZA[930]; - -#ifdef GOFFSET - const z Znonce = (uint)(get_global_id(0)); -#else - const z Znonce = base + (uint)(get_global_id(0)); -#endif - - ZA[15] = Znonce + PreVal4_state0; - - ZA[16] = (ZCh(ZA[15], b1, c1) + d1) + ZR26(ZA[15]); - ZA[26] = Znonce + PreVal4_T1; - - ZA[27] = ZMa(f1, g1, ZA[26]) + ZR30(ZA[26]); - ZA[17] = ZA[16] + h1; - - ZA[19] = (ZCh(ZA[17], ZA[15], b1) + c1_plus_k5) + ZR26(ZA[17]); - ZA[28] = ZA[27] + ZA[16]; - - ZA[548] = 
ZMa(ZA[26], f1, ZA[28]) + ZR30(ZA[28]); - ZA[20] = ZA[19] + g1; - - ZA[22] = (ZCh(ZA[20], ZA[17], ZA[15]) + b1_plus_k6) + ZR26(ZA[20]); - ZA[29] = ZA[548] + ZA[19]; - - ZA[549] = ZMa(ZA[28], ZA[26], ZA[29]) + ZR30(ZA[29]); - ZA[23] = ZA[22] + f1; - - ZA[24] = ZCh(ZA[23], ZA[20], ZA[17]) + ZR26(ZA[23]); - ZA[180] = Znonce + PreVal4_state0_k7; - ZA[30] = ZA[549] + ZA[22]; - - ZA[31] = ZMa(ZA[29], ZA[28], ZA[30]) + ZR30(ZA[30]); - ZA[181] = ZA[180] + ZA[24]; - - ZA[182] = ZA[181] + ZA[26]; - ZA[183] = ZA[181] + ZA[31]; - ZA[18] = ZA[17] + 0xd807aa98U; - - ZA[186] = (ZCh(ZA[182], ZA[23], ZA[20]) + ZA[18]) + ZR26(ZA[182]); - ZA[184] = ZMa(ZA[30], ZA[29], ZA[183]) + ZR30(ZA[183]); - - ZA[187] = ZA[186] + ZA[28]; - ZA[188] = ZA[186] + ZA[184]; - ZA[21] = ZA[20] + 0x12835b01U; - - ZA[191] = (ZCh(ZA[187], ZA[182], ZA[23]) + ZA[21]) + ZR26(ZA[187]); - ZA[189] = ZMa(ZA[183], ZA[30], ZA[188]) + ZR30(ZA[188]); - - ZA[192] = ZA[191] + ZA[29]; - ZA[193] = ZA[191] + ZA[189]; - ZA[25] = ZA[23] + 0x243185beU; - - ZA[196] = (ZCh(ZA[192], ZA[187], ZA[182]) + ZA[25]) + ZR26(ZA[192]); - ZA[194] = ZMa(ZA[188], ZA[183], ZA[193]) + ZR30(ZA[193]); - - ZA[197] = ZA[196] + ZA[30]; - ZA[198] = ZA[196] + ZA[194]; - ZA[185] = ZA[182] + 0x550c7dc3U; - - ZA[201] = (ZCh(ZA[197], ZA[192], ZA[187]) + ZA[185]) + ZR26(ZA[197]); - ZA[199] = ZMa(ZA[193], ZA[188], ZA[198]) + ZR30(ZA[198]); - - ZA[202] = ZA[201] + ZA[183]; - ZA[203] = ZA[201] + ZA[199]; - ZA[190] = ZA[187] + 0x72be5d74U; - - ZA[206] = (ZCh(ZA[202], ZA[197], ZA[192]) + ZA[190]) + ZR26(ZA[202]); - ZA[204] = ZMa(ZA[198], ZA[193], ZA[203]) + ZR30(ZA[203]); - - ZA[207] = ZA[206] + ZA[188]; - ZA[208] = ZA[206] + ZA[204]; - ZA[195] = ZA[192] + 0x80deb1feU; - - ZA[211] = (ZCh(ZA[207], ZA[202], ZA[197]) + ZA[195]) + ZR26(ZA[207]); - ZA[209] = ZMa(ZA[203], ZA[198], ZA[208]) + ZR30(ZA[208]); - - ZA[212] = ZA[193] + ZA[211]; - ZA[213] = ZA[211] + ZA[209]; - ZA[200] = ZA[197] + 0x9bdc06a7U; - - ZA[216] = (ZCh(ZA[212], ZA[207], ZA[202]) + ZA[200]) + 
ZR26(ZA[212]); - ZA[214] = ZMa(ZA[208], ZA[203], ZA[213]) + ZR30(ZA[213]); - - ZA[217] = ZA[198] + ZA[216]; - ZA[218] = ZA[216] + ZA[214]; - ZA[205] = ZA[202] + 0xc19bf3f4U; - - ZA[220] = (ZCh(ZA[217], ZA[212], ZA[207]) + ZA[205]) + ZR26(ZA[217]); - ZA[219] = ZMa(ZA[213], ZA[208], ZA[218]) + ZR30(ZA[218]); - - ZA[222] = ZA[203] + ZA[220]; - ZA[223] = ZA[220] + ZA[219]; - ZA[210] = ZA[207] + W16_plus_K16; - - ZA[226] = (ZCh(ZA[222], ZA[217], ZA[212]) + ZA[210]) + ZR26(ZA[222]); - ZA[225] = ZMa(ZA[218], ZA[213], ZA[223]) + ZR30(ZA[223]); - - ZA[0] = ZR25(Znonce) + W18; - ZA[228] = ZA[226] + ZA[225]; - ZA[227] = ZA[208] + ZA[226]; - ZA[215] = ZA[212] + W17_plus_K17; - - ZA[231] = (ZCh(ZA[227], ZA[222], ZA[217]) + ZA[215]) + ZR26(ZA[227]); - ZA[229] = ZMa(ZA[223], ZA[218], ZA[228]) + ZR30(ZA[228]); - ZA[1] = ZA[0] + 0x0fc19dc6U; - - ZA[232] = ZA[213] + ZA[231]; - ZA[233] = ZA[231] + ZA[229]; - ZA[221] = ZA[217] + ZA[1]; - ZA[32] = Znonce + W19; - - ZA[236] = (ZCh(ZA[232], ZA[227], ZA[222]) + ZA[221]) + ZR26(ZA[232]); - ZA[234] = ZMa(ZA[228], ZA[223], ZA[233]) + ZR30(ZA[233]); - ZA[33] = ZA[32] + 0x240ca1ccU; - - ZA[3] = ZR15(ZA[0]) + 0x80000000U; - ZA[238] = ZA[236] + ZA[234]; - ZA[237] = ZA[218] + ZA[236]; - ZA[224] = ZA[222] + ZA[33]; - - ZA[241] = (ZCh(ZA[237], ZA[232], ZA[227]) + ZA[224]) + ZR26(ZA[237]); - ZA[239] = ZMa(ZA[233], ZA[228], ZA[238]) + ZR30(ZA[238]); - ZA[4] = ZA[3] + 0x2de92c6fU; - - ZA[35] = ZR15(ZA[32]); - ZA[243] = ZA[241] + ZA[239]; - ZA[242] = ZA[223] + ZA[241]; - ZA[230] = ZA[227] + ZA[4]; - - ZA[246] = (ZCh(ZA[242], ZA[237], ZA[232]) + ZA[230]) + ZR26(ZA[242]); - ZA[244] = ZMa(ZA[238], ZA[233], ZA[243]) + ZR30(ZA[243]); - ZA[36] = ZA[35] + 0x4a7484aaU; - - ZA[7] = ZR15(ZA[3]) + 0x00000280U; - ZA[248] = ZA[246] + ZA[244]; - ZA[247] = ZA[228] + ZA[246]; - ZA[235] = ZA[232] + ZA[36]; - - ZA[251] = (ZCh(ZA[247], ZA[242], ZA[237]) + ZA[235]) + ZR26(ZA[247]); - ZA[249] = ZMa(ZA[243], ZA[238], ZA[248]) + ZR30(ZA[248]); - ZA[8] = ZA[7] + 0x5cb0a9dcU; 
- - ZA[38] = ZR15(ZA[35]) + W16; - ZA[253] = ZA[251] + ZA[249]; - ZA[252] = ZA[233] + ZA[251]; - ZA[240] = ZA[237] + ZA[8]; - - ZA[256] = (ZCh(ZA[252], ZA[247], ZA[242]) + ZA[240]) + ZR26(ZA[252]); - ZA[254] = ZMa(ZA[248], ZA[243], ZA[253]) + ZR30(ZA[253]); - ZA[40] = ZA[38] + 0x76f988daU; - - ZA[10] = ZR15(ZA[7]) + W17; - ZA[258] = ZA[256] + ZA[254]; - ZA[257] = ZA[238] + ZA[256]; - ZA[245] = ZA[242] + ZA[40]; - - ZA[261] = (ZCh(ZA[257], ZA[252], ZA[247]) + ZA[245]) + ZR26(ZA[257]); - ZA[259] = ZMa(ZA[253], ZA[248], ZA[258]) + ZR30(ZA[258]); - ZA[13] = ZA[10] + 0x983e5152U; - - ZA[43] = ZR15(ZA[38]) + ZA[0]; - ZA[263] = ZA[261] + ZA[259]; - ZA[262] = ZA[243] + ZA[261]; - ZA[250] = ZA[247] + ZA[13]; - - ZA[266] = (ZCh(ZA[262], ZA[257], ZA[252]) + ZA[250]) + ZR26(ZA[262]); - ZA[264] = ZMa(ZA[258], ZA[253], ZA[263]) + ZR30(ZA[263]); - ZA[11] = ZR15(ZA[10]); - ZA[45] = ZA[43] + 0xa831c66dU; - - ZA[52] = ZA[11] + ZA[32]; - ZA[267] = ZA[248] + ZA[266]; - ZA[255] = ZA[252] + ZA[45]; - ZA[268] = ZA[266] + ZA[264]; - - ZA[271] = (ZCh(ZA[267], ZA[262], ZA[257]) + ZA[255]) + ZR26(ZA[267]); - ZA[269] = ZMa(ZA[263], ZA[258], ZA[268]) + ZR30(ZA[268]); - ZA[54] = ZA[52] + 0xb00327c8U; - - ZA[48] = ZR15(ZA[43]) + ZA[3]; - ZA[273] = ZA[271] + ZA[269]; - ZA[272] = ZA[253] + ZA[271]; - ZA[260] = ZA[257] + ZA[54]; - - ZA[276] = (ZCh(ZA[272], ZA[267], ZA[262]) + ZA[260]) + ZR26(ZA[272]); - ZA[274] = ZMa(ZA[268], ZA[263], ZA[273]) + ZR30(ZA[273]); - ZA[49] = ZA[48] + 0xbf597fc7U; - - ZA[61] = ZR15(ZA[52]) + ZA[35]; - ZA[278] = ZA[276] + ZA[274]; - ZA[277] = ZA[258] + ZA[276]; - ZA[265] = ZA[262] + ZA[49]; - - ZA[281] = (ZCh(ZA[277], ZA[272], ZA[267]) + ZA[265]) + ZR26(ZA[277]); - ZA[279] = ZMa(ZA[273], ZA[268], ZA[278]) + ZR30(ZA[278]); - ZA[62] = ZA[61] + 0xc6e00bf3U; - - ZA[53] = ZR15(ZA[48]) + ZA[7]; - ZA[283] = ZA[281] + ZA[279]; - ZA[282] = ZA[263] + ZA[281]; - ZA[270] = ZA[267] + ZA[62]; - - ZA[286] = (ZCh(ZA[282], ZA[277], ZA[272]) + ZA[270]) + ZR26(ZA[282]); - ZA[284] = 
ZMa(ZA[278], ZA[273], ZA[283]) + ZR30(ZA[283]); - ZA[39] = ZA[38] + 0x00A00055U; - ZA[55] = ZA[53] + 0xd5a79147U; - - ZA[66] = ZR15(ZA[61]) + ZA[39]; - ZA[288] = ZA[286] + ZA[284]; - ZA[287] = ZA[268] + ZA[286]; - ZA[275] = ZA[272] + ZA[55]; - - ZA[291] = (ZCh(ZA[287], ZA[282], ZA[277]) + ZA[275]) + ZR26(ZA[287]); - ZA[289] = ZMa(ZA[283], ZA[278], ZA[288]) + ZR30(ZA[288]); - ZA[12] = ZA[10] + W31; - ZA[68] = ZA[66] + 0x06ca6351U; - - ZA[67] = ZR15(ZA[53]) + ZA[12]; - ZA[293] = ZA[291] + ZA[289]; - ZA[292] = ZA[273] + ZA[291]; - ZA[280] = ZA[277] + ZA[68]; - - ZA[296] = (ZCh(ZA[292], ZA[287], ZA[282]) + ZA[280]) + ZR26(ZA[292]); - ZA[294] = ZMa(ZA[288], ZA[283], ZA[293]) + ZR30(ZA[293]); - ZA[2] = ZR25(ZA[0]); - ZA[69] = ZA[67] + 0x14292967U; - ZA[44] = ZA[43] + W32; - - ZA[75] = ZR15(ZA[66]) + ZA[44]; - ZA[298] = ZA[296] + ZA[294]; - ZA[297] = ZA[278] + ZA[296]; - ZA[285] = ZA[282] + ZA[69]; - ZA[5] = ZA[2] + W17; - - ZA[301] = (ZCh(ZA[297], ZA[292], ZA[287]) + ZA[285]) + ZR26(ZA[297]); - ZA[299] = ZMa(ZA[293], ZA[288], ZA[298]) + ZR30(ZA[298]); - ZA[56] = ZA[52] + ZA[5]; - ZA[76] = ZA[75] + 0x27b70a85U; - - ZA[34] = ZR25(ZA[32]) + ZA[0]; - ZA[70] = ZR15(ZA[67]) + ZA[56]; - ZA[302] = ZA[283] + ZA[301]; - ZA[303] = ZA[301] + ZA[299]; - ZA[290] = ZA[287] + ZA[76]; - - ZA[306] = (ZCh(ZA[302], ZA[297], ZA[292]) + ZA[290]) + ZR26(ZA[302]); - ZA[304] = ZMa(ZA[298], ZA[293], ZA[303]) + ZR30(ZA[303]); - ZA[6] = ZR25(ZA[3]); - ZA[77] = ZA[70] + 0x2e1b2138U; - ZA[50] = ZA[34] + ZA[48]; - - ZA[78] = ZR15(ZA[75]) + ZA[50]; - ZA[308] = ZA[306] + ZA[304]; - ZA[307] = ZA[288] + ZA[306]; - ZA[295] = ZA[292] + ZA[77]; - ZA[41] = ZA[32] + ZA[6]; - - ZA[311] = (ZCh(ZA[307], ZA[302], ZA[297]) + ZA[295]) + ZR26(ZA[307]); - ZA[309] = ZMa(ZA[303], ZA[298], ZA[308]) + ZR30(ZA[308]); - ZA[63] = ZA[41] + ZA[61]; - ZA[85] = ZA[78] + 0x4d2c6dfcU; - - ZA[37] = ZR25(ZA[35]) + ZA[3]; - ZA[79] = ZR15(ZA[70]) + ZA[63]; - ZA[312] = ZA[293] + ZA[311]; - ZA[313] = ZA[311] + ZA[309]; - ZA[300] = 
ZA[297] + ZA[85]; - - ZA[316] = (ZCh(ZA[312], ZA[307], ZA[302]) + ZA[300]) + ZR26(ZA[312]); - ZA[314] = ZMa(ZA[308], ZA[303], ZA[313]) + ZR30(ZA[313]); - ZA[9] = ZR25(ZA[7]); - ZA[86] = ZA[79] + 0x53380d13U; - ZA[57] = ZA[37] + ZA[53]; - - ZA[87] = ZR15(ZA[78]) + ZA[57]; - ZA[318] = ZA[316] + ZA[314]; - ZA[317] = ZA[298] + ZA[316]; - ZA[305] = ZA[302] + ZA[86]; - ZA[46] = ZA[35] + ZA[9]; - - ZA[321] = (ZCh(ZA[317], ZA[312], ZA[307]) + ZA[305]) + ZR26(ZA[317]); - ZA[319] = ZMa(ZA[313], ZA[308], ZA[318]) + ZR30(ZA[318]); - ZA[71] = ZA[46] + ZA[66]; - ZA[92] = ZA[87] + 0x650a7354U; - - ZA[42] = ZR25(ZA[38]) + ZA[7]; - ZA[88] = ZR15(ZA[79]) + ZA[71]; - ZA[322] = ZA[303] + ZA[321]; - ZA[323] = ZA[321] + ZA[319]; - ZA[310] = ZA[307] + ZA[92]; - - ZA[326] = (ZCh(ZA[322], ZA[317], ZA[312]) + ZA[310]) + ZR26(ZA[322]); - ZA[324] = ZMa(ZA[318], ZA[313], ZA[323]) + ZR30(ZA[323]); - ZA[14] = ZR25(ZA[10]); - ZA[93] = ZA[88] + 0x766a0abbU; - ZA[72] = ZA[42] + ZA[67]; - - ZA[94] = ZR15(ZA[87]) + ZA[72]; - ZA[328] = ZA[326] + ZA[324]; - ZA[327] = ZA[308] + ZA[326]; - ZA[315] = ZA[312] + ZA[93]; - ZA[51] = ZA[38] + ZA[14]; - - ZA[331] = (ZCh(ZA[327], ZA[322], ZA[317]) + ZA[315]) + ZR26(ZA[327]); - ZA[329] = ZMa(ZA[323], ZA[318], ZA[328]) + ZR30(ZA[328]); - ZA[80] = ZA[51] + ZA[75]; - ZA[100] = ZA[94] + 0x81c2c92eU; - - ZA[47] = ZR25(ZA[43]) + ZA[10]; - ZA[95] = ZR15(ZA[88]) + ZA[80]; - ZA[332] = ZA[313] + ZA[331]; - ZA[333] = ZA[331] + ZA[329]; - ZA[320] = ZA[317] + ZA[100]; - - ZA[336] = (ZCh(ZA[332], ZA[327], ZA[322]) + ZA[320]) + ZR26(ZA[332]); - ZA[334] = ZMa(ZA[328], ZA[323], ZA[333]) + ZR30(ZA[333]); - ZA[81] = ZA[47] + ZA[70]; - ZA[101] = ZA[95] + 0x92722c85U; - - ZA[58] = ZR25(ZA[52]) + ZA[43]; - ZA[102] = ZR15(ZA[94]) + ZA[81]; - ZA[337] = ZA[318] + ZA[336]; - ZA[338] = ZA[336] + ZA[334]; - ZA[325] = ZA[322] + ZA[101]; - - ZA[341] = (ZCh(ZA[337], ZA[332], ZA[327]) + ZA[325]) + ZR26(ZA[337]); - ZA[339] = ZMa(ZA[333], ZA[328], ZA[338]) + ZR30(ZA[338]); - ZA[89] = ZA[58] + 
ZA[78]; - ZA[108] = ZA[102] + 0xa2bfe8a1U; - - ZA[59] = ZR25(ZA[48]) + ZA[52]; - ZA[103] = ZR15(ZA[95]) + ZA[89]; - ZA[342] = ZA[323] + ZA[341]; - ZA[343] = ZA[341] + ZA[339]; - ZA[330] = ZA[327] + ZA[108]; - - ZA[346] = (ZCh(ZA[342], ZA[337], ZA[332]) + ZA[330]) + ZR26(ZA[342]); - ZA[344] = ZMa(ZA[338], ZA[333], ZA[343]) + ZR30(ZA[343]); - ZA[90] = ZA[59] + ZA[79]; - ZA[109] = ZA[103] + 0xa81a664bU; - - ZA[64] = ZR25(ZA[61]) + ZA[48]; - ZA[110] = ZR15(ZA[102]) + ZA[90]; - ZA[347] = ZA[328] + ZA[346]; - ZA[348] = ZA[346] + ZA[344]; - ZA[335] = ZA[332] + ZA[109]; - - ZA[351] = (ZCh(ZA[347], ZA[342], ZA[337]) + ZA[335]) + ZR26(ZA[347]); - ZA[349] = ZMa(ZA[343], ZA[338], ZA[348]) + ZR30(ZA[348]); - ZA[60] = ZR25(ZA[53]); - ZA[116] = ZA[110] + 0xc24b8b70U; - ZA[96] = ZA[87] + ZA[64]; - - ZA[111] = ZR15(ZA[103]) + ZA[96]; - ZA[353] = ZA[351] + ZA[349]; - ZA[352] = ZA[333] + ZA[351]; - ZA[340] = ZA[337] + ZA[116]; - ZA[65] = ZA[60] + ZA[61]; - - ZA[356] = (ZCh(ZA[352], ZA[347], ZA[342]) + ZA[340]) + ZR26(ZA[352]); - ZA[354] = ZMa(ZA[348], ZA[343], ZA[353]) + ZR30(ZA[353]); - ZA[97] = ZA[88] + ZA[65]; - ZA[117] = ZA[111] + 0xc76c51a3U; - - ZA[73] = ZR25(ZA[66]) + ZA[53]; - ZA[118] = ZR15(ZA[110]) + ZA[97]; - ZA[357] = ZA[338] + ZA[356]; - ZA[358] = ZA[356] + ZA[354]; - ZA[345] = ZA[342] + ZA[117]; - - ZA[361] = (ZCh(ZA[357], ZA[352], ZA[347]) + ZA[345]) + ZR26(ZA[357]); - ZA[359] = ZMa(ZA[353], ZA[348], ZA[358]) + ZR30(ZA[358]); - ZA[104] = ZA[73] + ZA[94]; - ZA[124] = ZA[118] + 0xd192e819U; - - ZA[74] = ZR25(ZA[67]) + ZA[66]; - ZA[119] = ZR15(ZA[111]) + ZA[104]; - ZA[362] = ZA[343] + ZA[361]; - ZA[363] = ZA[361] + ZA[359]; - ZA[350] = ZA[347] + ZA[124]; - - ZA[366] = (ZCh(ZA[362], ZA[357], ZA[352]) + ZA[350]) + ZR26(ZA[362]); - ZA[364] = ZMa(ZA[358], ZA[353], ZA[363]) + ZR30(ZA[363]); - ZA[105] = ZA[74] + ZA[95]; - ZA[125] = ZA[119] + 0xd6990624U; - - ZA[82] = ZR25(ZA[75]) + ZA[67]; - ZA[126] = ZR15(ZA[118]) + ZA[105]; - ZA[367] = ZA[348] + ZA[366]; - ZA[368] = ZA[366] + 
ZA[364]; - ZA[355] = ZA[352] + ZA[125]; - - ZA[371] = (ZCh(ZA[367], ZA[362], ZA[357]) + ZA[355]) + ZR26(ZA[367]); - ZA[369] = ZMa(ZA[363], ZA[358], ZA[368]) + ZR30(ZA[368]); - ZA[112] = ZA[102] + ZA[82]; - ZA[132] = ZA[126] + 0xf40e3585U; - - ZA[83] = ZR25(ZA[70]) + ZA[75]; - ZA[127] = ZR15(ZA[119]) + ZA[112]; - ZA[372] = ZA[353] + ZA[371]; - ZA[373] = ZA[371] + ZA[369]; - ZA[360] = ZA[357] + ZA[132]; - - ZA[376] = (ZCh(ZA[372], ZA[367], ZA[362]) + ZA[360]) + ZR26(ZA[372]); - ZA[374] = ZMa(ZA[368], ZA[363], ZA[373]) + ZR30(ZA[373]); - ZA[113] = ZA[103] + ZA[83]; - ZA[133] = ZA[127] + 0x106aa070U; - - ZA[84] = ZR25(ZA[78]) + ZA[70]; - ZA[134] = ZR15(ZA[126]) + ZA[113]; - ZA[377] = ZA[358] + ZA[376]; - ZA[378] = ZA[376] + ZA[374]; - ZA[365] = ZA[362] + ZA[133]; - - ZA[381] = (ZCh(ZA[377], ZA[372], ZA[367]) + ZA[365]) + ZR26(ZA[377]); - ZA[379] = ZMa(ZA[373], ZA[368], ZA[378]) + ZR30(ZA[378]); - ZA[120] = ZA[110] + ZA[84]; - ZA[140] = ZA[134] + 0x19a4c116U; - - ZA[91] = ZR25(ZA[79]) + ZA[78]; - ZA[135] = ZR15(ZA[127]) + ZA[120]; - ZA[382] = ZA[363] + ZA[381]; - ZA[383] = ZA[381] + ZA[379]; - ZA[370] = ZA[367] + ZA[140]; - - ZA[386] = (ZCh(ZA[382], ZA[377], ZA[372]) + ZA[370]) + ZR26(ZA[382]); - ZA[384] = ZMa(ZA[378], ZA[373], ZA[383]) + ZR30(ZA[383]); - ZA[121] = ZA[111] + ZA[91]; - ZA[141] = ZA[135] + 0x1e376c08U; - - ZA[98] = ZR25(ZA[87]) + ZA[79]; - ZA[142] = ZR15(ZA[134]) + ZA[121]; - ZA[387] = ZA[368] + ZA[386]; - ZA[388] = ZA[386] + ZA[384]; - ZA[375] = ZA[372] + ZA[141]; - - ZA[391] = (ZCh(ZA[387], ZA[382], ZA[377]) + ZA[375]) + ZR26(ZA[387]); - ZA[389] = ZMa(ZA[383], ZA[378], ZA[388]) + ZR30(ZA[388]); - ZA[128] = ZA[118] + ZA[98]; - ZA[147] = ZA[142] + 0x2748774cU; - - ZA[99] = ZR25(ZA[88]) + ZA[87]; - ZA[143] = ZR15(ZA[135]) + ZA[128]; - ZA[392] = ZA[373] + ZA[391]; - ZA[393] = ZA[391] + ZA[389]; - ZA[380] = ZA[377] + ZA[147]; - - ZA[396] = (ZCh(ZA[392], ZA[387], ZA[382]) + ZA[380]) + ZR26(ZA[392]); - ZA[394] = ZMa(ZA[388], ZA[383], ZA[393]) + ZR30(ZA[393]); 
- ZA[129] = ZA[119] + ZA[99]; - ZA[148] = ZA[143] + 0x34b0bcb5U; - - ZA[106] = ZR25(ZA[94]) + ZA[88]; - ZA[149] = ZR15(ZA[142]) + ZA[129]; - ZA[397] = ZA[378] + ZA[396]; - ZA[398] = ZA[396] + ZA[394]; - ZA[385] = ZA[382] + ZA[148]; - - ZA[401] = (ZCh(ZA[397], ZA[392], ZA[387]) + ZA[385]) + ZR26(ZA[397]); - ZA[399] = ZMa(ZA[393], ZA[388], ZA[398]) + ZR30(ZA[398]); - ZA[136] = ZA[126] + ZA[106]; - ZA[153] = ZA[149] + 0x391c0cb3U; - - ZA[107] = ZR25(ZA[95]) + ZA[94]; - ZA[150] = ZR15(ZA[143]) + ZA[136]; - ZA[402] = ZA[383] + ZA[401]; - ZA[403] = ZA[401] + ZA[399]; - ZA[390] = ZA[387] + ZA[153]; - - ZA[406] = (ZCh(ZA[402], ZA[397], ZA[392]) + ZA[390]) + ZR26(ZA[402]); - ZA[404] = ZMa(ZA[398], ZA[393], ZA[403]) + ZR30(ZA[403]); - ZA[137] = ZA[127] + ZA[107]; - ZA[154] = ZA[150] + 0x4ed8aa4aU; - - ZA[114] = ZR25(ZA[102]) + ZA[95]; - ZA[155] = ZR15(ZA[149]) + ZA[137]; - ZA[407] = ZA[388] + ZA[406]; - ZA[408] = ZA[406] + ZA[404]; - ZA[395] = ZA[392] + ZA[154]; - - ZA[411] = (ZCh(ZA[407], ZA[402], ZA[397]) + ZA[395]) + ZR26(ZA[407]); - ZA[409] = ZMa(ZA[403], ZA[398], ZA[408]) + ZR30(ZA[408]); - ZA[144] = ZA[134] + ZA[114]; - ZA[159] = ZA[155] + 0x5b9cca4fU; - - ZA[115] = ZR25(ZA[103]) + ZA[102]; - ZA[156] = ZR15(ZA[150]) + ZA[144]; - ZA[412] = ZA[393] + ZA[411]; - ZA[413] = ZA[411] + ZA[409]; - ZA[400] = ZA[397] + ZA[159]; - - ZA[416] = (ZCh(ZA[412], ZA[407], ZA[402]) + ZA[400]) + ZR26(ZA[412]); - ZA[414] = ZMa(ZA[408], ZA[403], ZA[413]) + ZR30(ZA[413]); - ZA[145] = ZA[135] + ZA[115]; - ZA[160] = ZA[156] + 0x682e6ff3U; - - ZA[122] = ZR25(ZA[110]) + ZA[103]; - ZA[161] = ZR15(ZA[155]) + ZA[145]; - ZA[417] = ZA[398] + ZA[416]; - ZA[418] = ZA[416] + ZA[414]; - ZA[405] = ZA[402] + ZA[160]; - - ZA[421] = (ZCh(ZA[417], ZA[412], ZA[407]) + ZA[405]) + ZR26(ZA[417]); - ZA[419] = ZMa(ZA[413], ZA[408], ZA[418]) + ZR30(ZA[418]); - ZA[151] = ZA[142] + ZA[122]; - ZA[165] = ZA[161] + 0x748f82eeU; - - ZA[123] = ZR25(ZA[111]) + ZA[110]; - ZA[162] = ZR15(ZA[156]) + ZA[151]; - ZA[422] = 
ZA[403] + ZA[421]; - ZA[423] = ZA[421] + ZA[419]; - ZA[410] = ZA[407] + ZA[165]; - - ZA[426] = (ZCh(ZA[422], ZA[417], ZA[412]) + ZA[410]) + ZR26(ZA[422]); - ZA[424] = ZMa(ZA[418], ZA[413], ZA[423]) + ZR30(ZA[423]); - ZA[152] = ZA[143] + ZA[123]; - ZA[166] = ZA[162] + 0x78a5636fU; - - ZA[130] = ZR25(ZA[118]) + ZA[111]; - ZA[167] = ZR15(ZA[161]) + ZA[152]; - ZA[427] = ZA[408] + ZA[426]; - ZA[428] = ZA[426] + ZA[424]; - ZA[415] = ZA[412] + ZA[166]; - - ZA[431] = (ZCh(ZA[427], ZA[422], ZA[417]) + ZA[415]) + ZR26(ZA[427]); - ZA[429] = ZMa(ZA[423], ZA[418], ZA[428]) + ZR30(ZA[428]); - ZA[157] = ZA[149] + ZA[130]; - ZA[170] = ZA[167] + 0x84c87814U; - - ZA[131] = ZR25(ZA[119]) + ZA[118]; - ZA[168] = ZR15(ZA[162]) + ZA[157]; - ZA[432] = ZA[413] + ZA[431]; - ZA[433] = ZA[431] + ZA[429]; - ZA[420] = ZA[417] + ZA[170]; - - ZA[436] = (ZCh(ZA[432], ZA[427], ZA[422]) + ZA[420]) + ZR26(ZA[432]); - ZA[434] = ZMa(ZA[428], ZA[423], ZA[433]) + ZR30(ZA[433]); - ZA[158] = ZA[150] + ZA[131]; - ZA[171] = ZA[168] + 0x8cc70208U; - - ZA[138] = ZR25(ZA[126]) + ZA[119]; - ZA[172] = ZR15(ZA[167]) + ZA[158]; - ZA[437] = ZA[418] + ZA[436]; - ZA[438] = ZA[436] + ZA[434]; - ZA[425] = ZA[422] + ZA[171]; - - ZA[441] = (ZCh(ZA[437], ZA[432], ZA[427]) + ZA[425]) + ZR26(ZA[437]); - ZA[439] = ZMa(ZA[433], ZA[428], ZA[438]) + ZR30(ZA[438]); - ZA[163] = ZA[155] + ZA[138]; - ZA[174] = ZA[172] + 0x90befffaU; - - ZA[139] = ZR25(ZA[127]) + ZA[126]; - ZA[173] = ZR15(ZA[168]) + ZA[163]; - ZA[442] = ZA[423] + ZA[441]; - ZA[443] = ZA[441] + ZA[439]; - ZA[430] = ZA[427] + ZA[174]; - - ZA[445] = (ZCh(ZA[442], ZA[437], ZA[432]) + ZA[430]) + ZR26(ZA[442]); - ZA[444] = ZMa(ZA[438], ZA[433], ZA[443]) + ZR30(ZA[443]); - ZA[164] = ZA[156] + ZA[139]; - ZA[175] = ZA[173] + 0xa4506cebU; - - ZA[146] = ZR25(ZA[134]) + ZA[127]; - ZA[176] = ZR15(ZA[172]) + ZA[164]; - ZA[446] = ZA[428] + ZA[445]; - ZA[447] = ZA[445] + ZA[444]; - ZA[435] = ZA[432] + ZA[175]; - - ZA[449] = (ZCh(ZA[446], ZA[442], ZA[437]) + ZA[435]) + ZR26(ZA[446]); 
- ZA[448] = ZMa(ZA[443], ZA[438], ZA[447]) + ZR30(ZA[447]); - ZA[169] = ZA[161] + ZA[146]; - ZA[178] = ZA[176] + 0xbef9a3f7U; - - ZA[177] = ZR15(ZA[173]) + ZA[169]; - ZA[451] = ZA[449] + ZA[448]; - ZA[450] = ZA[433] + ZA[449]; - ZA[440] = ZA[437] + ZA[178]; - - ZA[453] = (ZCh(ZA[450], ZA[446], ZA[442]) + ZA[440]) + ZR26(ZA[450]); - ZA[452] = ZMa(ZA[447], ZA[443], ZA[451]) + ZR30(ZA[451]); - ZA[179] = ZA[177] + 0xc67178f2U; - - ZA[454] = ZA[438] + ZA[453]; - ZA[494] = ZA[442] + ZA[179]; - ZA[455] = ZA[453] + ZA[452]; - - ZA[457] = (ZCh(ZA[454], ZA[450], ZA[446]) + ZA[494]) + ZR26(ZA[454]); - ZA[456] = ZMa(ZA[451], ZA[447], ZA[455]) + ZR30(ZA[455]); - - ZA[459] = ZA[457] + ZA[456]; - - ZA[461] = ZA[455] + state1; - ZA[460] = ZA[459] + state0; - - ZA[495] = ZA[460] + 0x98c7e2a2U; - ZA[469] = ZA[461] + 0x90bb1e3cU; - - ZA[498] = (ZCh(ZA[495], 0x510e527fU, 0x9b05688cU) + ZA[469]) + ZR26(ZA[495]); - ZA[462] = ZA[451] + state2; - - ZA[496] = ZA[460] + 0xfc08884dU; - ZA[506] = ZA[498] + 0x3c6ef372U; - ZA[470] = ZA[462] + 0x50c6645bU; - - ZA[507] = (ZCh(ZA[506], ZA[495], 0x510e527fU) + ZA[470]) + ZR26(ZA[506]); - ZA[500] = ZMa(0x6a09e667U, 0xbb67ae85U, ZA[496]) + ZR30(ZA[496]); - ZA[463] = ZA[447] + state3; - - ZA[458] = ZA[443] + ZA[457]; - ZA[499] = ZA[498] + ZA[500]; - ZA[508] = ZA[507] + 0xbb67ae85U; - ZA[473] = ZA[463] + 0x3ac42e24U; - - ZA[510] = (ZCh(ZA[508], ZA[506], ZA[495]) + ZA[473]) + ZR26(ZA[508]); - ZA[928] = ZMa(ZA[496], 0x6a09e667U, ZA[499]) + ZR30(ZA[499]); - ZA[464] = ZA[458] + state4; - - ZA[476] = ZA[464] + ZA[460] + 0xd21ea4fdU; - ZA[511] = ZA[510] + 0x6a09e667U; - ZA[509] = ZA[928] + ZA[507]; - ZA[465] = ZA[454] + state5; - - ZA[514] = (ZCh(ZA[511], ZA[508], ZA[506]) + ZA[476]) + ZR26(ZA[511]); - ZA[512] = ZMa(ZA[499], ZA[496], ZA[509]) + ZR30(ZA[509]); - ZA[478] = ZA[465] + 0x59f111f1U; - - ZA[519] = ZA[506] + ZA[478]; - ZA[516] = ZA[496] + ZA[514]; - ZA[513] = ZA[510] + ZA[512]; - ZA[466] = ZA[450] + state6; - - ZA[520] = (ZCh(ZA[516], ZA[511], 
ZA[508]) + ZA[519]) + ZR26(ZA[516]); - ZA[515] = ZMa(ZA[509], ZA[499], ZA[513]) + ZR30(ZA[513]); - ZA[480] = ZA[466] + 0x923f82a4U; - - ZA[524] = ZA[508] + ZA[480]; - ZA[521] = ZA[499] + ZA[520]; - ZA[517] = ZA[514] + ZA[515]; - ZA[467] = ZA[446] + state7; - - ZA[525] = (ZCh(ZA[521], ZA[516], ZA[511]) + ZA[524]) + ZR26(ZA[521]); - ZA[522] = ZMa(ZA[513], ZA[509], ZA[517]) + ZR30(ZA[517]); - ZA[484] = ZA[467] + 0xab1c5ed5U; - - ZA[529] = ZA[511] + ZA[484]; - ZA[526] = ZA[509] + ZA[525]; - ZA[523] = ZA[520] + ZA[522]; - - ZA[530] = (ZCh(ZA[526], ZA[521], ZA[516]) + ZA[529]) + ZR26(ZA[526]); - ZA[550] = ZMa(ZA[517], ZA[513], ZA[523]) + ZR30(ZA[523]); - - ZA[531] = ZA[513] + ZA[530]; - ZA[533] = ZA[516] + 0x5807aa98U; - ZA[527] = ZA[550] + ZA[525]; - - ZA[534] = (ZCh(ZA[531], ZA[526], ZA[521]) + ZA[533]) + ZR26(ZA[531]); - ZA[551] = ZMa(ZA[523], ZA[517], ZA[527]) + ZR30(ZA[527]); - - ZA[535] = ZA[517] + ZA[534]; - ZA[538] = ZA[521] + 0x12835b01U; - ZA[532] = ZA[551] + ZA[530]; - - ZA[539] = (ZCh(ZA[535], ZA[531], ZA[526]) + ZA[538]) + ZR26(ZA[535]); - ZA[552] = ZMa(ZA[527], ZA[523], ZA[532]) + ZR30(ZA[532]); - - ZA[540] = ZA[523] + ZA[539]; - ZA[542] = ZA[526] + 0x243185beU; - ZA[536] = ZA[552] + ZA[534]; - - ZA[543] = (ZCh(ZA[540], ZA[535], ZA[531]) + ZA[542]) + ZR26(ZA[540]); - ZA[553] = ZMa(ZA[532], ZA[527], ZA[536]) + ZR30(ZA[536]); - - ZA[544] = ZA[527] + ZA[543]; - ZA[555] = ZA[531] + 0x550c7dc3U; - ZA[541] = ZA[553] + ZA[539]; - - ZA[558] = (ZCh(ZA[544], ZA[540], ZA[535]) + ZA[555]) + ZR26(ZA[544]); - ZA[547] = ZMa(ZA[536], ZA[532], ZA[541]) + ZR30(ZA[541]); - - ZA[559] = ZA[532] + ZA[558]; - ZA[556] = ZA[535] + 0x72be5d74U; - ZA[545] = ZA[547] + ZA[543]; - - ZA[562] = (ZCh(ZA[559], ZA[544], ZA[540]) + ZA[556]) + ZR26(ZA[559]); - ZA[561] = ZMa(ZA[541], ZA[536], ZA[545]) + ZR30(ZA[545]); - - ZA[563] = ZA[536] + ZA[562]; - ZA[560] = ZA[561] + ZA[558]; - ZA[557] = ZA[540] + 0x80deb1feU; - - ZA[568] = (ZCh(ZA[563], ZA[559], ZA[544]) + ZA[557]) + ZR26(ZA[563]); - 
ZA[564] = ZMa(ZA[545], ZA[541], ZA[560]) + ZR30(ZA[560]); - - ZA[569] = ZA[541] + ZA[568]; - ZA[572] = ZA[544] + 0x9bdc06a7U; - ZA[565] = ZA[562] + ZA[564]; - - ZA[574] = (ZCh(ZA[569], ZA[563], ZA[559]) + ZA[572]) + ZR26(ZA[569]); - ZA[570] = ZMa(ZA[560], ZA[545], ZA[565]) + ZR30(ZA[565]); - ZA[468] = ZR25(ZA[461]); - - ZA[497] = ZA[468] + ZA[460]; - ZA[575] = ZA[545] + ZA[574]; - ZA[571] = ZA[568] + ZA[570]; - ZA[573] = ZA[559] + 0xc19bf274U; - - ZA[578] = (ZCh(ZA[575], ZA[569], ZA[563]) + ZA[573]) + ZR26(ZA[575]); - ZA[576] = ZMa(ZA[565], ZA[560], ZA[571]) + ZR30(ZA[571]); - ZA[929] = ZR25(ZA[462]); - ZA[503] = ZA[497] + 0xe49b69c1U; - - ZA[471] = ZA[929] + ZA[461] + 0x00a00000U; - ZA[582] = ZA[563] + ZA[503]; - ZA[579] = ZA[560] + ZA[578]; - ZA[577] = ZA[574] + ZA[576]; - - ZA[583] = (ZCh(ZA[579], ZA[575], ZA[569]) + ZA[582]) + ZR26(ZA[579]); - ZA[580] = ZMa(ZA[571], ZA[565], ZA[577]) + ZR30(ZA[577]); - ZA[488] = ZA[471] + 0xefbe4786U; - - ZA[472] = ZR25(ZA[463]) + ZA[462]; - ZA[587] = ZA[569] + ZA[488]; - ZA[584] = ZA[565] + ZA[583]; - ZA[581] = ZA[578] + ZA[580]; - - ZA[588] = (ZCh(ZA[584], ZA[579], ZA[575]) + ZA[587]) + ZR26(ZA[584]); - ZA[586] = ZMa(ZA[577], ZA[571], ZA[581]) + ZR30(ZA[581]); - ZA[501] = ZR15(ZA[497]) + ZA[472]; - ZA[475] = ZR15(ZA[471]); - ZA[926] = ZA[575] + 0x0fc19dc6U; - - ZA[474] = ZA[475] + ZA[463] + ZR25(ZA[464]); - ZA[927] = ZA[926] + ZA[501]; - ZA[589] = ZA[571] + ZA[588]; - ZA[585] = ZA[583] + ZA[586]; - - ZA[592] = (ZCh(ZA[589], ZA[584], ZA[579]) + ZA[927]) + ZR26(ZA[589]); - ZA[590] = ZMa(ZA[581], ZA[577], ZA[585]) + ZR30(ZA[585]); - ZA[477] = ZR25(ZA[465]) + ZA[464]; - ZA[489] = ZA[474] + 0x240ca1ccU; - - ZA[518] = ZR15(ZA[501]) + ZA[477]; - ZA[479] = ZR25(ZA[466]); - ZA[596] = ZA[579] + ZA[489]; - ZA[593] = ZA[577] + ZA[592]; - ZA[591] = ZA[588] + ZA[590]; - - ZA[597] = (ZCh(ZA[593], ZA[589], ZA[584]) + ZA[596]) + ZR26(ZA[593]); - ZA[594] = ZMa(ZA[585], ZA[581], ZA[591]) + ZR30(ZA[591]); - ZA[481] = ZA[479] + ZA[465]; - ZA[601] 
= ZA[518] + 0x2de92c6fU; - - ZA[482] = ZR15(ZA[474]) + ZA[481]; - ZA[602] = ZA[584] + ZA[601]; - ZA[598] = ZA[581] + ZA[597]; - ZA[595] = ZA[592] + ZA[594]; - - ZA[632] = (ZCh(ZA[598], ZA[593], ZA[589]) + ZA[602]) + ZR26(ZA[598]); - ZA[599] = ZMa(ZA[591], ZA[585], ZA[595]) + ZR30(ZA[595]); - ZA[483] = ZA[466] + 0x00000100U + ZR25(ZA[467]); - ZA[490] = ZA[482] + 0x4a7484aaU; - - ZA[528] = ZR15(ZA[518]) + ZA[483]; - ZA[736] = ZA[585] + ZA[632]; - ZA[605] = ZA[589] + ZA[490]; - ZA[600] = ZA[597] + ZA[599]; - ZA[485] = ZA[467] + 0x11002000U; - - ZA[738] = (ZCh(ZA[736], ZA[598], ZA[593]) + ZA[605]) + ZR26(ZA[736]); - ZA[744] = ZMa(ZA[595], ZA[591], ZA[600]) + ZR30(ZA[600]); - ZA[487] = ZR15(ZA[482]) + ZA[485]; - ZA[603] = ZA[528] + 0x5cb0a9dcU; - - ZA[502] = ZA[497] + ZA[487]; - ZA[739] = ZA[591] + ZA[738]; - ZA[604] = ZA[593] + ZA[603]; - ZA[737] = ZA[744] + ZA[632]; - - ZA[741] = (ZCh(ZA[739], ZA[736], ZA[598]) + ZA[604]) + ZR26(ZA[739]); - ZA[745] = ZMa(ZA[600], ZA[595], ZA[737]) + ZR30(ZA[737]); - ZA[486] = ZA[471] + 0x80000000U; - ZA[606] = ZA[502] + 0x76f988daU; - - ZA[537] = ZR15(ZA[528]) + ZA[486]; - ZA[742] = ZA[595] + ZA[741]; - ZA[613] = ZA[598] + ZA[606]; - ZA[740] = ZA[745] + ZA[738]; - - ZA[747] = (ZCh(ZA[742], ZA[739], ZA[736]) + ZA[613]) + ZR26(ZA[742]); - ZA[746] = ZMa(ZA[737], ZA[600], ZA[740]) + ZR30(ZA[740]); - ZA[607] = ZA[537] + 0x983e5152U; - - ZA[546] = ZR15(ZA[502]) + ZA[501]; - ZA[751] = ZA[736] + ZA[607]; - ZA[748] = ZA[600] + ZA[747]; - ZA[743] = ZA[746] + ZA[741]; - - ZA[752] = (ZCh(ZA[748], ZA[742], ZA[739]) + ZA[751]) + ZR26(ZA[748]); - ZA[749] = ZMa(ZA[740], ZA[737], ZA[743]) + ZR30(ZA[743]); - ZA[608] = ZA[546] + 0xa831c66dU; - - ZA[554] = ZR15(ZA[537]) + ZA[474]; - ZA[756] = ZA[739] + ZA[608]; - ZA[753] = ZA[737] + ZA[752]; - ZA[750] = ZA[747] + ZA[749]; - - ZA[757] = (ZCh(ZA[753], ZA[748], ZA[742]) + ZA[756]) + ZR26(ZA[753]); - ZA[754] = ZMa(ZA[743], ZA[740], ZA[750]) + ZR30(ZA[750]); - ZA[609] = ZA[554] + 0xb00327c8U; - - ZA[566] = 
ZR15(ZA[546]) + ZA[518]; - ZA[761] = ZA[742] + ZA[609]; - ZA[758] = ZA[740] + ZA[757]; - ZA[755] = ZA[752] + ZA[754]; - - ZA[762] = (ZCh(ZA[758], ZA[753], ZA[748]) + ZA[761]) + ZR26(ZA[758]); - ZA[759] = ZMa(ZA[750], ZA[743], ZA[755]) + ZR30(ZA[755]); - ZA[610] = ZA[566] + 0xbf597fc7U; - - ZA[567] = ZR15(ZA[554]) + ZA[482]; - ZA[766] = ZA[748] + ZA[610]; - ZA[763] = ZA[743] + ZA[762]; - ZA[760] = ZA[757] + ZA[759]; - - ZA[767] = (ZCh(ZA[763], ZA[758], ZA[753]) + ZA[766]) + ZR26(ZA[763]); - ZA[764] = ZMa(ZA[755], ZA[750], ZA[760]) + ZR30(ZA[760]); - ZA[611] = ZA[567] + 0xc6e00bf3U; - - ZA[614] = ZR15(ZA[566]) + ZA[528]; - ZA[771] = ZA[753] + ZA[611]; - ZA[768] = ZA[750] + ZA[767]; - ZA[765] = ZA[762] + ZA[764]; - - ZA[772] = (ZCh(ZA[768], ZA[763], ZA[758]) + ZA[771]) + ZR26(ZA[768]); - ZA[769] = ZMa(ZA[760], ZA[755], ZA[765]) + ZR30(ZA[765]); - ZA[612] = ZA[502] + 0x00400022U; - ZA[615] = ZA[614] + 0xd5a79147U; - - ZA[616] = ZR15(ZA[567]) + ZA[612]; - ZA[504] = ZR25(ZA[497]) + 0x00000100U; - ZA[776] = ZA[758] + ZA[615]; - ZA[773] = ZA[755] + ZA[772]; - ZA[770] = ZA[767] + ZA[769]; - - ZA[777] = (ZCh(ZA[773], ZA[768], ZA[763]) + ZA[776]) + ZR26(ZA[773]); - ZA[774] = ZMa(ZA[765], ZA[760], ZA[770]) + ZR30(ZA[770]); - ZA[492] = ZR25(ZA[471]); - ZA[618] = ZA[537] + ZA[504]; - ZA[617] = ZA[616] + 0x06ca6351U; - - ZA[619] = ZR15(ZA[614]) + ZA[618]; - ZA[781] = ZA[763] + ZA[617]; - ZA[778] = ZA[760] + ZA[777]; - ZA[775] = ZA[772] + ZA[774]; - ZA[505] = ZA[492] + ZA[497]; - - ZA[782] = (ZCh(ZA[778], ZA[773], ZA[768]) + ZA[781]) + ZR26(ZA[778]); - ZA[779] = ZMa(ZA[770], ZA[765], ZA[775]) + ZR30(ZA[775]); - ZA[621] = ZA[505] + ZA[546]; - ZA[620] = ZA[619] + 0x14292967U; - - ZA[622] = ZR15(ZA[616]) + ZA[621]; - ZA[625] = ZR25(ZA[501]); - ZA[786] = ZA[768] + ZA[620]; - ZA[783] = ZA[765] + ZA[782]; - ZA[624] = ZA[554] + ZA[471]; - ZA[780] = ZA[777] + ZA[779]; - - ZA[787] = (ZCh(ZA[783], ZA[778], ZA[773]) + ZA[786]) + ZR26(ZA[783]); - ZA[784] = ZMa(ZA[775], ZA[770], ZA[780]) + 
ZR30(ZA[780]); - ZA[493] = ZR25(ZA[474]); - ZA[626] = ZA[625] + ZA[624]; - ZA[623] = ZA[622] + 0x27b70a85U; - - ZA[627] = ZR15(ZA[619]) + ZA[626]; - ZA[791] = ZA[773] + ZA[623]; - ZA[788] = ZA[770] + ZA[787]; - ZA[785] = ZA[782] + ZA[784]; - ZA[629] = ZA[493] + ZA[501]; - - ZA[792] = (ZCh(ZA[788], ZA[783], ZA[778]) + ZA[791]) + ZR26(ZA[788]); - ZA[789] = ZMa(ZA[780], ZA[775], ZA[785]) + ZR30(ZA[785]); - ZA[630] = ZA[566] + ZA[629]; - ZA[628] = ZA[627] + 0x2e1b2138U; - - ZA[634] = ZR25(ZA[518]) + ZA[474]; - ZA[631] = ZR15(ZA[622]) + ZA[630]; - ZA[796] = ZA[778] + ZA[628]; - ZA[793] = ZA[775] + ZA[792]; - ZA[790] = ZA[787] + ZA[789]; - - ZA[797] = (ZCh(ZA[793], ZA[788], ZA[783]) + ZA[796]) + ZR26(ZA[793]); - ZA[794] = ZMa(ZA[785], ZA[780], ZA[790]) + ZR30(ZA[790]); - ZA[491] = ZR25(ZA[482]); - ZA[635] = ZA[567] + ZA[634]; - ZA[633] = ZA[631] + 0x4d2c6dfcU; - - ZA[636] = ZR15(ZA[627]) + ZA[635]; - ZA[801] = ZA[783] + ZA[633]; - ZA[798] = ZA[780] + ZA[797]; - ZA[795] = ZA[792] + ZA[794]; - ZA[638] = ZA[491] + ZA[518]; - - ZA[802] = (ZCh(ZA[798], ZA[793], ZA[788]) + ZA[801]) + ZR26(ZA[798]); - ZA[799] = ZMa(ZA[790], ZA[785], ZA[795]) + ZR30(ZA[795]); - ZA[639] = ZA[638] + ZA[614]; - ZA[637] = ZA[636] + 0x53380d13U; - - ZA[642] = ZR25(ZA[528]) + ZA[482]; - ZA[640] = ZR15(ZA[631]) + ZA[639]; - ZA[806] = ZA[788] + ZA[637]; - ZA[803] = ZA[785] + ZA[802]; - ZA[800] = ZA[797] + ZA[799]; - - ZA[807] = (ZCh(ZA[803], ZA[798], ZA[793]) + ZA[806]) + ZR26(ZA[803]); - ZA[804] = ZMa(ZA[795], ZA[790], ZA[800]) + ZR30(ZA[800]); - ZA[643] = ZA[616] + ZA[642]; - ZA[641] = ZA[640] + 0x650a7354U; - - ZA[646] = ZR25(ZA[502]) + ZA[528]; - ZA[644] = ZR15(ZA[636]) + ZA[643]; - ZA[811] = ZA[793] + ZA[641]; - ZA[808] = ZA[790] + ZA[807]; - ZA[805] = ZA[802] + ZA[804]; - - ZA[812] = (ZCh(ZA[808], ZA[803], ZA[798]) + ZA[811]) + ZR26(ZA[808]); - ZA[809] = ZMa(ZA[800], ZA[795], ZA[805]) + ZR30(ZA[805]); - ZA[647] = ZA[619] + ZA[646]; - ZA[645] = ZA[644] + 0x766a0abbU; - - ZA[650] = ZR25(ZA[537]) + 
ZA[502]; - ZA[648] = ZR15(ZA[640]) + ZA[647]; - ZA[816] = ZA[798] + ZA[645]; - ZA[813] = ZA[795] + ZA[812]; - ZA[810] = ZA[807] + ZA[809]; - - ZA[817] = (ZCh(ZA[813], ZA[808], ZA[803]) + ZA[816]) + ZR26(ZA[813]); - ZA[814] = ZMa(ZA[805], ZA[800], ZA[810]) + ZR30(ZA[810]); - ZA[925] = ZA[622] + ZA[650]; - ZA[649] = ZA[648] + 0x81c2c92eU; - - ZA[653] = ZR25(ZA[546]) + ZA[537]; - ZA[651] = ZR15(ZA[644]) + ZA[925]; - ZA[821] = ZA[803] + ZA[649]; - ZA[818] = ZA[800] + ZA[817]; - ZA[815] = ZA[812] + ZA[814]; - - ZA[822] = (ZCh(ZA[818], ZA[813], ZA[808]) + ZA[821]) + ZR26(ZA[818]); - ZA[819] = ZMa(ZA[810], ZA[805], ZA[815]) + ZR30(ZA[815]); - ZA[654] = ZA[627] + ZA[653]; - ZA[652] = ZA[651] + 0x92722c85U; - - ZA[657] = ZR25(ZA[554]) + ZA[546]; - ZA[655] = ZR15(ZA[648]) + ZA[654]; - ZA[826] = ZA[808] + ZA[652]; - ZA[823] = ZA[805] + ZA[822]; - ZA[820] = ZA[817] + ZA[819]; - - ZA[827] = (ZCh(ZA[823], ZA[818], ZA[813]) + ZA[826]) + ZR26(ZA[823]); - ZA[824] = ZMa(ZA[815], ZA[810], ZA[820]) + ZR30(ZA[820]); - ZA[658] = ZA[631] + ZA[657]; - ZA[656] = ZA[655] + 0xa2bfe8a1U; - - ZA[661] = ZR25(ZA[566]) + ZA[554]; - ZA[659] = ZR15(ZA[651]) + ZA[658]; - ZA[831] = ZA[813] + ZA[656]; - ZA[828] = ZA[810] + ZA[827]; - ZA[825] = ZA[822] + ZA[824]; - - ZA[832] = (ZCh(ZA[828], ZA[823], ZA[818]) + ZA[831]) + ZR26(ZA[828]); - ZA[829] = ZMa(ZA[820], ZA[815], ZA[825]) + ZR30(ZA[825]); - ZA[662] = ZA[636] + ZA[661]; - ZA[660] = ZA[659] + 0xa81a664bU; - - ZA[665] = ZR25(ZA[567]) + ZA[566]; - ZA[663] = ZR15(ZA[655]) + ZA[662]; - ZA[836] = ZA[818] + ZA[660]; - ZA[833] = ZA[815] + ZA[832]; - ZA[830] = ZA[827] + ZA[829]; - - ZA[837] = (ZCh(ZA[833], ZA[828], ZA[823]) + ZA[836]) + ZR26(ZA[833]); - ZA[834] = ZMa(ZA[825], ZA[820], ZA[830]) + ZR30(ZA[830]); - ZA[666] = ZA[640] + ZA[665]; - ZA[664] = ZA[663] + 0xc24b8b70U; - - ZA[669] = ZR25(ZA[614]) + ZA[567]; - ZA[667] = ZR15(ZA[659]) + ZA[666]; - ZA[841] = ZA[823] + ZA[664]; - ZA[838] = ZA[820] + ZA[837]; - ZA[835] = ZA[832] + ZA[834]; - - ZA[842] = 
(ZCh(ZA[838], ZA[833], ZA[828]) + ZA[841]) + ZR26(ZA[838]); - ZA[839] = ZMa(ZA[830], ZA[825], ZA[835]) + ZR30(ZA[835]); - ZA[670] = ZA[644] + ZA[669]; - ZA[668] = ZA[667] + 0xc76c51a3U; - - ZA[677] = ZR25(ZA[616]) + ZA[614]; - ZA[671] = ZR15(ZA[663]) + ZA[670]; - ZA[846] = ZA[828] + ZA[668]; - ZA[843] = ZA[825] + ZA[842]; - ZA[840] = ZA[837] + ZA[839]; - - ZA[847] = (ZCh(ZA[843], ZA[838], ZA[833]) + ZA[846]) + ZR26(ZA[843]); - ZA[844] = ZMa(ZA[835], ZA[830], ZA[840]) + ZR30(ZA[840]); - ZA[678] = ZA[648] + ZA[677]; - ZA[676] = ZA[671] + 0xd192e819U; - - ZA[682] = ZR25(ZA[619]) + ZA[616]; - ZA[679] = ZR15(ZA[667]) + ZA[678]; - ZA[851] = ZA[833] + ZA[676]; - ZA[848] = ZA[830] + ZA[847]; - ZA[845] = ZA[842] + ZA[844]; - - ZA[852] = (ZCh(ZA[848], ZA[843], ZA[838]) + ZA[851]) + ZR26(ZA[848]); - ZA[849] = ZMa(ZA[840], ZA[835], ZA[845]) + ZR30(ZA[845]); - ZA[683] = ZA[651] + ZA[682]; - ZA[680] = ZA[679] + 0xd6990624U; - - ZA[686] = ZR25(ZA[622]) + ZA[619]; - ZA[684] = ZR15(ZA[671]) + ZA[683]; - ZA[856] = ZA[838] + ZA[680]; - ZA[853] = ZA[835] + ZA[852]; - ZA[850] = ZA[847] + ZA[849]; - - ZA[857] = (ZCh(ZA[853], ZA[848], ZA[843]) + ZA[856]) + ZR26(ZA[853]); - ZA[854] = ZMa(ZA[845], ZA[840], ZA[850]) + ZR30(ZA[850]); - ZA[687] = ZA[655] + ZA[686]; - ZA[685] = ZA[684] + 0xf40e3585U; - - ZA[690] = ZR25(ZA[627]) + ZA[622]; - ZA[688] = ZR15(ZA[679]) + ZA[687]; - ZA[861] = ZA[843] + ZA[685]; - ZA[858] = ZA[840] + ZA[857]; - ZA[855] = ZA[852] + ZA[854]; - - ZA[862] = (ZCh(ZA[858], ZA[853], ZA[848]) + ZA[861]) + ZR26(ZA[858]); - ZA[859] = ZMa(ZA[850], ZA[845], ZA[855]) + ZR30(ZA[855]); - ZA[691] = ZA[659] + ZA[690]; - ZA[689] = ZA[688] + 0x106aa070U; - - ZA[694] = ZR25(ZA[631]) + ZA[627]; - ZA[692] = ZR15(ZA[684]) + ZA[691]; - ZA[866] = ZA[848] + ZA[689]; - ZA[863] = ZA[845] + ZA[862]; - ZA[860] = ZA[857] + ZA[859]; - - ZA[867] = (ZCh(ZA[863], ZA[858], ZA[853]) + ZA[866]) + ZR26(ZA[863]); - ZA[864] = ZMa(ZA[855], ZA[850], ZA[860]) + ZR30(ZA[860]); - ZA[695] = ZA[663] + ZA[694]; - 
ZA[693] = ZA[692] + 0x19a4c116U; - - ZA[698] = ZR25(ZA[636]) + ZA[631]; - ZA[696] = ZR15(ZA[688]) + ZA[695]; - ZA[871] = ZA[853] + ZA[693]; - ZA[868] = ZA[850] + ZA[867]; - ZA[865] = ZA[862] + ZA[864]; - - ZA[873] = (ZCh(ZA[868], ZA[863], ZA[858]) + ZA[871]) + ZR26(ZA[868]); - ZA[869] = ZMa(ZA[860], ZA[855], ZA[865]) + ZR30(ZA[865]); - ZA[699] = ZA[667] + ZA[698]; - ZA[697] = ZA[696] + 0x1e376c08U; - - ZA[702] = ZR25(ZA[640]) + ZA[636]; - ZA[700] = ZR15(ZA[692]) + ZA[699]; - ZA[877] = ZA[858] + ZA[697]; - ZA[874] = ZA[855] + ZA[873]; - ZA[870] = ZA[867] + ZA[869]; - - ZA[878] = (ZCh(ZA[874], ZA[868], ZA[863]) + ZA[877]) + ZR26(ZA[874]); - ZA[875] = ZMa(ZA[865], ZA[860], ZA[870]) + ZR30(ZA[870]); - ZA[703] = ZA[671] + ZA[702]; - ZA[701] = ZA[700] + 0x2748774cU; - - ZA[706] = ZR25(ZA[644]) + ZA[640]; - ZA[704] = ZR15(ZA[696]) + ZA[703]; - ZA[882] = ZA[863] + ZA[701]; - ZA[879] = ZA[860] + ZA[878]; - ZA[876] = ZA[873] + ZA[875]; - - ZA[883] = (ZCh(ZA[879], ZA[874], ZA[868]) + ZA[882]) + ZR26(ZA[879]); - ZA[880] = ZMa(ZA[870], ZA[865], ZA[876]) + ZR30(ZA[876]); - ZA[707] = ZA[679] + ZA[706]; - ZA[705] = ZA[704] + 0x34b0bcb5U; - - ZA[710] = ZR25(ZA[648]) + ZA[644]; - ZA[708] = ZR15(ZA[700]) + ZA[707]; - ZA[887] = ZA[868] + ZA[705]; - ZA[884] = ZA[865] + ZA[883]; - ZA[881] = ZA[878] + ZA[880]; - - ZA[888] = (ZCh(ZA[884], ZA[879], ZA[874]) + ZA[887]) + ZR26(ZA[884]); - ZA[885] = ZMa(ZA[876], ZA[870], ZA[881]) + ZR30(ZA[881]); - ZA[711] = ZA[684] + ZA[710]; - ZA[709] = ZA[708] + 0x391c0cb3U; - - ZA[714] = ZR25(ZA[651]) + ZA[648]; - ZA[712] = ZR15(ZA[704]) + ZA[711]; - ZA[892] = ZA[874] + ZA[709]; - ZA[889] = ZA[870] + ZA[888]; - ZA[886] = ZA[883] + ZA[885]; - - ZA[893] = (ZCh(ZA[889], ZA[884], ZA[879]) + ZA[892]) + ZR26(ZA[889]); - ZA[890] = ZMa(ZA[881], ZA[876], ZA[886]) + ZR30(ZA[886]); - ZA[715] = ZA[688] + ZA[714]; - ZA[713] = ZA[712] + 0x4ed8aa4aU; - - ZA[718] = ZR25(ZA[655]) + ZA[651]; - ZA[716] = ZR15(ZA[708]) + ZA[715]; - ZA[897] = ZA[879] + ZA[713]; - ZA[894] = 
ZA[876] + ZA[893]; - ZA[891] = ZA[888] + ZA[890]; - - ZA[898] = (ZCh(ZA[894], ZA[889], ZA[884]) + ZA[897]) + ZR26(ZA[894]); - ZA[895] = ZMa(ZA[886], ZA[881], ZA[891]) + ZR30(ZA[891]); - ZA[719] = ZA[692] + ZA[718]; - ZA[717] = ZA[716] + 0x5b9cca4fU; - - ZA[722] = ZR25(ZA[659]) + ZA[655]; - ZA[720] = ZR15(ZA[712]) + ZA[719]; - ZA[902] = ZA[884] + ZA[717]; - ZA[899] = ZA[881] + ZA[898]; - ZA[896] = ZA[893] + ZA[895]; - - ZA[903] = (ZCh(ZA[899], ZA[894], ZA[889]) + ZA[902]) + ZR26(ZA[899]); - ZA[900] = ZMa(ZA[891], ZA[886], ZA[896]) + ZR30(ZA[896]); - ZA[723] = ZA[696] + ZA[722]; - ZA[721] = ZA[720] + 0x682e6ff3U; - - ZA[672] = ZR25(ZA[663]) + ZA[659]; - ZA[724] = ZR15(ZA[716]) + ZA[723]; - ZA[907] = ZA[889] + ZA[721]; - ZA[904] = ZA[886] + ZA[903]; - ZA[901] = ZA[898] + ZA[900]; - - ZA[908] = (ZCh(ZA[904], ZA[899], ZA[894]) + ZA[907]) + ZR26(ZA[904]); - ZA[905] = ZMa(ZA[896], ZA[891], ZA[901]) + ZR30(ZA[901]); - ZA[673] = ZR25(ZA[667]) + ZA[663]; - ZA[726] = ZA[700] + ZA[672]; - ZA[725] = ZA[724] + 0x748f82eeU; - - ZA[727] = ZR15(ZA[720]) + ZA[726]; - ZA[912] = ZA[894] + ZA[725]; - ZA[909] = ZA[891] + ZA[908]; - ZA[906] = ZA[903] + ZA[905]; - ZA[675] = ZA[667] + 0x8cc70208U; - ZA[729] = ZA[704] + ZA[673]; - - ZA[913] = (ZCh(ZA[909], ZA[904], ZA[899]) + ZA[912]) + ZR26(ZA[909]); - ZA[910] = ZMa(ZA[901], ZA[896], ZA[906]) + ZR30(ZA[906]); - ZA[674] = ZR25(ZA[671]) + ZA[675]; - ZA[730] = ZR15(ZA[724]) + ZA[729]; - ZA[728] = ZA[727] + 0x78a5636fU; - - ZA[681] = ZR25(ZA[679]) + ZA[671]; - ZA[917] = ZA[899] + ZA[901] + ZA[728]; - ZA[914] = ZA[896] + ZA[913]; - ZA[911] = ZA[908] + ZA[910]; - ZA[732] = ZA[708] + ZA[674]; - ZA[731] = ZA[730] + 0x84c87814U; - - ZA[918] = (ZCh(ZA[914], ZA[909], ZA[904]) + ZA[917]) + ZR26(ZA[914]); - ZA[915] = ZMa(ZA[906], ZA[901], ZA[911]) + ZR30(ZA[911]); - ZA[733] = ZR15(ZA[727]) + ZA[732]; - ZA[919] = ZA[906] + ZA[904] + ZA[731]; - ZA[734] = ZA[712] + ZA[681]; - - ZA[920] = (ZCh(ZA[918], ZA[914], ZA[909]) + ZA[919]) + ZR26(ZA[918]); - 
ZA[735] = ZR15(ZA[730]) + ZA[734]; - ZA[921] = ZA[911] + ZA[909] + ZA[733]; - ZA[916] = ZA[913] + ZA[915]; - - ZA[922] = (ZCh(ZA[920], ZA[918], ZA[914]) + ZA[921]) + ZR26(ZA[920]); - ZA[923] = ZA[916] + ZA[914] + ZA[735]; - - ZA[924] = (ZCh(ZA[922], ZA[920], ZA[918]) + ZA[923]) + ZR26(ZA[922]); - -#define FOUND (0x800) -#define NFLAG (0x7FF) - -#if defined(VECTORS4) - bool result = any(ZA[924] == 0x136032EDU); - - if (result) { - if (ZA[924].x == 0x136032EDU) - output[FOUND] = output[NFLAG & Znonce.x] = Znonce.x; - if (ZA[924].y == 0x136032EDU) - output[FOUND] = output[NFLAG & Znonce.y] = Znonce.y; - if (ZA[924].z == 0x136032EDU) - output[FOUND] = output[NFLAG & Znonce.z] = Znonce.z; - if (ZA[924].w == 0x136032EDU) - output[FOUND] = output[NFLAG & Znonce.w] = Znonce.w; - } -#elif defined(VECTORS2) - bool result = any(ZA[924] == 0x136032EDU); - - if (result) { - if (ZA[924].x == 0x136032EDU) - output[FOUND] = output[NFLAG & Znonce.x] = Znonce.x; - if (ZA[924].y == 0x136032EDU) - output[FOUND] = output[NFLAG & Znonce.y] = Znonce.y; - } -#else - if (ZA[924] == 0x136032EDU) - output[FOUND] = output[NFLAG & Znonce] = Znonce; -#endif -} diff --git a/diakgcn120427.cl b/diakgcn120427.cl deleted file mode 100644 index 7dd73fb9..00000000 --- a/diakgcn120427.cl +++ /dev/null @@ -1,587 +0,0 @@ -// DiaKGCN 27-04-2012 - OpenCL kernel by Diapolo -// -// Parts and / or ideas for this kernel are based upon the public-domain poclbm project, the phatk kernel by Phateus and the DiabloMiner kernel by DiabloD3. -// The kernel was rewritten by me (Diapolo) and is still public-domain! 
- -#ifdef VECTORS4 - typedef uint4 u; -#elif defined VECTORS2 - typedef uint2 u; -#else - typedef uint u; -#endif - -#ifdef BITALIGN - #pragma OPENCL EXTENSION cl_amd_media_ops : enable - #ifdef BFI_INT - #define ch(x, y, z) amd_bytealign(x, y, z) - #define ma(x, y, z) amd_bytealign(z ^ x, y, x) - #else - #define ch(x, y, z) bitselect(z, y, x) - #define ma(z, x, y) bitselect(z, y, z ^ x) - #endif -#else - #define ch(x, y, z) (z ^ (x & (y ^ z))) - #define ma(x, y, z) ((x & z) | (y & (x | z))) -#endif - -#define rotr15(n) (rotate(n, 15U) ^ rotate(n, 13U) ^ (n >> 10U)) -#define rotr25(n) (rotate(n, 25U) ^ rotate(n, 14U) ^ (n >> 3U)) -#define rotr26(n) (rotate(n, 26U) ^ rotate(n, 21U) ^ rotate(n, 7U)) -#define rotr30(n) (rotate(n, 30U) ^ rotate(n, 19U) ^ rotate(n, 10U)) - -__kernel - __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) - void search( -#ifndef GOFFSET - const u base, -#endif - const uint PreVal0, const uint PreVal4, - const uint H1, const uint D1A, const uint B1, const uint C1, - const uint F1, const uint G1, const uint C1addK5, const uint B1addK6, const uint PreVal0addK7, - const uint W16addK16, const uint W17addK17, - const uint PreW18, const uint PreW19, - const uint W16, const uint W17, - const uint PreW31, const uint PreW32, - const uint state0, const uint state1, const uint state2, const uint state3, - const uint state4, const uint state5, const uint state6, const uint state7, - const uint state0A, const uint state0B, - const uint state1A, const uint state2A, const uint state3A, const uint state4A, - const uint state5A, const uint state6A, const uint state7A, - __global uint * output) -{ - u V[8]; - u W[16]; - -#ifdef VECTORS4 - const u nonce = (uint)(get_local_id(0)) * 4U + (uint)(get_group_id(0)) * (uint)(WORKVEC) + base; -#elif defined VECTORS2 - const u nonce = (uint)(get_local_id(0)) * 2U + (uint)(get_group_id(0)) * (uint)(WORKVEC) + base; -#else - #ifdef GOFFSET - const u nonce = (uint)(get_global_id(0)); - #else - const u nonce = 
(uint)(get_local_id(0)) + (uint)(get_group_id(0)) * (uint)(WORKSIZE) + base; - #endif -#endif - - V[0] = PreVal0 + nonce; - V[1] = B1; - V[2] = C1; - V[3] = D1A; - V[4] = PreVal4 + nonce; - V[5] = F1; - V[6] = G1; - V[7] = H1; - - V[7] += V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]); - V[3] = V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - - V[6] += C1addK5 + ch(V[7], V[0], V[1]) + rotr26(V[7]); - V[2] = C1addK5 + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); - - V[5] += B1addK6 + ch(V[6], V[7], V[0]) + rotr26(V[6]); - V[1] = B1addK6 + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); - - V[4] += PreVal0addK7 + nonce + ch(V[5], V[6], V[7]) + rotr26(V[5]); - V[0] = PreVal0addK7 + nonce + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); - - V[3] += 0xd807aa98U + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]); - V[7] = 0xd807aa98U + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); - - V[2] += 0x12835b01U + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]); - V[6] = 0x12835b01U + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); - - V[1] += 0x243185beU + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]); - V[5] = 0x243185beU + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); - - V[0] += 0x550c7dc3U + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]); - V[4] = 0x550c7dc3U + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); - - V[7] += 0x72be5d74U + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]); - V[3] = 0x72be5d74U + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - - V[6] += 0x80deb1feU + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]); - V[2] = 0x80deb1feU + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); - - V[5] += 0x9bdc06a7U + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]); - V[1] = 0x9bdc06a7U 
+ V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); - - V[4] += 0xc19bf3f4U + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]); - V[0] = 0xc19bf3f4U + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); - - V[3] += W16addK16 + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]); - V[7] = W16addK16 + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); - - V[2] += W17addK17 + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]); - V[6] = W17addK17 + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); - -//---------------------------------------------------------------------------------- - -#ifdef VECTORS4 - W[0] = PreW18 + (u)(rotr25(nonce.x), rotr25(nonce.x) ^ 0x2004000U, rotr25(nonce.x) ^ 0x4008000U, rotr25(nonce.x) ^ 0x600c000U); -#elif defined VECTORS2 - W[0] = PreW18 + (u)(rotr25(nonce.x), rotr25(nonce.x) ^ 0x2004000U); -#else - W[0] = PreW18 + rotr25(nonce); -#endif - W[1] = PreW19 + nonce; - W[2] = 0x80000000U + rotr15(W[0]); - W[3] = rotr15(W[1]); - W[4] = 0x00000280U + rotr15(W[2]); - W[5] = W16 + rotr15(W[3]); - W[6] = W17 + rotr15(W[4]); - W[7] = W[0] + rotr15(W[5]); - W[8] = W[1] + rotr15(W[6]); - W[9] = W[2] + rotr15(W[7]); - W[10] = W[3] + rotr15(W[8]); - W[11] = W[4] + rotr15(W[9]); - W[12] = W[5] + 0x00a00055U + rotr15(W[10]); - W[13] = W[6] + PreW31 + rotr15(W[11]); - W[14] = W[7] + PreW32 + rotr15(W[12]); - W[15] = W[8] + W17 + rotr15(W[13]) + rotr25(W[0]); - - V[1] += 0x0fc19dc6U + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + W[0]; - V[5] = 0x0fc19dc6U + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + W[0] + rotr30(V[6]) + ma(V[7], V[0], V[6]); - - V[0] += 0x240ca1ccU + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]); - V[4] = 0x240ca1ccU + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); - - V[7] += 0x2de92c6fU + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]); - V[3] = 0x2de92c6fU + V[3] + W[2] + ch(V[0], V[1], 
V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - - V[6] += 0x4a7484aaU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]); - V[2] = 0x4a7484aaU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); - - V[5] += 0x5cb0a9dcU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]); - V[1] = 0x5cb0a9dcU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); - - V[4] += 0x76f988daU + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]); - V[0] = 0x76f988daU + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); - - V[3] += 0x983e5152U + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]); - V[7] = 0x983e5152U + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); - - V[2] += 0xa831c66dU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]); - V[6] = 0xa831c66dU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); - - V[1] += 0xb00327c8U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]); - V[5] = 0xb00327c8U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); - - V[0] += 0xbf597fc7U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]); - V[4] = 0xbf597fc7U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); - - V[7] += 0xc6e00bf3U + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]); - V[3] = 0xc6e00bf3U + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - - V[6] += 0xd5a79147U + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]); - V[2] = 0xd5a79147U + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); - - V[5] += 0x06ca6351U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]); - V[1] = 0x06ca6351U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); - - V[4] += 0x14292967U + V[0] + W[13] + 
ch(V[5], V[6], V[7]) + rotr26(V[5]); - V[0] = 0x14292967U + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); - - V[3] += 0x27b70a85U + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]); - V[7] = 0x27b70a85U + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); - - V[2] += 0x2e1b2138U + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]); - V[6] = 0x2e1b2138U + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); - -//---------------------------------------------------------------------------------- - - W[0] = W[0] + W[9] + rotr15(W[14]) + rotr25( W[1]); - W[1] = W[1] + W[10] + rotr15(W[15]) + rotr25( W[2]); - W[2] = W[2] + W[11] + rotr15( W[0]) + rotr25( W[3]); - W[3] = W[3] + W[12] + rotr15( W[1]) + rotr25( W[4]); - W[4] = W[4] + W[13] + rotr15( W[2]) + rotr25( W[5]); - W[5] = W[5] + W[14] + rotr15( W[3]) + rotr25( W[6]); - W[6] = W[6] + W[15] + rotr15( W[4]) + rotr25( W[7]); - W[7] = W[7] + W[0] + rotr15( W[5]) + rotr25( W[8]); - W[8] = W[8] + W[1] + rotr15( W[6]) + rotr25( W[9]); - W[9] = W[9] + W[2] + rotr15( W[7]) + rotr25(W[10]); - W[10] = W[10] + W[3] + rotr15( W[8]) + rotr25(W[11]); - W[11] = W[11] + W[4] + rotr15( W[9]) + rotr25(W[12]); - W[12] = W[12] + W[5] + rotr15(W[10]) + rotr25(W[13]); - W[13] = W[13] + W[6] + rotr15(W[11]) + rotr25(W[14]); - W[14] = W[14] + W[7] + rotr15(W[12]) + rotr25(W[15]); - W[15] = W[15] + W[8] + rotr15(W[13]) + rotr25( W[0]); - - V[1] += 0x4d2c6dfcU + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]); - V[5] = 0x4d2c6dfcU + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); - - V[0] += 0x53380d13U + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]); - V[4] = 0x53380d13U + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); - - V[7] += 0x650a7354U + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]); - V[3] = 0x650a7354U + V[3] + W[2] + 
ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - - V[6] += 0x766a0abbU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]); - V[2] = 0x766a0abbU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); - - V[5] += 0x81c2c92eU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]); - V[1] = 0x81c2c92eU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); - - V[4] += 0x92722c85U + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]); - V[0] = 0x92722c85U + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); - - V[3] += 0xa2bfe8a1U + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]); - V[7] = 0xa2bfe8a1U + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); - - V[2] += 0xa81a664bU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]); - V[6] = 0xa81a664bU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); - - V[1] += 0xc24b8b70U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]); - V[5] = 0xc24b8b70U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); - - V[0] += 0xc76c51a3U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]); - V[4] = 0xc76c51a3U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); - - V[7] += 0xd192e819U + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]); - V[3] = 0xd192e819U + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - - V[6] += 0xd6990624U + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]); - V[2] = 0xd6990624U + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); - - V[5] += 0xf40e3585U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]); - V[1] = 0xf40e3585U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); - - V[4] += 0x106aa070U + 
V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]); - V[0] = 0x106aa070U + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); - - V[3] += 0x19a4c116U + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]); - V[7] = 0x19a4c116U + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); - - V[2] += 0x1e376c08U + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]); - V[6] = 0x1e376c08U + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); - -//---------------------------------------------------------------------------------- - - W[0] = W[0] + W[9] + rotr15(W[14]) + rotr25( W[1]); - W[1] = W[1] + W[10] + rotr15(W[15]) + rotr25( W[2]); - W[2] = W[2] + W[11] + rotr15( W[0]) + rotr25( W[3]); - W[3] = W[3] + W[12] + rotr15( W[1]) + rotr25( W[4]); - W[4] = W[4] + W[13] + rotr15( W[2]) + rotr25( W[5]); - W[5] = W[5] + W[14] + rotr15( W[3]) + rotr25( W[6]); - W[6] = W[6] + W[15] + rotr15( W[4]) + rotr25( W[7]); - W[7] = W[7] + W[0] + rotr15( W[5]) + rotr25( W[8]); - W[8] = W[8] + W[1] + rotr15( W[6]) + rotr25( W[9]); - W[9] = W[9] + W[2] + rotr15( W[7]) + rotr25(W[10]); - W[10] = W[10] + W[3] + rotr15( W[8]) + rotr25(W[11]); - W[11] = W[11] + W[4] + rotr15( W[9]) + rotr25(W[12]); - W[12] = W[12] + W[5] + rotr15(W[10]) + rotr25(W[13]); - W[13] = W[13] + W[6] + rotr15(W[11]) + rotr25(W[14]); - - V[1] += 0x2748774cU + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]); - V[5] = 0x2748774cU + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); - - V[0] += 0x34b0bcb5U + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]); - V[4] = 0x34b0bcb5U + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); - - V[7] += 0x391c0cb3U + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]); - V[3] = 0x391c0cb3U + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - - V[6] += 0x4ed8aa4aU + 
V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]); - V[2] = 0x4ed8aa4aU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); - - V[5] += 0x5b9cca4fU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]); - V[1] = 0x5b9cca4fU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); - - V[4] += 0x682e6ff3U + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]); - V[0] = 0x682e6ff3U + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); - - V[3] += 0x748f82eeU + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]); - V[7] = 0x748f82eeU + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); - - V[2] += 0x78a5636fU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]); - V[6] = 0x78a5636fU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); - - V[1] += 0x84c87814U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]); - V[5] = 0x84c87814U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); - - V[0] += 0x8cc70208U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]); - V[4] = 0x8cc70208U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); - - V[7] += 0x90befffaU + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]); - V[3] = 0x90befffaU + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - - V[6] += 0xa4506cebU + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]); - V[2] = 0xa4506cebU + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); - - V[5] += 0xbef9a3f7U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]); - V[1] = 0xbef9a3f7U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); - - V[4] += 0xc67178f2U + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]); - V[0] = 0xc67178f2U + V[0] + W[13] + ch(V[5], 
V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); - -//---------------------------------------------------------------------------------- - - W[0] = state0 + V[0] + rotr25(state1 + V[1]); - W[1] = state1 + V[1] + 0x00a00000U + rotr25(state2 + V[2]); - W[2] = state2 + V[2] + rotr15(W[0]) + rotr25(state3 + V[3]); - W[3] = state3 + V[3] + rotr15(W[1]) + rotr25(state4 + V[4]); - W[4] = state4 + V[4] + rotr15(W[2]) + rotr25(state5 + V[5]); - W[5] = state5 + V[5] + rotr15(W[3]) + rotr25(state6 + V[6]); - W[6] = state6 + V[6] + 0x00000100U + rotr15(W[4]) + rotr25(state7 + V[7]); - W[7] = state7 + V[7] + W[0] + 0x11002000U + rotr15(W[5]); - W[8] = W[1] + 0x80000000U + rotr15(W[6]); - W[9] = W[2] + rotr15(W[7]); - W[10] = W[3] + rotr15(W[8]); - W[11] = W[4] + rotr15(W[9]); - W[12] = W[5] + rotr15(W[10]); - W[13] = W[6] + rotr15(W[11]); - W[14] = W[7] + 0x00400022U + rotr15(W[12]); - W[15] = W[8] + 0x00000100U + rotr15(W[13]) + rotr25(W[0]); - - // 0x71374491U + 0x1f83d9abU + state1 - const u state1AaddV1 = state1A + V[1]; - // 0xb5c0fbcfU + 0x9b05688cU + state2 - const u state2AaddV2 = state2A + V[2]; - // 0x510e527fU + 0xe9b5dba5U + state3 - const u state3AaddV3 = state3A + V[3]; - // 0x3956c25bU + state4 - const u state4AaddV4 = state4A + V[4]; - // 0x59f111f1U + state5 - const u state5AaddV5 = state5A + V[5]; - // 0x923f82a4U + state6 - const u state6AaddV6 = state6A + V[6]; - // 0xab1c5ed5U + state7 - const u state7AaddV7 = state7A + V[7]; - - // 0x98c7e2a2U + state0 - V[3] = state0A + V[0]; - // 0xfc08884dU + state0 - V[7] = state0B + V[0]; - V[0] = 0x6a09e667U; - V[1] = 0xbb67ae85U; - V[2] = 0x3c6ef372U; - V[4] = 0x510e527fU; - V[5] = 0x9b05688cU; - V[6] = 0x1f83d9abU; - - V[2] += state1AaddV1 + ch(V[3], V[4], V[5]) + rotr26(V[3]); - V[6] = state1AaddV1 + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); - - V[1] += state2AaddV2 + ch(V[2], V[3], V[4]) + rotr26(V[2]); - V[5] = state2AaddV2 + ch(V[2], V[3], V[4]) + rotr26(V[2]) + 
rotr30(V[6]) + ma(V[7], V[0], V[6]); - - V[0] += state3AaddV3 + ch(V[1], V[2], V[3]) + rotr26(V[1]); - V[4] = state3AaddV3 + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); - - V[7] += state4AaddV4 + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]); - V[3] = state4AaddV4 + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - - V[6] += state5AaddV5 + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]); - V[2] = state5AaddV5 + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); - - V[5] += state6AaddV6 + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]); - V[1] = state6AaddV6 + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); - - V[4] += state7AaddV7 + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]); - V[0] = state7AaddV7 + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); - - V[3] += 0x5807aa98U + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]); - V[7] = 0x5807aa98U + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); - - V[2] += 0x12835b01U + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]); - V[6] = 0x12835b01U + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); - - V[1] += 0x243185beU + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]); - V[5] = 0x243185beU + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); - - V[0] += 0x550c7dc3U + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]); - V[4] = 0x550c7dc3U + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); - - V[7] += 0x72be5d74U + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]); - V[3] = 0x72be5d74U + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - - V[6] += 0x80deb1feU + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]); - V[2] = 0x80deb1feU + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); - - V[5] += 0x9bdc06a7U + V[1] + ch(V[6], 
V[7], V[0]) + rotr26(V[6]); - V[1] = 0x9bdc06a7U + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); - - V[4] += 0xc19bf274U + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]); - V[0] = 0xc19bf274U + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); - - V[3] += 0xe49b69c1U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]); - V[7] = 0xe49b69c1U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); - - V[2] += 0xefbe4786U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]); - V[6] = 0xefbe4786U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); - - V[1] += 0x0fc19dc6U + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]); - V[5] = 0x0fc19dc6U + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); - - V[0] += 0x240ca1ccU + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]); - V[4] = 0x240ca1ccU + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); - - V[7] += 0x2de92c6fU + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]); - V[3] = 0x2de92c6fU + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - - V[6] += 0x4a7484aaU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]); - V[2] = 0x4a7484aaU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); - - V[5] += 0x5cb0a9dcU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]); - V[1] = 0x5cb0a9dcU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); - - V[4] += 0x76f988daU + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]); - V[0] = 0x76f988daU + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); - - V[3] += 0x983e5152U + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]); - V[7] = 0x983e5152U + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], 
V[2], V[0]); - - V[2] += 0xa831c66dU + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]); - V[6] = 0xa831c66dU + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); - - V[1] += 0xb00327c8U + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]); - V[5] = 0xb00327c8U + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); - - V[0] += 0xbf597fc7U + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]); - V[4] = 0xbf597fc7U + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); - - V[7] += 0xc6e00bf3U + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]); - V[3] = 0xc6e00bf3U + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - - V[6] += 0xd5a79147U + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]); - V[2] = 0xd5a79147U + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); - - V[5] += 0x06ca6351U + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]); - V[1] = 0x06ca6351U + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); - - V[4] += 0x14292967U + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]); - V[0] = 0x14292967U + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); - -//---------------------------------------------------------------------------------- - - W[0] = W[0] + W[9] + rotr15(W[14]) + rotr25( W[1]); - W[1] = W[1] + W[10] + rotr15(W[15]) + rotr25( W[2]); - W[2] = W[2] + W[11] + rotr15( W[0]) + rotr25( W[3]); - W[3] = W[3] + W[12] + rotr15( W[1]) + rotr25( W[4]); - W[4] = W[4] + W[13] + rotr15( W[2]) + rotr25( W[5]); - W[5] = W[5] + W[14] + rotr15( W[3]) + rotr25( W[6]); - W[6] = W[6] + W[15] + rotr15( W[4]) + rotr25( W[7]); - W[7] = W[7] + W[0] + rotr15( W[5]) + rotr25( W[8]); - W[8] = W[8] + W[1] + rotr15( W[6]) + rotr25( W[9]); - W[9] = W[9] + W[2] + rotr15( W[7]) + rotr25(W[10]); - W[10] = W[10] 
+ W[3] + rotr15( W[8]) + rotr25(W[11]); - W[11] = W[11] + W[4] + rotr15( W[9]) + rotr25(W[12]); - W[12] = W[12] + W[5] + rotr15(W[10]) + rotr25(W[13]); - W[13] = W[13] + W[6] + rotr15(W[11]) + rotr25(W[14]); - W[14] = W[14] + W[7] + rotr15(W[12]) + rotr25(W[15]); - W[15] = W[15] + W[8] + rotr15(W[13]) + rotr25( W[0]); - - V[3] += 0x27b70a85U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]); - V[7] = 0x27b70a85U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); - - V[2] += 0x2e1b2138U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]); - V[6] = 0x2e1b2138U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); - - V[1] += 0x4d2c6dfcU + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]); - V[5] = 0x4d2c6dfcU + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); - - V[0] += 0x53380d13U + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]); - V[4] = 0x53380d13U + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); - - V[7] += 0x650a7354U + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]); - V[3] = 0x650a7354U + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - - V[6] += 0x766a0abbU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]); - V[2] = 0x766a0abbU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); - - V[5] += 0x81c2c92eU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]); - V[1] = 0x81c2c92eU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); - - V[4] += 0x92722c85U + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]); - V[0] = 0x92722c85U + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); - - V[3] += 0xa2bfe8a1U + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]); - V[7] = 0xa2bfe8a1U + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + 
ma(V[1], V[2], V[0]); - - V[2] += 0xa81a664bU + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]); - V[6] = 0xa81a664bU + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); - - V[1] += 0xc24b8b70U + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]); - V[5] = 0xc24b8b70U + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); - - V[0] += 0xc76c51a3U + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]); - V[4] = 0xc76c51a3U + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); - - V[7] += 0xd192e819U + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]); - V[3] = 0xd192e819U + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - - V[6] += 0xd6990624U + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]); - V[2] = 0xd6990624U + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); - - V[5] += 0xf40e3585U + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]); - V[1] = 0xf40e3585U + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); - - V[4] += 0x106aa070U + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]); - V[0] = 0x106aa070U + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); - -//---------------------------------------------------------------------------------- - - W[0] = W[0] + W[9] + rotr15(W[14]) + rotr25( W[1]); - W[1] = W[1] + W[10] + rotr15(W[15]) + rotr25( W[2]); - W[2] = W[2] + W[11] + rotr15( W[0]) + rotr25( W[3]); - W[3] = W[3] + W[12] + rotr15( W[1]) + rotr25( W[4]); - W[4] = W[4] + W[13] + rotr15( W[2]) + rotr25( W[5]); - W[5] = W[5] + W[14] + rotr15( W[3]) + rotr25( W[6]); - W[6] = W[6] + W[15] + rotr15( W[4]) + rotr25( W[7]); - W[7] = W[7] + W[0] + rotr15( W[5]) + rotr25( W[8]); - W[8] = W[8] + W[1] + rotr15( W[6]) + rotr25( W[9]); - W[9] = W[9] + W[2] + rotr15( W[7]) + rotr25(W[10]); - 
W[10] = W[10] + W[3] + rotr15( W[8]) + rotr25(W[11]); - W[11] = W[11] + W[4] + rotr15( W[9]) + rotr25(W[12]); - W[12] = W[12] + W[5] + rotr15(W[10]) + rotr25(W[13]); - - V[3] += 0x19a4c116U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]); - V[7] = 0x19a4c116U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); - - V[2] += 0x1e376c08U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]); - V[6] = 0x1e376c08U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]); - - V[1] += 0x2748774cU + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]); - V[5] = 0x2748774cU + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); - - V[0] += 0x34b0bcb5U + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]); - V[4] = 0x34b0bcb5U + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); - - V[7] += 0x391c0cb3U + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]); - V[3] = 0x391c0cb3U + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]); - - V[6] += 0x4ed8aa4aU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]); - V[2] = 0x4ed8aa4aU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]); - - V[5] += 0x5b9cca4fU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]); - V[1] = 0x5b9cca4fU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]); - - V[4] += 0x682e6ff3U + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]); - V[0] = 0x682e6ff3U + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]); - - V[3] += 0x748f82eeU + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]); - V[7] = 0x748f82eeU + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]); - - V[2] += 0x78a5636fU + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]); - - V[1] += 0x84c87814U + V[5] + W[10] + ch(V[2], V[3], 
V[4]) + rotr26(V[2]); - - V[0] += 0x8cc70208U + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]); - - V[7] += V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]); - -#define FOUND (0x800) -#define NFLAG (0x7FF) - -#ifdef VECTORS4 - if ((V[7].x == 0x136032edU) ^ (V[7].y == 0x136032edU) ^ (V[7].z == 0x136032edU) ^ (V[7].w == 0x136032edU)) - output[FOUND] = output[NFLAG & nonce.x] = (V[7].x == 0x136032edU) ? nonce.x : ((V[7].y == 0x136032edU) ? nonce.y : ((V[7].z == 0x136032edU) ? nonce.z : nonce.w)); -#elif defined VECTORS2 - if ((V[7].x == 0x136032edU) + (V[7].y == 0x136032edU)) - output[FOUND] = output[NFLAG & nonce.x] = (V[7].x == 0x136032edU) ? nonce.x : nonce.y; -#else - if (V[7] == 0x136032edU) - output[FOUND] = output[NFLAG & nonce] = nonce; -#endif -} diff --git a/phatk120223.cl b/phatk120223.cl deleted file mode 100644 index 0f604436..00000000 --- a/phatk120223.cl +++ /dev/null @@ -1,417 +0,0 @@ -// This file is taken and modified from the public-domain poclbm project, and -// I have therefore decided to keep it public-domain. 
-// Modified version copyright 2011-2012 Con Kolivas - -#ifdef VECTORS4 - typedef uint4 u; -#elif defined VECTORS2 - typedef uint2 u; -#else - typedef uint u; -#endif - -__constant uint K[64] = { - 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, - 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, - 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, - 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, - 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, - 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, - 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, - 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 -}; - -__constant uint ConstW[128] = { -0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x80000000U, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000280U, -0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - -0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x80000000U, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000100U, 
-0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 -}; - -__constant uint H[8] = { - 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 -}; - - -#ifdef BITALIGN - #pragma OPENCL EXTENSION cl_amd_media_ops : enable - #define rot(x, y) amd_bitalign(x, x, (uint)(32 - y)) - -// This part is not from the stock poclbm kernel. It's part of an optimization -// added in the Phoenix Miner. - -// Some AMD devices have Vals[0] BFI_INT opcode, which behaves exactly like the -// SHA-256 Ch function, but provides it in exactly one instruction. If -// detected, use it for Ch. Otherwise, construct Ch out of simpler logical -// primitives. - - #ifdef BFI_INT - // Well, slight problem... It turns out BFI_INT isn't actually exposed to - // OpenCL (or CAL IL for that matter) in any way. However, there is - // a similar instruction, BYTE_ALIGN_INT, which is exposed to OpenCL via - // amd_bytealign, takes the same inputs, and provides the same output. - // We can use that as a placeholder for BFI_INT and have the application - // patch it after compilation. - - // This is the BFI_INT function - #define Ch(x, y, z) amd_bytealign(x,y,z) - // Ma can also be implemented in terms of BFI_INT... 
- #define Ma(z, x, y) amd_bytealign(z^x,y,x) - #else // BFI_INT - // Later SDKs optimise this to BFI INT without patching and GCN - // actually fails if manually patched with BFI_INT - - #define Ch(x, y, z) bitselect((u)z, (u)y, (u)x) - #define Ma(x, y, z) bitselect((u)x, (u)y, (u)z ^ (u)x) - #define rotr(x, y) amd_bitalign((u)x, (u)x, (u)y) - #endif -#else // BITALIGN - #define Ch(x, y, z) (z ^ (x & (y ^ z))) - #define Ma(x, y, z) ((x & z) | (y & (x | z))) - #define rot(x, y) rotate((u)x, (u)y) - #define rotr(x, y) rotate((u)x, (u)(32-y)) -#endif - - - -//Various intermediate calculations for each SHA round -#define s0(n) (S0(Vals[(0 + 128 - (n)) % 8])) -#define S0(n) (rot(n, 30u)^rot(n, 19u)^rot(n,10u)) - -#define s1(n) (S1(Vals[(4 + 128 - (n)) % 8])) -#define S1(n) (rot(n, 26u)^rot(n, 21u)^rot(n, 7u)) - -#define ch(n) Ch(Vals[(4 + 128 - (n)) % 8],Vals[(5 + 128 - (n)) % 8],Vals[(6 + 128 - (n)) % 8]) -#define maj(n) Ma(Vals[(1 + 128 - (n)) % 8],Vals[(2 + 128 - (n)) % 8],Vals[(0 + 128 - (n)) % 8]) - -//t1 calc when W is already calculated -#define t1(n) K[(n) % 64] + Vals[(7 + 128 - (n)) % 8] + W[(n)] + s1(n) + ch(n) - -//t1 calc which calculates W -#define t1W(n) K[(n) % 64] + Vals[(7 + 128 - (n)) % 8] + W(n) + s1(n) + ch(n) - -//Used for constant W Values (the compiler optimizes out zeros) -#define t1C(n) (K[(n) % 64]+ ConstW[(n)]) + Vals[(7 + 128 - (n)) % 8] + s1(n) + ch(n) - -//t2 Calc -#define t2(n) maj(n) + s0(n) - -#define rotC(x,n) (x<<n | x >> (32-n)) - -//W calculation used for SHA round -#define W(n) (W[n] = P4(n) + P3(n) + P2(n) + P1(n)) - - - -//Partial W calculations (used for the begining where only some values are nonzero) -#define P1(n) ((rot(W[(n)-2],15u)^rot(W[(n)-2],13u)^((W[(n)-2])>>10U))) -#define P2(n) ((rot(W[(n)-15],25u)^rot(W[(n)-15],14u)^((W[(n)-15])>>3U))) - - -#define p1(x) ((rot(x,15u)^rot(x,13u)^((x)>>10U))) -#define p2(x) ((rot(x,25u)^rot(x,14u)^((x)>>3U))) - - -#define P3(n) W[n-7] -#define P4(n) W[n-16] - - -//Partial Calcs for constant W 
values -#define P1C(n) ((rotC(ConstW[(n)-2],15)^rotC(ConstW[(n)-2],13)^((ConstW[(n)-2])>>10U))) -#define P2C(n) ((rotC(ConstW[(n)-15],25)^rotC(ConstW[(n)-15],14)^((ConstW[(n)-15])>>3U))) -#define P3C(x) ConstW[x-7] -#define P4C(x) ConstW[x-16] - -//SHA round with built in W calc -#define sharoundW(n) Barrier1(n); Vals[(3 + 128 - (n)) % 8] += t1W(n); Vals[(7 + 128 - (n)) % 8] = t1W(n) + t2(n); - -//SHA round without W calc -#define sharound(n) Barrier2(n); Vals[(3 + 128 - (n)) % 8] += t1(n); Vals[(7 + 128 - (n)) % 8] = t1(n) + t2(n); - -//SHA round for constant W values -#define sharoundC(n) Barrier3(n); Vals[(3 + 128 - (n)) % 8] += t1C(n); Vals[(7 + 128 - (n)) % 8] = t1C(n) + t2(n); - -//The compiler is stupid... I put this in there only to stop the compiler from (de)optimizing the order -#define Barrier1(n) t1 = t1C((n+1)) -#define Barrier2(n) t1 = t1C((n)) -#define Barrier3(n) t1 = t1C((n)) - -//#define WORKSIZE 256 -#define MAXBUFFERS (4095) - -__kernel - __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) -void search( const uint state0, const uint state1, const uint state2, const uint state3, - const uint state4, const uint state5, const uint state6, const uint state7, - const uint B1, const uint C1, const uint D1, - const uint F1, const uint G1, const uint H1, - const u base, - const uint W16, const uint W17, - const uint PreVal4, const uint PreVal0, - const uint PreW18, const uint PreW19, - const uint PreW31, const uint PreW32, - - __global uint * output) -{ - - - u W[124]; - u Vals[8]; - -//Dummy Variable to prevent compiler from reordering between rounds - u t1; - - //Vals[0]=state0; - Vals[1]=B1; - Vals[2]=C1; - Vals[3]=D1; - //Vals[4]=PreVal4; - Vals[5]=F1; - Vals[6]=G1; - Vals[7]=H1; - - W[16] = W16; - W[17] = W17; - -#ifdef VECTORS4 - //Less dependencies to get both the local id and group id and then add them - W[3] = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKSIZE * 4u); - uint r = 
rot(W[3].x,25u)^rot(W[3].x,14u)^((W[3].x)>>3U); - //Since only the 2 LSB is opposite between the nonces, we can save an instruction by flipping the 4 bits in W18 rather than the 1 bit in W3 - W[18] = PreW18 + (u){r, r ^ 0x2004000U, r ^ 0x4008000U, r ^ 0x600C000U}; -#elif defined VECTORS2 - W[3] = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u); - uint r = rot(W[3].x,25u)^rot(W[3].x,14u)^((W[3].x)>>3U); - W[18] = PreW18 + (u){r, r ^ 0x2004000U}; -#else - W[3] = base + get_local_id(0) + get_group_id(0) * (WORKSIZE); - u r = rot(W[3],25u)^rot(W[3],14u)^((W[3])>>3U); - W[18] = PreW18 + r; -#endif - //the order of the W calcs and Rounds is like this because the compiler needs help finding how to order the instructions - - - - Vals[4] = PreVal4 + W[3]; - Vals[0] = PreVal0 + W[3]; - - sharoundC(4); - W[19] = PreW19 + W[3]; - sharoundC(5); - W[20] = P4C(20) + P1(20); - sharoundC(6); - W[21] = P1(21); - sharoundC(7); - W[22] = P3C(22) + P1(22); - sharoundC(8); - W[23] = W[16] + P1(23); - sharoundC(9); - W[24] = W[17] + P1(24); - sharoundC(10); - W[25] = P1(25) + P3(25); - W[26] = P1(26) + P3(26); - sharoundC(11); - W[27] = P1(27) + P3(27); - W[28] = P1(28) + P3(28); - sharoundC(12); - W[29] = P1(29) + P3(29); - sharoundC(13); - W[30] = P1(30) + P2C(30) + P3(30); - W[31] = PreW31 + (P1(31) + P3(31)); - sharoundC(14); - W[32] = PreW32 + (P1(32) + P3(32)); - sharoundC(15); - sharound(16); - sharound(17); - sharound(18); - sharound(19); - sharound(20); - sharound(21); - sharound(22); - sharound(23); - sharound(24); - sharound(25); - sharound(26); - sharound(27); - sharound(28); - sharound(29); - sharound(30); - sharound(31); - sharound(32); - sharoundW(33); - sharoundW(34); - sharoundW(35); - sharoundW(36); - sharoundW(37); - sharoundW(38); - sharoundW(39); - sharoundW(40); - sharoundW(41); - sharoundW(42); - sharoundW(43); - sharoundW(44); - sharoundW(45); - sharoundW(46); - sharoundW(47); - sharoundW(48); - sharoundW(49); - sharoundW(50); - 
sharoundW(51); - sharoundW(52); - sharoundW(53); - sharoundW(54); - sharoundW(55); - sharoundW(56); - sharoundW(57); - sharoundW(58); - sharoundW(59); - sharoundW(60); - sharoundW(61); - sharoundW(62); - sharoundW(63); - - W[64]=state0+Vals[0]; - W[65]=state1+Vals[1]; - W[66]=state2+Vals[2]; - W[67]=state3+Vals[3]; - W[68]=state4+Vals[4]; - W[69]=state5+Vals[5]; - W[70]=state6+Vals[6]; - W[71]=state7+Vals[7]; - - Vals[0]=H[0]; - Vals[1]=H[1]; - Vals[2]=H[2]; - Vals[3]=H[3]; - Vals[4]=H[4]; - Vals[5]=H[5]; - Vals[6]=H[6]; - Vals[7]=H[7]; - - //sharound(64 + 0); - const u Temp = (0xb0edbdd0U + K[0]) + W[64]; - Vals[7] = Temp + 0x08909ae5U; - Vals[3] = 0xa54ff53aU + Temp; - -#define P124(n) P2(n) + P1(n) + P4(n) - - - W[64 + 16] = + P2(64 + 16) + P4(64 + 16); - sharound(64 + 1); - W[64 + 17] = P1C(64 + 17) + P2(64 + 17) + P4(64 + 17); - sharound(64 + 2); - W[64 + 18] = P124(64 + 18); - sharound(64 + 3); - W[64 + 19] = P124(64 + 19); - sharound(64 + 4); - W[64 + 20] = P124(64 + 20); - sharound(64 + 5); - W[64 + 21] = P124(64 + 21); - sharound(64 + 6); - W[64 + 22] = P4(64 + 22) + P3C(64 + 22) + P2(64 + 22) + P1(64 + 22); - sharound(64 + 7); - W[64 + 23] = P4(64 + 23) + P3(64 + 23) + P2C(64 + 23) + P1(64 + 23); - sharoundC(64 + 8); - W[64 + 24] = P1(64 + 24) + P4C(64 + 24) + P3(64 + 24); - sharoundC(64 + 9); - W[64 + 25] = P3(64 + 25) + P1(64 + 25); - sharoundC(64 + 10); - W[64 + 26] = P3(64 + 26) + P1(64 + 26); - sharoundC(64 + 11); - W[64 + 27] = P3(64 + 27) + P1(64 + 27); - sharoundC(64 + 12); - W[64 + 28] = P3(64 + 28) + P1(64 + 28); - sharoundC(64 + 13); - W[64 + 29] = P1(64 + 29) + P3(64 + 29); - W[64 + 30] = P3(64 + 30) + P2C(64 + 30) + P1(64 + 30); - sharoundC(64 + 14); - W[64 + 31] = P4C(64 + 31) + P3(64 + 31) + P2(64 + 31) + P1(64 + 31); - sharoundC(64 + 15); - sharound(64 + 16); - sharound(64 + 17); - sharound(64 + 18); - sharound(64 + 19); - sharound(64 + 20); - sharound(64 + 21); - sharound(64 + 22); - sharound(64 + 23); - sharound(64 + 24); - sharound(64 + 
25); - sharound(64 + 26); - sharound(64 + 27); - sharound(64 + 28); - sharound(64 + 29); - sharound(64 + 30); - sharound(64 + 31); - sharoundW(64 + 32); - sharoundW(64 + 33); - sharoundW(64 + 34); - sharoundW(64 + 35); - sharoundW(64 + 36); - sharoundW(64 + 37); - sharoundW(64 + 38); - sharoundW(64 + 39); - sharoundW(64 + 40); - sharoundW(64 + 41); - sharoundW(64 + 42); - sharoundW(64 + 43); - sharoundW(64 + 44); - sharoundW(64 + 45); - sharoundW(64 + 46); - sharoundW(64 + 47); - sharoundW(64 + 48); - sharoundW(64 + 49); - sharoundW(64 + 50); - sharoundW(64 + 51); - sharoundW(64 + 52); - sharoundW(64 + 53); - sharoundW(64 + 54); - sharoundW(64 + 55); - sharoundW(64 + 56); - sharoundW(64 + 57); - sharoundW(64 + 58); - - W[117] += W[108] + Vals[3] + Vals[7] + P2(124) + P1(124) + Ch((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64),Vals[1],Vals[2]) - - (-(K[60] + H[7]) - S1((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64))); - -#define FOUND (0x800) -#define NFLAG (0x7FF) - -#ifdef VECTORS4 - bool result = W[117].x & W[117].y & W[117].z & W[117].w; - if (!result) { - if (!W[117].x) - output[FOUND] = output[NFLAG & W[3].x] = W[3].x; - if (!W[117].y) - output[FOUND] = output[NFLAG & W[3].y] = W[3].y; - if (!W[117].z) - output[FOUND] = output[NFLAG & W[3].z] = W[3].z; - if (!W[117].w) - output[FOUND] = output[NFLAG & W[3].w] = W[3].w; - } -#elif defined VECTORS2 - bool result = W[117].x & W[117].y; - if (!result) { - if (!W[117].x) - output[FOUND] = output[NFLAG & W[3].x] = W[3].x; - if (!W[117].y) - output[FOUND] = output[NFLAG & W[3].y] = W[3].y; - } -#else - if (!W[117]) - output[FOUND] = output[NFLAG & W[3]] = W[3]; -#endif -} diff --git a/poclbm120327.cl b/poclbm120327.cl deleted file mode 100644 index 3e8b9943..00000000 --- a/poclbm120327.cl +++ /dev/null @@ -1,1353 +0,0 @@ -// -ck modified kernel taken from Phoenix taken from poclbm, with aspects of -// phatk and others. 
-// Modified version copyright 2011-2012 Con Kolivas - -// This file is taken and modified from the public-domain poclbm project, and -// we have therefore decided to keep it public-domain in Phoenix. - -#ifdef VECTORS4 - typedef uint4 u; -#elif defined VECTORS2 - typedef uint2 u; -#else - typedef uint u; -#endif - -__constant uint K[64] = { - 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, - 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, - 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, - 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, - 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, - 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, - 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, - 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 -}; - - -// This part is not from the stock poclbm kernel. It's part of an optimization -// added in the Phoenix Miner. - -// Some AMD devices have a BFI_INT opcode, which behaves exactly like the -// SHA-256 ch function, but provides it in exactly one instruction. If -// detected, use it for ch. Otherwise, construct ch out of simpler logical -// primitives. - -#ifdef BITALIGN - #pragma OPENCL EXTENSION cl_amd_media_ops : enable - #define rotr(x, y) amd_bitalign((u)x, (u)x, (u)y) -#else - #define rotr(x, y) rotate((u)x, (u)(32 - y)) -#endif -#ifdef BFI_INT - // Well, slight problem... It turns out BFI_INT isn't actually exposed to - // OpenCL (or CAL IL for that matter) in any way. However, there is - // a similar instruction, BYTE_ALIGN_INT, which is exposed to OpenCL via - // amd_bytealign, takes the same inputs, and provides the same output. 
- // We can use that as a placeholder for BFI_INT and have the application - // patch it after compilation. - - // This is the BFI_INT function - #define ch(x, y, z) amd_bytealign(x, y, z) - - // Ma can also be implemented in terms of BFI_INT... - #define Ma(x, y, z) amd_bytealign( (z^x), (y), (x) ) - - // AMD's KernelAnalyzer throws errors compiling the kernel if we use - // amd_bytealign on constants with vectors enabled, so we use this to avoid - // problems. (this is used 4 times, and likely optimized out by the compiler.) - #define Ma2(x, y, z) bitselect((u)x, (u)y, (u)z ^ (u)x) -#else // BFI_INT - //GCN actually fails if manually patched with BFI_INT - - #define ch(x, y, z) bitselect((u)z, (u)y, (u)x) - #define Ma(x, y, z) bitselect((u)x, (u)y, (u)z ^ (u)x) - #define Ma2(x, y, z) Ma(x, y, z) -#endif - - -__kernel -__attribute__((vec_type_hint(u))) -__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) -void search(const uint state0, const uint state1, const uint state2, const uint state3, - const uint state4, const uint state5, const uint state6, const uint state7, - const uint b1, const uint c1, - const uint f1, const uint g1, const uint h1, -#ifndef GOFFSET - const u base, -#endif - const uint fw0, const uint fw1, const uint fw2, const uint fw3, const uint fw15, const uint fw01r, - const uint D1A, const uint C1addK5, const uint B1addK6, - const uint W16addK16, const uint W17addK17, - const uint PreVal4addT1, const uint Preval0, - __global uint * output) -{ - u Vals[24]; - u *W = &Vals[8]; - -#ifdef GOFFSET - const u nonce = (uint)(get_global_id(0)); -#else - const u nonce = base + (uint)(get_global_id(0)); -#endif - -Vals[5]=Preval0; -Vals[5]+=nonce; - -Vals[0]=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); -Vals[0]+=ch(Vals[5],b1,c1); -Vals[0]+=D1A; - -Vals[2]=Vals[0]; -Vals[2]+=h1; - -Vals[1]=PreVal4addT1; -Vals[1]+=nonce; -Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); - -Vals[6]=C1addK5; 
-Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); -Vals[6]+=ch(Vals[2],Vals[5],b1); - -Vals[3]=Vals[6]; -Vals[3]+=g1; -Vals[0]+=Ma2(g1,Vals[1],f1); -Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); -Vals[6]+=Ma2(f1,Vals[0],Vals[1]); - -Vals[7]=B1addK6; -Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); -Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); - -Vals[4]=Vals[7]; -Vals[4]+=f1; - -Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); -Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); - -Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); -Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); -Vals[5]+=K[7]; -Vals[1]+=Vals[5]; -Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); -Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); - -Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); -Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); -Vals[2]+=K[8]; -Vals[0]+=Vals[2]; -Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); -Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); - -Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); -Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); -Vals[3]+=K[9]; -Vals[6]+=Vals[3]; -Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); -Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]); - -Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); -Vals[4]+=ch(Vals[6],Vals[0],Vals[1]); -Vals[4]+=K[10]; -Vals[7]+=Vals[4]; -Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); -Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]); - -Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); -Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); -Vals[1]+=K[11]; -Vals[5]+=Vals[1]; -Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); -Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); - -Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); -Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); -Vals[0]+=K[12]; -Vals[2]+=Vals[0]; -Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); -Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); - 
-Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); -Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); -Vals[6]+=K[13]; -Vals[3]+=Vals[6]; -Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); -Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); - -Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); -Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); -Vals[7]+=K[14]; -Vals[4]+=Vals[7]; -Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); -Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); - -Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); -Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); -Vals[5]+=0xC19BF3F4U; -Vals[1]+=Vals[5]; -Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); -Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); - -Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); -Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); -Vals[2]+=W16addK16; -Vals[0]+=Vals[2]; -Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); -Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); - -Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); -Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); -Vals[3]+=W17addK17; -Vals[6]+=Vals[3]; -Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); -Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]); - -W[2]=(rotr(nonce,7)^rotr(nonce,18)^(nonce>>3U)); -W[2]+=fw2; -Vals[4]+=W[2]; -Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); -Vals[4]+=ch(Vals[6],Vals[0],Vals[1]); -Vals[4]+=K[18]; -Vals[7]+=Vals[4]; -Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); -Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]); - -W[3]=nonce; -W[3]+=fw3; -Vals[1]+=W[3]; -Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); -Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); -Vals[1]+=K[19]; -Vals[5]+=Vals[1]; -Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); -Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); - -W[4]=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U)); -W[4]+=0x80000000U; -Vals[0]+=W[4]; -Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); 
-Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); -Vals[0]+=K[20]; -Vals[2]+=Vals[0]; -Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); -Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); - -W[5]=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U)); -Vals[6]+=W[5]; -Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); -Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); -Vals[6]+=K[21]; -Vals[3]+=Vals[6]; -Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); -Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); - -W[6]=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U)); -W[6]+=0x00000280U; -Vals[7]+=W[6]; -Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); -Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); -Vals[7]+=K[22]; -Vals[4]+=Vals[7]; -Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); -Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); - -W[7]=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U)); -W[7]+=fw0; -Vals[5]+=W[7]; -Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); -Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); -Vals[5]+=K[23]; -Vals[1]+=Vals[5]; -Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); -Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); - -W[8]=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U)); -W[8]+=fw1; -Vals[2]+=W[8]; -Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); -Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); -Vals[2]+=K[24]; -Vals[0]+=Vals[2]; -Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); -Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); - -W[9]=W[2]; -W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U)); -Vals[3]+=W[9]; -Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); -Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); -Vals[3]+=K[25]; -Vals[6]+=Vals[3]; -Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); -Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]); - -W[10]=W[3]; -W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U)); -Vals[4]+=W[10]; -Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); -Vals[4]+=ch(Vals[6],Vals[0],Vals[1]); -Vals[4]+=K[26]; -Vals[7]+=Vals[4]; 
-Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); -Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]); - -W[11]=W[4]; -W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U)); -Vals[1]+=W[11]; -Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); -Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); -Vals[1]+=K[27]; -Vals[5]+=Vals[1]; -Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); -Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); - -W[12]=W[5]; -W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U)); -Vals[0]+=W[12]; -Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); -Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); -Vals[0]+=K[28]; -Vals[2]+=Vals[0]; -Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); -Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); - -W[13]=W[6]; -W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U)); -Vals[6]+=W[13]; -Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); -Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); -Vals[6]+=K[29]; -Vals[3]+=Vals[6]; -Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); -Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); - -W[14]=0x00a00055U; -W[14]+=W[7]; -W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U)); -Vals[7]+=W[14]; -Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); -Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); -Vals[7]+=K[30]; -Vals[4]+=Vals[7]; -Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); -Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); - -W[15]=fw15; -W[15]+=W[8]; -W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U)); -Vals[5]+=W[15]; -Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); -Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); -Vals[5]+=K[31]; -Vals[1]+=Vals[5]; -Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); -Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); - -W[0]=fw01r; -W[0]+=W[9]; -W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U)); -Vals[2]+=W[0]; -Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); -Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); -Vals[2]+=K[32]; -Vals[0]+=Vals[2]; 
-Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); -Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); - -W[1]=fw1; -W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U)); -W[1]+=W[10]; -W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U)); -Vals[3]+=W[1]; -Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); -Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); -Vals[3]+=K[33]; -Vals[6]+=Vals[3]; -Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); -Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]); - -W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U)); -W[2]+=W[11]; -W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U)); -Vals[4]+=W[2]; -Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); -Vals[4]+=ch(Vals[6],Vals[0],Vals[1]); -Vals[4]+=K[34]; -Vals[7]+=Vals[4]; -Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); -Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]); - -W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U)); -W[3]+=W[12]; -W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U)); -Vals[1]+=W[3]; -Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); -Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); -Vals[1]+=K[35]; -Vals[5]+=Vals[1]; -Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); -Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); - -W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U)); -W[4]+=W[13]; -W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U)); -Vals[0]+=W[4]; -Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); -Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); -Vals[0]+=K[36]; -Vals[2]+=Vals[0]; -Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); -Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); - -W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U)); -W[5]+=W[14]; -W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U)); -Vals[6]+=W[5]; -Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); -Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); -Vals[6]+=K[37]; -Vals[3]+=Vals[6]; -Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); -Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); - 
-W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U)); -W[6]+=W[15]; -W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U)); -Vals[7]+=W[6]; -Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); -Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); -Vals[7]+=K[38]; -Vals[4]+=Vals[7]; -Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); -Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); - -W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U)); -W[7]+=W[0]; -W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U)); -Vals[5]+=W[7]; -Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); -Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); -Vals[5]+=K[39]; -Vals[1]+=Vals[5]; -Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); -Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); - -W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U)); -W[8]+=W[1]; -W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U)); -Vals[2]+=W[8]; -Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); -Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); -Vals[2]+=K[40]; -Vals[0]+=Vals[2]; -Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); -Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); - -W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U)); -W[9]+=W[2]; -W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U)); -Vals[3]+=W[9]; -Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); -Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); -Vals[3]+=K[41]; -Vals[6]+=Vals[3]; -Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); -Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]); - -W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U)); -W[10]+=W[3]; -W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U)); -Vals[4]+=W[10]; -Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); -Vals[4]+=ch(Vals[6],Vals[0],Vals[1]); -Vals[4]+=K[42]; -Vals[7]+=Vals[4]; -Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); -Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]); - -W[11]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U)); -W[11]+=W[4]; -W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U)); -Vals[1]+=W[11]; 
-Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); -Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); -Vals[1]+=K[43]; -Vals[5]+=Vals[1]; -Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); -Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); - -W[12]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U)); -W[12]+=W[5]; -W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U)); -Vals[0]+=W[12]; -Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); -Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); -Vals[0]+=K[44]; -Vals[2]+=Vals[0]; -Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); -Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); - -W[13]+=(rotr(W[14],7)^rotr(W[14],18)^(W[14]>>3U)); -W[13]+=W[6]; -W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U)); -Vals[6]+=W[13]; -Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); -Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); -Vals[6]+=K[45]; -Vals[3]+=Vals[6]; -Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); -Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); - -W[14]+=(rotr(W[15],7)^rotr(W[15],18)^(W[15]>>3U)); -W[14]+=W[7]; -W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U)); -Vals[7]+=W[14]; -Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); -Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); -Vals[7]+=K[46]; -Vals[4]+=Vals[7]; -Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); -Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); - -W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U)); -W[15]+=W[8]; -W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U)); -Vals[5]+=W[15]; -Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); -Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); -Vals[5]+=K[47]; -Vals[1]+=Vals[5]; -Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); -Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); - -W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U)); -W[0]+=W[9]; -W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U)); -Vals[2]+=W[0]; -Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); -Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); 
-Vals[2]+=K[48]; -Vals[0]+=Vals[2]; -Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); -Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); - -W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U)); -W[1]+=W[10]; -W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U)); -Vals[3]+=W[1]; -Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); -Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); -Vals[3]+=K[49]; -Vals[6]+=Vals[3]; -Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); -Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]); - -W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U)); -W[2]+=W[11]; -W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U)); -Vals[4]+=W[2]; -Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); -Vals[4]+=ch(Vals[6],Vals[0],Vals[1]); -Vals[4]+=K[50]; -Vals[7]+=Vals[4]; -Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); -Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]); - -W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U)); -W[3]+=W[12]; -W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U)); -Vals[1]+=W[3]; -Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); -Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); -Vals[1]+=K[51]; -Vals[5]+=Vals[1]; -Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); -Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); - -W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U)); -W[4]+=W[13]; -W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U)); -Vals[0]+=W[4]; -Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); -Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); -Vals[0]+=K[52]; -Vals[2]+=Vals[0]; -Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); -Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); - -W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U)); -W[5]+=W[14]; -W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U)); -Vals[6]+=W[5]; -Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); -Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); -Vals[6]+=K[53]; -Vals[3]+=Vals[6]; -Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); -Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); - 
-W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U)); -W[6]+=W[15]; -W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U)); -Vals[7]+=W[6]; -Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); -Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); -Vals[7]+=K[54]; -Vals[4]+=Vals[7]; -Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); -Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); - -W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U)); -W[7]+=W[0]; -W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U)); -Vals[5]+=W[7]; -Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); -Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); -Vals[5]+=K[55]; -Vals[1]+=Vals[5]; -Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); -Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); - -W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U)); -W[8]+=W[1]; -W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U)); -Vals[2]+=W[8]; -Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); -Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); -Vals[2]+=K[56]; -Vals[0]+=Vals[2]; -Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); -Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); - -W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U)); -W[9]+=W[2]; -W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U)); -Vals[3]+=W[9]; -Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); -Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); -Vals[3]+=K[57]; -Vals[6]+=Vals[3]; -Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); -Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]); - -W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U)); -W[10]+=W[3]; -W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U)); -Vals[4]+=W[10]; -Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); -Vals[4]+=ch(Vals[6],Vals[0],Vals[1]); -Vals[4]+=K[58]; -Vals[7]+=Vals[4]; -Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); -Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]); - -W[11]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U)); -W[11]+=W[4]; -W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U)); -Vals[1]+=W[11]; 
-Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); -Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); -Vals[1]+=K[59]; -Vals[5]+=Vals[1]; -Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); -Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); - -W[12]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U)); -W[12]+=W[5]; -W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U)); -Vals[0]+=W[12]; -Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); -Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); -Vals[0]+=K[60]; -Vals[2]+=Vals[0]; -Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); -Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); - -W[13]+=(rotr(W[14],7)^rotr(W[14],18)^(W[14]>>3U)); -W[13]+=W[6]; -W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U)); -Vals[6]+=W[13]; -Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); -Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); -Vals[6]+=K[61]; -Vals[3]+=Vals[6]; -Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); -Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); - -Vals[7]+=W[14]; -Vals[7]+=(rotr(W[15],7)^rotr(W[15],18)^(W[15]>>3U)); -Vals[7]+=W[7]; -Vals[7]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U)); -Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); -Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); -Vals[7]+=K[62]; -Vals[4]+=Vals[7]; -Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); -Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); - -Vals[5]+=W[15]; -Vals[5]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U)); -Vals[5]+=W[8]; -Vals[5]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U)); -Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); -Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); -Vals[5]+=K[63]; -Vals[1]+=Vals[5]; -Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); -Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); - -Vals[5]+=state0; - -W[7]=state7; -W[7]+=Vals[2]; - -Vals[2]=0xF377ED68U; -Vals[2]+=Vals[5]; - -W[3]=state3; -W[3]+=Vals[0]; - -Vals[0]=0xa54ff53aU; -Vals[0]+=Vals[2]; -Vals[2]+=0x08909ae5U; - -W[6]=state6; -W[6]+=Vals[3]; - 
-Vals[3]=0x90BB1E3CU; -Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); -Vals[3]+=(0x9b05688cU^(Vals[0]&0xca0b3af3U)); - -Vals[7]+=state1; -Vals[3]+=Vals[7]; - -W[2]=state2; -W[2]+=Vals[6]; - -Vals[6]=0x3c6ef372U; -Vals[6]+=Vals[3]; -Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); -Vals[3]+=Ma2(0xbb67ae85U,Vals[2],0x6a09e667U); - -W[5]=state5; -W[5]+=Vals[4]; - -Vals[4]=0x50C6645BU; -Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); -Vals[4]+=ch(Vals[6],Vals[0],0x510e527fU); -Vals[4]+=W[2]; - -W[1]=Vals[7]; -Vals[7]=0xbb67ae85U; -Vals[7]+=Vals[4]; -Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); -Vals[4]+=Ma2(0x6a09e667U,Vals[3],Vals[2]); - -W[4]=state4; -W[4]+=Vals[1]; - -Vals[1]=0x3AC42E24U; -Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); -Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); -Vals[1]+=W[3]; - -W[0]=Vals[5]; - -Vals[5]=Vals[1]; -Vals[5]+=0x6a09e667U; - -Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); -Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); - -Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); -Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); -Vals[0]+=K[4]; -Vals[0]+=W[4]; -Vals[2]+=Vals[0]; -Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); -Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); - -Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); -Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); -Vals[6]+=K[5]; -Vals[6]+=W[5]; -Vals[3]+=Vals[6]; -Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); -Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); - -Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); -Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); -Vals[7]+=K[6]; -Vals[7]+=W[6]; -Vals[4]+=Vals[7]; -Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); -Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); - -Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); -Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); -Vals[5]+=K[7]; -Vals[5]+=W[7]; -Vals[1]+=Vals[5]; 
-Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); -Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); - -Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); -Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); -Vals[2]+=0x5807AA98U; -Vals[0]+=Vals[2]; -Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); -Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); - -Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); -Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); -Vals[3]+=K[9]; -Vals[6]+=Vals[3]; -Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); -Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]); - -Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); -Vals[4]+=ch(Vals[6],Vals[0],Vals[1]); -Vals[4]+=K[10]; -Vals[7]+=Vals[4]; -Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); -Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]); - -Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); -Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); -Vals[1]+=K[11]; -Vals[5]+=Vals[1]; -Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); -Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); - -Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); -Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); -Vals[0]+=K[12]; -Vals[2]+=Vals[0]; -Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); -Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); - -Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); -Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); -Vals[6]+=K[13]; -Vals[3]+=Vals[6]; -Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); -Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); - -Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); -Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); -Vals[7]+=K[14]; -Vals[4]+=Vals[7]; -Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); -Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); - -Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); -Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); -Vals[5]+=0xC19BF274U; -Vals[1]+=Vals[5]; 
-Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); -Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); - -W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U)); -Vals[2]+=W[0]; -Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); -Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); -Vals[2]+=K[16]; -Vals[0]+=Vals[2]; -Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); -Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); - -W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U)); -W[1]+=0x00a00000U; -Vals[3]+=W[1]; -Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); -Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); -Vals[3]+=K[17]; -Vals[6]+=Vals[3]; -Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); -Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]); - -W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U)); -W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U)); -Vals[4]+=W[2]; -Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); -Vals[4]+=ch(Vals[6],Vals[0],Vals[1]); -Vals[4]+=K[18]; -Vals[7]+=Vals[4]; -Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); -Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]); - -W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U)); -W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U)); -Vals[1]+=W[3]; -Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); -Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); -Vals[1]+=K[19]; -Vals[5]+=Vals[1]; -Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); -Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); - -W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U)); -W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U)); -Vals[0]+=W[4]; -Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); -Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); -Vals[0]+=K[20]; -Vals[2]+=Vals[0]; -Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); -Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); - -W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U)); -W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U)); -Vals[6]+=W[5]; -Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); 
-Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); -Vals[6]+=K[21]; -Vals[3]+=Vals[6]; -Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); -Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); - -W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U)); -W[6]+=0x00000100U; -W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U)); -Vals[7]+=W[6]; -Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); -Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); -Vals[7]+=K[22]; -Vals[4]+=Vals[7]; -Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); -Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); - -W[7]+=0x11002000U; -W[7]+=W[0]; -W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U)); -Vals[5]+=W[7]; -Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); -Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); -Vals[5]+=K[23]; -Vals[1]+=Vals[5]; -Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); -Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); - -W[8]=0x80000000U; -W[8]+=W[1]; -W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U)); -Vals[2]+=W[8]; -Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); -Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); -Vals[2]+=K[24]; -Vals[0]+=Vals[2]; -Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); -Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); - -W[9]=W[2]; -W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U)); -Vals[3]+=W[9]; -Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); -Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); -Vals[3]+=K[25]; -Vals[6]+=Vals[3]; -Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); -Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]); - -W[10]=W[3]; -W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U)); -Vals[4]+=W[10]; -Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); -Vals[4]+=ch(Vals[6],Vals[0],Vals[1]); -Vals[4]+=K[26]; -Vals[7]+=Vals[4]; -Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); -Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]); - -W[11]=W[4]; -W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U)); -Vals[1]+=W[11]; 
-Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); -Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); -Vals[1]+=K[27]; -Vals[5]+=Vals[1]; -Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); -Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); - -W[12]=W[5]; -W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U)); -Vals[0]+=W[12]; -Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); -Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); -Vals[0]+=K[28]; -Vals[2]+=Vals[0]; -Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); -Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); - -W[13]=W[6]; -W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U)); -Vals[6]+=W[13]; -Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); -Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); -Vals[6]+=K[29]; -Vals[3]+=Vals[6]; -Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); -Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); - -W[14]=0x00400022U; -W[14]+=W[7]; -W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U)); -Vals[7]+=W[14]; -Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); -Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); -Vals[7]+=K[30]; -Vals[4]+=Vals[7]; -Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); -Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); - -W[15]=0x00000100U; -W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U)); -W[15]+=W[8]; -W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U)); -Vals[5]+=W[15]; -Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); -Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); -Vals[5]+=K[31]; -Vals[1]+=Vals[5]; -Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); -Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); - -W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U)); -W[0]+=W[9]; -W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U)); -Vals[2]+=W[0]; -Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); -Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); -Vals[2]+=K[32]; -Vals[0]+=Vals[2]; -Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); 
-Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); - -W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U)); -W[1]+=W[10]; -W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U)); -Vals[3]+=W[1]; -Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); -Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); -Vals[3]+=K[33]; -Vals[6]+=Vals[3]; -Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); -Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]); - -W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U)); -W[2]+=W[11]; -W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U)); -Vals[4]+=W[2]; -Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); -Vals[4]+=ch(Vals[6],Vals[0],Vals[1]); -Vals[4]+=K[34]; -Vals[7]+=Vals[4]; -Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); -Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]); - -W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U)); -W[3]+=W[12]; -W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U)); -Vals[1]+=W[3]; -Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); -Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); -Vals[1]+=K[35]; -Vals[5]+=Vals[1]; -Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); -Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); - -W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U)); -W[4]+=W[13]; -W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U)); -Vals[0]+=W[4]; -Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); -Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); -Vals[0]+=K[36]; -Vals[2]+=Vals[0]; -Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); -Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); - -W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U)); -W[5]+=W[14]; -W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U)); -Vals[6]+=W[5]; -Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); -Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); -Vals[6]+=K[37]; -Vals[3]+=Vals[6]; -Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); -Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); - -W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U)); -W[6]+=W[15]; 
-W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U)); -Vals[7]+=W[6]; -Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); -Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); -Vals[7]+=K[38]; -Vals[4]+=Vals[7]; -Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); -Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); - -W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U)); -W[7]+=W[0]; -W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U)); -Vals[5]+=W[7]; -Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); -Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); -Vals[5]+=K[39]; -Vals[1]+=Vals[5]; -Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); -Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); - -W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U)); -W[8]+=W[1]; -W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U)); -Vals[2]+=W[8]; -Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); -Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); -Vals[2]+=K[40]; -Vals[0]+=Vals[2]; -Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); -Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); - -W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U)); -W[9]+=W[2]; -W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U)); -Vals[3]+=W[9]; -Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); -Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); -Vals[3]+=K[41]; -Vals[6]+=Vals[3]; -Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); -Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]); - -W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U)); -W[10]+=W[3]; -W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U)); -Vals[4]+=W[10]; -Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); -Vals[4]+=ch(Vals[6],Vals[0],Vals[1]); -Vals[4]+=K[42]; -Vals[7]+=Vals[4]; -Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); -Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]); - -W[11]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U)); -W[11]+=W[4]; -W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U)); -Vals[1]+=W[11]; -Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); 
-Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); -Vals[1]+=K[43]; -Vals[5]+=Vals[1]; -Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); -Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); - -W[12]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U)); -W[12]+=W[5]; -W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U)); -Vals[0]+=W[12]; -Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); -Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); -Vals[0]+=K[44]; -Vals[2]+=Vals[0]; -Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); -Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); - -W[13]+=(rotr(W[14],7)^rotr(W[14],18)^(W[14]>>3U)); -W[13]+=W[6]; -W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U)); -Vals[6]+=W[13]; -Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); -Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); -Vals[6]+=K[45]; -Vals[3]+=Vals[6]; -Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); -Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); - -W[14]+=(rotr(W[15],7)^rotr(W[15],18)^(W[15]>>3U)); -W[14]+=W[7]; -W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U)); -Vals[7]+=W[14]; -Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); -Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); -Vals[7]+=K[46]; -Vals[4]+=Vals[7]; -Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); -Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); - -W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U)); -W[15]+=W[8]; -W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U)); -Vals[5]+=W[15]; -Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); -Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); -Vals[5]+=K[47]; -Vals[1]+=Vals[5]; -Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); -Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); - -W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U)); -W[0]+=W[9]; -W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U)); -Vals[2]+=W[0]; -Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); -Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); -Vals[2]+=K[48]; -Vals[0]+=Vals[2]; 
-Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); -Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); - -W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U)); -W[1]+=W[10]; -W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U)); -Vals[3]+=W[1]; -Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); -Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); -Vals[3]+=K[49]; -Vals[6]+=Vals[3]; -Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22)); -Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]); - -W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U)); -W[2]+=W[11]; -W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U)); -Vals[4]+=W[2]; -Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25)); -Vals[4]+=ch(Vals[6],Vals[0],Vals[1]); -Vals[4]+=K[50]; -Vals[7]+=Vals[4]; -Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22)); -Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]); - -W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U)); -W[3]+=W[12]; -W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U)); -Vals[1]+=W[3]; -Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25)); -Vals[1]+=ch(Vals[7],Vals[6],Vals[0]); -Vals[1]+=K[51]; -Vals[5]+=Vals[1]; -Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22)); -Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]); - -W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U)); -W[4]+=W[13]; -W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U)); -Vals[0]+=W[4]; -Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25)); -Vals[0]+=ch(Vals[5],Vals[7],Vals[6]); -Vals[0]+=K[52]; -Vals[2]+=Vals[0]; -Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22)); -Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]); - -W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U)); -W[5]+=W[14]; -W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U)); -Vals[6]+=W[5]; -Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25)); -Vals[6]+=ch(Vals[2],Vals[5],Vals[7]); -Vals[6]+=K[53]; -Vals[3]+=Vals[6]; -Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22)); -Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]); - 
-W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U)); -W[6]+=W[15]; -W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U)); -Vals[7]+=W[6]; -Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); -Vals[7]+=ch(Vals[3],Vals[2],Vals[5]); -Vals[7]+=K[54]; -Vals[4]+=Vals[7]; -Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22)); -Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]); - -W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U)); -W[7]+=W[0]; -W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U)); -Vals[5]+=W[7]; -Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); -Vals[5]+=ch(Vals[4],Vals[3],Vals[2]); -Vals[5]+=K[55]; -Vals[1]+=Vals[5]; -Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22)); -Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]); - -W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U)); -W[8]+=W[1]; -W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U)); -Vals[2]+=W[8]; -Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); -Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); -Vals[2]+=K[56]; -Vals[0]+=Vals[2]; - -W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U)); -W[9]+=W[2]; -W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U)); -Vals[3]+=W[9]; -Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25)); -Vals[3]+=ch(Vals[0],Vals[1],Vals[4]); -Vals[3]+=K[57]; -Vals[3]+=Vals[6]; - -W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U)); -W[10]+=W[3]; -W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U)); -Vals[4]+=W[10]; -Vals[4]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25)); -Vals[4]+=ch(Vals[3],Vals[0],Vals[1]); -Vals[4]+=K[58]; -Vals[4]+=Vals[7]; -Vals[1]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25)); -Vals[1]+=ch(Vals[4],Vals[3],Vals[0]); -Vals[1]+=W[11]; -Vals[1]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U)); -Vals[1]+=W[4]; -Vals[1]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U)); -Vals[1]+=K[59]; -Vals[1]+=Vals[5]; - -#define FOUND (0x800) -#define NFLAG (0x7FF) - -#if defined(VECTORS2) || defined(VECTORS4) - Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); - 
Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); - Vals[2]+=W[12]; - Vals[2]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U)); - Vals[2]+=W[5]; - Vals[2]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U)); - Vals[2]+=Vals[0]; - Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); - Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); - - if (any(Vals[2] == 0x136032edU)) { - if (Vals[2].x == 0x136032edU) - output[FOUND] = output[NFLAG & nonce.x] = nonce.x; - if (Vals[2].y == 0x136032edU) - output[FOUND] = output[NFLAG & nonce.y] = nonce.y; -#if defined(VECTORS4) - if (Vals[2].z == 0x136032edU) - output[FOUND] = output[NFLAG & nonce.z] = nonce.z; - if (Vals[2].w == 0x136032edU) - output[FOUND] = output[NFLAG & nonce.w] = nonce.w; -#endif - } -#else - if ((Vals[2]+ - Ma(Vals[6],Vals[5],Vals[7])+ - (rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22))+ - W[12]+ - (rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U))+ - W[5]+ - (rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U))+ - Vals[0]+ - (rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25))+ - ch(Vals[1],Vals[4],Vals[3])) == 0x136032edU) - output[FOUND] = output[NFLAG & nonce] = nonce; -#endif -} diff --git a/scrypt120713.cl b/scrypt120713.cl deleted file mode 100644 index d38f6a54..00000000 --- a/scrypt120713.cl +++ /dev/null @@ -1,757 +0,0 @@ -#define rotl(x,y) rotate(x,y) -#define Ch(x,y,z) bitselect(z,y,x) -#define Maj(x,y,z) Ch((x^z),y,z) - -#define EndianSwap(n) (rotl(n&0x00FF00FF,24U)|rotl(n&0xFF00FF00,8U)) - -#define Tr2(x) (rotl(x, 30U) ^ rotl(x, 19U) ^ rotl(x, 10U)) -#define Tr1(x) (rotl(x, 26U) ^ rotl(x, 21U) ^ rotl(x, 7U)) -#define Wr2(x) (rotl(x, 25U) ^ rotl(x, 14U) ^ (x>>3U)) -#define Wr1(x) (rotl(x, 15U) ^ rotl(x, 13U) ^ (x>>10U)) - -#define RND(a, b, c, d, e, f, g, h, k) \ - h += Tr1(e) + Ch(e, f, g) + k; \ - d += h; \ - h += Tr2(a) + Maj(a, b, c); - -void SHA256(uint4*restrict state0,uint4*restrict state1, const uint4 block0, const uint4 block1, const uint4 block2, const uint4 block3) -{ - uint4 S0 = *state0; - uint4 S1 = 
*state1; - -#define A S0.x -#define B S0.y -#define C S0.z -#define D S0.w -#define E S1.x -#define F S1.y -#define G S1.z -#define H S1.w - - uint4 W[4]; - - W[ 0].x = block0.x; - RND(A,B,C,D,E,F,G,H, W[0].x+0x428a2f98U); - W[ 0].y = block0.y; - RND(H,A,B,C,D,E,F,G, W[0].y+0x71374491U); - W[ 0].z = block0.z; - RND(G,H,A,B,C,D,E,F, W[0].z+0xb5c0fbcfU); - W[ 0].w = block0.w; - RND(F,G,H,A,B,C,D,E, W[0].w+0xe9b5dba5U); - - W[ 1].x = block1.x; - RND(E,F,G,H,A,B,C,D, W[1].x+0x3956c25bU); - W[ 1].y = block1.y; - RND(D,E,F,G,H,A,B,C, W[1].y+0x59f111f1U); - W[ 1].z = block1.z; - RND(C,D,E,F,G,H,A,B, W[1].z+0x923f82a4U); - W[ 1].w = block1.w; - RND(B,C,D,E,F,G,H,A, W[1].w+0xab1c5ed5U); - - W[ 2].x = block2.x; - RND(A,B,C,D,E,F,G,H, W[2].x+0xd807aa98U); - W[ 2].y = block2.y; - RND(H,A,B,C,D,E,F,G, W[2].y+0x12835b01U); - W[ 2].z = block2.z; - RND(G,H,A,B,C,D,E,F, W[2].z+0x243185beU); - W[ 2].w = block2.w; - RND(F,G,H,A,B,C,D,E, W[2].w+0x550c7dc3U); - - W[ 3].x = block3.x; - RND(E,F,G,H,A,B,C,D, W[3].x+0x72be5d74U); - W[ 3].y = block3.y; - RND(D,E,F,G,H,A,B,C, W[3].y+0x80deb1feU); - W[ 3].z = block3.z; - RND(C,D,E,F,G,H,A,B, W[3].z+0x9bdc06a7U); - W[ 3].w = block3.w; - RND(B,C,D,E,F,G,H,A, W[3].w+0xc19bf174U); - - W[ 0].x += Wr1(W[ 3].z) + W[ 2].y + Wr2(W[ 0].y); - RND(A,B,C,D,E,F,G,H, W[0].x+0xe49b69c1U); - - W[ 0].y += Wr1(W[ 3].w) + W[ 2].z + Wr2(W[ 0].z); - RND(H,A,B,C,D,E,F,G, W[0].y+0xefbe4786U); - - W[ 0].z += Wr1(W[ 0].x) + W[ 2].w + Wr2(W[ 0].w); - RND(G,H,A,B,C,D,E,F, W[0].z+0x0fc19dc6U); - - W[ 0].w += Wr1(W[ 0].y) + W[ 3].x + Wr2(W[ 1].x); - RND(F,G,H,A,B,C,D,E, W[0].w+0x240ca1ccU); - - W[ 1].x += Wr1(W[ 0].z) + W[ 3].y + Wr2(W[ 1].y); - RND(E,F,G,H,A,B,C,D, W[1].x+0x2de92c6fU); - - W[ 1].y += Wr1(W[ 0].w) + W[ 3].z + Wr2(W[ 1].z); - RND(D,E,F,G,H,A,B,C, W[1].y+0x4a7484aaU); - - W[ 1].z += Wr1(W[ 1].x) + W[ 3].w + Wr2(W[ 1].w); - RND(C,D,E,F,G,H,A,B, W[1].z+0x5cb0a9dcU); - - W[ 1].w += Wr1(W[ 1].y) + W[ 0].x + Wr2(W[ 2].x); - RND(B,C,D,E,F,G,H,A, 
W[1].w+0x76f988daU); - - W[ 2].x += Wr1(W[ 1].z) + W[ 0].y + Wr2(W[ 2].y); - RND(A,B,C,D,E,F,G,H, W[2].x+0x983e5152U); - - W[ 2].y += Wr1(W[ 1].w) + W[ 0].z + Wr2(W[ 2].z); - RND(H,A,B,C,D,E,F,G, W[2].y+0xa831c66dU); - - W[ 2].z += Wr1(W[ 2].x) + W[ 0].w + Wr2(W[ 2].w); - RND(G,H,A,B,C,D,E,F, W[2].z+0xb00327c8U); - - W[ 2].w += Wr1(W[ 2].y) + W[ 1].x + Wr2(W[ 3].x); - RND(F,G,H,A,B,C,D,E, W[2].w+0xbf597fc7U); - - W[ 3].x += Wr1(W[ 2].z) + W[ 1].y + Wr2(W[ 3].y); - RND(E,F,G,H,A,B,C,D, W[3].x+0xc6e00bf3U); - - W[ 3].y += Wr1(W[ 2].w) + W[ 1].z + Wr2(W[ 3].z); - RND(D,E,F,G,H,A,B,C, W[3].y+0xd5a79147U); - - W[ 3].z += Wr1(W[ 3].x) + W[ 1].w + Wr2(W[ 3].w); - RND(C,D,E,F,G,H,A,B, W[3].z+0x06ca6351U); - - W[ 3].w += Wr1(W[ 3].y) + W[ 2].x + Wr2(W[ 0].x); - RND(B,C,D,E,F,G,H,A, W[3].w+0x14292967U); - - W[ 0].x += Wr1(W[ 3].z) + W[ 2].y + Wr2(W[ 0].y); - RND(A,B,C,D,E,F,G,H, W[0].x+0x27b70a85U); - - W[ 0].y += Wr1(W[ 3].w) + W[ 2].z + Wr2(W[ 0].z); - RND(H,A,B,C,D,E,F,G, W[0].y+0x2e1b2138U); - - W[ 0].z += Wr1(W[ 0].x) + W[ 2].w + Wr2(W[ 0].w); - RND(G,H,A,B,C,D,E,F, W[0].z+0x4d2c6dfcU); - - W[ 0].w += Wr1(W[ 0].y) + W[ 3].x + Wr2(W[ 1].x); - RND(F,G,H,A,B,C,D,E, W[0].w+0x53380d13U); - - W[ 1].x += Wr1(W[ 0].z) + W[ 3].y + Wr2(W[ 1].y); - RND(E,F,G,H,A,B,C,D, W[1].x+0x650a7354U); - - W[ 1].y += Wr1(W[ 0].w) + W[ 3].z + Wr2(W[ 1].z); - RND(D,E,F,G,H,A,B,C, W[1].y+0x766a0abbU); - - W[ 1].z += Wr1(W[ 1].x) + W[ 3].w + Wr2(W[ 1].w); - RND(C,D,E,F,G,H,A,B, W[1].z+0x81c2c92eU); - - W[ 1].w += Wr1(W[ 1].y) + W[ 0].x + Wr2(W[ 2].x); - RND(B,C,D,E,F,G,H,A, W[1].w+0x92722c85U); - - W[ 2].x += Wr1(W[ 1].z) + W[ 0].y + Wr2(W[ 2].y); - RND(A,B,C,D,E,F,G,H, W[2].x+0xa2bfe8a1U); - - W[ 2].y += Wr1(W[ 1].w) + W[ 0].z + Wr2(W[ 2].z); - RND(H,A,B,C,D,E,F,G, W[2].y+0xa81a664bU); - - W[ 2].z += Wr1(W[ 2].x) + W[ 0].w + Wr2(W[ 2].w); - RND(G,H,A,B,C,D,E,F, W[2].z+0xc24b8b70U); - - W[ 2].w += Wr1(W[ 2].y) + W[ 1].x + Wr2(W[ 3].x); - RND(F,G,H,A,B,C,D,E, W[2].w+0xc76c51a3U); - - W[ 3].x += 
Wr1(W[ 2].z) + W[ 1].y + Wr2(W[ 3].y); - RND(E,F,G,H,A,B,C,D, W[3].x+0xd192e819U); - - W[ 3].y += Wr1(W[ 2].w) + W[ 1].z + Wr2(W[ 3].z); - RND(D,E,F,G,H,A,B,C, W[3].y+0xd6990624U); - - W[ 3].z += Wr1(W[ 3].x) + W[ 1].w + Wr2(W[ 3].w); - RND(C,D,E,F,G,H,A,B, W[3].z+0xf40e3585U); - - W[ 3].w += Wr1(W[ 3].y) + W[ 2].x + Wr2(W[ 0].x); - RND(B,C,D,E,F,G,H,A, W[3].w+0x106aa070U); - - W[ 0].x += Wr1(W[ 3].z) + W[ 2].y + Wr2(W[ 0].y); - RND(A,B,C,D,E,F,G,H, W[0].x+0x19a4c116U); - - W[ 0].y += Wr1(W[ 3].w) + W[ 2].z + Wr2(W[ 0].z); - RND(H,A,B,C,D,E,F,G, W[0].y+0x1e376c08U); - - W[ 0].z += Wr1(W[ 0].x) + W[ 2].w + Wr2(W[ 0].w); - RND(G,H,A,B,C,D,E,F, W[0].z+0x2748774cU); - - W[ 0].w += Wr1(W[ 0].y) + W[ 3].x + Wr2(W[ 1].x); - RND(F,G,H,A,B,C,D,E, W[0].w+0x34b0bcb5U); - - W[ 1].x += Wr1(W[ 0].z) + W[ 3].y + Wr2(W[ 1].y); - RND(E,F,G,H,A,B,C,D, W[1].x+0x391c0cb3U); - - W[ 1].y += Wr1(W[ 0].w) + W[ 3].z + Wr2(W[ 1].z); - RND(D,E,F,G,H,A,B,C, W[1].y+0x4ed8aa4aU); - - W[ 1].z += Wr1(W[ 1].x) + W[ 3].w + Wr2(W[ 1].w); - RND(C,D,E,F,G,H,A,B, W[1].z+0x5b9cca4fU); - - W[ 1].w += Wr1(W[ 1].y) + W[ 0].x + Wr2(W[ 2].x); - RND(B,C,D,E,F,G,H,A, W[1].w+0x682e6ff3U); - - W[ 2].x += Wr1(W[ 1].z) + W[ 0].y + Wr2(W[ 2].y); - RND(A,B,C,D,E,F,G,H, W[2].x+0x748f82eeU); - - W[ 2].y += Wr1(W[ 1].w) + W[ 0].z + Wr2(W[ 2].z); - RND(H,A,B,C,D,E,F,G, W[2].y+0x78a5636fU); - - W[ 2].z += Wr1(W[ 2].x) + W[ 0].w + Wr2(W[ 2].w); - RND(G,H,A,B,C,D,E,F, W[2].z+0x84c87814U); - - W[ 2].w += Wr1(W[ 2].y) + W[ 1].x + Wr2(W[ 3].x); - RND(F,G,H,A,B,C,D,E, W[2].w+0x8cc70208U); - - W[ 3].x += Wr1(W[ 2].z) + W[ 1].y + Wr2(W[ 3].y); - RND(E,F,G,H,A,B,C,D, W[3].x+0x90befffaU); - - W[ 3].y += Wr1(W[ 2].w) + W[ 1].z + Wr2(W[ 3].z); - RND(D,E,F,G,H,A,B,C, W[3].y+0xa4506cebU); - - W[ 3].z += Wr1(W[ 3].x) + W[ 1].w + Wr2(W[ 3].w); - RND(C,D,E,F,G,H,A,B, W[3].z+0xbef9a3f7U); - - W[ 3].w += Wr1(W[ 3].y) + W[ 2].x + Wr2(W[ 0].x); - RND(B,C,D,E,F,G,H,A, W[3].w+0xc67178f2U); - -#undef A -#undef B -#undef C -#undef D -#undef E 
-#undef F -#undef G -#undef H - - *state0 += S0; - *state1 += S1; -} - -void SHA256_fresh(uint4*restrict state0,uint4*restrict state1, const uint4 block0, const uint4 block1, const uint4 block2, const uint4 block3) -{ -#define A (*state0).x -#define B (*state0).y -#define C (*state0).z -#define D (*state0).w -#define E (*state1).x -#define F (*state1).y -#define G (*state1).z -#define H (*state1).w - - uint4 W[4]; - - W[0].x = block0.x; - D=0x98c7e2a2U+W[0].x; - H=0xfc08884dU+W[0].x; - - W[0].y = block0.y; - C=0xcd2a11aeU+Tr1(D)+Ch(D,0x510e527fU,0x9b05688cU)+W[0].y; - G=0xC3910C8EU+C+Tr2(H)+Ch(H,0xfb6feee7U,0x2a01a605U); - - W[0].z = block0.z; - B=0x0c2e12e0U+Tr1(C)+Ch(C,D,0x510e527fU)+W[0].z; - F=0x4498517BU+B+Tr2(G)+Maj(G,H,0x6a09e667U); - - W[0].w = block0.w; - A=0xa4ce148bU+Tr1(B)+Ch(B,C,D)+W[0].w; - E=0x95F61999U+A+Tr2(F)+Maj(F,G,H); - - W[1].x = block1.x; - RND(E,F,G,H,A,B,C,D, W[1].x+0x3956c25bU); - W[1].y = block1.y; - RND(D,E,F,G,H,A,B,C, W[1].y+0x59f111f1U); - W[1].z = block1.z; - RND(C,D,E,F,G,H,A,B, W[1].z+0x923f82a4U); - W[1].w = block1.w; - RND(B,C,D,E,F,G,H,A, W[1].w+0xab1c5ed5U); - - W[2].x = block2.x; - RND(A,B,C,D,E,F,G,H, W[2].x+0xd807aa98U); - W[2].y = block2.y; - RND(H,A,B,C,D,E,F,G, W[2].y+0x12835b01U); - W[2].z = block2.z; - RND(G,H,A,B,C,D,E,F, W[2].z+0x243185beU); - W[2].w = block2.w; - RND(F,G,H,A,B,C,D,E, W[2].w+0x550c7dc3U); - - W[3].x = block3.x; - RND(E,F,G,H,A,B,C,D, W[3].x+0x72be5d74U); - W[3].y = block3.y; - RND(D,E,F,G,H,A,B,C, W[3].y+0x80deb1feU); - W[3].z = block3.z; - RND(C,D,E,F,G,H,A,B, W[3].z+0x9bdc06a7U); - W[3].w = block3.w; - RND(B,C,D,E,F,G,H,A, W[3].w+0xc19bf174U); - - W[0].x += Wr1(W[3].z) + W[2].y + Wr2(W[0].y); - RND(A,B,C,D,E,F,G,H, W[0].x+0xe49b69c1U); - - W[0].y += Wr1(W[3].w) + W[2].z + Wr2(W[0].z); - RND(H,A,B,C,D,E,F,G, W[0].y+0xefbe4786U); - - W[0].z += Wr1(W[0].x) + W[2].w + Wr2(W[0].w); - RND(G,H,A,B,C,D,E,F, W[0].z+0x0fc19dc6U); - - W[0].w += Wr1(W[0].y) + W[3].x + Wr2(W[1].x); - RND(F,G,H,A,B,C,D,E, 
W[0].w+0x240ca1ccU); - - W[1].x += Wr1(W[0].z) + W[3].y + Wr2(W[1].y); - RND(E,F,G,H,A,B,C,D, W[1].x+0x2de92c6fU); - - W[1].y += Wr1(W[0].w) + W[3].z + Wr2(W[1].z); - RND(D,E,F,G,H,A,B,C, W[1].y+0x4a7484aaU); - - W[1].z += Wr1(W[1].x) + W[3].w + Wr2(W[1].w); - RND(C,D,E,F,G,H,A,B, W[1].z+0x5cb0a9dcU); - - W[1].w += Wr1(W[1].y) + W[0].x + Wr2(W[2].x); - RND(B,C,D,E,F,G,H,A, W[1].w+0x76f988daU); - - W[2].x += Wr1(W[1].z) + W[0].y + Wr2(W[2].y); - RND(A,B,C,D,E,F,G,H, W[2].x+0x983e5152U); - - W[2].y += Wr1(W[1].w) + W[0].z + Wr2(W[2].z); - RND(H,A,B,C,D,E,F,G, W[2].y+0xa831c66dU); - - W[2].z += Wr1(W[2].x) + W[0].w + Wr2(W[2].w); - RND(G,H,A,B,C,D,E,F, W[2].z+0xb00327c8U); - - W[2].w += Wr1(W[2].y) + W[1].x + Wr2(W[3].x); - RND(F,G,H,A,B,C,D,E, W[2].w+0xbf597fc7U); - - W[3].x += Wr1(W[2].z) + W[1].y + Wr2(W[3].y); - RND(E,F,G,H,A,B,C,D, W[3].x+0xc6e00bf3U); - - W[3].y += Wr1(W[2].w) + W[1].z + Wr2(W[3].z); - RND(D,E,F,G,H,A,B,C, W[3].y+0xd5a79147U); - - W[3].z += Wr1(W[3].x) + W[1].w + Wr2(W[3].w); - RND(C,D,E,F,G,H,A,B, W[3].z+0x06ca6351U); - - W[3].w += Wr1(W[3].y) + W[2].x + Wr2(W[0].x); - RND(B,C,D,E,F,G,H,A, W[3].w+0x14292967U); - - W[0].x += Wr1(W[3].z) + W[2].y + Wr2(W[0].y); - RND(A,B,C,D,E,F,G,H, W[0].x+0x27b70a85U); - - W[0].y += Wr1(W[3].w) + W[2].z + Wr2(W[0].z); - RND(H,A,B,C,D,E,F,G, W[0].y+0x2e1b2138U); - - W[0].z += Wr1(W[0].x) + W[2].w + Wr2(W[0].w); - RND(G,H,A,B,C,D,E,F, W[0].z+0x4d2c6dfcU); - - W[0].w += Wr1(W[0].y) + W[3].x + Wr2(W[1].x); - RND(F,G,H,A,B,C,D,E, W[0].w+0x53380d13U); - - W[1].x += Wr1(W[0].z) + W[3].y + Wr2(W[1].y); - RND(E,F,G,H,A,B,C,D, W[1].x+0x650a7354U); - - W[1].y += Wr1(W[0].w) + W[3].z + Wr2(W[1].z); - RND(D,E,F,G,H,A,B,C, W[1].y+0x766a0abbU); - - W[1].z += Wr1(W[1].x) + W[3].w + Wr2(W[1].w); - RND(C,D,E,F,G,H,A,B, W[1].z+0x81c2c92eU); - - W[1].w += Wr1(W[1].y) + W[0].x + Wr2(W[2].x); - RND(B,C,D,E,F,G,H,A, W[1].w+0x92722c85U); - - W[2].x += Wr1(W[1].z) + W[0].y + Wr2(W[2].y); - RND(A,B,C,D,E,F,G,H, W[2].x+0xa2bfe8a1U); - - 
W[2].y += Wr1(W[1].w) + W[0].z + Wr2(W[2].z); - RND(H,A,B,C,D,E,F,G, W[2].y+0xa81a664bU); - - W[2].z += Wr1(W[2].x) + W[0].w + Wr2(W[2].w); - RND(G,H,A,B,C,D,E,F, W[2].z+0xc24b8b70U); - - W[2].w += Wr1(W[2].y) + W[1].x + Wr2(W[3].x); - RND(F,G,H,A,B,C,D,E, W[2].w+0xc76c51a3U); - - W[3].x += Wr1(W[2].z) + W[1].y + Wr2(W[3].y); - RND(E,F,G,H,A,B,C,D, W[3].x+0xd192e819U); - - W[3].y += Wr1(W[2].w) + W[1].z + Wr2(W[3].z); - RND(D,E,F,G,H,A,B,C, W[3].y+0xd6990624U); - - W[3].z += Wr1(W[3].x) + W[1].w + Wr2(W[3].w); - RND(C,D,E,F,G,H,A,B, W[3].z+0xf40e3585U); - - W[3].w += Wr1(W[3].y) + W[2].x + Wr2(W[0].x); - RND(B,C,D,E,F,G,H,A, W[3].w+0x106aa070U); - - W[0].x += Wr1(W[3].z) + W[2].y + Wr2(W[0].y); - RND(A,B,C,D,E,F,G,H, W[0].x+0x19a4c116U); - - W[0].y += Wr1(W[3].w) + W[2].z + Wr2(W[0].z); - RND(H,A,B,C,D,E,F,G, W[0].y+0x1e376c08U); - - W[0].z += Wr1(W[0].x) + W[2].w + Wr2(W[0].w); - RND(G,H,A,B,C,D,E,F, W[0].z+0x2748774cU); - - W[0].w += Wr1(W[0].y) + W[3].x + Wr2(W[1].x); - RND(F,G,H,A,B,C,D,E, W[0].w+0x34b0bcb5U); - - W[1].x += Wr1(W[0].z) + W[3].y + Wr2(W[1].y); - RND(E,F,G,H,A,B,C,D, W[1].x+0x391c0cb3U); - - W[1].y += Wr1(W[0].w) + W[3].z + Wr2(W[1].z); - RND(D,E,F,G,H,A,B,C, W[1].y+0x4ed8aa4aU); - - W[1].z += Wr1(W[1].x) + W[3].w + Wr2(W[1].w); - RND(C,D,E,F,G,H,A,B, W[1].z+0x5b9cca4fU); - - W[1].w += Wr1(W[1].y) + W[0].x + Wr2(W[2].x); - RND(B,C,D,E,F,G,H,A, W[1].w+0x682e6ff3U); - - W[2].x += Wr1(W[1].z) + W[0].y + Wr2(W[2].y); - RND(A,B,C,D,E,F,G,H, W[2].x+0x748f82eeU); - - W[2].y += Wr1(W[1].w) + W[0].z + Wr2(W[2].z); - RND(H,A,B,C,D,E,F,G, W[2].y+0x78a5636fU); - - W[2].z += Wr1(W[2].x) + W[0].w + Wr2(W[2].w); - RND(G,H,A,B,C,D,E,F, W[2].z+0x84c87814U); - - W[2].w += Wr1(W[2].y) + W[1].x + Wr2(W[3].x); - RND(F,G,H,A,B,C,D,E, W[2].w+0x8cc70208U); - - W[3].x += Wr1(W[2].z) + W[1].y + Wr2(W[3].y); - RND(E,F,G,H,A,B,C,D, W[3].x+0x90befffaU); - - W[3].y += Wr1(W[2].w) + W[1].z + Wr2(W[3].z); - RND(D,E,F,G,H,A,B,C, W[3].y+0xa4506cebU); - - W[3].z += Wr1(W[3].x) + 
W[1].w + Wr2(W[3].w); - RND(C,D,E,F,G,H,A,B, W[3].z+0xbef9a3f7U); - - W[3].w += Wr1(W[3].y) + W[2].x + Wr2(W[0].x); - RND(B,C,D,E,F,G,H,A, W[3].w+0xc67178f2U); - -#undef A -#undef B -#undef C -#undef D -#undef E -#undef F -#undef G -#undef H - - *state0 += (uint4)(0x6A09E667U,0xBB67AE85U,0x3C6EF372U,0xA54FF53AU); - *state1 += (uint4)(0x510E527FU,0x9B05688CU,0x1F83D9ABU,0x5BE0CD19U); -} - -__constant uint fixedW[64] = -{ - 0x428a2f99,0xf1374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5, - 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf794, - 0xf59b89c2,0x73924787,0x23c6886e,0xa42ca65c,0x15ed3627,0x4d6edcbf,0xe28217fc,0xef02488f, - 0xb707775c,0x0468c23f,0xe7e72b4c,0x49e1f1a2,0x4b99c816,0x926d1570,0xaa0fc072,0xadb36e2c, - 0xad87a3ea,0xbcb1d3a3,0x7b993186,0x562b9420,0xbff3ca0c,0xda4b0c23,0x6cd8711a,0x8f337caa, - 0xc91b1417,0xc359dce1,0xa83253a7,0x3b13c12d,0x9d3d725d,0xd9031a84,0xb1a03340,0x16f58012, - 0xe64fb6a2,0xe84d923a,0xe93a5730,0x09837686,0x078ff753,0x29833341,0xd5de0b7e,0x6948ccf4, - 0xe0a1adbe,0x7c728e11,0x511c78e4,0x315b45bd,0xfca71413,0xea28f96a,0x79703128,0x4e1ef848, -}; - -void SHA256_fixed(uint4*restrict state0,uint4*restrict state1) -{ - uint4 S0 = *state0; - uint4 S1 = *state1; - -#define A S0.x -#define B S0.y -#define C S0.z -#define D S0.w -#define E S1.x -#define F S1.y -#define G S1.z -#define H S1.w - - RND(A,B,C,D,E,F,G,H, fixedW[0]); - RND(H,A,B,C,D,E,F,G, fixedW[1]); - RND(G,H,A,B,C,D,E,F, fixedW[2]); - RND(F,G,H,A,B,C,D,E, fixedW[3]); - RND(E,F,G,H,A,B,C,D, fixedW[4]); - RND(D,E,F,G,H,A,B,C, fixedW[5]); - RND(C,D,E,F,G,H,A,B, fixedW[6]); - RND(B,C,D,E,F,G,H,A, fixedW[7]); - RND(A,B,C,D,E,F,G,H, fixedW[8]); - RND(H,A,B,C,D,E,F,G, fixedW[9]); - RND(G,H,A,B,C,D,E,F, fixedW[10]); - RND(F,G,H,A,B,C,D,E, fixedW[11]); - RND(E,F,G,H,A,B,C,D, fixedW[12]); - RND(D,E,F,G,H,A,B,C, fixedW[13]); - RND(C,D,E,F,G,H,A,B, fixedW[14]); - RND(B,C,D,E,F,G,H,A, fixedW[15]); - RND(A,B,C,D,E,F,G,H, 
fixedW[16]); - RND(H,A,B,C,D,E,F,G, fixedW[17]); - RND(G,H,A,B,C,D,E,F, fixedW[18]); - RND(F,G,H,A,B,C,D,E, fixedW[19]); - RND(E,F,G,H,A,B,C,D, fixedW[20]); - RND(D,E,F,G,H,A,B,C, fixedW[21]); - RND(C,D,E,F,G,H,A,B, fixedW[22]); - RND(B,C,D,E,F,G,H,A, fixedW[23]); - RND(A,B,C,D,E,F,G,H, fixedW[24]); - RND(H,A,B,C,D,E,F,G, fixedW[25]); - RND(G,H,A,B,C,D,E,F, fixedW[26]); - RND(F,G,H,A,B,C,D,E, fixedW[27]); - RND(E,F,G,H,A,B,C,D, fixedW[28]); - RND(D,E,F,G,H,A,B,C, fixedW[29]); - RND(C,D,E,F,G,H,A,B, fixedW[30]); - RND(B,C,D,E,F,G,H,A, fixedW[31]); - RND(A,B,C,D,E,F,G,H, fixedW[32]); - RND(H,A,B,C,D,E,F,G, fixedW[33]); - RND(G,H,A,B,C,D,E,F, fixedW[34]); - RND(F,G,H,A,B,C,D,E, fixedW[35]); - RND(E,F,G,H,A,B,C,D, fixedW[36]); - RND(D,E,F,G,H,A,B,C, fixedW[37]); - RND(C,D,E,F,G,H,A,B, fixedW[38]); - RND(B,C,D,E,F,G,H,A, fixedW[39]); - RND(A,B,C,D,E,F,G,H, fixedW[40]); - RND(H,A,B,C,D,E,F,G, fixedW[41]); - RND(G,H,A,B,C,D,E,F, fixedW[42]); - RND(F,G,H,A,B,C,D,E, fixedW[43]); - RND(E,F,G,H,A,B,C,D, fixedW[44]); - RND(D,E,F,G,H,A,B,C, fixedW[45]); - RND(C,D,E,F,G,H,A,B, fixedW[46]); - RND(B,C,D,E,F,G,H,A, fixedW[47]); - RND(A,B,C,D,E,F,G,H, fixedW[48]); - RND(H,A,B,C,D,E,F,G, fixedW[49]); - RND(G,H,A,B,C,D,E,F, fixedW[50]); - RND(F,G,H,A,B,C,D,E, fixedW[51]); - RND(E,F,G,H,A,B,C,D, fixedW[52]); - RND(D,E,F,G,H,A,B,C, fixedW[53]); - RND(C,D,E,F,G,H,A,B, fixedW[54]); - RND(B,C,D,E,F,G,H,A, fixedW[55]); - RND(A,B,C,D,E,F,G,H, fixedW[56]); - RND(H,A,B,C,D,E,F,G, fixedW[57]); - RND(G,H,A,B,C,D,E,F, fixedW[58]); - RND(F,G,H,A,B,C,D,E, fixedW[59]); - RND(E,F,G,H,A,B,C,D, fixedW[60]); - RND(D,E,F,G,H,A,B,C, fixedW[61]); - RND(C,D,E,F,G,H,A,B, fixedW[62]); - RND(B,C,D,E,F,G,H,A, fixedW[63]); - -#undef A -#undef B -#undef C -#undef D -#undef E -#undef F -#undef G -#undef H - *state0 += S0; - *state1 += S1; -} - -void shittify(uint4 B[8]) -{ - uint4 tmp[4]; - tmp[0] = (uint4)(B[1].x,B[2].y,B[3].z,B[0].w); - tmp[1] = (uint4)(B[2].x,B[3].y,B[0].z,B[1].w); - tmp[2] = 
(uint4)(B[3].x,B[0].y,B[1].z,B[2].w); - tmp[3] = (uint4)(B[0].x,B[1].y,B[2].z,B[3].w); - -#pragma unroll - for(uint i=0; i<4; ++i) - B[i] = EndianSwap(tmp[i]); - - tmp[0] = (uint4)(B[5].x,B[6].y,B[7].z,B[4].w); - tmp[1] = (uint4)(B[6].x,B[7].y,B[4].z,B[5].w); - tmp[2] = (uint4)(B[7].x,B[4].y,B[5].z,B[6].w); - tmp[3] = (uint4)(B[4].x,B[5].y,B[6].z,B[7].w); - -#pragma unroll - for(uint i=0; i<4; ++i) - B[i+4] = EndianSwap(tmp[i]); -} - -void unshittify(uint4 B[8]) -{ - uint4 tmp[4]; - tmp[0] = (uint4)(B[3].x,B[2].y,B[1].z,B[0].w); - tmp[1] = (uint4)(B[0].x,B[3].y,B[2].z,B[1].w); - tmp[2] = (uint4)(B[1].x,B[0].y,B[3].z,B[2].w); - tmp[3] = (uint4)(B[2].x,B[1].y,B[0].z,B[3].w); - -#pragma unroll - for(uint i=0; i<4; ++i) - B[i] = EndianSwap(tmp[i]); - - tmp[0] = (uint4)(B[7].x,B[6].y,B[5].z,B[4].w); - tmp[1] = (uint4)(B[4].x,B[7].y,B[6].z,B[5].w); - tmp[2] = (uint4)(B[5].x,B[4].y,B[7].z,B[6].w); - tmp[3] = (uint4)(B[6].x,B[5].y,B[4].z,B[7].w); - -#pragma unroll - for(uint i=0; i<4; ++i) - B[i+4] = EndianSwap(tmp[i]); -} - -void salsa(uint4 B[8]) -{ - uint4 w[4]; - -#pragma unroll - for(uint i=0; i<4; ++i) - w[i] = (B[i]^=B[i+4]); - -#pragma unroll - for(uint i=0; i<4; ++i) - { - w[0] ^= rotl(w[3] +w[2] , 7U); - w[1] ^= rotl(w[0] +w[3] , 9U); - w[2] ^= rotl(w[1] +w[0] ,13U); - w[3] ^= rotl(w[2] +w[1] ,18U); - w[2] ^= rotl(w[3].wxyz+w[0].zwxy, 7U); - w[1] ^= rotl(w[2].wxyz+w[3].zwxy, 9U); - w[0] ^= rotl(w[1].wxyz+w[2].zwxy,13U); - w[3] ^= rotl(w[0].wxyz+w[1].zwxy,18U); - } - -#pragma unroll - for(uint i=0; i<4; ++i) - w[i] = (B[i+4]^=(B[i]+=w[i])); - -#pragma unroll - for(uint i=0; i<4; ++i) - { - w[0] ^= rotl(w[3] +w[2] , 7U); - w[1] ^= rotl(w[0] +w[3] , 9U); - w[2] ^= rotl(w[1] +w[0] ,13U); - w[3] ^= rotl(w[2] +w[1] ,18U); - w[2] ^= rotl(w[3].wxyz+w[0].zwxy, 7U); - w[1] ^= rotl(w[2].wxyz+w[3].zwxy, 9U); - w[0] ^= rotl(w[1].wxyz+w[2].zwxy,13U); - w[3] ^= rotl(w[0].wxyz+w[1].zwxy,18U); - } - -#pragma unroll - for(uint i=0; i<4; ++i) - B[i+4] += w[i]; -} - -#define 
Coord(x,y,z) x+y*(x ## SIZE)+z*(y ## SIZE)*(x ## SIZE) -#define CO Coord(z,x,y) - -void scrypt_core(uint4 X[8], __global uint4*restrict lookup) -{ - shittify(X); - const uint zSIZE = 8; - const uint ySIZE = (1024/LOOKUP_GAP+(1024%LOOKUP_GAP>0)); - const uint xSIZE = CONCURRENT_THREADS; - uint x = get_global_id(0)%xSIZE; - - for(uint y=0; y<1024/LOOKUP_GAP; ++y) - { -#pragma unroll - for(uint z=0; z