/*
 * Mirror of https://github.com/GOSTSec/ccminer
 * (synced 2025-01-10 06:47:53 +00:00, revision 06763c20b1).
 *
 * Revision note: Project was updated for VS2013 and CUDA SDK 6.5; it also adds
 * a --cputest function to dump CPU hash results.
 * TODO: x15 is not fully functional, but the first loop seems OK.
 * Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>
 *
 * File size: 1377 lines, 27 KiB, plain text.
 */
#include <stdint.h>
#include <stdio.h>

#include <cuda_runtime.h>
|
||
|
||
// from heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);

typedef unsigned char BitSequence;
typedef unsigned long long DataLength;

// Typed integer-literal helpers:
//   SPH_C64(x) appends ULL to the literal and casts it to uint64_t,
//   SPH_C32(x) appends U to the literal and casts it to uint32_t,
//   SPH_T32(x) masks an expression down to its low 32 bits.
#define SPH_C64(x)    ((uint64_t)(x ## ULL))
#define SPH_C32(x)    ((uint32_t)(x ## U))
#define SPH_T32(x)    ((x) & SPH_C32(0xFFFFFFFF))

// Device-side copy of the 16-word initial state. It is filled from
// h_ShaviteInitVector by x11_shavite512_cpu_init() and read by the kernel
// when it sets up its per-thread state.
static __constant__ uint32_t d_ShaviteInitVector[16];

// Host-side source of the initial state. The explicit [16] keeps its size in
// step with the device symbol above, because the upload uses sizeof() of
// this array as the byte count.
static const uint32_t h_ShaviteInitVector[16] = {
	SPH_C32(0x72FCCDD8), SPH_C32(0x79CA4727), SPH_C32(0x128A077B), SPH_C32(0x40D55AEC),
	SPH_C32(0xD1901A06), SPH_C32(0x430AE307), SPH_C32(0xB29F5CD1), SPH_C32(0xDF07FBFC),
	SPH_C32(0x8E45D73D), SPH_C32(0x681AB538), SPH_C32(0xBDE86578), SPH_C32(0xDD577E47),
	SPH_C32(0xE275EADE), SPH_C32(0x502D9FCD), SPH_C32(0xB9357178), SPH_C32(0x022A4B9A)
};
|
||
|
||
#include "cuda_x11_aes.cu"
|
||
|
||
/**
 * Applies one aes_round() to the four-word block (x0..x3) in place.
 *
 * aes_round() is not defined in this part of the file (presumably it comes
 * from cuda_x11_aes.cu); it is called here in its form without a round-key
 * argument and writes its result to four separate outputs, which are then
 * copied back over the inputs.
 *
 * @param sharedMemory  lookup table handed through to aes_round()
 * @param x0..x3        block words, read and then overwritten with the result
 */
static __device__ __forceinline__ void AES_ROUND_NOKEY(
	const uint32_t* __restrict__ sharedMemory,
	uint32_t &x0, uint32_t &x1, uint32_t &x2, uint32_t &x3)
{
	uint32_t y0, y1, y2, y3;

	aes_round(sharedMemory,
		x0, x1, x2, x3,
		y0, y1, y2, y3);

	x0 = y0;
	x1 = y1;
	x2 = y2;
	x3 = y3;
}
|
||
|
||
/**
 * Key-expansion element: runs one aes_round() over the four key words and
 * stores the result rotated by one word position
 * (k0 <- y1, k1 <- y2, k2 <- y3, k3 <- y0).
 *
 * aes_round() is not defined in this part of the file (presumably it comes
 * from cuda_x11_aes.cu).
 *
 * @param sharedMemory  lookup table handed through to aes_round()
 * @param k0..k3        key words, read and then overwritten in place
 */
static __device__ __forceinline__ void KEY_EXPAND_ELT(
	const uint32_t* __restrict__ sharedMemory,
	uint32_t &k0, uint32_t &k1, uint32_t &k2, uint32_t &k3)
{
	uint32_t y0, y1, y2, y3;

	aes_round(sharedMemory,
		k0, k1, k2, k3,
		y0, y1, y2, y3);

	// Store the round output rotated left by one word.
	k0 = y1;
	k1 = y2;
	k2 = y3;
	k3 = y0;
}
|
||
|
||
/**
 * Round function on one state quarter: starting from in[0..3], four times
 * XOR the next four key words into the block and apply AES_ROUND_NOKEY, then
 * XOR the result into out[0..3].
 *
 * `in` is read completely before `out` is written, so both may point into
 * the same state array as long as the two quarters differ.
 *
 * @param sharedMemory  lookup table handed through to AES_ROUND_NOKEY()
 * @param k             16 round-key words
 * @param in            4-word input quarter
 * @param out           4-word quarter that receives the XOR of the result
 */
static __device__ __forceinline__ void shavite_f4(
	const uint32_t* __restrict__ sharedMemory,
	const uint32_t *k, const uint32_t *in, uint32_t *out)
{
	uint32_t x0 = in[0];
	uint32_t x1 = in[1];
	uint32_t x2 = in[2];
	uint32_t x3 = in[3];

	#pragma unroll
	for (int j = 0; j < 16; j += 4)
	{
		x0 ^= k[j];
		x1 ^= k[j + 1];
		x2 ^= k[j + 2];
		x3 ^= k[j + 3];
		AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3);
	}

	out[0] ^= x0;
	out[1] ^= x1;
	out[2] ^= x2;
	out[3] ^= x3;
}

/**
 * Non-linear key-schedule step over all 32 round-key words.
 *
 * For each 4-word group, in ascending order: KEY_EXPAND_ELT the group, then
 * XOR in the preceding group (group 0 uses group 7 as left by the previous
 * step). If the group index equals tweakGroup, the words t0..t3 are XORed in
 * as well, before any later group reads the result.
 *
 * @param sharedMemory  lookup table handed through to KEY_EXPAND_ELT()
 * @param rk            32 round-key words, updated in place
 * @param tweakGroup    group (0..7) that receives t0..t3, or -1 for none
 * @param t0..t3        tweak words for that group
 */
static __device__ __forceinline__ void shavite_expand_nl(
	const uint32_t* __restrict__ sharedMemory, uint32_t *rk,
	const int tweakGroup = -1,
	const uint32_t t0 = 0, const uint32_t t1 = 0,
	const uint32_t t2 = 0, const uint32_t t3 = 0)
{
	#pragma unroll
	for (int g = 0; g < 8; g++)
	{
		const int b = 4 * g;
		const int prev = (b + 28) & 31;   // start of the preceding group, wrapping 0 -> 28

		KEY_EXPAND_ELT(sharedMemory, rk[b], rk[b + 1], rk[b + 2], rk[b + 3]);
		rk[b]     ^= rk[prev];
		rk[b + 1] ^= rk[prev + 1];
		rk[b + 2] ^= rk[prev + 2];
		rk[b + 3] ^= rk[prev + 3];

		if (g == tweakGroup)
		{
			rk[b]     ^= t0;
			rk[b + 1] ^= t1;
			rk[b + 2] ^= t2;
			rk[b + 3] ^= t3;
		}
	}
}

/**
 * Linear key-schedule step: rk[i] ^= rk[(i - 7) mod 32] for i = 0..31 in
 * ascending order, so indices 7 and above pick up words already updated by
 * this same step.
 */
static __device__ __forceinline__ void shavite_expand_lin(uint32_t *rk)
{
	#pragma unroll
	for (int i = 0; i < 32; i++)
		rk[i] ^= rk[(i + 25) & 31];
}

/**
 * One full round: the first 16 key words drive p[inA..] -> p[outA..], the
 * last 16 drive p[inB..] -> p[outB..]. All offsets are quarter starts
 * (0, 4, 8 or 12).
 */
static __device__ __forceinline__ void shavite_round(
	const uint32_t* __restrict__ sharedMemory,
	const uint32_t *rk, uint32_t *p,
	const int inA, const int outA, const int inB, const int outB)
{
	shavite_f4(sharedMemory, rk,      p + inA, p + outA);
	shavite_f4(sharedMemory, rk + 16, p + inB, p + outB);
}

/**
 * Compression function used by x11_shavite512_gpu_hash_64 (presumably the
 * SHAvite-3-512 compression, going by the file's naming): 14 rounds over a
 * 16-word state keyed by a 32-word message block.
 *
 * The key schedule alternates non-linear and linear steps. A block counter,
 * fixed here at 512 (which matches the bit length of the 64-byte message the
 * kernel hashes), is XORed into the schedule in rounds 1, 5, 9 and 13, each
 * time in a different key group and with a different word complemented. The
 * key schedule never depends on the state, so each round first finishes its
 * 32 key words and then runs the two quarter updates.
 *
 * Every loop in this function and its helpers has a compile-time trip count
 * and every array index becomes a constant after unrolling; the intent is
 * that the compiler keeps p[] and rk[] in registers.
 *
 * @param sharedMemory  lookup table handed through to the AES helpers
 * @param state         16-word chaining value; updated in place
 * @param msg           32-word message block; only read
 */
static __device__ void
c512(const uint32_t* sharedMemory, uint32_t *state, uint32_t *msg)
{
	const uint32_t counter = 512;
	uint32_t p[16];
	uint32_t rk[32];

	#pragma unroll
	for (int i = 0; i < 16; i++)
		p[i] = state[i];

	#pragma unroll
	for (int i = 0; i < 32; i++)
		rk[i] = msg[i];

	/* round 0: the message words are used directly as round keys */
	shavite_round(sharedMemory, rk, p, 4, 0, 12, 8);

	/* round 1: counter in key group 0 (rk00 ^= counter, rk03 ^= ~0) */
	shavite_expand_nl(sharedMemory, rk, 0, counter, 0, 0, 0xFFFFFFFF);
	shavite_round(sharedMemory, rk, p, 0, 12, 8, 4);
	/* round 2 */
	shavite_expand_lin(rk);
	shavite_round(sharedMemory, rk, p, 12, 8, 4, 0);
	/* round 3 */
	shavite_expand_nl(sharedMemory, rk);
	shavite_round(sharedMemory, rk, p, 8, 4, 0, 12);
	/* round 4 */
	shavite_expand_lin(rk);
	shavite_round(sharedMemory, rk, p, 4, 0, 12, 8);

	/* round 5: complemented counter in key group 1 (rk07 ^= ~counter) */
	shavite_expand_nl(sharedMemory, rk, 1, 0, 0, 0, SPH_T32(~counter));
	shavite_round(sharedMemory, rk, p, 0, 12, 8, 4);
	/* round 6 */
	shavite_expand_lin(rk);
	shavite_round(sharedMemory, rk, p, 12, 8, 4, 0);
	/* round 7 */
	shavite_expand_nl(sharedMemory, rk);
	shavite_round(sharedMemory, rk, p, 8, 4, 0, 12);
	/* round 8 */
	shavite_expand_lin(rk);
	shavite_round(sharedMemory, rk, p, 4, 0, 12, 8);

	/* round 9: counter in key group 7 (rk1E ^= counter, rk1F ^= ~0) */
	shavite_expand_nl(sharedMemory, rk, 7, 0, 0, counter, 0xFFFFFFFF);
	shavite_round(sharedMemory, rk, p, 0, 12, 8, 4);
	/* round 10 */
	shavite_expand_lin(rk);
	shavite_round(sharedMemory, rk, p, 12, 8, 4, 0);
	/* round 11 */
	shavite_expand_nl(sharedMemory, rk);
	shavite_round(sharedMemory, rk, p, 8, 4, 0, 12);
	/* round 12 */
	shavite_expand_lin(rk);
	shavite_round(sharedMemory, rk, p, 4, 0, 12, 8);

	/* round 13: counter in key group 6 (rk19 ^= counter, rk1B ^= ~0) */
	shavite_expand_nl(sharedMemory, rk, 6, 0, counter, 0, 0xFFFFFFFF);
	shavite_round(sharedMemory, rk, p, 0, 12, 8, 4);

	/* feed-forward: the two state halves are swapped when folded back in */
	#pragma unroll
	for (int i = 0; i < 16; i++)
		state[i] ^= p[(i + 8) & 15];
}
|
||
|
||
|
||
// The hash function
/**
 * Hashes one 64-byte input per thread with c512() and writes the 64-byte
 * result back over the input in g_hash.
 *
 * Launch layout: 1-D grid of 1-D blocks, one thread per hash; threads whose
 * global index is >= `threads` do no hashing. The host wrapper in this file
 * launches 256 threads per block.
 * Shared memory: 4 KiB static (1024 words) for the table set up by
 * aes_gpu_init(); no dynamic shared memory is used.
 *
 * @param threads       number of hashes to process
 * @param startNounce   nonce of the first hash; a hash's slot in g_hash is
 *                      (nonce - startNounce)
 * @param g_hash        hash buffer, 8 uint64_t (64 bytes) per slot,
 *                      read and overwritten
 * @param g_nonceVector optional per-thread nonce list; when NULL the nonce
 *                      is startNounce + thread index
 */
__global__ void x11_shavite512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector)
{
	__shared__ uint32_t sharedMemory[1024];

	aes_gpu_init(sharedMemory);
	// The table is presumably filled cooperatively by the threads of the
	// block (aes_gpu_init is not visible here), so wait until every thread
	// has finished writing before any thread reads it. The barrier sits
	// outside the bounds check below so that all threads reach it.
	__syncthreads();

	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);

		int hashPosition = nounce - startNounce;
		uint32_t *Hash = (uint32_t*)&g_hash[8 * hashPosition];

		// copy the initial state
		uint32_t state[16];

		#pragma unroll 16
		for(int i=0;i<16;i++)
			state[i] = d_ShaviteInitVector[i];

		// load the message
		uint32_t msg[32];

		// fill the message with the 64 bytes of the previous hash
		#pragma unroll 16
		for(int i=0;i<16;i++)
			msg[i] = Hash[i];

		// end-of-message marker, followed by zero padding
		msg[16] = 0x80;
		#pragma unroll 10
		for(int i=17;i<27;i++)
			msg[i] = 0;

		// Trailer words of the padded block. NOTE(review): these look like
		// the message-length and digest-size fields (0x0200 = 512 in the top
		// bytes of words 27 and 31) -- confirm against the SHAvite-3 spec.
		msg[27] = 0x02000000;
		msg[28] = 0;
		msg[29] = 0;
		msg[30] = 0;
		msg[31] = 0x02000000;

		c512(sharedMemory, state, msg);

		// write the result back over the input hash
		#pragma unroll 16
		for(int i=0;i<16;i++)
			Hash[i] = state[i];
	}
}
|
||
|
||
|
||
// Setup functions
/**
 * One-time host-side setup: calls aes_cpu_init() and uploads the initial
 * state vector into the __constant__ symbol d_ShaviteInitVector.
 *
 * A failed upload is reported on stderr. The function still returns normally
 * because its void signature gives callers no error channel.
 *
 * @param thr_id   worker index; used only in the error message
 * @param threads  not used by this function
 */
__host__ void x11_shavite512_cpu_init(int thr_id, int threads)
{
	(void)threads;

	aes_cpu_init();

	const cudaError_t err = cudaMemcpyToSymbol(d_ShaviteInitVector,
		h_ShaviteInitVector,
		sizeof(h_ShaviteInitVector),
		0, cudaMemcpyHostToDevice);

	if (err != cudaSuccess)
	{
		fprintf(stderr, "x11_shavite512_cpu_init (thread %d): cudaMemcpyToSymbol failed: %s\n",
			thr_id, cudaGetErrorString(err));
	}
}
|
||
|
||
/**
 * Launches x11_shavite512_gpu_hash_64 over `threads` hashes on the default
 * stream (256 threads per block, no dynamic shared memory) and then calls
 * MyStreamSynchronize().
 *
 * When `threads` is not positive the launch is skipped, since a grid with
 * zero blocks is not a valid launch configuration; MyStreamSynchronize() is
 * still called so the call sequence seen by the caller does not change.
 *
 * @param thr_id         worker index, forwarded to MyStreamSynchronize()
 * @param threads        number of hashes to process
 * @param startNounce    nonce of the first hash
 * @param d_nonceVector  optional device array of nonces, or NULL
 * @param d_hash         device hash buffer, 64 bytes per hash, updated in place
 * @param order          forwarded to MyStreamSynchronize() as `situation`
 */
__host__ void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
{
	const int threadsperblock = 256;

	if (threads > 0)
	{
		// number of thread blocks needed (ceiling division)
		dim3 grid((threads + threadsperblock-1)/threadsperblock);
		dim3 block(threadsperblock);

		// size of the dynamic shared memory region
		size_t shared_size = 0;

		x11_shavite512_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
	}

	MyStreamSynchronize(NULL, order, thr_id);
}
|
||
|