#include // memcpy() #include "cuda_helper.h" #define TPB 128 __constant__ uint32_t c_PaddedMessage80[32]; // padded message (80 bytes + padding) #include "cuda_x11_aes.cu" __device__ __forceinline__ static void AES_ROUND_NOKEY( const uint32_t* __restrict__ sharedMemory, uint32_t &x0, uint32_t &x1, uint32_t &x2, uint32_t &x3) { uint32_t y0, y1, y2, y3; aes_round(sharedMemory, x0, x1, x2, x3, y0, y1, y2, y3); x0 = y0; x1 = y1; x2 = y2; x3 = y3; } __device__ __forceinline__ static void KEY_EXPAND_ELT( const uint32_t* __restrict__ sharedMemory, uint32_t &k0, uint32_t &k1, uint32_t &k2, uint32_t &k3) { uint32_t y0, y1, y2, y3; aes_round(sharedMemory, k0, k1, k2, k3, y0, y1, y2, y3); k0 = y1; k1 = y2; k2 = y3; k3 = y0; } __device__ __forceinline__ static void c512(const uint32_t* sharedMemory, uint32_t *state, uint32_t *msg, const uint32_t count) { uint32_t p0, p1, p2, p3, p4, p5, p6, p7; uint32_t p8, p9, pA, pB, pC, pD, pE, pF; uint32_t x0, x1, x2, x3; uint32_t rk00, rk01, rk02, rk03, rk04, rk05, rk06, rk07; uint32_t rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F; uint32_t rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17; uint32_t rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F; const uint32_t counter = count; p0 = state[0x0]; p1 = state[0x1]; p2 = state[0x2]; p3 = state[0x3]; p4 = state[0x4]; p5 = state[0x5]; p6 = state[0x6]; p7 = state[0x7]; p8 = state[0x8]; p9 = state[0x9]; pA = state[0xA]; pB = state[0xB]; pC = state[0xC]; pD = state[0xD]; pE = state[0xE]; pF = state[0xF]; /* round 0 */ rk00 = msg[0]; x0 = p4 ^ msg[0]; rk01 = msg[1]; x1 = p5 ^ msg[1]; rk02 = msg[2]; x2 = p6 ^ msg[2]; rk03 = msg[3]; x3 = p7 ^ msg[3]; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk04 = msg[4]; x0 ^= msg[4]; rk05 = msg[5]; x1 ^= msg[5]; rk06 = msg[6]; x2 ^= msg[6]; rk07 = msg[7]; x3 ^= msg[7]; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk08 = msg[8]; x0 ^= msg[8]; rk09 = msg[9]; x1 ^= msg[9]; rk0A = msg[10]; x2 ^= msg[10]; rk0B = msg[11]; x3 ^= msg[11]; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk0C = msg[12]; x0 ^= msg[12]; rk0D = msg[13]; x1 ^= msg[13]; rk0E = msg[14]; x2 ^= msg[14]; rk0F = msg[15]; x3 ^= msg[15]; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); p0 ^= x0; p1 ^= x1; p2 ^= x2; p3 ^= x3; if (count == 512) { rk10 = 0x80U; x0 = pC ^ 0x80U; rk11 = 0; x1 = pD; rk12 = 0; x2 = pE; rk13 = 0; x3 = pF; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk14 = 0; rk15 = 0; rk16 = 0; rk17 = 0; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk18 = 0; rk19 = 0; rk1A = 0; rk1B = 0x02000000U; x3 ^= 0x02000000U; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk1C = 0; rk1D = 0; rk1E = 0; rk1F = 0x02000000; x3 ^= 0x02000000; } else { rk10 = msg[16]; x0 = pC ^ msg[16]; rk11 = msg[17]; x1 = pD ^ msg[17]; rk12 = msg[18]; x2 = pE ^ msg[18]; rk13 = msg[19]; x3 = pF ^ msg[19]; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk14 = msg[20]; x0 ^= msg[20]; rk15 = msg[21]; x1 ^= msg[21]; rk16 = msg[22]; x2 ^= msg[22]; rk17 = msg[23]; x3 ^= msg[23]; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk18 = msg[24]; x0 ^= msg[24]; rk19 = msg[25]; x1 ^= msg[25]; rk1A = msg[26]; x2 ^= msg[26]; rk1B = msg[27]; x3 ^= msg[27]; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk1C = msg[28]; x0 ^= msg[28]; rk1D = msg[29]; x1 ^= msg[29]; rk1E = msg[30]; x2 ^= msg[30]; rk1F = msg[31]; x3 ^= msg[31]; } AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); p8 ^= x0; p9 ^= x1; pA ^= x2; pB ^= x3; // 1 KEY_EXPAND_ELT(sharedMemory, rk00, rk01, rk02, rk03); rk00 ^= rk1C; rk01 ^= rk1D; rk02 ^= rk1E; rk03 ^= rk1F; rk00 ^= counter; rk03 ^= 0xFFFFFFFF; x0 = p0 ^ rk00; x1 = p1 ^ rk01; x2 = p2 ^ rk02; x3 = p3 ^ rk03; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk04, rk05, rk06, rk07); rk04 ^= rk00; rk05 ^= rk01; rk06 ^= rk02; rk07 ^= rk03; x0 ^= rk04; x1 ^= rk05; x2 ^= rk06; x3 ^= rk07; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk08, rk09, rk0A, rk0B); rk08 ^= rk04; rk09 ^= rk05; rk0A ^= rk06; rk0B ^= rk07; x0 ^= rk08; x1 ^= rk09; x2 ^= rk0A; x3 ^= rk0B; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk0C, rk0D, rk0E, rk0F); rk0C ^= rk08; rk0D ^= rk09; rk0E ^= rk0A; rk0F ^= rk0B; x0 ^= rk0C; x1 ^= rk0D; x2 ^= rk0E; x3 ^= rk0F; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); pC ^= x0; pD ^= x1; pE ^= x2; pF ^= x3; KEY_EXPAND_ELT(sharedMemory, rk10, rk11, rk12, rk13); rk10 ^= rk0C; rk11 ^= rk0D; rk12 ^= rk0E; rk13 ^= rk0F; x0 = p8 ^ rk10; x1 = p9 ^ rk11; x2 = pA ^ rk12; x3 = pB ^ rk13; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk14, rk15, rk16, rk17); rk14 ^= rk10; rk15 ^= rk11; rk16 ^= rk12; rk17 ^= rk13; x0 ^= rk14; x1 ^= rk15; x2 ^= rk16; x3 ^= rk17; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk18, rk19, rk1A, rk1B); rk18 ^= rk14; rk19 ^= rk15; rk1A ^= rk16; rk1B ^= rk17; x0 ^= rk18; x1 ^= rk19; x2 ^= rk1A; x3 ^= rk1B; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk1C, rk1D, rk1E, rk1F); rk1C ^= rk18; rk1D ^= rk19; rk1E ^= rk1A; rk1F ^= rk1B; x0 ^= rk1C; x1 ^= rk1D; x2 ^= rk1E; x3 ^= rk1F; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); p4 ^= x0; p5 ^= x1; p6 ^= x2; p7 ^= x3; rk00 ^= rk19; x0 = pC ^ rk00; rk01 ^= rk1A; x1 = pD ^ rk01; rk02 ^= rk1B; x2 = pE ^ rk02; rk03 ^= rk1C; x3 = pF ^ rk03; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk04 ^= rk1D; x0 ^= rk04; rk05 ^= rk1E; x1 ^= rk05; rk06 ^= rk1F; x2 ^= rk06; rk07 ^= rk00; x3 ^= rk07; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk08 ^= rk01; x0 ^= rk08; rk09 ^= rk02; x1 ^= rk09; rk0A ^= rk03; x2 ^= rk0A; rk0B ^= rk04; x3 ^= rk0B; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk0C ^= rk05; x0 ^= rk0C; rk0D ^= rk06; x1 ^= rk0D; rk0E ^= rk07; x2 ^= rk0E; rk0F ^= rk08; x3 ^= rk0F; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); p8 ^= x0; p9 ^= x1; pA ^= x2; pB ^= x3; rk10 ^= rk09; x0 = p4 ^ rk10; rk11 ^= rk0A; x1 = p5 ^ rk11; rk12 ^= rk0B; x2 = p6 ^ rk12; rk13 ^= rk0C; x3 = p7 ^ rk13; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk14 ^= rk0D; x0 ^= rk14; rk15 ^= rk0E; x1 ^= rk15; rk16 ^= rk0F; x2 ^= rk16; rk17 ^= rk10; x3 ^= rk17; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk18 ^= rk11; x0 ^= rk18; rk19 ^= rk12; x1 ^= rk19; rk1A ^= rk13; x2 ^= rk1A; rk1B ^= rk14; x3 ^= rk1B; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk1C ^= rk15; x0 ^= rk1C; rk1D ^= rk16; x1 ^= rk1D; rk1E ^= rk17; x2 ^= rk1E; rk1F ^= rk18; x3 ^= rk1F; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); p0 ^= x0; p1 ^= x1; p2 ^= x2; p3 ^= x3; /* round 3, 7, 11 */ KEY_EXPAND_ELT(sharedMemory, rk00, rk01, rk02, rk03); rk00 ^= rk1C; rk01 ^= rk1D; rk02 ^= rk1E; rk03 ^= rk1F; x0 = p8 ^ rk00; x1 = p9 ^ rk01; x2 = pA ^ rk02; x3 = pB ^ rk03; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk04, rk05, rk06, rk07); rk04 ^= rk00; rk05 ^= rk01; rk06 ^= rk02; rk07 ^= rk03; x0 ^= rk04; x1 ^= rk05; x2 ^= rk06; x3 ^= rk07; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk08, rk09, rk0A, rk0B); rk08 ^= rk04; rk09 ^= rk05; rk0A ^= rk06; rk0B ^= rk07; x0 ^= rk08; x1 ^= rk09; x2 ^= rk0A; x3 ^= rk0B; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk0C, rk0D, rk0E, rk0F); rk0C ^= rk08; rk0D ^= rk09; rk0E ^= rk0A; rk0F ^= rk0B; x0 ^= rk0C; x1 ^= rk0D; x2 ^= rk0E; x3 ^= rk0F; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); p4 ^= x0; p5 ^= x1; p6 ^= x2; p7 ^= x3; KEY_EXPAND_ELT(sharedMemory, rk10, rk11, rk12, rk13); rk10 ^= rk0C; rk11 ^= rk0D; rk12 ^= rk0E; rk13 ^= rk0F; x0 = p0 ^ rk10; x1 = p1 ^ rk11; x2 = p2 ^ rk12; x3 = p3 ^ rk13; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk14, rk15, rk16, rk17); rk14 ^= rk10; rk15 ^= rk11; rk16 ^= rk12; rk17 ^= rk13; x0 ^= rk14; x1 ^= rk15; x2 ^= rk16; x3 ^= rk17; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk18, rk19, rk1A, rk1B); rk18 ^= rk14; rk19 ^= rk15; rk1A ^= rk16; rk1B ^= rk17; x0 ^= rk18; x1 ^= rk19; x2 ^= rk1A; x3 ^= rk1B; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk1C, rk1D, rk1E, rk1F); rk1C ^= rk18; rk1D ^= rk19; rk1E ^= rk1A; rk1F ^= rk1B; x0 ^= rk1C; x1 ^= rk1D; x2 ^= rk1E; x3 ^= rk1F; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); pC ^= x0; pD ^= x1; pE ^= x2; pF ^= x3; /* round 4, 8, 12 */ rk00 ^= rk19; x0 = p4 ^ rk00; rk01 ^= rk1A; x1 = p5 ^ rk01; rk02 ^= rk1B; x2 = p6 ^ rk02; rk03 ^= rk1C; x3 = p7 ^ rk03; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk04 ^= rk1D; x0 ^= rk04; rk05 ^= rk1E; x1 ^= rk05; rk06 ^= rk1F; x2 ^= rk06; rk07 ^= rk00; x3 ^= rk07; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk08 ^= rk01; x0 ^= rk08; rk09 ^= rk02; x1 ^= rk09; rk0A ^= rk03; x2 ^= rk0A; rk0B ^= rk04; x3 ^= rk0B; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk0C ^= rk05; x0 ^= rk0C; rk0D ^= rk06; x1 ^= rk0D; rk0E ^= rk07; x2 ^= rk0E; rk0F ^= rk08; x3 ^= rk0F; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); p0 ^= x0; p1 ^= x1; p2 ^= x2; p3 ^= x3; rk10 ^= rk09; x0 = pC ^ rk10; rk11 ^= rk0A; x1 = pD ^ rk11; rk12 ^= rk0B; x2 = pE ^ rk12; rk13 ^= rk0C; x3 = pF ^ rk13; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk14 ^= rk0D; x0 ^= rk14; rk15 ^= rk0E; x1 ^= rk15; rk16 ^= rk0F; x2 ^= rk16; rk17 ^= rk10; x3 ^= rk17; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk18 ^= rk11; x0 ^= rk18; rk19 ^= rk12; x1 ^= rk19; rk1A ^= rk13; x2 ^= rk1A; rk1B ^= rk14; x3 ^= rk1B; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk1C ^= rk15; x0 ^= rk1C; rk1D ^= rk16; x1 ^= rk1D; rk1E ^= rk17; x2 ^= rk1E; rk1F ^= rk18; x3 ^= rk1F; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); p8 ^= x0; p9 ^= x1; pA ^= x2; pB ^= x3; // 2 KEY_EXPAND_ELT(sharedMemory, rk00, rk01, rk02, rk03); rk00 ^= rk1C; rk01 ^= rk1D; rk02 ^= rk1E; rk03 ^= rk1F; x0 = p0 ^ rk00; x1 = p1 ^ rk01; x2 = p2 ^ rk02; x3 = p3 ^ rk03; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk04, rk05, rk06, rk07); rk04 ^= rk00; rk05 ^= rk01; rk06 ^= rk02; rk07 ^= rk03; rk07 ^= SPH_T32(~counter); x0 ^= rk04; x1 ^= rk05; x2 ^= rk06; x3 ^= rk07; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk08, rk09, rk0A, rk0B); rk08 ^= rk04; rk09 ^= rk05; rk0A ^= rk06; rk0B ^= rk07; x0 ^= rk08; x1 ^= rk09; x2 ^= rk0A; x3 ^= rk0B; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk0C, rk0D, rk0E, rk0F); rk0C ^= rk08; rk0D ^= rk09; rk0E ^= rk0A; rk0F ^= rk0B; x0 ^= rk0C; x1 ^= rk0D; x2 ^= rk0E; x3 ^= rk0F; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); pC ^= x0; pD ^= x1; pE ^= x2; pF ^= x3; KEY_EXPAND_ELT(sharedMemory, rk10, rk11, rk12, rk13); rk10 ^= rk0C; rk11 ^= rk0D; rk12 ^= rk0E; rk13 ^= rk0F; x0 = p8 ^ rk10; x1 = p9 ^ rk11; x2 = pA ^ rk12; x3 = pB ^ rk13; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk14, rk15, rk16, rk17); rk14 ^= rk10; rk15 ^= rk11; rk16 ^= rk12; rk17 ^= rk13; x0 ^= rk14; x1 ^= rk15; x2 ^= rk16; x3 ^= rk17; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk18, rk19, rk1A, rk1B); rk18 ^= rk14; rk19 ^= rk15; rk1A ^= rk16; rk1B ^= rk17; x0 ^= rk18; x1 ^= rk19; x2 ^= rk1A; x3 ^= rk1B; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk1C, rk1D, rk1E, rk1F); rk1C ^= rk18; rk1D ^= rk19; rk1E ^= rk1A; rk1F ^= rk1B; x0 ^= rk1C; x1 ^= rk1D; x2 ^= rk1E; x3 ^= rk1F; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); p4 ^= x0; p5 ^= x1; p6 ^= x2; p7 ^= x3; rk00 ^= rk19; x0 = pC ^ rk00; rk01 ^= rk1A; x1 = pD ^ rk01; rk02 ^= rk1B; x2 = pE ^ rk02; rk03 ^= rk1C; x3 = pF ^ rk03; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk04 ^= rk1D; x0 ^= rk04; rk05 ^= rk1E; x1 ^= rk05; rk06 ^= rk1F; x2 ^= rk06; rk07 ^= rk00; x3 ^= rk07; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk08 ^= rk01; x0 ^= rk08; rk09 ^= rk02; x1 ^= rk09; rk0A ^= rk03; x2 ^= rk0A; rk0B ^= rk04; x3 ^= rk0B; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk0C ^= rk05; x0 ^= rk0C; rk0D ^= rk06; x1 ^= rk0D; rk0E ^= rk07; x2 ^= rk0E; rk0F ^= rk08; x3 ^= rk0F; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); p8 ^= x0; p9 ^= x1; pA ^= x2; pB ^= x3; rk10 ^= rk09; x0 = p4 ^ rk10; rk11 ^= rk0A; x1 = p5 ^ rk11; rk12 ^= rk0B; x2 = p6 ^ rk12; rk13 ^= rk0C; x3 = p7 ^ rk13; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk14 ^= rk0D; x0 ^= rk14; rk15 ^= rk0E; x1 ^= rk15; rk16 ^= rk0F; x2 ^= rk16; rk17 ^= rk10; x3 ^= rk17; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk18 ^= rk11; x0 ^= rk18; rk19 ^= rk12; x1 ^= rk19; rk1A ^= rk13; x2 ^= rk1A; rk1B ^= rk14; x3 ^= rk1B; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk1C ^= rk15; x0 ^= rk1C; rk1D ^= rk16; x1 ^= rk1D; rk1E ^= rk17; x2 ^= rk1E; rk1F ^= rk18; x3 ^= rk1F; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); p0 ^= x0; p1 ^= x1; p2 ^= x2; p3 ^= x3; /* round 3, 7, 11 */ KEY_EXPAND_ELT(sharedMemory, rk00, rk01, rk02, rk03); rk00 ^= rk1C; rk01 ^= rk1D; rk02 ^= rk1E; rk03 ^= rk1F; x0 = p8 ^ rk00; x1 = p9 ^ rk01; x2 = pA ^ rk02; x3 = pB ^ rk03; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk04, rk05, rk06, rk07); rk04 ^= rk00; rk05 ^= rk01; rk06 ^= rk02; rk07 ^= rk03; x0 ^= rk04; x1 ^= rk05; x2 ^= rk06; x3 ^= rk07; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk08, rk09, rk0A, rk0B); rk08 ^= rk04; rk09 ^= rk05; rk0A ^= rk06; rk0B ^= rk07; x0 ^= rk08; x1 ^= rk09; x2 ^= rk0A; x3 ^= rk0B; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk0C, rk0D, rk0E, rk0F); rk0C ^= rk08; rk0D ^= rk09; rk0E ^= rk0A; rk0F ^= rk0B; x0 ^= rk0C; x1 ^= rk0D; x2 ^= rk0E; x3 ^= rk0F; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); p4 ^= x0; p5 ^= x1; p6 ^= x2; p7 ^= x3; KEY_EXPAND_ELT(sharedMemory, rk10, rk11, rk12, rk13); rk10 ^= rk0C; rk11 ^= rk0D; rk12 ^= rk0E; rk13 ^= rk0F; x0 = p0 ^ rk10; x1 = p1 ^ rk11; x2 = p2 ^ rk12; x3 = p3 ^ rk13; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk14, rk15, rk16, rk17); rk14 ^= rk10; rk15 ^= rk11; rk16 ^= rk12; rk17 ^= rk13; x0 ^= rk14; x1 ^= rk15; x2 ^= rk16; x3 ^= rk17; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk18, rk19, rk1A, rk1B); rk18 ^= rk14; rk19 ^= rk15; rk1A ^= rk16; rk1B ^= rk17; x0 ^= rk18; x1 ^= rk19; x2 ^= rk1A; x3 ^= rk1B; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk1C, rk1D, rk1E, rk1F); rk1C ^= rk18; rk1D ^= rk19; rk1E ^= rk1A; rk1F ^= rk1B; x0 ^= rk1C; x1 ^= rk1D; x2 ^= rk1E; x3 ^= rk1F; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); pC ^= x0; pD ^= x1; pE ^= x2; pF ^= x3; /* round 4, 8, 12 */ rk00 ^= rk19; x0 = p4 ^ rk00; rk01 ^= rk1A; x1 = p5 ^ rk01; rk02 ^= rk1B; x2 = p6 ^ rk02; rk03 ^= rk1C; x3 = p7 ^ rk03; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk04 ^= rk1D; x0 ^= rk04; rk05 ^= rk1E; x1 ^= rk05; rk06 ^= rk1F; x2 ^= rk06; rk07 ^= rk00; x3 ^= rk07; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk08 ^= rk01; x0 ^= rk08; rk09 ^= rk02; x1 ^= rk09; rk0A ^= rk03; x2 ^= rk0A; rk0B ^= rk04; x3 ^= rk0B; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk0C ^= rk05; x0 ^= rk0C; rk0D ^= rk06; x1 ^= rk0D; rk0E ^= rk07; x2 ^= rk0E; rk0F ^= rk08; x3 ^= rk0F; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); p0 ^= x0; p1 ^= x1; p2 ^= x2; p3 ^= x3; rk10 ^= rk09; x0 = pC ^ rk10; rk11 ^= rk0A; x1 = pD ^ rk11; rk12 ^= rk0B; x2 = pE ^ rk12; rk13 ^= rk0C; x3 = pF ^ rk13; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk14 ^= rk0D; x0 ^= rk14; rk15 ^= rk0E; x1 ^= rk15; rk16 ^= rk0F; x2 ^= rk16; rk17 ^= rk10; x3 ^= rk17; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk18 ^= rk11; x0 ^= rk18; rk19 ^= rk12; x1 ^= rk19; rk1A ^= rk13; x2 ^= rk1A; rk1B ^= rk14; x3 ^= rk1B; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk1C ^= rk15; x0 ^= rk1C; rk1D ^= rk16; x1 ^= rk1D; rk1E ^= rk17; x2 ^= rk1E; rk1F ^= rk18; x3 ^= rk1F; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); p8 ^= x0; p9 ^= x1; pA ^= x2; pB ^= x3; // 3 KEY_EXPAND_ELT(sharedMemory, rk00, rk01, rk02, rk03); rk00 ^= rk1C; rk01 ^= rk1D; rk02 ^= rk1E; rk03 ^= rk1F; x0 = p0 ^ rk00; x1 = p1 ^ rk01; x2 = p2 ^ rk02; x3 = p3 ^ rk03; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk04, rk05, rk06, rk07); rk04 ^= rk00; rk05 ^= rk01; rk06 ^= rk02; rk07 ^= rk03; x0 ^= rk04; x1 ^= rk05; x2 ^= rk06; x3 ^= rk07; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk08, rk09, rk0A, rk0B); rk08 ^= rk04; rk09 ^= rk05; rk0A ^= rk06; rk0B ^= rk07; x0 ^= rk08; x1 ^= rk09; x2 ^= rk0A; x3 ^= rk0B; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk0C, rk0D, rk0E, rk0F); rk0C ^= rk08; rk0D ^= rk09; rk0E ^= rk0A; rk0F ^= rk0B; x0 ^= rk0C; x1 ^= rk0D; x2 ^= rk0E; x3 ^= rk0F; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); pC ^= x0; pD ^= x1; pE ^= x2; pF ^= x3; KEY_EXPAND_ELT(sharedMemory, rk10, rk11, rk12, rk13); rk10 ^= rk0C; rk11 ^= rk0D; rk12 ^= rk0E; rk13 ^= rk0F; x0 = p8 ^ rk10; x1 = p9 ^ rk11; x2 = pA ^ rk12; x3 = pB ^ rk13; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk14, rk15, rk16, rk17); rk14 ^= rk10; rk15 ^= rk11; rk16 ^= rk12; rk17 ^= rk13; x0 ^= rk14; x1 ^= rk15; x2 ^= rk16; x3 ^= rk17; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk18, rk19, rk1A, rk1B); rk18 ^= rk14; rk19 ^= rk15; rk1A ^= rk16; rk1B ^= rk17; x0 ^= rk18; x1 ^= rk19; x2 ^= rk1A; x3 ^= rk1B; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk1C, rk1D, rk1E, rk1F); rk1C ^= rk18; rk1D ^= rk19; rk1E ^= rk1A; rk1F ^= rk1B; rk1E ^= counter; rk1F ^= 0xFFFFFFFF; x0 ^= rk1C; x1 ^= rk1D; x2 ^= rk1E; x3 ^= rk1F; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); p4 ^= x0; p5 ^= x1; p6 ^= x2; p7 ^= x3; rk00 ^= rk19; x0 = pC ^ rk00; rk01 ^= rk1A; x1 = pD ^ rk01; rk02 ^= rk1B; x2 = pE ^ rk02; rk03 ^= rk1C; x3 = pF ^ rk03; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk04 ^= rk1D; x0 ^= rk04; rk05 ^= rk1E; x1 ^= rk05; rk06 ^= rk1F; x2 ^= rk06; rk07 ^= rk00; x3 ^= rk07; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk08 ^= rk01; x0 ^= rk08; rk09 ^= rk02; x1 ^= rk09; rk0A ^= rk03; x2 ^= rk0A; rk0B ^= rk04; x3 ^= rk0B; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk0C ^= rk05; x0 ^= rk0C; rk0D ^= rk06; x1 ^= rk0D; rk0E ^= rk07; x2 ^= rk0E; rk0F ^= rk08; x3 ^= rk0F; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); p8 ^= x0; p9 ^= x1; pA ^= x2; pB ^= x3; rk10 ^= rk09; x0 = p4 ^ rk10; rk11 ^= rk0A; x1 = p5 ^ rk11; rk12 ^= rk0B; x2 = p6 ^ rk12; rk13 ^= rk0C; x3 = p7 ^ rk13; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk14 ^= rk0D; x0 ^= rk14; rk15 ^= rk0E; x1 ^= rk15; rk16 ^= rk0F; x2 ^= rk16; rk17 ^= rk10; x3 ^= rk17; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk18 ^= rk11; x0 ^= rk18; rk19 ^= rk12; x1 ^= rk19; rk1A ^= rk13; x2 ^= rk1A; rk1B ^= rk14; x3 ^= rk1B; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk1C ^= rk15; x0 ^= rk1C; rk1D ^= rk16; x1 ^= rk1D; rk1E ^= rk17; x2 ^= rk1E; rk1F ^= rk18; x3 ^= rk1F; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); p0 ^= x0; p1 ^= x1; p2 ^= x2; p3 ^= x3; /* round 3, 7, 11 */ KEY_EXPAND_ELT(sharedMemory, rk00, rk01, rk02, rk03); rk00 ^= rk1C; rk01 ^= rk1D; rk02 ^= rk1E; rk03 ^= rk1F; x0 = p8 ^ rk00; x1 = p9 ^ rk01; x2 = pA ^ rk02; x3 = pB ^ rk03; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk04, rk05, rk06, rk07); rk04 ^= rk00; rk05 ^= rk01; rk06 ^= rk02; rk07 ^= rk03; x0 ^= rk04; x1 ^= rk05; x2 ^= rk06; x3 ^= rk07; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk08, rk09, rk0A, rk0B); rk08 ^= rk04; rk09 ^= rk05; rk0A ^= rk06; rk0B ^= rk07; x0 ^= rk08; x1 ^= rk09; x2 ^= rk0A; x3 ^= rk0B; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk0C, rk0D, rk0E, rk0F); rk0C ^= rk08; rk0D ^= rk09; rk0E ^= rk0A; rk0F ^= rk0B; x0 ^= rk0C; x1 ^= rk0D; x2 ^= rk0E; x3 ^= rk0F; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); p4 ^= x0; p5 ^= x1; p6 ^= x2; p7 ^= x3; KEY_EXPAND_ELT(sharedMemory, rk10, rk11, rk12, rk13); rk10 ^= rk0C; rk11 ^= rk0D; rk12 ^= rk0E; rk13 ^= rk0F; x0 = p0 ^ rk10; x1 = p1 ^ rk11; x2 = p2 ^ rk12; x3 = p3 ^ rk13; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk14, rk15, rk16, rk17); rk14 ^= rk10; rk15 ^= rk11; rk16 ^= rk12; rk17 ^= rk13; x0 ^= rk14; x1 ^= rk15; x2 ^= rk16; x3 ^= rk17; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk18, rk19, rk1A, rk1B); rk18 ^= rk14; rk19 ^= rk15; rk1A ^= rk16; rk1B ^= rk17; x0 ^= rk18; x1 ^= rk19; x2 ^= rk1A; x3 ^= rk1B; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk1C, rk1D, rk1E, rk1F); rk1C ^= rk18; rk1D ^= rk19; rk1E ^= rk1A; rk1F ^= rk1B; x0 ^= rk1C; x1 ^= rk1D; x2 ^= rk1E; x3 ^= rk1F; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); pC ^= x0; pD ^= x1; pE ^= x2; pF ^= x3; /* round 4, 8, 12 */ rk00 ^= rk19; x0 = p4 ^ rk00; rk01 ^= rk1A; x1 = p5 ^ rk01; rk02 ^= rk1B; x2 = p6 ^ rk02; rk03 ^= rk1C; x3 = p7 ^ rk03; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk04 ^= rk1D; x0 ^= rk04; rk05 ^= rk1E; x1 ^= rk05; rk06 ^= rk1F; x2 ^= rk06; rk07 ^= rk00; x3 ^= rk07; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk08 ^= rk01; x0 ^= rk08; rk09 ^= rk02; x1 ^= rk09; rk0A ^= rk03; x2 ^= rk0A; rk0B ^= rk04; x3 ^= rk0B; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk0C ^= rk05; x0 ^= rk0C; rk0D ^= rk06; x1 ^= rk0D; rk0E ^= rk07; x2 ^= rk0E; rk0F ^= rk08; x3 ^= rk0F; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); p0 ^= x0; p1 ^= x1; p2 ^= x2; p3 ^= x3; rk10 ^= rk09; x0 = pC ^ rk10; rk11 ^= rk0A; x1 = pD ^ rk11; rk12 ^= rk0B; x2 = pE ^ rk12; rk13 ^= rk0C; x3 = pF ^ rk13; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk14 ^= rk0D; x0 ^= rk14; rk15 ^= rk0E; x1 ^= rk15; rk16 ^= rk0F; x2 ^= rk16; rk17 ^= rk10; x3 ^= rk17; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk18 ^= rk11; x0 ^= rk18; rk19 ^= rk12; x1 ^= rk19; rk1A ^= rk13; x2 ^= rk1A; rk1B ^= rk14; x3 ^= rk1B; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); rk1C ^= rk15; x0 ^= rk1C; rk1D ^= rk16; x1 ^= rk1D; rk1E ^= rk17; x2 ^= rk1E; rk1F ^= rk18; x3 ^= rk1F; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); p8 ^= x0; p9 ^= x1; pA ^= x2; pB ^= x3; /* round 13 */ KEY_EXPAND_ELT(sharedMemory, rk00, rk01, rk02, rk03); rk00 ^= rk1C; rk01 ^= rk1D; rk02 ^= rk1E; rk03 ^= rk1F; x0 = p0 ^ rk00; x1 = p1 ^ rk01; x2 = p2 ^ rk02; x3 = p3 ^ rk03; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk04, rk05, rk06, rk07); rk04 ^= rk00; rk05 ^= rk01; rk06 ^= rk02; rk07 ^= rk03; x0 ^= rk04; x1 ^= rk05; x2 ^= rk06; x3 ^= rk07; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk08, rk09, rk0A, rk0B); rk08 ^= rk04; rk09 ^= rk05; rk0A ^= rk06; rk0B ^= rk07; x0 ^= rk08; x1 ^= rk09; x2 ^= rk0A; x3 ^= rk0B; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk0C, rk0D, rk0E, rk0F); rk0C ^= rk08; rk0D ^= rk09; rk0E ^= rk0A; rk0F ^= rk0B; x0 ^= rk0C; x1 ^= rk0D; x2 ^= rk0E; x3 ^= rk0F; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); pC ^= x0; pD ^= x1; pE ^= x2; pF ^= x3; KEY_EXPAND_ELT(sharedMemory, rk10, rk11, rk12, rk13); rk10 ^= rk0C; rk11 ^= rk0D; rk12 ^= rk0E; rk13 ^= rk0F; x0 = p8 ^ rk10; x1 = p9 ^ rk11; x2 = pA ^ rk12; x3 = pB ^ rk13; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk14, rk15, rk16, rk17); rk14 ^= rk10; rk15 ^= rk11; rk16 ^= rk12; rk17 ^= rk13; x0 ^= rk14; x1 ^= rk15; x2 ^= rk16; x3 ^= rk17; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk18, rk19, rk1A, rk1B); rk18 ^= rk14; rk19 ^= rk15 ^ counter; rk1A ^= rk16; rk1B ^= rk17 ^ 0xFFFFFFFF; x0 ^= rk18; x1 ^= rk19; x2 ^= rk1A; x3 ^= rk1B; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); KEY_EXPAND_ELT(sharedMemory, rk1C, rk1D, rk1E, rk1F); rk1C ^= rk18; rk1D ^= rk19; rk1E ^= rk1A; rk1F ^= rk1B; x0 ^= rk1C; x1 ^= rk1D; x2 ^= rk1E; x3 ^= rk1F; AES_ROUND_NOKEY(sharedMemory, x0, x1, x2, x3); p4 ^= x0; p5 ^= x1; p6 ^= x2; p7 ^= x3; state[0x0] ^= p8; state[0x1] ^= p9; state[0x2] ^= pA; state[0x3] ^= pB; state[0x4] ^= pC; state[0x5] ^= pD; state[0x6] ^= pE; state[0x7] ^= pF; state[0x8] ^= p0; state[0x9] ^= p1; state[0xA] ^= p2; state[0xB] ^= p3; state[0xC] ^= p4; state[0xD] ^= p5; state[0xE] ^= p6; state[0xF] ^= p7; } __device__ __forceinline__ void shavite_gpu_init(uint32_t *sharedMemory) { /* each thread startup will fill a uint32 */ if (threadIdx.x < 128) { sharedMemory[threadIdx.x] = d_AES0[threadIdx.x]; sharedMemory[threadIdx.x + 256] = d_AES1[threadIdx.x]; sharedMemory[threadIdx.x + 512] = d_AES2[threadIdx.x]; sharedMemory[threadIdx.x + 768] = d_AES3[threadIdx.x]; sharedMemory[threadIdx.x + 64 * 2] = d_AES0[threadIdx.x + 64 * 2]; sharedMemory[threadIdx.x + 64 * 2 + 256] = d_AES1[threadIdx.x + 64 * 2]; sharedMemory[threadIdx.x + 64 * 2 + 512] = d_AES2[threadIdx.x + 64 * 2]; sharedMemory[threadIdx.x + 64 * 2 + 768] = d_AES3[threadIdx.x + 64 * 2]; } } // GPU Hash __global__ __launch_bounds__(TPB, 7) /* 64 registers with 128,8 - 72 regs with 128,7 */ void x11_shavite512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) { __shared__ uint32_t sharedMemory[1024]; shavite_gpu_init(sharedMemory); uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread); int hashPosition = nounce - startNounce; uint32_t *Hash = (uint32_t*)&g_hash[hashPosition<<3]; // kopiere init-state uint32_t state[16] = { SPH_C32(0x72FCCDD8), SPH_C32(0x79CA4727), SPH_C32(0x128A077B), SPH_C32(0x40D55AEC), SPH_C32(0xD1901A06), SPH_C32(0x430AE307), SPH_C32(0xB29F5CD1), SPH_C32(0xDF07FBFC), SPH_C32(0x8E45D73D), SPH_C32(0x681AB538), SPH_C32(0xBDE86578), SPH_C32(0xDD577E47), SPH_C32(0xE275EADE), SPH_C32(0x502D9FCD), SPH_C32(0xB9357178), SPH_C32(0x022A4B9A) }; // nachricht laden uint32_t msg[32]; // fülle die Nachricht mit 64-byte (vorheriger Hash) #pragma unroll 16 for(int i=0;i<16;i++) msg[i] = Hash[i]; // Nachrichtenende msg[16] = 0x80; #pragma unroll 10 for(int i=17;i<27;i++) msg[i] = 0; msg[27] = 0x02000000; msg[28] = 0; msg[29] = 0; msg[30] = 0; msg[31] = 0x02000000; c512(sharedMemory, state, msg, 512); #pragma unroll 16 for(int i=0;i<16;i++) Hash[i] = state[i]; } } __global__ __launch_bounds__(TPB, 7) void x11_shavite512_gpu_hash_80(uint32_t threads, uint32_t startNounce, void *outputHash) { __shared__ uint32_t sharedMemory[1024]; shavite_gpu_init(sharedMemory); uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { const uint32_t nounce = startNounce + thread; // kopiere init-state uint32_t state[16] = { SPH_C32(0x72FCCDD8), SPH_C32(0x79CA4727), SPH_C32(0x128A077B), SPH_C32(0x40D55AEC), SPH_C32(0xD1901A06), SPH_C32(0x430AE307), SPH_C32(0xB29F5CD1), SPH_C32(0xDF07FBFC), SPH_C32(0x8E45D73D), SPH_C32(0x681AB538), SPH_C32(0xBDE86578), SPH_C32(0xDD577E47), SPH_C32(0xE275EADE), SPH_C32(0x502D9FCD), SPH_C32(0xB9357178), SPH_C32(0x022A4B9A) }; uint32_t msg[32]; #pragma unroll 32 for(int i=0;i<32;i++) { msg[i] = c_PaddedMessage80[i]; } msg[19] = cuda_swab32(nounce); msg[20] = 0x80; msg[27] = 0x2800000; msg[31] = 0x2000000; c512(sharedMemory, state, msg, 640); uint32_t *outHash = (uint32_t *)outputHash + 16 * thread; #pragma unroll 16 for(int i=0;i<16;i++) outHash[i] = state[i]; } //thread < threads } __host__ void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) { const uint32_t threadsperblock = TPB; dim3 grid((threads + threadsperblock-1)/threadsperblock); dim3 block(threadsperblock); x11_shavite512_gpu_hash_64<<>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector); //MyStreamSynchronize(NULL, order, thr_id); } __host__ void x11_shavite512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_outputHash, int order) { const uint32_t threadsperblock = TPB; dim3 grid((threads + threadsperblock-1)/threadsperblock); dim3 block(threadsperblock); x11_shavite512_gpu_hash_80<<>>(threads, startNounce, d_outputHash); } __host__ void x11_shavite512_cpu_init(int thr_id, uint32_t threads) { aes_cpu_init(thr_id); } __host__ void x11_shavite512_setBlock_80(void *pdata) { // Message mit Padding bereitstellen // lediglich die korrekte Nonce ist noch ab Byte 76 einzusetzen. unsigned char PaddedMessage[128]; memcpy(PaddedMessage, pdata, 80); memset(PaddedMessage+80, 0, 48); cudaMemcpyToSymbol(c_PaddedMessage80, PaddedMessage, 32*sizeof(uint32_t), 0, cudaMemcpyHostToDevice); }