mirror of
https://github.com/GOSTSec/ccminer
synced 2025-01-10 14:57:53 +00:00
faster x15, +23KH or 4ms on whirpool (30ms vs 34ms)
tpruvot: i didnt pick the asm replace_hiword, slower on linux
This commit is contained in:
parent
73f22b237a
commit
26b9fe3586
@ -61,7 +61,7 @@ __device__ __forceinline__ uint64_t MAKE_ULONGLONG(uint32_t LO, uint32_t HI)
|
||||
|
||||
// das Hi Word in einem 64 Bit Typen ersetzen
|
||||
__device__ __forceinline__ uint64_t REPLACE_HIWORD(const uint64_t &x, const uint32_t &y) {
|
||||
return (x & 0xFFFFFFFFULL) | (((uint64_t)y) << 32ULL);
|
||||
return (x & 0xFFFFFFFFULL) | (((uint64_t)y) << 32U);
|
||||
}
|
||||
|
||||
// das Lo Word in einem 64 Bit Typen ersetzen
|
||||
|
@ -5,7 +5,7 @@
|
||||
#include <stdio.h>
|
||||
#include <memory.h>
|
||||
|
||||
#define threadsperblock 512
|
||||
#define threadsperblock 256
|
||||
|
||||
#define USE_SHARED 1
|
||||
|
||||
@ -37,7 +37,7 @@ __constant__ static uint64_t mixTob7Tox[256];
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2014 djm34 & tpruvot
|
||||
* Copyright (c) 2014 djm34 & tpruvot & SP
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
@ -61,6 +61,7 @@ __constant__ static uint64_t mixTob7Tox[256];
|
||||
* ===========================(LICENSE END)=============================
|
||||
* @author djm34
|
||||
* @author tpruvot
|
||||
* @author SP
|
||||
*/
|
||||
|
||||
static const uint64_t old1_T0[256] = {
|
||||
@ -2193,7 +2194,6 @@ static const uint64_t plain_RC[10] = {
|
||||
|
||||
/* ====================================================================== */
|
||||
|
||||
#define BYTE(x, n) ((unsigned)((x) >> (8 * (n))) & 0xFF)
|
||||
|
||||
#define TRANSFER(dst, src) { \
|
||||
dst[0] = src ## 0; \
|
||||
@ -2207,6 +2207,8 @@ static const uint64_t plain_RC[10] = {
|
||||
}
|
||||
|
||||
#if !USE_ALL_TABLES
|
||||
#define BYTE(x, n) ((unsigned)((x) >> (8 * (n))) & 0xFF)
|
||||
|
||||
/* method disabled to reduce code size */
|
||||
__device__ __forceinline__
|
||||
static uint64_t table_skew(uint64_t val, int num) {
|
||||
@ -2243,22 +2245,14 @@ static uint64_t ROUND_ELT(const uint64_t* sharedMemory, uint64_t* __restrict__ i
|
||||
|
||||
__device__ __forceinline__
|
||||
static uint64_t ROUND_ELT(const uint64_t* sharedMemory, uint64_t* __restrict__ in,
|
||||
int i0,int i1,int i2,int i3,int i4,int i5,int i6,int i7)
|
||||
const int i0, const int i1, const int i2, const int i3, const int i4, const int i5, const int i6, const int i7)
|
||||
{
|
||||
uint32_t idx0, idx1, idx2, idx3, idx4, idx5, idx6, idx7;
|
||||
idx0 = BYTE(in[i0], 0);
|
||||
idx1 = BYTE(in[i1], 1) + 256;
|
||||
idx2 = BYTE(in[i2], 2) + 512;
|
||||
idx3 = BYTE(in[i3], 3) + 768;
|
||||
idx4 = BYTE(in[i4], 4) + 1024;
|
||||
idx5 = BYTE(in[i5], 5) + 1280;
|
||||
idx6 = BYTE(in[i6], 6) + 1536;
|
||||
idx7 = BYTE(in[i7], 7) + 1792;
|
||||
|
||||
return xor8(sharedMemory[idx0],sharedMemory[idx1],sharedMemory[idx2],sharedMemory[idx3],
|
||||
sharedMemory[idx4],sharedMemory[idx5],sharedMemory[idx6],sharedMemory[idx7]);
|
||||
uint32_t* in32 = (uint32_t*)in;
|
||||
return (sharedMemory[__byte_perm(in32[(i0 << 1)], 0, 0x4440)] ^ sharedMemory[__byte_perm(in32[(i1 << 1)], 0, 0x4441) + 256] ^
|
||||
sharedMemory[__byte_perm(in32[(i2 << 1)], 0, 0x4442) + 512] ^ sharedMemory[__byte_perm(in32[(i3 << 1)], 0, 0x4443) + 768] ^
|
||||
sharedMemory[__byte_perm(in32[(i4 << 1) + 1], 0, 0x4440) + 1024] ^ sharedMemory[__byte_perm(in32[(i5 << 1) + 1], 0, 0x4441) + 1280] ^
|
||||
sharedMemory[__byte_perm(in32[(i6 << 1) + 1], 0, 0x4442) + 1536] ^ sharedMemory[__byte_perm(in32[(i7 << 1) + 1], 0, 0x4443) + 1792]);
|
||||
}
|
||||
|
||||
#endif /* USE_ALL_TABLES */
|
||||
|
||||
#define ROUND(table, in, out, c0, c1, c2, c3, c4, c5, c6, c7) { \
|
||||
@ -2270,10 +2264,21 @@ static uint64_t ROUND_ELT(const uint64_t* sharedMemory, uint64_t* __restrict__ i
|
||||
out ## 5 = xor1(ROUND_ELT(table, in, 5, 4, 3, 2, 1, 0, 7, 6), c5); \
|
||||
out ## 6 = xor1(ROUND_ELT(table, in, 6, 5, 4, 3, 2, 1, 0, 7), c6); \
|
||||
out ## 7 = xor1(ROUND_ELT(table, in, 7, 6, 5, 4, 3, 2, 1, 0), c7); \
|
||||
}
|
||||
}
|
||||
|
||||
#define ROUND1(table, in, out, c) { \
|
||||
out ## 0 = xor1(ROUND_ELT(table, in, 0, 7, 6, 5, 4, 3, 2, 1), c); \
|
||||
out ## 1 = ROUND_ELT(table, in, 1, 0, 7, 6, 5, 4, 3, 2); \
|
||||
out ## 2 = ROUND_ELT(table, in, 2, 1, 0, 7, 6, 5, 4, 3); \
|
||||
out ## 3 = ROUND_ELT(table, in, 3, 2, 1, 0, 7, 6, 5, 4); \
|
||||
out ## 4 = ROUND_ELT(table, in, 4, 3, 2, 1, 0, 7, 6, 5); \
|
||||
out ## 5 = ROUND_ELT(table, in, 5, 4, 3, 2, 1, 0, 7, 6); \
|
||||
out ## 6 = ROUND_ELT(table, in, 6, 5, 4, 3, 2, 1, 0, 7); \
|
||||
out ## 7 = ROUND_ELT(table, in, 7, 6, 5, 4, 3, 2, 1, 0); \
|
||||
}
|
||||
|
||||
#define ROUND_KSCHED(table, in, out, c) \
|
||||
ROUND(table, in, out, c, 0, 0, 0, 0, 0, 0, 0) \
|
||||
ROUND1(table, in, out, c) \
|
||||
TRANSFER(in, out)
|
||||
|
||||
#define ROUND_WENC(table, in, key, out) \
|
||||
|
Loading…
Reference in New Issue
Block a user