mirror of
https://github.com/GOSTSec/ccminer
synced 2025-01-24 05:24:16 +00:00
cubehash: improve a bit and cleanup
This commit is contained in:
parent
08a3f3402c
commit
9c053042c9
@ -1,6 +1,5 @@
|
|||||||
#include "cuda_helper.h"
|
#include <cuda_helper.h>
|
||||||
|
#include <cuda_vectors.h>
|
||||||
typedef unsigned char BitSequence;
|
|
||||||
|
|
||||||
#define CUBEHASH_ROUNDS 16 /* this is r for CubeHashr/b */
|
#define CUBEHASH_ROUNDS 16 /* this is r for CubeHashr/b */
|
||||||
#define CUBEHASH_BLOCKBYTES 32 /* this is b for CubeHashr/b */
|
#define CUBEHASH_BLOCKBYTES 32 /* this is b for CubeHashr/b */
|
||||||
@ -18,17 +17,14 @@ typedef unsigned char BitSequence;
|
|||||||
|
|
||||||
__device__ __constant__
|
__device__ __constant__
|
||||||
static const uint32_t c_IV_512[32] = {
|
static const uint32_t c_IV_512[32] = {
|
||||||
0x2AEA2A61, 0x50F494D4, 0x2D538B8B,
|
0x2AEA2A61, 0x50F494D4, 0x2D538B8B, 0x4167D83E,
|
||||||
0x4167D83E, 0x3FEE2313, 0xC701CF8C,
|
0x3FEE2313, 0xC701CF8C, 0xCC39968E, 0x50AC5695,
|
||||||
0xCC39968E, 0x50AC5695, 0x4D42C787,
|
0x4D42C787, 0xA647A8B3, 0x97CF0BEF, 0x825B4537,
|
||||||
0xA647A8B3, 0x97CF0BEF, 0x825B4537,
|
0xEEF864D2, 0xF22090C4, 0xD0E5CD33, 0xA23911AE,
|
||||||
0xEEF864D2, 0xF22090C4, 0xD0E5CD33,
|
0xFCD398D9, 0x148FE485, 0x1B017BEF, 0xB6444532,
|
||||||
0xA23911AE, 0xFCD398D9, 0x148FE485,
|
0x6A536159, 0x2FF5781C, 0x91FA7934, 0x0DBADEA9,
|
||||||
0x1B017BEF, 0xB6444532, 0x6A536159,
|
0xD65C8A2B, 0xA5A70E75, 0xB1C62456, 0xBC796576,
|
||||||
0x2FF5781C, 0x91FA7934, 0x0DBADEA9,
|
0x1921C8F7, 0xE7989AF1, 0x7795D246, 0xD43E3B44
|
||||||
0xD65C8A2B, 0xA5A70E75, 0xB1C62456,
|
|
||||||
0xBC796576, 0x1921C8F7, 0xE7989AF1,
|
|
||||||
0x7795D246, 0xD43E3B44
|
|
||||||
};
|
};
|
||||||
|
|
||||||
__device__ __forceinline__
|
__device__ __forceinline__
|
||||||
@ -149,107 +145,68 @@ static void rrounds(uint32_t x[2][2][2][2][2])
|
|||||||
}
|
}
|
||||||
|
|
||||||
__device__ __forceinline__
|
__device__ __forceinline__
|
||||||
static void block_tox(uint32_t block[16], uint32_t x[2][2][2][2][2])
|
static void block_tox(uint32_t* const block, uint32_t x[2][2][2][2][2])
|
||||||
{
|
{
|
||||||
int k;
|
// read 32 bytes input from global mem with uint2 chunks
|
||||||
int l;
|
AS_UINT2(x[0][0][0][0]) ^= AS_UINT2(&block[0]);
|
||||||
int m;
|
AS_UINT2(x[0][0][0][1]) ^= AS_UINT2(&block[2]);
|
||||||
uint32_t *in = block;
|
AS_UINT2(x[0][0][1][0]) ^= AS_UINT2(&block[4]);
|
||||||
|
AS_UINT2(x[0][0][1][1]) ^= AS_UINT2(&block[6]);
|
||||||
#pragma unroll 2
|
|
||||||
for (k = 0;k < 2;++k)
|
|
||||||
#pragma unroll 2
|
|
||||||
for (l = 0;l < 2;++l)
|
|
||||||
#pragma unroll 2
|
|
||||||
for (m = 0;m < 2;++m)
|
|
||||||
x[0][0][k][l][m] ^= *in++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
__device__ __forceinline__
|
__device__ __forceinline__
|
||||||
static void hash_fromx(uint32_t hash[16], uint32_t x[2][2][2][2][2])
|
static void hash_fromx(uint32_t hash[16], uint32_t const x[2][2][2][2][2])
|
||||||
{
|
{
|
||||||
int j;
|
// used to write final hash to global mem
|
||||||
int k;
|
AS_UINT2(&hash[ 0]) = AS_UINT2(x[0][0][0][0]);
|
||||||
int l;
|
AS_UINT2(&hash[ 2]) = AS_UINT2(x[0][0][0][1]);
|
||||||
int m;
|
AS_UINT2(&hash[ 4]) = AS_UINT2(x[0][0][1][0]);
|
||||||
uint32_t *out = hash;
|
AS_UINT2(&hash[ 6]) = AS_UINT2(x[0][0][1][1]);
|
||||||
|
AS_UINT2(&hash[ 8]) = AS_UINT2(x[0][1][0][0]);
|
||||||
#pragma unroll 2
|
AS_UINT2(&hash[10]) = AS_UINT2(x[0][1][0][1]);
|
||||||
for (j = 0;j < 2;++j)
|
AS_UINT2(&hash[12]) = AS_UINT2(x[0][1][1][0]);
|
||||||
#pragma unroll 2
|
AS_UINT2(&hash[14]) = AS_UINT2(x[0][1][1][1]);
|
||||||
for (k = 0;k < 2;++k)
|
|
||||||
#pragma unroll 2
|
|
||||||
for (l = 0;l < 2;++l)
|
|
||||||
#pragma unroll 2
|
|
||||||
for (m = 0;m < 2;++m)
|
|
||||||
*out++ = x[0][j][k][l][m];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
__device__
|
#define Init(x) \
|
||||||
void Init(uint32_t x[2][2][2][2][2])
|
AS_UINT2(x[0][0][0][0]) = AS_UINT2(&c_IV_512[ 0]); \
|
||||||
|
AS_UINT2(x[0][0][0][1]) = AS_UINT2(&c_IV_512[ 2]); \
|
||||||
|
AS_UINT2(x[0][0][1][0]) = AS_UINT2(&c_IV_512[ 4]); \
|
||||||
|
AS_UINT2(x[0][0][1][1]) = AS_UINT2(&c_IV_512[ 6]); \
|
||||||
|
AS_UINT2(x[0][1][0][0]) = AS_UINT2(&c_IV_512[ 8]); \
|
||||||
|
AS_UINT2(x[0][1][0][1]) = AS_UINT2(&c_IV_512[10]); \
|
||||||
|
AS_UINT2(x[0][1][1][0]) = AS_UINT2(&c_IV_512[12]); \
|
||||||
|
AS_UINT2(x[0][1][1][1]) = AS_UINT2(&c_IV_512[14]); \
|
||||||
|
AS_UINT2(x[1][0][0][0]) = AS_UINT2(&c_IV_512[16]); \
|
||||||
|
AS_UINT2(x[1][0][0][1]) = AS_UINT2(&c_IV_512[18]); \
|
||||||
|
AS_UINT2(x[1][0][1][0]) = AS_UINT2(&c_IV_512[20]); \
|
||||||
|
AS_UINT2(x[1][0][1][1]) = AS_UINT2(&c_IV_512[22]); \
|
||||||
|
AS_UINT2(x[1][1][0][0]) = AS_UINT2(&c_IV_512[24]); \
|
||||||
|
AS_UINT2(x[1][1][0][1]) = AS_UINT2(&c_IV_512[26]); \
|
||||||
|
AS_UINT2(x[1][1][1][0]) = AS_UINT2(&c_IV_512[28]); \
|
||||||
|
AS_UINT2(x[1][1][1][1]) = AS_UINT2(&c_IV_512[30]);
|
||||||
|
|
||||||
|
__device__ __forceinline__
|
||||||
|
static void Update32(uint32_t x[2][2][2][2][2], uint32_t* const data)
|
||||||
{
|
{
|
||||||
int i,j,k,l,m;
|
/* "xor the block into the first b bytes of the state" */
|
||||||
#if 0
|
block_tox(data, x);
|
||||||
/* "the first three state words x_00000, x_00001, x_00010" */
|
/* "and then transform the state invertibly through r identical rounds" */
|
||||||
/* "are set to the integers h/8, b, r respectively." */
|
rrounds(x);
|
||||||
/* "the remaining state words are set to 0." */
|
|
||||||
#pragma unroll 2
|
|
||||||
for (i = 0;i < 2;++i)
|
|
||||||
#pragma unroll 2
|
|
||||||
for (j = 0;j < 2;++j)
|
|
||||||
#pragma unroll 2
|
|
||||||
for (k = 0;k < 2;++k)
|
|
||||||
#pragma unroll 2
|
|
||||||
for (l = 0;l < 2;++l)
|
|
||||||
#pragma unroll 2
|
|
||||||
for (m = 0;m < 2;++m)
|
|
||||||
x[i][j][k][l][m] = 0;
|
|
||||||
x[0][0][0][0][0] = 512/8;
|
|
||||||
x[0][0][0][0][1] = CUBEHASH_BLOCKBYTES;
|
|
||||||
x[0][0][0][1][0] = CUBEHASH_ROUNDS;
|
|
||||||
|
|
||||||
/* "the state is then transformed invertibly through 10r identical rounds */
|
|
||||||
for (i = 0;i < 10;++i) rrounds(x);
|
|
||||||
#else
|
|
||||||
const uint32_t *iv = c_IV_512;
|
|
||||||
|
|
||||||
#pragma unroll 2
|
|
||||||
for (i = 0;i < 2;++i)
|
|
||||||
#pragma unroll 2
|
|
||||||
for (j = 0;j < 2;++j)
|
|
||||||
#pragma unroll 2
|
|
||||||
for (k = 0;k < 2;++k)
|
|
||||||
#pragma unroll 2
|
|
||||||
for (l = 0;l < 2;++l)
|
|
||||||
#pragma unroll 2
|
|
||||||
for (m = 0;m < 2;++m)
|
|
||||||
x[i][j][k][l][m] = *iv++;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
__device__ __forceinline__
|
__device__ __forceinline__
|
||||||
static void Update32(uint32_t x[2][2][2][2][2], const BitSequence *data)
|
static void Final(uint32_t x[2][2][2][2][2], uint32_t *hashval)
|
||||||
{
|
{
|
||||||
/* "xor the block into the first b bytes of the state" */
|
/* "the integer 1 is xored into the last state word x_11111" */
|
||||||
/* "and then transform the state invertibly through r identical rounds" */
|
x[1][1][1][1][1] ^= 1;
|
||||||
block_tox((uint32_t*)data, x);
|
|
||||||
rrounds(x);
|
|
||||||
}
|
|
||||||
|
|
||||||
__device__ __forceinline__
|
/* "the state is then transformed invertibly through 10r identical rounds" */
|
||||||
static void Final(uint32_t x[2][2][2][2][2], BitSequence *hashval)
|
#pragma unroll 10
|
||||||
{
|
for (int i = 0; i < 10; i++) rrounds(x);
|
||||||
int i;
|
|
||||||
|
|
||||||
/* "the integer 1 is xored into the last state word x_11111" */
|
/* "output the first h/8 bytes of the state" */
|
||||||
x[1][1][1][1][1] ^= 1;
|
hash_fromx(hashval, x);
|
||||||
|
|
||||||
/* "the state is then transformed invertibly through 10r identical rounds" */
|
|
||||||
#pragma unroll 10
|
|
||||||
for (i = 0;i < 10;++i) rrounds(x);
|
|
||||||
|
|
||||||
/* "output the first h/8 bytes of the state" */
|
|
||||||
hash_fromx((uint32_t*)hashval, x);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -269,20 +226,17 @@ void x11_cubehash512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_
|
|||||||
uint32_t x[2][2][2][2][2];
|
uint32_t x[2][2][2][2][2];
|
||||||
Init(x);
|
Init(x);
|
||||||
|
|
||||||
// erste Hälfte des Hashes (32 bytes)
|
Update32(x, &Hash[0]);
|
||||||
Update32(x, (const BitSequence*)Hash);
|
Update32(x, &Hash[8]);
|
||||||
|
|
||||||
// zweite Hälfte des Hashes (32 bytes)
|
|
||||||
Update32(x, (const BitSequence*)(Hash+8));
|
|
||||||
|
|
||||||
// Padding Block
|
// Padding Block
|
||||||
uint32_t last[8];
|
uint32_t last[8];
|
||||||
last[0] = 0x80;
|
last[0] = 0x80;
|
||||||
#pragma unroll 7
|
#pragma unroll 7
|
||||||
for (int i=1; i < 8; i++) last[i] = 0;
|
for (int i=1; i < 8; i++) last[i] = 0;
|
||||||
Update32(x, (const BitSequence*)last);
|
Update32(x, last);
|
||||||
|
|
||||||
Final(x, (BitSequence*)Hash);
|
Final(x, Hash);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -332,12 +286,12 @@ void cubehash512_gpu_hash_80(const uint32_t threads, const uint32_t startNounce,
|
|||||||
// first 32 bytes
|
// first 32 bytes
|
||||||
AS_UINT4(&message[0]) = AS_UINT4(&c_PaddedMessage80[0]);
|
AS_UINT4(&message[0]) = AS_UINT4(&c_PaddedMessage80[0]);
|
||||||
AS_UINT4(&message[4]) = AS_UINT4(&c_PaddedMessage80[4]);
|
AS_UINT4(&message[4]) = AS_UINT4(&c_PaddedMessage80[4]);
|
||||||
Update32(x, (const BitSequence*)message);
|
Update32(x, message);
|
||||||
|
|
||||||
// second 32 bytes
|
// second 32 bytes
|
||||||
AS_UINT4(&message[0]) = AS_UINT4(&c_PaddedMessage80[8]);
|
AS_UINT4(&message[0]) = AS_UINT4(&c_PaddedMessage80[8]);
|
||||||
AS_UINT4(&message[4]) = AS_UINT4(&c_PaddedMessage80[12]);
|
AS_UINT4(&message[4]) = AS_UINT4(&c_PaddedMessage80[12]);
|
||||||
Update32(x, (const BitSequence*)message);
|
Update32(x, message);
|
||||||
|
|
||||||
// last 16 bytes + Padding
|
// last 16 bytes + Padding
|
||||||
AS_UINT4(&message[0]) = AS_UINT4(&c_PaddedMessage80[16]);
|
AS_UINT4(&message[0]) = AS_UINT4(&c_PaddedMessage80[16]);
|
||||||
@ -346,9 +300,9 @@ void cubehash512_gpu_hash_80(const uint32_t threads, const uint32_t startNounce,
|
|||||||
message[5] = 0;
|
message[5] = 0;
|
||||||
message[6] = 0;
|
message[6] = 0;
|
||||||
message[7] = 0;
|
message[7] = 0;
|
||||||
Update32(x, (const BitSequence*)message);
|
Update32(x, message);
|
||||||
|
|
||||||
BitSequence* output = (BitSequence*) (&g_outhash[(size_t)8 * thread]);
|
uint32_t* output = (uint32_t*) (&g_outhash[(size_t)8 * thread]);
|
||||||
Final(x, output);
|
Final(x, output);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user