tribus: handle jh80 midstate (+10%)
This commit is contained in:
parent
807e141a4d
commit
7ce3f2ccce
@ -1,3 +1,8 @@
|
||||
/**
|
||||
* JH512 64 and 80 kernels
|
||||
*
|
||||
* JH80 by tpruvot - 2017 - under GPLv3
|
||||
**/
|
||||
#include <cuda_helper.h>
|
||||
|
||||
// #include <stdio.h> // printf
|
||||
@ -335,7 +340,7 @@ void quark_jh512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce,
|
||||
// Setup function
|
||||
__host__ void quark_jh512_cpu_init(int thr_id, uint32_t threads) {}
|
||||
|
||||
#define WANT_JH80
|
||||
#define WANT_JH80_MIDSTATE
|
||||
#ifdef WANT_JH80
|
||||
|
||||
__constant__
|
||||
@ -417,3 +422,87 @@ void jh512_cuda_hash_80(const int thr_id, const uint32_t threads, const uint32_t
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef WANT_JH80_MIDSTATE
|
||||
|
||||
__constant__ static uint32_t c_JHState[32];
|
||||
__constant__ static uint32_t c_Message[4];
|
||||
|
||||
__global__
|
||||
void jh512_gpu_hash_80(const uint32_t threads, const uint32_t startNounce, uint32_t * g_outhash)
|
||||
{
|
||||
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||
if (thread < threads)
|
||||
{
|
||||
// 1 (precomputed state)
|
||||
uint32_t x[8][4];
|
||||
AS_UINT4(&x[0][0]) = AS_UINT4(&c_JHState[ 0]);
|
||||
AS_UINT4(&x[1][0]) = AS_UINT4(&c_JHState[ 4]);
|
||||
AS_UINT4(&x[2][0]) = AS_UINT4(&c_JHState[ 8]);
|
||||
AS_UINT4(&x[3][0]) = AS_UINT4(&c_JHState[12]);
|
||||
|
||||
AS_UINT4(&x[4][0]) = AS_UINT4(&c_JHState[16]);
|
||||
AS_UINT4(&x[5][0]) = AS_UINT4(&c_JHState[20]);
|
||||
AS_UINT4(&x[6][0]) = AS_UINT4(&c_JHState[24]);
|
||||
AS_UINT4(&x[7][0]) = AS_UINT4(&c_JHState[28]);
|
||||
|
||||
// 2 (16 bytes with nonce)
|
||||
uint32_t h[4];
|
||||
AS_UINT2(&h[0]) = AS_UINT2(&c_Message[0]);
|
||||
h[2] = c_Message[2];
|
||||
h[3] = cuda_swab32(startNounce + thread);
|
||||
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 4; i++)
|
||||
x[0][i] ^= h[i];
|
||||
x[1][0] ^= 0x80U;
|
||||
E8(x);
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 4; i++)
|
||||
x[4][i] ^= h[i];
|
||||
x[5][0] ^= 0x80U;
|
||||
|
||||
// 3 close
|
||||
x[3][3] ^= 0x80020000U; // 80 bytes = 640bits (0x280)
|
||||
E8(x);
|
||||
x[7][3] ^= 0x80020000U;
|
||||
|
||||
uint32_t *Hash = &g_outhash[(size_t)16 * thread];
|
||||
AS_UINT4(&Hash[ 0]) = AS_UINT4(&x[4][0]);
|
||||
AS_UINT4(&Hash[ 4]) = AS_UINT4(&x[5][0]);
|
||||
AS_UINT4(&Hash[ 8]) = AS_UINT4(&x[6][0]);
|
||||
AS_UINT4(&Hash[12]) = AS_UINT4(&x[7][0]);
|
||||
}
|
||||
}
|
||||
|
||||
__host__
|
||||
void jh512_cuda_hash_80(const int thr_id, const uint32_t threads, const uint32_t startNounce, uint32_t *d_hash)
|
||||
{
|
||||
const uint32_t threadsperblock = 256;
|
||||
dim3 grid((threads + threadsperblock - 1) / threadsperblock);
|
||||
dim3 block(threadsperblock);
|
||||
|
||||
jh512_gpu_hash_80 <<<grid, block>>> (threads, startNounce, d_hash);
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
#undef SPH_C32
|
||||
#undef SPH_T32
|
||||
#undef SPH_C64
|
||||
#undef SPH_T64
|
||||
#include <sph/sph_jh.h>
|
||||
}
|
||||
|
||||
__host__
|
||||
void jh512_setBlock_80(int thr_id, uint32_t *endiandata)
|
||||
{
|
||||
sph_jh512_context ctx_jh;
|
||||
|
||||
sph_jh512_init(&ctx_jh);
|
||||
sph_jh512(&ctx_jh, endiandata, 64);
|
||||
|
||||
cudaMemcpyToSymbol(c_JHState, ctx_jh.H.narrow, 128, 0, cudaMemcpyHostToDevice);
|
||||
cudaMemcpyToSymbol(c_Message, &endiandata[16], sizeof(c_Message), 0, cudaMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user