|
|
|
@ -93,8 +93,8 @@ typedef struct {
@@ -93,8 +93,8 @@ typedef struct {
|
|
|
|
|
b0 ^= c1; |
|
|
|
|
|
|
|
|
|
/* initial values of chaining variables */ |
|
|
|
|
__device__ __constant__ uint32_t c_IV[40]; |
|
|
|
|
const uint32_t h_IV[40] = { |
|
|
|
|
__device__ static __constant__ uint32_t c_IV[40]; |
|
|
|
|
static const uint32_t h_IV[40] = { |
|
|
|
|
0x6d251e69,0x44b051e0,0x4eaa6fb4,0xdbf78465, |
|
|
|
|
0x6e292011,0x90152df4,0xee058139,0xdef610bb, |
|
|
|
|
0xc3b44b95,0xd9d2f256,0x70eee9a0,0xde099fa3, |
|
|
|
@ -106,8 +106,8 @@ const uint32_t h_IV[40] = {
@@ -106,8 +106,8 @@ const uint32_t h_IV[40] = {
|
|
|
|
|
0x6c68e9be,0x5ec41e22,0xc825b7c7,0xaffb4363, |
|
|
|
|
0xf5df3999,0x0fc688f1,0xb07224cc,0x03e86cea}; |
|
|
|
|
|
|
|
|
|
__device__ __constant__ uint32_t c_CNS[80]; |
|
|
|
|
const uint32_t h_CNS[80] = { |
|
|
|
|
__device__ static __constant__ uint32_t c_CNS[80]; |
|
|
|
|
static const uint32_t h_CNS[80] = { |
|
|
|
|
0x303994a6,0xe0337818,0xc0e65299,0x441ba90d, |
|
|
|
|
0x6cc33a12,0x7f34d442,0xdc56983e,0x9389217f, |
|
|
|
|
0x1e00108f,0xe5a8bce6,0x7800423d,0x5274baf4, |
|
|
|
@ -132,7 +132,7 @@ const uint32_t h_CNS[80] = {
@@ -132,7 +132,7 @@ const uint32_t h_CNS[80] = {
|
|
|
|
|
|
|
|
|
|
/***************************************************/ |
|
|
|
|
__device__ __forceinline__ |
|
|
|
|
void rnd512(hashState *state) |
|
|
|
|
static void rnd512(hashState *state) |
|
|
|
|
{ |
|
|
|
|
int i,j; |
|
|
|
|
uint32_t t[40]; |
|
|
|
@ -279,7 +279,7 @@ void rnd512(hashState *state)
@@ -279,7 +279,7 @@ void rnd512(hashState *state)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
__device__ __forceinline__ |
|
|
|
|
void Update512(hashState *state, const BitSequence *data) |
|
|
|
|
static void Update512(hashState *state, const BitSequence *data) |
|
|
|
|
{ |
|
|
|
|
#pragma unroll 8 |
|
|
|
|
for(int i=0;i<8;i++) state->buffer[i] = cuda_swab32(((uint32_t*)data)[i]); |
|
|
|
@ -293,7 +293,7 @@ void Update512(hashState *state, const BitSequence *data)
@@ -293,7 +293,7 @@ void Update512(hashState *state, const BitSequence *data)
|
|
|
|
|
|
|
|
|
|
/***************************************************/ |
|
|
|
|
__device__ __forceinline__ |
|
|
|
|
void finalization512(hashState *state, uint32_t *b) |
|
|
|
|
static void finalization512(hashState *state, uint32_t *b) |
|
|
|
|
{ |
|
|
|
|
int i,j; |
|
|
|
|
|
|
|
|
@ -332,7 +332,7 @@ void finalization512(hashState *state, uint32_t *b)
@@ -332,7 +332,7 @@ void finalization512(hashState *state, uint32_t *b)
|
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
typedef unsigned char BitSequence; |
|
|
|
|
//typedef unsigned char BitSequence; |
|
|
|
|
|
|
|
|
|
#define CUBEHASH_ROUNDS 16 /* this is r for CubeHashr/b */ |
|
|
|
|
#define CUBEHASH_BLOCKBYTES 32 /* this is b for CubeHashr/b */ |
|
|
|
@ -480,7 +480,8 @@ static __device__ __forceinline__ void rrounds(uint32_t x[2][2][2][2][2])
@@ -480,7 +480,8 @@ static __device__ __forceinline__ void rrounds(uint32_t x[2][2][2][2][2])
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static __device__ __forceinline__ void block_tox(uint32_t *in, uint32_t x[2][2][2][2][2]) |
|
|
|
|
__device__ __forceinline__ |
|
|
|
|
static void block_tox(uint32_t *in, uint32_t x[2][2][2][2][2]) |
|
|
|
|
{ |
|
|
|
|
int k; |
|
|
|
|
int l; |
|
|
|
@ -496,7 +497,8 @@ static __device__ __forceinline__ void block_tox(uint32_t *in, uint32_t x[2][2][
@@ -496,7 +497,8 @@ static __device__ __forceinline__ void block_tox(uint32_t *in, uint32_t x[2][2][
|
|
|
|
|
x[0][0][k][l][m] ^= *in++; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static __device__ __forceinline__ void hash_fromx(uint32_t *out, uint32_t x[2][2][2][2][2]) |
|
|
|
|
__device__ __forceinline__ |
|
|
|
|
static void hash_fromx(uint32_t *out, uint32_t x[2][2][2][2][2]) |
|
|
|
|
{ |
|
|
|
|
int j; |
|
|
|
|
int k; |
|
|
|
@ -556,7 +558,8 @@ void __device__ __forceinline__ Init(uint32_t x[2][2][2][2][2])
@@ -556,7 +558,8 @@ void __device__ __forceinline__ Init(uint32_t x[2][2][2][2][2])
|
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void __device__ __forceinline__ Update32(uint32_t x[2][2][2][2][2], const BitSequence *data) |
|
|
|
|
__device__ __forceinline__ |
|
|
|
|
static void Update32(uint32_t x[2][2][2][2][2], const BitSequence *data) |
|
|
|
|
{ |
|
|
|
|
/* "xor the block into the first b bytes of the state" */ |
|
|
|
|
/* "and then transform the state invertibly through r identical rounds" */ |
|
|
|
@ -564,7 +567,8 @@ void __device__ __forceinline__ Update32(uint32_t x[2][2][2][2][2], const BitSeq
@@ -564,7 +567,8 @@ void __device__ __forceinline__ Update32(uint32_t x[2][2][2][2][2], const BitSeq
|
|
|
|
|
rrounds(x); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void __device__ __forceinline__ Final(uint32_t x[2][2][2][2][2], BitSequence *hashval) |
|
|
|
|
__device__ __forceinline__ |
|
|
|
|
static void Final(uint32_t x[2][2][2][2][2], BitSequence *hashval) |
|
|
|
|
{ |
|
|
|
|
int i; |
|
|
|
|
|
|
|
|
@ -581,7 +585,7 @@ void __device__ __forceinline__ Final(uint32_t x[2][2][2][2][2], BitSequence *ha
@@ -581,7 +585,7 @@ void __device__ __forceinline__ Final(uint32_t x[2][2][2][2][2], BitSequence *ha
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/***************************************************/ |
|
|
|
|
// Die Hash-Funktion |
|
|
|
|
// Hash Function |
|
|
|
|
__global__ |
|
|
|
|
void x11_luffaCubehash512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) |
|
|
|
|
{ |
|
|
|
@ -619,25 +623,23 @@ void x11_luffaCubehash512_gpu_hash_64(int threads, uint32_t startNounce, uint64_
@@ -619,25 +623,23 @@ void x11_luffaCubehash512_gpu_hash_64(int threads, uint32_t startNounce, uint64_
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Setup-Funktionen |
|
|
|
|
__host__ void x11_luffaCubehash512_cpu_init(int thr_id, int threads) |
|
|
|
|
// Setup |
|
|
|
|
__host__ |
|
|
|
|
void x11_luffaCubehash512_cpu_init(int thr_id, int threads) |
|
|
|
|
{ |
|
|
|
|
cudaMemcpyToSymbol(c_IV, h_IV, sizeof(h_IV), 0, cudaMemcpyHostToDevice); |
|
|
|
|
cudaMemcpyToSymbol(c_CNS, h_CNS, sizeof(h_CNS), 0, cudaMemcpyHostToDevice); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__host__ void x11_luffaCubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) |
|
|
|
|
__host__ |
|
|
|
|
void x11_luffaCubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) |
|
|
|
|
{ |
|
|
|
|
const int threadsperblock = 256; |
|
|
|
|
|
|
|
|
|
// berechne wie viele Thread Blocks wir brauchen |
|
|
|
|
dim3 grid((threads + threadsperblock-1)/threadsperblock); |
|
|
|
|
dim3 block(threadsperblock); |
|
|
|
|
|
|
|
|
|
// Größe des dynamischen Shared Memory Bereichs |
|
|
|
|
size_t shared_size = 0; |
|
|
|
|
|
|
|
|
|
x11_luffaCubehash512_gpu_hash_64 << <grid, block, shared_size >> >(threads, startNounce, (uint64_t*)d_hash, d_nonceVector); |
|
|
|
|
x11_luffaCubehash512_gpu_hash_64 <<< grid, block >>> (threads, startNounce, (uint64_t*)d_hash, d_nonceVector); |
|
|
|
|
MyStreamSynchronize(NULL, order, thr_id); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|