mirror of
https://github.com/GOSTSec/sgminer
synced 2025-01-09 14:28:12 +00:00
new vertcoin algorithm: Lyra2REv2
kept old lyra2RE for coins still using it (careful the original lyra2 had a bug, this one is corrected only in the new version) implemented next_diff trick for pools
This commit is contained in:
parent
101b10c5fa
commit
3e612cf335
@ -70,6 +70,7 @@ sgminer_SOURCES += algorithm/whirlcoin.c algorithm/whirlcoin.h
|
|||||||
sgminer_SOURCES += algorithm/neoscrypt.c algorithm/neoscrypt.h
|
sgminer_SOURCES += algorithm/neoscrypt.c algorithm/neoscrypt.h
|
||||||
sgminer_SOURCES += algorithm/pluck.c algorithm/pluck.h
|
sgminer_SOURCES += algorithm/pluck.c algorithm/pluck.h
|
||||||
sgminer_SOURCES += algorithm/credits.c algorithm/credits.h
|
sgminer_SOURCES += algorithm/credits.c algorithm/credits.h
|
||||||
|
sgminer_SOURCES += algorithm/Lyra2RE_old.c algorithm/Lyra2RE_old.h algorithm/Lyra2_old.c algorithm/Lyra2_old.h algorithm/Sponge_old.c algorithm/Sponge_old.h
|
||||||
sgminer_SOURCES += algorithm/Lyra2RE.c algorithm/Lyra2RE.h algorithm/Lyra2.c algorithm/Lyra2.h algorithm/Sponge.c algorithm/Sponge.h
|
sgminer_SOURCES += algorithm/Lyra2RE.c algorithm/Lyra2RE.h algorithm/Lyra2.c algorithm/Lyra2.h algorithm/Sponge.c algorithm/Sponge.h
|
||||||
sgminer_SOURCES += algorithm/yescrypt.h algorithm/yescrypt.c algorithm/yescrypt_core.h algorithm/yescrypt-opt.c algorithm/yescryptcommon.c algorithm/sysendian.h
|
sgminer_SOURCES += algorithm/yescrypt.h algorithm/yescrypt.c algorithm/yescrypt_core.h algorithm/yescrypt-opt.c algorithm/yescryptcommon.c algorithm/sysendian.h
|
||||||
|
|
||||||
|
67
algorithm.c
67
algorithm.c
@ -31,7 +31,8 @@
|
|||||||
#include "algorithm/fresh.h"
|
#include "algorithm/fresh.h"
|
||||||
#include "algorithm/whirlcoin.h"
|
#include "algorithm/whirlcoin.h"
|
||||||
#include "algorithm/neoscrypt.h"
|
#include "algorithm/neoscrypt.h"
|
||||||
#include "algorithm/Lyra2RE.h"
|
#include "algorithm/Lyra2RE.h" //lyra new version
|
||||||
|
#include "algorithm/Lyra2RE_old.h" //lyra old version
|
||||||
#include "algorithm/pluck.h"
|
#include "algorithm/pluck.h"
|
||||||
#include "algorithm/yescrypt.h"
|
#include "algorithm/yescrypt.h"
|
||||||
#include "algorithm/credits.h"
|
#include "algorithm/credits.h"
|
||||||
@ -59,6 +60,7 @@ const char *algorithm_type_str[] = {
|
|||||||
"Whirlcoin",
|
"Whirlcoin",
|
||||||
"Neoscrypt",
|
"Neoscrypt",
|
||||||
"Lyra2RE",
|
"Lyra2RE",
|
||||||
|
"Lyta2REv2"
|
||||||
"pluck",
|
"pluck",
|
||||||
"yescrypt",
|
"yescrypt",
|
||||||
"yescrypt-multi"
|
"yescrypt-multi"
|
||||||
@ -409,6 +411,62 @@ static cl_int queue_lyra2RE_kernel(struct __clState *clState, struct _dev_blk_ct
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static cl_int queue_lyra2REv2_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
|
||||||
|
{
|
||||||
|
cl_kernel *kernel;
|
||||||
|
unsigned int num;
|
||||||
|
cl_int status = 0;
|
||||||
|
cl_ulong le_target;
|
||||||
|
|
||||||
|
// le_target = *(cl_uint *)(blk->work->device_target + 28);
|
||||||
|
le_target = *(cl_ulong *)(blk->work->device_target + 24);
|
||||||
|
flip80(clState->cldata, blk->work->data);
|
||||||
|
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL);
|
||||||
|
|
||||||
|
// blake - search
|
||||||
|
kernel = &clState->kernel;
|
||||||
|
num = 0;
|
||||||
|
// CL_SET_ARG(clState->CLbuffer0);
|
||||||
|
CL_SET_ARG(clState->buffer1);
|
||||||
|
CL_SET_ARG(blk->work->blk.ctx_a);
|
||||||
|
CL_SET_ARG(blk->work->blk.ctx_b);
|
||||||
|
CL_SET_ARG(blk->work->blk.ctx_c);
|
||||||
|
CL_SET_ARG(blk->work->blk.ctx_d);
|
||||||
|
CL_SET_ARG(blk->work->blk.ctx_e);
|
||||||
|
CL_SET_ARG(blk->work->blk.ctx_f);
|
||||||
|
CL_SET_ARG(blk->work->blk.ctx_g);
|
||||||
|
CL_SET_ARG(blk->work->blk.ctx_h);
|
||||||
|
CL_SET_ARG(blk->work->blk.cty_a);
|
||||||
|
CL_SET_ARG(blk->work->blk.cty_b);
|
||||||
|
CL_SET_ARG(blk->work->blk.cty_c);
|
||||||
|
|
||||||
|
// keccak - search1
|
||||||
|
kernel = clState->extra_kernels;
|
||||||
|
CL_SET_ARG_0(clState->buffer1);
|
||||||
|
// cubehash - search2
|
||||||
|
num = 0;
|
||||||
|
CL_NEXTKERNEL_SET_ARG_0(clState->buffer1);
|
||||||
|
// lyra - search3
|
||||||
|
num = 0;
|
||||||
|
CL_NEXTKERNEL_SET_ARG_N(0, clState->buffer1);
|
||||||
|
CL_SET_ARG_N(1, clState->padbuffer8);
|
||||||
|
// skein -search4
|
||||||
|
num = 0;
|
||||||
|
CL_NEXTKERNEL_SET_ARG_0(clState->buffer1);
|
||||||
|
// cubehash - search5
|
||||||
|
num = 0;
|
||||||
|
CL_NEXTKERNEL_SET_ARG_0(clState->buffer1);
|
||||||
|
// bmw - search6
|
||||||
|
num = 0;
|
||||||
|
CL_NEXTKERNEL_SET_ARG(clState->buffer1);
|
||||||
|
CL_SET_ARG(clState->outputBuffer);
|
||||||
|
CL_SET_ARG(le_target);
|
||||||
|
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static cl_int queue_darkcoin_mod_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
|
static cl_int queue_darkcoin_mod_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
|
||||||
{
|
{
|
||||||
@ -927,7 +985,10 @@ static algorithm_settings_t algos[] = {
|
|||||||
|
|
||||||
{ "fresh", ALGO_FRESH, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 4 * 16 * 4194304, 0, fresh_regenhash, queue_fresh_kernel, gen_hash, NULL},
|
{ "fresh", ALGO_FRESH, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 4 * 16 * 4194304, 0, fresh_regenhash, queue_fresh_kernel, gen_hash, NULL},
|
||||||
|
|
||||||
{ "Lyra2RE", ALGO_LYRA2RE, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4,2 * 8 * 4194304 , 0, lyra2re_regenhash, queue_lyra2RE_kernel, gen_hash, NULL},
|
{ "Lyra2RE", ALGO_LYRA2RE, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4,2 * 8 * 4194304 , 0, lyra2reold_regenhash, queue_lyra2RE_kernel, gen_hash, NULL},
|
||||||
|
|
||||||
|
{ "Lyra2REv2", ALGO_LYRA2REv2, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 6, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, lyra2re_regenhash, queue_lyra2REv2_kernel, gen_hash, append_neoscrypt_compiler_options },
|
||||||
|
|
||||||
|
|
||||||
// kernels starting from this will have difficulty calculated by using fuguecoin algorithm
|
// kernels starting from this will have difficulty calculated by using fuguecoin algorithm
|
||||||
#define A_FUGUE(a, b, c) \
|
#define A_FUGUE(a, b, c) \
|
||||||
@ -1035,7 +1096,7 @@ void set_algorithm(algorithm_t* algo, const char* newname_alias)
|
|||||||
// use old nfactor if it was previously set and is different than the one set by alias
|
// use old nfactor if it was previously set and is different than the one set by alias
|
||||||
if ((old_nfactor > 0) && (old_nfactor != nfactor))
|
if ((old_nfactor > 0) && (old_nfactor != nfactor))
|
||||||
nfactor = old_nfactor;
|
nfactor = old_nfactor;
|
||||||
if (algo->type == ALGO_LYRA2RE) {opt_lyra = true;}
|
if (algo->type == ALGO_LYRA2RE || algo->type == ALGO_LYRA2REv2 ) { opt_lyra = true; }
|
||||||
set_algorithm_nfactor(algo, nfactor);
|
set_algorithm_nfactor(algo, nfactor);
|
||||||
|
|
||||||
//reapply kernelfile if was set
|
//reapply kernelfile if was set
|
||||||
|
@ -28,6 +28,7 @@ typedef enum {
|
|||||||
ALGO_WHIRL,
|
ALGO_WHIRL,
|
||||||
ALGO_NEOSCRYPT,
|
ALGO_NEOSCRYPT,
|
||||||
ALGO_LYRA2RE,
|
ALGO_LYRA2RE,
|
||||||
|
ALGO_LYRA2REv2,
|
||||||
ALGO_PLUCK,
|
ALGO_PLUCK,
|
||||||
ALGO_YESCRYPT,
|
ALGO_YESCRYPT,
|
||||||
ALGO_YESCRYPT_MULTI,
|
ALGO_YESCRYPT_MULTI,
|
||||||
|
@ -58,6 +58,11 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *
|
|||||||
|
|
||||||
//========== Initializing the Memory Matrix and pointers to it =============//
|
//========== Initializing the Memory Matrix and pointers to it =============//
|
||||||
//Tries to allocate enough space for the whole memory matrix
|
//Tries to allocate enough space for the whole memory matrix
|
||||||
|
|
||||||
|
|
||||||
|
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols;
|
||||||
|
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
||||||
|
|
||||||
i = (int64_t) ((int64_t) nRows * (int64_t) ROW_LEN_BYTES);
|
i = (int64_t) ((int64_t) nRows * (int64_t) ROW_LEN_BYTES);
|
||||||
uint64_t *wholeMatrix = malloc(i);
|
uint64_t *wholeMatrix = malloc(i);
|
||||||
if (wholeMatrix == NULL) {
|
if (wholeMatrix == NULL) {
|
||||||
@ -130,16 +135,16 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *
|
|||||||
ptrWord = wholeMatrix;
|
ptrWord = wholeMatrix;
|
||||||
for (i = 0; i < nBlocksInput; i++) {
|
for (i = 0; i < nBlocksInput; i++) {
|
||||||
absorbBlockBlake2Safe(state, ptrWord); //absorbs each block of pad(pwd || salt || basil)
|
absorbBlockBlake2Safe(state, ptrWord); //absorbs each block of pad(pwd || salt || basil)
|
||||||
ptrWord += BLOCK_LEN_BLAKE2_SAFE_BYTES; //goes to next block of pad(pwd || salt || basil)
|
ptrWord += BLOCK_LEN_BLAKE2_SAFE_INT64; //goes to next block of pad(pwd || salt || basil)
|
||||||
}
|
}
|
||||||
|
|
||||||
//Initializes M[0] and M[1]
|
//Initializes M[0] and M[1]
|
||||||
reducedSqueezeRow0(state, memMatrix[0]); //The locally copied password is most likely overwritten here
|
reducedSqueezeRow0(state, memMatrix[0], nCols); //The locally copied password is most likely overwritten here
|
||||||
reducedDuplexRow1(state, memMatrix[0], memMatrix[1]);
|
reducedDuplexRow1(state, memMatrix[0], memMatrix[1], nCols);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
//M[row] = rand; //M[row*] = M[row*] XOR rotW(rand)
|
//M[row] = rand; //M[row*] = M[row*] XOR rotW(rand)
|
||||||
reducedDuplexRowSetup(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]);
|
reducedDuplexRowSetup(state, memMatrix[prev], memMatrix[rowa], memMatrix[row], nCols);
|
||||||
|
|
||||||
|
|
||||||
//updates the value of row* (deterministically picked during Setup))
|
//updates the value of row* (deterministically picked during Setup))
|
||||||
@ -172,7 +177,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *
|
|||||||
//------------------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
//Performs a reduced-round duplexing operation over M[row*] XOR M[prev], updating both M[row*] and M[row]
|
//Performs a reduced-round duplexing operation over M[row*] XOR M[prev], updating both M[row*] and M[row]
|
||||||
reducedDuplexRow(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]);
|
reducedDuplexRow(state, memMatrix[prev], memMatrix[rowa], memMatrix[row], nCols);
|
||||||
|
|
||||||
//update prev: it now points to the last row ever computed
|
//update prev: it now points to the last row ever computed
|
||||||
prev = row;
|
prev = row;
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
/**
|
/**
|
||||||
* Header file for the Lyra2 Password Hashing Scheme (PHS).
|
* Header file for the Lyra2 Password Hashing Scheme (PHS).
|
||||||
*
|
*
|
||||||
* Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
|
* Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
|
||||||
*
|
*
|
||||||
* This software is hereby placed in the public domain.
|
* This software is hereby placed in the public domain.
|
||||||
*
|
*
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
|
||||||
@ -37,14 +37,6 @@ typedef unsigned char byte;
|
|||||||
#define BLOCK_LEN_BYTES (BLOCK_LEN_INT64 * 8) //Block length, in bytes
|
#define BLOCK_LEN_BYTES (BLOCK_LEN_INT64 * 8) //Block length, in bytes
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef N_COLS
|
|
||||||
#define N_COLS 8 //Number of columns in the memory matrix: fixed to 64 by default
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define ROW_LEN_INT64 (BLOCK_LEN_INT64 * N_COLS) //Total length of a row: N_COLS blocks
|
|
||||||
#define ROW_LEN_BYTES (ROW_LEN_INT64 * 8) //Number of bytes per row
|
|
||||||
|
|
||||||
|
|
||||||
int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols);
|
int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols);
|
||||||
|
|
||||||
#endif /* LYRA2_H_ */
|
#endif /* LYRA2_H_ */
|
||||||
|
@ -36,6 +36,8 @@
|
|||||||
#include "sph/sph_groestl.h"
|
#include "sph/sph_groestl.h"
|
||||||
#include "sph/sph_skein.h"
|
#include "sph/sph_skein.h"
|
||||||
#include "sph/sph_keccak.h"
|
#include "sph/sph_keccak.h"
|
||||||
|
#include "sph/sph_bmw.h"
|
||||||
|
#include "sph/sph_cubehash.h"
|
||||||
#include "Lyra2.h"
|
#include "Lyra2.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -55,34 +57,37 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
|
|||||||
inline void lyra2rehash(void *state, const void *input)
|
inline void lyra2rehash(void *state, const void *input)
|
||||||
{
|
{
|
||||||
sph_blake256_context ctx_blake;
|
sph_blake256_context ctx_blake;
|
||||||
sph_groestl256_context ctx_groestl;
|
sph_bmw256_context ctx_bmw;
|
||||||
sph_keccak256_context ctx_keccak;
|
sph_keccak256_context ctx_keccak;
|
||||||
sph_skein256_context ctx_skein;
|
sph_skein256_context ctx_skein;
|
||||||
|
sph_cubehash256_context ctx_cube;
|
||||||
uint32_t hashA[8], hashB[8];
|
uint32_t hashA[8], hashB[8];
|
||||||
|
|
||||||
sph_blake256_init(&ctx_blake);
|
sph_blake256_init(&ctx_blake);
|
||||||
sph_blake256 (&ctx_blake, input, 80);
|
sph_blake256 (&ctx_blake, input, 80);
|
||||||
sph_blake256_close (&ctx_blake, hashA);
|
sph_blake256_close (&ctx_blake, hashA);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
sph_keccak256_init(&ctx_keccak);
|
sph_keccak256_init(&ctx_keccak);
|
||||||
sph_keccak256 (&ctx_keccak,hashA, 32);
|
sph_keccak256 (&ctx_keccak,hashA, 32);
|
||||||
sph_keccak256_close(&ctx_keccak, hashB);
|
sph_keccak256_close(&ctx_keccak, hashB);
|
||||||
|
|
||||||
LYRA2(hashA, 32, hashB, 32, hashB, 32, 1, 8, 8);
|
sph_cubehash256_init(&ctx_cube);
|
||||||
|
sph_cubehash256(&ctx_cube, hashB, 32);
|
||||||
|
sph_cubehash256_close(&ctx_cube, hashA);
|
||||||
|
|
||||||
|
LYRA2(hashB, 32, hashA, 32, hashA, 32, 1, 4, 4);
|
||||||
|
|
||||||
sph_skein256_init(&ctx_skein);
|
sph_skein256_init(&ctx_skein);
|
||||||
sph_skein256 (&ctx_skein, hashA, 32);
|
sph_skein256 (&ctx_skein, hashB, 32);
|
||||||
sph_skein256_close(&ctx_skein, hashB);
|
sph_skein256_close(&ctx_skein, hashA);
|
||||||
|
|
||||||
|
sph_cubehash256_init(&ctx_cube);
|
||||||
|
sph_cubehash256(&ctx_cube, hashA, 32);
|
||||||
|
sph_cubehash256_close(&ctx_cube, hashB);
|
||||||
|
|
||||||
sph_groestl256_init(&ctx_groestl);
|
sph_bmw256_init(&ctx_bmw);
|
||||||
sph_groestl256 (&ctx_groestl, hashB, 32);
|
sph_bmw256 (&ctx_bmw, hashB, 32);
|
||||||
sph_groestl256_close(&ctx_groestl, hashA);
|
sph_bmw256_close(&ctx_bmw, hashA);
|
||||||
|
|
||||||
//printf("cpu hash %08x %08x %08x %08x\n",hashA[0],hashA[1],hashA[2],hashA[3]);
|
//printf("cpu hash %08x %08x %08x %08x\n",hashA[0],hashA[1],hashA[2],hashA[3]);
|
||||||
|
|
||||||
|
@ -2,7 +2,8 @@
|
|||||||
#define LYRA2RE_H
|
#define LYRA2RE_H
|
||||||
|
|
||||||
#include "miner.h"
|
#include "miner.h"
|
||||||
|
#define LYRA_SCRATCHBUF_SIZE (4 ) // matrix extended to 16 matrix[16][8][8] uint64_t or equivalent
|
||||||
|
#define LYRA_SECBUF_SIZE (4) //8 uint64
|
||||||
extern int lyra2re_test(unsigned char *pdata, const unsigned char *ptarget,
|
extern int lyra2re_test(unsigned char *pdata, const unsigned char *ptarget,
|
||||||
uint32_t nonce);
|
uint32_t nonce);
|
||||||
extern void lyra2re_regenhash(struct work *work);
|
extern void lyra2re_regenhash(struct work *work);
|
||||||
|
169
algorithm/Lyra2RE_old.c
Normal file
169
algorithm/Lyra2RE_old.c
Normal file
@ -0,0 +1,169 @@
|
|||||||
|
/*-
|
||||||
|
* Copyright 2014 James Lovejoy
|
||||||
|
* Copyright 2014 phm
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
#include "miner.h"
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "sph/sph_blake.h"
|
||||||
|
#include "sph/sph_groestl.h"
|
||||||
|
#include "sph/sph_skein.h"
|
||||||
|
#include "sph/sph_keccak.h"
|
||||||
|
#include "Lyra2_old.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Encode a length len/4 vector of (uint32_t) into a length len vector of
|
||||||
|
* (unsigned char) in big-endian form. Assumes len is a multiple of 4.
|
||||||
|
*/
|
||||||
|
static inline void
|
||||||
|
be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
|
||||||
|
{
|
||||||
|
uint32_t i;
|
||||||
|
|
||||||
|
for (i = 0; i < len; i++)
|
||||||
|
dst[i] = htobe32(src[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
inline void lyra2rehash_old(void *state, const void *input)
|
||||||
|
{
|
||||||
|
sph_blake256_context ctx_blake;
|
||||||
|
sph_groestl256_context ctx_groestl;
|
||||||
|
sph_keccak256_context ctx_keccak;
|
||||||
|
sph_skein256_context ctx_skein;
|
||||||
|
|
||||||
|
uint32_t hashA[8], hashB[8];
|
||||||
|
|
||||||
|
sph_blake256_init(&ctx_blake);
|
||||||
|
sph_blake256 (&ctx_blake, input, 80);
|
||||||
|
sph_blake256_close (&ctx_blake, hashA);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
sph_keccak256_init(&ctx_keccak);
|
||||||
|
sph_keccak256 (&ctx_keccak,hashA, 32);
|
||||||
|
sph_keccak256_close(&ctx_keccak, hashB);
|
||||||
|
|
||||||
|
LYRA2O(hashA, 32, hashB, 32, hashB, 32, 1, 8, 8);
|
||||||
|
|
||||||
|
|
||||||
|
sph_skein256_init(&ctx_skein);
|
||||||
|
sph_skein256 (&ctx_skein, hashA, 32);
|
||||||
|
sph_skein256_close(&ctx_skein, hashB);
|
||||||
|
|
||||||
|
|
||||||
|
sph_groestl256_init(&ctx_groestl);
|
||||||
|
sph_groestl256 (&ctx_groestl, hashB, 32);
|
||||||
|
sph_groestl256_close(&ctx_groestl, hashA);
|
||||||
|
|
||||||
|
//printf("cpu hash %08x %08x %08x %08x\n",hashA[0],hashA[1],hashA[2],hashA[3]);
|
||||||
|
|
||||||
|
memcpy(state, hashA, 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const uint32_t diff1targ = 0x0000ffff;
|
||||||
|
|
||||||
|
|
||||||
|
/* Used externally as confirmation of correct OCL code */
|
||||||
|
int lyra2reold_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
|
||||||
|
{
|
||||||
|
uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]);
|
||||||
|
uint32_t data[20], ohash[8];
|
||||||
|
|
||||||
|
be32enc_vect(data, (const uint32_t *)pdata, 19);
|
||||||
|
data[19] = htobe32(nonce);
|
||||||
|
lyra2rehash(ohash, data);
|
||||||
|
tmp_hash7 = be32toh(ohash[7]);
|
||||||
|
|
||||||
|
applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx",
|
||||||
|
(long unsigned int)Htarg,
|
||||||
|
(long unsigned int)diff1targ,
|
||||||
|
(long unsigned int)tmp_hash7);
|
||||||
|
if (tmp_hash7 > diff1targ)
|
||||||
|
return -1;
|
||||||
|
if (tmp_hash7 > Htarg)
|
||||||
|
return 0;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
void lyra2reold_regenhash(struct work *work)
|
||||||
|
{
|
||||||
|
uint32_t data[20];
|
||||||
|
uint32_t *nonce = (uint32_t *)(work->data + 76);
|
||||||
|
uint32_t *ohash = (uint32_t *)(work->hash);
|
||||||
|
|
||||||
|
be32enc_vect(data, (const uint32_t *)work->data, 19);
|
||||||
|
data[19] = htobe32(*nonce);
|
||||||
|
lyra2rehash(ohash, data);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool scanhash_lyra2reold(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
|
||||||
|
unsigned char *pdata, unsigned char __maybe_unused *phash1,
|
||||||
|
unsigned char __maybe_unused *phash, const unsigned char *ptarget,
|
||||||
|
uint32_t max_nonce, uint32_t *last_nonce, uint32_t n)
|
||||||
|
{
|
||||||
|
uint32_t *nonce = (uint32_t *)(pdata + 76);
|
||||||
|
uint32_t data[20];
|
||||||
|
uint32_t tmp_hash7;
|
||||||
|
uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]);
|
||||||
|
bool ret = false;
|
||||||
|
|
||||||
|
be32enc_vect(data, (const uint32_t *)pdata, 19);
|
||||||
|
|
||||||
|
while(1) {
|
||||||
|
uint32_t ostate[8];
|
||||||
|
|
||||||
|
*nonce = ++n;
|
||||||
|
data[19] = (n);
|
||||||
|
lyra2rehash(ostate, data);
|
||||||
|
tmp_hash7 = (ostate[7]);
|
||||||
|
|
||||||
|
applog(LOG_INFO, "data7 %08lx",
|
||||||
|
(long unsigned int)data[7]);
|
||||||
|
|
||||||
|
if (unlikely(tmp_hash7 <= Htarg)) {
|
||||||
|
((uint32_t *)pdata)[19] = htobe32(n);
|
||||||
|
*last_nonce = n;
|
||||||
|
ret = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (unlikely((n >= max_nonce) || thr->work_restart)) {
|
||||||
|
*last_nonce = n;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
10
algorithm/Lyra2RE_old.h
Normal file
10
algorithm/Lyra2RE_old.h
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
#ifndef LYRA2REOLD_H
|
||||||
|
#define LYRA2REOLD_H
|
||||||
|
|
||||||
|
#include "miner.h"
|
||||||
|
|
||||||
|
extern int lyra2reold_test(unsigned char *pdata, const unsigned char *ptarget,
|
||||||
|
uint32_t nonce);
|
||||||
|
extern void lyra2reold_regenhash(struct work *work);
|
||||||
|
|
||||||
|
#endif /* LYRA2RE_H */
|
208
algorithm/Lyra2_old.c
Normal file
208
algorithm/Lyra2_old.c
Normal file
@ -0,0 +1,208 @@
|
|||||||
|
/**
|
||||||
|
* Implementation of the Lyra2 Password Hashing Scheme (PHS).
|
||||||
|
*
|
||||||
|
* Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
|
||||||
|
*
|
||||||
|
* This software is hereby placed in the public domain.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
|
||||||
|
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||||
|
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||||
|
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||||
|
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||||
|
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include "Lyra2_old.h"
|
||||||
|
#include "Sponge_old.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Executes Lyra2 based on the G function from Blake2b. This version supports salts and passwords
|
||||||
|
* whose combined length is smaller than the size of the memory matrix, (i.e., (nRows x nCols x b) bits,
|
||||||
|
* where "b" is the underlying sponge's bitrate). In this implementation, the "basil" is composed by all
|
||||||
|
* integer parameters (treated as type "unsigned int") in the order they are provided, plus the value
|
||||||
|
* of nCols, (i.e., basil = kLen || pwdlen || saltlen || timeCost || nRows || nCols).
|
||||||
|
*
|
||||||
|
* @param K The derived key to be output by the algorithm
|
||||||
|
* @param kLen Desired key length
|
||||||
|
* @param pwd User password
|
||||||
|
* @param pwdlen Password length
|
||||||
|
* @param salt Salt
|
||||||
|
* @param saltlen Salt length
|
||||||
|
* @param timeCost Parameter to determine the processing time (T)
|
||||||
|
* @param nRows Number or rows of the memory matrix (R)
|
||||||
|
* @param nCols Number of columns of the memory matrix (C)
|
||||||
|
*
|
||||||
|
* @return 0 if the key is generated correctly; -1 if there is an error (usually due to lack of memory for allocation)
|
||||||
|
*/
|
||||||
|
int LYRA2O(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols) {
|
||||||
|
|
||||||
|
//============================= Basic variables ============================//
|
||||||
|
int64_t row = 2; //index of row to be processed
|
||||||
|
int64_t prev = 1; //index of prev (last row ever computed/modified)
|
||||||
|
int64_t rowa = 0; //index of row* (a previous row, deterministically picked during Setup and randomly picked while Wandering)
|
||||||
|
int64_t tau; //Time Loop iterator
|
||||||
|
int64_t step = 1; //Visitation step (used during Setup and Wandering phases)
|
||||||
|
int64_t window = 2; //Visitation window (used to define which rows can be revisited during Setup)
|
||||||
|
int64_t gap = 1; //Modifier to the step, assuming the values 1 or -1
|
||||||
|
int64_t i; //auxiliary iteration counter
|
||||||
|
//==========================================================================/
|
||||||
|
|
||||||
|
//========== Initializing the Memory Matrix and pointers to it =============//
|
||||||
|
//Tries to allocate enough space for the whole memory matrix
|
||||||
|
i = (int64_t) ((int64_t) nRows * (int64_t) ROW_LEN_BYTES);
|
||||||
|
uint64_t *wholeMatrix = malloc(i);
|
||||||
|
if (wholeMatrix == NULL) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
memset(wholeMatrix, 0, i);
|
||||||
|
|
||||||
|
//Allocates pointers to each row of the matrix
|
||||||
|
uint64_t **memMatrix = malloc(nRows * sizeof (uint64_t*));
|
||||||
|
if (memMatrix == NULL) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
//Places the pointers in the correct positions
|
||||||
|
uint64_t *ptrWord = wholeMatrix;
|
||||||
|
for (i = 0; i < nRows; i++) {
|
||||||
|
memMatrix[i] = ptrWord;
|
||||||
|
ptrWord += ROW_LEN_INT64;
|
||||||
|
}
|
||||||
|
//==========================================================================/
|
||||||
|
|
||||||
|
//============= Getting the password + salt + basil padded with 10*1 ===============//
|
||||||
|
//OBS.:The memory matrix will temporarily hold the password: not for saving memory,
|
||||||
|
//but this ensures that the password copied locally will be overwritten as soon as possible
|
||||||
|
|
||||||
|
//First, we clean enough blocks for the password, salt, basil and padding
|
||||||
|
uint64_t nBlocksInput = ((saltlen + pwdlen + 6 * sizeof (uint64_t)) / BLOCK_LEN_BLAKE2_SAFE_BYTES) + 1;
|
||||||
|
byte *ptrByte = (byte*) wholeMatrix;
|
||||||
|
memset(ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES);
|
||||||
|
|
||||||
|
//Prepends the password
|
||||||
|
memcpy(ptrByte, pwd, pwdlen);
|
||||||
|
ptrByte += pwdlen;
|
||||||
|
|
||||||
|
//Concatenates the salt
|
||||||
|
memcpy(ptrByte, salt, saltlen);
|
||||||
|
ptrByte += saltlen;
|
||||||
|
|
||||||
|
//Concatenates the basil: every integer passed as parameter, in the order they are provided by the interface
|
||||||
|
memcpy(ptrByte, &kLen, sizeof (uint64_t));
|
||||||
|
ptrByte += sizeof (uint64_t);
|
||||||
|
memcpy(ptrByte, &pwdlen, sizeof (uint64_t));
|
||||||
|
ptrByte += sizeof (uint64_t);
|
||||||
|
memcpy(ptrByte, &saltlen, sizeof (uint64_t));
|
||||||
|
ptrByte += sizeof (uint64_t);
|
||||||
|
memcpy(ptrByte, &timeCost, sizeof (uint64_t));
|
||||||
|
ptrByte += sizeof (uint64_t);
|
||||||
|
memcpy(ptrByte, &nRows, sizeof (uint64_t));
|
||||||
|
ptrByte += sizeof (uint64_t);
|
||||||
|
memcpy(ptrByte, &nCols, sizeof (uint64_t));
|
||||||
|
ptrByte += sizeof (uint64_t);
|
||||||
|
|
||||||
|
//Now comes the padding
|
||||||
|
*ptrByte = 0x80; //first byte of padding: right after the password
|
||||||
|
ptrByte = (byte*) wholeMatrix; //resets the pointer to the start of the memory matrix
|
||||||
|
ptrByte += nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - 1; //sets the pointer to the correct position: end of incomplete block
|
||||||
|
*ptrByte ^= 0x01; //last byte of padding: at the end of the last incomplete block
|
||||||
|
//==========================================================================/
|
||||||
|
|
||||||
|
//======================= Initializing the Sponge State ====================//
|
||||||
|
//Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c)
|
||||||
|
uint64_t *state = malloc(16 * sizeof (uint64_t));
|
||||||
|
if (state == NULL) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
initState(state);
|
||||||
|
//==========================================================================/
|
||||||
|
|
||||||
|
//================================ Setup Phase =============================//
|
||||||
|
//Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits
|
||||||
|
ptrWord = wholeMatrix;
|
||||||
|
for (i = 0; i < nBlocksInput; i++) {
|
||||||
|
absorbBlockBlake2SafeO(state, ptrWord); //absorbs each block of pad(pwd || salt || basil)
|
||||||
|
ptrWord += BLOCK_LEN_BLAKE2_SAFE_BYTES; //goes to next block of pad(pwd || salt || basil)
|
||||||
|
}
|
||||||
|
|
||||||
|
//Initializes M[0] and M[1]
|
||||||
|
reducedSqueezeRow0O(state, memMatrix[0]); //The locally copied password is most likely overwritten here
|
||||||
|
reducedDuplexRow1O(state, memMatrix[0], memMatrix[1]);
|
||||||
|
|
||||||
|
do {
|
||||||
|
//M[row] = rand; //M[row*] = M[row*] XOR rotW(rand)
|
||||||
|
reducedDuplexRowSetupO(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]);
|
||||||
|
|
||||||
|
|
||||||
|
//updates the value of row* (deterministically picked during Setup))
|
||||||
|
rowa = (rowa + step) & (window - 1);
|
||||||
|
//update prev: it now points to the last row ever computed
|
||||||
|
prev = row;
|
||||||
|
//updates row: goes to the next row to be computed
|
||||||
|
row++;
|
||||||
|
|
||||||
|
//Checks if all rows in the window where visited.
|
||||||
|
if (rowa == 0) {
|
||||||
|
step = window + gap; //changes the step: approximately doubles its value
|
||||||
|
window *= 2; //doubles the size of the re-visitation window
|
||||||
|
gap = -gap; //inverts the modifier to the step
|
||||||
|
}
|
||||||
|
|
||||||
|
} while (row < nRows);
|
||||||
|
//==========================================================================/
|
||||||
|
|
||||||
|
//============================ Wandering Phase =============================//
|
||||||
|
row = 0; //Resets the visitation to the first row of the memory matrix
|
||||||
|
for (tau = 1; tau <= timeCost; tau++) {
|
||||||
|
//Step is approximately half the number of all rows of the memory matrix for an odd tau; otherwise, it is -1
|
||||||
|
step = (tau % 2 == 0) ? -1 : nRows / 2 - 1;
|
||||||
|
do {
|
||||||
|
//Selects a pseudorandom index row*
|
||||||
|
//------------------------------------------------------------------------------------------
|
||||||
|
//rowa = ((unsigned int)state[0]) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2)
|
||||||
|
rowa = ((uint64_t) (state[0])) % nRows; //(USE THIS FOR THE "GENERIC" CASE)
|
||||||
|
//------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
//Performs a reduced-round duplexing operation over M[row*] XOR M[prev], updating both M[row*] and M[row]
|
||||||
|
reducedDuplexRowO(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]);
|
||||||
|
|
||||||
|
//update prev: it now points to the last row ever computed
|
||||||
|
prev = row;
|
||||||
|
|
||||||
|
//updates row: goes to the next row to be computed
|
||||||
|
//------------------------------------------------------------------------------------------
|
||||||
|
//row = (row + step) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2)
|
||||||
|
row = (row + step) % nRows; //(USE THIS FOR THE "GENERIC" CASE)
|
||||||
|
//------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
} while (row != 0);
|
||||||
|
}
|
||||||
|
//==========================================================================/
|
||||||
|
|
||||||
|
//============================ Wrap-up Phase ===============================//
|
||||||
|
//Absorbs the last block of the memory matrix
|
||||||
|
absorbBlockO(state, memMatrix[rowa]);
|
||||||
|
|
||||||
|
//Squeezes the key
|
||||||
|
squeezeO(state, K, kLen);
|
||||||
|
//==========================================================================/
|
||||||
|
|
||||||
|
//========================= Freeing the memory =============================//
|
||||||
|
free(memMatrix);
|
||||||
|
free(wholeMatrix);
|
||||||
|
|
||||||
|
//Wiping out the sponge's internal state before freeing it
|
||||||
|
memset(state, 0, 16 * sizeof (uint64_t));
|
||||||
|
free(state);
|
||||||
|
//==========================================================================/
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
50
algorithm/Lyra2_old.h
Normal file
50
algorithm/Lyra2_old.h
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
/**
|
||||||
|
* Header file for the Lyra2 Password Hashing Scheme (PHS).
|
||||||
|
*
|
||||||
|
* Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
|
||||||
|
*
|
||||||
|
* This software is hereby placed in the public domain.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
|
||||||
|
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||||
|
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||||
|
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||||
|
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||||
|
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
#ifndef LYRA2OLD_H_
|
||||||
|
#define LYRA2OLD_H_
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
typedef unsigned char byte;
|
||||||
|
|
||||||
|
//Block length required so Blake2's Initialization Vector (IV) is not overwritten (THIS SHOULD NOT BE MODIFIED)
|
||||||
|
#define BLOCK_LEN_BLAKE2_SAFE_INT64 8 //512 bits (=64 bytes, =8 uint64_t)
|
||||||
|
#define BLOCK_LEN_BLAKE2_SAFE_BYTES (BLOCK_LEN_BLAKE2_SAFE_INT64 * 8) //same as above, in bytes
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef BLOCK_LEN_BITS
|
||||||
|
#define BLOCK_LEN_INT64 (BLOCK_LEN_BITS/64) //Block length: 768 bits (=96 bytes, =12 uint64_t)
|
||||||
|
#define BLOCK_LEN_BYTES (BLOCK_LEN_BITS/8) //Block length, in bytes
|
||||||
|
#else //default block lenght: 768 bits
|
||||||
|
#define BLOCK_LEN_INT64 12 //Block length: 768 bits (=96 bytes, =12 uint64_t)
|
||||||
|
#define BLOCK_LEN_BYTES (BLOCK_LEN_INT64 * 8) //Block length, in bytes
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef N_COLS
|
||||||
|
#define N_COLS 8 //Number of columns in the memory matrix: fixed to 64 by default
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define ROW_LEN_INT64 (BLOCK_LEN_INT64 * N_COLS) //Total length of a row: N_COLS blocks
|
||||||
|
#define ROW_LEN_BYTES (ROW_LEN_INT64 * 8) //Number of bytes per row
|
||||||
|
|
||||||
|
|
||||||
|
int LYRA2O(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols);
|
||||||
|
|
||||||
|
#endif /* LYRA2_H_ */
|
@ -1,9 +1,9 @@
|
|||||||
/**
|
/**
|
||||||
* A simple implementation of Blake2b's internal permutation
|
* A simple implementation of Blake2b's internal permutation
|
||||||
* in the form of a sponge.
|
* in the form of a sponge.
|
||||||
*
|
*
|
||||||
* Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
|
* Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
|
||||||
*
|
*
|
||||||
* This software is hereby placed in the public domain.
|
* This software is hereby placed in the public domain.
|
||||||
*
|
*
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
|
||||||
@ -27,19 +27,19 @@
|
|||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initializes the Sponge State. The first 512 bits are set to zeros and the remainder
|
* Initializes the Sponge State. The first 512 bits are set to zeros and the remainder
|
||||||
* receive Blake2b's IV as per Blake2b's specification. <b>Note:</b> Even though sponges
|
* receive Blake2b's IV as per Blake2b's specification. <b>Note:</b> Even though sponges
|
||||||
* typically have their internal state initialized with zeros, Blake2b's G function
|
* typically have their internal state initialized with zeros, Blake2b's G function
|
||||||
* has a fixed point: if the internal state and message are both filled with zeros. the
|
* has a fixed point: if the internal state and message are both filled with zeros. the
|
||||||
* resulting permutation will always be a block filled with zeros; this happens because
|
* resulting permutation will always be a block filled with zeros; this happens because
|
||||||
* Blake2b does not use the constants originally employed in Blake2 inside its G function,
|
* Blake2b does not use the constants originally employed in Blake2 inside its G function,
|
||||||
* relying on the IV for avoiding possible fixed points.
|
* relying on the IV for avoiding possible fixed points.
|
||||||
*
|
*
|
||||||
* @param state The 1024-bit array to be initialized
|
* @param state The 1024-bit array to be initialized
|
||||||
*/
|
*/
|
||||||
inline void initState(uint64_t state[/*16*/]) {
|
inline void initState(uint64_t state[/*16*/]) {
|
||||||
//First 512 bis are zeros
|
//First 512 bis are zeros
|
||||||
memset(state, 0, 64);
|
memset(state, 0, 64);
|
||||||
//Remainder BLOCK_LEN_BLAKE2_SAFE_BYTES are reserved to the IV
|
//Remainder BLOCK_LEN_BLAKE2_SAFE_BYTES are reserved to the IV
|
||||||
state[8] = blake2b_IV[0];
|
state[8] = blake2b_IV[0];
|
||||||
state[9] = blake2b_IV[1];
|
state[9] = blake2b_IV[1];
|
||||||
@ -53,7 +53,7 @@ inline void initState(uint64_t state[/*16*/]) {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Execute Blake2b's G function, with all 12 rounds.
|
* Execute Blake2b's G function, with all 12 rounds.
|
||||||
*
|
*
|
||||||
* @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function
|
* @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function
|
||||||
*/
|
*/
|
||||||
inline static void blake2bLyra(uint64_t *v) {
|
inline static void blake2bLyra(uint64_t *v) {
|
||||||
@ -80,10 +80,10 @@ inline static void reducedBlake2bLyra(uint64_t *v) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Performs a squeeze operation, using Blake2b's G function as the
|
* Performs a squeeze operation, using Blake2b's G function as the
|
||||||
* internal permutation
|
* internal permutation
|
||||||
*
|
*
|
||||||
* @param state The current state of the sponge
|
* @param state The current state of the sponge
|
||||||
* @param out Array that will receive the data squeezed
|
* @param out Array that will receive the data squeezed
|
||||||
* @param len The number of bytes to be squeezed into the "out" array
|
* @param len The number of bytes to be squeezed into the "out" array
|
||||||
*/
|
*/
|
||||||
@ -105,8 +105,8 @@ inline void squeeze(uint64_t *state, byte *out, unsigned int len) {
|
|||||||
/**
|
/**
|
||||||
* Performs an absorb operation for a single block (BLOCK_LEN_INT64 words
|
* Performs an absorb operation for a single block (BLOCK_LEN_INT64 words
|
||||||
* of type uint64_t), using Blake2b's G function as the internal permutation
|
* of type uint64_t), using Blake2b's G function as the internal permutation
|
||||||
*
|
*
|
||||||
* @param state The current state of the sponge
|
* @param state The current state of the sponge
|
||||||
* @param in The block to be absorbed (BLOCK_LEN_INT64 words)
|
* @param in The block to be absorbed (BLOCK_LEN_INT64 words)
|
||||||
*/
|
*/
|
||||||
inline void absorbBlock(uint64_t *state, const uint64_t *in) {
|
inline void absorbBlock(uint64_t *state, const uint64_t *in) {
|
||||||
@ -129,15 +129,16 @@ inline void absorbBlock(uint64_t *state, const uint64_t *in) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Performs an absorb operation for a single block (BLOCK_LEN_BLAKE2_SAFE_INT64
|
* Performs an absorb operation for a single block (BLOCK_LEN_BLAKE2_SAFE_INT64
|
||||||
* words of type uint64_t), using Blake2b's G function as the internal permutation
|
* words of type uint64_t), using Blake2b's G function as the internal permutation
|
||||||
*
|
*
|
||||||
* @param state The current state of the sponge
|
* @param state The current state of the sponge
|
||||||
* @param in The block to be absorbed (BLOCK_LEN_BLAKE2_SAFE_INT64 words)
|
* @param in The block to be absorbed (BLOCK_LEN_BLAKE2_SAFE_INT64 words)
|
||||||
*/
|
*/
|
||||||
inline void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in) {
|
inline void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in) {
|
||||||
//XORs the first BLOCK_LEN_BLAKE2_SAFE_INT64 words of "in" with the current state
|
//XORs the first BLOCK_LEN_BLAKE2_SAFE_INT64 words of "in" with the current state
|
||||||
state[0] ^= in[0];
|
|
||||||
|
state[0] ^= in[0];
|
||||||
state[1] ^= in[1];
|
state[1] ^= in[1];
|
||||||
state[2] ^= in[2];
|
state[2] ^= in[2];
|
||||||
state[3] ^= in[3];
|
state[3] ^= in[3];
|
||||||
@ -146,23 +147,25 @@ inline void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in) {
|
|||||||
state[6] ^= in[6];
|
state[6] ^= in[6];
|
||||||
state[7] ^= in[7];
|
state[7] ^= in[7];
|
||||||
|
|
||||||
|
|
||||||
//Applies the transformation f to the sponge's state
|
//Applies the transformation f to the sponge's state
|
||||||
blake2bLyra(state);
|
blake2bLyra(state);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Performs a reduced squeeze operation for a single row, from the highest to
|
* Performs a reduced squeeze operation for a single row, from the highest to
|
||||||
* the lowest index, using the reduced-round Blake2b's G function as the
|
* the lowest index, using the reduced-round Blake2b's G function as the
|
||||||
* internal permutation
|
* internal permutation
|
||||||
*
|
*
|
||||||
* @param state The current state of the sponge
|
* @param state The current state of the sponge
|
||||||
* @param rowOut Row to receive the data squeezed
|
* @param rowOut Row to receive the data squeezed
|
||||||
*/
|
*/
|
||||||
inline void reducedSqueezeRow0(uint64_t* state, uint64_t* rowOut) {
|
inline void reducedSqueezeRow0(uint64_t* state, uint64_t* rowOut, uint64_t nCols) {
|
||||||
uint64_t* ptrWord = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to M[0][C-1]
|
uint64_t* ptrWord = rowOut + (nCols-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to M[0][C-1]
|
||||||
int i;
|
int i;
|
||||||
//M[row][C-1-col] = H.reduced_squeeze()
|
//M[row][C-1-col] = H.reduced_squeeze()
|
||||||
for (i = 0; i < N_COLS; i++) {
|
for (i = 0; i < nCols; i++) {
|
||||||
ptrWord[0] = state[0];
|
ptrWord[0] = state[0];
|
||||||
ptrWord[1] = state[1];
|
ptrWord[1] = state[1];
|
||||||
ptrWord[2] = state[2];
|
ptrWord[2] = state[2];
|
||||||
@ -184,21 +187,21 @@ inline void reducedSqueezeRow0(uint64_t* state, uint64_t* rowOut) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Performs a reduced duplex operation for a single row, from the highest to
|
* Performs a reduced duplex operation for a single row, from the highest to
|
||||||
* the lowest index, using the reduced-round Blake2b's G function as the
|
* the lowest index, using the reduced-round Blake2b's G function as the
|
||||||
* internal permutation
|
* internal permutation
|
||||||
*
|
*
|
||||||
* @param state The current state of the sponge
|
* @param state The current state of the sponge
|
||||||
* @param rowIn Row to feed the sponge
|
* @param rowIn Row to feed the sponge
|
||||||
* @param rowOut Row to receive the sponge's output
|
* @param rowOut Row to receive the sponge's output
|
||||||
*/
|
*/
|
||||||
inline void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut) {
|
inline void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, uint64_t nCols) {
|
||||||
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
|
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
|
||||||
uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row
|
uint64_t* ptrWordOut = rowOut + (nCols-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < N_COLS; i++) {
|
for (i = 0; i < nCols; i++) {
|
||||||
|
|
||||||
//Absorbing "M[prev][col]"
|
//Absorbing "M[prev][col]"
|
||||||
state[0] ^= (ptrWordIn[0]);
|
state[0] ^= (ptrWordIn[0]);
|
||||||
@ -230,8 +233,8 @@ inline void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut
|
|||||||
ptrWordOut[9] = ptrWordIn[9] ^ state[9];
|
ptrWordOut[9] = ptrWordIn[9] ^ state[9];
|
||||||
ptrWordOut[10] = ptrWordIn[10] ^ state[10];
|
ptrWordOut[10] = ptrWordIn[10] ^ state[10];
|
||||||
ptrWordOut[11] = ptrWordIn[11] ^ state[11];
|
ptrWordOut[11] = ptrWordIn[11] ^ state[11];
|
||||||
|
|
||||||
|
|
||||||
//Input: next column (i.e., next block in sequence)
|
//Input: next column (i.e., next block in sequence)
|
||||||
ptrWordIn += BLOCK_LEN_INT64;
|
ptrWordIn += BLOCK_LEN_INT64;
|
||||||
//Output: goes to previous column
|
//Output: goes to previous column
|
||||||
@ -240,26 +243,26 @@ inline void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e.,
|
* Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e.,
|
||||||
* the wordwise addition of two columns, ignoring carries between words). The
|
* the wordwise addition of two columns, ignoring carries between words). The
|
||||||
* output of this operation, "rand", is then used to make
|
* output of this operation, "rand", is then used to make
|
||||||
* "M[rowOut][(N_COLS-1)-col] = M[rowIn][col] XOR rand" and
|
* "M[rowOut][(N_COLS-1)-col] = M[rowIn][col] XOR rand" and
|
||||||
* "M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)", where rotW is a 64-bit
|
* "M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)", where rotW is a 64-bit
|
||||||
* rotation to the left and N_COLS is a system parameter.
|
* rotation to the left and N_COLS is a system parameter.
|
||||||
*
|
*
|
||||||
* @param state The current state of the sponge
|
* @param state The current state of the sponge
|
||||||
* @param rowIn Row used only as input
|
* @param rowIn Row used only as input
|
||||||
* @param rowInOut Row used as input and to receive output after rotation
|
* @param rowInOut Row used as input and to receive output after rotation
|
||||||
* @param rowOut Row receiving the output
|
* @param rowOut Row receiving the output
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
inline void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
|
inline void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols) {
|
||||||
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
|
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
|
||||||
uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
|
uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
|
||||||
uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row
|
uint64_t* ptrWordOut = rowOut + (nCols-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < N_COLS; i++) {
|
for (i = 0; i < nCols; i++) {
|
||||||
//Absorbing "M[prev] [+] M[row*]"
|
//Absorbing "M[prev] [+] M[row*]"
|
||||||
state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]);
|
state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]);
|
||||||
state[1] ^= (ptrWordIn[1] + ptrWordInOut[1]);
|
state[1] ^= (ptrWordIn[1] + ptrWordInOut[1]);
|
||||||
@ -290,7 +293,7 @@ inline void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *ro
|
|||||||
ptrWordOut[9] = ptrWordIn[9] ^ state[9];
|
ptrWordOut[9] = ptrWordIn[9] ^ state[9];
|
||||||
ptrWordOut[10] = ptrWordIn[10] ^ state[10];
|
ptrWordOut[10] = ptrWordIn[10] ^ state[10];
|
||||||
ptrWordOut[11] = ptrWordIn[11] ^ state[11];
|
ptrWordOut[11] = ptrWordIn[11] ^ state[11];
|
||||||
|
|
||||||
//M[row*][col] = M[row*][col] XOR rotW(rand)
|
//M[row*][col] = M[row*][col] XOR rotW(rand)
|
||||||
ptrWordInOut[0] ^= state[11];
|
ptrWordInOut[0] ^= state[11];
|
||||||
ptrWordInOut[1] ^= state[0];
|
ptrWordInOut[1] ^= state[0];
|
||||||
@ -314,26 +317,26 @@ inline void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *ro
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e.,
|
* Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e.,
|
||||||
* the wordwise addition of two columns, ignoring carries between words). The
|
* the wordwise addition of two columns, ignoring carries between words). The
|
||||||
* output of this operation, "rand", is then used to make
|
* output of this operation, "rand", is then used to make
|
||||||
* "M[rowOut][col] = M[rowOut][col] XOR rand" and
|
* "M[rowOut][col] = M[rowOut][col] XOR rand" and
|
||||||
* "M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)", where rotW is a 64-bit
|
* "M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)", where rotW is a 64-bit
|
||||||
* rotation to the left.
|
* rotation to the left.
|
||||||
*
|
*
|
||||||
* @param state The current state of the sponge
|
* @param state The current state of the sponge
|
||||||
* @param rowIn Row used only as input
|
* @param rowIn Row used only as input
|
||||||
* @param rowInOut Row used as input and to receive output after rotation
|
* @param rowInOut Row used as input and to receive output after rotation
|
||||||
* @param rowOut Row receiving the output
|
* @param rowOut Row receiving the output
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
inline void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
|
inline void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols) {
|
||||||
uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
|
uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
|
||||||
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
|
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
|
||||||
uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row
|
uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < N_COLS; i++) {
|
for (i = 0; i < nCols; i++) {
|
||||||
|
|
||||||
//Absorbing "M[prev] [+] M[row*]"
|
//Absorbing "M[prev] [+] M[row*]"
|
||||||
state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]);
|
state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]);
|
||||||
@ -392,10 +395,10 @@ inline void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOu
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Performs a duplex operation over "M[rowInOut] [+] M[rowIn]", writing the output "rand"
|
* Performs a duplex operation over "M[rowInOut] [+] M[rowIn]", writing the output "rand"
|
||||||
* on M[rowOut] and making "M[rowInOut] = M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit
|
* on M[rowOut] and making "M[rowInOut] = M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit
|
||||||
* rotation to the left.
|
* rotation to the left.
|
||||||
*
|
*
|
||||||
* @param state The current state of the sponge
|
* @param state The current state of the sponge
|
||||||
* @param rowIn Row used only as input
|
* @param rowIn Row used only as input
|
||||||
* @param rowInOut Row used as input and to receive output after rotation
|
* @param rowInOut Row used as input and to receive output after rotation
|
||||||
* @param rowOut Row receiving the output
|
* @param rowOut Row receiving the output
|
||||||
@ -465,10 +468,10 @@ inline void reducedDuplexRowSetupOLD(uint64_t *state, uint64_t *rowIn, uint64_t
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", writing the output "rand"
|
* Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", writing the output "rand"
|
||||||
* on M[rowOut] and making "M[rowInOut] = M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit
|
* on M[rowOut] and making "M[rowInOut] = M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit
|
||||||
* rotation to the left.
|
* rotation to the left.
|
||||||
*
|
*
|
||||||
* @param state The current state of the sponge
|
* @param state The current state of the sponge
|
||||||
* @param rowIn Row used only as input
|
* @param rowIn Row used only as input
|
||||||
* @param rowInOut Row used as input and to receive output after rotation
|
* @param rowInOut Row used as input and to receive output after rotation
|
||||||
* @param rowOut Row receiving the output
|
* @param rowOut Row receiving the output
|
||||||
@ -539,10 +542,10 @@ inline void reducedDuplexRowSetupv5(uint64_t *state, uint64_t *rowIn, uint64_t *
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", writing the output "rand"
|
* Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", writing the output "rand"
|
||||||
* on M[rowOut] and making "M[rowInOut] = M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit
|
* on M[rowOut] and making "M[rowInOut] = M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit
|
||||||
* rotation to the left.
|
* rotation to the left.
|
||||||
*
|
*
|
||||||
* @param state The current state of the sponge
|
* @param state The current state of the sponge
|
||||||
* @param rowIn Row used only as input
|
* @param rowIn Row used only as input
|
||||||
* @param rowInOut Row used as input and to receive output after rotation
|
* @param rowInOut Row used as input and to receive output after rotation
|
||||||
* @param rowOut Row receiving the output
|
* @param rowOut Row receiving the output
|
||||||
@ -668,10 +671,10 @@ inline void reducedDuplexRowSetupv5c(uint64_t *state, uint64_t *rowIn, uint64_t
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", using the output "rand"
|
* Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", using the output "rand"
|
||||||
* to make "M[rowOut][col] = M[rowOut][col] XOR rand" and "M[rowInOut] = M[rowInOut] XOR rotW(rand)",
|
* to make "M[rowOut][col] = M[rowOut][col] XOR rand" and "M[rowInOut] = M[rowInOut] XOR rotW(rand)",
|
||||||
* where rotW is a 64-bit rotation to the left.
|
* where rotW is a 64-bit rotation to the left.
|
||||||
*
|
*
|
||||||
* @param state The current state of the sponge
|
* @param state The current state of the sponge
|
||||||
* @param rowIn Row used only as input
|
* @param rowIn Row used only as input
|
||||||
* @param rowInOut Row used as input and to receive output after rotation
|
* @param rowInOut Row used as input and to receive output after rotation
|
||||||
* @param rowOut Row receiving the output
|
* @param rowOut Row receiving the output
|
||||||
@ -717,7 +720,7 @@ inline void reducedDuplexRowd(uint64_t *state, uint64_t *rowIn, uint64_t *rowInO
|
|||||||
ptrWordOut[11] ^= state[11];
|
ptrWordOut[11] ^= state[11];
|
||||||
|
|
||||||
//M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)
|
//M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)
|
||||||
|
|
||||||
|
|
||||||
//Goes to next block
|
//Goes to next block
|
||||||
ptrWordOut += BLOCK_LEN_INT64;
|
ptrWordOut += BLOCK_LEN_INT64;
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
/**
|
/**
|
||||||
* Header file for Blake2b's internal permutation in the form of a sponge.
|
* Header file for Blake2b's internal permutation in the form of a sponge.
|
||||||
* This code is based on the original Blake2b's implementation provided by
|
* This code is based on the original Blake2b's implementation provided by
|
||||||
* Samuel Neves (https://blake2.net/)
|
* Samuel Neves (https://blake2.net/)
|
||||||
*
|
*
|
||||||
* Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
|
* Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
|
||||||
*
|
*
|
||||||
* This software is hereby placed in the public domain.
|
* This software is hereby placed in the public domain.
|
||||||
*
|
*
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
|
||||||
@ -78,16 +78,16 @@ void initState(uint64_t state[/*16*/]);
|
|||||||
|
|
||||||
//---- Squeezes
|
//---- Squeezes
|
||||||
void squeeze(uint64_t *state, unsigned char *out, unsigned int len);
|
void squeeze(uint64_t *state, unsigned char *out, unsigned int len);
|
||||||
void reducedSqueezeRow0(uint64_t* state, uint64_t* row);
|
void reducedSqueezeRow0(uint64_t* state, uint64_t* row, uint64_t nCols);
|
||||||
|
|
||||||
//---- Absorbs
|
//---- Absorbs
|
||||||
void absorbBlock(uint64_t *state, const uint64_t *in);
|
void absorbBlock(uint64_t *state, const uint64_t *in);
|
||||||
void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in);
|
void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in);
|
||||||
|
|
||||||
//---- Duplexes
|
//---- Duplexes
|
||||||
void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut);
|
void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, uint64_t nCols);
|
||||||
void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
|
void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols);
|
||||||
void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
|
void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols);
|
||||||
|
|
||||||
//---- Misc
|
//---- Misc
|
||||||
void printArray(unsigned char *array, unsigned int size, char *name);
|
void printArray(unsigned char *array, unsigned int size, char *name);
|
||||||
|
405
algorithm/Sponge_old.c
Normal file
405
algorithm/Sponge_old.c
Normal file
@ -0,0 +1,405 @@
|
|||||||
|
/**
|
||||||
|
* A simple implementation of Blake2b's internal permutation
|
||||||
|
* in the form of a sponge.
|
||||||
|
*
|
||||||
|
* Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
|
||||||
|
*
|
||||||
|
* This software is hereby placed in the public domain.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
|
||||||
|
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||||
|
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||||
|
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||||
|
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||||
|
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include "Sponge_old.h"
|
||||||
|
#include "Lyra2_old.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initializes the Sponge State. The first 512 bits are set to zeros and the remainder
|
||||||
|
* receive Blake2b's IV as per Blake2b's specification. <b>Note:</b> Even though sponges
|
||||||
|
* typically have their internal state initialized with zeros, Blake2b's G function
|
||||||
|
* has a fixed point: if the internal state and message are both filled with zeros. the
|
||||||
|
* resulting permutation will always be a block filled with zeros; this happens because
|
||||||
|
* Blake2b does not use the constants originally employed in Blake2 inside its G function,
|
||||||
|
* relying on the IV for avoiding possible fixed points.
|
||||||
|
*
|
||||||
|
* @param state The 1024-bit array to be initialized
|
||||||
|
*/
|
||||||
|
inline void initStateO(uint64_t state[/*16*/]) {
|
||||||
|
//First 512 bis are zeros
|
||||||
|
memset(state, 0, 64);
|
||||||
|
//Remainder BLOCK_LEN_BLAKE2_SAFE_BYTES are reserved to the IV
|
||||||
|
state[8] = blake2b_IV[0];
|
||||||
|
state[9] = blake2b_IV[1];
|
||||||
|
state[10] = blake2b_IV[2];
|
||||||
|
state[11] = blake2b_IV[3];
|
||||||
|
state[12] = blake2b_IV[4];
|
||||||
|
state[13] = blake2b_IV[5];
|
||||||
|
state[14] = blake2b_IV[6];
|
||||||
|
state[15] = blake2b_IV[7];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Execute Blake2b's G function, with all 12 rounds.
|
||||||
|
*
|
||||||
|
* @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function
|
||||||
|
*/
|
||||||
|
inline static void blake2bLyra(uint64_t *v) {
|
||||||
|
ROUND_LYRA(0);
|
||||||
|
ROUND_LYRA(1);
|
||||||
|
ROUND_LYRA(2);
|
||||||
|
ROUND_LYRA(3);
|
||||||
|
ROUND_LYRA(4);
|
||||||
|
ROUND_LYRA(5);
|
||||||
|
ROUND_LYRA(6);
|
||||||
|
ROUND_LYRA(7);
|
||||||
|
ROUND_LYRA(8);
|
||||||
|
ROUND_LYRA(9);
|
||||||
|
ROUND_LYRA(10);
|
||||||
|
ROUND_LYRA(11);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Executes a reduced version of Blake2b's G function with only one round
|
||||||
|
* @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function
|
||||||
|
*/
|
||||||
|
inline static void reducedBlake2bLyra(uint64_t *v) {
|
||||||
|
ROUND_LYRA(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs a squeeze operation, using Blake2b's G function as the
|
||||||
|
* internal permutation
|
||||||
|
*
|
||||||
|
* @param state The current state of the sponge
|
||||||
|
* @param out Array that will receive the data squeezed
|
||||||
|
* @param len The number of bytes to be squeezed into the "out" array
|
||||||
|
*/
|
||||||
|
inline void squeezeO(uint64_t *state, byte *out, unsigned int len) {
|
||||||
|
int fullBlocks = len / BLOCK_LEN_BYTES;
|
||||||
|
byte *ptr = out;
|
||||||
|
int i;
|
||||||
|
//Squeezes full blocks
|
||||||
|
for (i = 0; i < fullBlocks; i++) {
|
||||||
|
memcpy(ptr, state, BLOCK_LEN_BYTES);
|
||||||
|
blake2bLyra(state);
|
||||||
|
ptr += BLOCK_LEN_BYTES;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Squeezes remaining bytes
|
||||||
|
memcpy(ptr, state, (len % BLOCK_LEN_BYTES));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs an absorb operation for a single block (BLOCK_LEN_INT64 words
|
||||||
|
* of type uint64_t), using Blake2b's G function as the internal permutation
|
||||||
|
*
|
||||||
|
* @param state The current state of the sponge
|
||||||
|
* @param in The block to be absorbed (BLOCK_LEN_INT64 words)
|
||||||
|
*/
|
||||||
|
inline void absorbBlockO(uint64_t *state, const uint64_t *in) {
|
||||||
|
//XORs the first BLOCK_LEN_INT64 words of "in" with the current state
|
||||||
|
state[0] ^= in[0];
|
||||||
|
state[1] ^= in[1];
|
||||||
|
state[2] ^= in[2];
|
||||||
|
state[3] ^= in[3];
|
||||||
|
state[4] ^= in[4];
|
||||||
|
state[5] ^= in[5];
|
||||||
|
state[6] ^= in[6];
|
||||||
|
state[7] ^= in[7];
|
||||||
|
state[8] ^= in[8];
|
||||||
|
state[9] ^= in[9];
|
||||||
|
state[10] ^= in[10];
|
||||||
|
state[11] ^= in[11];
|
||||||
|
|
||||||
|
//Applies the transformation f to the sponge's state
|
||||||
|
blake2bLyra(state);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs an absorb operation for a single block (BLOCK_LEN_BLAKE2_SAFE_INT64
|
||||||
|
* words of type uint64_t), using Blake2b's G function as the internal permutation
|
||||||
|
*
|
||||||
|
* @param state The current state of the sponge
|
||||||
|
* @param in The block to be absorbed (BLOCK_LEN_BLAKE2_SAFE_INT64 words)
|
||||||
|
*/
|
||||||
|
inline void absorbBlockBlake2SafeO(uint64_t *state, const uint64_t *in) {
|
||||||
|
//XORs the first BLOCK_LEN_BLAKE2_SAFE_INT64 words of "in" with the current state
|
||||||
|
state[0] ^= in[0];
|
||||||
|
state[1] ^= in[1];
|
||||||
|
state[2] ^= in[2];
|
||||||
|
state[3] ^= in[3];
|
||||||
|
state[4] ^= in[4];
|
||||||
|
state[5] ^= in[5];
|
||||||
|
state[6] ^= in[6];
|
||||||
|
state[7] ^= in[7];
|
||||||
|
|
||||||
|
//Applies the transformation f to the sponge's state
|
||||||
|
blake2bLyra(state);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs a reduced squeeze operation for a single row, from the highest to
|
||||||
|
* the lowest index, using the reduced-round Blake2b's G function as the
|
||||||
|
* internal permutation
|
||||||
|
*
|
||||||
|
* @param state The current state of the sponge
|
||||||
|
* @param rowOut Row to receive the data squeezed
|
||||||
|
*/
|
||||||
|
inline void reducedSqueezeRow0O(uint64_t* state, uint64_t* rowOut) {
|
||||||
|
uint64_t* ptrWord = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to M[0][C-1]
|
||||||
|
int i;
|
||||||
|
//M[row][C-1-col] = H.reduced_squeeze()
|
||||||
|
for (i = 0; i < N_COLS; i++) {
|
||||||
|
ptrWord[0] = state[0];
|
||||||
|
ptrWord[1] = state[1];
|
||||||
|
ptrWord[2] = state[2];
|
||||||
|
ptrWord[3] = state[3];
|
||||||
|
ptrWord[4] = state[4];
|
||||||
|
ptrWord[5] = state[5];
|
||||||
|
ptrWord[6] = state[6];
|
||||||
|
ptrWord[7] = state[7];
|
||||||
|
ptrWord[8] = state[8];
|
||||||
|
ptrWord[9] = state[9];
|
||||||
|
ptrWord[10] = state[10];
|
||||||
|
ptrWord[11] = state[11];
|
||||||
|
|
||||||
|
//Goes to next block (column) that will receive the squeezed data
|
||||||
|
ptrWord -= BLOCK_LEN_INT64;
|
||||||
|
|
||||||
|
//Applies the reduced-round transformation f to the sponge's state
|
||||||
|
reducedBlake2bLyra(state);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs a reduced duplex operation for a single row, from the highest to
|
||||||
|
* the lowest index, using the reduced-round Blake2b's G function as the
|
||||||
|
* internal permutation
|
||||||
|
*
|
||||||
|
* @param state The current state of the sponge
|
||||||
|
* @param rowIn Row to feed the sponge
|
||||||
|
* @param rowOut Row to receive the sponge's output
|
||||||
|
*/
|
||||||
|
inline void reducedDuplexRow1O(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut) {
|
||||||
|
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
|
||||||
|
uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < N_COLS; i++) {
|
||||||
|
|
||||||
|
//Absorbing "M[prev][col]"
|
||||||
|
state[0] ^= (ptrWordIn[0]);
|
||||||
|
state[1] ^= (ptrWordIn[1]);
|
||||||
|
state[2] ^= (ptrWordIn[2]);
|
||||||
|
state[3] ^= (ptrWordIn[3]);
|
||||||
|
state[4] ^= (ptrWordIn[4]);
|
||||||
|
state[5] ^= (ptrWordIn[5]);
|
||||||
|
state[6] ^= (ptrWordIn[6]);
|
||||||
|
state[7] ^= (ptrWordIn[7]);
|
||||||
|
state[8] ^= (ptrWordIn[8]);
|
||||||
|
state[9] ^= (ptrWordIn[9]);
|
||||||
|
state[10] ^= (ptrWordIn[10]);
|
||||||
|
state[11] ^= (ptrWordIn[11]);
|
||||||
|
|
||||||
|
//Applies the reduced-round transformation f to the sponge's state
|
||||||
|
reducedBlake2bLyra(state);
|
||||||
|
|
||||||
|
//M[row][C-1-col] = M[prev][col] XOR rand
|
||||||
|
ptrWordOut[0] = ptrWordIn[0] ^ state[0];
|
||||||
|
ptrWordOut[1] = ptrWordIn[1] ^ state[1];
|
||||||
|
ptrWordOut[2] = ptrWordIn[2] ^ state[2];
|
||||||
|
ptrWordOut[3] = ptrWordIn[3] ^ state[3];
|
||||||
|
ptrWordOut[4] = ptrWordIn[4] ^ state[4];
|
||||||
|
ptrWordOut[5] = ptrWordIn[5] ^ state[5];
|
||||||
|
ptrWordOut[6] = ptrWordIn[6] ^ state[6];
|
||||||
|
ptrWordOut[7] = ptrWordIn[7] ^ state[7];
|
||||||
|
ptrWordOut[8] = ptrWordIn[8] ^ state[8];
|
||||||
|
ptrWordOut[9] = ptrWordIn[9] ^ state[9];
|
||||||
|
ptrWordOut[10] = ptrWordIn[10] ^ state[10];
|
||||||
|
ptrWordOut[11] = ptrWordIn[11] ^ state[11];
|
||||||
|
|
||||||
|
|
||||||
|
//Input: next column (i.e., next block in sequence)
|
||||||
|
ptrWordIn += BLOCK_LEN_INT64;
|
||||||
|
//Output: goes to previous column
|
||||||
|
ptrWordOut -= BLOCK_LEN_INT64;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e.,
|
||||||
|
* the wordwise addition of two columns, ignoring carries between words). The
|
||||||
|
* output of this operation, "rand", is then used to make
|
||||||
|
* "M[rowOut][(N_COLS-1)-col] = M[rowIn][col] XOR rand" and
|
||||||
|
* "M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)", where rotW is a 64-bit
|
||||||
|
* rotation to the left and N_COLS is a system parameter.
|
||||||
|
*
|
||||||
|
* @param state The current state of the sponge
|
||||||
|
* @param rowIn Row used only as input
|
||||||
|
* @param rowInOut Row used as input and to receive output after rotation
|
||||||
|
* @param rowOut Row receiving the output
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
inline void reducedDuplexRowSetupO(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
|
||||||
|
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
|
||||||
|
uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
|
||||||
|
uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < N_COLS; i++) {
|
||||||
|
//Absorbing "M[prev] [+] M[row*]"
|
||||||
|
state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]);
|
||||||
|
state[1] ^= (ptrWordIn[1] + ptrWordInOut[1]);
|
||||||
|
state[2] ^= (ptrWordIn[2] + ptrWordInOut[2]);
|
||||||
|
state[3] ^= (ptrWordIn[3] + ptrWordInOut[3]);
|
||||||
|
state[4] ^= (ptrWordIn[4] + ptrWordInOut[4]);
|
||||||
|
state[5] ^= (ptrWordIn[5] + ptrWordInOut[5]);
|
||||||
|
state[6] ^= (ptrWordIn[6] + ptrWordInOut[6]);
|
||||||
|
state[7] ^= (ptrWordIn[7] + ptrWordInOut[7]);
|
||||||
|
state[8] ^= (ptrWordIn[8] + ptrWordInOut[8]);
|
||||||
|
state[9] ^= (ptrWordIn[9] + ptrWordInOut[9]);
|
||||||
|
state[10] ^= (ptrWordIn[10] + ptrWordInOut[10]);
|
||||||
|
state[11] ^= (ptrWordIn[11] + ptrWordInOut[11]);
|
||||||
|
|
||||||
|
//Applies the reduced-round transformation f to the sponge's state
|
||||||
|
reducedBlake2bLyra(state);
|
||||||
|
|
||||||
|
//M[row][col] = M[prev][col] XOR rand
|
||||||
|
ptrWordOut[0] = ptrWordIn[0] ^ state[0];
|
||||||
|
ptrWordOut[1] = ptrWordIn[1] ^ state[1];
|
||||||
|
ptrWordOut[2] = ptrWordIn[2] ^ state[2];
|
||||||
|
ptrWordOut[3] = ptrWordIn[3] ^ state[3];
|
||||||
|
ptrWordOut[4] = ptrWordIn[4] ^ state[4];
|
||||||
|
ptrWordOut[5] = ptrWordIn[5] ^ state[5];
|
||||||
|
ptrWordOut[6] = ptrWordIn[6] ^ state[6];
|
||||||
|
ptrWordOut[7] = ptrWordIn[7] ^ state[7];
|
||||||
|
ptrWordOut[8] = ptrWordIn[8] ^ state[8];
|
||||||
|
ptrWordOut[9] = ptrWordIn[9] ^ state[9];
|
||||||
|
ptrWordOut[10] = ptrWordIn[10] ^ state[10];
|
||||||
|
ptrWordOut[11] = ptrWordIn[11] ^ state[11];
|
||||||
|
|
||||||
|
//M[row*][col] = M[row*][col] XOR rotW(rand)
|
||||||
|
ptrWordInOut[0] ^= state[11];
|
||||||
|
ptrWordInOut[1] ^= state[0];
|
||||||
|
ptrWordInOut[2] ^= state[1];
|
||||||
|
ptrWordInOut[3] ^= state[2];
|
||||||
|
ptrWordInOut[4] ^= state[3];
|
||||||
|
ptrWordInOut[5] ^= state[4];
|
||||||
|
ptrWordInOut[6] ^= state[5];
|
||||||
|
ptrWordInOut[7] ^= state[6];
|
||||||
|
ptrWordInOut[8] ^= state[7];
|
||||||
|
ptrWordInOut[9] ^= state[8];
|
||||||
|
ptrWordInOut[10] ^= state[9];
|
||||||
|
ptrWordInOut[11] ^= state[10];
|
||||||
|
|
||||||
|
//Inputs: next column (i.e., next block in sequence)
|
||||||
|
ptrWordInOut += BLOCK_LEN_INT64;
|
||||||
|
ptrWordIn += BLOCK_LEN_INT64;
|
||||||
|
//Output: goes to previous column
|
||||||
|
ptrWordOut -= BLOCK_LEN_INT64;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e.,
|
||||||
|
* the wordwise addition of two columns, ignoring carries between words). The
|
||||||
|
* output of this operation, "rand", is then used to make
|
||||||
|
* "M[rowOut][col] = M[rowOut][col] XOR rand" and
|
||||||
|
* "M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)", where rotW is a 64-bit
|
||||||
|
* rotation to the left.
|
||||||
|
*
|
||||||
|
* @param state The current state of the sponge
|
||||||
|
* @param rowIn Row used only as input
|
||||||
|
* @param rowInOut Row used as input and to receive output after rotation
|
||||||
|
* @param rowOut Row receiving the output
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
inline void reducedDuplexRowO(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
|
||||||
|
uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
|
||||||
|
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
|
||||||
|
uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < N_COLS; i++) {
|
||||||
|
|
||||||
|
//Absorbing "M[prev] [+] M[row*]"
|
||||||
|
state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]);
|
||||||
|
state[1] ^= (ptrWordIn[1] + ptrWordInOut[1]);
|
||||||
|
state[2] ^= (ptrWordIn[2] + ptrWordInOut[2]);
|
||||||
|
state[3] ^= (ptrWordIn[3] + ptrWordInOut[3]);
|
||||||
|
state[4] ^= (ptrWordIn[4] + ptrWordInOut[4]);
|
||||||
|
state[5] ^= (ptrWordIn[5] + ptrWordInOut[5]);
|
||||||
|
state[6] ^= (ptrWordIn[6] + ptrWordInOut[6]);
|
||||||
|
state[7] ^= (ptrWordIn[7] + ptrWordInOut[7]);
|
||||||
|
state[8] ^= (ptrWordIn[8] + ptrWordInOut[8]);
|
||||||
|
state[9] ^= (ptrWordIn[9] + ptrWordInOut[9]);
|
||||||
|
state[10] ^= (ptrWordIn[10] + ptrWordInOut[10]);
|
||||||
|
state[11] ^= (ptrWordIn[11] + ptrWordInOut[11]);
|
||||||
|
|
||||||
|
//Applies the reduced-round transformation f to the sponge's state
|
||||||
|
reducedBlake2bLyra(state);
|
||||||
|
|
||||||
|
//M[rowOut][col] = M[rowOut][col] XOR rand
|
||||||
|
ptrWordOut[0] ^= state[0];
|
||||||
|
ptrWordOut[1] ^= state[1];
|
||||||
|
ptrWordOut[2] ^= state[2];
|
||||||
|
ptrWordOut[3] ^= state[3];
|
||||||
|
ptrWordOut[4] ^= state[4];
|
||||||
|
ptrWordOut[5] ^= state[5];
|
||||||
|
ptrWordOut[6] ^= state[6];
|
||||||
|
ptrWordOut[7] ^= state[7];
|
||||||
|
ptrWordOut[8] ^= state[8];
|
||||||
|
ptrWordOut[9] ^= state[9];
|
||||||
|
ptrWordOut[10] ^= state[10];
|
||||||
|
ptrWordOut[11] ^= state[11];
|
||||||
|
|
||||||
|
//M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)
|
||||||
|
ptrWordInOut[0] ^= state[11];
|
||||||
|
ptrWordInOut[1] ^= state[0];
|
||||||
|
ptrWordInOut[2] ^= state[1];
|
||||||
|
ptrWordInOut[3] ^= state[2];
|
||||||
|
ptrWordInOut[4] ^= state[3];
|
||||||
|
ptrWordInOut[5] ^= state[4];
|
||||||
|
ptrWordInOut[6] ^= state[5];
|
||||||
|
ptrWordInOut[7] ^= state[6];
|
||||||
|
ptrWordInOut[8] ^= state[7];
|
||||||
|
ptrWordInOut[9] ^= state[8];
|
||||||
|
ptrWordInOut[10] ^= state[9];
|
||||||
|
ptrWordInOut[11] ^= state[10];
|
||||||
|
|
||||||
|
//Goes to next block
|
||||||
|
ptrWordOut += BLOCK_LEN_INT64;
|
||||||
|
ptrWordInOut += BLOCK_LEN_INT64;
|
||||||
|
ptrWordIn += BLOCK_LEN_INT64;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/**
|
||||||
|
Prints an array of unsigned chars
|
||||||
|
*/
|
||||||
|
void printArrayO(unsigned char *array, unsigned int size, char *name) {
|
||||||
|
int i;
|
||||||
|
printf("%s: ", name);
|
||||||
|
for (i = 0; i < size; i++) {
|
||||||
|
printf("%2x|", array[i]);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
98
algorithm/Sponge_old.h
Normal file
98
algorithm/Sponge_old.h
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
/**
|
||||||
|
* Header file for Blake2b's internal permutation in the form of a sponge.
|
||||||
|
* This code is based on the original Blake2b's implementation provided by
|
||||||
|
* Samuel Neves (https://blake2.net/)
|
||||||
|
*
|
||||||
|
* Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
|
||||||
|
*
|
||||||
|
* This software is hereby placed in the public domain.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
|
||||||
|
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||||
|
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||||
|
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||||
|
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||||
|
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
#ifndef SPONGEOLD_H_
|
||||||
|
#define SPONGEOLD_H_
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#if defined(__GNUC__)
|
||||||
|
#define ALIGN __attribute__ ((aligned(32)))
|
||||||
|
#elif defined(_MSC_VER)
|
||||||
|
#define ALIGN __declspec(align(32))
|
||||||
|
#else
|
||||||
|
#define ALIGN
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/*Blake2b IV Array*/
|
||||||
|
static const uint64_t blake2b_IV[8] =
|
||||||
|
{
|
||||||
|
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
|
||||||
|
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
|
||||||
|
0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
|
||||||
|
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
|
||||||
|
};
|
||||||
|
|
||||||
|
/*Blake2b's rotation*/
|
||||||
|
static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
|
||||||
|
return ( w >> c ) | ( w << ( 64 - c ) );
|
||||||
|
}
|
||||||
|
|
||||||
|
/*Blake2b's G function*/
|
||||||
|
#define G(r,i,a,b,c,d) \
|
||||||
|
do { \
|
||||||
|
a = a + b; \
|
||||||
|
d = rotr64(d ^ a, 32); \
|
||||||
|
c = c + d; \
|
||||||
|
b = rotr64(b ^ c, 24); \
|
||||||
|
a = a + b; \
|
||||||
|
d = rotr64(d ^ a, 16); \
|
||||||
|
c = c + d; \
|
||||||
|
b = rotr64(b ^ c, 63); \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
|
||||||
|
/*One Round of the Blake2b's compression function*/
|
||||||
|
#define ROUND_LYRA(r) \
|
||||||
|
G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
|
||||||
|
G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
|
||||||
|
G(r,2,v[ 2],v[ 6],v[10],v[14]); \
|
||||||
|
G(r,3,v[ 3],v[ 7],v[11],v[15]); \
|
||||||
|
G(r,4,v[ 0],v[ 5],v[10],v[15]); \
|
||||||
|
G(r,5,v[ 1],v[ 6],v[11],v[12]); \
|
||||||
|
G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
|
||||||
|
G(r,7,v[ 3],v[ 4],v[ 9],v[14]);
|
||||||
|
|
||||||
|
|
||||||
|
//---- Housekeeping
|
||||||
|
void initStateO(uint64_t state[/*16*/]);
|
||||||
|
|
||||||
|
//---- Squeezes
|
||||||
|
void squeezeO(uint64_t *state, unsigned char *out, unsigned int len);
|
||||||
|
void reducedSqueezeRow0O(uint64_t* state, uint64_t* row);
|
||||||
|
|
||||||
|
//---- Absorbs
|
||||||
|
void absorbBlockO(uint64_t *state, const uint64_t *in);
|
||||||
|
void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in);
|
||||||
|
|
||||||
|
//---- Duplexes
|
||||||
|
void reducedDuplexRow1O(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut);
|
||||||
|
void reducedDuplexRowSetupO(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
|
||||||
|
void reducedDuplexRowO(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
|
||||||
|
|
||||||
|
//---- Misc
|
||||||
|
void printArrayO(unsigned char *array, unsigned int size, char *name);
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* SPONGE_H_ */
|
@ -258,14 +258,14 @@ char *set_gpu_threads(const char *_arg)
|
|||||||
if (nextptr == NULL)
|
if (nextptr == NULL)
|
||||||
return "Invalid parameters for set_gpu_threads";
|
return "Invalid parameters for set_gpu_threads";
|
||||||
val = atoi(nextptr);
|
val = atoi(nextptr);
|
||||||
if (val < 1 || val > 10)
|
if (val < 1 || val > 20) // gpu_threads increase max value to 20
|
||||||
return "Invalid value passed to set_gpu_threads";
|
return "Invalid value passed to set_gpu_threads";
|
||||||
|
|
||||||
gpus[device++].threads = val;
|
gpus[device++].threads = val;
|
||||||
|
|
||||||
while ((nextptr = strtok(NULL, ",")) != NULL) {
|
while ((nextptr = strtok(NULL, ",")) != NULL) {
|
||||||
val = atoi(nextptr);
|
val = atoi(nextptr);
|
||||||
if (val < 1 || val > 10)
|
if (val < 1 || val > 20) // gpu_threads increase max value to 20
|
||||||
return "Invalid value passed to set_gpu_threads";
|
return "Invalid value passed to set_gpu_threads";
|
||||||
|
|
||||||
gpus[device++].threads = val;
|
gpus[device++].threads = val;
|
||||||
|
525
kernel/Lyra2REv2.cl
Normal file
525
kernel/Lyra2REv2.cl
Normal file
@ -0,0 +1,525 @@
|
|||||||
|
/*
|
||||||
|
* Lyra2RE kernel implementation.
|
||||||
|
*
|
||||||
|
* ==========================(LICENSE BEGIN)============================
|
||||||
|
* Copyright (c) 2014 djm34
|
||||||
|
* Copyright (c) 2014 James Lovejoy
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files (the
|
||||||
|
* "Software"), to deal in the Software without restriction, including
|
||||||
|
* without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
* permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
* the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
* ===========================(LICENSE END)=============================
|
||||||
|
*
|
||||||
|
* @author djm34
|
||||||
|
*/
|
||||||
|
// typedef unsigned int uint;
|
||||||
|
#pragma OPENCL EXTENSION cl_amd_printf : enable
|
||||||
|
|
||||||
|
#ifndef LYRA2RE_CL
|
||||||
|
#define LYRA2RE_CL
|
||||||
|
|
||||||
|
#if __ENDIAN_LITTLE__
|
||||||
|
#define SPH_LITTLE_ENDIAN 1
|
||||||
|
#else
|
||||||
|
#define SPH_BIG_ENDIAN 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define SPH_UPTR sph_u64
|
||||||
|
|
||||||
|
typedef unsigned int sph_u32;
|
||||||
|
typedef int sph_s32;
|
||||||
|
#ifndef __OPENCL_VERSION__
|
||||||
|
typedef unsigned long sph_u64;
|
||||||
|
typedef long sph_s64;
|
||||||
|
#else
|
||||||
|
typedef unsigned long sph_u64;
|
||||||
|
typedef long sph_s64;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#define SPH_64 1
|
||||||
|
#define SPH_64_TRUE 1
|
||||||
|
|
||||||
|
#define SPH_C32(x) ((sph_u32)(x ## U))
|
||||||
|
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
|
||||||
|
|
||||||
|
#define SPH_C64(x) ((sph_u64)(x ## UL))
|
||||||
|
#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
|
||||||
|
|
||||||
|
//#define SPH_ROTL32(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
|
||||||
|
//#define SPH_ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
|
||||||
|
//#define SPH_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
|
||||||
|
//#define SPH_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
|
||||||
|
|
||||||
|
#define SPH_ROTL32(x,n) rotate(x,(uint)n) //faster with driver 14.6
|
||||||
|
#define SPH_ROTR32(x,n) rotate(x,(uint)(32-n))
|
||||||
|
#define SPH_ROTL64(x,n) rotate(x,(ulong)n)
|
||||||
|
#define SPH_ROTR64(x,n) rotate(x,(ulong)(64-n))
|
||||||
|
static inline sph_u64 ror64(sph_u64 vw, unsigned a) {
|
||||||
|
uint2 result;
|
||||||
|
uint2 v = as_uint2(vw);
|
||||||
|
unsigned n = (unsigned)(64 - a);
|
||||||
|
if (n == 32) { return as_ulong((uint2)(v.y, v.x)); }
|
||||||
|
if (n < 32) {
|
||||||
|
result.y = ((v.y << (n)) | (v.x >> (32 - n)));
|
||||||
|
result.x = ((v.x << (n)) | (v.y >> (32 - n)));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
result.y = ((v.x << (n - 32)) | (v.y >> (64 - n)));
|
||||||
|
result.x = ((v.y << (n - 32)) | (v.x >> (64 - n)));
|
||||||
|
}
|
||||||
|
return as_ulong(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
//#define SPH_ROTR64(l,n) ror64(l,n)
|
||||||
|
#define memshift 3
|
||||||
|
#include "blake256.cl"
|
||||||
|
#include "Lyra2v2.cl"
|
||||||
|
#include "keccak1600.cl"
|
||||||
|
#include "skein256.cl"
|
||||||
|
#include "cubehash.cl"
|
||||||
|
#include "bmw256.cl"
|
||||||
|
|
||||||
|
#define SWAP4(x) as_uint(as_uchar4(x).wzyx)
|
||||||
|
#define SWAP8(x) as_ulong(as_uchar8(x).s76543210)
|
||||||
|
//#define SWAP8(x) as_ulong(as_uchar8(x).s32107654)
|
||||||
|
#if SPH_BIG_ENDIAN
|
||||||
|
#define DEC64E(x) (x)
|
||||||
|
#define DEC64BE(x) (*(const __global sph_u64 *) (x));
|
||||||
|
#define DEC64LE(x) SWAP8(*(const __global sph_u64 *) (x));
|
||||||
|
#define DEC32LE(x) (*(const __global sph_u32 *) (x));
|
||||||
|
#else
|
||||||
|
#define DEC64E(x) SWAP8(x)
|
||||||
|
#define DEC64BE(x) SWAP8(*(const __global sph_u64 *) (x));
|
||||||
|
#define DEC64LE(x) (*(const __global sph_u64 *) (x));
|
||||||
|
#define DEC32LE(x) SWAP4(*(const __global sph_u32 *) (x));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef union {
|
||||||
|
unsigned char h1[32];
|
||||||
|
uint h4[8];
|
||||||
|
ulong h8[4];
|
||||||
|
} hash_t;
|
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
|
||||||
|
__kernel void search(
|
||||||
|
__global uchar* hashes,
|
||||||
|
// precalc hash from fisrt part of message
|
||||||
|
const uint h0,
|
||||||
|
const uint h1,
|
||||||
|
const uint h2,
|
||||||
|
const uint h3,
|
||||||
|
const uint h4,
|
||||||
|
const uint h5,
|
||||||
|
const uint h6,
|
||||||
|
const uint h7,
|
||||||
|
// last 12 bytes of original message
|
||||||
|
const uint in16,
|
||||||
|
const uint in17,
|
||||||
|
const uint in18
|
||||||
|
)
|
||||||
|
{
|
||||||
|
uint gid = get_global_id(0);
|
||||||
|
__global hash_t *hash = (__global hash_t *)(hashes + (4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
|
||||||
|
|
||||||
|
|
||||||
|
// __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
|
||||||
|
|
||||||
|
unsigned int h[8];
|
||||||
|
unsigned int m[16];
|
||||||
|
unsigned int v[16];
|
||||||
|
|
||||||
|
|
||||||
|
h[0]=h0;
|
||||||
|
h[1]=h1;
|
||||||
|
h[2]=h2;
|
||||||
|
h[3]=h3;
|
||||||
|
h[4]=h4;
|
||||||
|
h[5]=h5;
|
||||||
|
h[6]=h6;
|
||||||
|
h[7]=h7;
|
||||||
|
// compress 2nd round
|
||||||
|
m[0] = in16;
|
||||||
|
m[1] = in17;
|
||||||
|
m[2] = in18;
|
||||||
|
m[3] = SWAP4(gid);
|
||||||
|
|
||||||
|
for (int i = 4; i < 16; i++) {m[i] = c_Padding[i];}
|
||||||
|
|
||||||
|
for (int i = 0; i < 8; i++) {v[i] = h[i];}
|
||||||
|
|
||||||
|
v[8] = c_u256[0];
|
||||||
|
v[9] = c_u256[1];
|
||||||
|
v[10] = c_u256[2];
|
||||||
|
v[11] = c_u256[3];
|
||||||
|
v[12] = c_u256[4] ^ 640;
|
||||||
|
v[13] = c_u256[5] ^ 640;
|
||||||
|
v[14] = c_u256[6];
|
||||||
|
v[15] = c_u256[7];
|
||||||
|
|
||||||
|
for (int r = 0; r < 14; r++) {
|
||||||
|
GS(0, 4, 0x8, 0xC, 0x0);
|
||||||
|
GS(1, 5, 0x9, 0xD, 0x2);
|
||||||
|
GS(2, 6, 0xA, 0xE, 0x4);
|
||||||
|
GS(3, 7, 0xB, 0xF, 0x6);
|
||||||
|
GS(0, 5, 0xA, 0xF, 0x8);
|
||||||
|
GS(1, 6, 0xB, 0xC, 0xA);
|
||||||
|
GS(2, 7, 0x8, 0xD, 0xC);
|
||||||
|
GS(3, 4, 0x9, 0xE, 0xE);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 16; i++) {
|
||||||
|
int j = i & 7;
|
||||||
|
h[j] ^= v[i];}
|
||||||
|
|
||||||
|
for (int i=0;i<8;i++) {hash->h4[i]=SWAP4(h[i]);}
|
||||||
|
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// keccak256
|
||||||
|
|
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
|
||||||
|
__kernel void search1(__global uchar* hashes)
|
||||||
|
{
|
||||||
|
uint gid = get_global_id(0);
|
||||||
|
// __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
|
||||||
|
|
||||||
|
__global hash_t *hash = (__global hash_t *)(hashes + (4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
|
||||||
|
|
||||||
|
sph_u64 keccak_gpu_state[25];
|
||||||
|
|
||||||
|
for (int i = 0; i<25; i++) {
|
||||||
|
if (i<4) { keccak_gpu_state[i] = hash->h8[i]; }
|
||||||
|
else { keccak_gpu_state[i] = 0; }
|
||||||
|
}
|
||||||
|
keccak_gpu_state[4] = 0x0000000000000001;
|
||||||
|
keccak_gpu_state[16] = 0x8000000000000000;
|
||||||
|
|
||||||
|
keccak_block(keccak_gpu_state);
|
||||||
|
for (int i = 0; i<4; i++) { hash->h8[i] = keccak_gpu_state[i]; }
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// cubehash256
|
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
|
||||||
|
__kernel void search2(__global uchar* hashes)
|
||||||
|
{
|
||||||
|
uint gid = get_global_id(0);
|
||||||
|
__global hash_t *hash = (__global hash_t *)(hashes + (4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
|
||||||
|
|
||||||
|
|
||||||
|
sph_u32 x0 = 0xEA2BD4B4; sph_u32 x1 = 0xCCD6F29F; sph_u32 x2 = 0x63117E71;
|
||||||
|
sph_u32 x3 = 0x35481EAE; sph_u32 x4 = 0x22512D5B; sph_u32 x5 = 0xE5D94E63;
|
||||||
|
sph_u32 x6 = 0x7E624131; sph_u32 x7 = 0xF4CC12BE; sph_u32 x8 = 0xC2D0B696;
|
||||||
|
sph_u32 x9 = 0x42AF2070; sph_u32 xa = 0xD0720C35; sph_u32 xb = 0x3361DA8C;
|
||||||
|
sph_u32 xc = 0x28CCECA4; sph_u32 xd = 0x8EF8AD83; sph_u32 xe = 0x4680AC00;
|
||||||
|
sph_u32 xf = 0x40E5FBAB;
|
||||||
|
|
||||||
|
sph_u32 xg = 0xD89041C3; sph_u32 xh = 0x6107FBD5;
|
||||||
|
sph_u32 xi = 0x6C859D41; sph_u32 xj = 0xF0B26679; sph_u32 xk = 0x09392549;
|
||||||
|
sph_u32 xl = 0x5FA25603; sph_u32 xm = 0x65C892FD; sph_u32 xn = 0x93CB6285;
|
||||||
|
sph_u32 xo = 0x2AF2B5AE; sph_u32 xp = 0x9E4B4E60; sph_u32 xq = 0x774ABFDD;
|
||||||
|
sph_u32 xr = 0x85254725; sph_u32 xs = 0x15815AEB; sph_u32 xt = 0x4AB6AAD6;
|
||||||
|
sph_u32 xu = 0x9CDAF8AF; sph_u32 xv = 0xD6032C0A;
|
||||||
|
|
||||||
|
x0 ^= (hash->h4[0]);
|
||||||
|
x1 ^= (hash->h4[1]);
|
||||||
|
x2 ^= (hash->h4[2]);
|
||||||
|
x3 ^= (hash->h4[3]);
|
||||||
|
x4 ^= (hash->h4[4]);
|
||||||
|
x5 ^= (hash->h4[5]);
|
||||||
|
x6 ^= (hash->h4[6]);
|
||||||
|
x7 ^= (hash->h4[7]);
|
||||||
|
|
||||||
|
|
||||||
|
SIXTEEN_ROUNDS;
|
||||||
|
x0 ^= 0x80;
|
||||||
|
SIXTEEN_ROUNDS;
|
||||||
|
xv ^= 0x01;
|
||||||
|
for (int i = 0; i < 10; ++i) SIXTEEN_ROUNDS;
|
||||||
|
|
||||||
|
hash->h4[0] = x0;
|
||||||
|
hash->h4[1] = x1;
|
||||||
|
hash->h4[2] = x2;
|
||||||
|
hash->h4[3] = x3;
|
||||||
|
hash->h4[4] = x4;
|
||||||
|
hash->h4[5] = x5;
|
||||||
|
hash->h4[6] = x6;
|
||||||
|
hash->h4[7] = x7;
|
||||||
|
|
||||||
|
|
||||||
|
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// lyra2 algo
|
||||||
|
|
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
|
||||||
|
__kernel void search3(__global uchar* hashes,__global uchar* matrix )
|
||||||
|
{
|
||||||
|
uint gid = get_global_id(0);
|
||||||
|
// __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
|
||||||
|
__global hash_t *hash = (__global hash_t *)(hashes + (4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
|
||||||
|
__global ulong4 *DMatrix = (__global ulong4 *)(matrix + (4 * memshift * 4 * 4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
|
||||||
|
|
||||||
|
// uint offset = (4 * memshift * 4 * 4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))/32;
|
||||||
|
ulong4 state[4];
|
||||||
|
|
||||||
|
state[0].x = hash->h8[0]; //password
|
||||||
|
state[0].y = hash->h8[1]; //password
|
||||||
|
state[0].z = hash->h8[2]; //password
|
||||||
|
state[0].w = hash->h8[3]; //password
|
||||||
|
state[1] = state[0];
|
||||||
|
state[2] = (ulong4)(0x6a09e667f3bcc908UL, 0xbb67ae8584caa73bUL, 0x3c6ef372fe94f82bUL, 0xa54ff53a5f1d36f1UL);
|
||||||
|
state[3] = (ulong4)(0x510e527fade682d1UL, 0x9b05688c2b3e6c1fUL, 0x1f83d9abfb41bd6bUL, 0x5be0cd19137e2179UL);
|
||||||
|
for (int i = 0; i<12; i++) { round_lyra(state); }
|
||||||
|
|
||||||
|
state[0] ^= (ulong4)(0x20,0x20,0x20,0x01);
|
||||||
|
state[1] ^= (ulong4)(0x04,0x04,0x80,0x0100000000000000);
|
||||||
|
|
||||||
|
for (int i = 0; i<12; i++) { round_lyra(state); }
|
||||||
|
|
||||||
|
|
||||||
|
uint ps1 = (memshift * 3);
|
||||||
|
//#pragma unroll 4
|
||||||
|
for (int i = 0; i < 4; i++)
|
||||||
|
{
|
||||||
|
uint s1 = ps1 - memshift * i;
|
||||||
|
for (int j = 0; j < 3; j++)
|
||||||
|
(DMatrix)[j+s1] = state[j];
|
||||||
|
|
||||||
|
round_lyra(state);
|
||||||
|
}
|
||||||
|
|
||||||
|
reduceDuplexf(state,DMatrix);
|
||||||
|
|
||||||
|
reduceDuplexRowSetupf(1, 0, 2,state, DMatrix);
|
||||||
|
reduceDuplexRowSetupf(2, 1, 3, state,DMatrix);
|
||||||
|
|
||||||
|
|
||||||
|
uint rowa;
|
||||||
|
uint prev = 3;
|
||||||
|
for (uint i = 0; i<4; i++) {
|
||||||
|
rowa = state[0].x & 3;
|
||||||
|
reduceDuplexRowf(prev, rowa, i, state, DMatrix);
|
||||||
|
prev = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
uint shift = (memshift * 4 * rowa);
|
||||||
|
|
||||||
|
for (int j = 0; j < 3; j++)
|
||||||
|
state[j] ^= (DMatrix)[j+shift];
|
||||||
|
|
||||||
|
for (int i = 0; i < 12; i++)
|
||||||
|
round_lyra(state);
|
||||||
|
//////////////////////////////////////
|
||||||
|
|
||||||
|
|
||||||
|
for (int i = 0; i<4; i++) {hash->h8[i] = ((ulong*)state)[i];}
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//skein256
|
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
|
||||||
|
__kernel void search4(__global uchar* hashes)
|
||||||
|
{
|
||||||
|
uint gid = get_global_id(0);
|
||||||
|
// __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
|
||||||
|
__global hash_t *hash = (__global hash_t *)(hashes + (4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
|
||||||
|
|
||||||
|
|
||||||
|
sph_u64 h[9];
|
||||||
|
sph_u64 t[3];
|
||||||
|
sph_u64 dt0,dt1,dt2,dt3;
|
||||||
|
sph_u64 p0, p1, p2, p3, p4, p5, p6, p7;
|
||||||
|
h[8] = skein_ks_parity;
|
||||||
|
|
||||||
|
for (int i = 0; i<8; i++) {
|
||||||
|
h[i] = SKEIN_IV512_256[i];
|
||||||
|
h[8] ^= h[i];}
|
||||||
|
|
||||||
|
t[0]=t12[0];
|
||||||
|
t[1]=t12[1];
|
||||||
|
t[2]=t12[2];
|
||||||
|
|
||||||
|
dt0=hash->h8[0];
|
||||||
|
dt1=hash->h8[1];
|
||||||
|
dt2=hash->h8[2];
|
||||||
|
dt3=hash->h8[3];
|
||||||
|
|
||||||
|
p0 = h[0] + dt0;
|
||||||
|
p1 = h[1] + dt1;
|
||||||
|
p2 = h[2] + dt2;
|
||||||
|
p3 = h[3] + dt3;
|
||||||
|
p4 = h[4];
|
||||||
|
p5 = h[5] + t[0];
|
||||||
|
p6 = h[6] + t[1];
|
||||||
|
p7 = h[7];
|
||||||
|
|
||||||
|
#pragma unroll
|
||||||
|
for (int i = 1; i<19; i+=2) {Round_8_512(p0,p1,p2,p3,p4,p5,p6,p7,i);}
|
||||||
|
p0 ^= dt0;
|
||||||
|
p1 ^= dt1;
|
||||||
|
p2 ^= dt2;
|
||||||
|
p3 ^= dt3;
|
||||||
|
|
||||||
|
h[0] = p0;
|
||||||
|
h[1] = p1;
|
||||||
|
h[2] = p2;
|
||||||
|
h[3] = p3;
|
||||||
|
h[4] = p4;
|
||||||
|
h[5] = p5;
|
||||||
|
h[6] = p6;
|
||||||
|
h[7] = p7;
|
||||||
|
h[8] = skein_ks_parity;
|
||||||
|
|
||||||
|
for (int i = 0; i<8; i++) { h[8] ^= h[i]; }
|
||||||
|
|
||||||
|
t[0] = t12[3];
|
||||||
|
t[1] = t12[4];
|
||||||
|
t[2] = t12[5];
|
||||||
|
p5 += t[0]; //p5 already equal h[5]
|
||||||
|
p6 += t[1];
|
||||||
|
|
||||||
|
#pragma unroll
|
||||||
|
for (int i = 1; i<19; i+=2) { Round_8_512(p0, p1, p2, p3, p4, p5, p6, p7, i); }
|
||||||
|
|
||||||
|
hash->h8[0] = p0;
|
||||||
|
hash->h8[1] = p1;
|
||||||
|
hash->h8[2] = p2;
|
||||||
|
hash->h8[3] = p3;
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//cubehash
|
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
|
||||||
|
__kernel void search5(__global uchar* hashes)
|
||||||
|
{
|
||||||
|
uint gid = get_global_id(0);
|
||||||
|
__global hash_t *hash = (__global hash_t *)(hashes + (4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
|
||||||
|
|
||||||
|
sph_u32 x0 = 0xEA2BD4B4; sph_u32 x1 = 0xCCD6F29F; sph_u32 x2 = 0x63117E71;
|
||||||
|
sph_u32 x3 = 0x35481EAE; sph_u32 x4 = 0x22512D5B; sph_u32 x5 = 0xE5D94E63;
|
||||||
|
sph_u32 x6 = 0x7E624131; sph_u32 x7 = 0xF4CC12BE; sph_u32 x8 = 0xC2D0B696;
|
||||||
|
sph_u32 x9 = 0x42AF2070; sph_u32 xa = 0xD0720C35; sph_u32 xb = 0x3361DA8C;
|
||||||
|
sph_u32 xc = 0x28CCECA4; sph_u32 xd = 0x8EF8AD83; sph_u32 xe = 0x4680AC00;
|
||||||
|
sph_u32 xf = 0x40E5FBAB;
|
||||||
|
|
||||||
|
sph_u32 xg = 0xD89041C3; sph_u32 xh = 0x6107FBD5;
|
||||||
|
sph_u32 xi = 0x6C859D41; sph_u32 xj = 0xF0B26679; sph_u32 xk = 0x09392549;
|
||||||
|
sph_u32 xl = 0x5FA25603; sph_u32 xm = 0x65C892FD; sph_u32 xn = 0x93CB6285;
|
||||||
|
sph_u32 xo = 0x2AF2B5AE; sph_u32 xp = 0x9E4B4E60; sph_u32 xq = 0x774ABFDD;
|
||||||
|
sph_u32 xr = 0x85254725; sph_u32 xs = 0x15815AEB; sph_u32 xt = 0x4AB6AAD6;
|
||||||
|
sph_u32 xu = 0x9CDAF8AF; sph_u32 xv = 0xD6032C0A;
|
||||||
|
|
||||||
|
x0 ^= (hash->h4[0]);
|
||||||
|
x1 ^= (hash->h4[1]);
|
||||||
|
x2 ^= (hash->h4[2]);
|
||||||
|
x3 ^= (hash->h4[3]);
|
||||||
|
x4 ^= (hash->h4[4]);
|
||||||
|
x5 ^= (hash->h4[5]);
|
||||||
|
x6 ^= (hash->h4[6]);
|
||||||
|
x7 ^= (hash->h4[7]);
|
||||||
|
|
||||||
|
|
||||||
|
SIXTEEN_ROUNDS;
|
||||||
|
x0 ^= 0x80;
|
||||||
|
SIXTEEN_ROUNDS;
|
||||||
|
xv ^= 0x01;
|
||||||
|
for (int i = 0; i < 10; ++i) SIXTEEN_ROUNDS;
|
||||||
|
|
||||||
|
hash->h4[0] = x0;
|
||||||
|
hash->h4[1] = x1;
|
||||||
|
hash->h4[2] = x2;
|
||||||
|
hash->h4[3] = x3;
|
||||||
|
hash->h4[4] = x4;
|
||||||
|
hash->h4[5] = x5;
|
||||||
|
hash->h4[6] = x6;
|
||||||
|
hash->h4[7] = x7;
|
||||||
|
|
||||||
|
|
||||||
|
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
|
||||||
|
__kernel void search6(__global uchar* hashes, __global uint* output, const ulong target)
|
||||||
|
{
|
||||||
|
uint gid = get_global_id(0);
|
||||||
|
__global hash_t *hash = (__global hash_t *)(hashes + (4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
|
||||||
|
|
||||||
|
uint dh[16] = {
|
||||||
|
(0x40414243), (0x44454647),
|
||||||
|
(0x48494A4B), (0x4C4D4E4F),
|
||||||
|
(0x50515253), (0x54555657),
|
||||||
|
(0x58595A5B), (0x5C5D5E5F),
|
||||||
|
(0x60616263), (0x64656667),
|
||||||
|
(0x68696A6B), (0x6C6D6E6F),
|
||||||
|
(0x70717273), (0x74757677),
|
||||||
|
(0x78797A7B), (0x7C7D7E7F)
|
||||||
|
};
|
||||||
|
uint final_s[16] = {
|
||||||
|
(0xaaaaaaa0), (0xaaaaaaa1), (0xaaaaaaa2),
|
||||||
|
(0xaaaaaaa3), (0xaaaaaaa4), (0xaaaaaaa5),
|
||||||
|
(0xaaaaaaa6), (0xaaaaaaa7), (0xaaaaaaa8),
|
||||||
|
(0xaaaaaaa9), (0xaaaaaaaa), (0xaaaaaaab),
|
||||||
|
(0xaaaaaaac), (0xaaaaaaad), (0xaaaaaaae),
|
||||||
|
(0xaaaaaaaf)
|
||||||
|
};
|
||||||
|
|
||||||
|
uint message[16];
|
||||||
|
for (int i = 0; i<8; i++) message[i] = hash->h4[i];
|
||||||
|
for (int i = 9; i<14; i++) message[i] = 0;
|
||||||
|
message[8]= 0x80;
|
||||||
|
message[14]=0x100;
|
||||||
|
message[15]=0;
|
||||||
|
|
||||||
|
Compression256(message, dh);
|
||||||
|
Compression256(dh, final_s);
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
|
||||||
|
bool result = ( ((ulong*)final_s)[7] <= target);
|
||||||
|
if (result) {
|
||||||
|
output[atomic_inc(output + 0xFF)] = SWAP4(gid);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#endif // LYRA2RE_CL
|
184
kernel/Lyra2v2.cl
Normal file
184
kernel/Lyra2v2.cl
Normal file
@ -0,0 +1,184 @@
|
|||||||
|
/*
|
||||||
|
* Lyra2 kernel implementation.
|
||||||
|
*
|
||||||
|
* ==========================(LICENSE BEGIN)============================
|
||||||
|
* Copyright (c) 2014 djm34
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files (the
|
||||||
|
* "Software"), to deal in the Software without restriction, including
|
||||||
|
* without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
* permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
* the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
* ===========================(LICENSE END)=============================
|
||||||
|
*
|
||||||
|
* @author djm34
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#define ROTL64(x,n) rotate(x,(ulong)n)
|
||||||
|
#define ROTR64(x,n) rotate(x,(ulong)(64-n))
|
||||||
|
#define SWAP32(x) as_ulong(as_uint2(x).s10)
|
||||||
|
#define SWAP24(x) as_ulong(as_uchar8(x).s34567012)
|
||||||
|
#define SWAP16(x) as_ulong(as_uchar8(x).s23456701)
|
||||||
|
|
||||||
|
#define G(a,b,c,d) \
|
||||||
|
do { \
|
||||||
|
a += b; d ^= a; d = SWAP32(d); \
|
||||||
|
c += d; b ^= c; b = ROTR64(b,24); \
|
||||||
|
a += b; d ^= a; d = ROTR64(d,16); \
|
||||||
|
c += d; b ^= c; b = ROTR64(b, 63); \
|
||||||
|
\
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define G_old(a,b,c,d) \
|
||||||
|
do { \
|
||||||
|
a += b; d ^= a; d = ROTR64(d, 32); \
|
||||||
|
c += d; b ^= c; b = ROTR64(b, 24); \
|
||||||
|
a += b; d ^= a; d = ROTR64(d, 16); \
|
||||||
|
c += d; b ^= c; b = ROTR64(b, 63); \
|
||||||
|
\
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
|
||||||
|
/*One Round of the Blake2b's compression function*/
|
||||||
|
|
||||||
|
#define round_lyra(s) \
|
||||||
|
do { \
|
||||||
|
G(s[0].x, s[1].x, s[2].x, s[3].x); \
|
||||||
|
G(s[0].y, s[1].y, s[2].y, s[3].y); \
|
||||||
|
G(s[0].z, s[1].z, s[2].z, s[3].z); \
|
||||||
|
G(s[0].w, s[1].w, s[2].w, s[3].w); \
|
||||||
|
G(s[0].x, s[1].y, s[2].z, s[3].w); \
|
||||||
|
G(s[0].y, s[1].z, s[2].w, s[3].x); \
|
||||||
|
G(s[0].z, s[1].w, s[2].x, s[3].y); \
|
||||||
|
G(s[0].w, s[1].x, s[2].y, s[3].z); \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void reduceDuplexf(ulong4* state ,__global ulong4* DMatrix)
|
||||||
|
{
|
||||||
|
|
||||||
|
ulong4 state1[3];
|
||||||
|
uint ps1 = 0;
|
||||||
|
uint ps2 = (memshift * 3 + memshift * 4);
|
||||||
|
//#pragma unroll 4
|
||||||
|
for (int i = 0; i < 4; i++)
|
||||||
|
{
|
||||||
|
uint s1 = ps1 + i*memshift;
|
||||||
|
uint s2 = ps2 - i*memshift;
|
||||||
|
|
||||||
|
for (int j = 0; j < 3; j++) state1[j] = (DMatrix)[j + s1];
|
||||||
|
|
||||||
|
for (int j = 0; j < 3; j++) state[j] ^= state1[j];
|
||||||
|
round_lyra(state);
|
||||||
|
for (int j = 0; j < 3; j++) state1[j] ^= state[j];
|
||||||
|
|
||||||
|
for (int j = 0; j < 3; j++) (DMatrix)[j + s2] = state1[j];
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void reduceDuplexRowf(uint rowIn,uint rowInOut,uint rowOut,ulong4 * state, __global ulong4 * DMatrix)
|
||||||
|
{
|
||||||
|
|
||||||
|
ulong4 state1[3], state2[3];
|
||||||
|
uint ps1 = (memshift * 4 * rowIn);
|
||||||
|
uint ps2 = (memshift * 4 * rowInOut);
|
||||||
|
uint ps3 = (memshift * 4 * rowOut);
|
||||||
|
|
||||||
|
|
||||||
|
for (int i = 0; i < 4; i++)
|
||||||
|
{
|
||||||
|
uint s1 = ps1 + i*memshift;
|
||||||
|
uint s2 = ps2 + i*memshift;
|
||||||
|
uint s3 = ps3 + i*memshift;
|
||||||
|
|
||||||
|
|
||||||
|
for (int j = 0; j < 3; j++) state1[j] = (DMatrix)[j + s1];
|
||||||
|
|
||||||
|
for (int j = 0; j < 3; j++) state2[j] = (DMatrix)[j + s2];
|
||||||
|
|
||||||
|
for (int j = 0; j < 3; j++) state1[j] += state2[j];
|
||||||
|
|
||||||
|
for (int j = 0; j < 3; j++) state[j] ^= state1[j];
|
||||||
|
|
||||||
|
|
||||||
|
round_lyra(state);
|
||||||
|
|
||||||
|
((ulong*)state2)[0] ^= ((ulong*)state)[11];
|
||||||
|
for (int j = 0; j < 11; j++)
|
||||||
|
((ulong*)state2)[j + 1] ^= ((ulong*)state)[j];
|
||||||
|
|
||||||
|
if (rowInOut != rowOut) {
|
||||||
|
for (int j = 0; j < 3; j++)
|
||||||
|
(DMatrix)[j + s2] = state2[j];
|
||||||
|
for (int j = 0; j < 3; j++)
|
||||||
|
(DMatrix)[j + s3] ^= state[j];
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
for (int j = 0; j < 3; j++)
|
||||||
|
state2[j] ^= state[j];
|
||||||
|
for (int j = 0; j < 3; j++)
|
||||||
|
(DMatrix)[j + s2] = state2[j];
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void reduceDuplexRowSetupf(uint rowIn, uint rowInOut, uint rowOut, ulong4 *state, __global ulong4* DMatrix) {
|
||||||
|
|
||||||
|
ulong4 state2[3], state1[3];
|
||||||
|
uint ps1 = (memshift * 4 * rowIn);
|
||||||
|
uint ps2 = (memshift * 4 * rowInOut);
|
||||||
|
uint ps3 = (memshift * 3 + memshift * 4 * rowOut);
|
||||||
|
|
||||||
|
for (int i = 0; i < 4; i++)
|
||||||
|
{
|
||||||
|
uint s1 = ps1 + i*memshift;
|
||||||
|
uint s2 = ps2 + i*memshift;
|
||||||
|
uint s3 = ps3 - i*memshift;
|
||||||
|
|
||||||
|
for (int j = 0; j < 3; j++) state1[j] = (DMatrix)[j + s1];
|
||||||
|
|
||||||
|
for (int j = 0; j < 3; j++) state2[j] = (DMatrix)[j + s2];
|
||||||
|
for (int j = 0; j < 3; j++) {
|
||||||
|
ulong4 tmp = state1[j] + state2[j];
|
||||||
|
state[j] ^= tmp;
|
||||||
|
}
|
||||||
|
round_lyra(state);
|
||||||
|
|
||||||
|
for (int j = 0; j < 3; j++) {
|
||||||
|
state1[j] ^= state[j];
|
||||||
|
(DMatrix)[j + s3] = state1[j];
|
||||||
|
}
|
||||||
|
|
||||||
|
((ulong*)state2)[0] ^= ((ulong*)state)[11];
|
||||||
|
for (int j = 0; j < 11; j++)
|
||||||
|
((ulong*)state2)[j + 1] ^= ((ulong*)state)[j];
|
||||||
|
for (int j = 0; j < 3; j++)
|
||||||
|
(DMatrix)[j + s2] = state2[j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
128
kernel/bmw256.cl
Normal file
128
kernel/bmw256.cl
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
#define shl(x, n) ((x) << (n))
|
||||||
|
#define shr(x, n) ((x) >> (n))
|
||||||
|
//#define SHR(x, n) SHR2(x, n)
|
||||||
|
//#define SHL(x, n) SHL2(x, n)
|
||||||
|
|
||||||
|
|
||||||
|
#define SPH_ROTL32(x,n) rotate(x,(uint)n)
|
||||||
|
#define ss0(x) (shr((x), 1) ^ shl((x), 3) ^ SPH_ROTL32((x), 4) ^ SPH_ROTL32((x), 19))
|
||||||
|
#define ss1(x) (shr((x), 1) ^ shl((x), 2) ^ SPH_ROTL32((x), 8) ^ SPH_ROTL32((x), 23))
|
||||||
|
#define ss2(x) (shr((x), 2) ^ shl((x), 1) ^ SPH_ROTL32((x), 12) ^ SPH_ROTL32((x), 25))
|
||||||
|
#define ss3(x) (shr((x), 2) ^ shl((x), 2) ^ SPH_ROTL32((x), 15) ^ SPH_ROTL32((x), 29))
|
||||||
|
#define ss4(x) (shr((x), 1) ^ (x))
|
||||||
|
#define ss5(x) (shr((x), 2) ^ (x))
|
||||||
|
#define rs1(x) SPH_ROTL32((x), 3)
|
||||||
|
#define rs2(x) SPH_ROTL32((x), 7)
|
||||||
|
#define rs3(x) SPH_ROTL32((x), 13)
|
||||||
|
#define rs4(x) SPH_ROTL32((x), 16)
|
||||||
|
#define rs5(x) SPH_ROTL32((x), 19)
|
||||||
|
#define rs6(x) SPH_ROTL32((x), 23)
|
||||||
|
#define rs7(x) SPH_ROTL32((x), 27)
|
||||||
|
|
||||||
|
/* Message expansion function 1 */
|
||||||
|
static uint expand32_1(int i, uint *M32, uint *H, uint *Q)
|
||||||
|
{
|
||||||
|
|
||||||
|
return (ss1(Q[i - 16]) + ss2(Q[i - 15]) + ss3(Q[i - 14]) + ss0(Q[i - 13])
|
||||||
|
+ ss1(Q[i - 12]) + ss2(Q[i - 11]) + ss3(Q[i - 10]) + ss0(Q[i - 9])
|
||||||
|
+ ss1(Q[i - 8]) + ss2(Q[i - 7]) + ss3(Q[i - 6]) + ss0(Q[i - 5])
|
||||||
|
+ ss1(Q[i - 4]) + ss2(Q[i - 3]) + ss3(Q[i - 2]) + ss0(Q[i - 1])
|
||||||
|
+ ((i*(0x05555555ul) + SPH_ROTL32(M32[(i - 16) % 16], ((i - 16) % 16) + 1) + SPH_ROTL32(M32[(i - 13) % 16], ((i - 13) % 16) + 1) - SPH_ROTL32(M32[(i - 6) % 16], ((i - 6) % 16) + 1)) ^ H[(i - 16 + 7) % 16]));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Message expansion function 2 */
|
||||||
|
static uint expand32_2(int i, uint *M32, uint *H, uint *Q)
|
||||||
|
{
|
||||||
|
|
||||||
|
return (Q[i - 16] + rs1(Q[i - 15]) + Q[i - 14] + rs2(Q[i - 13])
|
||||||
|
+ Q[i - 12] + rs3(Q[i - 11]) + Q[i - 10] + rs4(Q[i - 9])
|
||||||
|
+ Q[i - 8] + rs5(Q[i - 7]) + Q[i - 6] + rs6(Q[i - 5])
|
||||||
|
+ Q[i - 4] + rs7(Q[i - 3]) + ss4(Q[i - 2]) + ss5(Q[i - 1])
|
||||||
|
+ ((i*(0x05555555ul) + SPH_ROTL32(M32[(i - 16) % 16], ((i - 16) % 16) + 1) + SPH_ROTL32(M32[(i - 13) % 16], ((i - 13) % 16) + 1) - SPH_ROTL32(M32[(i - 6) % 16], ((i - 6) % 16) + 1)) ^ H[(i - 16 + 7) % 16]));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
static void Compression256(uint *M32, uint *H)
|
||||||
|
{
|
||||||
|
|
||||||
|
int i;
|
||||||
|
uint XL32, XH32, Q[32];
|
||||||
|
|
||||||
|
|
||||||
|
Q[0] = (M32[5] ^ H[5]) - (M32[7] ^ H[7]) + (M32[10] ^ H[10]) + (M32[13] ^ H[13]) + (M32[14] ^ H[14]);
|
||||||
|
Q[1] = (M32[6] ^ H[6]) - (M32[8] ^ H[8]) + (M32[11] ^ H[11]) + (M32[14] ^ H[14]) - (M32[15] ^ H[15]);
|
||||||
|
Q[2] = (M32[0] ^ H[0]) + (M32[7] ^ H[7]) + (M32[9] ^ H[9]) - (M32[12] ^ H[12]) + (M32[15] ^ H[15]);
|
||||||
|
Q[3] = (M32[0] ^ H[0]) - (M32[1] ^ H[1]) + (M32[8] ^ H[8]) - (M32[10] ^ H[10]) + (M32[13] ^ H[13]);
|
||||||
|
Q[4] = (M32[1] ^ H[1]) + (M32[2] ^ H[2]) + (M32[9] ^ H[9]) - (M32[11] ^ H[11]) - (M32[14] ^ H[14]);
|
||||||
|
Q[5] = (M32[3] ^ H[3]) - (M32[2] ^ H[2]) + (M32[10] ^ H[10]) - (M32[12] ^ H[12]) + (M32[15] ^ H[15]);
|
||||||
|
Q[6] = (M32[4] ^ H[4]) - (M32[0] ^ H[0]) - (M32[3] ^ H[3]) - (M32[11] ^ H[11]) + (M32[13] ^ H[13]);
|
||||||
|
Q[7] = (M32[1] ^ H[1]) - (M32[4] ^ H[4]) - (M32[5] ^ H[5]) - (M32[12] ^ H[12]) - (M32[14] ^ H[14]);
|
||||||
|
Q[8] = (M32[2] ^ H[2]) - (M32[5] ^ H[5]) - (M32[6] ^ H[6]) + (M32[13] ^ H[13]) - (M32[15] ^ H[15]);
|
||||||
|
Q[9] = (M32[0] ^ H[0]) - (M32[3] ^ H[3]) + (M32[6] ^ H[6]) - (M32[7] ^ H[7]) + (M32[14] ^ H[14]);
|
||||||
|
Q[10] = (M32[8] ^ H[8]) - (M32[1] ^ H[1]) - (M32[4] ^ H[4]) - (M32[7] ^ H[7]) + (M32[15] ^ H[15]);
|
||||||
|
Q[11] = (M32[8] ^ H[8]) - (M32[0] ^ H[0]) - (M32[2] ^ H[2]) - (M32[5] ^ H[5]) + (M32[9] ^ H[9]);
|
||||||
|
Q[12] = (M32[1] ^ H[1]) + (M32[3] ^ H[3]) - (M32[6] ^ H[6]) - (M32[9] ^ H[9]) + (M32[10] ^ H[10]);
|
||||||
|
Q[13] = (M32[2] ^ H[2]) + (M32[4] ^ H[4]) + (M32[7] ^ H[7]) + (M32[10] ^ H[10]) + (M32[11] ^ H[11]);
|
||||||
|
Q[14] = (M32[3] ^ H[3]) - (M32[5] ^ H[5]) + (M32[8] ^ H[8]) - (M32[11] ^ H[11]) - (M32[12] ^ H[12]);
|
||||||
|
Q[15] = (M32[12] ^ H[12]) - (M32[4] ^ H[4]) - (M32[6] ^ H[6]) - (M32[9] ^ H[9]) + (M32[13] ^ H[13]);
|
||||||
|
|
||||||
|
/* Diffuse the differences in every word in a bijective manner with ssi, and then add the values of the previous double pipe.*/
|
||||||
|
Q[0] = ss0(Q[0]) + H[1];
|
||||||
|
Q[1] = ss1(Q[1]) + H[2];
|
||||||
|
Q[2] = ss2(Q[2]) + H[3];
|
||||||
|
Q[3] = ss3(Q[3]) + H[4];
|
||||||
|
Q[4] = ss4(Q[4]) + H[5];
|
||||||
|
Q[5] = ss0(Q[5]) + H[6];
|
||||||
|
Q[6] = ss1(Q[6]) + H[7];
|
||||||
|
Q[7] = ss2(Q[7]) + H[8];
|
||||||
|
Q[8] = ss3(Q[8]) + H[9];
|
||||||
|
Q[9] = ss4(Q[9]) + H[10];
|
||||||
|
Q[10] = ss0(Q[10]) + H[11];
|
||||||
|
Q[11] = ss1(Q[11]) + H[12];
|
||||||
|
Q[12] = ss2(Q[12]) + H[13];
|
||||||
|
Q[13] = ss3(Q[13]) + H[14];
|
||||||
|
Q[14] = ss4(Q[14]) + H[15];
|
||||||
|
Q[15] = ss0(Q[15]) + H[0];
|
||||||
|
|
||||||
|
/* This is the Message expansion or f_1 in the documentation. */
|
||||||
|
/* It has 16 rounds. */
|
||||||
|
/* Blue Midnight Wish has two tunable security parameters. */
|
||||||
|
/* The parameters are named EXPAND_1_ROUNDS and EXPAND_2_ROUNDS. */
|
||||||
|
/* The following relation for these parameters should is satisfied: */
|
||||||
|
/* EXPAND_1_ROUNDS + EXPAND_2_ROUNDS = 16 */
|
||||||
|
|
||||||
|
for (i = 0; i<2; i++)
|
||||||
|
Q[i + 16] = expand32_1(i + 16, M32, H, Q);
|
||||||
|
|
||||||
|
for (i = 2; i<16; i++)
|
||||||
|
Q[i + 16] = expand32_2(i + 16, M32, H, Q);
|
||||||
|
|
||||||
|
/* Blue Midnight Wish has two temporary cummulative variables that accumulate via XORing */
|
||||||
|
/* 16 new variables that are prooduced in the Message Expansion part. */
|
||||||
|
XL32 = Q[16] ^ Q[17] ^ Q[18] ^ Q[19] ^ Q[20] ^ Q[21] ^ Q[22] ^ Q[23];
|
||||||
|
XH32 = XL32^Q[24] ^ Q[25] ^ Q[26] ^ Q[27] ^ Q[28] ^ Q[29] ^ Q[30] ^ Q[31];
|
||||||
|
|
||||||
|
|
||||||
|
/* This part is the function f_2 - in the documentation */
|
||||||
|
|
||||||
|
/* Compute the double chaining pipe for the next message block. */
|
||||||
|
H[0] = (shl(XH32, 5) ^ shr(Q[16], 5) ^ M32[0]) + (XL32 ^ Q[24] ^ Q[0]);
|
||||||
|
H[1] = (shr(XH32, 7) ^ shl(Q[17], 8) ^ M32[1]) + (XL32 ^ Q[25] ^ Q[1]);
|
||||||
|
H[2] = (shr(XH32, 5) ^ shl(Q[18], 5) ^ M32[2]) + (XL32 ^ Q[26] ^ Q[2]);
|
||||||
|
H[3] = (shr(XH32, 1) ^ shl(Q[19], 5) ^ M32[3]) + (XL32 ^ Q[27] ^ Q[3]);
|
||||||
|
H[4] = (shr(XH32, 3) ^ Q[20] ^ M32[4]) + (XL32 ^ Q[28] ^ Q[4]);
|
||||||
|
H[5] = (shl(XH32, 6) ^ shr(Q[21], 6) ^ M32[5]) + (XL32 ^ Q[29] ^ Q[5]);
|
||||||
|
H[6] = (shr(XH32, 4) ^ shl(Q[22], 6) ^ M32[6]) + (XL32 ^ Q[30] ^ Q[6]);
|
||||||
|
H[7] = (shr(XH32, 11) ^ shl(Q[23], 2) ^ M32[7]) + (XL32 ^ Q[31] ^ Q[7]);
|
||||||
|
|
||||||
|
H[8] = SPH_ROTL32(H[4], 9) + (XH32 ^ Q[24] ^ M32[8]) + (shl(XL32, 8) ^ Q[23] ^ Q[8]);
|
||||||
|
H[9] = SPH_ROTL32(H[5], 10) + (XH32 ^ Q[25] ^ M32[9]) + (shr(XL32, 6) ^ Q[16] ^ Q[9]);
|
||||||
|
H[10] = SPH_ROTL32(H[6], 11) + (XH32 ^ Q[26] ^ M32[10]) + (shl(XL32, 6) ^ Q[17] ^ Q[10]);
|
||||||
|
H[11] = SPH_ROTL32(H[7], 12) + (XH32 ^ Q[27] ^ M32[11]) + (shl(XL32, 4) ^ Q[18] ^ Q[11]);
|
||||||
|
H[12] = SPH_ROTL32(H[0], 13) + (XH32 ^ Q[28] ^ M32[12]) + (shr(XL32, 3) ^ Q[19] ^ Q[12]);
|
||||||
|
H[13] = SPH_ROTL32(H[1], 14) + (XH32 ^ Q[29] ^ M32[13]) + (shr(XL32, 4) ^ Q[20] ^ Q[13]);
|
||||||
|
H[14] = SPH_ROTL32(H[2], 15) + (XH32 ^ Q[30] ^ M32[14]) + (shr(XL32, 7) ^ Q[21] ^ Q[14]);
|
||||||
|
H[15] = SPH_ROTL32(H[3], 16) + (XH32 ^ Q[31] ^ M32[15]) + (shr(XL32, 2) ^ Q[22] ^ Q[15]);
|
||||||
|
|
||||||
|
}
|
132
kernel/cubehash256.cl
Normal file
132
kernel/cubehash256.cl
Normal file
@ -0,0 +1,132 @@
|
|||||||
|
// cubehash256
|
||||||
|
// djm34 2015 based on ccminer cubehash512
|
||||||
|
|
||||||
|
#define CUBEHASH_ROUNDS 16 /* this is r for CubeHashr/b */
|
||||||
|
#define CUBEHASH_BLOCKBYTES 32 /* this is b for CubeHashr/b */
|
||||||
|
|
||||||
|
|
||||||
|
#define LROT(x, bits) rotate( x,(uint) bits)
|
||||||
|
|
||||||
|
|
||||||
|
#define ROTATEUPWARDS7(a) LROT(a,7)
|
||||||
|
#define ROTATEUPWARDS11(a) LROT(a,11)
|
||||||
|
|
||||||
|
#define SWAP(a,b) { uint u = a; a = b; b = u; }
|
||||||
|
|
||||||
|
inline void rrounds(uint x[2][2][2][2][2])
|
||||||
|
{
|
||||||
|
int r;
|
||||||
|
int j;
|
||||||
|
int k;
|
||||||
|
int l;
|
||||||
|
int m;
|
||||||
|
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (r = 0; r < CUBEHASH_ROUNDS; ++r) {
|
||||||
|
|
||||||
|
/* "add x_0jklm into x_1jklmn modulo 2^32" */
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (j = 0; j < 2; ++j)
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (k = 0; k < 2; ++k)
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (l = 0; l < 2; ++l)
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (m = 0; m < 2; ++m)
|
||||||
|
x[1][j][k][l][m] += x[0][j][k][l][m];
|
||||||
|
|
||||||
|
/* "rotate x_0jklm upwards by 7 bits" */
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (j = 0; j < 2; ++j)
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (k = 0; k < 2; ++k)
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (l = 0; l < 2; ++l)
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (m = 0; m < 2; ++m)
|
||||||
|
x[0][j][k][l][m] = ROTATEUPWARDS7(x[0][j][k][l][m]);
|
||||||
|
|
||||||
|
/* "swap x_00klm with x_01klm" */
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (k = 0; k < 2; ++k)
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (l = 0; l < 2; ++l)
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (m = 0; m < 2; ++m)
|
||||||
|
SWAP(x[0][0][k][l][m], x[0][1][k][l][m])
|
||||||
|
|
||||||
|
/* "xor x_1jklm into x_0jklm" */
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (j = 0; j < 2; ++j)
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (k = 0; k < 2; ++k)
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (l = 0; l < 2; ++l)
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (m = 0; m < 2; ++m)
|
||||||
|
x[0][j][k][l][m] ^= x[1][j][k][l][m];
|
||||||
|
|
||||||
|
/* "swap x_1jk0m with x_1jk1m" */
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (j = 0; j < 2; ++j)
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (k = 0; k < 2; ++k)
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (m = 0; m < 2; ++m)
|
||||||
|
SWAP(x[1][j][k][0][m], x[1][j][k][1][m])
|
||||||
|
|
||||||
|
/* "add x_0jklm into x_1jklm modulo 2^32" */
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (j = 0; j < 2; ++j)
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (k = 0; k < 2; ++k)
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (l = 0; l < 2; ++l)
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (m = 0; m < 2; ++m)
|
||||||
|
x[1][j][k][l][m] += x[0][j][k][l][m];
|
||||||
|
|
||||||
|
/* "rotate x_0jklm upwards by 11 bits" */
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (j = 0; j < 2; ++j)
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (k = 0; k < 2; ++k)
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (l = 0; l < 2; ++l)
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (m = 0; m < 2; ++m)
|
||||||
|
x[0][j][k][l][m] = ROTATEUPWARDS11(x[0][j][k][l][m]);
|
||||||
|
|
||||||
|
/* "swap x_0j0lm with x_0j1lm" */
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (j = 0; j < 2; ++j)
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (l = 0; l < 2; ++l)
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (m = 0; m < 2; ++m)
|
||||||
|
SWAP(x[0][j][0][l][m], x[0][j][1][l][m])
|
||||||
|
|
||||||
|
/* "xor x_1jklm into x_0jklm" */
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (j = 0; j < 2; ++j)
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (k = 0; k < 2; ++k)
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (l = 0; l < 2; ++l)
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (m = 0; m < 2; ++m)
|
||||||
|
x[0][j][k][l][m] ^= x[1][j][k][l][m];
|
||||||
|
|
||||||
|
/* "swap x_1jkl0 with x_1jkl1" */
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (j = 0; j < 2; ++j)
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (k = 0; k < 2; ++k)
|
||||||
|
//#pragma unroll 2
|
||||||
|
for (l = 0; l < 2; ++l)
|
||||||
|
SWAP(x[1][j][k][l][0], x[1][j][k][l][1])
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
@ -48,14 +48,14 @@ __constant static const sph_u64 SKEIN_IV512_256[8] = {
|
|||||||
|
|
||||||
__constant static const int ROT256[8][4] =
|
__constant static const int ROT256[8][4] =
|
||||||
{
|
{
|
||||||
46, 36, 19, 37,
|
{46, 36, 19, 37},
|
||||||
33, 27, 14, 42,
|
{33, 27, 14, 42},
|
||||||
17, 49, 36, 39,
|
{17, 49, 36, 39},
|
||||||
44, 9, 54, 56,
|
{44, 9, 54, 56},
|
||||||
39, 30, 34, 24,
|
{39, 30, 34, 24},
|
||||||
13, 50, 10, 17,
|
{13, 50, 10, 17},
|
||||||
25, 29, 39, 43,
|
{25, 29, 39, 43},
|
||||||
8, 35, 56, 22,
|
{8, 35, 56, 22}
|
||||||
};
|
};
|
||||||
|
|
||||||
__constant static const sph_u64 skein_ks_parity = 0x1BD11BDAA9FC1A22;
|
__constant static const sph_u64 skein_ks_parity = 0x1BD11BDAA9FC1A22;
|
||||||
|
5
miner.h
5
miner.h
@ -1156,8 +1156,8 @@ extern struct pool *add_pool(void);
|
|||||||
extern bool add_pool_details(struct pool *pool, bool live, char *url, char *user, char *pass, char *name, char *desc, char *profile, char *algo);
|
extern bool add_pool_details(struct pool *pool, bool live, char *url, char *user, char *pass, char *name, char *desc, char *profile, char *algo);
|
||||||
|
|
||||||
#define MAX_GPUDEVICES 16
|
#define MAX_GPUDEVICES 16
|
||||||
#define MAX_DEVICES 4096
|
//#define MAX_DEVICES 4096
|
||||||
|
#define MAX_DEVICES 8192
|
||||||
#define MIN_INTENSITY 4
|
#define MIN_INTENSITY 4
|
||||||
#define MIN_INTENSITY_STR "4"
|
#define MIN_INTENSITY_STR "4"
|
||||||
#define MAX_INTENSITY 31
|
#define MAX_INTENSITY 31
|
||||||
@ -1267,6 +1267,7 @@ struct stratum_work {
|
|||||||
size_t cb_len;
|
size_t cb_len;
|
||||||
size_t header_len;
|
size_t header_len;
|
||||||
int merkles;
|
int merkles;
|
||||||
|
double next_diff;
|
||||||
double diff;
|
double diff;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
134
ocl.c
134
ocl.c
@ -37,6 +37,7 @@
|
|||||||
#include "algorithm/neoscrypt.h"
|
#include "algorithm/neoscrypt.h"
|
||||||
#include "algorithm/pluck.h"
|
#include "algorithm/pluck.h"
|
||||||
#include "algorithm/yescrypt.h"
|
#include "algorithm/yescrypt.h"
|
||||||
|
#include "algorithm/Lyra2RE.h"
|
||||||
|
|
||||||
/* FIXME: only here for global config vars, replace with configuration.h
|
/* FIXME: only here for global config vars, replace with configuration.h
|
||||||
* or similar as soon as config is in a struct instead of littered all
|
* or similar as soon as config is in a struct instead of littered all
|
||||||
@ -599,6 +600,88 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
|
|||||||
|
|
||||||
applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency));
|
applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency));
|
||||||
|
|
||||||
|
}
|
||||||
|
else if ( !safe_cmp(cgpu->algorithm.name, "lyra2REv2") ) {
|
||||||
|
size_t glob_thread_count;
|
||||||
|
long max_int;
|
||||||
|
unsigned char type = 0;
|
||||||
|
|
||||||
|
// determine which intensity type to use
|
||||||
|
if (cgpu->rawintensity > 0) {
|
||||||
|
glob_thread_count = cgpu->rawintensity;
|
||||||
|
max_int = glob_thread_count;
|
||||||
|
type = 2;
|
||||||
|
}
|
||||||
|
else if (cgpu->xintensity > 0) {
|
||||||
|
glob_thread_count = clState->compute_shaders * ((cgpu->algorithm.xintensity_shift) ? (1UL << (cgpu->algorithm.xintensity_shift + cgpu->xintensity)) : cgpu->xintensity);
|
||||||
|
max_int = cgpu->xintensity;
|
||||||
|
type = 1;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
glob_thread_count = 1UL << (cgpu->algorithm.intensity_shift + cgpu->intensity);
|
||||||
|
max_int = ((cgpu->dynamic) ? MAX_INTENSITY : cgpu->intensity);
|
||||||
|
}
|
||||||
|
|
||||||
|
glob_thread_count = ((glob_thread_count < cgpu->work_size) ? cgpu->work_size : glob_thread_count);
|
||||||
|
|
||||||
|
// if TC * scratchbuf size is too big for memory... reduce to max
|
||||||
|
if ((glob_thread_count * LYRA_SCRATCHBUF_SIZE) >= (uint64_t)cgpu->max_alloc) {
|
||||||
|
|
||||||
|
/* Selected intensity will not run on this GPU. Not enough memory.
|
||||||
|
* Adapt the memory setting. */
|
||||||
|
// depending on intensity type used, reduce the intensity until it fits into the GPU max_alloc
|
||||||
|
switch (type) {
|
||||||
|
//raw intensity
|
||||||
|
case 2:
|
||||||
|
while ((glob_thread_count * LYRA_SCRATCHBUF_SIZE) > (uint64_t)cgpu->max_alloc) {
|
||||||
|
--glob_thread_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
max_int = glob_thread_count;
|
||||||
|
cgpu->rawintensity = glob_thread_count;
|
||||||
|
break;
|
||||||
|
|
||||||
|
//x intensity
|
||||||
|
case 1:
|
||||||
|
glob_thread_count = cgpu->max_alloc / LYRA_SCRATCHBUF_SIZE;
|
||||||
|
max_int = glob_thread_count / clState->compute_shaders;
|
||||||
|
|
||||||
|
while (max_int && ((clState->compute_shaders * (1UL << max_int)) > glob_thread_count)) {
|
||||||
|
--max_int;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check if max_intensity is >0. */
|
||||||
|
if (max_int < MIN_XINTENSITY) {
|
||||||
|
applog(LOG_ERR, "GPU %d: Max xintensity is below minimum.", gpu);
|
||||||
|
max_int = MIN_XINTENSITY;
|
||||||
|
}
|
||||||
|
|
||||||
|
cgpu->xintensity = max_int;
|
||||||
|
glob_thread_count = clState->compute_shaders * (1UL << max_int);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
glob_thread_count = cgpu->max_alloc / LYRA_SCRATCHBUF_SIZE;
|
||||||
|
while (max_int && ((1UL << max_int) & glob_thread_count) == 0) {
|
||||||
|
--max_int;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check if max_intensity is >0. */
|
||||||
|
if (max_int < MIN_INTENSITY) {
|
||||||
|
applog(LOG_ERR, "GPU %d: Max intensity is below minimum.", gpu);
|
||||||
|
max_int = MIN_INTENSITY;
|
||||||
|
}
|
||||||
|
|
||||||
|
cgpu->intensity = max_int;
|
||||||
|
glob_thread_count = 1UL << max_int;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TC is glob thread count
|
||||||
|
cgpu->thread_concurrency = glob_thread_count;
|
||||||
|
|
||||||
|
applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency));
|
||||||
|
|
||||||
} else if (!cgpu->opt_tc) {
|
} else if (!cgpu->opt_tc) {
|
||||||
unsigned int sixtyfours;
|
unsigned int sixtyfours;
|
||||||
@ -689,12 +772,17 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
|
|||||||
if (clState->n_extra_kernels > 0) {
|
if (clState->n_extra_kernels > 0) {
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
char kernel_name[9]; // max: search99 + 0x0
|
char kernel_name[9]; // max: search99 + 0x0
|
||||||
|
char kernel_name2[10]; // max: search99 + 0x0
|
||||||
|
|
||||||
clState->extra_kernels = (cl_kernel *)malloc(sizeof(cl_kernel) * clState->n_extra_kernels);
|
clState->extra_kernels = (cl_kernel *)malloc(sizeof(cl_kernel) * clState->n_extra_kernels);
|
||||||
|
|
||||||
for (i = 0; i < clState->n_extra_kernels; i++) {
|
for (i = 0; i < clState->n_extra_kernels; i++) {
|
||||||
snprintf(kernel_name, 9, "%s%d", "search", i + 1);
|
if (i+1<100){
|
||||||
clState->extra_kernels[i] = clCreateKernel(clState->program, kernel_name, &status);
|
snprintf(kernel_name, 9, "%s%d", "search", i + 1);
|
||||||
|
clState->extra_kernels[i] = clCreateKernel(clState->program, kernel_name, &status);
|
||||||
|
}else {
|
||||||
|
snprintf(kernel_name2, 10, "%s%d", "search", i + 1);
|
||||||
|
clState->extra_kernels[i] = clCreateKernel(clState->program, kernel_name2, &status);}
|
||||||
if (status != CL_SUCCESS) {
|
if (status != CL_SUCCESS) {
|
||||||
applog(LOG_DEBUG, "Error %d: Creating ExtraKernel #%d from program. (clCreateKernel)", status, i);
|
applog(LOG_DEBUG, "Error %d: Creating ExtraKernel #%d from program. (clCreateKernel)", status, i);
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -739,6 +827,21 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
else if (!safe_cmp(algorithm->name, "lyra2REv2") ) {
|
||||||
|
/* The scratch/pad-buffer needs 32kBytes memory per thread. */
|
||||||
|
bufsize = 4 * 4 * 12 * sizeof(unsigned long long) * cgpu->thread_concurrency;
|
||||||
|
buf1size = 4 * sizeof(unsigned long long) * cgpu->thread_concurrency; //matrix
|
||||||
|
|
||||||
|
/* This is the input buffer. For yescrypt this is guaranteed to be
|
||||||
|
* 80 bytes only. */
|
||||||
|
readbufsize = 80;
|
||||||
|
|
||||||
|
applog(LOG_DEBUG, "lyra2REv2 buffer sizes: %lu RW, %lu RW", (unsigned long)bufsize, (unsigned long)buf1size);
|
||||||
|
// scrypt/n-scrypt
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
else if (!safe_cmp(algorithm->name, "pluck")) {
|
else if (!safe_cmp(algorithm->name, "pluck")) {
|
||||||
/* The scratch/pad-buffer needs 32kBytes memory per thread. */
|
/* The scratch/pad-buffer needs 32kBytes memory per thread. */
|
||||||
@ -798,7 +901,25 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
else if (!safe_cmp(algorithm->name, "lyra2REv2") ) {
|
||||||
|
// need additionnal buffers
|
||||||
|
clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf1size, NULL, &status);
|
||||||
|
if (status != CL_SUCCESS && !clState->buffer1) {
|
||||||
|
applog(LOG_DEBUG, "Error %d: clCreateBuffer (buffer1), decrease TC or increase LG", status);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, bufsize, NULL, &status); // we don't need that much just tired...
|
||||||
|
if (status != CL_SUCCESS && !clState->buffer1) {
|
||||||
|
applog(LOG_DEBUG, "Error %d: clCreateBuffer (buffer1), decrease TC or increase LG", status);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
/* This buffer is weird and might work to some degree even if
|
/* This buffer is weird and might work to some degree even if
|
||||||
* the create buffer call has apparently failed, so check if we
|
* the create buffer call has apparently failed, so check if we
|
||||||
@ -808,6 +929,13 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
|
|||||||
applog(LOG_ERR, "Error %d: clCreateBuffer (padbuffer8), decrease TC or increase LG", status);
|
applog(LOG_ERR, "Error %d: clCreateBuffer (padbuffer8), decrease TC or increase LG", status);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, bufsize/8, NULL, &status); // we don't need that much just tired...
|
||||||
|
// if (status != CL_SUCCESS && !clState->buffer1) {
|
||||||
|
// applog(LOG_DEBUG, "Error %d: clCreateBuffer (buffer1), decrease TC or increase LG", status);
|
||||||
|
// return NULL;
|
||||||
|
// }
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
applog(LOG_DEBUG, "Using read buffer sized %lu", (unsigned long)readbufsize);
|
applog(LOG_DEBUG, "Using read buffer sized %lu", (unsigned long)readbufsize);
|
||||||
|
13
sgminer.c
13
sgminer.c
@ -2265,7 +2265,12 @@ static bool gbt_decode(struct pool *pool, json_t *res_val)
|
|||||||
|
|
||||||
static bool getwork_decode(json_t *res_val, struct work *work)
|
static bool getwork_decode(json_t *res_val, struct work *work)
|
||||||
{
|
{
|
||||||
if (unlikely(!jobj_binary(res_val, "data", work->data, sizeof(work->data), true))) {
|
|
||||||
|
size_t worklen = 128;
|
||||||
|
worklen = ((!safe_cmp(work->pool->algorithm.name, "credits")) ? sizeof(work->data) : worklen);
|
||||||
|
|
||||||
|
|
||||||
|
if (unlikely(!jobj_binary(res_val, "data", work->data, worklen, true))) {
|
||||||
if (opt_morenotices)
|
if (opt_morenotices)
|
||||||
applog(LOG_ERR, "%s: JSON inval data", isnull(get_pool_name(work->pool), ""));
|
applog(LOG_ERR, "%s: JSON inval data", isnull(get_pool_name(work->pool), ""));
|
||||||
return false;
|
return false;
|
||||||
@ -3030,7 +3035,11 @@ static bool submit_upstream_work(struct work *work, CURL *curl, char *curl_err_s
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* build hex string - Make sure to restrict to 80 bytes for Neoscrypt */
|
/* build hex string - Make sure to restrict to 80 bytes for Neoscrypt */
|
||||||
hexstr = bin2hex(work->data, ((!safe_cmp(work->pool->algorithm.name, "neoscrypt")) ? 80 : sizeof(work->data)));
|
|
||||||
|
int worksize_default = 128;
|
||||||
|
hexstr = bin2hex(work->data, (!safe_cmp(work->pool->algorithm.name, "neoscrypt") ? 80 : worksize_default));
|
||||||
|
hexstr = bin2hex(work->data, (!safe_cmp(work->pool->algorithm.name, "credits") ? sizeof(work->data) : worksize_default));
|
||||||
|
|
||||||
|
|
||||||
/* build JSON-RPC request */
|
/* build JSON-RPC request */
|
||||||
if (work->gbt) {
|
if (work->gbt) {
|
||||||
|
8
util.c
8
util.c
@ -1560,6 +1560,8 @@ static bool parse_notify(struct pool *pool, json_t *val)
|
|||||||
pool->swork.nbit = nbit;
|
pool->swork.nbit = nbit;
|
||||||
pool->swork.ntime = ntime;
|
pool->swork.ntime = ntime;
|
||||||
pool->swork.clean = clean;
|
pool->swork.clean = clean;
|
||||||
|
pool->swork.diff = pool->swork.next_diff;
|
||||||
|
|
||||||
alloc_len = pool->swork.cb_len = cb1_len + pool->n1_len + pool->n2size + cb2_len;
|
alloc_len = pool->swork.cb_len = cb1_len + pool->n1_len + pool->n2size + cb2_len;
|
||||||
pool->nonce2_offset = cb1_len + pool->n1_len;
|
pool->nonce2_offset = cb1_len + pool->n1_len;
|
||||||
|
|
||||||
@ -1668,8 +1670,8 @@ static bool parse_diff(struct pool *pool, json_t *val)
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
cg_wlock(&pool->data_lock);
|
cg_wlock(&pool->data_lock);
|
||||||
old_diff = pool->swork.diff;
|
old_diff = pool->swork.next_diff;
|
||||||
pool->swork.diff = diff;
|
pool->swork.next_diff = diff;
|
||||||
cg_wunlock(&pool->data_lock);
|
cg_wunlock(&pool->data_lock);
|
||||||
|
|
||||||
if (old_diff != diff) {
|
if (old_diff != diff) {
|
||||||
@ -2560,7 +2562,7 @@ out:
|
|||||||
if (!pool->stratum_url)
|
if (!pool->stratum_url)
|
||||||
pool->stratum_url = pool->sockaddr_url;
|
pool->stratum_url = pool->sockaddr_url;
|
||||||
pool->stratum_active = true;
|
pool->stratum_active = true;
|
||||||
pool->swork.diff = 1;
|
pool->swork.next_diff = pool->swork.diff = 1;
|
||||||
if (opt_protocol) {
|
if (opt_protocol) {
|
||||||
applog(LOG_DEBUG, "%s confirmed mining.subscribe with extranonce1 %s extran2size %d",
|
applog(LOG_DEBUG, "%s confirmed mining.subscribe with extranonce1 %s extran2size %d",
|
||||||
get_pool_name(pool), pool->nonce1, pool->n2size);
|
get_pool_name(pool), pool->nonce1, pool->n2size);
|
||||||
|
Loading…
Reference in New Issue
Block a user