Browse Source

Added Lyra2RE algo

windows
troky 10 years ago
parent
commit
8d68e96934
  1. 1
      Makefile.am
  2. 53
      algorithm.c
  3. 3
      algorithm.h
  4. 208
      algorithm/lyra2.c
  5. 50
      algorithm/lyra2.h
  6. 169
      algorithm/lyra2re.c
  7. 10
      algorithm/lyra2re.h
  8. 742
      algorithm/sponge.c
  9. 108
      algorithm/sponge.h
  10. 166
      driver-opencl.c
  11. 139
      findnonce.c
  12. 1
      findnonce.h
  13. 145
      kernel/Lyra2.cl
  14. 392
      kernel/Lyra2RE.cl
  15. 2
      kernel/arebyp.cl
  16. 96
      kernel/blake256.cl
  17. 2042
      kernel/diamond.cl
  18. 415
      kernel/groestl256.cl
  19. 2036
      kernel/groestlcoin.cl
  20. 84
      kernel/keccak1600.cl
  21. 107
      kernel/skein256.cl
  22. 1
      sgminer.c
  23. 6
      winbuild/sgminer.vcxproj
  24. 18
      winbuild/sgminer.vcxproj.filters

1
Makefile.am

@ -69,6 +69,7 @@ sgminer_SOURCES += algorithm/fresh.c algorithm/fresh.h @@ -69,6 +69,7 @@ sgminer_SOURCES += algorithm/fresh.c algorithm/fresh.h
sgminer_SOURCES += algorithm/whirlcoin.c algorithm/whirlcoin.h
sgminer_SOURCES += algorithm/neoscrypt.c algorithm/neoscrypt.h
sgminer_SOURCES += algorithm/whirlpoolx.c algorithm/whirlpoolx.h
sgminer_SOURCES += algorithm/lyra2re.c algorithm/lyra2re.h algorithm/lyra2.c algorithm/lyra2.h algorithm/sponge.c algorithm/sponge.h
bin_SCRIPTS = $(top_srcdir)/kernel/*.cl

53
algorithm.c

@ -32,6 +32,7 @@ @@ -32,6 +32,7 @@
#include "algorithm/whirlcoin.h"
#include "algorithm/neoscrypt.h"
#include "algorithm/whirlpoolx.h"
#include "algorithm/lyra2re.h"
#include "compat.h"
@ -54,7 +55,8 @@ const char *algorithm_type_str[] = { @@ -54,7 +55,8 @@ const char *algorithm_type_str[] = {
"Fresh",
"Whirlcoin",
"Neoscrypt",
"WhirlpoolX"
"WhirlpoolX",
"Lyra2RE"
};
void sha256(const unsigned char *message, unsigned int len, unsigned char *digest)
@ -653,6 +655,50 @@ static cl_int queue_whirlpoolx_kernel(struct __clState *clState, struct _dev_blk @@ -653,6 +655,50 @@ static cl_int queue_whirlpoolx_kernel(struct __clState *clState, struct _dev_blk
return status;
}
static cl_int queue_lyra2RE_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
cl_kernel *kernel;
unsigned int num;
cl_int status = 0;
cl_ulong le_target;
le_target = *(cl_ulong *)(blk->work->device_target + 24);
flip80(clState->cldata, blk->work->data);
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL);
// blake - search
kernel = &clState->kernel;
num = 0;
CL_SET_ARG(clState->padbuffer8);
CL_SET_ARG(blk->work->blk.ctx_a);
CL_SET_ARG(blk->work->blk.ctx_b);
CL_SET_ARG(blk->work->blk.ctx_c);
CL_SET_ARG(blk->work->blk.ctx_d);
CL_SET_ARG(blk->work->blk.ctx_e);
CL_SET_ARG(blk->work->blk.ctx_f);
CL_SET_ARG(blk->work->blk.ctx_g);
CL_SET_ARG(blk->work->blk.ctx_h);
CL_SET_ARG(blk->work->blk.cty_a);
CL_SET_ARG(blk->work->blk.cty_b);
CL_SET_ARG(blk->work->blk.cty_c);
// bmw - search1
kernel = clState->extra_kernels;
CL_SET_ARG_0(clState->padbuffer8);
// groestl - search2
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// skein - search3
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// jh - search4
num = 0;
CL_NEXTKERNEL_SET_ARG(clState->padbuffer8);
CL_SET_ARG(clState->outputBuffer);
CL_SET_ARG(le_target);
return status;
}
typedef struct _algorithm_settings_t {
const char *name; /* Human-readable identifier */
algorithm_type_t type; //common algorithm type
@ -728,6 +774,8 @@ static algorithm_settings_t algos[] = { @@ -728,6 +774,8 @@ static algorithm_settings_t algos[] = {
{ "fresh", ALGO_FRESH, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 4 * 16 * 4194304, 0, fresh_regenhash, queue_fresh_kernel, gen_hash, NULL},
{ "lyra2re", ALGO_LYRA2RE, "", 1, 128, 128, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 2 * 8 * 4194304 , 0, lyra2re_regenhash, queue_lyra2RE_kernel, gen_hash, NULL},
// kernels starting from this will have difficulty calculated by using fuguecoin algorithm
#define A_FUGUE(a, b, c) \
{ a, ALGO_FUGUE, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, c, NULL}
@ -737,7 +785,6 @@ static algorithm_settings_t algos[] = { @@ -737,7 +785,6 @@ static algorithm_settings_t algos[] = {
#undef A_FUGUE
{ "whirlcoin", ALGO_WHIRL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 3, 8 * 16 * 4194304, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, whirlcoin_regenhash, queue_whirlcoin_kernel, sha256, NULL},
{ "whirlpoolx", ALGO_WHIRL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, 0, whirlpoolx_regenhash, queue_sph_kernel, gen_hash, NULL },
// Terminator (do not remove)
@ -812,6 +859,8 @@ static const char *lookup_algorithm_alias(const char *lookup_alias, uint8_t *nfa @@ -812,6 +859,8 @@ static const char *lookup_algorithm_alias(const char *lookup_alias, uint8_t *nfa
ALGO_ALIAS("keccak", "maxcoin");
ALGO_ALIAS("whirlpool", "whirlcoin");
ALGO_ALIAS("whirlpoolx", "whirlpoolx");
ALGO_ALIAS("Lyra2RE", "lyra2re");
ALGO_ALIAS("lyra2", "lyra2re");
#undef ALGO_ALIAS
#undef ALGO_ALIAS_NF

3
algorithm.h

@ -25,7 +25,8 @@ typedef enum { @@ -25,7 +25,8 @@ typedef enum {
ALGO_NIST,
ALGO_FRESH,
ALGO_WHIRL,
ALGO_NEOSCRYPT
ALGO_NEOSCRYPT,
ALGO_LYRA2RE
} algorithm_type_t;
extern const char *algorithm_type_str[];

208
algorithm/lyra2.c

@ -0,0 +1,208 @@ @@ -0,0 +1,208 @@
/**
* Implementation of the Lyra2 Password Hashing Scheme (PHS).
*
* Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
*
* This software is hereby placed in the public domain.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "lyra2.h"
#include "sponge.h"
/**
* Executes Lyra2 based on the G function from Blake2b. This version supports salts and passwords
* whose combined length is smaller than the size of the memory matrix, (i.e., (nRows x nCols x b) bits,
* where "b" is the underlying sponge's bitrate). In this implementation, the "basil" is composed by all
* integer parameters (treated as type "unsigned int") in the order they are provided, plus the value
* of nCols, (i.e., basil = kLen || pwdlen || saltlen || timeCost || nRows || nCols).
*
* @param K The derived key to be output by the algorithm
* @param kLen Desired key length
* @param pwd User password
* @param pwdlen Password length
* @param salt Salt
* @param saltlen Salt length
* @param timeCost Parameter to determine the processing time (T)
* @param nRows Number or rows of the memory matrix (R)
* @param nCols Number of columns of the memory matrix (C)
*
* @return 0 if the key is generated correctly; -1 if there is an error (usually due to lack of memory for allocation)
*/
int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols) {
//============================= Basic variables ============================//
int64_t row = 2; //index of row to be processed
int64_t prev = 1; //index of prev (last row ever computed/modified)
int64_t rowa = 0; //index of row* (a previous row, deterministically picked during Setup and randomly picked while Wandering)
int64_t tau; //Time Loop iterator
int64_t step = 1; //Visitation step (used during Setup and Wandering phases)
int64_t window = 2; //Visitation window (used to define which rows can be revisited during Setup)
int64_t gap = 1; //Modifier to the step, assuming the values 1 or -1
int64_t i; //auxiliary iteration counter
//==========================================================================/
//========== Initializing the Memory Matrix and pointers to it =============//
//Tries to allocate enough space for the whole memory matrix
i = (int64_t) ((int64_t) nRows * (int64_t) ROW_LEN_BYTES);
uint64_t *wholeMatrix = (uint64_t*)malloc(i);
if (wholeMatrix == NULL) {
return -1;
}
memset(wholeMatrix, 0, i);
//Allocates pointers to each row of the matrix
uint64_t **memMatrix = (uint64_t**)malloc(nRows * sizeof (uint64_t*));
if (memMatrix == NULL) {
return -1;
}
//Places the pointers in the correct positions
uint64_t *ptrWord = wholeMatrix;
for (i = 0; i < nRows; i++) {
memMatrix[i] = ptrWord;
ptrWord += ROW_LEN_INT64;
}
//==========================================================================/
//============= Getting the password + salt + basil padded with 10*1 ===============//
//OBS.:The memory matrix will temporarily hold the password: not for saving memory,
//but this ensures that the password copied locally will be overwritten as soon as possible
//First, we clean enough blocks for the password, salt, basil and padding
uint64_t nBlocksInput = ((saltlen + pwdlen + 6 * sizeof (uint64_t)) / BLOCK_LEN_BLAKE2_SAFE_BYTES) + 1;
byte *ptrByte = (byte*) wholeMatrix;
memset(ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES);
//Prepends the password
memcpy(ptrByte, pwd, pwdlen);
ptrByte += pwdlen;
//Concatenates the salt
memcpy(ptrByte, salt, saltlen);
ptrByte += saltlen;
//Concatenates the basil: every integer passed as parameter, in the order they are provided by the interface
memcpy(ptrByte, &kLen, sizeof (uint64_t));
ptrByte += sizeof (uint64_t);
memcpy(ptrByte, &pwdlen, sizeof (uint64_t));
ptrByte += sizeof (uint64_t);
memcpy(ptrByte, &saltlen, sizeof (uint64_t));
ptrByte += sizeof (uint64_t);
memcpy(ptrByte, &timeCost, sizeof (uint64_t));
ptrByte += sizeof (uint64_t);
memcpy(ptrByte, &nRows, sizeof (uint64_t));
ptrByte += sizeof (uint64_t);
memcpy(ptrByte, &nCols, sizeof (uint64_t));
ptrByte += sizeof (uint64_t);
//Now comes the padding
*ptrByte = 0x80; //first byte of padding: right after the password
ptrByte = (byte*) wholeMatrix; //resets the pointer to the start of the memory matrix
ptrByte += nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - 1; //sets the pointer to the correct position: end of incomplete block
*ptrByte ^= 0x01; //last byte of padding: at the end of the last incomplete block
//==========================================================================/
//======================= Initializing the Sponge State ====================//
//Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c)
uint64_t *state = (uint64_t*)malloc(16 * sizeof (uint64_t));
if (state == NULL) {
return -1;
}
initState(state);
//==========================================================================/
//================================ Setup Phase =============================//
//Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits
ptrWord = wholeMatrix;
for (i = 0; i < nBlocksInput; i++) {
absorbBlockBlake2Safe(state, ptrWord); //absorbs each block of pad(pwd || salt || basil)
ptrWord += BLOCK_LEN_BLAKE2_SAFE_BYTES; //goes to next block of pad(pwd || salt || basil)
}
//Initializes M[0] and M[1]
reducedSqueezeRow0(state, memMatrix[0]); //The locally copied password is most likely overwritten here
reducedDuplexRow1(state, memMatrix[0], memMatrix[1]);
do {
//M[row] = rand; //M[row*] = M[row*] XOR rotW(rand)
reducedDuplexRowSetup(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]);
//updates the value of row* (deterministically picked during Setup))
rowa = (rowa + step) & (window - 1);
//update prev: it now points to the last row ever computed
prev = row;
//updates row: goes to the next row to be computed
row++;
//Checks if all rows in the window where visited.
if (rowa == 0) {
step = window + gap; //changes the step: approximately doubles its value
window *= 2; //doubles the size of the re-visitation window
gap = -gap; //inverts the modifier to the step
}
} while (row < nRows);
//==========================================================================/
//============================ Wandering Phase =============================//
row = 0; //Resets the visitation to the first row of the memory matrix
for (tau = 1; tau <= timeCost; tau++) {
//Step is approximately half the number of all rows of the memory matrix for an odd tau; otherwise, it is -1
step = (tau % 2 == 0) ? -1 : nRows / 2 - 1;
do {
//Selects a pseudorandom index row*
//------------------------------------------------------------------------------------------
//rowa = ((unsigned int)state[0]) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2)
rowa = ((uint64_t) (state[0])) % nRows; //(USE THIS FOR THE "GENERIC" CASE)
//------------------------------------------------------------------------------------------
//Performs a reduced-round duplexing operation over M[row*] XOR M[prev], updating both M[row*] and M[row]
reducedDuplexRow(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]);
//update prev: it now points to the last row ever computed
prev = row;
//updates row: goes to the next row to be computed
//------------------------------------------------------------------------------------------
//row = (row + step) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2)
row = (row + step) % nRows; //(USE THIS FOR THE "GENERIC" CASE)
//------------------------------------------------------------------------------------------
} while (row != 0);
}
//==========================================================================/
//============================ Wrap-up Phase ===============================//
//Absorbs the last block of the memory matrix
absorbBlock(state, memMatrix[rowa]);
//Squeezes the key
squeeze(state, (unsigned char*)K, kLen);
//==========================================================================/
//========================= Freeing the memory =============================//
free(memMatrix);
free(wholeMatrix);
//Wiping out the sponge's internal state before freeing it
memset(state, 0, 16 * sizeof (uint64_t));
free(state);
//==========================================================================/
return 0;
}

50
algorithm/lyra2.h

@ -0,0 +1,50 @@ @@ -0,0 +1,50 @@
/**
* Header file for the Lyra2 Password Hashing Scheme (PHS).
*
* Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
*
* This software is hereby placed in the public domain.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef LYRA2_H_
#define LYRA2_H_
#include <stdint.h>
typedef unsigned char byte;
//Block length required so Blake2's Initialization Vector (IV) is not overwritten (THIS SHOULD NOT BE MODIFIED)
#define BLOCK_LEN_BLAKE2_SAFE_INT64 8 //512 bits (=64 bytes, =8 uint64_t)
#define BLOCK_LEN_BLAKE2_SAFE_BYTES (BLOCK_LEN_BLAKE2_SAFE_INT64 * 8) //same as above, in bytes
#ifdef BLOCK_LEN_BITS
#define BLOCK_LEN_INT64 (BLOCK_LEN_BITS/64) //Block length: 768 bits (=96 bytes, =12 uint64_t)
#define BLOCK_LEN_BYTES (BLOCK_LEN_BITS/8) //Block length, in bytes
#else //default block lenght: 768 bits
#define BLOCK_LEN_INT64 12 //Block length: 768 bits (=96 bytes, =12 uint64_t)
#define BLOCK_LEN_BYTES (BLOCK_LEN_INT64 * 8) //Block length, in bytes
#endif
#ifndef N_COLS
#define N_COLS 8 //Number of columns in the memory matrix: fixed to 64 by default
#endif
#define ROW_LEN_INT64 (BLOCK_LEN_INT64 * N_COLS) //Total length of a row: N_COLS blocks
#define ROW_LEN_BYTES (ROW_LEN_INT64 * 8) //Number of bytes per row
int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols);
#endif /* LYRA2_H_ */

169
algorithm/lyra2re.c

@ -0,0 +1,169 @@ @@ -0,0 +1,169 @@
/*-
* Copyright 2014 James Lovejoy
* Copyright 2014 phm
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#include "miner.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include "sph/sph_blake.h"
#include "sph/sph_groestl.h"
#include "sph/sph_skein.h"
#include "sph/sph_keccak.h"
#include "lyra2.h"
/*
* Encode a length len/4 vector of (uint32_t) into a length len vector of
* (unsigned char) in big-endian form. Assumes len is a multiple of 4.
*/
static inline void
be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
{
uint32_t i;
for (i = 0; i < len; i++)
dst[i] = htobe32(src[i]);
}
inline void lyra2rehash(void *state, const void *input)
{
sph_blake256_context ctx_blake;
sph_groestl256_context ctx_groestl;
sph_keccak256_context ctx_keccak;
sph_skein256_context ctx_skein;
uint32_t hashA[8], hashB[8];
sph_blake256_init(&ctx_blake);
sph_blake256 (&ctx_blake, input, 80);
sph_blake256_close (&ctx_blake, hashA);
sph_keccak256_init(&ctx_keccak);
sph_keccak256 (&ctx_keccak,hashA, 32);
sph_keccak256_close(&ctx_keccak, hashB);
LYRA2(hashA, 32, hashB, 32, hashB, 32, 1, 8, 8);
sph_skein256_init(&ctx_skein);
sph_skein256 (&ctx_skein, hashA, 32);
sph_skein256_close(&ctx_skein, hashB);
sph_groestl256_init(&ctx_groestl);
sph_groestl256 (&ctx_groestl, hashB, 32);
sph_groestl256_close(&ctx_groestl, hashA);
//printf("cpu hash %08x %08x %08x %08x\n",hashA[0],hashA[1],hashA[2],hashA[3]);
memcpy(state, hashA, 32);
}
static const uint32_t diff1targ = 0x0000ffff;
/* Used externally as confirmation of correct OCL code */
int lyra2re_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
{
uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]);
uint32_t data[20], ohash[8];
be32enc_vect(data, (const uint32_t *)pdata, 19);
data[19] = htobe32(nonce);
lyra2rehash(ohash, data);
tmp_hash7 = be32toh(ohash[7]);
applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx",
(long unsigned int)Htarg,
(long unsigned int)diff1targ,
(long unsigned int)tmp_hash7);
if (tmp_hash7 > diff1targ)
return -1;
if (tmp_hash7 > Htarg)
return 0;
return 1;
}
void lyra2re_regenhash(struct work *work)
{
uint32_t data[20];
uint32_t *nonce = (uint32_t *)(work->data + 76);
uint32_t *ohash = (uint32_t *)(work->hash);
be32enc_vect(data, (const uint32_t *)work->data, 19);
data[19] = htobe32(*nonce);
lyra2rehash(ohash, data);
}
bool scanhash_lyra2re(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
unsigned char *pdata, unsigned char __maybe_unused *phash1,
unsigned char __maybe_unused *phash, const unsigned char *ptarget,
uint32_t max_nonce, uint32_t *last_nonce, uint32_t n)
{
uint32_t *nonce = (uint32_t *)(pdata + 76);
uint32_t data[20];
uint32_t tmp_hash7;
uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]);
bool ret = false;
be32enc_vect(data, (const uint32_t *)pdata, 19);
while(1) {
uint32_t ostate[8];
*nonce = ++n;
data[19] = (n);
lyra2rehash(ostate, data);
tmp_hash7 = (ostate[7]);
applog(LOG_INFO, "data7 %08lx",
(long unsigned int)data[7]);
if (unlikely(tmp_hash7 <= Htarg)) {
((uint32_t *)pdata)[19] = htobe32(n);
*last_nonce = n;
ret = true;
break;
}
if (unlikely((n >= max_nonce) || thr->work_restart)) {
*last_nonce = n;
break;
}
}
return ret;
}

10
algorithm/lyra2re.h

@ -0,0 +1,10 @@ @@ -0,0 +1,10 @@
#ifndef LYRA2RE_H
#define LYRA2RE_H
#include "miner.h"
extern int lyra2re_test(unsigned char *pdata, const unsigned char *ptarget,
uint32_t nonce);
extern void lyra2re_regenhash(struct work *work);
#endif /* LYRA2RE_H */

742
algorithm/sponge.c

@ -0,0 +1,742 @@ @@ -0,0 +1,742 @@
/**
* A simple implementation of Blake2b's internal permutation
* in the form of a sponge.
*
* Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
*
* This software is hereby placed in the public domain.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string.h>
#include <stdio.h>
#include <time.h>
#include "sponge.h"
#include "lyra2.h"
/**
* Initializes the Sponge State. The first 512 bits are set to zeros and the remainder
* receive Blake2b's IV as per Blake2b's specification. <b>Note:</b> Even though sponges
* typically have their internal state initialized with zeros, Blake2b's G function
* has a fixed point: if the internal state and message are both filled with zeros. the
* resulting permutation will always be a block filled with zeros; this happens because
* Blake2b does not use the constants originally employed in Blake2 inside its G function,
* relying on the IV for avoiding possible fixed points.
*
* @param state The 1024-bit array to be initialized
*/
void initState(uint64_t state[/*16*/]) {
//First 512 bis are zeros
memset(state, 0, 64);
//Remainder BLOCK_LEN_BLAKE2_SAFE_BYTES are reserved to the IV
state[8] = blake2b_IV[0];
state[9] = blake2b_IV[1];
state[10] = blake2b_IV[2];
state[11] = blake2b_IV[3];
state[12] = blake2b_IV[4];
state[13] = blake2b_IV[5];
state[14] = blake2b_IV[6];
state[15] = blake2b_IV[7];
}
/**
* Execute Blake2b's G function, with all 12 rounds.
*
* @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function
*/
static void blake2bLyra(uint64_t *v) {
ROUND_LYRA(0);
ROUND_LYRA(1);
ROUND_LYRA(2);
ROUND_LYRA(3);
ROUND_LYRA(4);
ROUND_LYRA(5);
ROUND_LYRA(6);
ROUND_LYRA(7);
ROUND_LYRA(8);
ROUND_LYRA(9);
ROUND_LYRA(10);
ROUND_LYRA(11);
}
/**
* Executes a reduced version of Blake2b's G function with only one round
* @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function
*/
static void reducedBlake2bLyra(uint64_t *v) {
ROUND_LYRA(0);
}
/**
* Performs a squeeze operation, using Blake2b's G function as the
* internal permutation
*
* @param state The current state of the sponge
* @param out Array that will receive the data squeezed
* @param len The number of bytes to be squeezed into the "out" array
*/
void squeeze(uint64_t *state, byte *out, unsigned int len) {
int fullBlocks = len / BLOCK_LEN_BYTES;
byte *ptr = out;
int i;
//Squeezes full blocks
for (i = 0; i < fullBlocks; i++) {
memcpy(ptr, state, BLOCK_LEN_BYTES);
blake2bLyra(state);
ptr += BLOCK_LEN_BYTES;
}
//Squeezes remaining bytes
memcpy(ptr, state, (len % BLOCK_LEN_BYTES));
}
/**
* Performs an absorb operation for a single block (BLOCK_LEN_INT64 words
* of type uint64_t), using Blake2b's G function as the internal permutation
*
* @param state The current state of the sponge
* @param in The block to be absorbed (BLOCK_LEN_INT64 words)
*/
void absorbBlock(uint64_t *state, const uint64_t *in) {
//XORs the first BLOCK_LEN_INT64 words of "in" with the current state
state[0] ^= in[0];
state[1] ^= in[1];
state[2] ^= in[2];
state[3] ^= in[3];
state[4] ^= in[4];
state[5] ^= in[5];
state[6] ^= in[6];
state[7] ^= in[7];
state[8] ^= in[8];
state[9] ^= in[9];
state[10] ^= in[10];
state[11] ^= in[11];
//Applies the transformation f to the sponge's state
blake2bLyra(state);
}
/**
* Performs an absorb operation for a single block (BLOCK_LEN_BLAKE2_SAFE_INT64
* words of type uint64_t), using Blake2b's G function as the internal permutation
*
* @param state The current state of the sponge
* @param in The block to be absorbed (BLOCK_LEN_BLAKE2_SAFE_INT64 words)
*/
void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in) {
//XORs the first BLOCK_LEN_BLAKE2_SAFE_INT64 words of "in" with the current state
state[0] ^= in[0];
state[1] ^= in[1];
state[2] ^= in[2];
state[3] ^= in[3];
state[4] ^= in[4];
state[5] ^= in[5];
state[6] ^= in[6];
state[7] ^= in[7];
//Applies the transformation f to the sponge's state
blake2bLyra(state);
}
/**
* Performs a reduced squeeze operation for a single row, from the highest to
* the lowest index, using the reduced-round Blake2b's G function as the
* internal permutation
*
* @param state The current state of the sponge
* @param rowOut Row to receive the data squeezed
*/
void reducedSqueezeRow0(uint64_t* state, uint64_t* rowOut) {
uint64_t* ptrWord = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to M[0][C-1]
int i;
//M[row][C-1-col] = H.reduced_squeeze()
for (i = 0; i < N_COLS; i++) {
ptrWord[0] = state[0];
ptrWord[1] = state[1];
ptrWord[2] = state[2];
ptrWord[3] = state[3];
ptrWord[4] = state[4];
ptrWord[5] = state[5];
ptrWord[6] = state[6];
ptrWord[7] = state[7];
ptrWord[8] = state[8];
ptrWord[9] = state[9];
ptrWord[10] = state[10];
ptrWord[11] = state[11];
//Goes to next block (column) that will receive the squeezed data
ptrWord -= BLOCK_LEN_INT64;
//Applies the reduced-round transformation f to the sponge's state
reducedBlake2bLyra(state);
}
}
/**
* Performs a reduced duplex operation for a single row, from the highest to
* the lowest index, using the reduced-round Blake2b's G function as the
* internal permutation
*
* @param state The current state of the sponge
* @param rowIn Row to feed the sponge
* @param rowOut Row to receive the sponge's output
*/
void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut) {
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row
int i;
for (i = 0; i < N_COLS; i++) {
//Absorbing "M[prev][col]"
state[0] ^= (ptrWordIn[0]);
state[1] ^= (ptrWordIn[1]);
state[2] ^= (ptrWordIn[2]);
state[3] ^= (ptrWordIn[3]);
state[4] ^= (ptrWordIn[4]);
state[5] ^= (ptrWordIn[5]);
state[6] ^= (ptrWordIn[6]);
state[7] ^= (ptrWordIn[7]);
state[8] ^= (ptrWordIn[8]);
state[9] ^= (ptrWordIn[9]);
state[10] ^= (ptrWordIn[10]);
state[11] ^= (ptrWordIn[11]);
//Applies the reduced-round transformation f to the sponge's state
reducedBlake2bLyra(state);
//M[row][C-1-col] = M[prev][col] XOR rand
ptrWordOut[0] = ptrWordIn[0] ^ state[0];
ptrWordOut[1] = ptrWordIn[1] ^ state[1];
ptrWordOut[2] = ptrWordIn[2] ^ state[2];
ptrWordOut[3] = ptrWordIn[3] ^ state[3];
ptrWordOut[4] = ptrWordIn[4] ^ state[4];
ptrWordOut[5] = ptrWordIn[5] ^ state[5];
ptrWordOut[6] = ptrWordIn[6] ^ state[6];
ptrWordOut[7] = ptrWordIn[7] ^ state[7];
ptrWordOut[8] = ptrWordIn[8] ^ state[8];
ptrWordOut[9] = ptrWordIn[9] ^ state[9];
ptrWordOut[10] = ptrWordIn[10] ^ state[10];
ptrWordOut[11] = ptrWordIn[11] ^ state[11];
//Input: next column (i.e., next block in sequence)
ptrWordIn += BLOCK_LEN_INT64;
//Output: goes to previous column
ptrWordOut -= BLOCK_LEN_INT64;
}
}
/**
* Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e.,
* the wordwise addition of two columns, ignoring carries between words). The
* output of this operation, "rand", is then used to make
* "M[rowOut][(N_COLS-1)-col] = M[rowIn][col] XOR rand" and
* "M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)", where rotW is a 64-bit
* rotation to the left and N_COLS is a system parameter.
*
* @param state The current state of the sponge
* @param rowIn Row used only as input
* @param rowInOut Row used as input and to receive output after rotation
* @param rowOut Row receiving the output
*
*/
void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row
int i;
for (i = 0; i < N_COLS; i++) {
//Absorbing "M[prev] [+] M[row*]"
state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]);
state[1] ^= (ptrWordIn[1] + ptrWordInOut[1]);
state[2] ^= (ptrWordIn[2] + ptrWordInOut[2]);
state[3] ^= (ptrWordIn[3] + ptrWordInOut[3]);
state[4] ^= (ptrWordIn[4] + ptrWordInOut[4]);
state[5] ^= (ptrWordIn[5] + ptrWordInOut[5]);
state[6] ^= (ptrWordIn[6] + ptrWordInOut[6]);
state[7] ^= (ptrWordIn[7] + ptrWordInOut[7]);
state[8] ^= (ptrWordIn[8] + ptrWordInOut[8]);
state[9] ^= (ptrWordIn[9] + ptrWordInOut[9]);
state[10] ^= (ptrWordIn[10] + ptrWordInOut[10]);
state[11] ^= (ptrWordIn[11] + ptrWordInOut[11]);
//Applies the reduced-round transformation f to the sponge's state
reducedBlake2bLyra(state);
//M[row][col] = M[prev][col] XOR rand
ptrWordOut[0] = ptrWordIn[0] ^ state[0];
ptrWordOut[1] = ptrWordIn[1] ^ state[1];
ptrWordOut[2] = ptrWordIn[2] ^ state[2];
ptrWordOut[3] = ptrWordIn[3] ^ state[3];
ptrWordOut[4] = ptrWordIn[4] ^ state[4];
ptrWordOut[5] = ptrWordIn[5] ^ state[5];
ptrWordOut[6] = ptrWordIn[6] ^ state[6];
ptrWordOut[7] = ptrWordIn[7] ^ state[7];
ptrWordOut[8] = ptrWordIn[8] ^ state[8];
ptrWordOut[9] = ptrWordIn[9] ^ state[9];
ptrWordOut[10] = ptrWordIn[10] ^ state[10];
ptrWordOut[11] = ptrWordIn[11] ^ state[11];
//M[row*][col] = M[row*][col] XOR rotW(rand)
ptrWordInOut[0] ^= state[11];
ptrWordInOut[1] ^= state[0];
ptrWordInOut[2] ^= state[1];
ptrWordInOut[3] ^= state[2];
ptrWordInOut[4] ^= state[3];
ptrWordInOut[5] ^= state[4];
ptrWordInOut[6] ^= state[5];
ptrWordInOut[7] ^= state[6];
ptrWordInOut[8] ^= state[7];
ptrWordInOut[9] ^= state[8];
ptrWordInOut[10] ^= state[9];
ptrWordInOut[11] ^= state[10];
//Inputs: next column (i.e., next block in sequence)
ptrWordInOut += BLOCK_LEN_INT64;
ptrWordIn += BLOCK_LEN_INT64;
//Output: goes to previous column
ptrWordOut -= BLOCK_LEN_INT64;
}
}
/**
* Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e.,
* the wordwise addition of two columns, ignoring carries between words). The
* output of this operation, "rand", is then used to make
* "M[rowOut][col] = M[rowOut][col] XOR rand" and
* "M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)", where rotW is a 64-bit
* rotation to the left.
*
* @param state The current state of the sponge
* @param rowIn Row used only as input
* @param rowInOut Row used as input and to receive output after rotation
* @param rowOut Row receiving the output
*
*/
void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row
int i;
for (i = 0; i < N_COLS; i++) {
//Absorbing "M[prev] [+] M[row*]"
state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]);
state[1] ^= (ptrWordIn[1] + ptrWordInOut[1]);
state[2] ^= (ptrWordIn[2] + ptrWordInOut[2]);
state[3] ^= (ptrWordIn[3] + ptrWordInOut[3]);
state[4] ^= (ptrWordIn[4] + ptrWordInOut[4]);
state[5] ^= (ptrWordIn[5] + ptrWordInOut[5]);
state[6] ^= (ptrWordIn[6] + ptrWordInOut[6]);
state[7] ^= (ptrWordIn[7] + ptrWordInOut[7]);
state[8] ^= (ptrWordIn[8] + ptrWordInOut[8]);
state[9] ^= (ptrWordIn[9] + ptrWordInOut[9]);
state[10] ^= (ptrWordIn[10] + ptrWordInOut[10]);
state[11] ^= (ptrWordIn[11] + ptrWordInOut[11]);
//Applies the reduced-round transformation f to the sponge's state
reducedBlake2bLyra(state);
//M[rowOut][col] = M[rowOut][col] XOR rand
ptrWordOut[0] ^= state[0];
ptrWordOut[1] ^= state[1];
ptrWordOut[2] ^= state[2];
ptrWordOut[3] ^= state[3];
ptrWordOut[4] ^= state[4];
ptrWordOut[5] ^= state[5];
ptrWordOut[6] ^= state[6];
ptrWordOut[7] ^= state[7];
ptrWordOut[8] ^= state[8];
ptrWordOut[9] ^= state[9];
ptrWordOut[10] ^= state[10];
ptrWordOut[11] ^= state[11];
//M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)
ptrWordInOut[0] ^= state[11];
ptrWordInOut[1] ^= state[0];
ptrWordInOut[2] ^= state[1];
ptrWordInOut[3] ^= state[2];
ptrWordInOut[4] ^= state[3];
ptrWordInOut[5] ^= state[4];
ptrWordInOut[6] ^= state[5];
ptrWordInOut[7] ^= state[6];
ptrWordInOut[8] ^= state[7];
ptrWordInOut[9] ^= state[8];
ptrWordInOut[10] ^= state[9];
ptrWordInOut[11] ^= state[10];
//Goes to next block
ptrWordOut += BLOCK_LEN_INT64;
ptrWordInOut += BLOCK_LEN_INT64;
ptrWordIn += BLOCK_LEN_INT64;
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Performs a duplex operation over "M[rowInOut] [+] M[rowIn]", writing the output "rand"
* on M[rowOut] and making "M[rowInOut] = M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit
* rotation to the left.
*
* @param state The current state of the sponge
* @param rowIn Row used only as input
* @param rowInOut Row used as input and to receive output after rotation
* @param rowOut Row receiving the output
*
*/
/*
void reducedDuplexRowSetupOLD(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row
int i;
for (i = 0; i < N_COLS; i++) {
//Absorbing "M[rowInOut] XOR M[rowIn]"
state[0] ^= ptrWordInOut[0] ^ ptrWordIn[0];
state[1] ^= ptrWordInOut[1] ^ ptrWordIn[1];
state[2] ^= ptrWordInOut[2] ^ ptrWordIn[2];
state[3] ^= ptrWordInOut[3] ^ ptrWordIn[3];
state[4] ^= ptrWordInOut[4] ^ ptrWordIn[4];
state[5] ^= ptrWordInOut[5] ^ ptrWordIn[5];
state[6] ^= ptrWordInOut[6] ^ ptrWordIn[6];
state[7] ^= ptrWordInOut[7] ^ ptrWordIn[7];
state[8] ^= ptrWordInOut[8] ^ ptrWordIn[8];
state[9] ^= ptrWordInOut[9] ^ ptrWordIn[9];
state[10] ^= ptrWordInOut[10] ^ ptrWordIn[10];
state[11] ^= ptrWordInOut[11] ^ ptrWordIn[11];
//Applies the reduced-round transformation f to the sponge's state
reducedBlake2bLyra(state);
//M[row][col] = rand
ptrWordOut[0] = state[0];
ptrWordOut[1] = state[1];
ptrWordOut[2] = state[2];
ptrWordOut[3] = state[3];
ptrWordOut[4] = state[4];
ptrWordOut[5] = state[5];
ptrWordOut[6] = state[6];
ptrWordOut[7] = state[7];
ptrWordOut[8] = state[8];
ptrWordOut[9] = state[9];
ptrWordOut[10] = state[10];
ptrWordOut[11] = state[11];
//M[row*][col] = M[row*][col] XOR rotW(rand)
ptrWordInOut[0] ^= state[10];
ptrWordInOut[1] ^= state[11];
ptrWordInOut[2] ^= state[0];
ptrWordInOut[3] ^= state[1];
ptrWordInOut[4] ^= state[2];
ptrWordInOut[5] ^= state[3];
ptrWordInOut[6] ^= state[4];
ptrWordInOut[7] ^= state[5];
ptrWordInOut[8] ^= state[6];
ptrWordInOut[9] ^= state[7];
ptrWordInOut[10] ^= state[8];
ptrWordInOut[11] ^= state[9];
//Goes to next column (i.e., next block in sequence)
ptrWordInOut += BLOCK_LEN_INT64;
ptrWordIn += BLOCK_LEN_INT64;
ptrWordOut += BLOCK_LEN_INT64;
}
}
*/
/**
* Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", writing the output "rand"
* on M[rowOut] and making "M[rowInOut] = M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit
* rotation to the left.
*
* @param state The current state of the sponge
* @param rowIn Row used only as input
* @param rowInOut Row used as input and to receive output after rotation
* @param rowOut Row receiving the output
*
*/
/*
void reducedDuplexRowSetupv5(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row
int i;
for (i = 0; i < N_COLS; i++) {
//Absorbing "M[rowInOut] XOR M[rowIn]"
state[0] ^= ptrWordInOut[0] + ptrWordIn[0];
state[1] ^= ptrWordInOut[1] + ptrWordIn[1];
state[2] ^= ptrWordInOut[2] + ptrWordIn[2];
state[3] ^= ptrWordInOut[3] + ptrWordIn[3];
state[4] ^= ptrWordInOut[4] + ptrWordIn[4];
state[5] ^= ptrWordInOut[5] + ptrWordIn[5];
state[6] ^= ptrWordInOut[6] + ptrWordIn[6];
state[7] ^= ptrWordInOut[7] + ptrWordIn[7];
state[8] ^= ptrWordInOut[8] + ptrWordIn[8];
state[9] ^= ptrWordInOut[9] + ptrWordIn[9];
state[10] ^= ptrWordInOut[10] + ptrWordIn[10];
state[11] ^= ptrWordInOut[11] + ptrWordIn[11];
//Applies the reduced-round transformation f to the sponge's state
reducedBlake2bLyra(state);
//M[row*][col] = M[row*][col] XOR rotW(rand)
ptrWordInOut[0] ^= state[10];
ptrWordInOut[1] ^= state[11];
ptrWordInOut[2] ^= state[0];
ptrWordInOut[3] ^= state[1];
ptrWordInOut[4] ^= state[2];
ptrWordInOut[5] ^= state[3];
ptrWordInOut[6] ^= state[4];
ptrWordInOut[7] ^= state[5];
ptrWordInOut[8] ^= state[6];
ptrWordInOut[9] ^= state[7];
ptrWordInOut[10] ^= state[8];
ptrWordInOut[11] ^= state[9];
//M[row][col] = rand
ptrWordOut[0] = state[0] ^ ptrWordIn[0];
ptrWordOut[1] = state[1] ^ ptrWordIn[1];
ptrWordOut[2] = state[2] ^ ptrWordIn[2];
ptrWordOut[3] = state[3] ^ ptrWordIn[3];
ptrWordOut[4] = state[4] ^ ptrWordIn[4];
ptrWordOut[5] = state[5] ^ ptrWordIn[5];
ptrWordOut[6] = state[6] ^ ptrWordIn[6];
ptrWordOut[7] = state[7] ^ ptrWordIn[7];
ptrWordOut[8] = state[8] ^ ptrWordIn[8];
ptrWordOut[9] = state[9] ^ ptrWordIn[9];
ptrWordOut[10] = state[10] ^ ptrWordIn[10];
ptrWordOut[11] = state[11] ^ ptrWordIn[11];
//Goes to next column (i.e., next block in sequence)
ptrWordInOut += BLOCK_LEN_INT64;
ptrWordIn += BLOCK_LEN_INT64;
ptrWordOut += BLOCK_LEN_INT64;
}
}
*/
/**
* Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", writing the output "rand"
* on M[rowOut] and making "M[rowInOut] = M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit
* rotation to the left.
*
* @param state The current state of the sponge
* @param rowIn Row used only as input
* @param rowInOut Row used as input and to receive output after rotation
* @param rowOut Row receiving the output
*
*/
/*
void reducedDuplexRowSetupv5c(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
uint64_t* ptrWordOut = rowOut;
int i;
for (i = 0; i < N_COLS / 2; i++) {
//Absorbing "M[rowInOut] XOR M[rowIn]"
state[0] ^= ptrWordInOut[0] + ptrWordIn[0];
state[1] ^= ptrWordInOut[1] + ptrWordIn[1];
state[2] ^= ptrWordInOut[2] + ptrWordIn[2];
state[3] ^= ptrWordInOut[3] + ptrWordIn[3];
state[4] ^= ptrWordInOut[4] + ptrWordIn[4];
state[5] ^= ptrWordInOut[5] + ptrWordIn[5];
state[6] ^= ptrWordInOut[6] + ptrWordIn[6];
state[7] ^= ptrWordInOut[7] + ptrWordIn[7];
state[8] ^= ptrWordInOut[8] + ptrWordIn[8];
state[9] ^= ptrWordInOut[9] + ptrWordIn[9];
state[10] ^= ptrWordInOut[10] + ptrWordIn[10];
state[11] ^= ptrWordInOut[11] + ptrWordIn[11];
//Applies the reduced-round transformation f to the sponge's state
reducedBlake2bLyra(state);
//M[row*][col] = M[row*][col] XOR rotW(rand)
ptrWordInOut[0] ^= state[10];
ptrWordInOut[1] ^= state[11];
ptrWordInOut[2] ^= state[0];
ptrWordInOut[3] ^= state[1];
ptrWordInOut[4] ^= state[2];
ptrWordInOut[5] ^= state[3];
ptrWordInOut[6] ^= state[4];
ptrWordInOut[7] ^= state[5];
ptrWordInOut[8] ^= state[6];
ptrWordInOut[9] ^= state[7];
ptrWordInOut[10] ^= state[8];
ptrWordInOut[11] ^= state[9];
//M[row][col] = rand
ptrWordOut[0] = state[0] ^ ptrWordIn[0];
ptrWordOut[1] = state[1] ^ ptrWordIn[1];
ptrWordOut[2] = state[2] ^ ptrWordIn[2];
ptrWordOut[3] = state[3] ^ ptrWordIn[3];
ptrWordOut[4] = state[4] ^ ptrWordIn[4];
ptrWordOut[5] = state[5] ^ ptrWordIn[5];
ptrWordOut[6] = state[6] ^ ptrWordIn[6];
ptrWordOut[7] = state[7] ^ ptrWordIn[7];
ptrWordOut[8] = state[8] ^ ptrWordIn[8];
ptrWordOut[9] = state[9] ^ ptrWordIn[9];
ptrWordOut[10] = state[10] ^ ptrWordIn[10];
ptrWordOut[11] = state[11] ^ ptrWordIn[11];
//Goes to next column (i.e., next block in sequence)
ptrWordInOut += BLOCK_LEN_INT64;
ptrWordIn += BLOCK_LEN_INT64;
ptrWordOut += 2 * BLOCK_LEN_INT64;
}
ptrWordOut = rowOut + BLOCK_LEN_INT64;
for (i = 0; i < N_COLS / 2; i++) {
//Absorbing "M[rowInOut] XOR M[rowIn]"
state[0] ^= ptrWordInOut[0] + ptrWordIn[0];
state[1] ^= ptrWordInOut[1] + ptrWordIn[1];
state[2] ^= ptrWordInOut[2] + ptrWordIn[2];
state[3] ^= ptrWordInOut[3] + ptrWordIn[3];
state[4] ^= ptrWordInOut[4] + ptrWordIn[4];
state[5] ^= ptrWordInOut[5] + ptrWordIn[5];
state[6] ^= ptrWordInOut[6] + ptrWordIn[6];
state[7] ^= ptrWordInOut[7] + ptrWordIn[7];
state[8] ^= ptrWordInOut[8] + ptrWordIn[8];
state[9] ^= ptrWordInOut[9] + ptrWordIn[9];
state[10] ^= ptrWordInOut[10] + ptrWordIn[10];
state[11] ^= ptrWordInOut[11] + ptrWordIn[11];
//Applies the reduced-round transformation f to the sponge's state
reducedBlake2bLyra(state);
//M[row*][col] = M[row*][col] XOR rotW(rand)
ptrWordInOut[0] ^= state[10];
ptrWordInOut[1] ^= state[11];
ptrWordInOut[2] ^= state[0];
ptrWordInOut[3] ^= state[1];
ptrWordInOut[4] ^= state[2];
ptrWordInOut[5] ^= state[3];
ptrWordInOut[6] ^= state[4];
ptrWordInOut[7] ^= state[5];
ptrWordInOut[8] ^= state[6];
ptrWordInOut[9] ^= state[7];
ptrWordInOut[10] ^= state[8];
ptrWordInOut[11] ^= state[9];
//M[row][col] = rand
ptrWordOut[0] = state[0] ^ ptrWordIn[0];
ptrWordOut[1] = state[1] ^ ptrWordIn[1];
ptrWordOut[2] = state[2] ^ ptrWordIn[2];
ptrWordOut[3] = state[3] ^ ptrWordIn[3];
ptrWordOut[4] = state[4] ^ ptrWordIn[4];
ptrWordOut[5] = state[5] ^ ptrWordIn[5];
ptrWordOut[6] = state[6] ^ ptrWordIn[6];
ptrWordOut[7] = state[7] ^ ptrWordIn[7];
ptrWordOut[8] = state[8] ^ ptrWordIn[8];
ptrWordOut[9] = state[9] ^ ptrWordIn[9];
ptrWordOut[10] = state[10] ^ ptrWordIn[10];
ptrWordOut[11] = state[11] ^ ptrWordIn[11];
//Goes to next column (i.e., next block in sequence)
ptrWordInOut += BLOCK_LEN_INT64;
ptrWordIn += BLOCK_LEN_INT64;
ptrWordOut += 2 * BLOCK_LEN_INT64;
}
}
*/
/**
* Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", using the output "rand"
* to make "M[rowOut][col] = M[rowOut][col] XOR rand" and "M[rowInOut] = M[rowInOut] XOR rotW(rand)",
* where rotW is a 64-bit rotation to the left.
*
* @param state The current state of the sponge
* @param rowIn Row used only as input
* @param rowInOut Row used as input and to receive output after rotation
* @param rowOut Row receiving the output
*
*/
/*
void reducedDuplexRowd(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row
int i;
for (i = 0; i < N_COLS; i++) {
//Absorbing "M[rowInOut] XOR M[rowIn]"
state[0] ^= ptrWordInOut[0] + ptrWordIn[0];
state[1] ^= ptrWordInOut[1] + ptrWordIn[1];
state[2] ^= ptrWordInOut[2] + ptrWordIn[2];
state[3] ^= ptrWordInOut[3] + ptrWordIn[3];
state[4] ^= ptrWordInOut[4] + ptrWordIn[4];
state[5] ^= ptrWordInOut[5] + ptrWordIn[5];
state[6] ^= ptrWordInOut[6] + ptrWordIn[6];
state[7] ^= ptrWordInOut[7] + ptrWordIn[7];
state[8] ^= ptrWordInOut[8] + ptrWordIn[8];
state[9] ^= ptrWordInOut[9] + ptrWordIn[9];
state[10] ^= ptrWordInOut[10] + ptrWordIn[10];
state[11] ^= ptrWordInOut[11] + ptrWordIn[11];
//Applies the reduced-round transformation f to the sponge's state
reducedBlake2bLyra(state);
//M[rowOut][col] = M[rowOut][col] XOR rand
ptrWordOut[0] ^= state[0];
ptrWordOut[1] ^= state[1];
ptrWordOut[2] ^= state[2];
ptrWordOut[3] ^= state[3];
ptrWordOut[4] ^= state[4];
ptrWordOut[5] ^= state[5];
ptrWordOut[6] ^= state[6];
ptrWordOut[7] ^= state[7];
ptrWordOut[8] ^= state[8];
ptrWordOut[9] ^= state[9];
ptrWordOut[10] ^= state[10];
ptrWordOut[11] ^= state[11];
//M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)
//Goes to next block
ptrWordOut += BLOCK_LEN_INT64;
ptrWordInOut += BLOCK_LEN_INT64;
ptrWordIn += BLOCK_LEN_INT64;
}
}
*/
/**
Prints an array of unsigned chars
*/
void printArray(unsigned char *array, unsigned int size, char *name) {
int i;
printf("%s: ", name);
for (i = 0; i < size; i++) {
printf("%2x|", array[i]);
}
printf("\n");
}
////////////////////////////////////////////////////////////////////////////////////////////////

108
algorithm/sponge.h

@ -0,0 +1,108 @@ @@ -0,0 +1,108 @@
/**
* Header file for Blake2b's internal permutation in the form of a sponge.
* This code is based on the original Blake2b's implementation provided by
* Samuel Neves (https://blake2.net/)
*
* Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
*
* This software is hereby placed in the public domain.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef SPONGE_H_
#define SPONGE_H_
#include <stdint.h>
#if defined(__GNUC__)
#define ALIGN __attribute__ ((aligned(32)))
#elif defined(_MSC_VER)
#define ALIGN __declspec(align(32))
#else
#define ALIGN
#endif
/*Blake2b IV Array*/
static const uint64_t blake2b_IV[8] =
{
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
};
/*Blake2b's rotation*/
static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
return ( w >> c ) | ( w << ( 64 - c ) );
}
/*Blake2b's G function*/
#define G(r,i,a,b,c,d) \
do { \
a = a + b; \
d = rotr64(d ^ a, 32); \
c = c + d; \
b = rotr64(b ^ c, 24); \
a = a + b; \
d = rotr64(d ^ a, 16); \
c = c + d; \
b = rotr64(b ^ c, 63); \
} while(0)
/*One Round of the Blake2b's compression function*/
#define ROUND_LYRA(r) \
G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
G(r,2,v[ 2],v[ 6],v[10],v[14]); \
G(r,3,v[ 3],v[ 7],v[11],v[15]); \
G(r,4,v[ 0],v[ 5],v[10],v[15]); \
G(r,5,v[ 1],v[ 6],v[11],v[12]); \
G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
G(r,7,v[ 3],v[ 4],v[ 9],v[14]);
//---- Housekeeping
void initState(uint64_t state[/*16*/]);
//---- Squeezes
void squeeze(uint64_t *state, unsigned char *out, unsigned int len);
void reducedSqueezeRow0(uint64_t* state, uint64_t* row);
//---- Absorbs
void absorbBlock(uint64_t *state, const uint64_t *in);
void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in);
//---- Duplexes
void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut);
void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
//---- Misc
void printArray(unsigned char *array, unsigned int size, char *name);
////////////////////////////////////////////////////////////////////////////////////////////////
////TESTS////
//void reducedDuplexRowc(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
//void reducedDuplexRowd(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
//void reducedDuplexRowSetupv4(uint64_t *state, uint64_t *rowIn1, uint64_t *rowIn2, uint64_t *rowOut1, uint64_t *rowOut2);
//void reducedDuplexRowSetupv5(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
//void reducedDuplexRowSetupv5c(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
//void reducedDuplexRowSetupv5d(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
/////////////
#endif /* SPONGE_H_ */

166
driver-opencl.c

@ -284,7 +284,7 @@ char *set_gpu_engine(const char *_arg) @@ -284,7 +284,7 @@ char *set_gpu_engine(const char *_arg)
char *arg = (char *)alloca(strlen(_arg) + 1);
strcpy(arg, _arg);
if(!(nextptr = strtok(arg, ",")))
if (!(nextptr = strtok(arg, ",")))
return "Invalid parameters for set gpu engine";
do {
@ -408,12 +408,12 @@ char *set_gpu_memdiff(char *arg) @@ -408,12 +408,12 @@ char *set_gpu_memdiff(char *arg)
gpus[device++].gpu_memdiff = val;
}
if (device == 1) {
for (i = device; i < MAX_GPUDEVICES; i++)
gpus[i].gpu_memdiff = gpus[0].gpu_memdiff;
}
if (device == 1) {
for (i = device; i < MAX_GPUDEVICES; i++)
gpus[i].gpu_memdiff = gpus[0].gpu_memdiff;
}
return NULL;
return NULL;
}
char *set_gpu_powertune(char *arg)
@ -630,12 +630,12 @@ char *set_xintensity(const char *_arg) @@ -630,12 +630,12 @@ char *set_xintensity(const char *_arg)
device++;
}
if (device == 1)
for (i = device; i < MAX_GPUDEVICES; i++) {
gpus[i].dynamic = gpus[0].dynamic;
gpus[i].intensity = gpus[0].intensity;
gpus[i].rawintensity = gpus[0].rawintensity;
gpus[i].xintensity = gpus[0].xintensity;
}
for (i = device; i < MAX_GPUDEVICES; i++) {
gpus[i].dynamic = gpus[0].dynamic;
gpus[i].intensity = gpus[0].intensity;
gpus[i].rawintensity = gpus[0].rawintensity;
gpus[i].xintensity = gpus[0].xintensity;
}
return NULL;
}
@ -673,12 +673,12 @@ char *set_rawintensity(const char *_arg) @@ -673,12 +673,12 @@ char *set_rawintensity(const char *_arg)
device++;
}
if (device == 1)
for (i = device; i < MAX_GPUDEVICES; i++) {
gpus[i].dynamic = gpus[0].dynamic;
gpus[i].intensity = gpus[0].intensity;
gpus[i].rawintensity = gpus[0].rawintensity;
gpus[i].xintensity = gpus[0].xintensity;
}
for (i = device; i < MAX_GPUDEVICES; i++) {
gpus[i].dynamic = gpus[0].dynamic;
gpus[i].intensity = gpus[0].intensity;
gpus[i].rawintensity = gpus[0].rawintensity;
gpus[i].xintensity = gpus[0].xintensity;
}
return NULL;
}
@ -799,22 +799,22 @@ retry: // TODO: refactor @@ -799,22 +799,22 @@ retry: // TODO: refactor
displayed_rolling = thr->rolling;
if (!mhash_base)
displayed_rolling *= 1000;
wlog("Thread %d: %.1f %sh/s %s ", i, displayed_rolling, mhash_base ? "M" : "K" , cgpu->deven != DEV_DISABLED ? "Enabled" : "Disabled");
wlog("Thread %d: %.1f %sh/s %s ", i, displayed_rolling, mhash_base ? "M" : "K", cgpu->deven != DEV_DISABLED ? "Enabled" : "Disabled");
switch (cgpu->status) {
default:
case LIFE_WELL:
wlog("ALIVE");
break;
case LIFE_SICK:
wlog("SICK reported in %s", checkin);
break;
case LIFE_DEAD:
wlog("DEAD reported in %s", checkin);
break;
case LIFE_INIT:
case LIFE_NOSTART:
wlog("Never started");
break;
default:
case LIFE_WELL:
wlog("ALIVE");
break;
case LIFE_SICK:
wlog("SICK reported in %s", checkin);
break;
case LIFE_DEAD:
wlog("DEAD reported in %s", checkin);
break;
case LIFE_INIT:
case LIFE_NOSTART:
wlog("Never started");
break;
}
if (thr->pause)
wlog(" paused");
@ -825,7 +825,7 @@ retry: // TODO: refactor @@ -825,7 +825,7 @@ retry: // TODO: refactor
wlog("\n");
}
wlogprint("[E]nable [D]isable [R]estart GPU %s\n",adl_active ? "[C]hange settings" : "");
wlogprint("[E]nable [D]isable [R]estart GPU %s\n", adl_active ? "[C]hange settings" : "");
wlogprint("[I]ntensity E[x]perimental intensity R[a]w Intensity\n");
wlogprint("Or press any other key to continue\n");
@ -868,7 +868,8 @@ retry: // TODO: refactor @@ -868,7 +868,8 @@ retry: // TODO: refactor
}
rd_unlock(&mining_thr_lock);
goto retry;
} else if (!strncasecmp(&input, "d", 1)) {
}
else if (!strncasecmp(&input, "d", 1)) {
if (selected)
selected = curses_int("Select GPU to disable");
if (selected < 0 || selected >= nDevs) {
@ -881,7 +882,8 @@ retry: // TODO: refactor @@ -881,7 +882,8 @@ retry: // TODO: refactor
}
gpus[selected].deven = DEV_DISABLED;
goto retry;
} else if (!strncasecmp(&input, "i", 1)) {
}
else if (!strncasecmp(&input, "i", 1)) {
int intensity;
char *intvar;
@ -893,8 +895,8 @@ retry: // TODO: refactor @@ -893,8 +895,8 @@ retry: // TODO: refactor
}
intvar = curses_input("Set GPU scan intensity (d or "
MIN_INTENSITY_STR " -> "
MAX_INTENSITY_STR ")");
MIN_INTENSITY_STR " -> "
MAX_INTENSITY_STR ")");
if (!intvar) {
wlogprint("Invalid input\n");
goto retry;
@ -927,7 +929,8 @@ retry: // TODO: refactor @@ -927,7 +929,8 @@ retry: // TODO: refactor
pause_dynamic_threads(selected);
goto retry;
} else if (!strncasecmp(&input, "x", 1)) {
}
else if (!strncasecmp(&input, "x", 1)) {
int xintensity;
char *intvar;
@ -960,7 +963,8 @@ retry: // TODO: refactor @@ -960,7 +963,8 @@ retry: // TODO: refactor
pause_dynamic_threads(selected);
goto retry;
} else if (!strncasecmp(&input, "a", 1)) {
}
else if (!strncasecmp(&input, "a", 1)) {
int rawintensity;
char *intvar;
@ -993,7 +997,8 @@ retry: // TODO: refactor @@ -993,7 +997,8 @@ retry: // TODO: refactor
pause_dynamic_threads(selected);
goto retry;
} else if (!strncasecmp(&input, "r", 1)) {
}
else if (!strncasecmp(&input, "r", 1)) {
if (selected)
selected = curses_int("Select GPU to attempt to restart");
if (selected < 0 || selected >= nDevs) {
@ -1003,7 +1008,8 @@ retry: // TODO: refactor @@ -1003,7 +1008,8 @@ retry: // TODO: refactor
wlogprint("Attempting to restart threads of GPU %d\n", selected);
reinit_device(&gpus[selected]);
goto retry;
} else if (adl_active && (!strncasecmp(&input, "c", 1))) {
}
else if (adl_active && (!strncasecmp(&input, "c", 1))) {
if (selected)
selected = curses_int("Select GPU to change settings on");
if (selected < 0 || selected >= nDevs) {
@ -1012,7 +1018,8 @@ retry: // TODO: refactor @@ -1012,7 +1018,8 @@ retry: // TODO: refactor
}
change_gpusettings(selected);
goto retry;
} else
}
else
clear_logwin();
immedok(logwin, false);
@ -1027,8 +1034,8 @@ void manage_gpu(void) @@ -1027,8 +1034,8 @@ void manage_gpu(void)
static _clState *clStates[MAX_GPUDEVICES];
static void set_threads_hashes(unsigned int vectors, unsigned int compute_shaders, int64_t *hashes, size_t *globalThreads,
unsigned int minthreads, __maybe_unused int *intensity, __maybe_unused int *xintensity,
__maybe_unused int *rawintensity, algorithm_t *algorithm)
unsigned int minthreads, __maybe_unused int *intensity, __maybe_unused int *xintensity,
__maybe_unused int *rawintensity, algorithm_t *algorithm)
{
unsigned int threads = 0;
while (threads < minthreads) {
@ -1037,7 +1044,7 @@ static void set_threads_hashes(unsigned int vectors, unsigned int compute_shader @@ -1037,7 +1044,7 @@ static void set_threads_hashes(unsigned int vectors, unsigned int compute_shader
threads = *rawintensity;
}
else if (*xintensity > 0) {
threads = compute_shaders * ((algorithm->xintensity_shift)?(1 << (algorithm->xintensity_shift + *xintensity)):*xintensity);
threads = compute_shaders * ((algorithm->xintensity_shift) ? (1 << (algorithm->xintensity_shift + *xintensity)) : *xintensity);
}
else {
threads = 1 << (algorithm->intensity_shift + *intensity);
@ -1102,7 +1109,8 @@ select_cgpu: @@ -1102,7 +1109,8 @@ select_cgpu:
applog(LOG_WARNING, "Thread %d still exists, killing it off", thr_id);
cg_completion_timeout(&thr_info_cancel_join, thr, 5000);
thr->cgpu->drv->thread_shutdown(thr);
} else
}
else
applog(LOG_WARNING, "Thread %d no longer exists", thr_id);
}
rd_unlock(&mining_thr_lock);
@ -1234,7 +1242,8 @@ static void get_opencl_statline_before(char *buf, size_t bufsiz, struct cgpu_inf @@ -1234,7 +1242,8 @@ static void get_opencl_statline_before(char *buf, size_t bufsiz, struct cgpu_inf
else
tailsprintf(buf, bufsiz, " ");
tailsprintf(buf, bufsiz, "| ");
} else
}
else
gpu->drv->get_statline_before = &blank_get_statline_before;
}
#endif
@ -1250,7 +1259,7 @@ static void get_opencl_statline(char *buf, size_t bufsiz, struct cgpu_info *gpu) @@ -1250,7 +1259,7 @@ static void get_opencl_statline(char *buf, size_t bufsiz, struct cgpu_info *gpu)
}
struct opencl_thread_data {
cl_int (*queue_kernel_parameters)(_clState *, dev_blk_ctx *, cl_uint);
cl_int(*queue_kernel_parameters)(_clState *, dev_blk_ctx *, cl_uint);
uint32_t *res;
};
@ -1340,7 +1349,7 @@ static bool opencl_thread_init(struct thr_info *thr) @@ -1340,7 +1349,7 @@ static bool opencl_thread_init(struct thr_info *thr)
}
status |= clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
buffersize, blank_res, 0, NULL, NULL);
buffersize, blank_res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) {
free(thrdata->res);
free(thrdata);
@ -1357,7 +1366,13 @@ static bool opencl_thread_init(struct thr_info *thr) @@ -1357,7 +1366,13 @@ static bool opencl_thread_init(struct thr_info *thr)
static bool opencl_prepare_work(struct thr_info __maybe_unused *thr, struct work *work)
{
work->blk.work = work;
if (!safe_cmp(work->pool->algorithm.name, "Lyra2RE")) {
work->blk.work = work;
precalc_hash_blake256(&work->blk, 0, (uint32_t *)(work->data));
}
else {
work->blk.work = work;
}
thr->pool_no = work->pool->pool_no;
return true;
}
@ -1365,7 +1380,7 @@ static bool opencl_prepare_work(struct thr_info __maybe_unused *thr, struct work @@ -1365,7 +1380,7 @@ static bool opencl_prepare_work(struct thr_info __maybe_unused *thr, struct work
extern int opt_dynamic_interval;
static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
int64_t __maybe_unused max_nonce)
int64_t __maybe_unused max_nonce)
{
const int thr_id = thr->id;
struct opencl_thread_data *thrdata = (struct opencl_thread_data *)thr->cgpu_data;
@ -1376,11 +1391,11 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work, @@ -1376,11 +1391,11 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
cl_int status;
size_t globalThreads[1];
size_t localThreads[1] = { clState->wsize };
size_t *p_global_work_offset = NULL;
size_t *p_global_work_offset = NULL;
int64_t hashes;
int found = gpu->algorithm.found_idx;
int buffersize = BUFFERSIZE;
unsigned int i;
unsigned int i;
/* Windows' timer resolution is only 15ms so oversample 5x */
if (gpu->dynamic && (++gpu->intervals * dynamic_us) > 70000) {
@ -1392,7 +1407,8 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work, @@ -1392,7 +1407,8 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
if (gpu_us > dynamic_us) {
if (gpu->intensity > MIN_INTENSITY)
--gpu->intensity;
} else if (gpu_us < dynamic_us / 2) {
}
else if (gpu_us < dynamic_us / 2) {
if (gpu->intensity < MAX_INTENSITY)
++gpu->intensity;
}
@ -1401,7 +1417,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work, @@ -1401,7 +1417,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
}
set_threads_hashes(clState->vwidth, clState->compute_shaders, &hashes, globalThreads, localThreads[0],
&gpu->intensity, &gpu->xintensity, &gpu->rawintensity, &gpu->algorithm);
&gpu->intensity, &gpu->xintensity, &gpu->rawintensity, &gpu->algorithm);
if (hashes > gpu->max_hashes)
gpu->max_hashes = hashes;
@ -1411,27 +1427,27 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work, @@ -1411,27 +1427,27 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
return -1;
}
if (clState->goffset)
p_global_work_offset = (size_t *)&work->blk.nonce;
if (clState->goffset)
p_global_work_offset = (size_t *)&work->blk.nonce;
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel, 1, p_global_work_offset,
globalThreads, localThreads, 0, NULL, NULL);
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel, 1, p_global_work_offset,
globalThreads, localThreads, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
return -1;
}
for (i = 0; i < clState->n_extra_kernels; i++) {
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->extra_kernels[i], 1, p_global_work_offset,
globalThreads, localThreads, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
return -1;
}
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->extra_kernels[i], 1, p_global_work_offset,
globalThreads, localThreads, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
return -1;
}
}
status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
buffersize, thrdata->res, 0, NULL, NULL);
buffersize, thrdata->res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error: clEnqueueReadBuffer failed error %d. (clEnqueueReadBuffer)", status);
return -1;
@ -1449,7 +1465,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work, @@ -1449,7 +1465,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
if (thrdata->res[found]) {
/* Clear the buffer again */
status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
buffersize, blank_res, 0, NULL, NULL);
buffersize, blank_res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
return -1;
@ -1471,7 +1487,7 @@ static void opencl_thread_shutdown(struct thr_info *thr) @@ -1471,7 +1487,7 @@ static void opencl_thread_shutdown(struct thr_info *thr)
const int thr_id = thr->id;
_clState *clState = clStates[thr_id];
clStates[thr_id] = NULL;
unsigned int i;
unsigned int i;
if (clState) {
clFinish(clState->commandQueue);
@ -1480,8 +1496,8 @@ static void opencl_thread_shutdown(struct thr_info *thr) @@ -1480,8 +1496,8 @@ static void opencl_thread_shutdown(struct thr_info *thr)
if (clState->padbuffer8)
clReleaseMemObject(clState->padbuffer8);
clReleaseKernel(clState->kernel);
for (i = 0; i < clState->n_extra_kernels; i++)
clReleaseKernel(clState->extra_kernels[i]);
for (i = 0; i < clState->n_extra_kernels; i++)
clReleaseKernel(clState->extra_kernels[i]);
clReleaseProgram(clState->program);
clReleaseCommandQueue(clState->commandQueue);
clReleaseContext(clState->context);
@ -1503,7 +1519,7 @@ struct device_drv opencl_drv = { @@ -1503,7 +1519,7 @@ struct device_drv opencl_drv = {
#ifdef HAVE_ADL
/*.get_statline_before = */ get_opencl_statline_before,
#else
NULL,
NULL,
#endif
/*.get_statline = */ get_opencl_statline,
/*.api_data = */ NULL,
@ -1524,7 +1540,7 @@ struct device_drv opencl_drv = { @@ -1524,7 +1540,7 @@ struct device_drv opencl_drv = {
/*.hw_error = */ NULL,
/*.thread_shutdown = */ opencl_thread_shutdown,
/*.thread_enable =*/ NULL,
false,
0,
0
false,
0,
0
};

139
findnonce.c

@ -92,7 +92,7 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) @@ -92,7 +92,7 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data)
blk->T1 = blk->fcty_e2 = (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
blk->PreVal4_2 = blk->PreVal4 + blk->T1;
blk->PreVal0 = blk->PreVal4 + blk->ctx_a;
blk->PreW31 = 0x00000280 + (rotr(blk->W16, 7) ^ rotr(blk->W16, 18) ^ (blk->W16 >> 3));
blk->PreW31 = 0x00000280 + (rotr(blk->W16, 7) ^ rotr(blk->W16, 18) ^ (blk->W16 >> 3));
blk->PreW32 = blk->W16 + (rotr(blk->W17, 7) ^ rotr(blk->W17, 18) ^ (blk->W17 >> 3));
blk->PreW18 = data[2] + (rotr(blk->W16, 17) ^ rotr(blk->W16, 19) ^ (blk->W16 >> 10));
blk->PreW19 = 0x11002000 + (rotr(blk->W17, 17) ^ rotr(blk->W17, 19) ^ (blk->W17 >> 10));
@ -191,7 +191,7 @@ static void *postcalc_hash(void *userdata) @@ -191,7 +191,7 @@ static void *postcalc_hash(void *userdata)
* end of the res[] array */
if (unlikely(pcd->res[found] & ~found)) {
applog(LOG_WARNING, "%s%d: invalid nonce count - HW error",
thr->cgpu->drv->name, thr->cgpu->device_id);
thr->cgpu->drv->name, thr->cgpu->device_id);
hw_errors++;
thr->cgpu->hw_errors++;
pcd->res[found] &= found;
@ -200,7 +200,7 @@ static void *postcalc_hash(void *userdata) @@ -200,7 +200,7 @@ static void *postcalc_hash(void *userdata)
for (entry = 0; entry < pcd->res[found]; entry++) {
uint32_t nonce = pcd->res[entry];
if (found == 0x0F)
nonce = swab32(nonce);
nonce = swab32(nonce);
applog(LOG_DEBUG, "[THR%d] OCL NONCE %08x (%lu) found in slot %d (found = %d)", thr->id, nonce, nonce, entry, found);
submit_nonce(thr, pcd->work, nonce);
@ -234,3 +234,136 @@ void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res) @@ -234,3 +234,136 @@ void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res)
free(pcd);
}
}
// BLAKE 256 14 rounds (standard)
typedef struct
{
uint32_t h[8];
uint32_t t;
} blake_state256;
#define NB_ROUNDS32 14
const uint8_t blake_sigma[][16] =
{
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }
};
const uint32_t blake_u256[16] =
{
0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344,
0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89,
0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c,
0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917
};
#define ROT32(x,n) (((x)<<(32-n))|( (x)>>(n)))
//#define ROT32(x,n) (rotate((uint)x, (uint)32-n))
#define ADD32(x,y) ((uint32_t)((x) + (y)))
#define XOR32(x,y) ((uint32_t)((x) ^ (y)))
#define G(a,b,c,d,i) \
do { \
v[a] += XOR32(m[blake_sigma[r][i]], blake_u256[blake_sigma[r][i + 1]]) + v[b]; \
v[d] = ROT32(XOR32(v[d], v[a]), 16); \
v[c] += v[d]; \
v[b] = ROT32(XOR32(v[b], v[c]), 12); \
v[a] += XOR32(m[blake_sigma[r][i + 1]], blake_u256[blake_sigma[r][i]]) + v[b]; \
v[d] = ROT32(XOR32(v[d], v[a]), 8); \
v[c] += v[d]; \
v[b] = ROT32(XOR32(v[b], v[c]), 7); \
} while (0)
// compress a block
void blake256_compress_block(blake_state256 *S, uint32_t *m)
{
uint32_t v[16];
int i, r;
for (i = 0; i < 8; ++i) v[i] = S->h[i];
v[8] = blake_u256[0];
v[9] = blake_u256[1];
v[10] = blake_u256[2];
v[11] = blake_u256[3];
v[12] = blake_u256[4];
v[13] = blake_u256[5];
v[14] = blake_u256[6];
v[15] = blake_u256[7];
v[12] ^= S->t;
v[13] ^= S->t;
for (r = 0; r < NB_ROUNDS32; ++r)
{
/* column step */
G(0, 4, 8, 12, 0);
G(1, 5, 9, 13, 2);
G(2, 6, 10, 14, 4);
G(3, 7, 11, 15, 6);
/* diagonal step */
G(0, 5, 10, 15, 8);
G(1, 6, 11, 12, 10);
G(2, 7, 8, 13, 12);
G(3, 4, 9, 14, 14);
}
for (i = 0; i < 16; ++i) S->h[i & 7] ^= v[i];
}
void blake256_init(blake_state256 *S)
{
S->h[0] = 0x6a09e667;
S->h[1] = 0xbb67ae85;
S->h[2] = 0x3c6ef372;
S->h[3] = 0xa54ff53a;
S->h[4] = 0x510e527f;
S->h[5] = 0x9b05688c;
S->h[6] = 0x1f83d9ab;
S->h[7] = 0x5be0cd19;
S->t = 0;
}
void blake256_update(blake_state256 *S, const uint32_t *in)
{
uint32_t m[16];
int i;
S->t = 512;
for (i = 0; i < 16; ++i) m[i] = in[i];
blake256_compress_block(S, m);
}
void precalc_hash_blake256(dev_blk_ctx *blk, uint32_t *state, uint32_t *data)
{
blake_state256 S;
blake256_init(&S);
blake256_update(&S, data);
blk->ctx_a = S.h[0];
blk->ctx_b = S.h[1];
blk->ctx_c = S.h[2];
blk->ctx_d = S.h[3];
blk->ctx_e = S.h[4];
blk->ctx_f = S.h[5];
blk->ctx_g = S.h[6];
blk->ctx_h = S.h[7];
blk->cty_a = data[16];
blk->cty_b = data[17];
blk->cty_c = data[18];
}

1
findnonce.h

@ -10,5 +10,6 @@ @@ -10,5 +10,6 @@
extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data);
extern void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res);
extern void precalc_hash_blake256(dev_blk_ctx *blk, uint32_t *state, uint32_t *data);
#endif /*FINDNONCE_H*/

145
kernel/Lyra2.cl

@ -0,0 +1,145 @@ @@ -0,0 +1,145 @@
/*
* Lyra2 kernel implementation.
*
* ==========================(LICENSE BEGIN)============================
* Copyright (c) 2014 djm34
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author djm34
*/
/*Blake2b IV Array*/
__constant static const sph_u64 blake2b_IV[8] =
{
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
};
/*Blake2b's rotation*/
static inline sph_u64 rotr64( const sph_u64 w, const unsigned c ){
return rotate(w, (ulong)(64-c));
}
/*Blake2b's G function*/
#define G(a,b,c,d) \
do { \
a += b; d ^= a; d = SPH_ROTR64(d, 32); \
c += d; b ^= c; b = SPH_ROTR64(b, 24); \
a += b; d ^= a; d = SPH_ROTR64(d, 16); \
c += d; b ^= c; b = SPH_ROTR64(b, 63); \
} while(0)
/*One Round of the Blake2b's compression function*/
#define round_lyra(v) \
do { \
G(v[ 0],v[ 4],v[ 8],v[12]); \
G(v[ 1],v[ 5],v[ 9],v[13]); \
G(v[ 2],v[ 6],v[10],v[14]); \
G(v[ 3],v[ 7],v[11],v[15]); \
G(v[ 0],v[ 5],v[10],v[15]); \
G(v[ 1],v[ 6],v[11],v[12]); \
G(v[ 2],v[ 7],v[ 8],v[13]); \
G(v[ 3],v[ 4],v[ 9],v[14]); \
} while(0)
#define reduceDuplexRowSetup(rowIn, rowInOut, rowOut) \
{ \
for (int i = 0; i < 8; i++) \
{ \
\
for (int j = 0; j < 12; j++) {state[j] ^= Matrix[12 * i + j][rowIn] + Matrix[12 * i + j][rowInOut];} \
round_lyra(state); \
for (int j = 0; j < 12; j++) {Matrix[j + 84 - 12 * i][rowOut] = Matrix[12 * i + j][rowIn] ^ state[j];} \
\
Matrix[0 + 12 * i][rowInOut] ^= state[11]; \
Matrix[1 + 12 * i][rowInOut] ^= state[0]; \
Matrix[2 + 12 * i][rowInOut] ^= state[1]; \
Matrix[3 + 12 * i][rowInOut] ^= state[2]; \
Matrix[4 + 12 * i][rowInOut] ^= state[3]; \
Matrix[5 + 12 * i][rowInOut] ^= state[4]; \
Matrix[6 + 12 * i][rowInOut] ^= state[5]; \
Matrix[7 + 12 * i][rowInOut] ^= state[6]; \
Matrix[8 + 12 * i][rowInOut] ^= state[7]; \
Matrix[9 + 12 * i][rowInOut] ^= state[8]; \
Matrix[10 + 12 * i][rowInOut] ^= state[9]; \
Matrix[11 + 12 * i][rowInOut] ^= state[10]; \
} \
\
}
#define reduceDuplexRow(rowIn, rowInOut, rowOut) \
{ \
for (int i = 0; i < 8; i++) \
{ \
for (int j = 0; j < 12; j++) \
state[j] ^= Matrix[12 * i + j][rowIn] + Matrix[12 * i + j][rowInOut]; \
\
round_lyra(state); \
for (int j = 0; j < 12; j++) {Matrix[j + 12 * i][rowOut] ^= state[j];} \
\
Matrix[0 + 12 * i][rowInOut] ^= state[11]; \
Matrix[1 + 12 * i][rowInOut] ^= state[0]; \
Matrix[2 + 12 * i][rowInOut] ^= state[1]; \
Matrix[3 + 12 * i][rowInOut] ^= state[2]; \
Matrix[4 + 12 * i][rowInOut] ^= state[3]; \
Matrix[5 + 12 * i][rowInOut] ^= state[4]; \
Matrix[6 + 12 * i][rowInOut] ^= state[5]; \
Matrix[7 + 12 * i][rowInOut] ^= state[6]; \
Matrix[8 + 12 * i][rowInOut] ^= state[7]; \
Matrix[9 + 12 * i][rowInOut] ^= state[8]; \
Matrix[10 + 12 * i][rowInOut] ^= state[9]; \
Matrix[11 + 12 * i][rowInOut] ^= state[10]; \
} \
\
}
#define absorbblock(in) { \
state[0] ^= Matrix[0][in]; \
state[1] ^= Matrix[1][in]; \
state[2] ^= Matrix[2][in]; \
state[3] ^= Matrix[3][in]; \
state[4] ^= Matrix[4][in]; \
state[5] ^= Matrix[5][in]; \
state[6] ^= Matrix[6][in]; \
state[7] ^= Matrix[7][in]; \
state[8] ^= Matrix[8][in]; \
state[9] ^= Matrix[9][in]; \
state[10] ^= Matrix[10][in]; \
state[11] ^= Matrix[11][in]; \
round_lyra(state); \
round_lyra(state); \
round_lyra(state); \
round_lyra(state); \
round_lyra(state); \
round_lyra(state); \
round_lyra(state); \
round_lyra(state); \
round_lyra(state); \
round_lyra(state); \
round_lyra(state); \
round_lyra(state); \
}

392
kernel/Lyra2RE.cl

@ -0,0 +1,392 @@ @@ -0,0 +1,392 @@
/*
* Lyra2RE kernel implementation.
*
* ==========================(LICENSE BEGIN)============================
* Copyright (c) 2014 djm34
* Copyright (c) 2014 James Lovejoy
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author djm34
*/
#pragma OPENCL EXTENSION cl_amd_printf : enable
#ifndef LYRA2RE_CL
#define LYRA2RE_CL
#if __ENDIAN_LITTLE__
#define SPH_LITTLE_ENDIAN 1
#else
#define SPH_BIG_ENDIAN 1
#endif
#define SPH_UPTR sph_u64
typedef unsigned int sph_u32;
typedef int sph_s32;
#ifndef __OPENCL_VERSION__
typedef unsigned long long sph_u64;
typedef long long sph_s64;
#else
typedef unsigned long sph_u64;
typedef long sph_s64;
#endif
#define SPH_64 1
#define SPH_64_TRUE 1
#define SPH_C32(x) ((sph_u32)(x ## U))
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
#define SPH_C64(x) ((sph_u64)(x ## UL))
#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
//#define SPH_ROTL32(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
//#define SPH_ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
//#define SPH_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
//#define SPH_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
#define SPH_ROTL32(x,n) rotate(x,(uint)n) //faster with driver 14.6
#define SPH_ROTR32(x,n) rotate(x,(uint)(32-n))
#define SPH_ROTL64(x,n) rotate(x,(ulong)n)
#define SPH_ROTR64(x,n) rotate(x,(ulong)(64-n))
#include "blake256.cl"
#include "groestl256.cl"
#include "Lyra2.cl"
#include "keccak1600.cl"
#include "skein256.cl"
#define SWAP4(x) as_uint(as_uchar4(x).wzyx)
#define SWAP8(x) as_ulong(as_uchar8(x).s76543210)
#if SPH_BIG_ENDIAN
#define DEC64E(x) (x)
#define DEC64BE(x) (*(const __global sph_u64 *) (x));
#define DEC64LE(x) SWAP8(*(const __global sph_u64 *) (x));
#define DEC32LE(x) (*(const __global sph_u32 *) (x));
#else
#define DEC64E(x) SWAP8(x)
#define DEC64BE(x) SWAP8(*(const __global sph_u64 *) (x));
#define DEC64LE(x) (*(const __global sph_u64 *) (x));
#define DEC32LE(x) SWAP4(*(const __global sph_u32 *) (x));
#endif
typedef union {
unsigned char h1[64];
uint h4[16];
ulong h8[8];
} hash_t;
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search(
__global hash_t* hashes,
// precalc hash from fisrt part of message
const uint h0,
const uint h1,
const uint h2,
const uint h3,
const uint h4,
const uint h5,
const uint h6,
const uint h7,
// last 12 bytes of original message
const uint in16,
const uint in17,
const uint in18
)
{
uint gid = get_global_id(0);
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
sph_u32 h[8];
sph_u32 m[16];
sph_u32 v[16];
h[0]=h0;
h[1]=h1;
h[2]=h2;
h[3]=h3;
h[4]=h4;
h[5]=h5;
h[6]=h6;
h[7]=h7;
// compress 2nd round
m[0] = in16;
m[1] = in17;
m[2] = in18;
m[3] = SWAP4(gid);
for (int i = 4; i < 16; i++) {m[i] = c_Padding[i];}
for (int i = 0; i < 8; i++) {v[i] = h[i];}
v[8] = c_u256[0];
v[9] = c_u256[1];
v[10] = c_u256[2];
v[11] = c_u256[3];
v[12] = c_u256[4] ^ 640;
v[13] = c_u256[5] ^ 640;
v[14] = c_u256[6];
v[15] = c_u256[7];
for (int r = 0; r < 14; r++) {
GS(0, 4, 0x8, 0xC, 0x0);
GS(1, 5, 0x9, 0xD, 0x2);
GS(2, 6, 0xA, 0xE, 0x4);
GS(3, 7, 0xB, 0xF, 0x6);
GS(0, 5, 0xA, 0xF, 0x8);
GS(1, 6, 0xB, 0xC, 0xA);
GS(2, 7, 0x8, 0xD, 0xC);
GS(3, 4, 0x9, 0xE, 0xE);
}
for (int i = 0; i < 16; i++) {
int j = i & 7;
h[j] ^= v[i];}
for (int i=0;i<8;i++) {hash->h4[i]=SWAP4(h[i]);}
barrier(CLK_GLOBAL_MEM_FENCE);
}
// keccak256
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search1(__global hash_t* hashes)
{
uint gid = get_global_id(0);
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
sph_u64 keccak_gpu_state[25];
for (int i = 0; i<25; i++) {
if (i<4) { keccak_gpu_state[i] = hash->h8[i]; }
else { keccak_gpu_state[i] = 0; }
}
keccak_gpu_state[4] = 0x0000000000000001;
keccak_gpu_state[16] = 0x8000000000000000;
keccak_block(keccak_gpu_state);
for (int i = 0; i<4; i++) { hash->h8[i] = keccak_gpu_state[i]; }
barrier(CLK_GLOBAL_MEM_FENCE);
}
/// lyra2 algo
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search2(__global hash_t* hashes)
{
uint gid = get_global_id(0);
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
sph_u64 state[16];
for (int i = 0; i<4; i++) { state[i] = hash->h8[i];} //password
for (int i = 0; i<4; i++) { state[i + 4] = state[i]; } //salt
for (int i = 0; i<8; i++) { state[i + 8] = blake2b_IV[i]; }
// blake2blyra x2
for (int i = 0; i<24; i++) { round_lyra(state); } //because 12 is not enough
sph_u64 Matrix[96][8]; // very uncool
/// reducedSqueezeRow0
for (int i = 0; i < 8; i++)
{
for (int j = 0; j<12; j++) { Matrix[j + 84 - 12 * i][0] = state[j]; }
round_lyra(state);
}
/// reducedSqueezeRow1
for (int i = 0; i < 8; i++)
{
for (int j = 0; j<12; j++) { state[j] ^= Matrix[j + 12 * i][0]; }
round_lyra(state);
for (int j = 0; j<12; j++) { Matrix[j + 84 - 12 * i][1] = Matrix[j + 12 * i][0] ^ state[j]; }
}
reduceDuplexRowSetup(1, 0, 2);
reduceDuplexRowSetup(2, 1, 3);
reduceDuplexRowSetup(3, 0, 4);
reduceDuplexRowSetup(4, 3, 5);
reduceDuplexRowSetup(5, 2, 6);
reduceDuplexRowSetup(6, 1, 7);
sph_u64 rowa;
rowa = state[0] & 7;
reduceDuplexRow(7, rowa, 0);
rowa = state[0] & 7;
reduceDuplexRow(0, rowa, 3);
rowa = state[0] & 7;
reduceDuplexRow(3, rowa, 6);
rowa = state[0] & 7;
reduceDuplexRow(6, rowa, 1);
rowa = state[0] & 7;
reduceDuplexRow(1, rowa, 4);
rowa = state[0] & 7;
reduceDuplexRow(4, rowa, 7);
rowa = state[0] & 7;
reduceDuplexRow(7, rowa, 2);
rowa = state[0] & 7;
reduceDuplexRow(2, rowa, 5);
absorbblock(rowa);
for (int i = 0; i<4; i++) {hash->h8[i] = state[i];}
barrier(CLK_GLOBAL_MEM_FENCE);
}
//skein256
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search3(__global hash_t* hashes)
{
uint gid = get_global_id(0);
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
sph_u64 h[9];
sph_u64 t[3];
sph_u64 dt0,dt1,dt2,dt3;
sph_u64 p0, p1, p2, p3, p4, p5, p6, p7;
h[8] = skein_ks_parity;
for (int i = 0; i<8; i++) {
h[i] = SKEIN_IV512_256[i];
h[8] ^= h[i];}
t[0]=t12[0];
t[1]=t12[1];
t[2]=t12[2];
dt0=hash->h8[0];
dt1=hash->h8[1];
dt2=hash->h8[2];
dt3=hash->h8[3];
p0 = h[0] + dt0;
p1 = h[1] + dt1;
p2 = h[2] + dt2;
p3 = h[3] + dt3;
p4 = h[4];
p5 = h[5] + t[0];
p6 = h[6] + t[1];
p7 = h[7];
#pragma unroll
for (int i = 1; i<19; i+=2) {Round_8_512(p0,p1,p2,p3,p4,p5,p6,p7,i);}
p0 ^= dt0;
p1 ^= dt1;
p2 ^= dt2;
p3 ^= dt3;
h[0] = p0;
h[1] = p1;
h[2] = p2;
h[3] = p3;
h[4] = p4;
h[5] = p5;
h[6] = p6;
h[7] = p7;
h[8] = skein_ks_parity;
for (int i = 0; i<8; i++) { h[8] ^= h[i]; }
t[0] = t12[3];
t[1] = t12[4];
t[2] = t12[5];
p5 += t[0]; //p5 already equal h[5]
p6 += t[1];
#pragma unroll
for (int i = 1; i<19; i+=2) { Round_8_512(p0, p1, p2, p3, p4, p5, p6, p7, i); }
hash->h8[0] = p0;
hash->h8[1] = p1;
hash->h8[2] = p2;
hash->h8[3] = p3;
barrier(CLK_GLOBAL_MEM_FENCE);
}
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search4(__global hash_t* hashes, __global uint* output, const uint target)
{
uint gid = get_global_id(0);
__global hash_t *hash = &(hashes[gid - get_global_offset(0)]);
sph_u64 message[8], state[8];
sph_u64 t[8];
for (int k = 0; k<4; k++) { message[k] = hash->h8[k]; }
message[4] = 0x80UL;
message[5] = 0UL;
message[6] = 0UL;
message[7] = 0x0100000000000000UL;
for (int u = 0; u<8; u++) { state[u] = message[u]; }
state[7] ^= 0x0001000000000000UL;
PERM_SMALL_P(state);
state[7] ^= 0x0001000000000000UL;
PERM_SMALL_Q(message);
for (int u = 0; u<8; u++) { state[u] ^= message[u]; }
message[7] = state[7];
PERM_SMALL_Pf(state);
state[7] ^= message[7];
barrier(CLK_GLOBAL_MEM_FENCE);
bool result = ( as_uint2(state[7]).y <= target);
if (result) {
output[atomic_inc(output + 0xFF)] = SWAP4(gid);
}
}
#endif // LYRA2RE_CL

2
kernel/arebyp.cl

@ -904,7 +904,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup) @@ -904,7 +904,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
//------------------------------------------------------------------------------------------------------------
uint cotmp = x * zSIZE;
progress = 0;
for (i = 0; i < N[NFACTOR] + 512 + 42; i++)
for (i = 0; i < N[NFACTOR] + (N[NFACTOR] / LOOKUP_GAP) + 42; i++)
{
//if (progress < 2 * N[NFACTOR])
{

96
kernel/blake256.cl

@ -0,0 +1,96 @@ @@ -0,0 +1,96 @@
/*
* blake256 kernel implementation.
*
* ==========================(LICENSE BEGIN)============================
* Copyright (c) 2014 djm34
* Copyright (c) 2014 tpruvot
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author djm34
*/
__constant static const int sigma[16][16] = {
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }
};
__constant static const sph_u32 c_IV256[8] = {
0x6A09E667, 0xBB67AE85,
0x3C6EF372, 0xA54FF53A,
0x510E527F, 0x9B05688C,
0x1F83D9AB, 0x5BE0CD19
};
/* Second part (64-80) msg never change, store it */
__constant static const sph_u32 c_Padding[16] = {
0, 0, 0, 0,
0x80000000, 0, 0, 0,
0, 0, 0, 0,
0, 1, 0, 640,
};
__constant static const sph_u32 c_u256[16] = {
0x243F6A88, 0x85A308D3,
0x13198A2E, 0x03707344,
0xA4093822, 0x299F31D0,
0x082EFA98, 0xEC4E6C89,
0x452821E6, 0x38D01377,
0xBE5466CF, 0x34E90C6C,
0xC0AC29B7, 0xC97C50DD,
0x3F84D5B5, 0xB5470917
};
#define GS(a,b,c,d,x) { \
const sph_u32 idx1 = sigma[r][x]; \
const sph_u32 idx2 = sigma[r][x+1]; \
v[a] += (m[idx1] ^ c_u256[idx2]) + v[b]; \
v[d] ^= v[a]; \
v[d] = SPH_ROTR32(v[d], 16); \
v[c] += v[d]; \
v[b] ^= v[c]; \
v[b] = SPH_ROTR32(v[b], 12); \
\
v[a] += (m[idx2] ^ c_u256[idx1]) + v[b]; \
v[d] ^= v[a]; \
v[d] = SPH_ROTR32(v[d], 8); \
v[c] += v[d]; \
v[b] ^= v[c]; \
v[b] = SPH_ROTR32(v[b], 7); \
}

2042
kernel/diamond.cl

File diff suppressed because it is too large Load Diff

415
kernel/groestl256.cl

@ -0,0 +1,415 @@ @@ -0,0 +1,415 @@
/* $Id: groestl.c 260 2011-07-21 01:02:38Z tp $ */
/*
* Groestl256
*
* ==========================(LICENSE BEGIN)============================
* Copyright (c) 2014 djm34
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
/*
* Apparently, the 32-bit-only version is not faster than the 64-bit
* version unless using the "small footprint" code on a 32-bit machine.
*/
#define C64e(x) ((SPH_C64(x) >> 56) \
| ((SPH_C64(x) >> 40) & SPH_C64(0x000000000000FF00)) \
| ((SPH_C64(x) >> 24) & SPH_C64(0x0000000000FF0000)) \
| ((SPH_C64(x) >> 8) & SPH_C64(0x00000000FF000000)) \
| ((SPH_C64(x) << 8) & SPH_C64(0x000000FF00000000)) \
| ((SPH_C64(x) << 24) & SPH_C64(0x0000FF0000000000)) \
| ((SPH_C64(x) << 40) & SPH_C64(0x00FF000000000000)) \
| ((SPH_C64(x) << 56) & SPH_C64(0xFF00000000000000)))
#define dec64e_aligned sph_dec64le_aligned
#define enc64e sph_enc64le
#define B64_0(x) ((x) & 0xFF)
#define B64_1(x) (((x) >> 8) & 0xFF)
#define B64_2(x) (((x) >> 16) & 0xFF)
#define B64_3(x) (((x) >> 24) & 0xFF)
#define B64_4(x) (((x) >> 32) & 0xFF)
#define B64_5(x) (((x) >> 40) & 0xFF)
#define B64_6(x) (((x) >> 48) & 0xFF)
#define B64_7(x) ((x) >> 56)
#define R64 SPH_ROTL64
#define PC64(j, r) ((sph_u64)((j) + (r)))
#define QC64(j, r) (((sph_u64)(r) << 56) ^ SPH_T64(~((sph_u64)(j) << 56)))
__constant static const sph_u64 T0[] = {
C64e(0xc632f4a5f497a5c6), C64e(0xf86f978497eb84f8),
C64e(0xee5eb099b0c799ee), C64e(0xf67a8c8d8cf78df6),
C64e(0xffe8170d17e50dff), C64e(0xd60adcbddcb7bdd6),
C64e(0xde16c8b1c8a7b1de), C64e(0x916dfc54fc395491),
C64e(0x6090f050f0c05060), C64e(0x0207050305040302),
C64e(0xce2ee0a9e087a9ce), C64e(0x56d1877d87ac7d56),
C64e(0xe7cc2b192bd519e7), C64e(0xb513a662a67162b5),
C64e(0x4d7c31e6319ae64d), C64e(0xec59b59ab5c39aec),
C64e(0x8f40cf45cf05458f), C64e(0x1fa3bc9dbc3e9d1f),
C64e(0x8949c040c0094089), C64e(0xfa68928792ef87fa),
C64e(0xefd03f153fc515ef), C64e(0xb29426eb267febb2),
C64e(0x8ece40c94007c98e), C64e(0xfbe61d0b1ded0bfb),
C64e(0x416e2fec2f82ec41), C64e(0xb31aa967a97d67b3),
C64e(0x5f431cfd1cbefd5f), C64e(0x456025ea258aea45),
C64e(0x23f9dabfda46bf23), C64e(0x535102f702a6f753),
C64e(0xe445a196a1d396e4), C64e(0x9b76ed5bed2d5b9b),
C64e(0x75285dc25deac275), C64e(0xe1c5241c24d91ce1),
C64e(0x3dd4e9aee97aae3d), C64e(0x4cf2be6abe986a4c),
C64e(0x6c82ee5aeed85a6c), C64e(0x7ebdc341c3fc417e),
C64e(0xf5f3060206f102f5), C64e(0x8352d14fd11d4f83),
C64e(0x688ce45ce4d05c68), C64e(0x515607f407a2f451),
C64e(0xd18d5c345cb934d1), C64e(0xf9e1180818e908f9),
C64e(0xe24cae93aedf93e2), C64e(0xab3e9573954d73ab),
C64e(0x6297f553f5c45362), C64e(0x2a6b413f41543f2a),
C64e(0x081c140c14100c08), C64e(0x9563f652f6315295),
C64e(0x46e9af65af8c6546), C64e(0x9d7fe25ee2215e9d),
C64e(0x3048782878602830), C64e(0x37cff8a1f86ea137),
C64e(0x0a1b110f11140f0a), C64e(0x2febc4b5c45eb52f),
C64e(0x0e151b091b1c090e), C64e(0x247e5a365a483624),
C64e(0x1badb69bb6369b1b), C64e(0xdf98473d47a53ddf),
C64e(0xcda76a266a8126cd), C64e(0x4ef5bb69bb9c694e),
C64e(0x7f334ccd4cfecd7f), C64e(0xea50ba9fbacf9fea),
C64e(0x123f2d1b2d241b12), C64e(0x1da4b99eb93a9e1d),
C64e(0x58c49c749cb07458), C64e(0x3446722e72682e34),
C64e(0x3641772d776c2d36), C64e(0xdc11cdb2cda3b2dc),
C64e(0xb49d29ee2973eeb4), C64e(0x5b4d16fb16b6fb5b),
C64e(0xa4a501f60153f6a4), C64e(0x76a1d74dd7ec4d76),
C64e(0xb714a361a37561b7), C64e(0x7d3449ce49face7d),
C64e(0x52df8d7b8da47b52), C64e(0xdd9f423e42a13edd),
C64e(0x5ecd937193bc715e), C64e(0x13b1a297a2269713),
C64e(0xa6a204f50457f5a6), C64e(0xb901b868b86968b9),
C64e(0x0000000000000000), C64e(0xc1b5742c74992cc1),
C64e(0x40e0a060a0806040), C64e(0xe3c2211f21dd1fe3),
C64e(0x793a43c843f2c879), C64e(0xb69a2ced2c77edb6),
C64e(0xd40dd9bed9b3bed4), C64e(0x8d47ca46ca01468d),
C64e(0x671770d970ced967), C64e(0x72afdd4bdde44b72),
C64e(0x94ed79de7933de94), C64e(0x98ff67d4672bd498),
C64e(0xb09323e8237be8b0), C64e(0x855bde4ade114a85),
C64e(0xbb06bd6bbd6d6bbb), C64e(0xc5bb7e2a7e912ac5),
C64e(0x4f7b34e5349ee54f), C64e(0xedd73a163ac116ed),
C64e(0x86d254c55417c586), C64e(0x9af862d7622fd79a),
C64e(0x6699ff55ffcc5566), C64e(0x11b6a794a7229411),
C64e(0x8ac04acf4a0fcf8a), C64e(0xe9d9301030c910e9),
C64e(0x040e0a060a080604), C64e(0xfe66988198e781fe),
C64e(0xa0ab0bf00b5bf0a0), C64e(0x78b4cc44ccf04478),
C64e(0x25f0d5bad54aba25), C64e(0x4b753ee33e96e34b),
C64e(0xa2ac0ef30e5ff3a2), C64e(0x5d4419fe19bafe5d),
C64e(0x80db5bc05b1bc080), C64e(0x0580858a850a8a05),
C64e(0x3fd3ecadec7ead3f), C64e(0x21fedfbcdf42bc21),
C64e(0x70a8d848d8e04870), C64e(0xf1fd0c040cf904f1),
C64e(0x63197adf7ac6df63), C64e(0x772f58c158eec177),
C64e(0xaf309f759f4575af), C64e(0x42e7a563a5846342),
C64e(0x2070503050403020), C64e(0xe5cb2e1a2ed11ae5),
C64e(0xfdef120e12e10efd), C64e(0xbf08b76db7656dbf),
C64e(0x8155d44cd4194c81), C64e(0x18243c143c301418),
C64e(0x26795f355f4c3526), C64e(0xc3b2712f719d2fc3),
C64e(0xbe8638e13867e1be), C64e(0x35c8fda2fd6aa235),
C64e(0x88c74fcc4f0bcc88), C64e(0x2e654b394b5c392e),
C64e(0x936af957f93d5793), C64e(0x55580df20daaf255),
C64e(0xfc619d829de382fc), C64e(0x7ab3c947c9f4477a),
C64e(0xc827efacef8bacc8), C64e(0xba8832e7326fe7ba),
C64e(0x324f7d2b7d642b32), C64e(0xe642a495a4d795e6),
C64e(0xc03bfba0fb9ba0c0), C64e(0x19aab398b3329819),
C64e(0x9ef668d16827d19e), C64e(0xa322817f815d7fa3),
C64e(0x44eeaa66aa886644), C64e(0x54d6827e82a87e54),
C64e(0x3bdde6abe676ab3b), C64e(0x0b959e839e16830b),
C64e(0x8cc945ca4503ca8c), C64e(0xc7bc7b297b9529c7),
C64e(0x6b056ed36ed6d36b), C64e(0x286c443c44503c28),
C64e(0xa72c8b798b5579a7), C64e(0xbc813de23d63e2bc),
C64e(0x1631271d272c1d16), C64e(0xad379a769a4176ad),
C64e(0xdb964d3b4dad3bdb), C64e(0x649efa56fac85664),
C64e(0x74a6d24ed2e84e74), C64e(0x1436221e22281e14),
C64e(0x92e476db763fdb92), C64e(0x0c121e0a1e180a0c),
C64e(0x48fcb46cb4906c48), C64e(0xb88f37e4376be4b8),
C64e(0x9f78e75de7255d9f), C64e(0xbd0fb26eb2616ebd),
C64e(0x43692aef2a86ef43), C64e(0xc435f1a6f193a6c4),
C64e(0x39dae3a8e372a839), C64e(0x31c6f7a4f762a431),
C64e(0xd38a593759bd37d3), C64e(0xf274868b86ff8bf2),
C64e(0xd583563256b132d5), C64e(0x8b4ec543c50d438b),
C64e(0x6e85eb59ebdc596e), C64e(0xda18c2b7c2afb7da),
C64e(0x018e8f8c8f028c01), C64e(0xb11dac64ac7964b1),
C64e(0x9cf16dd26d23d29c), C64e(0x49723be03b92e049),
C64e(0xd81fc7b4c7abb4d8), C64e(0xacb915fa1543faac),
C64e(0xf3fa090709fd07f3), C64e(0xcfa06f256f8525cf),
C64e(0xca20eaafea8fafca), C64e(0xf47d898e89f38ef4),
C64e(0x476720e9208ee947), C64e(0x1038281828201810),
C64e(0x6f0b64d564ded56f), C64e(0xf073838883fb88f0),
C64e(0x4afbb16fb1946f4a), C64e(0x5cca967296b8725c),
C64e(0x38546c246c702438), C64e(0x575f08f108aef157),
C64e(0x732152c752e6c773), C64e(0x9764f351f3355197),
C64e(0xcbae6523658d23cb), C64e(0xa125847c84597ca1),
C64e(0xe857bf9cbfcb9ce8), C64e(0x3e5d6321637c213e),
C64e(0x96ea7cdd7c37dd96), C64e(0x611e7fdc7fc2dc61),
C64e(0x0d9c9186911a860d), C64e(0x0f9b9485941e850f),
C64e(0xe04bab90abdb90e0), C64e(0x7cbac642c6f8427c),
C64e(0x712657c457e2c471), C64e(0xcc29e5aae583aacc),
C64e(0x90e373d8733bd890), C64e(0x06090f050f0c0506),
C64e(0xf7f4030103f501f7), C64e(0x1c2a36123638121c),
C64e(0xc23cfea3fe9fa3c2), C64e(0x6a8be15fe1d45f6a),
C64e(0xaebe10f91047f9ae), C64e(0x69026bd06bd2d069),
C64e(0x17bfa891a82e9117), C64e(0x9971e858e8295899),
C64e(0x3a5369276974273a), C64e(0x27f7d0b9d04eb927),
C64e(0xd991483848a938d9), C64e(0xebde351335cd13eb),
C64e(0x2be5ceb3ce56b32b), C64e(0x2277553355443322),
C64e(0xd204d6bbd6bfbbd2), C64e(0xa9399070904970a9),
C64e(0x07878089800e8907), C64e(0x33c1f2a7f266a733),
C64e(0x2decc1b6c15ab62d), C64e(0x3c5a66226678223c),
C64e(0x15b8ad92ad2a9215), C64e(0xc9a96020608920c9),
C64e(0x875cdb49db154987), C64e(0xaab01aff1a4fffaa),
C64e(0x50d8887888a07850), C64e(0xa52b8e7a8e517aa5),
C64e(0x03898a8f8a068f03), C64e(0x594a13f813b2f859),
C64e(0x09929b809b128009), C64e(0x1a2339173934171a),
C64e(0x651075da75cada65), C64e(0xd784533153b531d7),
C64e(0x84d551c65113c684), C64e(0xd003d3b8d3bbb8d0),
C64e(0x82dc5ec35e1fc382), C64e(0x29e2cbb0cb52b029),
C64e(0x5ac3997799b4775a), C64e(0x1e2d3311333c111e),
C64e(0x7b3d46cb46f6cb7b), C64e(0xa8b71ffc1f4bfca8),
C64e(0x6d0c61d661dad66d), C64e(0x2c624e3a4e583a2c)
};
__constant static const sph_u64 T4[] = {
C64e(0xf497a5c6c632f4a5), C64e(0x97eb84f8f86f9784),
C64e(0xb0c799eeee5eb099), C64e(0x8cf78df6f67a8c8d),
C64e(0x17e50dffffe8170d), C64e(0xdcb7bdd6d60adcbd),
C64e(0xc8a7b1dede16c8b1), C64e(0xfc395491916dfc54),
C64e(0xf0c050606090f050), C64e(0x0504030202070503),
C64e(0xe087a9cece2ee0a9), C64e(0x87ac7d5656d1877d),
C64e(0x2bd519e7e7cc2b19), C64e(0xa67162b5b513a662),
C64e(0x319ae64d4d7c31e6), C64e(0xb5c39aecec59b59a),
C64e(0xcf05458f8f40cf45), C64e(0xbc3e9d1f1fa3bc9d),
C64e(0xc00940898949c040), C64e(0x92ef87fafa689287),
C64e(0x3fc515efefd03f15), C64e(0x267febb2b29426eb),
C64e(0x4007c98e8ece40c9), C64e(0x1ded0bfbfbe61d0b),
C64e(0x2f82ec41416e2fec), C64e(0xa97d67b3b31aa967),
C64e(0x1cbefd5f5f431cfd), C64e(0x258aea45456025ea),
C64e(0xda46bf2323f9dabf), C64e(0x02a6f753535102f7),
C64e(0xa1d396e4e445a196), C64e(0xed2d5b9b9b76ed5b),
C64e(0x5deac27575285dc2), C64e(0x24d91ce1e1c5241c),
C64e(0xe97aae3d3dd4e9ae), C64e(0xbe986a4c4cf2be6a),
C64e(0xeed85a6c6c82ee5a), C64e(0xc3fc417e7ebdc341),
C64e(0x06f102f5f5f30602), C64e(0xd11d4f838352d14f),
C64e(0xe4d05c68688ce45c), C64e(0x07a2f451515607f4),
C64e(0x5cb934d1d18d5c34), C64e(0x18e908f9f9e11808),
C64e(0xaedf93e2e24cae93), C64e(0x954d73abab3e9573),
C64e(0xf5c453626297f553), C64e(0x41543f2a2a6b413f),
C64e(0x14100c08081c140c), C64e(0xf63152959563f652),
C64e(0xaf8c654646e9af65), C64e(0xe2215e9d9d7fe25e),
C64e(0x7860283030487828), C64e(0xf86ea13737cff8a1),
C64e(0x11140f0a0a1b110f), C64e(0xc45eb52f2febc4b5),
C64e(0x1b1c090e0e151b09), C64e(0x5a483624247e5a36),
C64e(0xb6369b1b1badb69b), C64e(0x47a53ddfdf98473d),
C64e(0x6a8126cdcda76a26), C64e(0xbb9c694e4ef5bb69),
C64e(0x4cfecd7f7f334ccd), C64e(0xbacf9feaea50ba9f),
C64e(0x2d241b12123f2d1b), C64e(0xb93a9e1d1da4b99e),
C64e(0x9cb0745858c49c74), C64e(0x72682e343446722e),
C64e(0x776c2d363641772d), C64e(0xcda3b2dcdc11cdb2),
C64e(0x2973eeb4b49d29ee), C64e(0x16b6fb5b5b4d16fb),
C64e(0x0153f6a4a4a501f6), C64e(0xd7ec4d7676a1d74d),
C64e(0xa37561b7b714a361), C64e(0x49face7d7d3449ce),
C64e(0x8da47b5252df8d7b), C64e(0x42a13edddd9f423e),
C64e(0x93bc715e5ecd9371), C64e(0xa226971313b1a297),
C64e(0x0457f5a6a6a204f5), C64e(0xb86968b9b901b868),
C64e(0x0000000000000000), C64e(0x74992cc1c1b5742c),
C64e(0xa080604040e0a060), C64e(0x21dd1fe3e3c2211f),
C64e(0x43f2c879793a43c8), C64e(0x2c77edb6b69a2ced),
C64e(0xd9b3bed4d40dd9be), C64e(0xca01468d8d47ca46),
C64e(0x70ced967671770d9), C64e(0xdde44b7272afdd4b),
C64e(0x7933de9494ed79de), C64e(0x672bd49898ff67d4),
C64e(0x237be8b0b09323e8), C64e(0xde114a85855bde4a),
C64e(0xbd6d6bbbbb06bd6b), C64e(0x7e912ac5c5bb7e2a),
C64e(0x349ee54f4f7b34e5), C64e(0x3ac116ededd73a16),
C64e(0x5417c58686d254c5), C64e(0x622fd79a9af862d7),
C64e(0xffcc55666699ff55), C64e(0xa722941111b6a794),
C64e(0x4a0fcf8a8ac04acf), C64e(0x30c910e9e9d93010),
C64e(0x0a080604040e0a06), C64e(0x98e781fefe669881),
C64e(0x0b5bf0a0a0ab0bf0), C64e(0xccf0447878b4cc44),
C64e(0xd54aba2525f0d5ba), C64e(0x3e96e34b4b753ee3),
C64e(0x0e5ff3a2a2ac0ef3), C64e(0x19bafe5d5d4419fe),
C64e(0x5b1bc08080db5bc0), C64e(0x850a8a050580858a),
C64e(0xec7ead3f3fd3ecad), C64e(0xdf42bc2121fedfbc),
C64e(0xd8e0487070a8d848), C64e(0x0cf904f1f1fd0c04),
C64e(0x7ac6df6363197adf), C64e(0x58eec177772f58c1),
C64e(0x9f4575afaf309f75), C64e(0xa584634242e7a563),
C64e(0x5040302020705030), C64e(0x2ed11ae5e5cb2e1a),
C64e(0x12e10efdfdef120e), C64e(0xb7656dbfbf08b76d),
C64e(0xd4194c818155d44c), C64e(0x3c30141818243c14),
C64e(0x5f4c352626795f35), C64e(0x719d2fc3c3b2712f),
C64e(0x3867e1bebe8638e1), C64e(0xfd6aa23535c8fda2),
C64e(0x4f0bcc8888c74fcc), C64e(0x4b5c392e2e654b39),
C64e(0xf93d5793936af957), C64e(0x0daaf25555580df2),
C64e(0x9de382fcfc619d82), C64e(0xc9f4477a7ab3c947),
C64e(0xef8bacc8c827efac), C64e(0x326fe7baba8832e7),
C64e(0x7d642b32324f7d2b), C64e(0xa4d795e6e642a495),
C64e(0xfb9ba0c0c03bfba0), C64e(0xb332981919aab398),
C64e(0x6827d19e9ef668d1), C64e(0x815d7fa3a322817f),
C64e(0xaa88664444eeaa66), C64e(0x82a87e5454d6827e),
C64e(0xe676ab3b3bdde6ab), C64e(0x9e16830b0b959e83),
C64e(0x4503ca8c8cc945ca), C64e(0x7b9529c7c7bc7b29),
C64e(0x6ed6d36b6b056ed3), C64e(0x44503c28286c443c),
C64e(0x8b5579a7a72c8b79), C64e(0x3d63e2bcbc813de2),
C64e(0x272c1d161631271d), C64e(0x9a4176adad379a76),
C64e(0x4dad3bdbdb964d3b), C64e(0xfac85664649efa56),
C64e(0xd2e84e7474a6d24e), C64e(0x22281e141436221e),
C64e(0x763fdb9292e476db), C64e(0x1e180a0c0c121e0a),
C64e(0xb4906c4848fcb46c), C64e(0x376be4b8b88f37e4),
C64e(0xe7255d9f9f78e75d), C64e(0xb2616ebdbd0fb26e),
C64e(0x2a86ef4343692aef), C64e(0xf193a6c4c435f1a6),
C64e(0xe372a83939dae3a8), C64e(0xf762a43131c6f7a4),
C64e(0x59bd37d3d38a5937), C64e(0x86ff8bf2f274868b),
C64e(0x56b132d5d5835632), C64e(0xc50d438b8b4ec543),
C64e(0xebdc596e6e85eb59), C64e(0xc2afb7dada18c2b7),
C64e(0x8f028c01018e8f8c), C64e(0xac7964b1b11dac64),
C64e(0x6d23d29c9cf16dd2), C64e(0x3b92e04949723be0),
C64e(0xc7abb4d8d81fc7b4), C64e(0x1543faacacb915fa),
C64e(0x09fd07f3f3fa0907), C64e(0x6f8525cfcfa06f25),
C64e(0xea8fafcaca20eaaf), C64e(0x89f38ef4f47d898e),
C64e(0x208ee947476720e9), C64e(0x2820181010382818),
C64e(0x64ded56f6f0b64d5), C64e(0x83fb88f0f0738388),
C64e(0xb1946f4a4afbb16f), C64e(0x96b8725c5cca9672),
C64e(0x6c70243838546c24), C64e(0x08aef157575f08f1),
C64e(0x52e6c773732152c7), C64e(0xf33551979764f351),
C64e(0x658d23cbcbae6523), C64e(0x84597ca1a125847c),
C64e(0xbfcb9ce8e857bf9c), C64e(0x637c213e3e5d6321),
C64e(0x7c37dd9696ea7cdd), C64e(0x7fc2dc61611e7fdc),
C64e(0x911a860d0d9c9186), C64e(0x941e850f0f9b9485),
C64e(0xabdb90e0e04bab90), C64e(0xc6f8427c7cbac642),
C64e(0x57e2c471712657c4), C64e(0xe583aacccc29e5aa),
C64e(0x733bd89090e373d8), C64e(0x0f0c050606090f05),
C64e(0x03f501f7f7f40301), C64e(0x3638121c1c2a3612),
C64e(0xfe9fa3c2c23cfea3), C64e(0xe1d45f6a6a8be15f),
C64e(0x1047f9aeaebe10f9), C64e(0x6bd2d06969026bd0),
C64e(0xa82e911717bfa891), C64e(0xe82958999971e858),
C64e(0x6974273a3a536927), C64e(0xd04eb92727f7d0b9),
C64e(0x48a938d9d9914838), C64e(0x35cd13ebebde3513),
C64e(0xce56b32b2be5ceb3), C64e(0x5544332222775533),
C64e(0xd6bfbbd2d204d6bb), C64e(0x904970a9a9399070),
C64e(0x800e890707878089), C64e(0xf266a73333c1f2a7),
C64e(0xc15ab62d2decc1b6), C64e(0x6678223c3c5a6622),
C64e(0xad2a921515b8ad92), C64e(0x608920c9c9a96020),
C64e(0xdb154987875cdb49), C64e(0x1a4fffaaaab01aff),
C64e(0x88a0785050d88878), C64e(0x8e517aa5a52b8e7a),
C64e(0x8a068f0303898a8f), C64e(0x13b2f859594a13f8),
C64e(0x9b12800909929b80), C64e(0x3934171a1a233917),
C64e(0x75cada65651075da), C64e(0x53b531d7d7845331),
C64e(0x5113c68484d551c6), C64e(0xd3bbb8d0d003d3b8),
C64e(0x5e1fc38282dc5ec3), C64e(0xcb52b02929e2cbb0),
C64e(0x99b4775a5ac39977), C64e(0x333c111e1e2d3311),
C64e(0x46f6cb7b7b3d46cb), C64e(0x1f4bfca8a8b71ffc),
C64e(0x61dad66d6d0c61d6), C64e(0x4e583a2c2c624e3a)
};
#define RSTT(d, a, b0, b1, b2, b3, b4, b5, b6, b7) do { \
t[d] = T0[B64_0(a[b0])] \
^ R64(T0[B64_1(a[b1])], 8) \
^ R64(T0[B64_2(a[b2])], 16) \
^ R64(T0[B64_3(a[b3])], 24) \
^ T4[B64_4(a[b4])] \
^ R64(T4[B64_5(a[b5])], 8) \
^ R64(T4[B64_6(a[b6])], 16) \
^ R64(T4[B64_7(a[b7])], 24); \
} while (0)
#define ROUND_SMALL_P(a, r) do { \
a[0] ^= PC64(0x00, r); \
a[1] ^= PC64(0x10, r); \
a[2] ^= PC64(0x20, r); \
a[3] ^= PC64(0x30, r); \
a[4] ^= PC64(0x40, r); \
a[5] ^= PC64(0x50, r); \
a[6] ^= PC64(0x60, r); \
a[7] ^= PC64(0x70, r); \
RSTT(0, a, 0, 1, 2, 3, 4, 5, 6, 7); \
RSTT(1, a, 1, 2, 3, 4, 5, 6, 7, 0); \
RSTT(2, a, 2, 3, 4, 5, 6, 7, 0, 1); \
RSTT(3, a, 3, 4, 5, 6, 7, 0, 1, 2); \
RSTT(4, a, 4, 5, 6, 7, 0, 1, 2, 3); \
RSTT(5, a, 5, 6, 7, 0, 1, 2, 3, 4); \
RSTT(6, a, 6, 7, 0, 1, 2, 3, 4, 5); \
RSTT(7, a, 7, 0, 1, 2, 3, 4, 5, 6); \
a[0] = t[0]; \
a[1] = t[1]; \
a[2] = t[2]; \
a[3] = t[3]; \
a[4] = t[4]; \
a[5] = t[5]; \
a[6] = t[6]; \
a[7] = t[7]; \
} while (0)
#define ROUND_SMALL_Pf(a, r) do { \
a[0] ^= PC64(0x00, r); \
a[1] ^= PC64(0x10, r); \
a[2] ^= PC64(0x20, r); \
a[3] ^= PC64(0x30, r); \
a[4] ^= PC64(0x40, r); \
a[5] ^= PC64(0x50, r); \
a[6] ^= PC64(0x60, r); \
a[7] ^= PC64(0x70, r); \
RSTT(7, a, 7, 0, 1, 2, 3, 4, 5, 6); \
a[7] = t[7]; \
} while (0)
#define ROUND_SMALL_Q(a, r) do { \
a[0] ^= QC64(0x00, r); \
a[1] ^= QC64(0x10, r); \
a[2] ^= QC64(0x20, r); \
a[3] ^= QC64(0x30, r); \
a[4] ^= QC64(0x40, r); \
a[5] ^= QC64(0x50, r); \
a[6] ^= QC64(0x60, r); \
a[7] ^= QC64(0x70, r); \
RSTT(0, a, 1, 3, 5, 7, 0, 2, 4, 6); \
RSTT(1, a, 2, 4, 6, 0, 1, 3, 5, 7); \
RSTT(2, a, 3, 5, 7, 1, 2, 4, 6, 0); \
RSTT(3, a, 4, 6, 0, 2, 3, 5, 7, 1); \
RSTT(4, a, 5, 7, 1, 3, 4, 6, 0, 2); \
RSTT(5, a, 6, 0, 2, 4, 5, 7, 1, 3); \
RSTT(6, a, 7, 1, 3, 5, 6, 0, 2, 4); \
RSTT(7, a, 0, 2, 4, 6, 7, 1, 3, 5); \
a[0] = t[0]; \
a[1] = t[1]; \
a[2] = t[2]; \
a[3] = t[3]; \
a[4] = t[4]; \
a[5] = t[5]; \
a[6] = t[6]; \
a[7] = t[7]; \
} while (0)
#define PERM_SMALL_P(a) do { \
for (int r = 0; r < 10; r ++) \
ROUND_SMALL_P(a, r); \
} while (0)
#define PERM_SMALL_Pf(a) do { \
for (int r = 0; r < 9; r ++) { \
ROUND_SMALL_P(a, r);} \
ROUND_SMALL_Pf(a,9); \
} while (0)
#define PERM_SMALL_Q(a) do { \
for (int r = 0; r < 10; r ++) \
ROUND_SMALL_Q(a, r); \
} while (0)

2036
kernel/groestlcoin.cl

File diff suppressed because it is too large Load Diff

84
kernel/keccak1600.cl

@ -0,0 +1,84 @@ @@ -0,0 +1,84 @@
/*
* keccak_1600 function
* C. Buchner 2014
*
*/
__constant static const sph_u64 RC[] = {
SPH_C64(0x0000000000000001), SPH_C64(0x0000000000008082),
SPH_C64(0x800000000000808A), SPH_C64(0x8000000080008000),
SPH_C64(0x000000000000808B), SPH_C64(0x0000000080000001),
SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008009),
SPH_C64(0x000000000000008A), SPH_C64(0x0000000000000088),
SPH_C64(0x0000000080008009), SPH_C64(0x000000008000000A),
SPH_C64(0x000000008000808B), SPH_C64(0x800000000000008B),
SPH_C64(0x8000000000008089), SPH_C64(0x8000000000008003),
SPH_C64(0x8000000000008002), SPH_C64(0x8000000000000080),
SPH_C64(0x000000000000800A), SPH_C64(0x800000008000000A),
SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008080),
SPH_C64(0x0000000080000001), SPH_C64(0x8000000080008008)
};
inline void keccak_block(ulong *s) {
size_t i;
ulong t[5], u[5], v, w;
for (i = 0; i < 24; i++) {
/* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */
t[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20];
t[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21];
t[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22];
t[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23];
t[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24];
/* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */
u[0] = t[4] ^ SPH_ROTL64(t[1], 1);
u[1] = t[0] ^ SPH_ROTL64(t[2], 1);
u[2] = t[1] ^ SPH_ROTL64(t[3], 1);
u[3] = t[2] ^ SPH_ROTL64(t[4], 1);
u[4] = t[3] ^ SPH_ROTL64(t[0], 1);
/* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */
s[0] ^= u[0]; s[5] ^= u[0]; s[10] ^= u[0]; s[15] ^= u[0]; s[20] ^= u[0];
s[1] ^= u[1]; s[6] ^= u[1]; s[11] ^= u[1]; s[16] ^= u[1]; s[21] ^= u[1];
s[2] ^= u[2]; s[7] ^= u[2]; s[12] ^= u[2]; s[17] ^= u[2]; s[22] ^= u[2];
s[3] ^= u[3]; s[8] ^= u[3]; s[13] ^= u[3]; s[18] ^= u[3]; s[23] ^= u[3];
s[4] ^= u[4]; s[9] ^= u[4]; s[14] ^= u[4]; s[19] ^= u[4]; s[24] ^= u[4];
/* rho pi: b[..] = rotl(a[..], ..) */
v = s[1];
s[1] = SPH_ROTL64(s[6], 44);
s[6] = SPH_ROTL64(s[9], 20);
s[9] = SPH_ROTL64(s[22], 61);
s[22] = SPH_ROTL64(s[14], 39);
s[14] = SPH_ROTL64(s[20], 18);
s[20] = SPH_ROTL64(s[2], 62);
s[2] = SPH_ROTL64(s[12], 43);
s[12] = SPH_ROTL64(s[13], 25);
s[13] = SPH_ROTL64(s[19], 8);
s[19] = SPH_ROTL64(s[23], 56);
s[23] = SPH_ROTL64(s[15], 41);
s[15] = SPH_ROTL64(s[4], 27);
s[4] = SPH_ROTL64(s[24], 14);
s[24] = SPH_ROTL64(s[21], 2);
s[21] = SPH_ROTL64(s[8], 55);
s[8] = SPH_ROTL64(s[16], 45);
s[16] = SPH_ROTL64(s[5], 36);
s[5] = SPH_ROTL64(s[3], 28);
s[3] = SPH_ROTL64(s[18], 21);
s[18] = SPH_ROTL64(s[17], 15);
s[17] = SPH_ROTL64(s[11], 10);
s[11] = SPH_ROTL64(s[7], 6);
s[7] = SPH_ROTL64(s[10], 3);
s[10] = SPH_ROTL64(v, 1);
v = s[0]; w = s[1]; s[0] ^= (~w) & s[2]; s[1] ^= (~s[2]) & s[3]; s[2] ^= (~s[3]) & s[4]; s[3] ^= (~s[4]) & v; s[4] ^= (~v) & w;
v = s[5]; w = s[6]; s[5] ^= (~w) & s[7]; s[6] ^= (~s[7]) & s[8]; s[7] ^= (~s[8]) & s[9]; s[8] ^= (~s[9]) & v; s[9] ^= (~v) & w;
v = s[10]; w = s[11]; s[10] ^= (~w) & s[12]; s[11] ^= (~s[12]) & s[13]; s[12] ^= (~s[13]) & s[14]; s[13] ^= (~s[14]) & v; s[14] ^= (~v) & w;
v = s[15]; w = s[16]; s[15] ^= (~w) & s[17]; s[16] ^= (~s[17]) & s[18]; s[17] ^= (~s[18]) & s[19]; s[18] ^= (~s[19]) & v; s[19] ^= (~v) & w;
v = s[20]; w = s[21]; s[20] ^= (~w) & s[22]; s[21] ^= (~s[22]) & s[23]; s[22] ^= (~s[23]) & s[24]; s[23] ^= (~s[24]) & v; s[24] ^= (~v) & w;
s[0] ^= RC[i];
}
};

107
kernel/skein256.cl

@ -0,0 +1,107 @@ @@ -0,0 +1,107 @@
/*
* skein256 kernel implementation.
*
* ==========================(LICENSE BEGIN)============================
* Copyright (c) 2014 djm34
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author djm34
*/
__constant static const sph_u64 SKEIN_IV512[] = {
SPH_C64(0x4903ADFF749C51CE), SPH_C64(0x0D95DE399746DF03),
SPH_C64(0x8FD1934127C79BCE), SPH_C64(0x9A255629FF352CB1),
SPH_C64(0x5DB62599DF6CA7B0), SPH_C64(0xEABE394CA9D5C3F4),
SPH_C64(0x991112C71A75B523), SPH_C64(0xAE18A40B660FCC33)
};
__constant static const sph_u64 SKEIN_IV512_256[8] = {
0xCCD044A12FDB3E13UL, 0xE83590301A79A9EBUL,
0x55AEA0614F816E6FUL, 0x2A2767A4AE9B94DBUL,
0xEC06025E74DD7683UL, 0xE7A436CDC4746251UL,
0xC36FBAF9393AD185UL, 0x3EEDBA1833EDFC13UL
};
__constant static const int ROT256[8][4] =
{
46, 36, 19, 37,
33, 27, 14, 42,
17, 49, 36, 39,
44, 9, 54, 56,
39, 30, 34, 24,
13, 50, 10, 17,
25, 29, 39, 43,
8, 35, 56, 22,
};
__constant static const sph_u64 skein_ks_parity = 0x1BD11BDAA9FC1A22;
__constant static const sph_u64 t12[6] =
{ 0x20UL,
0xf000000000000000UL,
0xf000000000000020UL,
0x08UL,
0xff00000000000000UL,
0xff00000000000008UL
};
static inline ulong ROTL64(const ulong v, const ulong n){
return rotate(v,n);
}
#define Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT) { \
p0 += p1; p1 = SPH_ROTL64(p1, ROT256[ROT][0]); p1 ^= p0; \
p2 += p3; p3 = SPH_ROTL64(p3, ROT256[ROT][1]); p3 ^= p2; \
p4 += p5; p5 = SPH_ROTL64(p5, ROT256[ROT][2]); p5 ^= p4; \
p6 += p7; p7 = SPH_ROTL64(p7, ROT256[ROT][3]); p7 ^= p6; \
}
#define Round_8_512(p0, p1, p2, p3, p4, p5, p6, p7, R) { \
Round512(p0, p1, p2, p3, p4, p5, p6, p7, 0); \
Round512(p2, p1, p4, p7, p6, p5, p0, p3, 1); \
Round512(p4, p1, p6, p3, p0, p5, p2, p7, 2); \
Round512(p6, p1, p0, p7, p2, p5, p4, p3, 3); \
p0 += h[((R)+0) % 9]; \
p1 += h[((R)+1) % 9]; \
p2 += h[((R)+2) % 9]; \
p3 += h[((R)+3) % 9]; \
p4 += h[((R)+4) % 9]; \
p5 += h[((R)+5) % 9] + t[((R)+0) % 3]; \
p6 += h[((R)+6) % 9] + t[((R)+1) % 3]; \
p7 += h[((R)+7) % 9] + R; \
Round512(p0, p1, p2, p3, p4, p5, p6, p7, 4); \
Round512(p2, p1, p4, p7, p6, p5, p0, p3, 5); \
Round512(p4, p1, p6, p3, p0, p5, p2, p7, 6); \
Round512(p6, p1, p0, p7, p2, p5, p4, p3, 7); \
p0 += h[((R)+1) % 9]; \
p1 += h[((R)+2) % 9]; \
p2 += h[((R)+3) % 9]; \
p3 += h[((R)+4) % 9]; \
p4 += h[((R)+5) % 9]; \
p5 += h[((R)+6) % 9] + t[((R)+1) % 3]; \
p6 += h[((R)+7) % 9] + t[((R)+2) % 3]; \
p7 += h[((R)+8) % 9] + (R+1); \
}

1
sgminer.c

@ -48,6 +48,7 @@ char *curly = ":D"; @@ -48,6 +48,7 @@ char *curly = ":D";
#endif
#include <libgen.h>
#include "sph/sph_sha2.h"
#include "sph/sph_blake.h"
#include "compat.h"
#include "miner.h"

6
winbuild/sgminer.vcxproj

@ -263,7 +263,10 @@ @@ -263,7 +263,10 @@
<ClCompile Include="..\algorithm.c" />
<ClCompile Include="..\algorithm\animecoin.c" />
<ClCompile Include="..\algorithm\bitblock.c" />
<ClCompile Include="..\algorithm\lyra2.c" />
<ClCompile Include="..\algorithm\lyra2re.c" />
<ClCompile Include="..\algorithm\neoscrypt.c" />
<ClCompile Include="..\algorithm\sponge.c" />
<ClCompile Include="..\algorithm\talkcoin.c" />
<ClCompile Include="..\algorithm\whirlpoolx.c" />
<ClCompile Include="..\algorithm\x14.c" />
@ -325,7 +328,10 @@ @@ -325,7 +328,10 @@
<ClInclude Include="..\algorithm.h" />
<ClInclude Include="..\algorithm\animecoin.h" />
<ClInclude Include="..\algorithm\bitblock.h" />
<ClInclude Include="..\algorithm\lyra2.h" />
<ClInclude Include="..\algorithm\lyra2re.h" />
<ClInclude Include="..\algorithm\neoscrypt.h" />
<ClInclude Include="..\algorithm\sponge.h" />
<ClInclude Include="..\algorithm\talkcoin.h" />
<ClInclude Include="..\algorithm\whirlpoolx.h" />
<ClInclude Include="..\algorithm\x14.h" />

18
winbuild/sgminer.vcxproj.filters

@ -209,6 +209,15 @@ @@ -209,6 +209,15 @@
<ClCompile Include="..\algorithm\whirlpoolx.c">
<Filter>Source Files\algorithm</Filter>
</ClCompile>
<ClCompile Include="..\algorithm\lyra2.c">
<Filter>Source Files\algorithm</Filter>
</ClCompile>
<ClCompile Include="..\algorithm\lyra2re.c">
<Filter>Source Files\algorithm</Filter>
</ClCompile>
<ClCompile Include="..\algorithm\sponge.c">
<Filter>Source Files\algorithm</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\adl.h">
@ -397,6 +406,15 @@ @@ -397,6 +406,15 @@
<ClInclude Include="..\algorithm\whirlpoolx.h">
<Filter>Header Files\algorithm</Filter>
</ClInclude>
<ClInclude Include="..\algorithm\lyra2.h">
<Filter>Header Files\algorithm</Filter>
</ClInclude>
<ClInclude Include="..\algorithm\lyra2re.h">
<Filter>Header Files\algorithm</Filter>
</ClInclude>
<ClInclude Include="..\algorithm\sponge.h">
<Filter>Header Files\algorithm</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="README.txt" />

Loading…
Cancel
Save