mirror of https://github.com/GOSTSec/sgminer
elbandi
9 years ago
41 changed files with 7083 additions and 61 deletions
@ -0,0 +1,148 @@ |
|||||||
|
/*-
|
||||||
|
* Copyright 2015 djm34 |
||||||
|
* All rights reserved. |
||||||
|
* |
||||||
|
* Redistribution and use in source and binary forms, with or without |
||||||
|
* modification, are permitted provided that the following conditions |
||||||
|
* are met: |
||||||
|
* 1. Redistributions of source code must retain the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer. |
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer in the |
||||||
|
* documentation and/or other materials provided with the distribution. |
||||||
|
* |
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
||||||
|
* SUCH DAMAGE. |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "config.h" |
||||||
|
#include "miner.h" |
||||||
|
|
||||||
|
#include <stdlib.h> |
||||||
|
#include <stdint.h> |
||||||
|
#include <string.h> |
||||||
|
|
||||||
|
#include "sph/sph_sha2.h" |
||||||
|
|
||||||
|
static const uint32_t diff1targ = 0x0000ffff; |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
inline void credits_hash(void *state, const void *input) |
||||||
|
{ |
||||||
|
sph_sha256_context sha1, sha2; |
||||||
|
uint32_t hash[8], hash2[8]; |
||||||
|
|
||||||
|
sph_sha256_init(&sha1); |
||||||
|
sph_sha256(&sha1, input, 168); |
||||||
|
sph_sha256_close(&sha1, hash); |
||||||
|
|
||||||
|
|
||||||
|
sph_sha256_init(&sha2); |
||||||
|
sph_sha256(&sha2, hash, 32); |
||||||
|
sph_sha256_close(&sha2, hash2); |
||||||
|
|
||||||
|
memcpy(state, hash2, 32); |
||||||
|
|
||||||
|
} |
||||||
|
static inline void |
||||||
|
be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) |
||||||
|
{ |
||||||
|
uint32_t i; |
||||||
|
|
||||||
|
for (i = 0; i < len; i++) |
||||||
|
dst[i] = htobe32(src[i]); |
||||||
|
} |
||||||
|
|
||||||
|
/* Used externally as confirmation of correct OCL code */ |
||||||
|
int credits_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce) |
||||||
|
{ |
||||||
|
uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]); |
||||||
|
uint32_t data[42], ohash[8]; |
||||||
|
printf("coming here credits test\n"); |
||||||
|
|
||||||
|
be32enc_vect(data, (const uint32_t *)pdata, 42); |
||||||
|
data[35] = htobe32(nonce); |
||||||
|
credits_hash((unsigned char*)data,(unsigned char*)ohash); |
||||||
|
|
||||||
|
tmp_hash7 = be32toh(ohash[7]); |
||||||
|
|
||||||
|
applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx", |
||||||
|
(long unsigned int)Htarg, |
||||||
|
(long unsigned int)diff1targ, |
||||||
|
(long unsigned int)tmp_hash7); |
||||||
|
|
||||||
|
if (tmp_hash7 > diff1targ) |
||||||
|
return -1; |
||||||
|
|
||||||
|
if (tmp_hash7 > Htarg) |
||||||
|
return 0; |
||||||
|
|
||||||
|
return 1; |
||||||
|
} |
||||||
|
|
||||||
|
void credits_regenhash(struct work *work) |
||||||
|
{ |
||||||
|
uint32_t data[42]; |
||||||
|
uint32_t *nonce = (uint32_t *)(work->data + 140); |
||||||
|
uint32_t *ohash = (uint32_t *)(work->hash); |
||||||
|
|
||||||
|
be32enc_vect(data, (const uint32_t *)work->data, 42); |
||||||
|
data[35] = htobe32(*nonce); |
||||||
|
|
||||||
|
credits_hash((unsigned char*)ohash, (unsigned char*)data); |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
bool scanhash_credits(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate, |
||||||
|
unsigned char *pdata, unsigned char __maybe_unused *phash1, |
||||||
|
unsigned char __maybe_unused *phash, const unsigned char *ptarget, |
||||||
|
uint32_t max_nonce, uint32_t *last_nonce, uint32_t n) |
||||||
|
{ |
||||||
|
uint32_t *nonce = (uint32_t *)(pdata + 140); |
||||||
|
uint32_t data[42]; |
||||||
|
uint32_t tmp_hash7; |
||||||
|
uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]); |
||||||
|
bool ret = false; |
||||||
|
|
||||||
|
be32enc_vect(data, (const uint32_t *)pdata, 35); |
||||||
|
|
||||||
|
|
||||||
|
while (1) |
||||||
|
{ |
||||||
|
uint32_t ostate[8]; |
||||||
|
|
||||||
|
*nonce = ++n; |
||||||
|
data[35] = (n); |
||||||
|
credits_hash(ostate, data); |
||||||
|
tmp_hash7 = (ostate[7]); |
||||||
|
|
||||||
|
applog(LOG_INFO, "data7 %08lx", (long unsigned int)ostate[7]); |
||||||
|
|
||||||
|
if (unlikely(tmp_hash7 <= Htarg)) |
||||||
|
{ |
||||||
|
((uint32_t *)pdata)[35] = htobe32(n); |
||||||
|
*last_nonce = n; |
||||||
|
ret = true; |
||||||
|
break; |
||||||
|
} |
||||||
|
|
||||||
|
if (unlikely((n >= max_nonce) || thr->work_restart)) |
||||||
|
{ |
||||||
|
*last_nonce = n; |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return ret; |
||||||
|
} |
@ -0,0 +1,10 @@ |
|||||||
|
#ifndef CREDITS_H |
||||||
|
#define CREDITS_H |
||||||
|
|
||||||
|
#include "miner.h" |
||||||
|
|
||||||
|
|
||||||
|
extern int credits_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce); |
||||||
|
extern void credits_regenhash(struct work *work); |
||||||
|
|
||||||
|
#endif /* CREDITS_H */ |
@ -0,0 +1,208 @@ |
|||||||
|
/**
|
||||||
|
* Implementation of the Lyra2 Password Hashing Scheme (PHS). |
||||||
|
* |
||||||
|
* Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
|
||||||
|
* |
||||||
|
* This software is hereby placed in the public domain. |
||||||
|
* |
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS |
||||||
|
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
||||||
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE |
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
||||||
|
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
||||||
|
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE |
||||||
|
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, |
||||||
|
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||||
|
*/ |
||||||
|
#include <stdio.h> |
||||||
|
#include <stdlib.h> |
||||||
|
#include <string.h> |
||||||
|
#include <time.h> |
||||||
|
#include "lyra2_old.h" |
||||||
|
#include "sponge_old.h" |
||||||
|
|
||||||
|
/**
|
||||||
|
* Executes Lyra2 based on the G function from Blake2b. This version supports salts and passwords |
||||||
|
* whose combined length is smaller than the size of the memory matrix, (i.e., (nRows x nCols x b) bits, |
||||||
|
* where "b" is the underlying sponge's bitrate). In this implementation, the "basil" is composed by all |
||||||
|
* integer parameters (treated as type "unsigned int") in the order they are provided, plus the value |
||||||
|
* of nCols, (i.e., basil = kLen || pwdlen || saltlen || timeCost || nRows || nCols). |
||||||
|
* |
||||||
|
* @param K The derived key to be output by the algorithm |
||||||
|
* @param kLen Desired key length |
||||||
|
* @param pwd User password |
||||||
|
* @param pwdlen Password length |
||||||
|
* @param salt Salt |
||||||
|
* @param saltlen Salt length |
||||||
|
* @param timeCost Parameter to determine the processing time (T) |
||||||
|
* @param nRows Number or rows of the memory matrix (R) |
||||||
|
* @param nCols Number of columns of the memory matrix (C) |
||||||
|
* |
||||||
|
* @return 0 if the key is generated correctly; -1 if there is an error (usually due to lack of memory for allocation) |
||||||
|
*/ |
||||||
|
int LYRA2O(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols) { |
||||||
|
|
||||||
|
//============================= Basic variables ============================//
|
||||||
|
int64_t row = 2; //index of row to be processed
|
||||||
|
int64_t prev = 1; //index of prev (last row ever computed/modified)
|
||||||
|
int64_t rowa = 0; //index of row* (a previous row, deterministically picked during Setup and randomly picked while Wandering)
|
||||||
|
int64_t tau; //Time Loop iterator
|
||||||
|
int64_t step = 1; //Visitation step (used during Setup and Wandering phases)
|
||||||
|
int64_t window = 2; //Visitation window (used to define which rows can be revisited during Setup)
|
||||||
|
int64_t gap = 1; //Modifier to the step, assuming the values 1 or -1
|
||||||
|
int64_t i; //auxiliary iteration counter
|
||||||
|
//==========================================================================/
|
||||||
|
|
||||||
|
//========== Initializing the Memory Matrix and pointers to it =============//
|
||||||
|
//Tries to allocate enough space for the whole memory matrix
|
||||||
|
i = (int64_t) ((int64_t) nRows * (int64_t) ROW_LEN_BYTES); |
||||||
|
uint64_t *wholeMatrix = malloc(i); |
||||||
|
if (wholeMatrix == NULL) { |
||||||
|
return -1; |
||||||
|
} |
||||||
|
memset(wholeMatrix, 0, i); |
||||||
|
|
||||||
|
//Allocates pointers to each row of the matrix
|
||||||
|
uint64_t **memMatrix = malloc(nRows * sizeof (uint64_t*)); |
||||||
|
if (memMatrix == NULL) { |
||||||
|
return -1; |
||||||
|
} |
||||||
|
//Places the pointers in the correct positions
|
||||||
|
uint64_t *ptrWord = wholeMatrix; |
||||||
|
for (i = 0; i < nRows; i++) { |
||||||
|
memMatrix[i] = ptrWord; |
||||||
|
ptrWord += ROW_LEN_INT64; |
||||||
|
} |
||||||
|
//==========================================================================/
|
||||||
|
|
||||||
|
//============= Getting the password + salt + basil padded with 10*1 ===============//
|
||||||
|
//OBS.:The memory matrix will temporarily hold the password: not for saving memory,
|
||||||
|
//but this ensures that the password copied locally will be overwritten as soon as possible
|
||||||
|
|
||||||
|
//First, we clean enough blocks for the password, salt, basil and padding
|
||||||
|
uint64_t nBlocksInput = ((saltlen + pwdlen + 6 * sizeof (uint64_t)) / BLOCK_LEN_BLAKE2_SAFE_BYTES) + 1; |
||||||
|
byte *ptrByte = (byte*) wholeMatrix; |
||||||
|
memset(ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES); |
||||||
|
|
||||||
|
//Prepends the password
|
||||||
|
memcpy(ptrByte, pwd, pwdlen); |
||||||
|
ptrByte += pwdlen; |
||||||
|
|
||||||
|
//Concatenates the salt
|
||||||
|
memcpy(ptrByte, salt, saltlen); |
||||||
|
ptrByte += saltlen; |
||||||
|
|
||||||
|
//Concatenates the basil: every integer passed as parameter, in the order they are provided by the interface
|
||||||
|
memcpy(ptrByte, &kLen, sizeof (uint64_t)); |
||||||
|
ptrByte += sizeof (uint64_t); |
||||||
|
memcpy(ptrByte, &pwdlen, sizeof (uint64_t)); |
||||||
|
ptrByte += sizeof (uint64_t); |
||||||
|
memcpy(ptrByte, &saltlen, sizeof (uint64_t)); |
||||||
|
ptrByte += sizeof (uint64_t); |
||||||
|
memcpy(ptrByte, &timeCost, sizeof (uint64_t)); |
||||||
|
ptrByte += sizeof (uint64_t); |
||||||
|
memcpy(ptrByte, &nRows, sizeof (uint64_t)); |
||||||
|
ptrByte += sizeof (uint64_t); |
||||||
|
memcpy(ptrByte, &nCols, sizeof (uint64_t)); |
||||||
|
ptrByte += sizeof (uint64_t); |
||||||
|
|
||||||
|
//Now comes the padding
|
||||||
|
*ptrByte = 0x80; //first byte of padding: right after the password
|
||||||
|
ptrByte = (byte*) wholeMatrix; //resets the pointer to the start of the memory matrix
|
||||||
|
ptrByte += nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - 1; //sets the pointer to the correct position: end of incomplete block
|
||||||
|
*ptrByte ^= 0x01; //last byte of padding: at the end of the last incomplete block
|
||||||
|
//==========================================================================/
|
||||||
|
|
||||||
|
//======================= Initializing the Sponge State ====================//
|
||||||
|
//Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c)
|
||||||
|
uint64_t *state = malloc(16 * sizeof (uint64_t)); |
||||||
|
if (state == NULL) { |
||||||
|
return -1; |
||||||
|
} |
||||||
|
initStateO(state); |
||||||
|
//==========================================================================/
|
||||||
|
|
||||||
|
//================================ Setup Phase =============================//
|
||||||
|
//Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits
|
||||||
|
ptrWord = wholeMatrix; |
||||||
|
for (i = 0; i < nBlocksInput; i++) { |
||||||
|
absorbBlockBlake2SafeO(state, ptrWord); //absorbs each block of pad(pwd || salt || basil)
|
||||||
|
ptrWord += BLOCK_LEN_BLAKE2_SAFE_BYTES; //goes to next block of pad(pwd || salt || basil)
|
||||||
|
} |
||||||
|
|
||||||
|
//Initializes M[0] and M[1]
|
||||||
|
reducedSqueezeRow0O(state, memMatrix[0]); //The locally copied password is most likely overwritten here
|
||||||
|
reducedDuplexRow1O(state, memMatrix[0], memMatrix[1]); |
||||||
|
|
||||||
|
do { |
||||||
|
//M[row] = rand; //M[row*] = M[row*] XOR rotW(rand)
|
||||||
|
reducedDuplexRowSetupO(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]); |
||||||
|
|
||||||
|
|
||||||
|
//updates the value of row* (deterministically picked during Setup))
|
||||||
|
rowa = (rowa + step) & (window - 1); |
||||||
|
//update prev: it now points to the last row ever computed
|
||||||
|
prev = row; |
||||||
|
//updates row: goes to the next row to be computed
|
||||||
|
row++; |
||||||
|
|
||||||
|
//Checks if all rows in the window where visited.
|
||||||
|
if (rowa == 0) { |
||||||
|
step = window + gap; //changes the step: approximately doubles its value
|
||||||
|
window *= 2; //doubles the size of the re-visitation window
|
||||||
|
gap = -gap; //inverts the modifier to the step
|
||||||
|
} |
||||||
|
|
||||||
|
} while (row < nRows); |
||||||
|
//==========================================================================/
|
||||||
|
|
||||||
|
//============================ Wandering Phase =============================//
|
||||||
|
row = 0; //Resets the visitation to the first row of the memory matrix
|
||||||
|
for (tau = 1; tau <= timeCost; tau++) { |
||||||
|
//Step is approximately half the number of all rows of the memory matrix for an odd tau; otherwise, it is -1
|
||||||
|
step = (tau % 2 == 0) ? -1 : nRows / 2 - 1; |
||||||
|
do { |
||||||
|
//Selects a pseudorandom index row*
|
||||||
|
//------------------------------------------------------------------------------------------
|
||||||
|
//rowa = ((unsigned int)state[0]) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2)
|
||||||
|
rowa = ((uint64_t) (state[0])) % nRows; //(USE THIS FOR THE "GENERIC" CASE)
|
||||||
|
//------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
//Performs a reduced-round duplexing operation over M[row*] XOR M[prev], updating both M[row*] and M[row]
|
||||||
|
reducedDuplexRowO(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]); |
||||||
|
|
||||||
|
//update prev: it now points to the last row ever computed
|
||||||
|
prev = row; |
||||||
|
|
||||||
|
//updates row: goes to the next row to be computed
|
||||||
|
//------------------------------------------------------------------------------------------
|
||||||
|
//row = (row + step) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2)
|
||||||
|
row = (row + step) % nRows; //(USE THIS FOR THE "GENERIC" CASE)
|
||||||
|
//------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
} while (row != 0); |
||||||
|
} |
||||||
|
//==========================================================================/
|
||||||
|
|
||||||
|
//============================ Wrap-up Phase ===============================//
|
||||||
|
//Absorbs the last block of the memory matrix
|
||||||
|
absorbBlockO(state, memMatrix[rowa]); |
||||||
|
|
||||||
|
//Squeezes the key
|
||||||
|
squeezeO(state, K, kLen); |
||||||
|
//==========================================================================/
|
||||||
|
|
||||||
|
//========================= Freeing the memory =============================//
|
||||||
|
free(memMatrix); |
||||||
|
free(wholeMatrix); |
||||||
|
|
||||||
|
//Wiping out the sponge's internal state before freeing it
|
||||||
|
memset(state, 0, 16 * sizeof (uint64_t)); |
||||||
|
free(state); |
||||||
|
//==========================================================================/
|
||||||
|
|
||||||
|
return 0; |
||||||
|
} |
@ -0,0 +1,50 @@ |
|||||||
|
/**
|
||||||
|
* Header file for the Lyra2 Password Hashing Scheme (PHS). |
||||||
|
* |
||||||
|
* Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
|
||||||
|
* |
||||||
|
* This software is hereby placed in the public domain. |
||||||
|
* |
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS |
||||||
|
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
||||||
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE |
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
||||||
|
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
||||||
|
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE |
||||||
|
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, |
||||||
|
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||||
|
*/ |
||||||
|
#ifndef LYRA2OLD_H_ |
||||||
|
#define LYRA2OLD_H_ |
||||||
|
|
||||||
|
#include <stdint.h> |
||||||
|
|
||||||
|
typedef unsigned char byte; |
||||||
|
|
||||||
|
//Block length required so Blake2's Initialization Vector (IV) is not overwritten (THIS SHOULD NOT BE MODIFIED)
|
||||||
|
#define BLOCK_LEN_BLAKE2_SAFE_INT64 8 //512 bits (=64 bytes, =8 uint64_t)
|
||||||
|
#define BLOCK_LEN_BLAKE2_SAFE_BYTES (BLOCK_LEN_BLAKE2_SAFE_INT64 * 8) //same as above, in bytes
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef BLOCK_LEN_BITS |
||||||
|
#define BLOCK_LEN_INT64 (BLOCK_LEN_BITS/64) //Block length: 768 bits (=96 bytes, =12 uint64_t)
|
||||||
|
#define BLOCK_LEN_BYTES (BLOCK_LEN_BITS/8) //Block length, in bytes
|
||||||
|
#else //default block lenght: 768 bits
|
||||||
|
#define BLOCK_LEN_INT64 12 //Block length: 768 bits (=96 bytes, =12 uint64_t)
|
||||||
|
#define BLOCK_LEN_BYTES (BLOCK_LEN_INT64 * 8) //Block length, in bytes
|
||||||
|
#endif |
||||||
|
|
||||||
|
#ifndef N_COLS |
||||||
|
#define N_COLS 8 //Number of columns in the memory matrix: fixed to 64 by default
|
||||||
|
#endif |
||||||
|
|
||||||
|
#define ROW_LEN_INT64 (BLOCK_LEN_INT64 * N_COLS) //Total length of a row: N_COLS blocks
|
||||||
|
#define ROW_LEN_BYTES (ROW_LEN_INT64 * 8) //Number of bytes per row
|
||||||
|
|
||||||
|
|
||||||
|
int LYRA2O(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols); |
||||||
|
|
||||||
|
#endif /* LYRA2_H_ */ |
@ -0,0 +1,169 @@ |
|||||||
|
/*-
|
||||||
|
* Copyright 2014 James Lovejoy |
||||||
|
* Copyright 2014 phm |
||||||
|
* All rights reserved. |
||||||
|
* |
||||||
|
* Redistribution and use in source and binary forms, with or without |
||||||
|
* modification, are permitted provided that the following conditions |
||||||
|
* are met: |
||||||
|
* 1. Redistributions of source code must retain the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer. |
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer in the |
||||||
|
* documentation and/or other materials provided with the distribution. |
||||||
|
* |
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
||||||
|
* SUCH DAMAGE. |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "config.h" |
||||||
|
#include "miner.h" |
||||||
|
|
||||||
|
#include <stdlib.h> |
||||||
|
#include <stdint.h> |
||||||
|
#include <string.h> |
||||||
|
|
||||||
|
#include "sph/sph_blake.h" |
||||||
|
#include "sph/sph_groestl.h" |
||||||
|
#include "sph/sph_skein.h" |
||||||
|
#include "sph/sph_keccak.h" |
||||||
|
#include "lyra2_old.h" |
||||||
|
|
||||||
|
/*
|
||||||
|
* Encode a length len/4 vector of (uint32_t) into a length len vector of |
||||||
|
* (unsigned char) in big-endian form. Assumes len is a multiple of 4. |
||||||
|
*/ |
||||||
|
static inline void |
||||||
|
be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) |
||||||
|
{ |
||||||
|
uint32_t i; |
||||||
|
|
||||||
|
for (i = 0; i < len; i++) |
||||||
|
dst[i] = htobe32(src[i]); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
inline void lyra2rehash_old(void *state, const void *input) |
||||||
|
{ |
||||||
|
sph_blake256_context ctx_blake; |
||||||
|
sph_groestl256_context ctx_groestl; |
||||||
|
sph_keccak256_context ctx_keccak; |
||||||
|
sph_skein256_context ctx_skein; |
||||||
|
|
||||||
|
uint32_t hashA[8], hashB[8]; |
||||||
|
|
||||||
|
sph_blake256_init(&ctx_blake); |
||||||
|
sph_blake256 (&ctx_blake, input, 80); |
||||||
|
sph_blake256_close (&ctx_blake, hashA); |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
sph_keccak256_init(&ctx_keccak); |
||||||
|
sph_keccak256 (&ctx_keccak,hashA, 32); |
||||||
|
sph_keccak256_close(&ctx_keccak, hashB); |
||||||
|
|
||||||
|
LYRA2O(hashA, 32, hashB, 32, hashB, 32, 1, 8, 8); |
||||||
|
|
||||||
|
|
||||||
|
sph_skein256_init(&ctx_skein); |
||||||
|
sph_skein256 (&ctx_skein, hashA, 32); |
||||||
|
sph_skein256_close(&ctx_skein, hashB); |
||||||
|
|
||||||
|
|
||||||
|
sph_groestl256_init(&ctx_groestl); |
||||||
|
sph_groestl256 (&ctx_groestl, hashB, 32); |
||||||
|
sph_groestl256_close(&ctx_groestl, hashA); |
||||||
|
|
||||||
|
//printf("cpu hash %08x %08x %08x %08x\n",hashA[0],hashA[1],hashA[2],hashA[3]);
|
||||||
|
|
||||||
|
memcpy(state, hashA, 32); |
||||||
|
} |
||||||
|
|
||||||
|
static const uint32_t diff1targ = 0x0000ffff; |
||||||
|
|
||||||
|
|
||||||
|
/* Used externally as confirmation of correct OCL code */ |
||||||
|
int lyra2reold_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce) |
||||||
|
{ |
||||||
|
uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]); |
||||||
|
uint32_t data[20], ohash[8]; |
||||||
|
|
||||||
|
be32enc_vect(data, (const uint32_t *)pdata, 19); |
||||||
|
data[19] = htobe32(nonce); |
||||||
|
lyra2rehash_old(ohash, data); |
||||||
|
tmp_hash7 = be32toh(ohash[7]); |
||||||
|
|
||||||
|
applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx", |
||||||
|
(long unsigned int)Htarg, |
||||||
|
(long unsigned int)diff1targ, |
||||||
|
(long unsigned int)tmp_hash7); |
||||||
|
if (tmp_hash7 > diff1targ) |
||||||
|
return -1; |
||||||
|
if (tmp_hash7 > Htarg) |
||||||
|
return 0; |
||||||
|
return 1; |
||||||
|
} |
||||||
|
|
||||||
|
void lyra2reold_regenhash(struct work *work) |
||||||
|
{ |
||||||
|
uint32_t data[20]; |
||||||
|
uint32_t *nonce = (uint32_t *)(work->data + 76); |
||||||
|
uint32_t *ohash = (uint32_t *)(work->hash); |
||||||
|
|
||||||
|
be32enc_vect(data, (const uint32_t *)work->data, 19); |
||||||
|
data[19] = htobe32(*nonce); |
||||||
|
lyra2rehash_old(ohash, data); |
||||||
|
} |
||||||
|
|
||||||
|
bool scanhash_lyra2reold(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate, |
||||||
|
unsigned char *pdata, unsigned char __maybe_unused *phash1, |
||||||
|
unsigned char __maybe_unused *phash, const unsigned char *ptarget, |
||||||
|
uint32_t max_nonce, uint32_t *last_nonce, uint32_t n) |
||||||
|
{ |
||||||
|
uint32_t *nonce = (uint32_t *)(pdata + 76); |
||||||
|
uint32_t data[20]; |
||||||
|
uint32_t tmp_hash7; |
||||||
|
uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]); |
||||||
|
bool ret = false; |
||||||
|
|
||||||
|
be32enc_vect(data, (const uint32_t *)pdata, 19); |
||||||
|
|
||||||
|
while(1) { |
||||||
|
uint32_t ostate[8]; |
||||||
|
|
||||||
|
*nonce = ++n; |
||||||
|
data[19] = (n); |
||||||
|
lyra2rehash_old(ostate, data); |
||||||
|
tmp_hash7 = (ostate[7]); |
||||||
|
|
||||||
|
applog(LOG_INFO, "data7 %08lx", |
||||||
|
(long unsigned int)data[7]); |
||||||
|
|
||||||
|
if (unlikely(tmp_hash7 <= Htarg)) { |
||||||
|
((uint32_t *)pdata)[19] = htobe32(n); |
||||||
|
*last_nonce = n; |
||||||
|
ret = true; |
||||||
|
break; |
||||||
|
} |
||||||
|
|
||||||
|
if (unlikely((n >= max_nonce) || thr->work_restart)) { |
||||||
|
*last_nonce = n; |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return ret; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,10 @@ |
|||||||
|
#ifndef LYRA2REOLD_H |
||||||
|
#define LYRA2REOLD_H |
||||||
|
|
||||||
|
#include "miner.h" |
||||||
|
|
||||||
|
extern int lyra2reold_test(unsigned char *pdata, const unsigned char *ptarget, |
||||||
|
uint32_t nonce); |
||||||
|
extern void lyra2reold_regenhash(struct work *work); |
||||||
|
|
||||||
|
#endif /* LYRA2RE_H */ |
@ -0,0 +1,405 @@ |
|||||||
|
/**
|
||||||
|
* A simple implementation of Blake2b's internal permutation |
||||||
|
* in the form of a sponge. |
||||||
|
* |
||||||
|
* Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
|
||||||
|
* |
||||||
|
* This software is hereby placed in the public domain. |
||||||
|
* |
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS |
||||||
|
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
||||||
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE |
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
||||||
|
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
||||||
|
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE |
||||||
|
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, |
||||||
|
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||||
|
*/ |
||||||
|
#include <string.h> |
||||||
|
#include <stdio.h> |
||||||
|
#include <time.h> |
||||||
|
#include "sponge_old.h" |
||||||
|
#include "lyra2_old.h" |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initializes the Sponge State. The first 512 bits are set to zeros and the remainder |
||||||
|
* receive Blake2b's IV as per Blake2b's specification. <b>Note:</b> Even though sponges |
||||||
|
* typically have their internal state initialized with zeros, Blake2b's G function |
||||||
|
* has a fixed point: if the internal state and message are both filled with zeros. the |
||||||
|
* resulting permutation will always be a block filled with zeros; this happens because |
||||||
|
* Blake2b does not use the constants originally employed in Blake2 inside its G function, |
||||||
|
* relying on the IV for avoiding possible fixed points. |
||||||
|
* |
||||||
|
* @param state The 1024-bit array to be initialized |
||||||
|
*/ |
||||||
|
void initStateO(uint64_t state[/*16*/]) { |
||||||
|
//First 512 bis are zeros
|
||||||
|
memset(state, 0, 64); |
||||||
|
//Remainder BLOCK_LEN_BLAKE2_SAFE_BYTES are reserved to the IV
|
||||||
|
state[8] = blake2b_IV[0]; |
||||||
|
state[9] = blake2b_IV[1]; |
||||||
|
state[10] = blake2b_IV[2]; |
||||||
|
state[11] = blake2b_IV[3]; |
||||||
|
state[12] = blake2b_IV[4]; |
||||||
|
state[13] = blake2b_IV[5]; |
||||||
|
state[14] = blake2b_IV[6]; |
||||||
|
state[15] = blake2b_IV[7]; |
||||||
|
} |
||||||
|
|
||||||
|
/**
|
||||||
|
* Execute Blake2b's G function, with all 12 rounds. |
||||||
|
* |
||||||
|
* @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function |
||||||
|
*/ |
||||||
|
static void blake2bLyra(uint64_t *v) { |
||||||
|
ROUND_LYRA(0); |
||||||
|
ROUND_LYRA(1); |
||||||
|
ROUND_LYRA(2); |
||||||
|
ROUND_LYRA(3); |
||||||
|
ROUND_LYRA(4); |
||||||
|
ROUND_LYRA(5); |
||||||
|
ROUND_LYRA(6); |
||||||
|
ROUND_LYRA(7); |
||||||
|
ROUND_LYRA(8); |
||||||
|
ROUND_LYRA(9); |
||||||
|
ROUND_LYRA(10); |
||||||
|
ROUND_LYRA(11); |
||||||
|
} |
||||||
|
|
||||||
|
/**
|
||||||
|
* Executes a reduced version of Blake2b's G function with only one round |
||||||
|
* @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function |
||||||
|
*/ |
||||||
|
static void reducedBlake2bLyra(uint64_t *v) { |
||||||
|
ROUND_LYRA(0); |
||||||
|
} |
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs a squeeze operation, using Blake2b's G function as the |
||||||
|
* internal permutation |
||||||
|
* |
||||||
|
* @param state The current state of the sponge |
||||||
|
* @param out Array that will receive the data squeezed |
||||||
|
* @param len The number of bytes to be squeezed into the "out" array |
||||||
|
*/ |
||||||
|
void squeezeO(uint64_t *state, byte *out, unsigned int len) { |
||||||
|
int fullBlocks = len / BLOCK_LEN_BYTES; |
||||||
|
byte *ptr = out; |
||||||
|
int i; |
||||||
|
//Squeezes full blocks
|
||||||
|
for (i = 0; i < fullBlocks; i++) { |
||||||
|
memcpy(ptr, state, BLOCK_LEN_BYTES); |
||||||
|
blake2bLyra(state); |
||||||
|
ptr += BLOCK_LEN_BYTES; |
||||||
|
} |
||||||
|
|
||||||
|
//Squeezes remaining bytes
|
||||||
|
memcpy(ptr, state, (len % BLOCK_LEN_BYTES)); |
||||||
|
} |
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs an absorb operation for a single block (BLOCK_LEN_INT64 words |
||||||
|
* of type uint64_t), using Blake2b's G function as the internal permutation |
||||||
|
* |
||||||
|
* @param state The current state of the sponge |
||||||
|
* @param in The block to be absorbed (BLOCK_LEN_INT64 words) |
||||||
|
*/ |
||||||
|
void absorbBlockO(uint64_t *state, const uint64_t *in) { |
||||||
|
//XORs the first BLOCK_LEN_INT64 words of "in" with the current state
|
||||||
|
state[0] ^= in[0]; |
||||||
|
state[1] ^= in[1]; |
||||||
|
state[2] ^= in[2]; |
||||||
|
state[3] ^= in[3]; |
||||||
|
state[4] ^= in[4]; |
||||||
|
state[5] ^= in[5]; |
||||||
|
state[6] ^= in[6]; |
||||||
|
state[7] ^= in[7]; |
||||||
|
state[8] ^= in[8]; |
||||||
|
state[9] ^= in[9]; |
||||||
|
state[10] ^= in[10]; |
||||||
|
state[11] ^= in[11]; |
||||||
|
|
||||||
|
//Applies the transformation f to the sponge's state
|
||||||
|
blake2bLyra(state); |
||||||
|
} |
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs an absorb operation for a single block (BLOCK_LEN_BLAKE2_SAFE_INT64 |
||||||
|
* words of type uint64_t), using Blake2b's G function as the internal permutation |
||||||
|
* |
||||||
|
* @param state The current state of the sponge |
||||||
|
* @param in The block to be absorbed (BLOCK_LEN_BLAKE2_SAFE_INT64 words) |
||||||
|
*/ |
||||||
|
void absorbBlockBlake2SafeO(uint64_t *state, const uint64_t *in) { |
||||||
|
//XORs the first BLOCK_LEN_BLAKE2_SAFE_INT64 words of "in" with the current state
|
||||||
|
state[0] ^= in[0]; |
||||||
|
state[1] ^= in[1]; |
||||||
|
state[2] ^= in[2]; |
||||||
|
state[3] ^= in[3]; |
||||||
|
state[4] ^= in[4]; |
||||||
|
state[5] ^= in[5]; |
||||||
|
state[6] ^= in[6]; |
||||||
|
state[7] ^= in[7]; |
||||||
|
|
||||||
|
//Applies the transformation f to the sponge's state
|
||||||
|
blake2bLyra(state); |
||||||
|
} |
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs a reduced squeeze operation for a single row, from the highest to |
||||||
|
* the lowest index, using the reduced-round Blake2b's G function as the |
||||||
|
* internal permutation |
||||||
|
* |
||||||
|
* @param state The current state of the sponge |
||||||
|
* @param rowOut Row to receive the data squeezed |
||||||
|
*/ |
||||||
|
void reducedSqueezeRow0O(uint64_t* state, uint64_t* rowOut) { |
||||||
|
uint64_t* ptrWord = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to M[0][C-1]
|
||||||
|
int i; |
||||||
|
//M[row][C-1-col] = H.reduced_squeeze()
|
||||||
|
for (i = 0; i < N_COLS; i++) { |
||||||
|
ptrWord[0] = state[0]; |
||||||
|
ptrWord[1] = state[1]; |
||||||
|
ptrWord[2] = state[2]; |
||||||
|
ptrWord[3] = state[3]; |
||||||
|
ptrWord[4] = state[4]; |
||||||
|
ptrWord[5] = state[5]; |
||||||
|
ptrWord[6] = state[6]; |
||||||
|
ptrWord[7] = state[7]; |
||||||
|
ptrWord[8] = state[8]; |
||||||
|
ptrWord[9] = state[9]; |
||||||
|
ptrWord[10] = state[10]; |
||||||
|
ptrWord[11] = state[11]; |
||||||
|
|
||||||
|
//Goes to next block (column) that will receive the squeezed data
|
||||||
|
ptrWord -= BLOCK_LEN_INT64; |
||||||
|
|
||||||
|
//Applies the reduced-round transformation f to the sponge's state
|
||||||
|
reducedBlake2bLyra(state); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs a reduced duplex operation for a single row, from the highest to |
||||||
|
* the lowest index, using the reduced-round Blake2b's G function as the |
||||||
|
* internal permutation |
||||||
|
* |
||||||
|
* @param state The current state of the sponge |
||||||
|
* @param rowIn Row to feed the sponge |
||||||
|
* @param rowOut Row to receive the sponge's output |
||||||
|
*/ |
||||||
|
void reducedDuplexRow1O(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut) { |
||||||
|
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
|
||||||
|
uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row
|
||||||
|
int i; |
||||||
|
|
||||||
|
for (i = 0; i < N_COLS; i++) { |
||||||
|
|
||||||
|
//Absorbing "M[prev][col]"
|
||||||
|
state[0] ^= (ptrWordIn[0]); |
||||||
|
state[1] ^= (ptrWordIn[1]); |
||||||
|
state[2] ^= (ptrWordIn[2]); |
||||||
|
state[3] ^= (ptrWordIn[3]); |
||||||
|
state[4] ^= (ptrWordIn[4]); |
||||||
|
state[5] ^= (ptrWordIn[5]); |
||||||
|
state[6] ^= (ptrWordIn[6]); |
||||||
|
state[7] ^= (ptrWordIn[7]); |
||||||
|
state[8] ^= (ptrWordIn[8]); |
||||||
|
state[9] ^= (ptrWordIn[9]); |
||||||
|
state[10] ^= (ptrWordIn[10]); |
||||||
|
state[11] ^= (ptrWordIn[11]); |
||||||
|
|
||||||
|
//Applies the reduced-round transformation f to the sponge's state
|
||||||
|
reducedBlake2bLyra(state); |
||||||
|
|
||||||
|
//M[row][C-1-col] = M[prev][col] XOR rand
|
||||||
|
ptrWordOut[0] = ptrWordIn[0] ^ state[0]; |
||||||
|
ptrWordOut[1] = ptrWordIn[1] ^ state[1]; |
||||||
|
ptrWordOut[2] = ptrWordIn[2] ^ state[2]; |
||||||
|
ptrWordOut[3] = ptrWordIn[3] ^ state[3]; |
||||||
|
ptrWordOut[4] = ptrWordIn[4] ^ state[4]; |
||||||
|
ptrWordOut[5] = ptrWordIn[5] ^ state[5]; |
||||||
|
ptrWordOut[6] = ptrWordIn[6] ^ state[6]; |
||||||
|
ptrWordOut[7] = ptrWordIn[7] ^ state[7]; |
||||||
|
ptrWordOut[8] = ptrWordIn[8] ^ state[8]; |
||||||
|
ptrWordOut[9] = ptrWordIn[9] ^ state[9]; |
||||||
|
ptrWordOut[10] = ptrWordIn[10] ^ state[10]; |
||||||
|
ptrWordOut[11] = ptrWordIn[11] ^ state[11]; |
||||||
|
|
||||||
|
|
||||||
|
//Input: next column (i.e., next block in sequence)
|
||||||
|
ptrWordIn += BLOCK_LEN_INT64; |
||||||
|
//Output: goes to previous column
|
||||||
|
ptrWordOut -= BLOCK_LEN_INT64; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e., |
||||||
|
* the wordwise addition of two columns, ignoring carries between words). The |
||||||
|
* output of this operation, "rand", is then used to make |
||||||
|
* "M[rowOut][(N_COLS-1)-col] = M[rowIn][col] XOR rand" and |
||||||
|
* "M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)", where rotW is a 64-bit |
||||||
|
* rotation to the left and N_COLS is a system parameter. |
||||||
|
* |
||||||
|
* @param state The current state of the sponge |
||||||
|
* @param rowIn Row used only as input |
||||||
|
* @param rowInOut Row used as input and to receive output after rotation |
||||||
|
* @param rowOut Row receiving the output |
||||||
|
* |
||||||
|
*/ |
||||||
|
void reducedDuplexRowSetupO(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) { |
||||||
|
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
|
||||||
|
uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
|
||||||
|
uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row
|
||||||
|
int i; |
||||||
|
|
||||||
|
for (i = 0; i < N_COLS; i++) { |
||||||
|
//Absorbing "M[prev] [+] M[row*]"
|
||||||
|
state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]); |
||||||
|
state[1] ^= (ptrWordIn[1] + ptrWordInOut[1]); |
||||||
|
state[2] ^= (ptrWordIn[2] + ptrWordInOut[2]); |
||||||
|
state[3] ^= (ptrWordIn[3] + ptrWordInOut[3]); |
||||||
|
state[4] ^= (ptrWordIn[4] + ptrWordInOut[4]); |
||||||
|
state[5] ^= (ptrWordIn[5] + ptrWordInOut[5]); |
||||||
|
state[6] ^= (ptrWordIn[6] + ptrWordInOut[6]); |
||||||
|
state[7] ^= (ptrWordIn[7] + ptrWordInOut[7]); |
||||||
|
state[8] ^= (ptrWordIn[8] + ptrWordInOut[8]); |
||||||
|
state[9] ^= (ptrWordIn[9] + ptrWordInOut[9]); |
||||||
|
state[10] ^= (ptrWordIn[10] + ptrWordInOut[10]); |
||||||
|
state[11] ^= (ptrWordIn[11] + ptrWordInOut[11]); |
||||||
|
|
||||||
|
//Applies the reduced-round transformation f to the sponge's state
|
||||||
|
reducedBlake2bLyra(state); |
||||||
|
|
||||||
|
//M[row][col] = M[prev][col] XOR rand
|
||||||
|
ptrWordOut[0] = ptrWordIn[0] ^ state[0]; |
||||||
|
ptrWordOut[1] = ptrWordIn[1] ^ state[1]; |
||||||
|
ptrWordOut[2] = ptrWordIn[2] ^ state[2]; |
||||||
|
ptrWordOut[3] = ptrWordIn[3] ^ state[3]; |
||||||
|
ptrWordOut[4] = ptrWordIn[4] ^ state[4]; |
||||||
|
ptrWordOut[5] = ptrWordIn[5] ^ state[5]; |
||||||
|
ptrWordOut[6] = ptrWordIn[6] ^ state[6]; |
||||||
|
ptrWordOut[7] = ptrWordIn[7] ^ state[7]; |
||||||
|
ptrWordOut[8] = ptrWordIn[8] ^ state[8]; |
||||||
|
ptrWordOut[9] = ptrWordIn[9] ^ state[9]; |
||||||
|
ptrWordOut[10] = ptrWordIn[10] ^ state[10]; |
||||||
|
ptrWordOut[11] = ptrWordIn[11] ^ state[11]; |
||||||
|
|
||||||
|
//M[row*][col] = M[row*][col] XOR rotW(rand)
|
||||||
|
ptrWordInOut[0] ^= state[11]; |
||||||
|
ptrWordInOut[1] ^= state[0]; |
||||||
|
ptrWordInOut[2] ^= state[1]; |
||||||
|
ptrWordInOut[3] ^= state[2]; |
||||||
|
ptrWordInOut[4] ^= state[3]; |
||||||
|
ptrWordInOut[5] ^= state[4]; |
||||||
|
ptrWordInOut[6] ^= state[5]; |
||||||
|
ptrWordInOut[7] ^= state[6]; |
||||||
|
ptrWordInOut[8] ^= state[7]; |
||||||
|
ptrWordInOut[9] ^= state[8]; |
||||||
|
ptrWordInOut[10] ^= state[9]; |
||||||
|
ptrWordInOut[11] ^= state[10]; |
||||||
|
|
||||||
|
//Inputs: next column (i.e., next block in sequence)
|
||||||
|
ptrWordInOut += BLOCK_LEN_INT64; |
||||||
|
ptrWordIn += BLOCK_LEN_INT64; |
||||||
|
//Output: goes to previous column
|
||||||
|
ptrWordOut -= BLOCK_LEN_INT64; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e., |
||||||
|
* the wordwise addition of two columns, ignoring carries between words). The |
||||||
|
* output of this operation, "rand", is then used to make |
||||||
|
* "M[rowOut][col] = M[rowOut][col] XOR rand" and |
||||||
|
* "M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)", where rotW is a 64-bit |
||||||
|
* rotation to the left. |
||||||
|
* |
||||||
|
* @param state The current state of the sponge |
||||||
|
* @param rowIn Row used only as input |
||||||
|
* @param rowInOut Row used as input and to receive output after rotation |
||||||
|
* @param rowOut Row receiving the output |
||||||
|
* |
||||||
|
*/ |
||||||
|
void reducedDuplexRowO(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) { |
||||||
|
uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
|
||||||
|
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
|
||||||
|
uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row
|
||||||
|
int i; |
||||||
|
|
||||||
|
for (i = 0; i < N_COLS; i++) { |
||||||
|
|
||||||
|
//Absorbing "M[prev] [+] M[row*]"
|
||||||
|
state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]); |
||||||
|
state[1] ^= (ptrWordIn[1] + ptrWordInOut[1]); |
||||||
|
state[2] ^= (ptrWordIn[2] + ptrWordInOut[2]); |
||||||
|
state[3] ^= (ptrWordIn[3] + ptrWordInOut[3]); |
||||||
|
state[4] ^= (ptrWordIn[4] + ptrWordInOut[4]); |
||||||
|
state[5] ^= (ptrWordIn[5] + ptrWordInOut[5]); |
||||||
|
state[6] ^= (ptrWordIn[6] + ptrWordInOut[6]); |
||||||
|
state[7] ^= (ptrWordIn[7] + ptrWordInOut[7]); |
||||||
|
state[8] ^= (ptrWordIn[8] + ptrWordInOut[8]); |
||||||
|
state[9] ^= (ptrWordIn[9] + ptrWordInOut[9]); |
||||||
|
state[10] ^= (ptrWordIn[10] + ptrWordInOut[10]); |
||||||
|
state[11] ^= (ptrWordIn[11] + ptrWordInOut[11]); |
||||||
|
|
||||||
|
//Applies the reduced-round transformation f to the sponge's state
|
||||||
|
reducedBlake2bLyra(state); |
||||||
|
|
||||||
|
//M[rowOut][col] = M[rowOut][col] XOR rand
|
||||||
|
ptrWordOut[0] ^= state[0]; |
||||||
|
ptrWordOut[1] ^= state[1]; |
||||||
|
ptrWordOut[2] ^= state[2]; |
||||||
|
ptrWordOut[3] ^= state[3]; |
||||||
|
ptrWordOut[4] ^= state[4]; |
||||||
|
ptrWordOut[5] ^= state[5]; |
||||||
|
ptrWordOut[6] ^= state[6]; |
||||||
|
ptrWordOut[7] ^= state[7]; |
||||||
|
ptrWordOut[8] ^= state[8]; |
||||||
|
ptrWordOut[9] ^= state[9]; |
||||||
|
ptrWordOut[10] ^= state[10]; |
||||||
|
ptrWordOut[11] ^= state[11]; |
||||||
|
|
||||||
|
//M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)
|
||||||
|
ptrWordInOut[0] ^= state[11]; |
||||||
|
ptrWordInOut[1] ^= state[0]; |
||||||
|
ptrWordInOut[2] ^= state[1]; |
||||||
|
ptrWordInOut[3] ^= state[2]; |
||||||
|
ptrWordInOut[4] ^= state[3]; |
||||||
|
ptrWordInOut[5] ^= state[4]; |
||||||
|
ptrWordInOut[6] ^= state[5]; |
||||||
|
ptrWordInOut[7] ^= state[6]; |
||||||
|
ptrWordInOut[8] ^= state[7]; |
||||||
|
ptrWordInOut[9] ^= state[8]; |
||||||
|
ptrWordInOut[10] ^= state[9]; |
||||||
|
ptrWordInOut[11] ^= state[10]; |
||||||
|
|
||||||
|
//Goes to next block
|
||||||
|
ptrWordOut += BLOCK_LEN_INT64; |
||||||
|
ptrWordInOut += BLOCK_LEN_INT64; |
||||||
|
ptrWordIn += BLOCK_LEN_INT64; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/**
|
||||||
|
Prints an array of unsigned chars |
||||||
|
*/ |
||||||
|
void printArrayO(unsigned char *array, unsigned int size, char *name) { |
||||||
|
int i; |
||||||
|
printf("%s: ", name); |
||||||
|
for (i = 0; i < size; i++) { |
||||||
|
printf("%2x|", array[i]); |
||||||
|
} |
||||||
|
printf("\n"); |
||||||
|
} |
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
@ -0,0 +1,98 @@ |
|||||||
|
/**
|
||||||
|
* Header file for Blake2b's internal permutation in the form of a sponge. |
||||||
|
* This code is based on the original Blake2b's implementation provided by |
||||||
|
* Samuel Neves (https://blake2.net/)
|
||||||
|
* |
||||||
|
* Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
|
||||||
|
* |
||||||
|
* This software is hereby placed in the public domain. |
||||||
|
* |
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS |
||||||
|
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
||||||
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE |
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
||||||
|
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
||||||
|
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE |
||||||
|
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, |
||||||
|
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||||
|
*/ |
||||||
|
#ifndef SPONGEOLD_H_ |
||||||
|
#define SPONGEOLD_H_ |
||||||
|
|
||||||
|
#include <stdint.h> |
||||||
|
|
||||||
|
#if defined(__GNUC__) |
||||||
|
#define ALIGN __attribute__ ((aligned(32))) |
||||||
|
#elif defined(_MSC_VER) |
||||||
|
#define ALIGN __declspec(align(32)) |
||||||
|
#else |
||||||
|
#define ALIGN |
||||||
|
#endif |
||||||
|
|
||||||
|
|
||||||
|
/*Blake2b IV Array*/ |
||||||
|
static const uint64_t blake2b_IV[8] = |
||||||
|
{ |
||||||
|
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, |
||||||
|
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, |
||||||
|
0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, |
||||||
|
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL |
||||||
|
}; |
||||||
|
|
||||||
|
/*Blake2b's rotation*/ |
||||||
|
static inline uint64_t rotr64( const uint64_t w, const unsigned c ){ |
||||||
|
return ( w >> c ) | ( w << ( 64 - c ) ); |
||||||
|
} |
||||||
|
|
||||||
|
/*Blake2b's G function*/ |
||||||
|
#define G(r,i,a,b,c,d) \ |
||||||
|
do { \ |
||||||
|
a = a + b; \ |
||||||
|
d = rotr64(d ^ a, 32); \ |
||||||
|
c = c + d; \ |
||||||
|
b = rotr64(b ^ c, 24); \ |
||||||
|
a = a + b; \ |
||||||
|
d = rotr64(d ^ a, 16); \ |
||||||
|
c = c + d; \ |
||||||
|
b = rotr64(b ^ c, 63); \ |
||||||
|
} while(0) |
||||||
|
|
||||||
|
|
||||||
|
/*One Round of the Blake2b's compression function*/ |
||||||
|
#define ROUND_LYRA(r) \ |
||||||
|
G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ |
||||||
|
G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ |
||||||
|
G(r,2,v[ 2],v[ 6],v[10],v[14]); \ |
||||||
|
G(r,3,v[ 3],v[ 7],v[11],v[15]); \ |
||||||
|
G(r,4,v[ 0],v[ 5],v[10],v[15]); \ |
||||||
|
G(r,5,v[ 1],v[ 6],v[11],v[12]); \ |
||||||
|
G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ |
||||||
|
G(r,7,v[ 3],v[ 4],v[ 9],v[14]); |
||||||
|
|
||||||
|
|
||||||
|
//---- Housekeeping
|
||||||
|
void initStateO(uint64_t state[/*16*/]); |
||||||
|
|
||||||
|
//---- Squeezes
|
||||||
|
void squeezeO(uint64_t *state, unsigned char *out, unsigned int len); |
||||||
|
void reducedSqueezeRow0O(uint64_t* state, uint64_t* row); |
||||||
|
|
||||||
|
//---- Absorbs
|
||||||
|
void absorbBlockO(uint64_t *state, const uint64_t *in); |
||||||
|
void absorbBlockBlake2SafeO(uint64_t *state, const uint64_t *in); |
||||||
|
|
||||||
|
//---- Duplexes
|
||||||
|
void reducedDuplexRow1O(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut); |
||||||
|
void reducedDuplexRowSetupO(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); |
||||||
|
void reducedDuplexRowO(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); |
||||||
|
|
||||||
|
//---- Misc
|
||||||
|
void printArrayO(unsigned char *array, unsigned int size, char *name); |
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* SPONGE_H_ */ |
@ -0,0 +1,140 @@ |
|||||||
|
/*-
|
||||||
|
* Copyright 2007-2009 Colin Percival |
||||||
|
* All rights reserved. |
||||||
|
* |
||||||
|
* Redistribution and use in source and binary forms, with or without |
||||||
|
* modification, are permitted provided that the following conditions |
||||||
|
* are met: |
||||||
|
* 1. Redistributions of source code must retain the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer. |
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer in the |
||||||
|
* documentation and/or other materials provided with the distribution. |
||||||
|
* |
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
||||||
|
* SUCH DAMAGE. |
||||||
|
* |
||||||
|
* This file was originally written by Colin Percival as part of the Tarsnap |
||||||
|
* online backup system. |
||||||
|
*/ |
||||||
|
#ifndef _SYSENDIAN_H_ |
||||||
|
#define _SYSENDIAN_H_ |
||||||
|
|
||||||
|
/* If we don't have be64enc, the <sys/endian.h> we have isn't usable. */ |
||||||
|
#if !HAVE_DECL_BE64ENC |
||||||
|
#undef HAVE_SYS_ENDIAN_H |
||||||
|
#endif |
||||||
|
|
||||||
|
#ifdef HAVE_SYS_ENDIAN_H |
||||||
|
|
||||||
|
#include <sys/endian.h> |
||||||
|
|
||||||
|
#else |
||||||
|
|
||||||
|
#include <stdint.h> |
||||||
|
|
||||||
|
#if !HAVE_DECL_LE32DEC |
||||||
|
static uint32_t le32dec(const void *pp) |
||||||
|
{ |
||||||
|
const uint8_t *p = (uint8_t const *)pp; |
||||||
|
return ((uint32_t)(p[0]) + ((uint32_t)(p[1]) << 8) + |
||||||
|
((uint32_t)(p[2]) << 16) + ((uint32_t)(p[3]) << 24)); |
||||||
|
} |
||||||
|
#endif |
||||||
|
|
||||||
|
#if !HAVE_DECL_BE32ENC |
||||||
|
static void be32enc(void *pp, uint32_t x) |
||||||
|
{ |
||||||
|
uint8_t *p = (uint8_t *)pp; |
||||||
|
p[3] = x & 0xff; |
||||||
|
p[2] = (x >> 8) & 0xff; |
||||||
|
p[1] = (x >> 16) & 0xff; |
||||||
|
p[0] = (x >> 24) & 0xff; |
||||||
|
} |
||||||
|
#endif |
||||||
|
|
||||||
|
#if !HAVE_DECL_BE32DEC |
||||||
|
static uint32_t be32dec(const void *pp) |
||||||
|
{ |
||||||
|
const uint8_t *p = (uint8_t const *)pp; |
||||||
|
return ((uint32_t)(p[3]) + ((uint32_t)(p[2]) << 8) + |
||||||
|
((uint32_t)(p[1]) << 16) + ((uint32_t)(p[0]) << 24)); |
||||||
|
} |
||||||
|
#endif |
||||||
|
|
||||||
|
#if !HAVE_DECL_LE32ENC |
||||||
|
static void le32enc(void *pp, uint32_t x) |
||||||
|
{ |
||||||
|
uint8_t *p = (uint8_t *)pp; |
||||||
|
p[0] = x & 0xff; |
||||||
|
p[1] = (x >> 8) & 0xff; |
||||||
|
p[2] = (x >> 16) & 0xff; |
||||||
|
p[3] = (x >> 24) & 0xff; |
||||||
|
} |
||||||
|
#endif |
||||||
|
|
||||||
|
static uint64_t |
||||||
|
be64dec(const void *pp) |
||||||
|
{ |
||||||
|
const uint8_t *p = (uint8_t const *)pp; |
||||||
|
|
||||||
|
return ((uint64_t)(p[7]) + ((uint64_t)(p[6]) << 8) + |
||||||
|
((uint64_t)(p[5]) << 16) + ((uint64_t)(p[4]) << 24) + |
||||||
|
((uint64_t)(p[3]) << 32) + ((uint64_t)(p[2]) << 40) + |
||||||
|
((uint64_t)(p[1]) << 48) + ((uint64_t)(p[0]) << 56)); |
||||||
|
} |
||||||
|
|
||||||
|
static void |
||||||
|
be64enc(void *pp, uint64_t x) |
||||||
|
{ |
||||||
|
uint8_t * p = (uint8_t *)pp; |
||||||
|
|
||||||
|
p[7] = x & 0xff; |
||||||
|
p[6] = (x >> 8) & 0xff; |
||||||
|
p[5] = (x >> 16) & 0xff; |
||||||
|
p[4] = (x >> 24) & 0xff; |
||||||
|
p[3] = (x >> 32) & 0xff; |
||||||
|
p[2] = (x >> 40) & 0xff; |
||||||
|
p[1] = (x >> 48) & 0xff; |
||||||
|
p[0] = (x >> 56) & 0xff; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
static uint64_t |
||||||
|
le64dec(const void *pp) |
||||||
|
{ |
||||||
|
const uint8_t *p = (uint8_t const *)pp; |
||||||
|
|
||||||
|
return ((uint64_t)(p[0]) + ((uint64_t)(p[1]) << 8) + |
||||||
|
((uint64_t)(p[2]) << 16) + ((uint64_t)(p[3]) << 24) + |
||||||
|
((uint64_t)(p[4]) << 32) + ((uint64_t)(p[5]) << 40) + |
||||||
|
((uint64_t)(p[6]) << 48) + ((uint64_t)(p[7]) << 56)); |
||||||
|
} |
||||||
|
|
||||||
|
static void |
||||||
|
le64enc(void *pp, uint64_t x) |
||||||
|
{ |
||||||
|
uint8_t * p = (uint8_t *)pp; |
||||||
|
|
||||||
|
p[0] = x & 0xff; |
||||||
|
p[1] = (x >> 8) & 0xff; |
||||||
|
p[2] = (x >> 16) & 0xff; |
||||||
|
p[3] = (x >> 24) & 0xff; |
||||||
|
p[4] = (x >> 32) & 0xff; |
||||||
|
p[5] = (x >> 40) & 0xff; |
||||||
|
p[6] = (x >> 48) & 0xff; |
||||||
|
p[7] = (x >> 56) & 0xff; |
||||||
|
} |
||||||
|
#endif /* !HAVE_SYS_ENDIAN_H */ |
||||||
|
|
||||||
|
#endif /* !_SYSENDIAN_H_ */ |
@ -0,0 +1,128 @@ |
|||||||
|
/*-
|
||||||
|
* Copyright 2015 djm34 |
||||||
|
* All rights reserved. |
||||||
|
* |
||||||
|
* Redistribution and use in source and binary forms, with or without |
||||||
|
* modification, are permitted provided that the following conditions |
||||||
|
* are met: |
||||||
|
* 1. Redistributions of source code must retain the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer. |
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer in the |
||||||
|
* documentation and/or other materials provided with the distribution. |
||||||
|
* |
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
||||||
|
* SUCH DAMAGE. |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "config.h" |
||||||
|
#include "miner.h" |
||||||
|
|
||||||
|
#include <stdlib.h> |
||||||
|
#include <stdint.h> |
||||||
|
#include <string.h> |
||||||
|
|
||||||
|
#include "algorithm/yescrypt_core.h" |
||||||
|
|
||||||
|
static const uint32_t diff1targ = 0x0000ffff; |
||||||
|
|
||||||
|
static inline void |
||||||
|
be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) |
||||||
|
{ |
||||||
|
uint32_t i; |
||||||
|
|
||||||
|
for (i = 0; i < len; i++) |
||||||
|
dst[i] = htobe32(src[i]); |
||||||
|
} |
||||||
|
|
||||||
|
/* Used externally as confirmation of correct OCL code */ |
||||||
|
int yescrypt_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce) |
||||||
|
{ |
||||||
|
uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]); |
||||||
|
uint32_t data[20], ohash[8]; |
||||||
|
|
||||||
|
be32enc_vect(data, (const uint32_t *)pdata, 19); |
||||||
|
data[19] = htobe32(nonce); |
||||||
|
yescrypt_hash((unsigned char*)data,(unsigned char*)ohash); |
||||||
|
|
||||||
|
tmp_hash7 = be32toh(ohash[7]); |
||||||
|
|
||||||
|
applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx", |
||||||
|
(long unsigned int)Htarg, |
||||||
|
(long unsigned int)diff1targ, |
||||||
|
(long unsigned int)tmp_hash7); |
||||||
|
|
||||||
|
if (tmp_hash7 > diff1targ) |
||||||
|
return -1; |
||||||
|
|
||||||
|
if (tmp_hash7 > Htarg) |
||||||
|
return 0; |
||||||
|
|
||||||
|
return 1; |
||||||
|
} |
||||||
|
|
||||||
|
void yescrypt_regenhash(struct work *work) |
||||||
|
{ |
||||||
|
uint32_t data[20]; |
||||||
|
uint32_t *nonce = (uint32_t *)(work->data + 76); |
||||||
|
uint32_t *ohash = (uint32_t *)(work->hash); |
||||||
|
|
||||||
|
be32enc_vect(data, (const uint32_t *)work->data, 19); |
||||||
|
data[19] = htobe32(*nonce); |
||||||
|
|
||||||
|
yescrypt_hash((unsigned char*)data, (unsigned char*)ohash); |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
bool scanhash_yescrypt(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate, |
||||||
|
unsigned char *pdata, unsigned char __maybe_unused *phash1, |
||||||
|
unsigned char __maybe_unused *phash, const unsigned char *ptarget, |
||||||
|
uint32_t max_nonce, uint32_t *last_nonce, uint32_t n) |
||||||
|
{ |
||||||
|
uint32_t *nonce = (uint32_t *)(pdata + 76); |
||||||
|
uint32_t data[20]; |
||||||
|
uint32_t tmp_hash7; |
||||||
|
uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]); |
||||||
|
bool ret = false; |
||||||
|
|
||||||
|
be32enc_vect(data, (const uint32_t *)pdata, 19); |
||||||
|
|
||||||
|
while (1) |
||||||
|
{ |
||||||
|
uint32_t ostate[8]; |
||||||
|
|
||||||
|
*nonce = ++n; |
||||||
|
data[19] = (n); |
||||||
|
|
||||||
|
yescrypt_hash((unsigned char*)data, (unsigned char*)ostate); |
||||||
|
tmp_hash7 = (ostate[7]); |
||||||
|
|
||||||
|
applog(LOG_INFO, "data7 %08lx", (long unsigned int)data[7]); |
||||||
|
|
||||||
|
if (unlikely(tmp_hash7 <= Htarg)) |
||||||
|
{ |
||||||
|
((uint32_t *)pdata)[19] = htobe32(n); |
||||||
|
*last_nonce = n; |
||||||
|
ret = true; |
||||||
|
break; |
||||||
|
} |
||||||
|
|
||||||
|
if (unlikely((n >= max_nonce) || thr->work_restart)) |
||||||
|
{ |
||||||
|
*last_nonce = n; |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return ret; |
||||||
|
} |
@ -0,0 +1,10 @@ |
|||||||
|
#ifndef YESCRYPT_H |
||||||
|
#define YESCRYPT_H |
||||||
|
|
||||||
|
#include "miner.h" |
||||||
|
#define YESCRYPT_SCRATCHBUF_SIZE (128 * 2048 * 8 ) //uchar
|
||||||
|
#define YESCRYP_SECBUF_SIZE (128*64*8) |
||||||
|
extern int yescrypt_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce); |
||||||
|
extern void yescrypt_regenhash(struct work *work); |
||||||
|
|
||||||
|
#endif /* YESCRYPT_H */ |
@ -0,0 +1,376 @@ |
|||||||
|
/*-
|
||||||
|
* Copyright 2009 Colin Percival |
||||||
|
* Copyright 2013,2014 Alexander Peslyak |
||||||
|
* All rights reserved. |
||||||
|
* |
||||||
|
* Redistribution and use in source and binary forms, with or without |
||||||
|
* modification, are permitted provided that the following conditions |
||||||
|
* are met: |
||||||
|
* 1. Redistributions of source code must retain the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer. |
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer in the |
||||||
|
* documentation and/or other materials provided with the distribution. |
||||||
|
* |
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
||||||
|
* SUCH DAMAGE. |
||||||
|
* |
||||||
|
* This file was originally written by Colin Percival as part of the Tarsnap |
||||||
|
* online backup system. |
||||||
|
*/ |
||||||
|
#ifndef _YESCRYPT_H_ |
||||||
|
#define _YESCRYPT_H_ |
||||||
|
|
||||||
|
#include <stdint.h> |
||||||
|
#include <stdlib.h> /* for size_t */ |
||||||
|
#include <errno.h> |
||||||
|
|
||||||
|
#ifdef __cplusplus |
||||||
|
extern "C" { |
||||||
|
#endif |
||||||
|
|
||||||
|
|
||||||
|
//extern void yescrypt_hash_sp(const unsigned char *input, unsigned char *output);
|
||||||
|
extern void yescrypt_hash(const unsigned char *input, unsigned char *output); |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* crypto_scrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen): |
||||||
|
* Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r, |
||||||
|
* p, buflen) and write the result into buf. The parameters r, p, and buflen |
||||||
|
* must satisfy r * p < 2^30 and buflen <= (2^32 - 1) * 32. The parameter N |
||||||
|
* must be a power of 2 greater than 1. |
||||||
|
* |
||||||
|
* Return 0 on success; or -1 on error. |
||||||
|
* |
||||||
|
* MT-safe as long as buf is local to the thread. |
||||||
|
*/ |
||||||
|
extern int crypto_scrypt(const uint8_t * __passwd, size_t __passwdlen, |
||||||
|
const uint8_t * __salt, size_t __saltlen, |
||||||
|
uint64_t __N, uint32_t __r, uint32_t __p, |
||||||
|
uint8_t * __buf, size_t __buflen); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Internal type used by the memory allocator. Please do not use it directly. |
||||||
|
* Use yescrypt_shared_t and yescrypt_local_t as appropriate instead, since |
||||||
|
* they might differ from each other in a future version. |
||||||
|
*/ |
||||||
|
typedef struct { |
||||||
|
void * base, * aligned; |
||||||
|
size_t base_size, aligned_size; |
||||||
|
} yescrypt_region_t; |
||||||
|
|
||||||
|
/**
|
||||||
|
* Types for shared (ROM) and thread-local (RAM) data structures. |
||||||
|
*/ |
||||||
|
typedef yescrypt_region_t yescrypt_shared1_t; |
||||||
|
typedef struct { |
||||||
|
yescrypt_shared1_t shared1; |
||||||
|
uint32_t mask1; |
||||||
|
} yescrypt_shared_t; |
||||||
|
typedef yescrypt_region_t yescrypt_local_t; |
||||||
|
|
||||||
|
/**
|
||||||
|
* Possible values for yescrypt_init_shared()'s flags argument. |
||||||
|
*/ |
||||||
|
typedef enum { |
||||||
|
YESCRYPT_SHARED_DEFAULTS = 0, |
||||||
|
YESCRYPT_SHARED_PREALLOCATED = 0x100 |
||||||
|
} yescrypt_init_shared_flags_t; |
||||||
|
|
||||||
|
/**
|
||||||
|
* Possible values for the flags argument of yescrypt_kdf(), |
||||||
|
* yescrypt_gensalt_r(), yescrypt_gensalt(). These may be OR'ed together, |
||||||
|
* except that YESCRYPT_WORM and YESCRYPT_RW are mutually exclusive. |
||||||
|
* Please refer to the description of yescrypt_kdf() below for the meaning of |
||||||
|
* these flags. |
||||||
|
*/ |
||||||
|
typedef enum { |
||||||
|
/* public */ |
||||||
|
YESCRYPT_WORM = 0, |
||||||
|
YESCRYPT_RW = 1, |
||||||
|
YESCRYPT_PARALLEL_SMIX = 2, |
||||||
|
YESCRYPT_PWXFORM = 4, |
||||||
|
/* private */ |
||||||
|
__YESCRYPT_INIT_SHARED_1 = 0x10000, |
||||||
|
__YESCRYPT_INIT_SHARED_2 = 0x20000, |
||||||
|
__YESCRYPT_INIT_SHARED = 0x30000 |
||||||
|
} yescrypt_flags_t; |
||||||
|
|
||||||
|
#define YESCRYPT_KNOWN_FLAGS \ |
||||||
|
(YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | YESCRYPT_PWXFORM | \ |
||||||
|
__YESCRYPT_INIT_SHARED) |
||||||
|
|
||||||
|
/**
|
||||||
|
* yescrypt_init_shared(shared, param, paramlen, N, r, p, flags, mask, |
||||||
|
* buf, buflen): |
||||||
|
* Optionally allocate memory for and initialize the shared (ROM) data |
||||||
|
* structure. The parameters N, r, and p must satisfy the same conditions as |
||||||
|
* with crypto_scrypt(). param and paramlen specify a local parameter with |
||||||
|
* which the ROM is seeded. If buf is not NULL, then it is used to return |
||||||
|
* buflen bytes of message digest for the initialized ROM (the caller may use |
||||||
|
* this to verify that the ROM has been computed in the same way that it was on |
||||||
|
* a previous run). |
||||||
|
* |
||||||
|
* Return 0 on success; or -1 on error. |
||||||
|
* |
||||||
|
* If bit YESCRYPT_SHARED_PREALLOCATED in flags is set, then memory for the |
||||||
|
* ROM is assumed to have been preallocated by the caller, with |
||||||
|
* shared->shared1.aligned being the start address of the ROM and |
||||||
|
* shared->shared1.aligned_size being its size (which must be consistent with |
||||||
|
* N, r, and p). This may be used e.g. when the ROM is to be placed in a SysV |
||||||
|
* shared memory segment allocated by the caller. |
||||||
|
* |
||||||
|
* mask controls the frequency of ROM accesses by yescrypt_kdf(). Normally it |
||||||
|
* should be set to 1, to interleave RAM and ROM accesses, which works well |
||||||
|
* when both regions reside in the machine's RAM anyway. Other values may be |
||||||
|
* used e.g. when the ROM is memory-mapped from a disk file. Recommended mask |
||||||
|
* values are powers of 2 minus 1 or minus 2. Here's the effect of some mask |
||||||
|
* values: |
||||||
|
* mask value ROM accesses in SMix 1st loop ROM accesses in SMix 2nd loop |
||||||
|
* 0 0 1/2 |
||||||
|
* 1 1/2 1/2 |
||||||
|
* 2 0 1/4 |
||||||
|
* 3 1/4 1/4 |
||||||
|
* 6 0 1/8 |
||||||
|
* 7 1/8 1/8 |
||||||
|
* 14 0 1/16 |
||||||
|
* 15 1/16 1/16 |
||||||
|
* 1022 0 1/1024 |
||||||
|
* 1023 1/1024 1/1024 |
||||||
|
* |
||||||
|
* Actual computation of the ROM contents may be avoided, if you don't intend |
||||||
|
* to use a ROM but need a dummy shared structure, by calling this function |
||||||
|
* with NULL, 0, 0, 0, 0, YESCRYPT_SHARED_DEFAULTS, 0, NULL, 0 for the |
||||||
|
* arguments starting with param and on. |
||||||
|
* |
||||||
|
* MT-safe as long as shared is local to the thread. |
||||||
|
*/ |
||||||
|
extern int yescrypt_init_shared(yescrypt_shared_t * __shared, |
||||||
|
const uint8_t * __param, size_t __paramlen, |
||||||
|
uint64_t __N, uint32_t __r, uint32_t __p, |
||||||
|
yescrypt_init_shared_flags_t __flags, uint32_t __mask, |
||||||
|
uint8_t * __buf, size_t __buflen); |
||||||
|
|
||||||
|
/**
|
||||||
|
* yescrypt_free_shared(shared): |
||||||
|
* Free memory that had been allocated with yescrypt_init_shared(). |
||||||
|
* |
||||||
|
* Return 0 on success; or -1 on error. |
||||||
|
* |
||||||
|
* MT-safe as long as shared is local to the thread. |
||||||
|
*/ |
||||||
|
extern int yescrypt_free_shared(yescrypt_shared_t * __shared); |
||||||
|
|
||||||
|
/**
|
||||||
|
* yescrypt_init_local(local): |
||||||
|
* Initialize the thread-local (RAM) data structure. Actual memory allocation |
||||||
|
* is currently fully postponed until a call to yescrypt_kdf() or yescrypt_r(). |
||||||
|
* |
||||||
|
* Return 0 on success; or -1 on error. |
||||||
|
* |
||||||
|
* MT-safe as long as local is local to the thread. |
||||||
|
*/ |
||||||
|
extern int yescrypt_init_local(yescrypt_local_t * __local); |
||||||
|
|
||||||
|
/**
|
||||||
|
* yescrypt_free_local(local): |
||||||
|
* Free memory that may have been allocated for an initialized thread-local |
||||||
|
* (RAM) data structure. |
||||||
|
* |
||||||
|
* Return 0 on success; or -1 on error. |
||||||
|
* |
||||||
|
* MT-safe as long as local is local to the thread. |
||||||
|
*/ |
||||||
|
extern int yescrypt_free_local(yescrypt_local_t * __local); |
||||||
|
|
||||||
|
/**
|
||||||
|
* yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen, |
||||||
|
* N, r, p, t, flags, buf, buflen): |
||||||
|
* Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r, |
||||||
|
* p, buflen), or a revision of scrypt as requested by flags and shared, and |
||||||
|
* write the result into buf. The parameters N, r, p, and buflen must satisfy |
||||||
|
* the same conditions as with crypto_scrypt(). t controls computation time |
||||||
|
* while not affecting peak memory usage. shared and flags may request |
||||||
|
* special modes as described below. local is the thread-local data |
||||||
|
* structure, allowing to preserve and reuse a memory allocation across calls, |
||||||
|
* thereby reducing its overhead. |
||||||
|
* |
||||||
|
* Return 0 on success; or -1 on error. |
||||||
|
* |
||||||
|
* t controls computation time. t = 0 is optimal in terms of achieving the |
||||||
|
* highest area-time for ASIC attackers. Thus, higher computation time, if |
||||||
|
* affordable, is best achieved by increasing N rather than by increasing t. |
||||||
|
* However, if the higher memory usage (which goes along with higher N) is not |
||||||
|
* affordable, or if fine-tuning of the time is needed (recall that N must be a |
||||||
|
* power of 2), then t = 1 or above may be used to increase time while staying |
||||||
|
* at the same peak memory usage. t = 1 increases the time by 25% and |
||||||
|
* decreases the normalized area-time to 96% of optimal. (Of course, in |
||||||
|
* absolute terms the area-time increases with higher t. It's just that it |
||||||
|
* would increase slightly more with higher N*r rather than with higher t.) |
||||||
|
* t = 2 increases the time by another 20% and decreases the normalized |
||||||
|
* area-time to 89% of optimal. Thus, these two values are reasonable to use |
||||||
|
* for fine-tuning. Values of t higher than 2 result in further increase in |
||||||
|
* time while reducing the efficiency much further (e.g., down to around 50% of |
||||||
|
* optimal for t = 5, which runs 3 to 4 times slower than t = 0, with exact |
||||||
|
* numbers varying by the flags settings). |
||||||
|
* |
||||||
|
* Classic scrypt is available by setting t = 0 and flags to YESCRYPT_WORM and |
||||||
|
* passing a dummy shared structure (see the description of |
||||||
|
* yescrypt_init_shared() above for how to produce one). In this mode, the |
||||||
|
* thread-local memory region (RAM) is first sequentially written to and then |
||||||
|
* randomly read from. This algorithm is friendly towards time-memory |
||||||
|
* tradeoffs (TMTO), available both to defenders (albeit not in this |
||||||
|
* implementation) and to attackers. |
||||||
|
* |
||||||
|
* Setting YESCRYPT_RW adds extra random reads and writes to the thread-local |
||||||
|
* memory region (RAM), which makes TMTO a lot less efficient. This may be |
||||||
|
* used to slow down the kinds of attackers who would otherwise benefit from |
||||||
|
* classic scrypt's efficient TMTO. Since classic scrypt's TMTO allows not |
||||||
|
* only for the tradeoff, but also for a decrease of attacker's area-time (by |
||||||
|
* up to a constant factor), setting YESCRYPT_RW substantially increases the |
||||||
|
* cost of attacks in area-time terms as well. Yet another benefit of it is |
||||||
|
* that optimal area-time is reached at an earlier time than with classic |
||||||
|
* scrypt, and t = 0 actually corresponds to this earlier completion time, |
||||||
|
* resulting in quicker hash computations (and thus in higher request rate |
||||||
|
* capacity). Due to these properties, YESCRYPT_RW should almost always be |
||||||
|
* set, except when compatibility with classic scrypt or TMTO-friendliness are |
||||||
|
* desired. |
||||||
|
* |
||||||
|
* YESCRYPT_PARALLEL_SMIX moves parallelism that is present with p > 1 to a |
||||||
|
* lower level as compared to where it is in classic scrypt. This reduces |
||||||
|
* flexibility for efficient computation (for both attackers and defenders) by |
||||||
|
* requiring that, short of resorting to TMTO, the full amount of memory be |
||||||
|
* allocated as needed for the specified p, regardless of whether that |
||||||
|
* parallelism is actually being fully made use of or not. (For comparison, a |
||||||
|
* single instance of classic scrypt may be computed in less memory without any |
||||||
|
* CPU time overhead, but in more real time, by not making full use of the |
||||||
|
* parallelism.) This may be desirable when the defender has enough memory |
||||||
|
* with sufficiently low latency and high bandwidth for efficient full parallel |
||||||
|
* execution, yet the required memory size is high enough that some likely |
||||||
|
* attackers might end up being forced to choose between using higher latency |
||||||
|
* memory than they could use otherwise (waiting for data longer) or using TMTO |
||||||
|
* (waiting for data more times per one hash computation). The area-time cost |
||||||
|
* for other kinds of attackers (who would use the same memory type and TMTO |
||||||
|
* factor or no TMTO either way) remains roughly the same, given the same |
||||||
|
* running time for the defender. In the TMTO-friendly YESCRYPT_WORM mode, as |
||||||
|
* long as the defender has enough memory that is just as fast as the smaller |
||||||
|
* per-thread regions would be, doesn't expect to ever need greater |
||||||
|
* flexibility (except possibly via TMTO), and doesn't need backwards |
||||||
|
* compatibility with classic scrypt, there are no other serious drawbacks to |
||||||
|
* this setting. In the YESCRYPT_RW mode, which is meant to discourage TMTO, |
||||||
|
* this new approach to parallelization makes TMTO less inefficient. (This is |
||||||
|
* an unfortunate side-effect of avoiding some random writes, as we have to in |
||||||
|
* order to allow for parallel threads to access a common memory region without |
||||||
|
* synchronization overhead.) Thus, in this mode this setting poses an extra |
||||||
|
* tradeoff of its own (higher area-time cost for a subset of attackers vs. |
||||||
|
* better TMTO resistance). Setting YESCRYPT_PARALLEL_SMIX also changes the |
||||||
|
* way the running time is to be controlled from N*r*p (for classic scrypt) to |
||||||
|
* N*r (in this modification). All of this applies only when p > 1. For |
||||||
|
* p = 1, this setting is a no-op. |
||||||
|
* |
||||||
|
* Passing a real shared structure, with ROM contents previously computed by |
||||||
|
* yescrypt_init_shared(), enables the use of ROM and requires YESCRYPT_RW for |
||||||
|
* the thread-local RAM region. In order to allow for initialization of the |
||||||
|
* ROM to be split into a separate program, the shared->shared1.aligned and |
||||||
|
* shared->shared1.aligned_size fields may be set by the caller of |
||||||
|
* yescrypt_kdf() manually rather than with yescrypt_init_shared(). |
||||||
|
* |
||||||
|
* local must be initialized with yescrypt_init_local(). |
||||||
|
* |
||||||
|
* MT-safe as long as local and buf are local to the thread. |
||||||
|
*/ |
||||||
|
extern int yescrypt_kdf(const yescrypt_shared_t * __shared, |
||||||
|
yescrypt_local_t * __local, |
||||||
|
const uint8_t * __passwd, size_t __passwdlen, |
||||||
|
const uint8_t * __salt, size_t __saltlen, |
||||||
|
uint64_t __N, uint32_t __r, uint32_t __p, uint32_t __t, |
||||||
|
yescrypt_flags_t __flags, |
||||||
|
uint8_t * __buf, size_t __buflen); |
||||||
|
|
||||||
|
/**
|
||||||
|
* yescrypt_r(shared, local, passwd, passwdlen, setting, buf, buflen): |
||||||
|
* Compute and encode an scrypt or enhanced scrypt hash of passwd given the |
||||||
|
* parameters and salt value encoded in setting. If the shared structure is |
||||||
|
* not dummy, a ROM is used and YESCRYPT_RW is required. Otherwise, whether to |
||||||
|
* use the YESCRYPT_WORM (classic scrypt) or YESCRYPT_RW (time-memory tradeoff |
||||||
|
* discouraging modification) is determined by the setting string. shared and |
||||||
|
* local must be initialized as described above for yescrypt_kdf(). buf must |
||||||
|
* be large enough (as indicated by buflen) to hold the encoded hash string. |
||||||
|
* |
||||||
|
* Return the encoded hash string on success; or NULL on error. |
||||||
|
* |
||||||
|
* MT-safe as long as local and buf are local to the thread. |
||||||
|
*/ |
||||||
|
extern uint8_t * yescrypt_r(const yescrypt_shared_t * __shared, |
||||||
|
yescrypt_local_t * __local, |
||||||
|
const uint8_t * __passwd, size_t __passwdlen, |
||||||
|
const uint8_t * __setting, |
||||||
|
uint8_t * __buf, size_t __buflen); |
||||||
|
|
||||||
|
/**
|
||||||
|
* yescrypt(passwd, setting): |
||||||
|
* Compute and encode an scrypt or enhanced scrypt hash of passwd given the |
||||||
|
* parameters and salt value encoded in setting. Whether to use the |
||||||
|
* YESCRYPT_WORM (classic scrypt) or YESCRYPT_RW (time-memory tradeoff |
||||||
|
* discouraging modification) is determined by the setting string. |
||||||
|
* |
||||||
|
* Return the encoded hash string on success; or NULL on error. |
||||||
|
* |
||||||
|
* This is a crypt(3)-like interface, which is simpler to use than |
||||||
|
* yescrypt_r(), but it is not MT-safe, it does not allow for the use of a ROM, |
||||||
|
* and it is slower than yescrypt_r() for repeated calls because it allocates |
||||||
|
* and frees memory on each call. |
||||||
|
* |
||||||
|
* MT-unsafe. |
||||||
|
*/ |
||||||
|
extern uint8_t * yescrypt(const uint8_t * __passwd, const uint8_t * __setting); |
||||||
|
|
||||||
|
/**
|
||||||
|
* yescrypt_gensalt_r(N_log2, r, p, flags, src, srclen, buf, buflen): |
||||||
|
* Generate a setting string for use with yescrypt_r() and yescrypt() by |
||||||
|
* encoding into it the parameters N_log2 (which is to be set to base 2 |
||||||
|
* logarithm of the desired value for N), r, p, flags, and a salt given by src |
||||||
|
* (of srclen bytes). buf must be large enough (as indicated by buflen) to |
||||||
|
* hold the setting string. |
||||||
|
* |
||||||
|
* Return the setting string on success; or NULL on error. |
||||||
|
* |
||||||
|
* MT-safe as long as buf is local to the thread. |
||||||
|
*/ |
||||||
|
extern uint8_t * yescrypt_gensalt_r( |
||||||
|
uint32_t __N_log2, uint32_t __r, uint32_t __p, |
||||||
|
yescrypt_flags_t __flags, |
||||||
|
const uint8_t * __src, size_t __srclen, |
||||||
|
uint8_t * __buf, size_t __buflen); |
||||||
|
|
||||||
|
/**
|
||||||
|
* yescrypt_gensalt(N_log2, r, p, flags, src, srclen): |
||||||
|
* Generate a setting string for use with yescrypt_r() and yescrypt(). This |
||||||
|
* function is the same as yescrypt_gensalt_r() except that it uses a static |
||||||
|
* buffer and thus is not MT-safe. |
||||||
|
* |
||||||
|
* Return the setting string on success; or NULL on error. |
||||||
|
* |
||||||
|
* MT-unsafe. |
||||||
|
*/ |
||||||
|
extern uint8_t * yescrypt_gensalt( |
||||||
|
uint32_t __N_log2, uint32_t __r, uint32_t __p, |
||||||
|
yescrypt_flags_t __flags, |
||||||
|
const uint8_t * __src, size_t __srclen); |
||||||
|
|
||||||
|
#ifdef __cplusplus |
||||||
|
} |
||||||
|
#endif |
||||||
|
|
||||||
|
#endif /* !_YESCRYPT_H_ */ |
@ -0,0 +1,360 @@ |
|||||||
|
/*-
|
||||||
|
* Copyright 2013,2014 Alexander Peslyak |
||||||
|
* All rights reserved. |
||||||
|
* |
||||||
|
* Redistribution and use in source and binary forms, with or without |
||||||
|
* modification, are permitted. |
||||||
|
* |
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
||||||
|
* SUCH DAMAGE. |
||||||
|
*/ |
||||||
|
|
||||||
|
#include <stdint.h> |
||||||
|
#include <string.h> |
||||||
|
#include <stdio.h> |
||||||
|
#include "algorithm/yescrypt_core.h" |
||||||
|
|
||||||
|
#define BYTES2CHARS(bytes) \ |
||||||
|
((((bytes) * 8) + 5) / 6) |
||||||
|
|
||||||
|
#define HASH_SIZE 32 /* bytes */ |
||||||
|
#define HASH_LEN BYTES2CHARS(HASH_SIZE) /* base-64 chars */ |
||||||
|
#define YESCRYPT_FLAGS (YESCRYPT_RW | YESCRYPT_PWXFORM) |
||||||
|
static const char * const itoa64 = |
||||||
|
"./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; |
||||||
|
|
||||||
|
static uint8_t * encode64_uint32(uint8_t * dst, size_t dstlen, |
||||||
|
uint32_t src, uint32_t srcbits) |
||||||
|
{ |
||||||
|
uint32_t bit; |
||||||
|
|
||||||
|
for (bit = 0; bit < srcbits; bit += 6) { |
||||||
|
if (dstlen < 1) |
||||||
|
return NULL; |
||||||
|
*dst++ = itoa64[src & 0x3f]; |
||||||
|
dstlen--; |
||||||
|
src >>= 6; |
||||||
|
} |
||||||
|
|
||||||
|
return dst; |
||||||
|
} |
||||||
|
|
||||||
|
static uint8_t * encode64(uint8_t * dst, size_t dstlen, |
||||||
|
const uint8_t * src, size_t srclen) |
||||||
|
{ |
||||||
|
size_t i; |
||||||
|
|
||||||
|
for (i = 0; i < srclen; ) { |
||||||
|
uint8_t * dnext; |
||||||
|
uint32_t value = 0, bits = 0; |
||||||
|
do { |
||||||
|
value |= (uint32_t)src[i++] << bits; |
||||||
|
bits += 8; |
||||||
|
} while (bits < 24 && i < srclen); |
||||||
|
dnext = encode64_uint32(dst, dstlen, value, bits); |
||||||
|
if (!dnext) |
||||||
|
return NULL; |
||||||
|
dstlen -= dnext - dst; |
||||||
|
dst = dnext; |
||||||
|
} |
||||||
|
|
||||||
|
return dst; |
||||||
|
} |
||||||
|
|
||||||
|
static int decode64_one(uint32_t * dst, uint8_t src) |
||||||
|
{ |
||||||
|
const char * ptr = strchr(itoa64, src); |
||||||
|
if (ptr) { |
||||||
|
*dst = ptr - itoa64; |
||||||
|
return 0; |
||||||
|
} |
||||||
|
*dst = 0; |
||||||
|
return -1; |
||||||
|
} |
||||||
|
|
||||||
|
static const uint8_t * decode64_uint32(uint32_t * dst, uint32_t dstbits, |
||||||
|
const uint8_t * src) |
||||||
|
{ |
||||||
|
uint32_t bit; |
||||||
|
uint32_t value; |
||||||
|
|
||||||
|
value = 0; |
||||||
|
for (bit = 0; bit < dstbits; bit += 6) { |
||||||
|
uint32_t one; |
||||||
|
if (decode64_one(&one, *src)) { |
||||||
|
*dst = 0; |
||||||
|
return NULL; |
||||||
|
} |
||||||
|
src++; |
||||||
|
value |= one << bit; |
||||||
|
} |
||||||
|
|
||||||
|
*dst = value; |
||||||
|
return src; |
||||||
|
} |
||||||
|
|
||||||
|
uint8_t * |
||||||
|
yescrypt_r(const yescrypt_shared_t * shared, yescrypt_local_t * local, |
||||||
|
const uint8_t * passwd, size_t passwdlen, |
||||||
|
const uint8_t * setting, |
||||||
|
uint8_t * buf, size_t buflen) |
||||||
|
{ |
||||||
|
uint8_t hash[HASH_SIZE]; |
||||||
|
const uint8_t * src, * salt; |
||||||
|
uint8_t * dst; |
||||||
|
size_t prefixlen, saltlen, need; |
||||||
|
uint8_t version; |
||||||
|
uint64_t N; |
||||||
|
uint32_t r, p; |
||||||
|
yescrypt_flags_t flags = YESCRYPT_WORM; |
||||||
|
fflush(stdout); |
||||||
|
if (setting[0] != '$' || setting[1] != '7') |
||||||
|
{ |
||||||
|
fflush(stdout); |
||||||
|
return NULL; |
||||||
|
} |
||||||
|
fflush(stdout); |
||||||
|
src = setting + 2; |
||||||
|
fflush(stdout); |
||||||
|
switch ((version = *src)) { |
||||||
|
case '$': |
||||||
|
fflush(stdout); |
||||||
|
break; |
||||||
|
case 'X': |
||||||
|
src++; |
||||||
|
flags = YESCRYPT_RW; |
||||||
|
fflush(stdout); |
||||||
|
break; |
||||||
|
default: |
||||||
|
{ |
||||||
|
fflush(stdout); |
||||||
|
return NULL; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
fflush(stdout); |
||||||
|
if (*src != '$') { |
||||||
|
uint32_t decoded_flags; |
||||||
|
if (decode64_one(&decoded_flags, *src)) |
||||||
|
|
||||||
|
{ |
||||||
|
fflush(stdout); |
||||||
|
return NULL; |
||||||
|
} |
||||||
|
flags = decoded_flags; |
||||||
|
if (*++src != '$') |
||||||
|
{ |
||||||
|
fflush(stdout); |
||||||
|
return NULL; |
||||||
|
} |
||||||
|
} |
||||||
|
src++; |
||||||
|
|
||||||
|
{ |
||||||
|
uint32_t N_log2; |
||||||
|
if (decode64_one(&N_log2, *src)) |
||||||
|
{ |
||||||
|
return NULL; |
||||||
|
} |
||||||
|
src++; |
||||||
|
N = (uint64_t)1 << N_log2; |
||||||
|
} |
||||||
|
|
||||||
|
src = decode64_uint32(&r, 30, src); |
||||||
|
if (!src) |
||||||
|
{ |
||||||
|
return NULL; |
||||||
|
} |
||||||
|
|
||||||
|
src = decode64_uint32(&p, 30, src); |
||||||
|
if (!src) |
||||||
|
{ |
||||||
|
return NULL; |
||||||
|
} |
||||||
|
|
||||||
|
prefixlen = src - setting; |
||||||
|
|
||||||
|
salt = src; |
||||||
|
src = (uint8_t *)strrchr((char *)salt, '$'); |
||||||
|
if (src) |
||||||
|
saltlen = src - salt; |
||||||
|
else |
||||||
|
saltlen = strlen((char *)salt); |
||||||
|
|
||||||
|
need = prefixlen + saltlen + 1 + HASH_LEN + 1; |
||||||
|
if (need > buflen || need < saltlen) |
||||||
|
|
||||||
|
{ |
||||||
|
fflush(stdout); |
||||||
|
return NULL; |
||||||
|
} |
||||||
|
|
||||||
|
fflush(stdout); |
||||||
|
if (yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen, |
||||||
|
N, r, p, 0, flags, hash, sizeof(hash))) |
||||||
|
{ |
||||||
|
fflush(stdout); |
||||||
|
return NULL; |
||||||
|
} |
||||||
|
|
||||||
|
dst = buf; |
||||||
|
memcpy(dst, setting, prefixlen + saltlen); |
||||||
|
dst += prefixlen + saltlen; |
||||||
|
*dst++ = '$'; |
||||||
|
|
||||||
|
dst = encode64(dst, buflen - (dst - buf), hash, sizeof(hash)); |
||||||
|
/* Could zeroize hash[] here, but yescrypt_kdf() doesn't zeroize its
|
||||||
|
* memory allocations yet anyway. */ |
||||||
|
if (!dst || dst >= buf + buflen) /* Can't happen */ |
||||||
|
{ |
||||||
|
return NULL; |
||||||
|
} |
||||||
|
|
||||||
|
*dst = 0; /* NUL termination */ |
||||||
|
fflush(stdout); |
||||||
|
return buf; |
||||||
|
} |
||||||
|
|
||||||
|
uint8_t * |
||||||
|
yescrypt(const uint8_t * passwd, const uint8_t * setting) |
||||||
|
{ |
||||||
|
static uint8_t buf[4 + 1 + 5 + 5 + BYTES2CHARS(32) + 1 + HASH_LEN + 1]; |
||||||
|
yescrypt_shared_t shared; |
||||||
|
yescrypt_local_t local; |
||||||
|
uint8_t * retval; |
||||||
|
if (yescrypt_init_shared(&shared, NULL, 0, |
||||||
|
0, 0, 0, YESCRYPT_SHARED_DEFAULTS, 0, NULL, 0)) |
||||||
|
return NULL; |
||||||
|
if (yescrypt_init_local(&local)) { |
||||||
|
yescrypt_free_shared(&shared); |
||||||
|
return NULL; |
||||||
|
} |
||||||
|
retval = yescrypt_r(&shared, &local, |
||||||
|
passwd, 80, setting, buf, sizeof(buf)); |
||||||
|
// printf("hashse='%s'\n", (char *)retval);
|
||||||
|
if (yescrypt_free_local(&local)) { |
||||||
|
yescrypt_free_shared(&shared); |
||||||
|
return NULL; |
||||||
|
} |
||||||
|
if (yescrypt_free_shared(&shared)) |
||||||
|
return NULL; |
||||||
|
return retval; |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
uint8_t * |
||||||
|
yescrypt_gensalt_r(uint32_t N_log2, uint32_t r, uint32_t p, |
||||||
|
yescrypt_flags_t flags, |
||||||
|
const uint8_t * src, size_t srclen, |
||||||
|
uint8_t * buf, size_t buflen) |
||||||
|
{ |
||||||
|
uint8_t * dst; |
||||||
|
size_t prefixlen = 3 + 1 + 5 + 5; |
||||||
|
size_t saltlen = BYTES2CHARS(srclen); |
||||||
|
size_t need; |
||||||
|
|
||||||
|
if (p == 1) |
||||||
|
flags &= ~YESCRYPT_PARALLEL_SMIX; |
||||||
|
|
||||||
|
if (flags) { |
||||||
|
if (flags & ~0x3f) |
||||||
|
return NULL; |
||||||
|
|
||||||
|
prefixlen++; |
||||||
|
if (flags != YESCRYPT_RW) |
||||||
|
prefixlen++; |
||||||
|
} |
||||||
|
|
||||||
|
need = prefixlen + saltlen + 1; |
||||||
|
if (need > buflen || need < saltlen || saltlen < srclen) |
||||||
|
return NULL; |
||||||
|
|
||||||
|
if (N_log2 > 63 || ((uint64_t)r * (uint64_t)p >= (1U << 30))) |
||||||
|
return NULL; |
||||||
|
|
||||||
|
dst = buf; |
||||||
|
*dst++ = '$'; |
||||||
|
*dst++ = '7'; |
||||||
|
if (flags) { |
||||||
|
*dst++ = 'X'; /* eXperimental, subject to change */ |
||||||
|
if (flags != YESCRYPT_RW) |
||||||
|
*dst++ = itoa64[flags]; |
||||||
|
} |
||||||
|
*dst++ = '$'; |
||||||
|
|
||||||
|
*dst++ = itoa64[N_log2]; |
||||||
|
|
||||||
|
dst = encode64_uint32(dst, buflen - (dst - buf), r, 30); |
||||||
|
if (!dst) /* Can't happen */ |
||||||
|
return NULL; |
||||||
|
|
||||||
|
dst = encode64_uint32(dst, buflen - (dst - buf), p, 30); |
||||||
|
if (!dst) /* Can't happen */ |
||||||
|
return NULL; |
||||||
|
|
||||||
|
dst = encode64(dst, buflen - (dst - buf), src, srclen); |
||||||
|
if (!dst || dst >= buf + buflen) /* Can't happen */ |
||||||
|
return NULL; |
||||||
|
|
||||||
|
*dst = 0; /* NUL termination */ |
||||||
|
|
||||||
|
return buf; |
||||||
|
} |
||||||
|
|
||||||
|
uint8_t * |
||||||
|
yescrypt_gensalt(uint32_t N_log2, uint32_t r, uint32_t p, |
||||||
|
yescrypt_flags_t flags, |
||||||
|
const uint8_t * src, size_t srclen) |
||||||
|
{ |
||||||
|
static uint8_t buf[4 + 1 + 5 + 5 + BYTES2CHARS(32) + 1]; |
||||||
|
return yescrypt_gensalt_r(N_log2, r, p, flags, src, srclen, |
||||||
|
buf, sizeof(buf)); |
||||||
|
} |
||||||
|
|
||||||
|
static int |
||||||
|
yescrypt_bsty(const uint8_t * passwd, size_t passwdlen, |
||||||
|
const uint8_t * salt, size_t saltlen, uint64_t N, uint32_t r, uint32_t p, |
||||||
|
uint8_t * buf, size_t buflen) |
||||||
|
{ |
||||||
|
static __thread int initialized = 0; |
||||||
|
static __thread yescrypt_shared_t shared; |
||||||
|
static __thread yescrypt_local_t local; |
||||||
|
|
||||||
|
// static __declspec(thread) int initialized = 0;
|
||||||
|
// static __declspec(thread) yescrypt_shared_t shared;
|
||||||
|
// static __declspec(thread) yescrypt_local_t local;
|
||||||
|
|
||||||
|
int retval; |
||||||
|
if (!initialized) { |
||||||
|
/* "shared" could in fact be shared, but it's simpler to keep it private
|
||||||
|
* along with "local". It's dummy and tiny anyway. */ |
||||||
|
if (yescrypt_init_shared(&shared, NULL, 0, |
||||||
|
0, 0, 0, YESCRYPT_SHARED_DEFAULTS, 0, NULL, 0)) |
||||||
|
return -1; |
||||||
|
if (yescrypt_init_local(&local)) { |
||||||
|
yescrypt_free_shared(&shared); |
||||||
|
return -1; |
||||||
|
} |
||||||
|
initialized = 1; |
||||||
|
} |
||||||
|
retval = yescrypt_kdf(&shared, &local, |
||||||
|
passwd, passwdlen, salt, saltlen, N, r, p, 0, YESCRYPT_FLAGS, |
||||||
|
buf, buflen); |
||||||
|
|
||||||
|
return retval; |
||||||
|
} |
||||||
|
|
||||||
|
void yescrypt_hash(const unsigned char *input, unsigned char *output) |
||||||
|
{ |
||||||
|
|
||||||
|
yescrypt_bsty((const uint8_t *)input, 80, (const uint8_t *) input, 80, 2048, 8, 1, (uint8_t *)output, 32); |
||||||
|
} |
@ -0,0 +1,162 @@ |
|||||||
|
/* |
||||||
|
* bmw256 kernel implementation. |
||||||
|
* |
||||||
|
* ==========================(LICENSE BEGIN)============================ |
||||||
|
* Copyright (c) 2015 djm34 |
||||||
|
* |
||||||
|
* |
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining |
||||||
|
* a copy of this software and associated documentation files (the |
||||||
|
* "Software"), to deal in the Software without restriction, including |
||||||
|
* without limitation the rights to use, copy, modify, merge, publish, |
||||||
|
* distribute, sublicense, and/or sell copies of the Software, and to |
||||||
|
* permit persons to whom the Software is furnished to do so, subject to |
||||||
|
* the following conditions: |
||||||
|
* |
||||||
|
* The above copyright notice and this permission notice shall be |
||||||
|
* included in all copies or substantial portions of the Software. |
||||||
|
* |
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||||
|
* |
||||||
|
* ===========================(LICENSE END)============================= |
||||||
|
* |
||||||
|
* @author djm34 |
||||||
|
*/ |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#define shl(x, n) ((x) << (n)) |
||||||
|
#define shr(x, n) ((x) >> (n)) |
||||||
|
//#define SHR(x, n) SHR2(x, n) |
||||||
|
//#define SHL(x, n) SHL2(x, n) |
||||||
|
|
||||||
|
|
||||||
|
#define SPH_ROTL32(x,n) rotate(x,(uint)n) |
||||||
|
#define ss0(x) (shr((x), 1) ^ shl((x), 3) ^ SPH_ROTL32((x), 4) ^ SPH_ROTL32((x), 19)) |
||||||
|
#define ss1(x) (shr((x), 1) ^ shl((x), 2) ^ SPH_ROTL32((x), 8) ^ SPH_ROTL32((x), 23)) |
||||||
|
#define ss2(x) (shr((x), 2) ^ shl((x), 1) ^ SPH_ROTL32((x), 12) ^ SPH_ROTL32((x), 25)) |
||||||
|
#define ss3(x) (shr((x), 2) ^ shl((x), 2) ^ SPH_ROTL32((x), 15) ^ SPH_ROTL32((x), 29)) |
||||||
|
#define ss4(x) (shr((x), 1) ^ (x)) |
||||||
|
#define ss5(x) (shr((x), 2) ^ (x)) |
||||||
|
#define rs1(x) SPH_ROTL32((x), 3) |
||||||
|
#define rs2(x) SPH_ROTL32((x), 7) |
||||||
|
#define rs3(x) SPH_ROTL32((x), 13) |
||||||
|
#define rs4(x) SPH_ROTL32((x), 16) |
||||||
|
#define rs5(x) SPH_ROTL32((x), 19) |
||||||
|
#define rs6(x) SPH_ROTL32((x), 23) |
||||||
|
#define rs7(x) SPH_ROTL32((x), 27) |
||||||
|
|
||||||
|
/* Message expansion function 1 */ |
||||||
|
uint expand32_1(int i, uint *M32, uint *H, uint *Q) |
||||||
|
{ |
||||||
|
|
||||||
|
return (ss1(Q[i - 16]) + ss2(Q[i - 15]) + ss3(Q[i - 14]) + ss0(Q[i - 13]) |
||||||
|
+ ss1(Q[i - 12]) + ss2(Q[i - 11]) + ss3(Q[i - 10]) + ss0(Q[i - 9]) |
||||||
|
+ ss1(Q[i - 8]) + ss2(Q[i - 7]) + ss3(Q[i - 6]) + ss0(Q[i - 5]) |
||||||
|
+ ss1(Q[i - 4]) + ss2(Q[i - 3]) + ss3(Q[i - 2]) + ss0(Q[i - 1]) |
||||||
|
+ ((i*(0x05555555ul) + SPH_ROTL32(M32[(i - 16) % 16], ((i - 16) % 16) + 1) + SPH_ROTL32(M32[(i - 13) % 16], ((i - 13) % 16) + 1) - SPH_ROTL32(M32[(i - 6) % 16], ((i - 6) % 16) + 1)) ^ H[(i - 16 + 7) % 16])); |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
/* Message expansion function 2 */ |
||||||
|
uint expand32_2(int i, uint *M32, uint *H, uint *Q) |
||||||
|
{ |
||||||
|
|
||||||
|
return (Q[i - 16] + rs1(Q[i - 15]) + Q[i - 14] + rs2(Q[i - 13]) |
||||||
|
+ Q[i - 12] + rs3(Q[i - 11]) + Q[i - 10] + rs4(Q[i - 9]) |
||||||
|
+ Q[i - 8] + rs5(Q[i - 7]) + Q[i - 6] + rs6(Q[i - 5]) |
||||||
|
+ Q[i - 4] + rs7(Q[i - 3]) + ss4(Q[i - 2]) + ss5(Q[i - 1]) |
||||||
|
+ ((i*(0x05555555ul) + SPH_ROTL32(M32[(i - 16) % 16], ((i - 16) % 16) + 1) + SPH_ROTL32(M32[(i - 13) % 16], ((i - 13) % 16) + 1) - SPH_ROTL32(M32[(i - 6) % 16], ((i - 6) % 16) + 1)) ^ H[(i - 16 + 7) % 16])); |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
void Compression256(uint *M32, uint *H) |
||||||
|
{ |
||||||
|
|
||||||
|
int i; |
||||||
|
uint XL32, XH32, Q[32]; |
||||||
|
|
||||||
|
|
||||||
|
Q[0] = (M32[5] ^ H[5]) - (M32[7] ^ H[7]) + (M32[10] ^ H[10]) + (M32[13] ^ H[13]) + (M32[14] ^ H[14]); |
||||||
|
Q[1] = (M32[6] ^ H[6]) - (M32[8] ^ H[8]) + (M32[11] ^ H[11]) + (M32[14] ^ H[14]) - (M32[15] ^ H[15]); |
||||||
|
Q[2] = (M32[0] ^ H[0]) + (M32[7] ^ H[7]) + (M32[9] ^ H[9]) - (M32[12] ^ H[12]) + (M32[15] ^ H[15]); |
||||||
|
Q[3] = (M32[0] ^ H[0]) - (M32[1] ^ H[1]) + (M32[8] ^ H[8]) - (M32[10] ^ H[10]) + (M32[13] ^ H[13]); |
||||||
|
Q[4] = (M32[1] ^ H[1]) + (M32[2] ^ H[2]) + (M32[9] ^ H[9]) - (M32[11] ^ H[11]) - (M32[14] ^ H[14]); |
||||||
|
Q[5] = (M32[3] ^ H[3]) - (M32[2] ^ H[2]) + (M32[10] ^ H[10]) - (M32[12] ^ H[12]) + (M32[15] ^ H[15]); |
||||||
|
Q[6] = (M32[4] ^ H[4]) - (M32[0] ^ H[0]) - (M32[3] ^ H[3]) - (M32[11] ^ H[11]) + (M32[13] ^ H[13]); |
||||||
|
Q[7] = (M32[1] ^ H[1]) - (M32[4] ^ H[4]) - (M32[5] ^ H[5]) - (M32[12] ^ H[12]) - (M32[14] ^ H[14]); |
||||||
|
Q[8] = (M32[2] ^ H[2]) - (M32[5] ^ H[5]) - (M32[6] ^ H[6]) + (M32[13] ^ H[13]) - (M32[15] ^ H[15]); |
||||||
|
Q[9] = (M32[0] ^ H[0]) - (M32[3] ^ H[3]) + (M32[6] ^ H[6]) - (M32[7] ^ H[7]) + (M32[14] ^ H[14]); |
||||||
|
Q[10] = (M32[8] ^ H[8]) - (M32[1] ^ H[1]) - (M32[4] ^ H[4]) - (M32[7] ^ H[7]) + (M32[15] ^ H[15]); |
||||||
|
Q[11] = (M32[8] ^ H[8]) - (M32[0] ^ H[0]) - (M32[2] ^ H[2]) - (M32[5] ^ H[5]) + (M32[9] ^ H[9]); |
||||||
|
Q[12] = (M32[1] ^ H[1]) + (M32[3] ^ H[3]) - (M32[6] ^ H[6]) - (M32[9] ^ H[9]) + (M32[10] ^ H[10]); |
||||||
|
Q[13] = (M32[2] ^ H[2]) + (M32[4] ^ H[4]) + (M32[7] ^ H[7]) + (M32[10] ^ H[10]) + (M32[11] ^ H[11]); |
||||||
|
Q[14] = (M32[3] ^ H[3]) - (M32[5] ^ H[5]) + (M32[8] ^ H[8]) - (M32[11] ^ H[11]) - (M32[12] ^ H[12]); |
||||||
|
Q[15] = (M32[12] ^ H[12]) - (M32[4] ^ H[4]) - (M32[6] ^ H[6]) - (M32[9] ^ H[9]) + (M32[13] ^ H[13]); |
||||||
|
|
||||||
|
/* Diffuse the differences in every word in a bijective manner with ssi, and then add the values of the previous double pipe.*/ |
||||||
|
Q[0] = ss0(Q[0]) + H[1]; |
||||||
|
Q[1] = ss1(Q[1]) + H[2]; |
||||||
|
Q[2] = ss2(Q[2]) + H[3]; |
||||||
|
Q[3] = ss3(Q[3]) + H[4]; |
||||||
|
Q[4] = ss4(Q[4]) + H[5]; |
||||||
|
Q[5] = ss0(Q[5]) + H[6]; |
||||||
|
Q[6] = ss1(Q[6]) + H[7]; |
||||||
|
Q[7] = ss2(Q[7]) + H[8]; |
||||||
|
Q[8] = ss3(Q[8]) + H[9]; |
||||||
|
Q[9] = ss4(Q[9]) + H[10]; |
||||||
|
Q[10] = ss0(Q[10]) + H[11]; |
||||||
|
Q[11] = ss1(Q[11]) + H[12]; |
||||||
|
Q[12] = ss2(Q[12]) + H[13]; |
||||||
|
Q[13] = ss3(Q[13]) + H[14]; |
||||||
|
Q[14] = ss4(Q[14]) + H[15]; |
||||||
|
Q[15] = ss0(Q[15]) + H[0]; |
||||||
|
|
||||||
|
/* This is the Message expansion or f_1 in the documentation. */ |
||||||
|
/* It has 16 rounds. */ |
||||||
|
/* Blue Midnight Wish has two tunable security parameters. */ |
||||||
|
/* The parameters are named EXPAND_1_ROUNDS and EXPAND_2_ROUNDS. */ |
||||||
|
/* The following relation for these parameters should is satisfied: */ |
||||||
|
/* EXPAND_1_ROUNDS + EXPAND_2_ROUNDS = 16 */ |
||||||
|
#pragma unroll |
||||||
|
for (i = 0; i<2; i++) |
||||||
|
Q[i + 16] = expand32_1(i + 16, M32, H, Q); |
||||||
|
|
||||||
|
#pragma unroll |
||||||
|
for (i = 2; i<16; i++) |
||||||
|
Q[i + 16] = expand32_2(i + 16, M32, H, Q); |
||||||
|
|
||||||
|
/* Blue Midnight Wish has two temporary cummulative variables that accumulate via XORing */ |
||||||
|
/* 16 new variables that are prooduced in the Message Expansion part. */ |
||||||
|
XL32 = Q[16] ^ Q[17] ^ Q[18] ^ Q[19] ^ Q[20] ^ Q[21] ^ Q[22] ^ Q[23]; |
||||||
|
XH32 = XL32^Q[24] ^ Q[25] ^ Q[26] ^ Q[27] ^ Q[28] ^ Q[29] ^ Q[30] ^ Q[31]; |
||||||
|
|
||||||
|
|
||||||
|
/* This part is the function f_2 - in the documentation */ |
||||||
|
|
||||||
|
/* Compute the double chaining pipe for the next message block. */ |
||||||
|
H[0] = (shl(XH32, 5) ^ shr(Q[16], 5) ^ M32[0]) + (XL32 ^ Q[24] ^ Q[0]); |
||||||
|
H[1] = (shr(XH32, 7) ^ shl(Q[17], 8) ^ M32[1]) + (XL32 ^ Q[25] ^ Q[1]); |
||||||
|
H[2] = (shr(XH32, 5) ^ shl(Q[18], 5) ^ M32[2]) + (XL32 ^ Q[26] ^ Q[2]); |
||||||
|
H[3] = (shr(XH32, 1) ^ shl(Q[19], 5) ^ M32[3]) + (XL32 ^ Q[27] ^ Q[3]); |
||||||
|
H[4] = (shr(XH32, 3) ^ Q[20] ^ M32[4]) + (XL32 ^ Q[28] ^ Q[4]); |
||||||
|
H[5] = (shl(XH32, 6) ^ shr(Q[21], 6) ^ M32[5]) + (XL32 ^ Q[29] ^ Q[5]); |
||||||
|
H[6] = (shr(XH32, 4) ^ shl(Q[22], 6) ^ M32[6]) + (XL32 ^ Q[30] ^ Q[6]); |
||||||
|
H[7] = (shr(XH32, 11) ^ shl(Q[23], 2) ^ M32[7]) + (XL32 ^ Q[31] ^ Q[7]); |
||||||
|
|
||||||
|
H[8] = SPH_ROTL32(H[4], 9) + (XH32 ^ Q[24] ^ M32[8]) + (shl(XL32, 8) ^ Q[23] ^ Q[8]); |
||||||
|
H[9] = SPH_ROTL32(H[5], 10) + (XH32 ^ Q[25] ^ M32[9]) + (shr(XL32, 6) ^ Q[16] ^ Q[9]); |
||||||
|
H[10] = SPH_ROTL32(H[6], 11) + (XH32 ^ Q[26] ^ M32[10]) + (shl(XL32, 6) ^ Q[17] ^ Q[10]); |
||||||
|
H[11] = SPH_ROTL32(H[7], 12) + (XH32 ^ Q[27] ^ M32[11]) + (shl(XL32, 4) ^ Q[18] ^ Q[11]); |
||||||
|
H[12] = SPH_ROTL32(H[0], 13) + (XH32 ^ Q[28] ^ M32[12]) + (shr(XL32, 3) ^ Q[19] ^ Q[12]); |
||||||
|
H[13] = SPH_ROTL32(H[1], 14) + (XH32 ^ Q[29] ^ M32[13]) + (shr(XL32, 4) ^ Q[20] ^ Q[13]); |
||||||
|
H[14] = SPH_ROTL32(H[2], 15) + (XH32 ^ Q[30] ^ M32[14]) + (shr(XL32, 7) ^ Q[21] ^ Q[14]); |
||||||
|
H[15] = SPH_ROTL32(H[3], 16) + (XH32 ^ Q[31] ^ M32[15]) + (shr(XL32, 2) ^ Q[22] ^ Q[15]); |
||||||
|
|
||||||
|
} |
@ -0,0 +1,232 @@ |
|||||||
|
/* |
||||||
|
* "credits" kernel implementation. |
||||||
|
* |
||||||
|
* ==========================(LICENSE BEGIN)============================ |
||||||
|
* |
||||||
|
* Copyright (c) 2015 djm34 |
||||||
|
* |
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining |
||||||
|
* a copy of this software and associated documentation files (the |
||||||
|
* "Software"), to deal in the Software without restriction, including |
||||||
|
* without limitation the rights to use, copy, modify, merge, publish, |
||||||
|
* distribute, sublicense, and/or sell copies of the Software, and to |
||||||
|
* permit persons to whom the Software is furnished to do so, subject to |
||||||
|
* the following conditions: |
||||||
|
* |
||||||
|
* The above copyright notice and this permission notice shall be |
||||||
|
* included in all copies or substantial portions of the Software. |
||||||
|
* |
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||||
|
* |
||||||
|
* ===========================(LICENSE END)============================= |
||||||
|
* |
||||||
|
* @author djm34 |
||||||
|
*/ |
||||||
|
#if !defined(cl_khr_byte_addressable_store) |
||||||
|
#error "Device does not support unaligned stores" |
||||||
|
#endif |
||||||
|
|
||||||
|
|
||||||
|
#define ROL32(x, n) rotate(x, (uint) n) |
||||||
|
#define SWAP32(a) (as_uint(as_uchar4(a).wzyx)) |
||||||
|
#define SWAP64(x) as_ulong(as_uchar8(x).s32107654) /// hmm... |
||||||
|
|
||||||
|
#define SHR(x, n) ((x) >> n) |
||||||
|
|
||||||
|
#define S0(x) (ROL32(x, 25) ^ ROL32(x, 14) ^ SHR(x, 3)) |
||||||
|
#define S1(x) (ROL32(x, 15) ^ ROL32(x, 13) ^ SHR(x, 10)) |
||||||
|
|
||||||
|
#define S2(x) (ROL32(x, 30) ^ ROL32(x, 19) ^ ROL32(x, 10)) |
||||||
|
#define S3(x) (ROL32(x, 26) ^ ROL32(x, 21) ^ ROL32(x, 7)) |
||||||
|
|
||||||
|
#define P(a,b,c,d,e,f,g,h,x,K) \ |
||||||
|
{ \ |
||||||
|
temp1 = h + S3(e) + F1(e,f,g) + (K + x); \ |
||||||
|
d += temp1; h = temp1 + S2(a) + F0(a,b,c); \ |
||||||
|
} |
||||||
|
|
||||||
|
#define F0(y, x, z) bitselect(z, y, z ^ x) |
||||||
|
#define F1(x, y, z) bitselect(z, y, x) |
||||||
|
|
||||||
|
#define R0 (W0 = S1(W14) + W9 + S0(W1) + W0) |
||||||
|
#define R1 (W1 = S1(W15) + W10 + S0(W2) + W1) |
||||||
|
#define R2 (W2 = S1(W0) + W11 + S0(W3) + W2) |
||||||
|
#define R3 (W3 = S1(W1) + W12 + S0(W4) + W3) |
||||||
|
#define R4 (W4 = S1(W2) + W13 + S0(W5) + W4) |
||||||
|
#define R5 (W5 = S1(W3) + W14 + S0(W6) + W5) |
||||||
|
#define R6 (W6 = S1(W4) + W15 + S0(W7) + W6) |
||||||
|
#define R7 (W7 = S1(W5) + W0 + S0(W8) + W7) |
||||||
|
#define R8 (W8 = S1(W6) + W1 + S0(W9) + W8) |
||||||
|
#define R9 (W9 = S1(W7) + W2 + S0(W10) + W9) |
||||||
|
#define R10 (W10 = S1(W8) + W3 + S0(W11) + W10) |
||||||
|
#define R11 (W11 = S1(W9) + W4 + S0(W12) + W11) |
||||||
|
#define R12 (W12 = S1(W10) + W5 + S0(W13) + W12) |
||||||
|
#define R13 (W13 = S1(W11) + W6 + S0(W14) + W13) |
||||||
|
#define R14 (W14 = S1(W12) + W7 + S0(W15) + W14) |
||||||
|
#define R15 (W15 = S1(W13) + W8 + S0(W0) + W15) |
||||||
|
|
||||||
|
#define RD14 (S1(W12) + W7 + S0(W15) + W14) |
||||||
|
#define RD15 (S1(W13) + W8 + S0(W0) + W15) |
||||||
|
|
||||||
|
/// generic sha transform |
||||||
|
inline uint8 sha256_Transform(uint16 data, uint8 state) |
||||||
|
{ |
||||||
|
uint temp1; |
||||||
|
uint8 res = state; |
||||||
|
uint W0 = data.s0; |
||||||
|
uint W1 = data.s1; |
||||||
|
uint W2 = data.s2; |
||||||
|
uint W3 = data.s3; |
||||||
|
uint W4 = data.s4; |
||||||
|
uint W5 = data.s5; |
||||||
|
uint W6 = data.s6; |
||||||
|
uint W7 = data.s7; |
||||||
|
uint W8 = data.s8; |
||||||
|
uint W9 = data.s9; |
||||||
|
uint W10 = data.sA; |
||||||
|
uint W11 = data.sB; |
||||||
|
uint W12 = data.sC; |
||||||
|
uint W13 = data.sD; |
||||||
|
uint W14 = data.sE; |
||||||
|
uint W15 = data.sF; |
||||||
|
|
||||||
|
#define v0 res.s0 |
||||||
|
#define v1 res.s1 |
||||||
|
#define v2 res.s2 |
||||||
|
#define v3 res.s3 |
||||||
|
#define v4 res.s4 |
||||||
|
#define v5 res.s5 |
||||||
|
#define v6 res.s6 |
||||||
|
#define v7 res.s7 |
||||||
|
|
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, W0, 0x428A2F98); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, W1, 0x71374491); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, W2, 0xB5C0FBCF); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, W3, 0xE9B5DBA5); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, W4, 0x3956C25B); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, W5, 0x59F111F1); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, W6, 0x923F82A4); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, W7, 0xAB1C5ED5); |
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, W8, 0xD807AA98); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, W9, 0x12835B01); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, W10, 0x243185BE); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, W11, 0x550C7DC3); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, W12, 0x72BE5D74); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, W13, 0x80DEB1FE); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, W14, 0x9BDC06A7); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, W15, 0xC19BF174); |
||||||
|
|
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0xE49B69C1); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0xEFBE4786); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x0FC19DC6); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x240CA1CC); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x2DE92C6F); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4A7484AA); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5CB0A9DC); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x76F988DA); |
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x983E5152); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA831C66D); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xB00327C8); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xBF597FC7); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xC6E00BF3); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD5A79147); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0x06CA6351); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x14292967); |
||||||
|
|
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x27B70A85); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x2E1B2138); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x4D2C6DFC); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x53380D13); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x650A7354); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x766A0ABB); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x81C2C92E); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x92722C85); |
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0xA2BFE8A1); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA81A664B); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xC24B8B70); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xC76C51A3); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xD192E819); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD6990624); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0xF40E3585); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x106AA070); |
||||||
|
|
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x19A4C116); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x1E376C08); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x2748774C); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x34B0BCB5); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x391C0CB3); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4ED8AA4A); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5B9CCA4F); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x682E6FF3); |
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x748F82EE); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0x78A5636F); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0x84C87814); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0x8CC70208); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0x90BEFFFA); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xA4506CEB); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, RD14, 0xBEF9A3F7); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, RD15, 0xC67178F2); |
||||||
|
#undef v0 |
||||||
|
#undef v1 |
||||||
|
#undef v2 |
||||||
|
#undef v3 |
||||||
|
#undef v4 |
||||||
|
#undef v5 |
||||||
|
#undef v6 |
||||||
|
#undef v7 |
||||||
|
return (res + state); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
static __constant uint8 H256 = { |
||||||
|
0x6A09E667, 0xBB67AE85, 0x3C6EF372, |
||||||
|
0xA54FF53A, 0x510E527F, 0x9B05688C, |
||||||
|
0x1F83D9AB, 0x5BE0CD19 |
||||||
|
}; |
||||||
|
|
||||||
|
|
||||||
|
static __constant uint8 pad_data = |
||||||
|
{ |
||||||
|
0x00000000, 0x00000000, 0x80000000, 0x00000000, |
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00000540 |
||||||
|
}; |
||||||
|
|
||||||
|
static __constant uint8 pad_state = |
||||||
|
{ |
||||||
|
0x80000000, 0x00000000, 0x00000000, 0x00000000, |
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00000100 |
||||||
|
}; |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
||||||
|
__kernel void search(__global const uchar* restrict input, __global uint* restrict output,const ulong target, uint8 midstate ) |
||||||
|
{ |
||||||
|
|
||||||
|
|
||||||
|
uint nonce = get_global_id(0); |
||||||
|
uint16 in; |
||||||
|
uint8 state1; |
||||||
|
|
||||||
|
in.lo = ((__global const uint8 *)input)[4]; |
||||||
|
in.hi = pad_data; |
||||||
|
in.hi.s0 = ((__global const uint *)input)[40]; |
||||||
|
in.hi.s1 = ((__global const uint *)input)[41]; |
||||||
|
in.s3 = nonce; |
||||||
|
state1 = sha256_Transform(in, midstate); |
||||||
|
in.lo = state1; |
||||||
|
in.hi = pad_state; |
||||||
|
state1 = sha256_Transform(in,H256); |
||||||
|
|
||||||
|
if (SWAP64(state1.s67) <= target) |
||||||
|
output[atomic_inc(output + 0xFF)] = nonce; |
||||||
|
|
||||||
|
} |
||||||
|
|
@ -0,0 +1,132 @@ |
|||||||
|
// cubehash256 |
||||||
|
// djm34 2015 based on ccminer cubehash512 |
||||||
|
|
||||||
|
#define CUBEHASH_ROUNDS 16 /* this is r for CubeHashr/b */ |
||||||
|
#define CUBEHASH_BLOCKBYTES 32 /* this is b for CubeHashr/b */ |
||||||
|
|
||||||
|
|
||||||
|
#define LROT(x, bits) rotate( x,(uint) bits) |
||||||
|
|
||||||
|
|
||||||
|
#define ROTATEUPWARDS7(a) LROT(a,7) |
||||||
|
#define ROTATEUPWARDS11(a) LROT(a,11) |
||||||
|
|
||||||
|
#define SWAP(a,b) { uint u = a; a = b; b = u; } |
||||||
|
|
||||||
|
inline void rrounds(uint x[2][2][2][2][2]) |
||||||
|
{ |
||||||
|
int r; |
||||||
|
int j; |
||||||
|
int k; |
||||||
|
int l; |
||||||
|
int m; |
||||||
|
|
||||||
|
//#pragma unroll 2 |
||||||
|
for (r = 0; r < CUBEHASH_ROUNDS; ++r) { |
||||||
|
|
||||||
|
/* "add x_0jklm into x_1jklmn modulo 2^32" */ |
||||||
|
//#pragma unroll 2 |
||||||
|
for (j = 0; j < 2; ++j) |
||||||
|
//#pragma unroll 2 |
||||||
|
for (k = 0; k < 2; ++k) |
||||||
|
//#pragma unroll 2 |
||||||
|
for (l = 0; l < 2; ++l) |
||||||
|
//#pragma unroll 2 |
||||||
|
for (m = 0; m < 2; ++m) |
||||||
|
x[1][j][k][l][m] += x[0][j][k][l][m]; |
||||||
|
|
||||||
|
/* "rotate x_0jklm upwards by 7 bits" */ |
||||||
|
//#pragma unroll 2 |
||||||
|
for (j = 0; j < 2; ++j) |
||||||
|
//#pragma unroll 2 |
||||||
|
for (k = 0; k < 2; ++k) |
||||||
|
//#pragma unroll 2 |
||||||
|
for (l = 0; l < 2; ++l) |
||||||
|
//#pragma unroll 2 |
||||||
|
for (m = 0; m < 2; ++m) |
||||||
|
x[0][j][k][l][m] = ROTATEUPWARDS7(x[0][j][k][l][m]); |
||||||
|
|
||||||
|
/* "swap x_00klm with x_01klm" */ |
||||||
|
//#pragma unroll 2 |
||||||
|
for (k = 0; k < 2; ++k) |
||||||
|
//#pragma unroll 2 |
||||||
|
for (l = 0; l < 2; ++l) |
||||||
|
//#pragma unroll 2 |
||||||
|
for (m = 0; m < 2; ++m) |
||||||
|
SWAP(x[0][0][k][l][m], x[0][1][k][l][m]) |
||||||
|
|
||||||
|
/* "xor x_1jklm into x_0jklm" */ |
||||||
|
//#pragma unroll 2 |
||||||
|
for (j = 0; j < 2; ++j) |
||||||
|
//#pragma unroll 2 |
||||||
|
for (k = 0; k < 2; ++k) |
||||||
|
//#pragma unroll 2 |
||||||
|
for (l = 0; l < 2; ++l) |
||||||
|
//#pragma unroll 2 |
||||||
|
for (m = 0; m < 2; ++m) |
||||||
|
x[0][j][k][l][m] ^= x[1][j][k][l][m]; |
||||||
|
|
||||||
|
/* "swap x_1jk0m with x_1jk1m" */ |
||||||
|
//#pragma unroll 2 |
||||||
|
for (j = 0; j < 2; ++j) |
||||||
|
//#pragma unroll 2 |
||||||
|
for (k = 0; k < 2; ++k) |
||||||
|
//#pragma unroll 2 |
||||||
|
for (m = 0; m < 2; ++m) |
||||||
|
SWAP(x[1][j][k][0][m], x[1][j][k][1][m]) |
||||||
|
|
||||||
|
/* "add x_0jklm into x_1jklm modulo 2^32" */ |
||||||
|
//#pragma unroll 2 |
||||||
|
for (j = 0; j < 2; ++j) |
||||||
|
//#pragma unroll 2 |
||||||
|
for (k = 0; k < 2; ++k) |
||||||
|
//#pragma unroll 2 |
||||||
|
for (l = 0; l < 2; ++l) |
||||||
|
//#pragma unroll 2 |
||||||
|
for (m = 0; m < 2; ++m) |
||||||
|
x[1][j][k][l][m] += x[0][j][k][l][m]; |
||||||
|
|
||||||
|
/* "rotate x_0jklm upwards by 11 bits" */ |
||||||
|
//#pragma unroll 2 |
||||||
|
for (j = 0; j < 2; ++j) |
||||||
|
//#pragma unroll 2 |
||||||
|
for (k = 0; k < 2; ++k) |
||||||
|
//#pragma unroll 2 |
||||||
|
for (l = 0; l < 2; ++l) |
||||||
|
//#pragma unroll 2 |
||||||
|
for (m = 0; m < 2; ++m) |
||||||
|
x[0][j][k][l][m] = ROTATEUPWARDS11(x[0][j][k][l][m]); |
||||||
|
|
||||||
|
/* "swap x_0j0lm with x_0j1lm" */ |
||||||
|
//#pragma unroll 2 |
||||||
|
for (j = 0; j < 2; ++j) |
||||||
|
//#pragma unroll 2 |
||||||
|
for (l = 0; l < 2; ++l) |
||||||
|
//#pragma unroll 2 |
||||||
|
for (m = 0; m < 2; ++m) |
||||||
|
SWAP(x[0][j][0][l][m], x[0][j][1][l][m]) |
||||||
|
|
||||||
|
/* "xor x_1jklm into x_0jklm" */ |
||||||
|
//#pragma unroll 2 |
||||||
|
for (j = 0; j < 2; ++j) |
||||||
|
//#pragma unroll 2 |
||||||
|
for (k = 0; k < 2; ++k) |
||||||
|
//#pragma unroll 2 |
||||||
|
for (l = 0; l < 2; ++l) |
||||||
|
//#pragma unroll 2 |
||||||
|
for (m = 0; m < 2; ++m) |
||||||
|
x[0][j][k][l][m] ^= x[1][j][k][l][m]; |
||||||
|
|
||||||
|
/* "swap x_1jkl0 with x_1jkl1" */ |
||||||
|
//#pragma unroll 2 |
||||||
|
for (j = 0; j < 2; ++j) |
||||||
|
//#pragma unroll 2 |
||||||
|
for (k = 0; k < 2; ++k) |
||||||
|
//#pragma unroll 2 |
||||||
|
for (l = 0; l < 2; ++l) |
||||||
|
SWAP(x[1][j][k][l][0], x[1][j][k][l][1]) |
||||||
|
|
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
|
@ -0,0 +1,525 @@ |
|||||||
|
/* |
||||||
|
* Lyra2RE kernel implementation. |
||||||
|
* |
||||||
|
* ==========================(LICENSE BEGIN)============================ |
||||||
|
* Copyright (c) 2014 djm34 |
||||||
|
* Copyright (c) 2014 James Lovejoy |
||||||
|
* |
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining |
||||||
|
* a copy of this software and associated documentation files (the |
||||||
|
* "Software"), to deal in the Software without restriction, including |
||||||
|
* without limitation the rights to use, copy, modify, merge, publish, |
||||||
|
* distribute, sublicense, and/or sell copies of the Software, and to |
||||||
|
* permit persons to whom the Software is furnished to do so, subject to |
||||||
|
* the following conditions: |
||||||
|
* |
||||||
|
* The above copyright notice and this permission notice shall be |
||||||
|
* included in all copies or substantial portions of the Software. |
||||||
|
* |
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||||
|
* |
||||||
|
* ===========================(LICENSE END)============================= |
||||||
|
* |
||||||
|
* @author djm34 |
||||||
|
*/ |
||||||
|
// typedef unsigned int uint; |
||||||
|
#pragma OPENCL EXTENSION cl_amd_printf : enable |
||||||
|
|
||||||
|
#ifndef LYRA2RE_CL |
||||||
|
#define LYRA2RE_CL |
||||||
|
|
||||||
|
#if __ENDIAN_LITTLE__ |
||||||
|
#define SPH_LITTLE_ENDIAN 1 |
||||||
|
#else |
||||||
|
#define SPH_BIG_ENDIAN 1 |
||||||
|
#endif |
||||||
|
|
||||||
|
#define SPH_UPTR sph_u64 |
||||||
|
|
||||||
|
typedef unsigned int sph_u32; |
||||||
|
typedef int sph_s32; |
||||||
|
#ifndef __OPENCL_VERSION__ |
||||||
|
typedef unsigned long sph_u64; |
||||||
|
typedef long sph_s64; |
||||||
|
#else |
||||||
|
typedef unsigned long sph_u64; |
||||||
|
typedef long sph_s64; |
||||||
|
#endif |
||||||
|
|
||||||
|
|
||||||
|
#define SPH_64 1 |
||||||
|
#define SPH_64_TRUE 1 |
||||||
|
|
||||||
|
#define SPH_C32(x) ((sph_u32)(x ## U)) |
||||||
|
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) |
||||||
|
|
||||||
|
#define SPH_C64(x) ((sph_u64)(x ## UL)) |
||||||
|
#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF)) |
||||||
|
|
||||||
|
//#define SPH_ROTL32(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) |
||||||
|
//#define SPH_ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) |
||||||
|
//#define SPH_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) |
||||||
|
//#define SPH_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) |
||||||
|
|
||||||
|
#define SPH_ROTL32(x,n) rotate(x,(uint)n) //faster with driver 14.6 |
||||||
|
#define SPH_ROTR32(x,n) rotate(x,(uint)(32-n)) |
||||||
|
#define SPH_ROTL64(x,n) rotate(x,(ulong)n) |
||||||
|
#define SPH_ROTR64(x,n) rotate(x,(ulong)(64-n)) |
||||||
|
static inline sph_u64 ror64(sph_u64 vw, unsigned a) { |
||||||
|
uint2 result; |
||||||
|
uint2 v = as_uint2(vw); |
||||||
|
unsigned n = (unsigned)(64 - a); |
||||||
|
if (n == 32) { return as_ulong((uint2)(v.y, v.x)); } |
||||||
|
if (n < 32) { |
||||||
|
result.y = ((v.y << (n)) | (v.x >> (32 - n))); |
||||||
|
result.x = ((v.x << (n)) | (v.y >> (32 - n))); |
||||||
|
} |
||||||
|
else { |
||||||
|
result.y = ((v.x << (n - 32)) | (v.y >> (64 - n))); |
||||||
|
result.x = ((v.y << (n - 32)) | (v.x >> (64 - n))); |
||||||
|
} |
||||||
|
return as_ulong(result); |
||||||
|
} |
||||||
|
|
||||||
|
//#define SPH_ROTR64(l,n) ror64(l,n) |
||||||
|
#define memshift 3 |
||||||
|
#include "blake256.cl" |
||||||
|
#include "lyra2v2.cl" |
||||||
|
#include "keccak1600.cl" |
||||||
|
#include "skein256.cl" |
||||||
|
#include "cubehash.cl" |
||||||
|
#include "bmw256.cl" |
||||||
|
|
||||||
|
#define SWAP4(x) as_uint(as_uchar4(x).wzyx) |
||||||
|
#define SWAP8(x) as_ulong(as_uchar8(x).s76543210) |
||||||
|
//#define SWAP8(x) as_ulong(as_uchar8(x).s32107654) |
||||||
|
#if SPH_BIG_ENDIAN |
||||||
|
#define DEC64E(x) (x) |
||||||
|
#define DEC64BE(x) (*(const __global sph_u64 *) (x)); |
||||||
|
#define DEC64LE(x) SWAP8(*(const __global sph_u64 *) (x)); |
||||||
|
#define DEC32LE(x) (*(const __global sph_u32 *) (x)); |
||||||
|
#else |
||||||
|
#define DEC64E(x) SWAP8(x) |
||||||
|
#define DEC64BE(x) SWAP8(*(const __global sph_u64 *) (x)); |
||||||
|
#define DEC64LE(x) (*(const __global sph_u64 *) (x)); |
||||||
|
#define DEC32LE(x) SWAP4(*(const __global sph_u32 *) (x)); |
||||||
|
#endif |
||||||
|
|
||||||
|
typedef union { |
||||||
|
unsigned char h1[32]; |
||||||
|
uint h4[8]; |
||||||
|
ulong h8[4]; |
||||||
|
} hash_t; |
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
||||||
|
__kernel void search( |
||||||
|
__global uchar* hashes, |
||||||
|
// precalc hash from fisrt part of message |
||||||
|
const uint h0, |
||||||
|
const uint h1, |
||||||
|
const uint h2, |
||||||
|
const uint h3, |
||||||
|
const uint h4, |
||||||
|
const uint h5, |
||||||
|
const uint h6, |
||||||
|
const uint h7, |
||||||
|
// last 12 bytes of original message |
||||||
|
const uint in16, |
||||||
|
const uint in17, |
||||||
|
const uint in18 |
||||||
|
) |
||||||
|
{ |
||||||
|
uint gid = get_global_id(0); |
||||||
|
__global hash_t *hash = (__global hash_t *)(hashes + (4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||||
|
|
||||||
|
|
||||||
|
// __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); |
||||||
|
|
||||||
|
unsigned int h[8]; |
||||||
|
unsigned int m[16]; |
||||||
|
unsigned int v[16]; |
||||||
|
|
||||||
|
|
||||||
|
h[0]=h0; |
||||||
|
h[1]=h1; |
||||||
|
h[2]=h2; |
||||||
|
h[3]=h3; |
||||||
|
h[4]=h4; |
||||||
|
h[5]=h5; |
||||||
|
h[6]=h6; |
||||||
|
h[7]=h7; |
||||||
|
// compress 2nd round |
||||||
|
m[0] = in16; |
||||||
|
m[1] = in17; |
||||||
|
m[2] = in18; |
||||||
|
m[3] = SWAP4(gid); |
||||||
|
|
||||||
|
for (int i = 4; i < 16; i++) {m[i] = c_Padding[i];} |
||||||
|
|
||||||
|
for (int i = 0; i < 8; i++) {v[i] = h[i];} |
||||||
|
|
||||||
|
v[8] = c_u256[0]; |
||||||
|
v[9] = c_u256[1]; |
||||||
|
v[10] = c_u256[2]; |
||||||
|
v[11] = c_u256[3]; |
||||||
|
v[12] = c_u256[4] ^ 640; |
||||||
|
v[13] = c_u256[5] ^ 640; |
||||||
|
v[14] = c_u256[6]; |
||||||
|
v[15] = c_u256[7]; |
||||||
|
|
||||||
|
for (int r = 0; r < 14; r++) { |
||||||
|
GS(0, 4, 0x8, 0xC, 0x0); |
||||||
|
GS(1, 5, 0x9, 0xD, 0x2); |
||||||
|
GS(2, 6, 0xA, 0xE, 0x4); |
||||||
|
GS(3, 7, 0xB, 0xF, 0x6); |
||||||
|
GS(0, 5, 0xA, 0xF, 0x8); |
||||||
|
GS(1, 6, 0xB, 0xC, 0xA); |
||||||
|
GS(2, 7, 0x8, 0xD, 0xC); |
||||||
|
GS(3, 4, 0x9, 0xE, 0xE); |
||||||
|
} |
||||||
|
|
||||||
|
for (int i = 0; i < 16; i++) { |
||||||
|
int j = i & 7; |
||||||
|
h[j] ^= v[i];} |
||||||
|
|
||||||
|
for (int i=0;i<8;i++) {hash->h4[i]=SWAP4(h[i]);} |
||||||
|
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
// keccak256 |
||||||
|
|
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
||||||
|
__kernel void search1(__global uchar* hashes) |
||||||
|
{ |
||||||
|
uint gid = get_global_id(0); |
||||||
|
// __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); |
||||||
|
|
||||||
|
__global hash_t *hash = (__global hash_t *)(hashes + (4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||||
|
|
||||||
|
sph_u64 keccak_gpu_state[25]; |
||||||
|
|
||||||
|
for (int i = 0; i<25; i++) { |
||||||
|
if (i<4) { keccak_gpu_state[i] = hash->h8[i]; } |
||||||
|
else { keccak_gpu_state[i] = 0; } |
||||||
|
} |
||||||
|
keccak_gpu_state[4] = 0x0000000000000001; |
||||||
|
keccak_gpu_state[16] = 0x8000000000000000; |
||||||
|
|
||||||
|
keccak_block(keccak_gpu_state); |
||||||
|
for (int i = 0; i<4; i++) { hash->h8[i] = keccak_gpu_state[i]; } |
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
// cubehash256 |
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
||||||
|
__kernel void search2(__global uchar* hashes) |
||||||
|
{ |
||||||
|
uint gid = get_global_id(0); |
||||||
|
__global hash_t *hash = (__global hash_t *)(hashes + (4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||||
|
|
||||||
|
|
||||||
|
sph_u32 x0 = 0xEA2BD4B4; sph_u32 x1 = 0xCCD6F29F; sph_u32 x2 = 0x63117E71; |
||||||
|
sph_u32 x3 = 0x35481EAE; sph_u32 x4 = 0x22512D5B; sph_u32 x5 = 0xE5D94E63; |
||||||
|
sph_u32 x6 = 0x7E624131; sph_u32 x7 = 0xF4CC12BE; sph_u32 x8 = 0xC2D0B696; |
||||||
|
sph_u32 x9 = 0x42AF2070; sph_u32 xa = 0xD0720C35; sph_u32 xb = 0x3361DA8C; |
||||||
|
sph_u32 xc = 0x28CCECA4; sph_u32 xd = 0x8EF8AD83; sph_u32 xe = 0x4680AC00; |
||||||
|
sph_u32 xf = 0x40E5FBAB; |
||||||
|
|
||||||
|
sph_u32 xg = 0xD89041C3; sph_u32 xh = 0x6107FBD5; |
||||||
|
sph_u32 xi = 0x6C859D41; sph_u32 xj = 0xF0B26679; sph_u32 xk = 0x09392549; |
||||||
|
sph_u32 xl = 0x5FA25603; sph_u32 xm = 0x65C892FD; sph_u32 xn = 0x93CB6285; |
||||||
|
sph_u32 xo = 0x2AF2B5AE; sph_u32 xp = 0x9E4B4E60; sph_u32 xq = 0x774ABFDD; |
||||||
|
sph_u32 xr = 0x85254725; sph_u32 xs = 0x15815AEB; sph_u32 xt = 0x4AB6AAD6; |
||||||
|
sph_u32 xu = 0x9CDAF8AF; sph_u32 xv = 0xD6032C0A; |
||||||
|
|
||||||
|
x0 ^= (hash->h4[0]); |
||||||
|
x1 ^= (hash->h4[1]); |
||||||
|
x2 ^= (hash->h4[2]); |
||||||
|
x3 ^= (hash->h4[3]); |
||||||
|
x4 ^= (hash->h4[4]); |
||||||
|
x5 ^= (hash->h4[5]); |
||||||
|
x6 ^= (hash->h4[6]); |
||||||
|
x7 ^= (hash->h4[7]); |
||||||
|
|
||||||
|
|
||||||
|
SIXTEEN_ROUNDS; |
||||||
|
x0 ^= 0x80; |
||||||
|
SIXTEEN_ROUNDS; |
||||||
|
xv ^= 0x01; |
||||||
|
for (int i = 0; i < 10; ++i) SIXTEEN_ROUNDS; |
||||||
|
|
||||||
|
hash->h4[0] = x0; |
||||||
|
hash->h4[1] = x1; |
||||||
|
hash->h4[2] = x2; |
||||||
|
hash->h4[3] = x3; |
||||||
|
hash->h4[4] = x4; |
||||||
|
hash->h4[5] = x5; |
||||||
|
hash->h4[6] = x6; |
||||||
|
hash->h4[7] = x7; |
||||||
|
|
||||||
|
|
||||||
|
barrier(CLK_GLOBAL_MEM_FENCE); |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
/// lyra2 algo |
||||||
|
|
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
||||||
|
__kernel void search3(__global uchar* hashes,__global uchar* matrix ) |
||||||
|
{ |
||||||
|
uint gid = get_global_id(0); |
||||||
|
// __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); |
||||||
|
__global hash_t *hash = (__global hash_t *)(hashes + (4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||||
|
__global ulong4 *DMatrix = (__global ulong4 *)(matrix + (4 * memshift * 4 * 4 * 8 * (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||||
|
|
||||||
|
// uint offset = (4 * memshift * 4 * 4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))/32; |
||||||
|
ulong4 state[4]; |
||||||
|
|
||||||
|
state[0].x = hash->h8[0]; //password |
||||||
|
state[0].y = hash->h8[1]; //password |
||||||
|
state[0].z = hash->h8[2]; //password |
||||||
|
state[0].w = hash->h8[3]; //password |
||||||
|
state[1] = state[0]; |
||||||
|
state[2] = (ulong4)(0x6a09e667f3bcc908UL, 0xbb67ae8584caa73bUL, 0x3c6ef372fe94f82bUL, 0xa54ff53a5f1d36f1UL); |
||||||
|
state[3] = (ulong4)(0x510e527fade682d1UL, 0x9b05688c2b3e6c1fUL, 0x1f83d9abfb41bd6bUL, 0x5be0cd19137e2179UL); |
||||||
|
for (int i = 0; i<12; i++) { round_lyra(state); } |
||||||
|
|
||||||
|
state[0] ^= (ulong4)(0x20,0x20,0x20,0x01); |
||||||
|
state[1] ^= (ulong4)(0x04,0x04,0x80,0x0100000000000000); |
||||||
|
|
||||||
|
for (int i = 0; i<12; i++) { round_lyra(state); } |
||||||
|
|
||||||
|
|
||||||
|
uint ps1 = (memshift * 3); |
||||||
|
//#pragma unroll 4 |
||||||
|
for (int i = 0; i < 4; i++) |
||||||
|
{ |
||||||
|
uint s1 = ps1 - memshift * i; |
||||||
|
for (int j = 0; j < 3; j++) |
||||||
|
(DMatrix)[j+s1] = state[j]; |
||||||
|
|
||||||
|
round_lyra(state); |
||||||
|
} |
||||||
|
|
||||||
|
reduceDuplexf(state,DMatrix); |
||||||
|
|
||||||
|
reduceDuplexRowSetupf(1, 0, 2,state, DMatrix); |
||||||
|
reduceDuplexRowSetupf(2, 1, 3, state,DMatrix); |
||||||
|
|
||||||
|
|
||||||
|
uint rowa; |
||||||
|
uint prev = 3; |
||||||
|
for (uint i = 0; i<4; i++) { |
||||||
|
rowa = state[0].x & 3; |
||||||
|
reduceDuplexRowf(prev, rowa, i, state, DMatrix); |
||||||
|
prev = i; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
uint shift = (memshift * 4 * rowa); |
||||||
|
|
||||||
|
for (int j = 0; j < 3; j++) |
||||||
|
state[j] ^= (DMatrix)[j+shift]; |
||||||
|
|
||||||
|
for (int i = 0; i < 12; i++) |
||||||
|
round_lyra(state); |
||||||
|
////////////////////////////////////// |
||||||
|
|
||||||
|
|
||||||
|
for (int i = 0; i<4; i++) {hash->h8[i] = ((ulong*)state)[i];} |
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
//skein256 |
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
||||||
|
__kernel void search4(__global uchar* hashes) |
||||||
|
{ |
||||||
|
uint gid = get_global_id(0); |
||||||
|
// __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); |
||||||
|
__global hash_t *hash = (__global hash_t *)(hashes + (4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||||
|
|
||||||
|
|
||||||
|
sph_u64 h[9]; |
||||||
|
sph_u64 t[3]; |
||||||
|
sph_u64 dt0,dt1,dt2,dt3; |
||||||
|
sph_u64 p0, p1, p2, p3, p4, p5, p6, p7; |
||||||
|
h[8] = skein_ks_parity; |
||||||
|
|
||||||
|
for (int i = 0; i<8; i++) { |
||||||
|
h[i] = SKEIN_IV512_256[i]; |
||||||
|
h[8] ^= h[i];} |
||||||
|
|
||||||
|
t[0]=t12[0]; |
||||||
|
t[1]=t12[1]; |
||||||
|
t[2]=t12[2]; |
||||||
|
|
||||||
|
dt0=hash->h8[0]; |
||||||
|
dt1=hash->h8[1]; |
||||||
|
dt2=hash->h8[2]; |
||||||
|
dt3=hash->h8[3]; |
||||||
|
|
||||||
|
p0 = h[0] + dt0; |
||||||
|
p1 = h[1] + dt1; |
||||||
|
p2 = h[2] + dt2; |
||||||
|
p3 = h[3] + dt3; |
||||||
|
p4 = h[4]; |
||||||
|
p5 = h[5] + t[0]; |
||||||
|
p6 = h[6] + t[1]; |
||||||
|
p7 = h[7]; |
||||||
|
|
||||||
|
#pragma unroll |
||||||
|
for (int i = 1; i<19; i+=2) {Round_8_512(p0,p1,p2,p3,p4,p5,p6,p7,i);} |
||||||
|
p0 ^= dt0; |
||||||
|
p1 ^= dt1; |
||||||
|
p2 ^= dt2; |
||||||
|
p3 ^= dt3; |
||||||
|
|
||||||
|
h[0] = p0; |
||||||
|
h[1] = p1; |
||||||
|
h[2] = p2; |
||||||
|
h[3] = p3; |
||||||
|
h[4] = p4; |
||||||
|
h[5] = p5; |
||||||
|
h[6] = p6; |
||||||
|
h[7] = p7; |
||||||
|
h[8] = skein_ks_parity; |
||||||
|
|
||||||
|
for (int i = 0; i<8; i++) { h[8] ^= h[i]; } |
||||||
|
|
||||||
|
t[0] = t12[3]; |
||||||
|
t[1] = t12[4]; |
||||||
|
t[2] = t12[5]; |
||||||
|
p5 += t[0]; //p5 already equal h[5] |
||||||
|
p6 += t[1]; |
||||||
|
|
||||||
|
#pragma unroll |
||||||
|
for (int i = 1; i<19; i+=2) { Round_8_512(p0, p1, p2, p3, p4, p5, p6, p7, i); } |
||||||
|
|
||||||
|
hash->h8[0] = p0; |
||||||
|
hash->h8[1] = p1; |
||||||
|
hash->h8[2] = p2; |
||||||
|
hash->h8[3] = p3; |
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
//cubehash |
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
||||||
|
__kernel void search5(__global uchar* hashes) |
||||||
|
{ |
||||||
|
uint gid = get_global_id(0); |
||||||
|
__global hash_t *hash = (__global hash_t *)(hashes + (4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||||
|
|
||||||
|
sph_u32 x0 = 0xEA2BD4B4; sph_u32 x1 = 0xCCD6F29F; sph_u32 x2 = 0x63117E71; |
||||||
|
sph_u32 x3 = 0x35481EAE; sph_u32 x4 = 0x22512D5B; sph_u32 x5 = 0xE5D94E63; |
||||||
|
sph_u32 x6 = 0x7E624131; sph_u32 x7 = 0xF4CC12BE; sph_u32 x8 = 0xC2D0B696; |
||||||
|
sph_u32 x9 = 0x42AF2070; sph_u32 xa = 0xD0720C35; sph_u32 xb = 0x3361DA8C; |
||||||
|
sph_u32 xc = 0x28CCECA4; sph_u32 xd = 0x8EF8AD83; sph_u32 xe = 0x4680AC00; |
||||||
|
sph_u32 xf = 0x40E5FBAB; |
||||||
|
|
||||||
|
sph_u32 xg = 0xD89041C3; sph_u32 xh = 0x6107FBD5; |
||||||
|
sph_u32 xi = 0x6C859D41; sph_u32 xj = 0xF0B26679; sph_u32 xk = 0x09392549; |
||||||
|
sph_u32 xl = 0x5FA25603; sph_u32 xm = 0x65C892FD; sph_u32 xn = 0x93CB6285; |
||||||
|
sph_u32 xo = 0x2AF2B5AE; sph_u32 xp = 0x9E4B4E60; sph_u32 xq = 0x774ABFDD; |
||||||
|
sph_u32 xr = 0x85254725; sph_u32 xs = 0x15815AEB; sph_u32 xt = 0x4AB6AAD6; |
||||||
|
sph_u32 xu = 0x9CDAF8AF; sph_u32 xv = 0xD6032C0A; |
||||||
|
|
||||||
|
x0 ^= (hash->h4[0]); |
||||||
|
x1 ^= (hash->h4[1]); |
||||||
|
x2 ^= (hash->h4[2]); |
||||||
|
x3 ^= (hash->h4[3]); |
||||||
|
x4 ^= (hash->h4[4]); |
||||||
|
x5 ^= (hash->h4[5]); |
||||||
|
x6 ^= (hash->h4[6]); |
||||||
|
x7 ^= (hash->h4[7]); |
||||||
|
|
||||||
|
|
||||||
|
SIXTEEN_ROUNDS; |
||||||
|
x0 ^= 0x80; |
||||||
|
SIXTEEN_ROUNDS; |
||||||
|
xv ^= 0x01; |
||||||
|
for (int i = 0; i < 10; ++i) SIXTEEN_ROUNDS; |
||||||
|
|
||||||
|
hash->h4[0] = x0; |
||||||
|
hash->h4[1] = x1; |
||||||
|
hash->h4[2] = x2; |
||||||
|
hash->h4[3] = x3; |
||||||
|
hash->h4[4] = x4; |
||||||
|
hash->h4[5] = x5; |
||||||
|
hash->h4[6] = x6; |
||||||
|
hash->h4[7] = x7; |
||||||
|
|
||||||
|
|
||||||
|
barrier(CLK_GLOBAL_MEM_FENCE); |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
||||||
|
__kernel void search6(__global uchar* hashes, __global uint* output, const ulong target) |
||||||
|
{ |
||||||
|
uint gid = get_global_id(0); |
||||||
|
__global hash_t *hash = (__global hash_t *)(hashes + (4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||||
|
|
||||||
|
uint dh[16] = { |
||||||
|
0x40414243, 0x44454647, |
||||||
|
0x48494A4B, 0x4C4D4E4F, |
||||||
|
0x50515253, 0x54555657, |
||||||
|
0x58595A5B, 0x5C5D5E5F, |
||||||
|
0x60616263, 0x64656667, |
||||||
|
0x68696A6B, 0x6C6D6E6F, |
||||||
|
0x70717273, 0x74757677, |
||||||
|
0x78797A7B, 0x7C7D7E7F |
||||||
|
}; |
||||||
|
uint final_s[16] = { |
||||||
|
0xaaaaaaa0, 0xaaaaaaa1, 0xaaaaaaa2, |
||||||
|
0xaaaaaaa3, 0xaaaaaaa4, 0xaaaaaaa5, |
||||||
|
0xaaaaaaa6, 0xaaaaaaa7, 0xaaaaaaa8, |
||||||
|
0xaaaaaaa9, 0xaaaaaaaa, 0xaaaaaaab, |
||||||
|
0xaaaaaaac, 0xaaaaaaad, 0xaaaaaaae, |
||||||
|
0xaaaaaaaf |
||||||
|
}; |
||||||
|
|
||||||
|
uint message[16]; |
||||||
|
for (int i = 0; i<8; i++) message[i] = hash->h4[i]; |
||||||
|
for (int i = 9; i<14; i++) message[i] = 0; |
||||||
|
message[8]= 0x80; |
||||||
|
message[14]=0x100; |
||||||
|
message[15]=0; |
||||||
|
|
||||||
|
Compression256(message, dh); |
||||||
|
Compression256(dh, final_s); |
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
|
||||||
|
bool result = ( ((ulong*)final_s)[7] <= target); |
||||||
|
if (result) { |
||||||
|
output[atomic_inc(output + 0xFF)] = SWAP4(gid); |
||||||
|
} |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
#endif // LYRA2RE_CL |
@ -0,0 +1,184 @@ |
|||||||
|
/* |
||||||
|
* Lyra2 kernel implementation. |
||||||
|
* |
||||||
|
* ==========================(LICENSE BEGIN)============================ |
||||||
|
* Copyright (c) 2014 djm34 |
||||||
|
* |
||||||
|
* |
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining |
||||||
|
* a copy of this software and associated documentation files (the |
||||||
|
* "Software"), to deal in the Software without restriction, including |
||||||
|
* without limitation the rights to use, copy, modify, merge, publish, |
||||||
|
* distribute, sublicense, and/or sell copies of the Software, and to |
||||||
|
* permit persons to whom the Software is furnished to do so, subject to |
||||||
|
* the following conditions: |
||||||
|
* |
||||||
|
* The above copyright notice and this permission notice shall be |
||||||
|
* included in all copies or substantial portions of the Software. |
||||||
|
* |
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||||
|
* |
||||||
|
* ===========================(LICENSE END)============================= |
||||||
|
* |
||||||
|
* @author djm34 |
||||||
|
*/ |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#define ROTL64(x,n) rotate(x,(ulong)n) |
||||||
|
#define ROTR64(x,n) rotate(x,(ulong)(64-n)) |
||||||
|
#define SWAP32(x) as_ulong(as_uint2(x).s10) |
||||||
|
#define SWAP24(x) as_ulong(as_uchar8(x).s34567012) |
||||||
|
#define SWAP16(x) as_ulong(as_uchar8(x).s23456701) |
||||||
|
|
||||||
|
#define G(a,b,c,d) \ |
||||||
|
do { \ |
||||||
|
a += b; d ^= a; d = SWAP32(d); \ |
||||||
|
c += d; b ^= c; b = ROTR64(b,24); \ |
||||||
|
a += b; d ^= a; d = ROTR64(d,16); \ |
||||||
|
c += d; b ^= c; b = ROTR64(b, 63); \ |
||||||
|
\ |
||||||
|
} while (0) |
||||||
|
|
||||||
|
#define G_old(a,b,c,d) \ |
||||||
|
do { \ |
||||||
|
a += b; d ^= a; d = ROTR64(d, 32); \ |
||||||
|
c += d; b ^= c; b = ROTR64(b, 24); \ |
||||||
|
a += b; d ^= a; d = ROTR64(d, 16); \ |
||||||
|
c += d; b ^= c; b = ROTR64(b, 63); \ |
||||||
|
\ |
||||||
|
} while (0) |
||||||
|
|
||||||
|
|
||||||
|
/*One Round of the Blake2b's compression function*/ |
||||||
|
|
||||||
|
#define round_lyra(s) \ |
||||||
|
do { \ |
||||||
|
G(s[0].x, s[1].x, s[2].x, s[3].x); \ |
||||||
|
G(s[0].y, s[1].y, s[2].y, s[3].y); \ |
||||||
|
G(s[0].z, s[1].z, s[2].z, s[3].z); \ |
||||||
|
G(s[0].w, s[1].w, s[2].w, s[3].w); \ |
||||||
|
G(s[0].x, s[1].y, s[2].z, s[3].w); \ |
||||||
|
G(s[0].y, s[1].z, s[2].w, s[3].x); \ |
||||||
|
G(s[0].z, s[1].w, s[2].x, s[3].y); \ |
||||||
|
G(s[0].w, s[1].x, s[2].y, s[3].z); \ |
||||||
|
} while(0) |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void reduceDuplexf(ulong4* state ,__global ulong4* DMatrix) |
||||||
|
{ |
||||||
|
|
||||||
|
ulong4 state1[3]; |
||||||
|
uint ps1 = 0; |
||||||
|
uint ps2 = (memshift * 3 + memshift * 4); |
||||||
|
//#pragma unroll 4 |
||||||
|
for (int i = 0; i < 4; i++) |
||||||
|
{ |
||||||
|
uint s1 = ps1 + i*memshift; |
||||||
|
uint s2 = ps2 - i*memshift; |
||||||
|
|
||||||
|
for (int j = 0; j < 3; j++) state1[j] = (DMatrix)[j + s1]; |
||||||
|
|
||||||
|
for (int j = 0; j < 3; j++) state[j] ^= state1[j]; |
||||||
|
round_lyra(state); |
||||||
|
for (int j = 0; j < 3; j++) state1[j] ^= state[j]; |
||||||
|
|
||||||
|
for (int j = 0; j < 3; j++) (DMatrix)[j + s2] = state1[j]; |
||||||
|
} |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void reduceDuplexRowf(uint rowIn,uint rowInOut,uint rowOut,ulong4 * state, __global ulong4 * DMatrix) |
||||||
|
{ |
||||||
|
|
||||||
|
ulong4 state1[3], state2[3]; |
||||||
|
uint ps1 = (memshift * 4 * rowIn); |
||||||
|
uint ps2 = (memshift * 4 * rowInOut); |
||||||
|
uint ps3 = (memshift * 4 * rowOut); |
||||||
|
|
||||||
|
|
||||||
|
for (int i = 0; i < 4; i++) |
||||||
|
{ |
||||||
|
uint s1 = ps1 + i*memshift; |
||||||
|
uint s2 = ps2 + i*memshift; |
||||||
|
uint s3 = ps3 + i*memshift; |
||||||
|
|
||||||
|
|
||||||
|
for (int j = 0; j < 3; j++) state1[j] = (DMatrix)[j + s1]; |
||||||
|
|
||||||
|
for (int j = 0; j < 3; j++) state2[j] = (DMatrix)[j + s2]; |
||||||
|
|
||||||
|
for (int j = 0; j < 3; j++) state1[j] += state2[j]; |
||||||
|
|
||||||
|
for (int j = 0; j < 3; j++) state[j] ^= state1[j]; |
||||||
|
|
||||||
|
|
||||||
|
round_lyra(state); |
||||||
|
|
||||||
|
((ulong*)state2)[0] ^= ((ulong*)state)[11]; |
||||||
|
for (int j = 0; j < 11; j++) |
||||||
|
((ulong*)state2)[j + 1] ^= ((ulong*)state)[j]; |
||||||
|
|
||||||
|
if (rowInOut != rowOut) { |
||||||
|
for (int j = 0; j < 3; j++) |
||||||
|
(DMatrix)[j + s2] = state2[j]; |
||||||
|
for (int j = 0; j < 3; j++) |
||||||
|
(DMatrix)[j + s3] ^= state[j]; |
||||||
|
} |
||||||
|
else { |
||||||
|
for (int j = 0; j < 3; j++) |
||||||
|
state2[j] ^= state[j]; |
||||||
|
for (int j = 0; j < 3; j++) |
||||||
|
(DMatrix)[j + s2] = state2[j]; |
||||||
|
} |
||||||
|
|
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void reduceDuplexRowSetupf(uint rowIn, uint rowInOut, uint rowOut, ulong4 *state, __global ulong4* DMatrix) { |
||||||
|
|
||||||
|
ulong4 state2[3], state1[3]; |
||||||
|
uint ps1 = (memshift * 4 * rowIn); |
||||||
|
uint ps2 = (memshift * 4 * rowInOut); |
||||||
|
uint ps3 = (memshift * 3 + memshift * 4 * rowOut); |
||||||
|
|
||||||
|
for (int i = 0; i < 4; i++) |
||||||
|
{ |
||||||
|
uint s1 = ps1 + i*memshift; |
||||||
|
uint s2 = ps2 + i*memshift; |
||||||
|
uint s3 = ps3 - i*memshift; |
||||||
|
|
||||||
|
for (int j = 0; j < 3; j++) state1[j] = (DMatrix)[j + s1]; |
||||||
|
|
||||||
|
for (int j = 0; j < 3; j++) state2[j] = (DMatrix)[j + s2]; |
||||||
|
for (int j = 0; j < 3; j++) { |
||||||
|
ulong4 tmp = state1[j] + state2[j]; |
||||||
|
state[j] ^= tmp; |
||||||
|
} |
||||||
|
round_lyra(state); |
||||||
|
|
||||||
|
for (int j = 0; j < 3; j++) { |
||||||
|
state1[j] ^= state[j]; |
||||||
|
(DMatrix)[j + s3] = state1[j]; |
||||||
|
} |
||||||
|
|
||||||
|
((ulong*)state2)[0] ^= ((ulong*)state)[11]; |
||||||
|
for (int j = 0; j < 11; j++) |
||||||
|
((ulong*)state2)[j + 1] ^= ((ulong*)state)[j]; |
||||||
|
for (int j = 0; j < 3; j++) |
||||||
|
(DMatrix)[j + s2] = state2[j]; |
||||||
|
} |
||||||
|
} |
||||||
|
|
@ -0,0 +1,314 @@ |
|||||||
|
/* |
||||||
|
* "yescrypt" kernel implementation. |
||||||
|
* |
||||||
|
* ==========================(LICENSE BEGIN)============================ |
||||||
|
* |
||||||
|
* Copyright (c) 2015 djm34 |
||||||
|
* |
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining |
||||||
|
* a copy of this software and associated documentation files (the |
||||||
|
* "Software"), to deal in the Software without restriction, including |
||||||
|
* without limitation the rights to use, copy, modify, merge, publish, |
||||||
|
* distribute, sublicense, and/or sell copies of the Software, and to |
||||||
|
* permit persons to whom the Software is furnished to do so, subject to |
||||||
|
* the following conditions: |
||||||
|
* |
||||||
|
* The above copyright notice and this permission notice shall be |
||||||
|
* included in all copies or substantial portions of the Software. |
||||||
|
* |
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||||
|
* |
||||||
|
* ===========================(LICENSE END)============================= |
||||||
|
* |
||||||
|
* @author djm34 |
||||||
|
*/ |
||||||
|
#if !defined(cl_khr_byte_addressable_store) |
||||||
|
#error "Device does not support unaligned stores" |
||||||
|
#endif |
||||||
|
|
||||||
|
#include "yescrypt_essential.cl" |
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
||||||
|
__kernel void search(__global const uchar* restrict input, __global uint* restrict output, __global uchar *padcache, __global uchar* buff1, __global uchar* buff2, __global uchar* buff3, const uint target) |
||||||
|
{ |
||||||
|
|
||||||
|
__global ulong16 *hashbuffer = (__global ulong16 *)(padcache + (2048 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||||
|
__global ulong16 *prevstate = (__global ulong16 *)(buff1 + (64 * 128 * sizeof(ulong)*(get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||||
|
__global uint8 *sha256tokeep = (__global uint8 *)(buff3 + (8 * sizeof(uint)*(get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||||
|
__global ulong16 *Bdev = (__global ulong16 *)(buff2 + (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
uint nonce = (get_global_id(0)); |
||||||
|
uint data[20]; |
||||||
|
uint16 in; |
||||||
|
uint8 state1, state2; |
||||||
|
// uint8 sha256tokeep; |
||||||
|
|
||||||
|
// ulong16 Bdev[8]; // will require an additional buffer |
||||||
|
((uint16 *)data)[0] = ((__global const uint16 *)input)[0]; |
||||||
|
((uint4 *)data)[4] = ((__global const uint4 *)input)[4]; |
||||||
|
// for (int i = 0; i<20; i++) { data[i] = SWAP32(data[i]); } |
||||||
|
// if (nonce == 10) { printf("data %08x %08x\n", data[0], data[1]); } |
||||||
|
uint8 passwd = sha256_80(data, nonce); |
||||||
|
//pbkdf |
||||||
|
in.lo = pad1.lo ^ passwd; |
||||||
|
in.hi = pad1.hi; |
||||||
|
state1 = sha256_Transform(in, H256); |
||||||
|
|
||||||
|
in.lo = pad2.lo ^ passwd; |
||||||
|
in.hi = pad2.hi; |
||||||
|
state2 = sha256_Transform(in, H256); |
||||||
|
|
||||||
|
in = ((uint16*)data)[0]; |
||||||
|
state1 = sha256_Transform(in, state1); |
||||||
|
#pragma unroll 1 |
||||||
|
for (int i = 0; i<8; i++) |
||||||
|
{ |
||||||
|
uint16 result; |
||||||
|
in = pad3; |
||||||
|
in.s0 = data[16]; |
||||||
|
in.s1 = data[17]; |
||||||
|
in.s2 = data[18]; |
||||||
|
in.s3 = nonce; |
||||||
|
in.s4 = 4 * i + 1; |
||||||
|
in.lo = sha256_Transform(in, state1); |
||||||
|
in.hi = pad4; |
||||||
|
result.lo = swapvec(sha256_Transform(in, state2)); |
||||||
|
if (i == 0) sha256tokeep[0] = result.lo; |
||||||
|
in = pad3; |
||||||
|
in.s0 = data[16]; |
||||||
|
in.s1 = data[17]; |
||||||
|
in.s2 = data[18]; |
||||||
|
in.s3 = nonce; |
||||||
|
in.s4 = 4 * i + 2; |
||||||
|
in.lo = sha256_Transform(in, state1); |
||||||
|
in.hi = pad4; |
||||||
|
result.hi = swapvec(sha256_Transform(in, state2)); |
||||||
|
Bdev[i].lo = as_ulong8(shuffle(result)); |
||||||
|
// Bdev[i].lo = as_ulong8(result); |
||||||
|
in = pad3; |
||||||
|
in.s0 = data[16]; |
||||||
|
in.s1 = data[17]; |
||||||
|
in.s2 = data[18]; |
||||||
|
in.s3 = nonce; |
||||||
|
in.s4 = 4 * i + 3; |
||||||
|
in.lo = sha256_Transform(in, state1); |
||||||
|
in.hi = pad4; |
||||||
|
result.lo = swapvec(sha256_Transform(in, state2)); |
||||||
|
in = pad3; |
||||||
|
in.s0 = data[16]; |
||||||
|
in.s1 = data[17]; |
||||||
|
in.s2 = data[18]; |
||||||
|
in.s3 = nonce; |
||||||
|
in.s4 = 4 * i + 4; |
||||||
|
in.lo = sha256_Transform(in, state1); |
||||||
|
in.hi = pad4; |
||||||
|
result.hi = swapvec(sha256_Transform(in, state2)); |
||||||
|
|
||||||
|
|
||||||
|
Bdev[i].hi = as_ulong8(shuffle(result)); |
||||||
|
// Bdev[i].hi = as_ulong8(result); |
||||||
|
} |
||||||
|
|
||||||
|
//mixing1 |
||||||
|
|
||||||
|
prevstate[0] = Bdev[0]; |
||||||
|
Bdev[0] = blockmix_salsa8_small2(Bdev[0]); |
||||||
|
prevstate[1] = Bdev[0]; |
||||||
|
Bdev[0] = blockmix_salsa8_small2(Bdev[0]); |
||||||
|
|
||||||
|
uint n = 1; |
||||||
|
#pragma unroll 1 |
||||||
|
for (uint i = 2; i < 64; i++) |
||||||
|
{ |
||||||
|
|
||||||
|
prevstate[i] = Bdev[0]; |
||||||
|
|
||||||
|
if ((i&(i - 1)) == 0) n = n << 1; |
||||||
|
|
||||||
|
uint j = as_uint2(Bdev[0].hi.s0).x & (n - 1); |
||||||
|
|
||||||
|
j += i - n; |
||||||
|
Bdev[0] ^= prevstate[j]; |
||||||
|
|
||||||
|
Bdev[0] = blockmix_salsa8_small2(Bdev[0]); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
||||||
|
__kernel void search1(__global uchar *buffer1, __global uchar *buffer2) |
||||||
|
{ |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
||||||
|
__kernel void search2(__global uchar *padcache, __global uchar *buff1, __global uchar *buff2) |
||||||
|
{ |
||||||
|
|
||||||
|
__global ulong16 *hashbuffer = (__global ulong16 *)(padcache + (2048 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||||
|
__global ulong16* prevstate = (__global ulong16 *)(buff1 + (64 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||||
|
__global ulong16 *Bdev = (__global ulong16 *)(buff2 + (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||||
|
|
||||||
|
|
||||||
|
for (int i = 0; i<8; i++) |
||||||
|
hashbuffer[i] = Bdev[i]; |
||||||
|
|
||||||
|
blockmix_pwxform((__global ulong8*)Bdev, prevstate); |
||||||
|
|
||||||
|
|
||||||
|
for (int i = 0; i<8; i++) |
||||||
|
hashbuffer[i + 8] = Bdev[i]; |
||||||
|
|
||||||
|
blockmix_pwxform((__global ulong8*)Bdev, prevstate); |
||||||
|
int n = 1; |
||||||
|
#pragma unroll 1 |
||||||
|
for (int i = 2; i < 2048; i ++) |
||||||
|
{ |
||||||
|
|
||||||
|
for (int k = 0; k<8; k++) |
||||||
|
(hashbuffer + 8 * i)[k] = Bdev[k]; |
||||||
|
|
||||||
|
|
||||||
|
if ((i&(i - 1)) == 0) n = n << 1; |
||||||
|
|
||||||
|
uint j = as_uint2(Bdev[7].hi.s0).x & (n - 1); |
||||||
|
j += i - n; |
||||||
|
|
||||||
|
for (int k = 0; k < 8; k++) |
||||||
|
Bdev[k] ^= (hashbuffer + 8 * j)[k]; |
||||||
|
|
||||||
|
|
||||||
|
blockmix_pwxform((__global ulong8*)Bdev, prevstate); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/* |
||||||
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
||||||
|
__kernel void search3(__global uchar *buffer1, __global uchar *buffer2) |
||||||
|
{ |
||||||
|
} |
||||||
|
*/ |
||||||
|
|
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
||||||
|
__kernel void search3(__global uchar *padcache, __global uchar *buff1, __global uchar *buff2) |
||||||
|
{ |
||||||
|
|
||||||
|
__global ulong16 *hashbuffer = (__global ulong16 *)(padcache + (2048 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||||
|
__global ulong16* prevstate = (__global ulong16 *)(buff1 + (64 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||||
|
__global ulong16 *Bdev = (__global ulong16 *)(buff2 + (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||||
|
|
||||||
|
|
||||||
|
#pragma unroll 1 |
||||||
|
for (int z = 0; z < 684; z++) |
||||||
|
{ |
||||||
|
|
||||||
|
uint j = as_uint2(Bdev[7].hi.s0).x & 2047; |
||||||
|
|
||||||
|
|
||||||
|
for (int k = 0; k < 8; k++) |
||||||
|
Bdev[k] ^= (hashbuffer + 8 * j)[k]; |
||||||
|
|
||||||
|
if (z<682) |
||||||
|
for (int k = 0; k<8; k++) |
||||||
|
(hashbuffer + 8 * j)[k] = Bdev[k]; |
||||||
|
|
||||||
|
blockmix_pwxform((__global ulong8*)Bdev, prevstate); |
||||||
|
//// |
||||||
|
} |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
/* |
||||||
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
||||||
|
__kernel void search5(__global uchar *buffer1, __global uchar *buffer2) |
||||||
|
{ |
||||||
|
} |
||||||
|
*/ |
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
||||||
|
__kernel void search4(__global const uchar* restrict input, __global uint* restrict output, __global uchar *buff2,__global uchar* buff3, const uint target) |
||||||
|
{ |
||||||
|
|
||||||
|
__global ulong16 *Bdev = (__global ulong16 *)(buff2 + (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||||
|
__global uint8 *sha256tokeep = (__global uint8 *)(buff3 + (8 * sizeof(uint)*(get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||||
|
|
||||||
|
uint nonce = (get_global_id(0)); |
||||||
|
|
||||||
|
|
||||||
|
uint data[20]; |
||||||
|
((uint16 *)data)[0] = ((__global const uint16 *)input)[0]; |
||||||
|
((uint4 *)data)[4] = ((__global const uint4 *)input)[4]; |
||||||
|
// for (int i = 0; i<20; i++) { data[i] = SWAP32(data[i]); } |
||||||
|
uint8 swpass = swapvec(sha256tokeep[0]); |
||||||
|
uint16 in; |
||||||
|
uint8 state1,state2; |
||||||
|
in.lo = pad1.lo ^ swpass; |
||||||
|
in.hi = pad1.hi; |
||||||
|
|
||||||
|
|
||||||
|
state1 = sha256_Transform(in, H256); |
||||||
|
|
||||||
|
in.lo = pad2.lo ^ swpass; |
||||||
|
in.hi = pad2.hi; |
||||||
|
state2 = sha256_Transform(in, H256); |
||||||
|
|
||||||
|
#pragma unroll 1 |
||||||
|
for (int i = 0; i<8; i++) { |
||||||
|
in = unshuffle(Bdev[i].lo); |
||||||
|
in = swapvec16(in); |
||||||
|
state1 = sha256_Transform(in, state1); |
||||||
|
in = unshuffle(Bdev[i].hi); |
||||||
|
in = swapvec16(in); |
||||||
|
state1 = sha256_Transform(in, state1); |
||||||
|
} |
||||||
|
in = pad5; |
||||||
|
state1 = sha256_Transform(in, state1); |
||||||
|
in.lo = state1; |
||||||
|
in.hi = pad4; |
||||||
|
uint8 res = sha256_Transform(in, state2); |
||||||
|
|
||||||
|
//hmac and final sha |
||||||
|
|
||||||
|
in.lo = pad1.lo ^ res; |
||||||
|
in.hi = pad1.hi; |
||||||
|
state1 = sha256_Transform(in, H256); |
||||||
|
in.lo = pad2.lo ^ res; |
||||||
|
in.hi = pad2.hi; |
||||||
|
state2 = sha256_Transform(in, H256); |
||||||
|
in = ((uint16*)data)[0]; |
||||||
|
state1 = sha256_Transform(in, state1); |
||||||
|
in = padsha80; |
||||||
|
in.s0 = data[16]; |
||||||
|
in.s1 = data[17]; |
||||||
|
in.s2 = data[18]; |
||||||
|
in.s3 = get_global_id(0); |
||||||
|
in.sf = 0x480; |
||||||
|
state1 = sha256_Transform(in, state1); |
||||||
|
in.lo = state1; |
||||||
|
in.hi = pad4; |
||||||
|
state1 = sha256_Transform(in, state2); |
||||||
|
// state2 = H256; |
||||||
|
in.lo = state1; |
||||||
|
in.hi = pad4; |
||||||
|
in.sf = 0x100; |
||||||
|
res = sha256_Transform(in, H256); |
||||||
|
|
||||||
|
|
||||||
|
if (SWAP32(res.s7) <= (target)) |
||||||
|
output[atomic_inc(output + 0xFF)] = (nonce); |
||||||
|
|
||||||
|
} |
@ -0,0 +1,253 @@ |
|||||||
|
/* |
||||||
|
* "yescrypt" kernel implementation. |
||||||
|
* |
||||||
|
* ==========================(LICENSE BEGIN)============================ |
||||||
|
* |
||||||
|
* Copyright (c) 2015 djm34 |
||||||
|
* |
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining |
||||||
|
* a copy of this software and associated documentation files (the |
||||||
|
* "Software"), to deal in the Software without restriction, including |
||||||
|
* without limitation the rights to use, copy, modify, merge, publish, |
||||||
|
* distribute, sublicense, and/or sell copies of the Software, and to |
||||||
|
* permit persons to whom the Software is furnished to do so, subject to |
||||||
|
* the following conditions: |
||||||
|
* |
||||||
|
* The above copyright notice and this permission notice shall be |
||||||
|
* included in all copies or substantial portions of the Software. |
||||||
|
* |
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||||
|
* |
||||||
|
* ===========================(LICENSE END)============================= |
||||||
|
* |
||||||
|
* @author djm34 |
||||||
|
*/ |
||||||
|
#if !defined(cl_khr_byte_addressable_store) |
||||||
|
#error "Device does not support unaligned stores" |
||||||
|
#endif |
||||||
|
|
||||||
|
#include "yescrypt_essential.cl" |
||||||
|
|
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
||||||
|
__kernel void search(__global const uchar* restrict input, __global uint* restrict output, __global uchar *padcache, __global uchar* buff1, __global uchar* buff2, const uint target) |
||||||
|
{ |
||||||
|
|
||||||
|
__global ulong16 *hashbuffer = (__global ulong16 *)(padcache + (2048 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||||
|
__global ulong16 *prevstate = (__global ulong16 *)(buff1 + (64 * 128 * sizeof(ulong)*(get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||||
|
__global ulong16 *Bdev = (__global ulong16 *)(buff2 + (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
uint nonce = (get_global_id(0)); |
||||||
|
uint data[20]; |
||||||
|
uint16 in; |
||||||
|
uint8 state1, state2; |
||||||
|
uint8 sha256tokeep; |
||||||
|
|
||||||
|
((uint16 *)data)[0] = ((__global const uint16 *)input)[0]; |
||||||
|
((uint4 *)data)[4] = ((__global const uint4 *)input)[4]; |
||||||
|
for (int i = 0; i<20; i++) { data[i] = SWAP32(data[i]); } |
||||||
|
// if (nonce == 10) { printf("data %08x %08x\n", data[0], data[1]); } |
||||||
|
uint8 passwd = sha256_80(data, nonce); |
||||||
|
//pbkdf |
||||||
|
in.lo = pad1.lo ^ passwd; |
||||||
|
in.hi = pad1.hi; |
||||||
|
state1 = sha256_Transform(in, H256); |
||||||
|
|
||||||
|
in.lo = pad2.lo ^ passwd; |
||||||
|
in.hi = pad2.hi; |
||||||
|
state2 = sha256_Transform(in, H256); |
||||||
|
|
||||||
|
in = ((uint16*)data)[0]; |
||||||
|
state1 = sha256_Transform(in, state1); |
||||||
|
#pragma unroll 1 |
||||||
|
for (int i = 0; i<8; i++) |
||||||
|
{ |
||||||
|
uint16 result; |
||||||
|
in = pad3; |
||||||
|
in.s0 = data[16]; |
||||||
|
in.s1 = data[17]; |
||||||
|
in.s2 = data[18]; |
||||||
|
in.s3 = nonce; |
||||||
|
in.s4 = 4 * i + 1; |
||||||
|
in.lo = sha256_Transform(in, state1); |
||||||
|
in.hi = pad4; |
||||||
|
result.lo = swapvec(sha256_Transform(in, state2)); |
||||||
|
if (i == 0) sha256tokeep = result.lo; |
||||||
|
in = pad3; |
||||||
|
in.s0 = data[16]; |
||||||
|
in.s1 = data[17]; |
||||||
|
in.s2 = data[18]; |
||||||
|
in.s3 = nonce; |
||||||
|
in.s4 = 4 * i + 2; |
||||||
|
in.lo = sha256_Transform(in, state1); |
||||||
|
in.hi = pad4; |
||||||
|
result.hi = swapvec(sha256_Transform(in, state2)); |
||||||
|
Bdev[i].lo = as_ulong8(shuffle(result)); |
||||||
|
in = pad3; |
||||||
|
in.s0 = data[16]; |
||||||
|
in.s1 = data[17]; |
||||||
|
in.s2 = data[18]; |
||||||
|
in.s3 = nonce; |
||||||
|
in.s4 = 4 * i + 3; |
||||||
|
in.lo = sha256_Transform(in, state1); |
||||||
|
in.hi = pad4; |
||||||
|
result.lo = swapvec(sha256_Transform(in, state2)); |
||||||
|
in = pad3; |
||||||
|
in.s0 = data[16]; |
||||||
|
in.s1 = data[17]; |
||||||
|
in.s2 = data[18]; |
||||||
|
in.s3 = nonce; |
||||||
|
in.s4 = 4 * i + 4; |
||||||
|
in.lo = sha256_Transform(in, state1); |
||||||
|
in.hi = pad4; |
||||||
|
result.hi = swapvec(sha256_Transform(in, state2)); |
||||||
|
|
||||||
|
|
||||||
|
Bdev[i].hi = as_ulong8(shuffle(result)); |
||||||
|
} |
||||||
|
|
||||||
|
//mixing1 |
||||||
|
|
||||||
|
prevstate[0] = Bdev[0]; |
||||||
|
Bdev[0] = blockmix_salsa8_small2(Bdev[0]); |
||||||
|
prevstate[1] = Bdev[0]; |
||||||
|
Bdev[0] = blockmix_salsa8_small2(Bdev[0]); |
||||||
|
|
||||||
|
uint n = 1; |
||||||
|
#pragma unroll 1 |
||||||
|
for (uint i = 2; i < 64; i++) |
||||||
|
{ |
||||||
|
|
||||||
|
prevstate[i] = Bdev[0]; |
||||||
|
|
||||||
|
if ((i&(i - 1)) == 0) n = n << 1; |
||||||
|
|
||||||
|
uint j = as_uint2(Bdev[0].hi.s0).x & (n - 1); |
||||||
|
|
||||||
|
j += i - n; |
||||||
|
Bdev[0] ^= prevstate[j]; |
||||||
|
|
||||||
|
Bdev[0] = blockmix_salsa8_small2(Bdev[0]); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
for (int i = 0; i<8; i++) |
||||||
|
hashbuffer[i] = Bdev[i]; |
||||||
|
|
||||||
|
blockmix_pwxform((__global ulong8*)Bdev, prevstate); |
||||||
|
|
||||||
|
|
||||||
|
for (int i = 0; i<8; i++) |
||||||
|
hashbuffer[i + 8] = Bdev[i]; |
||||||
|
|
||||||
|
blockmix_pwxform((__global ulong8*)Bdev, prevstate); |
||||||
|
n = 1; |
||||||
|
#pragma unroll 1 |
||||||
|
for (int i = 2; i < 2048; i++) |
||||||
|
{ |
||||||
|
|
||||||
|
for (int k = 0; k<8; k++) |
||||||
|
(hashbuffer + 8 * i)[k] = Bdev[k]; |
||||||
|
|
||||||
|
|
||||||
|
if ((i&(i - 1)) == 0) n = n << 1; |
||||||
|
|
||||||
|
uint j = as_uint2(Bdev[7].hi.s0).x & (n - 1); |
||||||
|
j += i - n; |
||||||
|
|
||||||
|
for (int k = 0; k < 8; k++) |
||||||
|
Bdev[k] ^= (hashbuffer + 8 * j)[k]; |
||||||
|
|
||||||
|
|
||||||
|
blockmix_pwxform((__global ulong8*)Bdev, prevstate); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
#pragma unroll 1 |
||||||
|
for (int z = 0; z < 684; z++) |
||||||
|
{ |
||||||
|
|
||||||
|
uint j = as_uint2(Bdev[7].hi.s0).x & 2047; |
||||||
|
|
||||||
|
|
||||||
|
for (int k = 0; k < 8; k++) |
||||||
|
Bdev[k] ^= (hashbuffer + 8 * j)[k]; |
||||||
|
|
||||||
|
if (z<682) |
||||||
|
for (int k = 0; k<8; k++) |
||||||
|
(hashbuffer + 8 * j)[k] = Bdev[k]; |
||||||
|
|
||||||
|
blockmix_pwxform((__global ulong8*)Bdev, prevstate); |
||||||
|
//// |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
uint8 swpass = swapvec(sha256tokeep); |
||||||
|
// uint16 in; |
||||||
|
// uint8 state1, state2; |
||||||
|
in.lo = pad1.lo ^ swpass; |
||||||
|
in.hi = pad1.hi; |
||||||
|
|
||||||
|
|
||||||
|
state1 = sha256_Transform(in, H256); |
||||||
|
|
||||||
|
in.lo = pad2.lo ^ swpass; |
||||||
|
in.hi = pad2.hi; |
||||||
|
state2 = sha256_Transform(in, H256); |
||||||
|
|
||||||
|
#pragma unroll 1 |
||||||
|
for (int i = 0; i<8; i++) { |
||||||
|
in = unshuffle(Bdev[i].lo); |
||||||
|
in = swapvec16(in); |
||||||
|
state1 = sha256_Transform(in, state1); |
||||||
|
in = unshuffle(Bdev[i].hi); |
||||||
|
in = swapvec16(in); |
||||||
|
state1 = sha256_Transform(in, state1); |
||||||
|
} |
||||||
|
in = pad5; |
||||||
|
state1 = sha256_Transform(in, state1); |
||||||
|
in.lo = state1; |
||||||
|
in.hi = pad4; |
||||||
|
uint8 res = sha256_Transform(in, state2); |
||||||
|
|
||||||
|
//hmac and final sha |
||||||
|
|
||||||
|
in.lo = pad1.lo ^ res; |
||||||
|
in.hi = pad1.hi; |
||||||
|
state1 = sha256_Transform(in, H256); |
||||||
|
in.lo = pad2.lo ^ res; |
||||||
|
in.hi = pad2.hi; |
||||||
|
state2 = sha256_Transform(in, H256); |
||||||
|
in = ((uint16*)data)[0]; |
||||||
|
state1 = sha256_Transform(in, state1); |
||||||
|
in = padsha80; |
||||||
|
in.s0 = data[16]; |
||||||
|
in.s1 = data[17]; |
||||||
|
in.s2 = data[18]; |
||||||
|
in.s3 = get_global_id(0); |
||||||
|
in.sf = 0x480; |
||||||
|
state1 = sha256_Transform(in, state1); |
||||||
|
in.lo = state1; |
||||||
|
in.hi = pad4; |
||||||
|
state1 = sha256_Transform(in, state2); |
||||||
|
// state2 = H256; |
||||||
|
in.lo = state1; |
||||||
|
in.hi = pad4; |
||||||
|
in.sf = 0x100; |
||||||
|
res = sha256_Transform(in, H256); |
||||||
|
|
||||||
|
|
||||||
|
if (SWAP32(res.s7) <= (target)) |
||||||
|
output[atomic_inc(output + 0xFF)] = (nonce); |
||||||
|
|
||||||
|
} |
||||||
|
|
@ -0,0 +1,760 @@ |
|||||||
|
/* |
||||||
|
* "yescrypt" kernel implementation. |
||||||
|
* |
||||||
|
* ==========================(LICENSE BEGIN)============================ |
||||||
|
* |
||||||
|
* Copyright (c) 2015 djm34 |
||||||
|
* |
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining |
||||||
|
* a copy of this software and associated documentation files (the |
||||||
|
* "Software"), to deal in the Software without restriction, including |
||||||
|
* without limitation the rights to use, copy, modify, merge, publish, |
||||||
|
* distribute, sublicense, and/or sell copies of the Software, and to |
||||||
|
* permit persons to whom the Software is furnished to do so, subject to |
||||||
|
* the following conditions: |
||||||
|
* |
||||||
|
* The above copyright notice and this permission notice shall be |
||||||
|
* included in all copies or substantial portions of the Software. |
||||||
|
* |
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||||
|
* |
||||||
|
* ===========================(LICENSE END)============================= |
||||||
|
* |
||||||
|
* @author djm34 |
||||||
|
*/ |
||||||
|
|
||||||
|
#define ROL32(x, n) rotate(x, (uint) n) |
||||||
|
#define SWAP32(a) (as_uint(as_uchar4(a).wzyx)) |
||||||
|
//#define ROL32(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) |
||||||
|
#define HASH_MEMORY 4096 |
||||||
|
|
||||||
|
|
||||||
|
#define SALSA(a,b,c,d) do { \ |
||||||
|
t =a+d; b^=ROL32(t, 7U); \ |
||||||
|
t =b+a; c^=ROL32(t, 9U); \ |
||||||
|
t =c+b; d^=ROL32(t, 13U); \ |
||||||
|
t =d+c; a^=ROL32(t, 18U); \ |
||||||
|
} while(0) |
||||||
|
|
||||||
|
|
||||||
|
#define SALSA_CORE(state) do { \ |
||||||
|
\ |
||||||
|
SALSA(state.s0,state.s4,state.s8,state.sc); \ |
||||||
|
SALSA(state.s5,state.s9,state.sd,state.s1); \ |
||||||
|
SALSA(state.sa,state.se,state.s2,state.s6); \ |
||||||
|
SALSA(state.sf,state.s3,state.s7,state.sb); \ |
||||||
|
SALSA(state.s0,state.s1,state.s2,state.s3); \ |
||||||
|
SALSA(state.s5,state.s6,state.s7,state.s4); \ |
||||||
|
SALSA(state.sa,state.sb,state.s8,state.s9); \ |
||||||
|
SALSA(state.sf,state.sc,state.sd,state.se); \ |
||||||
|
} while(0) |
||||||
|
|
||||||
|
#define uSALSA_CORE(state) do { \ |
||||||
|
\ |
||||||
|
SALSA(state.s0,state.s4,state.s8,state.sc); \ |
||||||
|
SALSA(state.s1,state.s5,state.s9,state.sd); \ |
||||||
|
SALSA(state.s2,state.s6,state.sa,state.se); \ |
||||||
|
SALSA(state.s3,state.s7,state.sb,state.sf); \ |
||||||
|
SALSA(state.s0,state.sd,state.sa,state.s7); \ |
||||||
|
SALSA(state.s1,state.se,state.sb,state.s4); \ |
||||||
|
SALSA(state.s2,state.sf,state.s8,state.s5); \ |
||||||
|
SALSA(state.s3,state.sc,state.s9,state.s6); \ |
||||||
|
} while(0) |
||||||
|
|
||||||
|
|
||||||
|
#define unshuffle(state) (as_uint16(state).s0da741eb852fc963) |
||||||
|
|
||||||
|
#define shuffle(state) (as_uint16(state).s05af49e38d27c16b) |
||||||
|
|
||||||
|
static __constant uint16 pad1 = |
||||||
|
{ |
||||||
|
0x36363636, 0x36363636, 0x36363636, 0x36363636, |
||||||
|
0x36363636, 0x36363636, 0x36363636, 0x36363636, |
||||||
|
0x36363636, 0x36363636, 0x36363636, 0x36363636, |
||||||
|
0x36363636, 0x36363636, 0x36363636, 0x36363636 |
||||||
|
}; |
||||||
|
|
||||||
|
static __constant uint16 pad2 = |
||||||
|
{ |
||||||
|
0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, |
||||||
|
0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, |
||||||
|
0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, |
||||||
|
0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c |
||||||
|
}; |
||||||
|
|
||||||
|
static __constant uint16 pad5 = |
||||||
|
{ |
||||||
|
0x00000001, 0x80000000, 0x00000000, 0x00000000, |
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00000000, |
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00000000, |
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00002220 |
||||||
|
}; |
||||||
|
|
||||||
|
static __constant uint16 pad3 = |
||||||
|
{ |
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00000000, |
||||||
|
0x00000000, 0x80000000, 0x00000000, 0x00000000, |
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00000000, |
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x000004a0 |
||||||
|
}; |
||||||
|
|
||||||
|
static __constant uint16 padsha80 = |
||||||
|
{ |
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00000000, |
||||||
|
0x80000000, 0x00000000, 0x00000000, 0x00000000, |
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00000000, |
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00000280 |
||||||
|
}; |
||||||
|
|
||||||
|
static __constant uint8 pad4 = |
||||||
|
{ |
||||||
|
0x80000000, 0x00000000, 0x00000000, 0x00000000, |
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00000300 |
||||||
|
}; |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
static __constant uint8 H256 = { |
||||||
|
0x6A09E667, 0xBB67AE85, 0x3C6EF372, |
||||||
|
0xA54FF53A, 0x510E527F, 0x9B05688C, |
||||||
|
0x1F83D9AB, 0x5BE0CD19 |
||||||
|
}; |
||||||
|
|
||||||
|
inline uint8 swapvec(uint8 buf) |
||||||
|
{ |
||||||
|
uint8 vec; |
||||||
|
vec.s0 = SWAP32(buf.s0); |
||||||
|
vec.s1 = SWAP32(buf.s1); |
||||||
|
vec.s2 = SWAP32(buf.s2); |
||||||
|
vec.s3 = SWAP32(buf.s3); |
||||||
|
vec.s4 = SWAP32(buf.s4); |
||||||
|
vec.s5 = SWAP32(buf.s5); |
||||||
|
vec.s6 = SWAP32(buf.s6); |
||||||
|
vec.s7 = SWAP32(buf.s7); |
||||||
|
return vec; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
inline uint16 swapvec16(uint16 buf) |
||||||
|
{ |
||||||
|
uint16 vec; |
||||||
|
vec.s0 = SWAP32(buf.s0); |
||||||
|
vec.s1 = SWAP32(buf.s1); |
||||||
|
vec.s2 = SWAP32(buf.s2); |
||||||
|
vec.s3 = SWAP32(buf.s3); |
||||||
|
vec.s4 = SWAP32(buf.s4); |
||||||
|
vec.s5 = SWAP32(buf.s5); |
||||||
|
vec.s6 = SWAP32(buf.s6); |
||||||
|
vec.s7 = SWAP32(buf.s7); |
||||||
|
vec.s8 = SWAP32(buf.s8); |
||||||
|
vec.s9 = SWAP32(buf.s9); |
||||||
|
vec.sa = SWAP32(buf.sa); |
||||||
|
vec.sb = SWAP32(buf.sb); |
||||||
|
vec.sc = SWAP32(buf.sc); |
||||||
|
vec.sd = SWAP32(buf.sd); |
||||||
|
vec.se = SWAP32(buf.se); |
||||||
|
vec.sf = SWAP32(buf.sf); |
||||||
|
return vec; |
||||||
|
} |
||||||
|
|
||||||
|
ulong8 salsa20_8(uint16 Bx) |
||||||
|
{ |
||||||
|
uint t; |
||||||
|
uint16 st = Bx; |
||||||
|
uSALSA_CORE(st); |
||||||
|
uSALSA_CORE(st); |
||||||
|
uSALSA_CORE(st); |
||||||
|
uSALSA_CORE(st); |
||||||
|
return(as_ulong8(st + Bx)); |
||||||
|
} |
||||||
|
|
||||||
|
ulong8 salsa20_8n(uint16 Bx) |
||||||
|
{ |
||||||
|
uint t; |
||||||
|
uint16 st = Bx; |
||||||
|
SALSA_CORE(st); |
||||||
|
SALSA_CORE(st); |
||||||
|
SALSA_CORE(st); |
||||||
|
SALSA_CORE(st); |
||||||
|
return(as_ulong8(st + Bx)); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
ulong16 blockmix_salsa8_small2(ulong16 Bin) |
||||||
|
{ |
||||||
|
ulong8 X = Bin.hi; |
||||||
|
X ^= Bin.lo; |
||||||
|
X = salsa20_8(as_uint16(X)); |
||||||
|
Bin.lo = X; |
||||||
|
X ^= Bin.hi; |
||||||
|
X = salsa20_8(as_uint16(X)); |
||||||
|
Bin.hi = X; |
||||||
|
return(Bin); |
||||||
|
} |
||||||
|
/* |
||||||
|
uint16 salsa20_8_2(uint16 Bx) |
||||||
|
{ |
||||||
|
uint t; |
||||||
|
uint16 st = Bx; |
||||||
|
uSALSA_CORE(st); |
||||||
|
uSALSA_CORE(st); |
||||||
|
uSALSA_CORE(st); |
||||||
|
uSALSA_CORE(st); |
||||||
|
return(st + Bx); |
||||||
|
} |
||||||
|
|
||||||
|
ulong16 blockmix_salsa8_small2(ulong16 Bin) |
||||||
|
{ |
||||||
|
uint16 X = as_uint16(Bin.hi); |
||||||
|
X ^= as_uint16(Bin.lo); |
||||||
|
X = salsa20_8_2(as_uint16(X)); |
||||||
|
Bin.lo = as_ulong8(X); |
||||||
|
X ^= as_uint16(Bin.hi); |
||||||
|
X = salsa20_8_2(as_uint16(X)); |
||||||
|
Bin.hi = as_ulong8(X); |
||||||
|
return(Bin); |
||||||
|
} |
||||||
|
*/ |
||||||
|
|
||||||
|
|
||||||
|
inline ulong2 madd4long2(uint4 a, uint4 b) |
||||||
|
{ |
||||||
|
uint4 result; |
||||||
|
result.x = a.x*a.y + b.x; |
||||||
|
result.y = b.y + mad_hi(a.x, a.y, b.x); |
||||||
|
result.z = a.z*a.w + b.z; |
||||||
|
result.w = b.w + mad_hi(a.z, a.w, b.z); |
||||||
|
return as_ulong2(result); |
||||||
|
} |
||||||
|
|
||||||
|
inline ulong2 madd4long3(uint4 a, ulong2 b) |
||||||
|
{ |
||||||
|
ulong2 result; |
||||||
|
result.x = (ulong)a.x*(ulong)a.y + b.x; |
||||||
|
result.y = (ulong)a.z*(ulong)a.w + b.y; |
||||||
|
return result; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
inline ulong8 block_pwxform_long_old(ulong8 Bout, __global ulong16 *prevstate) |
||||||
|
{ |
||||||
|
|
||||||
|
ulong2 vec = Bout.lo.lo; |
||||||
|
|
||||||
|
for (int i = 0; i < 6; i++) |
||||||
|
{ |
||||||
|
ulong2 p0, p1; |
||||||
|
uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF); |
||||||
|
p0 = ((__global ulong2*)(prevstate ))[x.x]; |
||||||
|
vec = madd4long3(as_uint4(vec), p0); |
||||||
|
p1 = ((__global ulong2*)(prevstate + 32))[x.y]; |
||||||
|
|
||||||
|
vec ^= p1; |
||||||
|
} |
||||||
|
Bout.lo.lo = vec; |
||||||
|
vec = Bout.lo.hi; |
||||||
|
for (int i = 0; i < 6; i++) |
||||||
|
{ |
||||||
|
|
||||||
|
ulong2 p0, p1; |
||||||
|
uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF); |
||||||
|
p0 = ((__global ulong2*)(prevstate))[x.x]; |
||||||
|
vec = madd4long3(as_uint4(vec), p0); |
||||||
|
p1 = ((__global ulong2*)(prevstate + 32))[x.y]; |
||||||
|
|
||||||
|
vec ^= p1; |
||||||
|
} |
||||||
|
Bout.lo.hi = vec; |
||||||
|
|
||||||
|
vec = Bout.hi.lo; |
||||||
|
for (int i = 0; i < 6; i++) |
||||||
|
{ |
||||||
|
ulong2 p0, p1; |
||||||
|
uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF); |
||||||
|
p0 = ((__global ulong2*)(prevstate))[x.x]; |
||||||
|
vec = madd4long3(as_uint4(vec), p0); |
||||||
|
p1 = ((__global ulong2*)(prevstate + 32))[x.y]; |
||||||
|
vec ^= p1; |
||||||
|
} |
||||||
|
Bout.hi.lo = vec; |
||||||
|
vec = Bout.hi.hi; |
||||||
|
for (int i = 0; i < 6; i++) |
||||||
|
{ |
||||||
|
ulong2 p0, p1; |
||||||
|
uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF); |
||||||
|
p0 = ((__global ulong2*)(prevstate))[x.x]; |
||||||
|
vec = madd4long3(as_uint4(vec), p0); |
||||||
|
p1 = ((__global ulong2*)(prevstate + 32))[x.y]; |
||||||
|
|
||||||
|
vec ^= p1; |
||||||
|
} |
||||||
|
Bout.hi.hi = vec; |
||||||
|
|
||||||
|
return(Bout); |
||||||
|
} |
||||||
|
|
||||||
|
inline ulong8 block_pwxform_long(ulong8 Bout, __global ulong2 *prevstate) |
||||||
|
{ |
||||||
|
|
||||||
|
ulong2 vec = Bout.lo.lo; |
||||||
|
|
||||||
|
for (int i = 0; i < 6; i++) |
||||||
|
{ |
||||||
|
ulong2 p0, p1; |
||||||
|
uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF); |
||||||
|
p0 = prevstate[x.x]; |
||||||
|
vec = madd4long3(as_uint4(vec), p0); |
||||||
|
p1 = (prevstate + 32*8)[x.y]; |
||||||
|
|
||||||
|
vec ^= p1; |
||||||
|
} |
||||||
|
Bout.lo.lo = vec; |
||||||
|
vec = Bout.lo.hi; |
||||||
|
for (int i = 0; i < 6; i++) |
||||||
|
{ |
||||||
|
|
||||||
|
ulong2 p0, p1; |
||||||
|
uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF); |
||||||
|
p0 = prevstate[x.x]; |
||||||
|
vec = madd4long3(as_uint4(vec), p0); |
||||||
|
p1 = (prevstate + 32 * 8)[x.y]; |
||||||
|
|
||||||
|
vec ^= p1; |
||||||
|
} |
||||||
|
Bout.lo.hi = vec; |
||||||
|
|
||||||
|
vec = Bout.hi.lo; |
||||||
|
for (int i = 0; i < 6; i++) |
||||||
|
{ |
||||||
|
ulong2 p0, p1; |
||||||
|
uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF); |
||||||
|
p0 = prevstate[x.x]; |
||||||
|
vec = madd4long3(as_uint4(vec), p0); |
||||||
|
p1 = (prevstate + 32 * 8)[x.y]; |
||||||
|
vec ^= p1; |
||||||
|
} |
||||||
|
Bout.hi.lo = vec; |
||||||
|
vec = Bout.hi.hi; |
||||||
|
for (int i = 0; i < 6; i++) |
||||||
|
{ |
||||||
|
ulong2 p0, p1; |
||||||
|
uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF); |
||||||
|
p0 = prevstate[x.x]; |
||||||
|
vec = madd4long3(as_uint4(vec), p0); |
||||||
|
p1 = (prevstate + 32 * 8)[x.y]; |
||||||
|
|
||||||
|
vec ^= p1; |
||||||
|
} |
||||||
|
Bout.hi.hi = vec; |
||||||
|
|
||||||
|
return(Bout); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
inline void blockmix_pwxform(__global ulong8 *Bin, __global ulong16 *prevstate) |
||||||
|
{ |
||||||
|
Bin[0] ^= Bin[15]; |
||||||
|
Bin[0] = block_pwxform_long_old(Bin[0], prevstate); |
||||||
|
#pragma unroll 1 |
||||||
|
for (int i = 1; i < 16; i++) |
||||||
|
{ |
||||||
|
Bin[i] ^= Bin[i - 1]; |
||||||
|
Bin[i] = block_pwxform_long_old(Bin[i], prevstate); |
||||||
|
} |
||||||
|
Bin[15] = salsa20_8(as_uint16(Bin[15])); |
||||||
|
} |
||||||
|
|
||||||
|
#define SHR(x, n) ((x) >> n) |
||||||
|
|
||||||
|
|
||||||
|
#define S0(x) (ROL32(x, 25) ^ ROL32(x, 14) ^ SHR(x, 3)) |
||||||
|
#define S1(x) (ROL32(x, 15) ^ ROL32(x, 13) ^ SHR(x, 10)) |
||||||
|
|
||||||
|
#define S2(x) (ROL32(x, 30) ^ ROL32(x, 19) ^ ROL32(x, 10)) |
||||||
|
#define S3(x) (ROL32(x, 26) ^ ROL32(x, 21) ^ ROL32(x, 7)) |
||||||
|
|
||||||
|
#define P(a,b,c,d,e,f,g,h,x,K) \ |
||||||
|
{ \ |
||||||
|
temp1 = h + S3(e) + F1(e,f,g) + (K + x); \ |
||||||
|
d += temp1; h = temp1 + S2(a) + F0(a,b,c); \ |
||||||
|
} |
||||||
|
|
||||||
|
#define PLAST(a,b,c,d,e,f,g,h,x,K) \ |
||||||
|
{ \ |
||||||
|
d += h + S3(e) + F1(e,f,g) + (x + K); \ |
||||||
|
} |
||||||
|
|
||||||
|
#define F0(y, x, z) bitselect(z, y, z ^ x) |
||||||
|
#define F1(x, y, z) bitselect(z, y, x) |
||||||
|
|
||||||
|
#define R0 (W0 = S1(W14) + W9 + S0(W1) + W0) |
||||||
|
#define R1 (W1 = S1(W15) + W10 + S0(W2) + W1) |
||||||
|
#define R2 (W2 = S1(W0) + W11 + S0(W3) + W2) |
||||||
|
#define R3 (W3 = S1(W1) + W12 + S0(W4) + W3) |
||||||
|
#define R4 (W4 = S1(W2) + W13 + S0(W5) + W4) |
||||||
|
#define R5 (W5 = S1(W3) + W14 + S0(W6) + W5) |
||||||
|
#define R6 (W6 = S1(W4) + W15 + S0(W7) + W6) |
||||||
|
#define R7 (W7 = S1(W5) + W0 + S0(W8) + W7) |
||||||
|
#define R8 (W8 = S1(W6) + W1 + S0(W9) + W8) |
||||||
|
#define R9 (W9 = S1(W7) + W2 + S0(W10) + W9) |
||||||
|
#define R10 (W10 = S1(W8) + W3 + S0(W11) + W10) |
||||||
|
#define R11 (W11 = S1(W9) + W4 + S0(W12) + W11) |
||||||
|
#define R12 (W12 = S1(W10) + W5 + S0(W13) + W12) |
||||||
|
#define R13 (W13 = S1(W11) + W6 + S0(W14) + W13) |
||||||
|
#define R14 (W14 = S1(W12) + W7 + S0(W15) + W14) |
||||||
|
#define R15 (W15 = S1(W13) + W8 + S0(W0) + W15) |
||||||
|
|
||||||
|
#define RD14 (S1(W12) + W7 + S0(W15) + W14) |
||||||
|
#define RD15 (S1(W13) + W8 + S0(W0) + W15) |
||||||
|
|
||||||
|
/// generic sha transform |
||||||
|
inline uint8 sha256_Transform(uint16 data, uint8 state) |
||||||
|
{ |
||||||
|
uint temp1; |
||||||
|
uint8 res = state; |
||||||
|
uint W0 = data.s0; |
||||||
|
uint W1 = data.s1; |
||||||
|
uint W2 = data.s2; |
||||||
|
uint W3 = data.s3; |
||||||
|
uint W4 = data.s4; |
||||||
|
uint W5 = data.s5; |
||||||
|
uint W6 = data.s6; |
||||||
|
uint W7 = data.s7; |
||||||
|
uint W8 = data.s8; |
||||||
|
uint W9 = data.s9; |
||||||
|
uint W10 = data.sA; |
||||||
|
uint W11 = data.sB; |
||||||
|
uint W12 = data.sC; |
||||||
|
uint W13 = data.sD; |
||||||
|
uint W14 = data.sE; |
||||||
|
uint W15 = data.sF; |
||||||
|
|
||||||
|
#define v0 res.s0 |
||||||
|
#define v1 res.s1 |
||||||
|
#define v2 res.s2 |
||||||
|
#define v3 res.s3 |
||||||
|
#define v4 res.s4 |
||||||
|
#define v5 res.s5 |
||||||
|
#define v6 res.s6 |
||||||
|
#define v7 res.s7 |
||||||
|
|
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, W0, 0x428A2F98); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, W1, 0x71374491); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, W2, 0xB5C0FBCF); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, W3, 0xE9B5DBA5); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, W4, 0x3956C25B); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, W5, 0x59F111F1); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, W6, 0x923F82A4); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, W7, 0xAB1C5ED5); |
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, W8, 0xD807AA98); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, W9, 0x12835B01); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, W10, 0x243185BE); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, W11, 0x550C7DC3); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, W12, 0x72BE5D74); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, W13, 0x80DEB1FE); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, W14, 0x9BDC06A7); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, W15, 0xC19BF174); |
||||||
|
|
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0xE49B69C1); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0xEFBE4786); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x0FC19DC6); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x240CA1CC); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x2DE92C6F); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4A7484AA); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5CB0A9DC); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x76F988DA); |
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x983E5152); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA831C66D); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xB00327C8); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xBF597FC7); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xC6E00BF3); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD5A79147); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0x06CA6351); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x14292967); |
||||||
|
|
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x27B70A85); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x2E1B2138); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x4D2C6DFC); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x53380D13); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x650A7354); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x766A0ABB); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x81C2C92E); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x92722C85); |
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0xA2BFE8A1); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA81A664B); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xC24B8B70); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xC76C51A3); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xD192E819); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD6990624); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0xF40E3585); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x106AA070); |
||||||
|
|
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x19A4C116); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x1E376C08); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x2748774C); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x34B0BCB5); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x391C0CB3); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4ED8AA4A); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5B9CCA4F); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x682E6FF3); |
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x748F82EE); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0x78A5636F); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0x84C87814); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0x8CC70208); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0x90BEFFFA); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xA4506CEB); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, RD14, 0xBEF9A3F7); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, RD15, 0xC67178F2); |
||||||
|
#undef v0 |
||||||
|
#undef v1 |
||||||
|
#undef v2 |
||||||
|
#undef v3 |
||||||
|
#undef v4 |
||||||
|
#undef v5 |
||||||
|
#undef v6 |
||||||
|
#undef v7 |
||||||
|
return (res+state); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
static inline uint8 sha256_round1(uint16 data) |
||||||
|
{ |
||||||
|
uint temp1; |
||||||
|
uint8 res; |
||||||
|
uint W0 = data.s0; |
||||||
|
uint W1 = data.s1; |
||||||
|
uint W2 = data.s2; |
||||||
|
uint W3 = data.s3; |
||||||
|
uint W4 = data.s4; |
||||||
|
uint W5 = data.s5; |
||||||
|
uint W6 = data.s6; |
||||||
|
uint W7 = data.s7; |
||||||
|
uint W8 = data.s8; |
||||||
|
uint W9 = data.s9; |
||||||
|
uint W10 = data.sA; |
||||||
|
uint W11 = data.sB; |
||||||
|
uint W12 = data.sC; |
||||||
|
uint W13 = data.sD; |
||||||
|
uint W14 = data.sE; |
||||||
|
uint W15 = data.sF; |
||||||
|
|
||||||
|
uint v0 = 0x6A09E667; |
||||||
|
uint v1 = 0xBB67AE85; |
||||||
|
uint v2 = 0x3C6EF372; |
||||||
|
uint v3 = 0xA54FF53A; |
||||||
|
uint v4 = 0x510E527F; |
||||||
|
uint v5 = 0x9B05688C; |
||||||
|
uint v6 = 0x1F83D9AB; |
||||||
|
uint v7 = 0x5BE0CD19; |
||||||
|
|
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, W0, 0x428A2F98); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, W1, 0x71374491); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, W2, 0xB5C0FBCF); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, W3, 0xE9B5DBA5); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, W4, 0x3956C25B); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, W5, 0x59F111F1); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, W6, 0x923F82A4); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, W7, 0xAB1C5ED5); |
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, W8, 0xD807AA98); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, W9, 0x12835B01); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, W10, 0x243185BE); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, W11, 0x550C7DC3); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, W12, 0x72BE5D74); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, W13, 0x80DEB1FE); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, W14, 0x9BDC06A7); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, W15, 0xC19BF174); |
||||||
|
|
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0xE49B69C1); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0xEFBE4786); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x0FC19DC6); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x240CA1CC); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x2DE92C6F); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4A7484AA); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5CB0A9DC); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x76F988DA); |
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x983E5152); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA831C66D); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xB00327C8); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xBF597FC7); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xC6E00BF3); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD5A79147); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0x06CA6351); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x14292967); |
||||||
|
|
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x27B70A85); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x2E1B2138); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x4D2C6DFC); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x53380D13); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x650A7354); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x766A0ABB); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x81C2C92E); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x92722C85); |
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0xA2BFE8A1); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA81A664B); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xC24B8B70); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xC76C51A3); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xD192E819); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD6990624); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0xF40E3585); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x106AA070); |
||||||
|
|
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x19A4C116); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x1E376C08); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x2748774C); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x34B0BCB5); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x391C0CB3); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4ED8AA4A); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5B9CCA4F); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x682E6FF3); |
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x748F82EE); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0x78A5636F); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0x84C87814); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0x8CC70208); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0x90BEFFFA); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xA4506CEB); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, RD14, 0xBEF9A3F7); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, RD15, 0xC67178F2); |
||||||
|
|
||||||
|
res.s0 = v0 + 0x6A09E667; |
||||||
|
res.s1 = v1 + 0xBB67AE85; |
||||||
|
res.s2 = v2 + 0x3C6EF372; |
||||||
|
res.s3 = v3 + 0xA54FF53A; |
||||||
|
res.s4 = v4 + 0x510E527F; |
||||||
|
res.s5 = v5 + 0x9B05688C; |
||||||
|
res.s6 = v6 + 0x1F83D9AB; |
||||||
|
res.s7 = v7 + 0x5BE0CD19; |
||||||
|
return (res); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
static inline uint8 sha256_round2(uint16 data,uint8 buf) |
||||||
|
{ |
||||||
|
uint temp1; |
||||||
|
uint8 res; |
||||||
|
uint W0 = data.s0; |
||||||
|
uint W1 = data.s1; |
||||||
|
uint W2 = data.s2; |
||||||
|
uint W3 = data.s3; |
||||||
|
uint W4 = data.s4; |
||||||
|
uint W5 = data.s5; |
||||||
|
uint W6 = data.s6; |
||||||
|
uint W7 = data.s7; |
||||||
|
uint W8 = data.s8; |
||||||
|
uint W9 = data.s9; |
||||||
|
uint W10 = data.sA; |
||||||
|
uint W11 = data.sB; |
||||||
|
uint W12 = data.sC; |
||||||
|
uint W13 = data.sD; |
||||||
|
uint W14 = data.sE; |
||||||
|
uint W15 = data.sF; |
||||||
|
|
||||||
|
uint v0 = buf.s0; |
||||||
|
uint v1 = buf.s1; |
||||||
|
uint v2 = buf.s2; |
||||||
|
uint v3 = buf.s3; |
||||||
|
uint v4 = buf.s4; |
||||||
|
uint v5 = buf.s5; |
||||||
|
uint v6 = buf.s6; |
||||||
|
uint v7 = buf.s7; |
||||||
|
|
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, W0, 0x428A2F98); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, W1, 0x71374491); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, W2, 0xB5C0FBCF); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, W3, 0xE9B5DBA5); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, W4, 0x3956C25B); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, W5, 0x59F111F1); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, W6, 0x923F82A4); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, W7, 0xAB1C5ED5); |
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, W8, 0xD807AA98); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, W9, 0x12835B01); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, W10, 0x243185BE); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, W11, 0x550C7DC3); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, W12, 0x72BE5D74); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, W13, 0x80DEB1FE); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, W14, 0x9BDC06A7); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, W15, 0xC19BF174); |
||||||
|
|
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0xE49B69C1); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0xEFBE4786); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x0FC19DC6); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x240CA1CC); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x2DE92C6F); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4A7484AA); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5CB0A9DC); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x76F988DA); |
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x983E5152); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA831C66D); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xB00327C8); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xBF597FC7); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xC6E00BF3); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD5A79147); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0x06CA6351); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x14292967); |
||||||
|
|
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x27B70A85); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x2E1B2138); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x4D2C6DFC); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x53380D13); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x650A7354); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x766A0ABB); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x81C2C92E); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x92722C85); |
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0xA2BFE8A1); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA81A664B); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xC24B8B70); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xC76C51A3); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xD192E819); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD6990624); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0xF40E3585); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x106AA070); |
||||||
|
|
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x19A4C116); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x1E376C08); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x2748774C); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x34B0BCB5); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x391C0CB3); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4ED8AA4A); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5B9CCA4F); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x682E6FF3); |
||||||
|
P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x748F82EE); |
||||||
|
P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0x78A5636F); |
||||||
|
P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0x84C87814); |
||||||
|
P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0x8CC70208); |
||||||
|
P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0x90BEFFFA); |
||||||
|
P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xA4506CEB); |
||||||
|
P(v2, v3, v4, v5, v6, v7, v0, v1, RD14, 0xBEF9A3F7); |
||||||
|
P(v1, v2, v3, v4, v5, v6, v7, v0, RD15, 0xC67178F2); |
||||||
|
|
||||||
|
res.s0 = (v0 + buf.s0); |
||||||
|
res.s1 = (v1 + buf.s1); |
||||||
|
res.s2 = (v2 + buf.s2); |
||||||
|
res.s3 = (v3 + buf.s3); |
||||||
|
res.s4 = (v4 + buf.s4); |
||||||
|
res.s5 = (v5 + buf.s5); |
||||||
|
res.s6 = (v6 + buf.s6); |
||||||
|
res.s7 = (v7 + buf.s7); |
||||||
|
return (res); |
||||||
|
} |
||||||
|
|
||||||
|
static inline uint8 sha256_80(uint* data,uint nonce) |
||||||
|
{ |
||||||
|
|
||||||
|
uint8 buf = sha256_round1( ((uint16*)data)[0]); |
||||||
|
uint16 in = padsha80; |
||||||
|
in.s0 = data[16]; |
||||||
|
in.s1 = data[17]; |
||||||
|
in.s2 = data[18]; |
||||||
|
in.s3 = nonce; |
||||||
|
|
||||||
|
return(sha256_round2(in,buf)); |
||||||
|
} |
||||||
|
|
@ -1,3 +1,3 @@ |
|||||||
noinst_LIBRARIES = libsph.a |
noinst_LIBRARIES = libsph.a |
||||||
|
|
||||||
libsph_a_SOURCES = bmw.c echo.c jh.c luffa.c simd.c blake.c cubehash.c groestl.c keccak.c shavite.c skein.c sha2.c sha2big.c fugue.c hamsi.c panama.c shabal.c whirlpool.c |
libsph_a_SOURCES = bmw.c echo.c jh.c luffa.c simd.c blake.c cubehash.c groestl.c keccak.c shavite.c skein.c sha2.c sha2big.c fugue.c hamsi.c panama.c shabal.c whirlpool.c sha256_Y.c |
||||||
|
@ -0,0 +1,418 @@ |
|||||||
|
/*-
|
||||||
|
* Copyright 2005,2007,2009 Colin Percival |
||||||
|
* All rights reserved. |
||||||
|
* |
||||||
|
* Redistribution and use in source and binary forms, with or without |
||||||
|
* modification, are permitted provided that the following conditions |
||||||
|
* are met: |
||||||
|
* 1. Redistributions of source code must retain the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer. |
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer in the |
||||||
|
* documentation and/or other materials provided with the distribution. |
||||||
|
* |
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
||||||
|
* SUCH DAMAGE. |
||||||
|
*/ |
||||||
|
|
||||||
|
#include <sys/types.h> |
||||||
|
|
||||||
|
#include <stdint.h> |
||||||
|
#include <string.h> |
||||||
|
|
||||||
|
#include "algorithm/sysendian.h" |
||||||
|
|
||||||
|
#include "sph/sha256_Y.h" |
||||||
|
|
||||||
|
/*
|
||||||
|
* Encode a length len/4 vector of (uint32_t) into a length len vector of |
||||||
|
* (unsigned char) in big-endian form. Assumes len is a multiple of 4. |
||||||
|
*/ |
||||||
|
static void |
||||||
|
be32enc_vect(unsigned char *dst, const uint32_t *src, size_t len) |
||||||
|
{ |
||||||
|
size_t i; |
||||||
|
|
||||||
|
for (i = 0; i < len / 4; i++) |
||||||
|
be32enc(dst + i * 4, src[i]); |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* Decode a big-endian length len vector of (unsigned char) into a length |
||||||
|
* len/4 vector of (uint32_t). Assumes len is a multiple of 4. |
||||||
|
*/ |
||||||
|
static void |
||||||
|
be32dec_vect(uint32_t *dst, const unsigned char *src, size_t len) |
||||||
|
{ |
||||||
|
size_t i; |
||||||
|
|
||||||
|
for (i = 0; i < len / 4; i++) |
||||||
|
dst[i] = be32dec(src + i * 4); |
||||||
|
} |
||||||
|
|
||||||
|
/* Elementary functions used by SHA256 */ |
||||||
|
#define Ch(x, y, z) ((x & (y ^ z)) ^ z) |
||||||
|
#define Maj(x, y, z) ((x & (y | z)) | (y & z)) |
||||||
|
#define SHR(x, n) (x >> n) |
||||||
|
#define ROTR(x, n) ((x >> n) | (x << (32 - n))) |
||||||
|
#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22)) |
||||||
|
#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25)) |
||||||
|
#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3)) |
||||||
|
#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10)) |
||||||
|
|
||||||
|
/* SHA256 round function */ |
||||||
|
#define RND(a, b, c, d, e, f, g, h, k) \ |
||||||
|
t0 = h + S1(e) + Ch(e, f, g) + k; \ |
||||||
|
t1 = S0(a) + Maj(a, b, c); \ |
||||||
|
d += t0; \ |
||||||
|
h = t0 + t1; |
||||||
|
|
||||||
|
/* Adjusted round function for rotating state */ |
||||||
|
#define RNDr(S, W, i, k) \ |
||||||
|
RND(S[(64 - i) % 8], S[(65 - i) % 8], \ |
||||||
|
S[(66 - i) % 8], S[(67 - i) % 8], \ |
||||||
|
S[(68 - i) % 8], S[(69 - i) % 8], \ |
||||||
|
S[(70 - i) % 8], S[(71 - i) % 8], \ |
||||||
|
W[i] + k) |
||||||
|
|
||||||
|
/*
|
||||||
|
* SHA256 block compression function. The 256-bit state is transformed via |
||||||
|
* the 512-bit input block to produce a new state. |
||||||
|
*/ |
||||||
|
static void |
||||||
|
SHA256_Transform(uint32_t * state, const unsigned char block[64]) |
||||||
|
{ |
||||||
|
uint32_t W[64]; |
||||||
|
uint32_t S[8]; |
||||||
|
uint32_t t0, t1; |
||||||
|
int i; |
||||||
|
/* 1. Prepare message schedule W. */ |
||||||
|
be32dec_vect(W, block, 64); |
||||||
|
|
||||||
|
for (i = 16; i < 64; i++) |
||||||
|
W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16]; |
||||||
|
|
||||||
|
/* 2. Initialize working variables. */ |
||||||
|
memcpy(S, state, 32); |
||||||
|
|
||||||
|
/* 3. Mix. */ |
||||||
|
RNDr(S, W, 0, 0x428a2f98); |
||||||
|
RNDr(S, W, 1, 0x71374491); |
||||||
|
RNDr(S, W, 2, 0xb5c0fbcf); |
||||||
|
RNDr(S, W, 3, 0xe9b5dba5); |
||||||
|
RNDr(S, W, 4, 0x3956c25b); |
||||||
|
RNDr(S, W, 5, 0x59f111f1); |
||||||
|
RNDr(S, W, 6, 0x923f82a4); |
||||||
|
RNDr(S, W, 7, 0xab1c5ed5); |
||||||
|
RNDr(S, W, 8, 0xd807aa98); |
||||||
|
RNDr(S, W, 9, 0x12835b01); |
||||||
|
RNDr(S, W, 10, 0x243185be); |
||||||
|
RNDr(S, W, 11, 0x550c7dc3); |
||||||
|
RNDr(S, W, 12, 0x72be5d74); |
||||||
|
RNDr(S, W, 13, 0x80deb1fe); |
||||||
|
RNDr(S, W, 14, 0x9bdc06a7); |
||||||
|
RNDr(S, W, 15, 0xc19bf174); |
||||||
|
RNDr(S, W, 16, 0xe49b69c1); |
||||||
|
RNDr(S, W, 17, 0xefbe4786); |
||||||
|
RNDr(S, W, 18, 0x0fc19dc6); |
||||||
|
RNDr(S, W, 19, 0x240ca1cc); |
||||||
|
RNDr(S, W, 20, 0x2de92c6f); |
||||||
|
RNDr(S, W, 21, 0x4a7484aa); |
||||||
|
RNDr(S, W, 22, 0x5cb0a9dc); |
||||||
|
RNDr(S, W, 23, 0x76f988da); |
||||||
|
RNDr(S, W, 24, 0x983e5152); |
||||||
|
RNDr(S, W, 25, 0xa831c66d); |
||||||
|
RNDr(S, W, 26, 0xb00327c8); |
||||||
|
RNDr(S, W, 27, 0xbf597fc7); |
||||||
|
RNDr(S, W, 28, 0xc6e00bf3); |
||||||
|
RNDr(S, W, 29, 0xd5a79147); |
||||||
|
RNDr(S, W, 30, 0x06ca6351); |
||||||
|
RNDr(S, W, 31, 0x14292967); |
||||||
|
RNDr(S, W, 32, 0x27b70a85); |
||||||
|
RNDr(S, W, 33, 0x2e1b2138); |
||||||
|
RNDr(S, W, 34, 0x4d2c6dfc); |
||||||
|
RNDr(S, W, 35, 0x53380d13); |
||||||
|
RNDr(S, W, 36, 0x650a7354); |
||||||
|
RNDr(S, W, 37, 0x766a0abb); |
||||||
|
RNDr(S, W, 38, 0x81c2c92e); |
||||||
|
RNDr(S, W, 39, 0x92722c85); |
||||||
|
RNDr(S, W, 40, 0xa2bfe8a1); |
||||||
|
RNDr(S, W, 41, 0xa81a664b); |
||||||
|
RNDr(S, W, 42, 0xc24b8b70); |
||||||
|
RNDr(S, W, 43, 0xc76c51a3); |
||||||
|
RNDr(S, W, 44, 0xd192e819); |
||||||
|
RNDr(S, W, 45, 0xd6990624); |
||||||
|
RNDr(S, W, 46, 0xf40e3585); |
||||||
|
RNDr(S, W, 47, 0x106aa070); |
||||||
|
RNDr(S, W, 48, 0x19a4c116); |
||||||
|
RNDr(S, W, 49, 0x1e376c08); |
||||||
|
RNDr(S, W, 50, 0x2748774c); |
||||||
|
RNDr(S, W, 51, 0x34b0bcb5); |
||||||
|
RNDr(S, W, 52, 0x391c0cb3); |
||||||
|
RNDr(S, W, 53, 0x4ed8aa4a); |
||||||
|
RNDr(S, W, 54, 0x5b9cca4f); |
||||||
|
RNDr(S, W, 55, 0x682e6ff3); |
||||||
|
RNDr(S, W, 56, 0x748f82ee); |
||||||
|
RNDr(S, W, 57, 0x78a5636f); |
||||||
|
RNDr(S, W, 58, 0x84c87814); |
||||||
|
RNDr(S, W, 59, 0x8cc70208); |
||||||
|
RNDr(S, W, 60, 0x90befffa); |
||||||
|
RNDr(S, W, 61, 0xa4506ceb); |
||||||
|
RNDr(S, W, 62, 0xbef9a3f7); |
||||||
|
RNDr(S, W, 63, 0xc67178f2); |
||||||
|
|
||||||
|
/* 4. Mix local working variables into global state */ |
||||||
|
for (i = 0; i < 8; i++) { |
||||||
|
state[i] += S[i]; |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
/* Clean the stack. */ |
||||||
|
memset(W, 0, 256); |
||||||
|
memset(S, 0, 32); |
||||||
|
t0 = t1 = 0; |
||||||
|
} |
||||||
|
|
||||||
|
static unsigned char PAD[64] = { |
||||||
|
0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
||||||
|
}; |
||||||
|
|
||||||
|
/* Add padding and terminating bit-count. */ |
||||||
|
static void |
||||||
|
SHA256_Pad(SHA256_CTX_Y * ctx) |
||||||
|
{ |
||||||
|
unsigned char len[8]; |
||||||
|
uint32_t r, plen; |
||||||
|
|
||||||
|
/*
|
||||||
|
* Convert length to a vector of bytes -- we do this now rather |
||||||
|
* than later because the length will change after we pad. |
||||||
|
*/ |
||||||
|
be32enc_vect(len, ctx->count, 8); |
||||||
|
|
||||||
|
/* Add 1--64 bytes so that the resulting length is 56 mod 64 */ |
||||||
|
r = (ctx->count[1] >> 3) & 0x3f; |
||||||
|
plen = (r < 56) ? (56 - r) : (120 - r); |
||||||
|
SHA256_Update_Y(ctx, PAD, (size_t)plen); |
||||||
|
|
||||||
|
/* Add the terminating bit-count */ |
||||||
|
SHA256_Update_Y(ctx, len, 8); |
||||||
|
} |
||||||
|
|
||||||
|
/* SHA-256 initialization. Begins a SHA-256 operation. */ |
||||||
|
void |
||||||
|
SHA256_Init_Y(SHA256_CTX_Y * ctx) |
||||||
|
{ |
||||||
|
|
||||||
|
/* Zero bits processed so far */ |
||||||
|
ctx->count[0] = ctx->count[1] = 0; |
||||||
|
|
||||||
|
/* Magic initialization constants */ |
||||||
|
ctx->state[0] = 0x6A09E667; |
||||||
|
ctx->state[1] = 0xBB67AE85; |
||||||
|
ctx->state[2] = 0x3C6EF372; |
||||||
|
ctx->state[3] = 0xA54FF53A; |
||||||
|
ctx->state[4] = 0x510E527F; |
||||||
|
ctx->state[5] = 0x9B05688C; |
||||||
|
ctx->state[6] = 0x1F83D9AB; |
||||||
|
ctx->state[7] = 0x5BE0CD19; |
||||||
|
} |
||||||
|
|
||||||
|
/* Add bytes into the hash */ |
||||||
|
void |
||||||
|
SHA256_Update_Y(SHA256_CTX_Y * ctx, const void *in, size_t len) |
||||||
|
{ |
||||||
|
uint32_t bitlen[2]; |
||||||
|
uint32_t r; |
||||||
|
const unsigned char *src = in; |
||||||
|
|
||||||
|
/* Number of bytes left in the buffer from previous updates */ |
||||||
|
r = (ctx->count[1] >> 3) & 0x3f; |
||||||
|
|
||||||
|
/* Convert the length into a number of bits */ |
||||||
|
bitlen[1] = ((uint32_t)len) << 3; |
||||||
|
bitlen[0] = (uint32_t)(len >> 29); |
||||||
|
|
||||||
|
/* Update number of bits */ |
||||||
|
if ((ctx->count[1] += bitlen[1]) < bitlen[1]) |
||||||
|
ctx->count[0]++; |
||||||
|
ctx->count[0] += bitlen[0]; |
||||||
|
|
||||||
|
/* Handle the case where we don't need to perform any transforms */ |
||||||
|
if (len < 64 - r) { |
||||||
|
|
||||||
|
memcpy(&ctx->buf[r], src, len); |
||||||
|
return; |
||||||
|
} |
||||||
|
|
||||||
|
/* Finish the current block */ |
||||||
|
memcpy(&ctx->buf[r], src, 64 - r); |
||||||
|
|
||||||
|
SHA256_Transform(ctx->state, ctx->buf); |
||||||
|
src += 64 - r; |
||||||
|
len -= 64 - r; |
||||||
|
|
||||||
|
/* Perform complete blocks */ |
||||||
|
|
||||||
|
while (len >= 64) { |
||||||
|
SHA256_Transform(ctx->state, src); |
||||||
|
src += 64; |
||||||
|
len -= 64; |
||||||
|
} |
||||||
|
|
||||||
|
/* Copy left over data into buffer */ |
||||||
|
memcpy(ctx->buf, src, len); |
||||||
|
} |
||||||
|
|
||||||
|
/*
|
||||||
|
* SHA-256 finalization. Pads the input data, exports the hash value, |
||||||
|
* and clears the context state. |
||||||
|
*/ |
||||||
|
void |
||||||
|
SHA256_Final_Y(unsigned char digest[32], SHA256_CTX_Y * ctx) |
||||||
|
{ |
||||||
|
/* Add padding */ |
||||||
|
SHA256_Pad(ctx); |
||||||
|
|
||||||
|
/* Write the hash */ |
||||||
|
be32enc_vect(digest, ctx->state, 32); |
||||||
|
|
||||||
|
/* Clear the context state */ |
||||||
|
memset((void *)ctx, 0, sizeof(*ctx)); |
||||||
|
} |
||||||
|
|
||||||
|
/* Initialize an HMAC-SHA256 operation with the given key. */ |
||||||
|
void |
||||||
|
HMAC_SHA256_Init_Y(HMAC_SHA256_CTX_Y * ctx, const void * _K, size_t Klen) |
||||||
|
{ |
||||||
|
unsigned char pad[64]; |
||||||
|
unsigned char khash[32]; |
||||||
|
const unsigned char * K = _K; |
||||||
|
size_t i; |
||||||
|
|
||||||
|
/* If Klen > 64, the key is really SHA256(K). */ |
||||||
|
if (Klen > 64) { |
||||||
|
SHA256_Init_Y(&ctx->ictx); |
||||||
|
SHA256_Update_Y(&ctx->ictx, K, Klen); |
||||||
|
SHA256_Final_Y(khash, &ctx->ictx); |
||||||
|
K = khash; |
||||||
|
Klen = 32; |
||||||
|
} |
||||||
|
|
||||||
|
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */ |
||||||
|
SHA256_Init_Y(&ctx->ictx); |
||||||
|
memset(pad, 0x36, 64); |
||||||
|
for (i = 0; i < Klen; i++) { |
||||||
|
pad[i] ^= K[i]; |
||||||
|
} |
||||||
|
SHA256_Update_Y(&ctx->ictx, pad, 64); |
||||||
|
|
||||||
|
/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */ |
||||||
|
SHA256_Init_Y(&ctx->octx); |
||||||
|
memset(pad, 0x5c, 64); |
||||||
|
for (i = 0; i < Klen; i++) |
||||||
|
{ |
||||||
|
pad[i] ^= K[i]; |
||||||
|
} |
||||||
|
SHA256_Update_Y(&ctx->octx, pad, 64); |
||||||
|
|
||||||
|
/* Clean the stack. */ |
||||||
|
memset(khash, 0, 32); |
||||||
|
} |
||||||
|
|
||||||
|
/* Add bytes to the HMAC-SHA256 operation. */ |
||||||
|
void |
||||||
|
HMAC_SHA256_Update_Y(HMAC_SHA256_CTX_Y * ctx, const void *in, size_t len) |
||||||
|
{ |
||||||
|
/* Feed data to the inner SHA256 operation. */ |
||||||
|
SHA256_Update_Y(&ctx->ictx, in, len); |
||||||
|
} |
||||||
|
|
||||||
|
/* Finish an HMAC-SHA256 operation. */ |
||||||
|
void |
||||||
|
HMAC_SHA256_Final_Y(unsigned char digest[32], HMAC_SHA256_CTX_Y * ctx) |
||||||
|
{ |
||||||
|
unsigned char ihash[32]; |
||||||
|
|
||||||
|
/* Finish the inner SHA256 operation. */ |
||||||
|
SHA256_Final_Y(ihash, &ctx->ictx); |
||||||
|
|
||||||
|
/* Feed the inner hash to the outer SHA256 operation. */ |
||||||
|
SHA256_Update_Y(&ctx->octx, ihash, 32); |
||||||
|
|
||||||
|
/* Finish the outer SHA256 operation. */ |
||||||
|
SHA256_Final_Y(digest, &ctx->octx); |
||||||
|
|
||||||
|
/* Clean the stack. */ |
||||||
|
memset(ihash, 0, 32); |
||||||
|
} |
||||||
|
|
||||||
|
/**
|
||||||
|
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen): |
||||||
|
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and |
||||||
|
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1). |
||||||
|
*/ |
||||||
|
|
||||||
|
void |
||||||
|
PBKDF2_SHA256(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt, |
||||||
|
size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen) |
||||||
|
{ |
||||||
|
HMAC_SHA256_CTX_Y PShctx, hctx; |
||||||
|
size_t i; |
||||||
|
uint8_t ivec[4]; |
||||||
|
uint8_t U[32]; |
||||||
|
uint8_t T[32]; |
||||||
|
uint64_t j; |
||||||
|
int k; |
||||||
|
size_t clen; |
||||||
|
|
||||||
|
/* Compute HMAC state after processing P and S. */ |
||||||
|
HMAC_SHA256_Init_Y(&PShctx, passwd, passwdlen); |
||||||
|
HMAC_SHA256_Update_Y(&PShctx, salt, saltlen); |
||||||
|
|
||||||
|
/* Iterate through the blocks. */ |
||||||
|
for (i = 0; i * 32 < dkLen; i++) { |
||||||
|
/* Generate INT(i + 1). */ |
||||||
|
be32enc(ivec, (uint32_t)(i + 1)); |
||||||
|
|
||||||
|
/* Compute U_1 = PRF(P, S || INT(i)). */ |
||||||
|
memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX_Y)); |
||||||
|
HMAC_SHA256_Update_Y(&hctx, ivec, 4); |
||||||
|
HMAC_SHA256_Final_Y(U, &hctx); |
||||||
|
|
||||||
|
/* T_i = U_1 ... */ |
||||||
|
memcpy(T, U, 32); |
||||||
|
|
||||||
|
for (j = 2; j <= c; j++) { |
||||||
|
/* Compute U_j. */ |
||||||
|
HMAC_SHA256_Init_Y(&hctx, passwd, passwdlen); |
||||||
|
HMAC_SHA256_Update_Y(&hctx, U, 32); |
||||||
|
HMAC_SHA256_Final_Y(U, &hctx); |
||||||
|
|
||||||
|
/* ... xor U_j ... */ |
||||||
|
for (k = 0; k < 32; k++) |
||||||
|
T[k] ^= U[k]; |
||||||
|
} |
||||||
|
|
||||||
|
/* Copy as many bytes as necessary into buf. */ |
||||||
|
clen = dkLen - i * 32; |
||||||
|
if (clen > 32) |
||||||
|
clen = 32; |
||||||
|
memcpy(&buf[i * 32], T, clen); |
||||||
|
} |
||||||
|
|
||||||
|
/* Clean PShctx, since we never called _Final on it. */ |
||||||
|
memset(&PShctx, 0, sizeof(HMAC_SHA256_CTX_Y)); |
||||||
|
} |
@ -0,0 +1,63 @@ |
|||||||
|
/*-
|
||||||
|
* Copyright 2005,2007,2009 Colin Percival |
||||||
|
* All rights reserved. |
||||||
|
* |
||||||
|
* Redistribution and use in source and binary forms, with or without |
||||||
|
* modification, are permitted provided that the following conditions |
||||||
|
* are met: |
||||||
|
* 1. Redistributions of source code must retain the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer. |
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer in the |
||||||
|
* documentation and/or other materials provided with the distribution. |
||||||
|
* |
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
||||||
|
* SUCH DAMAGE. |
||||||
|
* |
||||||
|
* $FreeBSD: src/lib/libmd/sha256_Y.h,v 1.2 2006/01/17 15:35:56 phk Exp $ |
||||||
|
*/ |
||||||
|
|
||||||
|
#ifndef _SHA256_H_ |
||||||
|
#define _SHA256_H_ |
||||||
|
|
||||||
|
#include <sys/types.h> |
||||||
|
|
||||||
|
#include <stdint.h> |
||||||
|
|
||||||
|
typedef struct SHA256Context { |
||||||
|
uint32_t state[8]; |
||||||
|
uint32_t count[2]; |
||||||
|
unsigned char buf[64]; |
||||||
|
} SHA256_CTX_Y; |
||||||
|
|
||||||
|
typedef struct HMAC_SHA256Context { |
||||||
|
SHA256_CTX_Y ictx; |
||||||
|
SHA256_CTX_Y octx; |
||||||
|
} HMAC_SHA256_CTX_Y; |
||||||
|
|
||||||
|
void SHA256_Init_Y(SHA256_CTX_Y *); |
||||||
|
void SHA256_Update_Y(SHA256_CTX_Y *, const void *, size_t); |
||||||
|
void SHA256_Final_Y(unsigned char [32], SHA256_CTX_Y *); |
||||||
|
void HMAC_SHA256_Init_Y(HMAC_SHA256_CTX_Y *, const void *, size_t); |
||||||
|
void HMAC_SHA256_Update_Y(HMAC_SHA256_CTX_Y *, const void *, size_t); |
||||||
|
void HMAC_SHA256_Final_Y(unsigned char [32], HMAC_SHA256_CTX_Y *); |
||||||
|
|
||||||
|
/**
|
||||||
|
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen): |
||||||
|
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and |
||||||
|
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1). |
||||||
|
*/ |
||||||
|
void PBKDF2_SHA256(const uint8_t *, size_t, const uint8_t *, size_t, |
||||||
|
uint64_t, uint8_t *, size_t); |
||||||
|
|
||||||
|
|
||||||
|
#endif /* !_SHA256_H_ */ |
Loading…
Reference in new issue