mirror of
https://github.com/GOSTSec/sgminer
synced 2025-03-11 05:01:09 +00:00
first version supporting yescrypt algo
This is a temporary kernel; a somewhat faster one should be implemented soon.
This commit is contained in:
parent
cdae391248
commit
3eccf326a3
@ -70,6 +70,7 @@ sgminer_SOURCES += algorithm/whirlcoin.c algorithm/whirlcoin.h
|
|||||||
sgminer_SOURCES += algorithm/neoscrypt.c algorithm/neoscrypt.h
|
sgminer_SOURCES += algorithm/neoscrypt.c algorithm/neoscrypt.h
|
||||||
sgminer_SOURCES += algorithm/pluck.c algorithm/pluck.h
|
sgminer_SOURCES += algorithm/pluck.c algorithm/pluck.h
|
||||||
sgminer_SOURCES += algorithm/Lyra2RE.c algorithm/Lyra2RE.h algorithm/Lyra2.c algorithm/Lyra2.h algorithm/Sponge.c algorithm/Sponge.h
|
sgminer_SOURCES += algorithm/Lyra2RE.c algorithm/Lyra2RE.h algorithm/Lyra2.c algorithm/Lyra2.h algorithm/Sponge.c algorithm/Sponge.h
|
||||||
|
sgminer_SOURCES += algorithm/yescrypt.h algorithm/yescrypt.c algorithm/yescrypt_core.h algorithm/yescrypt-opt.c algorithm/yescryptcommon.c algorithm/sysendian.h
|
||||||
|
|
||||||
bin_SCRIPTS = $(top_srcdir)/kernel/*.cl
|
bin_SCRIPTS = $(top_srcdir)/kernel/*.cl
|
||||||
|
|
||||||
|
79
algorithm.c
79
algorithm.c
@ -33,6 +33,7 @@
|
|||||||
#include "algorithm/neoscrypt.h"
|
#include "algorithm/neoscrypt.h"
|
||||||
#include "algorithm/Lyra2RE.h"
|
#include "algorithm/Lyra2RE.h"
|
||||||
#include "algorithm/pluck.h"
|
#include "algorithm/pluck.h"
|
||||||
|
#include "algorithm/yescrypt.h"
|
||||||
|
|
||||||
#include "compat.h"
|
#include "compat.h"
|
||||||
|
|
||||||
@ -56,7 +57,8 @@ const char *algorithm_type_str[] = {
|
|||||||
"Whirlcoin",
|
"Whirlcoin",
|
||||||
"Neoscrypt",
|
"Neoscrypt",
|
||||||
"Lyra2RE",
|
"Lyra2RE",
|
||||||
"pluck"
|
"pluck",
|
||||||
|
"yescrypt"
|
||||||
};
|
};
|
||||||
|
|
||||||
void sha256(const unsigned char *message, unsigned int len, unsigned char *digest)
|
void sha256(const unsigned char *message, unsigned int len, unsigned char *digest)
|
||||||
@ -205,6 +207,71 @@ static cl_int queue_pluck_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_un
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Prepare one run of the single-kernel yescrypt implementation.
 *
 * Passes the work data through flip80() into clState->cldata, uploads the
 * first 80 bytes of cldata to CLbuffer0 with a blocking write, and then
 * binds the kernel arguments in this order: CLbuffer0, outputBuffer,
 * padbuffer8, buffer1, le_target.
 *
 * `threads` is unused.  Returns `status`, i.e. the result of the buffer
 * write combined with whatever the CL_SET_ARG macro stores into it.
 */
static cl_int queue_yescrypt_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
  /* NOTE(review): CL_SET_ARG is defined outside this view and presumably
   * uses the locals `kernel`, `num` and `status` by name -- do not rename. */
  cl_int status = 0;
  unsigned int num = 0;
  cl_kernel *kernel = &clState->kernel;
  const uint32_t *target_words = (const uint32_t *)blk->work->target;
  cl_uint le_target;

  /* Word 7 of work->target is used here (work->device_target is not). */
  le_target = (cl_uint)le32toh(target_words[7]);

  flip80(clState->cldata, blk->work->data);
  status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL);

  CL_SET_ARG(clState->CLbuffer0);
  CL_SET_ARG(clState->outputBuffer);
  CL_SET_ARG(clState->padbuffer8);
  CL_SET_ARG(clState->buffer1);
  CL_SET_ARG(le_target);

  return status;
}
|
||||||
|
|
||||||
|
/*
 * Prepare one run of the multi-kernel yescrypt implementation.
 *
 * Uploads the flip80()-processed 80-byte work data to CLbuffer0, then binds
 * arguments for the main kernel (clState->kernel) followed by the entries of
 * clState->extra_kernels, in the stage order labelled below.
 *
 * `threads` is unused.  Returns `status`, i.e. the result of the buffer
 * write combined with whatever the CL_*_ARG macros store into it.
 *
 * NOTE(review): CL_SET_ARG, CL_SET_ARG_0 and CL_NEXTKERNEL_SET_ARG_0 are
 * defined outside this view and presumably use the locals `kernel`, `num`
 * and `status` by name.  `num` is 0 when each CL_SET_ARG_0 /
 * CL_NEXTKERNEL_SET_ARG_0 is followed by CL_SET_ARG; if the *_0 variants do
 * not advance `num`, the following CL_SET_ARG calls rebind index 0 instead
 * of continuing at index 1 -- confirm against the macro definitions and the
 * kernel signatures before enabling this path.
 */
static cl_int queue_yescrypt_multikernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
  cl_int status = 0;
  unsigned int num = 0;
  cl_kernel *kernel;
  const uint32_t *target_words = (const uint32_t *)blk->work->target;
  cl_uint le_target;

  /* Word 7 of work->target is used here (work->device_target is not). */
  le_target = (cl_uint)le32toh(target_words[7]);

  flip80(clState->cldata, blk->work->data);
  status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL);

  /* Stage: pbkdf and initial sha (main kernel). */
  kernel = &clState->kernel;
  CL_SET_ARG_0(clState->CLbuffer0);
  CL_SET_ARG(clState->buffer2);
  CL_SET_ARG(clState->buffer3);

  /* Stage: mix1_1 (salsa) -- first extra kernel. */
  num = 0;
  kernel = clState->extra_kernels;
  CL_SET_ARG_0(clState->buffer1);
  CL_SET_ARG(clState->buffer2);

  /* Stage: mix1_2 / mix2_2 (pwxform) -- next extra kernel.  A separate
   * mix2_2 kernel taking (padbuffer8, buffer1, buffer2) is currently
   * disabled. */
  CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
  CL_SET_ARG(clState->buffer1);
  CL_SET_ARG(clState->buffer2);

  /* Stage: pbkdf and finalization -- next extra kernel. */
  CL_NEXTKERNEL_SET_ARG_0(clState->CLbuffer0);
  CL_SET_ARG(clState->outputBuffer);
  CL_SET_ARG(clState->buffer2);
  CL_SET_ARG(clState->buffer3);
  CL_SET_ARG(le_target);

  return status;
}
|
||||||
|
|
||||||
static cl_int queue_maxcoin_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
|
static cl_int queue_maxcoin_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
|
||||||
{
|
{
|
||||||
cl_kernel *kernel = &clState->kernel;
|
cl_kernel *kernel = &clState->kernel;
|
||||||
@ -747,6 +814,16 @@ static algorithm_settings_t algos[] = {
|
|||||||
A_PLUCK("pluck"),
|
A_PLUCK("pluck"),
|
||||||
#undef A_PLUCK
|
#undef A_PLUCK
|
||||||
|
|
||||||
|
#define A_YESCRYPT(a) \
|
||||||
|
{ a, ALGO_YESCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, yescrypt_regenhash, queue_yescrypt_kernel, gen_hash, append_neoscrypt_compiler_options}
|
||||||
|
A_YESCRYPT("yescrypt"),
|
||||||
|
#undef A_YESCRYPT
|
||||||
|
|
||||||
|
//#define A_YESCRYPT(a) \
|
||||||
|
// { a, ALGO_YESCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 3, -1,CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE , yescrypt_regenhash, queue_yescrypt_multikernel, gen_hash, append_neoscrypt_compiler_options}
|
||||||
|
// A_YESCRYPT("yescrypt"),
|
||||||
|
//#undef A_YESCRYPT
|
||||||
|
|
||||||
|
|
||||||
// kernels starting from this will have difficulty calculated by using quarkcoin algorithm
|
// kernels starting from this will have difficulty calculated by using quarkcoin algorithm
|
||||||
#define A_QUARK(a, b) \
|
#define A_QUARK(a, b) \
|
||||||
|
@ -27,7 +27,8 @@ typedef enum {
|
|||||||
ALGO_WHIRL,
|
ALGO_WHIRL,
|
||||||
ALGO_NEOSCRYPT,
|
ALGO_NEOSCRYPT,
|
||||||
ALGO_LYRA2RE,
|
ALGO_LYRA2RE,
|
||||||
ALGO_PLUCK
|
ALGO_PLUCK,
|
||||||
|
ALGO_YESCRYPT
|
||||||
} algorithm_type_t;
|
} algorithm_type_t;
|
||||||
|
|
||||||
extern const char *algorithm_type_str[];
|
extern const char *algorithm_type_str[];
|
||||||
|
@ -3,6 +3,8 @@
|
|||||||
|
|
||||||
#include "miner.h"
|
#include "miner.h"
|
||||||
#define PLUCK_SCRATCHBUF_SIZE (128 * 1024)
|
#define PLUCK_SCRATCHBUF_SIZE (128 * 1024)
|
||||||
|
#define PLUCK_SECBUF_SIZE (64 * 1024)
|
||||||
|
|
||||||
extern int pluck_test(unsigned char *pdata, const unsigned char *ptarget,
|
extern int pluck_test(unsigned char *pdata, const unsigned char *ptarget,
|
||||||
uint32_t nonce);
|
uint32_t nonce);
|
||||||
extern void pluck_regenhash(struct work *work);
|
extern void pluck_regenhash(struct work *work);
|
||||||
|
140
algorithm/sysendian.h
Normal file
140
algorithm/sysendian.h
Normal file
@ -0,0 +1,140 @@
|
|||||||
|
/*-
|
||||||
|
* Copyright 2007-2009 Colin Percival
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* This file was originally written by Colin Percival as part of the Tarsnap
|
||||||
|
* online backup system.
|
||||||
|
*/
|
||||||
|
#ifndef _SYSENDIAN_H_
|
||||||
|
#define _SYSENDIAN_H_
|
||||||
|
|
||||||
|
/* If we don't have be64enc, the <sys/endian.h> we have isn't usable. */
|
||||||
|
#if !HAVE_DECL_BE64ENC
|
||||||
|
#undef HAVE_SYS_ENDIAN_H
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef HAVE_SYS_ENDIAN_H
|
||||||
|
|
||||||
|
#include <sys/endian.h>
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#if !HAVE_DECL_LE32DEC
|
||||||
|
/* Decode a 32-bit value stored least-significant byte first at pp[0..3]. */
static uint32_t le32dec(const void *pp)
{
	const uint8_t *bytes = (const uint8_t *)pp;
	uint32_t value = 0;
	unsigned int i = 4;

	/* Fold from the most significant byte (index 3) down to index 0. */
	while (i-- > 0)
		value = (value << 8) | (uint32_t)bytes[i];

	return value;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !HAVE_DECL_BE32ENC
|
||||||
|
/* Store x at pp[0..3] most-significant byte first. */
static void be32enc(void *pp, uint32_t x)
{
	uint8_t *bytes = (uint8_t *)pp;
	unsigned int i;

	for (i = 0; i < 4; i++)
		bytes[i] = (uint8_t)((x >> (24 - 8 * i)) & 0xff);
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !HAVE_DECL_BE32DEC
|
||||||
|
/* Decode a 32-bit value stored most-significant byte first at pp[0..3]. */
static uint32_t be32dec(const void *pp)
{
	const uint8_t *bytes = (const uint8_t *)pp;
	uint32_t value = 0;
	unsigned int i;

	for (i = 0; i < 4; i++)
		value = (value << 8) | (uint32_t)bytes[i];

	return value;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !HAVE_DECL_LE32ENC
|
||||||
|
/* Store x at pp[0..3] least-significant byte first. */
static void le32enc(void *pp, uint32_t x)
{
	uint8_t *bytes = (uint8_t *)pp;
	unsigned int i;

	for (i = 0; i < 4; i++) {
		bytes[i] = (uint8_t)(x & 0xff);
		x >>= 8;
	}
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Decode a 64-bit value stored most-significant byte first at pp[0..7]. */
static uint64_t
be64dec(const void *pp)
{
	const uint8_t *bytes = (const uint8_t *)pp;
	uint64_t value = 0;
	unsigned int i;

	for (i = 0; i < 8; i++)
		value = (value << 8) | (uint64_t)bytes[i];

	return value;
}
|
||||||
|
|
||||||
|
/* Store x at pp[0..7] most-significant byte first. */
static void
be64enc(void *pp, uint64_t x)
{
	uint8_t *bytes = (uint8_t *)pp;
	unsigned int i = 8;

	/* Fill from the last byte backwards, peeling off the low byte each time. */
	while (i-- > 0) {
		bytes[i] = (uint8_t)(x & 0xff);
		x >>= 8;
	}
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Decode a 64-bit value stored least-significant byte first at pp[0..7]. */
static uint64_t
le64dec(const void *pp)
{
	const uint8_t *bytes = (const uint8_t *)pp;
	uint64_t value = 0;
	unsigned int i = 8;

	/* Fold from the most significant byte (index 7) down to index 0. */
	while (i-- > 0)
		value = (value << 8) | (uint64_t)bytes[i];

	return value;
}
|
||||||
|
|
||||||
|
/* Store x at pp[0..7] least-significant byte first. */
static void
le64enc(void *pp, uint64_t x)
{
	uint8_t *bytes = (uint8_t *)pp;
	unsigned int i;

	for (i = 0; i < 8; i++)
		bytes[i] = (uint8_t)((x >> (8 * i)) & 0xff);
}
|
||||||
|
#endif /* !HAVE_SYS_ENDIAN_H */
|
||||||
|
|
||||||
|
#endif /* !_SYSENDIAN_H_ */
|
1364
algorithm/yescrypt-opt.c
Normal file
1364
algorithm/yescrypt-opt.c
Normal file
File diff suppressed because it is too large
Load Diff
128
algorithm/yescrypt.c
Normal file
128
algorithm/yescrypt.c
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
/*-
|
||||||
|
* Copyright 2015 djm34
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
#include "miner.h"
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "algorithm/yescrypt_core.h"
|
||||||
|
|
||||||
|
static const uint32_t diff1targ = 0x0000ffff;
|
||||||
|
|
||||||
|
/*
 * Write htobe32(src[k]) to dst[k] for k = 0 .. len-1, in ascending order.
 */
static inline void
be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
{
	uint32_t word = 0;

	while (word < len) {
		/* htobe32 may be a macro: keep its argument free of side effects. */
		dst[word] = htobe32(src[word]);
		word++;
	}
}
|
||||||
|
|
||||||
|
/* Used externally as confirmation of correct OCL code */
|
||||||
|
int yescrypt_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
|
||||||
|
{
|
||||||
|
uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]);
|
||||||
|
uint32_t data[20], ohash[8];
|
||||||
|
|
||||||
|
be32enc_vect(data, (const uint32_t *)pdata, 19);
|
||||||
|
data[19] = htobe32(nonce);
|
||||||
|
yescrypt_hash((unsigned char*)data,(unsigned char*)ohash);
|
||||||
|
|
||||||
|
tmp_hash7 = be32toh(ohash[7]);
|
||||||
|
|
||||||
|
applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx",
|
||||||
|
(long unsigned int)Htarg,
|
||||||
|
(long unsigned int)diff1targ,
|
||||||
|
(long unsigned int)tmp_hash7);
|
||||||
|
|
||||||
|
if (tmp_hash7 > diff1targ)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
if (tmp_hash7 > Htarg)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
void yescrypt_regenhash(struct work *work)
|
||||||
|
{
|
||||||
|
uint32_t data[20];
|
||||||
|
uint32_t *nonce = (uint32_t *)(work->data + 76);
|
||||||
|
uint32_t *ohash = (uint32_t *)(work->hash);
|
||||||
|
|
||||||
|
be32enc_vect(data, (const uint32_t *)work->data, 19);
|
||||||
|
data[19] = htobe32(*nonce);
|
||||||
|
|
||||||
|
yescrypt_hash((unsigned char*)data, (unsigned char*)ohash);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Scan nonces on the CPU, starting at n + 1, until a hash meets the target,
 * max_nonce is reached, or thr->work_restart is set.
 *
 * pdata       80-byte header; the nonce word at byte offset 76 is updated
 *             on every iteration, and word 19 is rewritten as htobe32(n)
 *             when a candidate is found.
 * ptarget     target; only word 7 (after le32toh) is compared.
 * max_nonce   last nonce value to try.
 * last_nonce  receives the last nonce that was tried.
 * n           nonce to start after.
 *
 * pmidstate, phash1 and phash are unused.
 *
 * Returns true when word 7 of the yescrypt hash is <= the target word.
 *
 * NOTE(review): unlike yescrypt_test(), the nonce is stored in data[19]
 * without htobe32() and the hash word is compared without be32toh() --
 * confirm which byte order is intended before relying on this path.
 */
bool scanhash_yescrypt(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
	unsigned char *pdata, unsigned char __maybe_unused *phash1,
	unsigned char __maybe_unused *phash, const unsigned char *ptarget,
	uint32_t max_nonce, uint32_t *last_nonce, uint32_t n)
{
	uint32_t *nonce = (uint32_t *)(pdata + 76);
	uint32_t data[20];
	uint32_t tmp_hash7;
	uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]);
	bool ret = false;

	be32enc_vect(data, (const uint32_t *)pdata, 19);

	while (1)
	{
		uint32_t ostate[8];

		*nonce = ++n;
		data[19] = (n);
		/* The stray pluckrehash() call that used to sit here was a
		 * copy/paste leftover from the pluck algorithm: it is not
		 * declared in this translation unit and its result was
		 * immediately overwritten by yescrypt_hash(). */
		yescrypt_hash((unsigned char*)data, (unsigned char*)ostate);
		tmp_hash7 = (ostate[7]);

		applog(LOG_INFO, "data7 %08lx", (long unsigned int)data[7]);

		if (unlikely(tmp_hash7 <= Htarg))
		{
			((uint32_t *)pdata)[19] = htobe32(n);
			*last_nonce = n;
			ret = true;
			break;
		}

		if (unlikely((n >= max_nonce) || thr->work_restart))
		{
			*last_nonce = n;
			break;
		}
	}

	return ret;
}
|
10
algorithm/yescrypt.h
Normal file
10
algorithm/yescrypt.h
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
#ifndef YESCRYPT_H
|
||||||
|
#define YESCRYPT_H
|
||||||
|
|
||||||
|
#include "miner.h"
|
||||||
|
/* Scratch buffer size, counted in uchar. */
#define YESCRYPT_SCRATCHBUF_SIZE (128 * 2048 * 8)
/* Size of the second buffer (presumably "secondary buffer" -- confirm at the allocation site). */
#define YESCRYPT_SECBUF_SIZE (128 * 64 * 8)
/* Original misspelled name, kept as an alias so existing users still compile. */
#define YESCRYP_SECBUF_SIZE YESCRYPT_SECBUF_SIZE
|
||||||
|
extern int yescrypt_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce);
|
||||||
|
extern void yescrypt_regenhash(struct work *work);
|
||||||
|
|
||||||
|
#endif /* YESCRYPT_H */
|
376
algorithm/yescrypt_core.h
Normal file
376
algorithm/yescrypt_core.h
Normal file
@ -0,0 +1,376 @@
|
|||||||
|
/*-
|
||||||
|
* Copyright 2009 Colin Percival
|
||||||
|
* Copyright 2013,2014 Alexander Peslyak
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* This file was originally written by Colin Percival as part of the Tarsnap
|
||||||
|
* online backup system.
|
||||||
|
*/
|
||||||
|
#ifndef _YESCRYPT_H_
|
||||||
|
#define _YESCRYPT_H_
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h> /* for size_t */
|
||||||
|
#include <errno.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
//extern void yescrypt_hash_sp(const unsigned char *input, unsigned char *output);
|
||||||
|
extern void yescrypt_hash(const unsigned char *input, unsigned char *output);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* crypto_scrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen):
|
||||||
|
* Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r,
|
||||||
|
* p, buflen) and write the result into buf. The parameters r, p, and buflen
|
||||||
|
* must satisfy r * p < 2^30 and buflen <= (2^32 - 1) * 32. The parameter N
|
||||||
|
* must be a power of 2 greater than 1.
|
||||||
|
*
|
||||||
|
* Return 0 on success; or -1 on error.
|
||||||
|
*
|
||||||
|
* MT-safe as long as buf is local to the thread.
|
||||||
|
*/
|
||||||
|
extern int crypto_scrypt(const uint8_t * __passwd, size_t __passwdlen,
|
||||||
|
const uint8_t * __salt, size_t __saltlen,
|
||||||
|
uint64_t __N, uint32_t __r, uint32_t __p,
|
||||||
|
uint8_t * __buf, size_t __buflen);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Internal type used by the memory allocator. Please do not use it directly.
|
||||||
|
* Use yescrypt_shared_t and yescrypt_local_t as appropriate instead, since
|
||||||
|
* they might differ from each other in a future version.
|
||||||
|
*/
|
||||||
|
typedef struct {
	void *base;
	void *aligned;
	size_t base_size;
	size_t aligned_size;
} yescrypt_region_t;

/**
 * Types for shared (ROM) and thread-local (RAM) data structures.
 * Both are currently built on yescrypt_region_t.
 */
typedef yescrypt_region_t yescrypt_shared1_t;

typedef struct {
	yescrypt_shared1_t shared1;
	uint32_t mask1;
} yescrypt_shared_t;

typedef yescrypt_region_t yescrypt_local_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Possible values for yescrypt_init_shared()'s flags argument.
|
||||||
|
*/
|
||||||
|
typedef enum {
	YESCRYPT_SHARED_DEFAULTS = 0,
	YESCRYPT_SHARED_PREALLOCATED = 1 << 8	/* 0x100 */
} yescrypt_init_shared_flags_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Possible values for the flags argument of yescrypt_kdf(),
|
||||||
|
* yescrypt_gensalt_r(), yescrypt_gensalt(). These may be OR'ed together,
|
||||||
|
* except that YESCRYPT_WORM and YESCRYPT_RW are mutually exclusive.
|
||||||
|
* Please refer to the description of yescrypt_kdf() below for the meaning of
|
||||||
|
* these flags.
|
||||||
|
*/
|
||||||
|
typedef enum {
	/* public */
	YESCRYPT_WORM = 0,
	YESCRYPT_RW = 1 << 0,
	YESCRYPT_PARALLEL_SMIX = 1 << 1,
	YESCRYPT_PWXFORM = 1 << 2,
	/* private */
	__YESCRYPT_INIT_SHARED_1 = 1 << 16,	/* 0x10000 */
	__YESCRYPT_INIT_SHARED_2 = 1 << 17,	/* 0x20000 */
	__YESCRYPT_INIT_SHARED =
	    __YESCRYPT_INIT_SHARED_1 | __YESCRYPT_INIT_SHARED_2	/* 0x30000 */
} yescrypt_flags_t;

/* Union of every flag bit defined above. */
#define YESCRYPT_KNOWN_FLAGS \
	(YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | YESCRYPT_PWXFORM | \
	__YESCRYPT_INIT_SHARED)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* yescrypt_init_shared(shared, param, paramlen, N, r, p, flags, mask,
|
||||||
|
* buf, buflen):
|
||||||
|
* Optionally allocate memory for and initialize the shared (ROM) data
|
||||||
|
* structure. The parameters N, r, and p must satisfy the same conditions as
|
||||||
|
* with crypto_scrypt(). param and paramlen specify a local parameter with
|
||||||
|
* which the ROM is seeded. If buf is not NULL, then it is used to return
|
||||||
|
* buflen bytes of message digest for the initialized ROM (the caller may use
|
||||||
|
* this to verify that the ROM has been computed in the same way that it was on
|
||||||
|
* a previous run).
|
||||||
|
*
|
||||||
|
* Return 0 on success; or -1 on error.
|
||||||
|
*
|
||||||
|
* If bit YESCRYPT_SHARED_PREALLOCATED in flags is set, then memory for the
|
||||||
|
* ROM is assumed to have been preallocated by the caller, with
|
||||||
|
* shared->shared1.aligned being the start address of the ROM and
|
||||||
|
* shared->shared1.aligned_size being its size (which must be consistent with
|
||||||
|
* N, r, and p). This may be used e.g. when the ROM is to be placed in a SysV
|
||||||
|
* shared memory segment allocated by the caller.
|
||||||
|
*
|
||||||
|
* mask controls the frequency of ROM accesses by yescrypt_kdf(). Normally it
|
||||||
|
* should be set to 1, to interleave RAM and ROM accesses, which works well
|
||||||
|
* when both regions reside in the machine's RAM anyway. Other values may be
|
||||||
|
* used e.g. when the ROM is memory-mapped from a disk file. Recommended mask
|
||||||
|
* values are powers of 2 minus 1 or minus 2. Here's the effect of some mask
|
||||||
|
* values:
|
||||||
|
* mask value ROM accesses in SMix 1st loop ROM accesses in SMix 2nd loop
|
||||||
|
* 0 0 1/2
|
||||||
|
* 1 1/2 1/2
|
||||||
|
* 2 0 1/4
|
||||||
|
* 3 1/4 1/4
|
||||||
|
* 6 0 1/8
|
||||||
|
* 7 1/8 1/8
|
||||||
|
* 14 0 1/16
|
||||||
|
* 15 1/16 1/16
|
||||||
|
* 1022 0 1/1024
|
||||||
|
* 1023 1/1024 1/1024
|
||||||
|
*
|
||||||
|
* Actual computation of the ROM contents may be avoided, if you don't intend
|
||||||
|
* to use a ROM but need a dummy shared structure, by calling this function
|
||||||
|
* with NULL, 0, 0, 0, 0, YESCRYPT_SHARED_DEFAULTS, 0, NULL, 0 for the
|
||||||
|
* arguments starting with param and on.
|
||||||
|
*
|
||||||
|
* MT-safe as long as shared is local to the thread.
|
||||||
|
*/
|
||||||
|
extern int yescrypt_init_shared(yescrypt_shared_t * __shared,
|
||||||
|
const uint8_t * __param, size_t __paramlen,
|
||||||
|
uint64_t __N, uint32_t __r, uint32_t __p,
|
||||||
|
yescrypt_init_shared_flags_t __flags, uint32_t __mask,
|
||||||
|
uint8_t * __buf, size_t __buflen);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* yescrypt_free_shared(shared):
|
||||||
|
* Free memory that had been allocated with yescrypt_init_shared().
|
||||||
|
*
|
||||||
|
* Return 0 on success; or -1 on error.
|
||||||
|
*
|
||||||
|
* MT-safe as long as shared is local to the thread.
|
||||||
|
*/
|
||||||
|
extern int yescrypt_free_shared(yescrypt_shared_t * __shared);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* yescrypt_init_local(local):
|
||||||
|
* Initialize the thread-local (RAM) data structure. Actual memory allocation
|
||||||
|
* is currently fully postponed until a call to yescrypt_kdf() or yescrypt_r().
|
||||||
|
*
|
||||||
|
* Return 0 on success; or -1 on error.
|
||||||
|
*
|
||||||
|
* MT-safe as long as local is local to the thread.
|
||||||
|
*/
|
||||||
|
extern int yescrypt_init_local(yescrypt_local_t * __local);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* yescrypt_free_local(local):
|
||||||
|
* Free memory that may have been allocated for an initialized thread-local
|
||||||
|
* (RAM) data structure.
|
||||||
|
*
|
||||||
|
* Return 0 on success; or -1 on error.
|
||||||
|
*
|
||||||
|
* MT-safe as long as local is local to the thread.
|
||||||
|
*/
|
||||||
|
extern int yescrypt_free_local(yescrypt_local_t * __local);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen,
|
||||||
|
* N, r, p, t, flags, buf, buflen):
|
||||||
|
* Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r,
|
||||||
|
* p, buflen), or a revision of scrypt as requested by flags and shared, and
|
||||||
|
* write the result into buf. The parameters N, r, p, and buflen must satisfy
|
||||||
|
* the same conditions as with crypto_scrypt(). t controls computation time
|
||||||
|
* while not affecting peak memory usage. shared and flags may request
|
||||||
|
* special modes as described below. local is the thread-local data
|
||||||
|
* structure, allowing to preserve and reuse a memory allocation across calls,
|
||||||
|
* thereby reducing its overhead.
|
||||||
|
*
|
||||||
|
* Return 0 on success; or -1 on error.
|
||||||
|
*
|
||||||
|
* t controls computation time. t = 0 is optimal in terms of achieving the
|
||||||
|
* highest area-time for ASIC attackers. Thus, higher computation time, if
|
||||||
|
* affordable, is best achieved by increasing N rather than by increasing t.
|
||||||
|
* However, if the higher memory usage (which goes along with higher N) is not
|
||||||
|
* affordable, or if fine-tuning of the time is needed (recall that N must be a
|
||||||
|
* power of 2), then t = 1 or above may be used to increase time while staying
|
||||||
|
* at the same peak memory usage. t = 1 increases the time by 25% and
|
||||||
|
* decreases the normalized area-time to 96% of optimal. (Of course, in
|
||||||
|
* absolute terms the area-time increases with higher t. It's just that it
|
||||||
|
* would increase slightly more with higher N*r rather than with higher t.)
|
||||||
|
* t = 2 increases the time by another 20% and decreases the normalized
|
||||||
|
* area-time to 89% of optimal. Thus, these two values are reasonable to use
|
||||||
|
* for fine-tuning. Values of t higher than 2 result in further increase in
|
||||||
|
* time while reducing the efficiency much further (e.g., down to around 50% of
|
||||||
|
* optimal for t = 5, which runs 3 to 4 times slower than t = 0, with exact
|
||||||
|
* numbers varying by the flags settings).
|
||||||
|
*
|
||||||
|
* Classic scrypt is available by setting t = 0 and flags to YESCRYPT_WORM and
|
||||||
|
* passing a dummy shared structure (see the description of
|
||||||
|
* yescrypt_init_shared() above for how to produce one). In this mode, the
|
||||||
|
* thread-local memory region (RAM) is first sequentially written to and then
|
||||||
|
* randomly read from. This algorithm is friendly towards time-memory
|
||||||
|
* tradeoffs (TMTO), available both to defenders (albeit not in this
|
||||||
|
* implementation) and to attackers.
|
||||||
|
*
|
||||||
|
* Setting YESCRYPT_RW adds extra random reads and writes to the thread-local
|
||||||
|
* memory region (RAM), which makes TMTO a lot less efficient. This may be
|
||||||
|
* used to slow down the kinds of attackers who would otherwise benefit from
|
||||||
|
* classic scrypt's efficient TMTO. Since classic scrypt's TMTO allows not
|
||||||
|
* only for the tradeoff, but also for a decrease of attacker's area-time (by
|
||||||
|
* up to a constant factor), setting YESCRYPT_RW substantially increases the
|
||||||
|
* cost of attacks in area-time terms as well. Yet another benefit of it is
|
||||||
|
* that optimal area-time is reached at an earlier time than with classic
|
||||||
|
* scrypt, and t = 0 actually corresponds to this earlier completion time,
|
||||||
|
* resulting in quicker hash computations (and thus in higher request rate
|
||||||
|
* capacity). Due to these properties, YESCRYPT_RW should almost always be
|
||||||
|
* set, except when compatibility with classic scrypt or TMTO-friendliness are
|
||||||
|
* desired.
|
||||||
|
*
|
||||||
|
* YESCRYPT_PARALLEL_SMIX moves parallelism that is present with p > 1 to a
|
||||||
|
* lower level as compared to where it is in classic scrypt. This reduces
|
||||||
|
* flexibility for efficient computation (for both attackers and defenders) by
|
||||||
|
* requiring that, short of resorting to TMTO, the full amount of memory be
|
||||||
|
* allocated as needed for the specified p, regardless of whether that
|
||||||
|
* parallelism is actually being fully made use of or not. (For comparison, a
|
||||||
|
* single instance of classic scrypt may be computed in less memory without any
|
||||||
|
* CPU time overhead, but in more real time, by not making full use of the
|
||||||
|
* parallelism.) This may be desirable when the defender has enough memory
|
||||||
|
* with sufficiently low latency and high bandwidth for efficient full parallel
|
||||||
|
* execution, yet the required memory size is high enough that some likely
|
||||||
|
* attackers might end up being forced to choose between using higher latency
|
||||||
|
* memory than they could use otherwise (waiting for data longer) or using TMTO
|
||||||
|
* (waiting for data more times per one hash computation). The area-time cost
|
||||||
|
* for other kinds of attackers (who would use the same memory type and TMTO
|
||||||
|
* factor or no TMTO either way) remains roughly the same, given the same
|
||||||
|
* running time for the defender. In the TMTO-friendly YESCRYPT_WORM mode, as
|
||||||
|
* long as the defender has enough memory that is just as fast as the smaller
|
||||||
|
* per-thread regions would be, doesn't expect to ever need greater
|
||||||
|
* flexibility (except possibly via TMTO), and doesn't need backwards
|
||||||
|
* compatibility with classic scrypt, there are no other serious drawbacks to
|
||||||
|
* this setting. In the YESCRYPT_RW mode, which is meant to discourage TMTO,
|
||||||
|
* this new approach to parallelization makes TMTO less inefficient. (This is
|
||||||
|
* an unfortunate side-effect of avoiding some random writes, as we have to in
|
||||||
|
* order to allow for parallel threads to access a common memory region without
|
||||||
|
* synchronization overhead.) Thus, in this mode this setting poses an extra
|
||||||
|
* tradeoff of its own (higher area-time cost for a subset of attackers vs.
|
||||||
|
* better TMTO resistance). Setting YESCRYPT_PARALLEL_SMIX also changes the
|
||||||
|
* way the running time is to be controlled from N*r*p (for classic scrypt) to
|
||||||
|
* N*r (in this modification). All of this applies only when p > 1. For
|
||||||
|
* p = 1, this setting is a no-op.
|
||||||
|
*
|
||||||
|
* Passing a real shared structure, with ROM contents previously computed by
|
||||||
|
* yescrypt_init_shared(), enables the use of ROM and requires YESCRYPT_RW for
|
||||||
|
* the thread-local RAM region. In order to allow for initialization of the
|
||||||
|
* ROM to be split into a separate program, the shared->shared1.aligned and
|
||||||
|
* shared->shared1.aligned_size fields may be set by the caller of
|
||||||
|
* yescrypt_kdf() manually rather than with yescrypt_init_shared().
|
||||||
|
*
|
||||||
|
* local must be initialized with yescrypt_init_local().
|
||||||
|
*
|
||||||
|
* MT-safe as long as local and buf are local to the thread.
|
||||||
|
*/
|
||||||
|
extern int yescrypt_kdf(const yescrypt_shared_t * __shared,
|
||||||
|
yescrypt_local_t * __local,
|
||||||
|
const uint8_t * __passwd, size_t __passwdlen,
|
||||||
|
const uint8_t * __salt, size_t __saltlen,
|
||||||
|
uint64_t __N, uint32_t __r, uint32_t __p, uint32_t __t,
|
||||||
|
yescrypt_flags_t __flags,
|
||||||
|
uint8_t * __buf, size_t __buflen);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* yescrypt_r(shared, local, passwd, passwdlen, setting, buf, buflen):
|
||||||
|
* Compute and encode an scrypt or enhanced scrypt hash of passwd given the
|
||||||
|
* parameters and salt value encoded in setting. If the shared structure is
|
||||||
|
* not dummy, a ROM is used and YESCRYPT_RW is required. Otherwise, whether to
|
||||||
|
* use the YESCRYPT_WORM (classic scrypt) or YESCRYPT_RW (time-memory tradeoff
|
||||||
|
* discouraging modification) is determined by the setting string. shared and
|
||||||
|
* local must be initialized as described above for yescrypt_kdf(). buf must
|
||||||
|
* be large enough (as indicated by buflen) to hold the encoded hash string.
|
||||||
|
*
|
||||||
|
* Return the encoded hash string on success; or NULL on error.
|
||||||
|
*
|
||||||
|
* MT-safe as long as local and buf are local to the thread.
|
||||||
|
*/
|
||||||
|
extern uint8_t * yescrypt_r(const yescrypt_shared_t * __shared,
|
||||||
|
yescrypt_local_t * __local,
|
||||||
|
const uint8_t * __passwd, size_t __passwdlen,
|
||||||
|
const uint8_t * __setting,
|
||||||
|
uint8_t * __buf, size_t __buflen);
|
||||||
|
|
||||||
|
/**
 * yescrypt(passwd, setting):
 * Compute and encode an scrypt or enhanced scrypt hash of passwd given the
 * parameters and salt value encoded in setting.  Whether to use the
 * YESCRYPT_WORM (classic scrypt) or YESCRYPT_RW (time-memory tradeoff
 * discouraging modification) is determined by the setting string.
 *
 * NOTE: the implementation in yescryptcommon.c does not derive the length of
 * passwd from a NUL terminator.  It always hashes exactly 80 bytes starting
 * at passwd, so passwd must point to at least 80 readable bytes and any
 * bytes beyond the first 80 are ignored.
 *
 * Return the encoded hash string on success; or NULL on error.  The returned
 * string lives in a static buffer inside yescrypt() and is overwritten by
 * the next call.
 *
 * This is a crypt(3)-like interface, which is simpler to use than
 * yescrypt_r(), but it is not MT-safe, it does not allow for the use of a ROM,
 * and it is slower than yescrypt_r() for repeated calls because it allocates
 * and frees memory on each call.
 *
 * MT-unsafe.
 */
extern uint8_t * yescrypt(const uint8_t * __passwd, const uint8_t * __setting);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* yescrypt_gensalt_r(N_log2, r, p, flags, src, srclen, buf, buflen):
|
||||||
|
* Generate a setting string for use with yescrypt_r() and yescrypt() by
|
||||||
|
* encoding into it the parameters N_log2 (which is to be set to base 2
|
||||||
|
* logarithm of the desired value for N), r, p, flags, and a salt given by src
|
||||||
|
* (of srclen bytes). buf must be large enough (as indicated by buflen) to
|
||||||
|
* hold the setting string.
|
||||||
|
*
|
||||||
|
* Return the setting string on success; or NULL on error.
|
||||||
|
*
|
||||||
|
* MT-safe as long as buf is local to the thread.
|
||||||
|
*/
|
||||||
|
extern uint8_t * yescrypt_gensalt_r(
|
||||||
|
uint32_t __N_log2, uint32_t __r, uint32_t __p,
|
||||||
|
yescrypt_flags_t __flags,
|
||||||
|
const uint8_t * __src, size_t __srclen,
|
||||||
|
uint8_t * __buf, size_t __buflen);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* yescrypt_gensalt(N_log2, r, p, flags, src, srclen):
|
||||||
|
* Generate a setting string for use with yescrypt_r() and yescrypt(). This
|
||||||
|
* function is the same as yescrypt_gensalt_r() except that it uses a static
|
||||||
|
* buffer and thus is not MT-safe.
|
||||||
|
*
|
||||||
|
* Return the setting string on success; or NULL on error.
|
||||||
|
*
|
||||||
|
* MT-unsafe.
|
||||||
|
*/
|
||||||
|
extern uint8_t * yescrypt_gensalt(
|
||||||
|
uint32_t __N_log2, uint32_t __r, uint32_t __p,
|
||||||
|
yescrypt_flags_t __flags,
|
||||||
|
const uint8_t * __src, size_t __srclen);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* !_YESCRYPT_H_ */
|
360
algorithm/yescryptcommon.c
Normal file
360
algorithm/yescryptcommon.c
Normal file
@ -0,0 +1,360 @@
|
|||||||
|
/*-
|
||||||
|
* Copyright 2013,2014 Alexander Peslyak
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include "algorithm/yescrypt_core.h"
|
||||||
|
|
||||||
|
/* Number of base-64 characters needed to carry `bytes` bytes
 * (6 bits per character, rounded up). */
#define BYTES2CHARS(bytes) \
	((((bytes) * 8) + 5) / 6)

/* Size of the raw hash produced for an encoded hash string. */
#define HASH_SIZE 32 /* bytes */
#define HASH_LEN BYTES2CHARS(HASH_SIZE) /* base-64 chars */

/* Flags passed to yescrypt_kdf() by yescrypt_bsty(). */
#define YESCRYPT_FLAGS (YESCRYPT_RW | YESCRYPT_PWXFORM)

/* Alphabet of the encoding: the character at index v represents the 6-bit
 * value v. */
static const char * const itoa64 =
	"./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
|
||||||
|
|
||||||
|
static uint8_t * encode64_uint32(uint8_t * dst, size_t dstlen,
|
||||||
|
uint32_t src, uint32_t srcbits)
|
||||||
|
{
|
||||||
|
uint32_t bit;
|
||||||
|
|
||||||
|
for (bit = 0; bit < srcbits; bit += 6) {
|
||||||
|
if (dstlen < 1)
|
||||||
|
return NULL;
|
||||||
|
*dst++ = itoa64[src & 0x3f];
|
||||||
|
dstlen--;
|
||||||
|
src >>= 6;
|
||||||
|
}
|
||||||
|
|
||||||
|
return dst;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Encode srclen bytes from src into dst using encode64_uint32().  Input is
 * consumed in groups of up to three bytes; the first byte of a group supplies
 * the lowest 8 bits of the value handed to encode64_uint32().  dstlen is the
 * space available at dst.
 *
 * Returns the position just past the last character written, or NULL if dst
 * is too small.  No NUL terminator is added.
 */
static uint8_t * encode64(uint8_t * dst, size_t dstlen,
    const uint8_t * src, size_t srclen)
{
	size_t pos = 0;

	while (pos < srclen) {
		uint32_t group = 0;
		uint32_t nbits = 0;
		uint8_t * end;

		/* pos < srclen holds on entry, so at least one byte is taken */
		for (; nbits < 24 && pos < srclen; nbits += 8)
			group |= (uint32_t)src[pos++] << nbits;

		end = encode64_uint32(dst, dstlen, group, nbits);
		if (end == NULL)
			return NULL;
		dstlen -= end - dst;
		dst = end;
	}

	return dst;
}
|
||||||
|
|
||||||
|
static int decode64_one(uint32_t * dst, uint8_t src)
|
||||||
|
{
|
||||||
|
const char * ptr = strchr(itoa64, src);
|
||||||
|
if (ptr) {
|
||||||
|
*dst = ptr - itoa64;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
*dst = 0;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Decode a dstbits-bit value from consecutive itoa64 characters at src,
 * least significant 6-bit group first (the inverse of encode64_uint32()).
 *
 * On success stores the value in *dst and returns the position just past the
 * last character consumed.  If any character fails decode64_one(), stores 0
 * in *dst and returns NULL.
 */
static const uint8_t * decode64_uint32(uint32_t * dst, uint32_t dstbits,
    const uint8_t * src)
{
	uint32_t acc = 0;
	uint32_t shift = 0;

	while (shift < dstbits) {
		uint32_t digit;

		if (decode64_one(&digit, *src) != 0) {
			*dst = 0;
			return NULL;
		}
		acc |= digit << shift;
		src++;
		shift += 6;
	}

	*dst = acc;
	return src;
}
|
||||||
|
|
||||||
|
uint8_t *
|
||||||
|
yescrypt_r(const yescrypt_shared_t * shared, yescrypt_local_t * local,
|
||||||
|
const uint8_t * passwd, size_t passwdlen,
|
||||||
|
const uint8_t * setting,
|
||||||
|
uint8_t * buf, size_t buflen)
|
||||||
|
{
|
||||||
|
uint8_t hash[HASH_SIZE];
|
||||||
|
const uint8_t * src, * salt;
|
||||||
|
uint8_t * dst;
|
||||||
|
size_t prefixlen, saltlen, need;
|
||||||
|
uint8_t version;
|
||||||
|
uint64_t N;
|
||||||
|
uint32_t r, p;
|
||||||
|
yescrypt_flags_t flags = YESCRYPT_WORM;
|
||||||
|
fflush(stdout);
|
||||||
|
if (setting[0] != '$' || setting[1] != '7')
|
||||||
|
{
|
||||||
|
fflush(stdout);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
fflush(stdout);
|
||||||
|
src = setting + 2;
|
||||||
|
fflush(stdout);
|
||||||
|
switch ((version = *src)) {
|
||||||
|
case '$':
|
||||||
|
fflush(stdout);
|
||||||
|
break;
|
||||||
|
case 'X':
|
||||||
|
src++;
|
||||||
|
flags = YESCRYPT_RW;
|
||||||
|
fflush(stdout);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
{
|
||||||
|
fflush(stdout);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fflush(stdout);
|
||||||
|
if (*src != '$') {
|
||||||
|
uint32_t decoded_flags;
|
||||||
|
if (decode64_one(&decoded_flags, *src))
|
||||||
|
|
||||||
|
{
|
||||||
|
fflush(stdout);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
flags = decoded_flags;
|
||||||
|
if (*++src != '$')
|
||||||
|
{
|
||||||
|
fflush(stdout);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
src++;
|
||||||
|
|
||||||
|
{
|
||||||
|
uint32_t N_log2;
|
||||||
|
if (decode64_one(&N_log2, *src))
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
src++;
|
||||||
|
N = (uint64_t)1 << N_log2;
|
||||||
|
}
|
||||||
|
|
||||||
|
src = decode64_uint32(&r, 30, src);
|
||||||
|
if (!src)
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
src = decode64_uint32(&p, 30, src);
|
||||||
|
if (!src)
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
prefixlen = src - setting;
|
||||||
|
|
||||||
|
salt = src;
|
||||||
|
src = (uint8_t *)strrchr((char *)salt, '$');
|
||||||
|
if (src)
|
||||||
|
saltlen = src - salt;
|
||||||
|
else
|
||||||
|
saltlen = strlen((char *)salt);
|
||||||
|
|
||||||
|
need = prefixlen + saltlen + 1 + HASH_LEN + 1;
|
||||||
|
if (need > buflen || need < saltlen)
|
||||||
|
|
||||||
|
{
|
||||||
|
fflush(stdout);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
fflush(stdout);
|
||||||
|
if (yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen,
|
||||||
|
N, r, p, 0, flags, hash, sizeof(hash)))
|
||||||
|
{
|
||||||
|
fflush(stdout);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
dst = buf;
|
||||||
|
memcpy(dst, setting, prefixlen + saltlen);
|
||||||
|
dst += prefixlen + saltlen;
|
||||||
|
*dst++ = '$';
|
||||||
|
|
||||||
|
dst = encode64(dst, buflen - (dst - buf), hash, sizeof(hash));
|
||||||
|
/* Could zeroize hash[] here, but yescrypt_kdf() doesn't zeroize its
|
||||||
|
* memory allocations yet anyway. */
|
||||||
|
if (!dst || dst >= buf + buflen) /* Can't happen */
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
*dst = 0; /* NUL termination */
|
||||||
|
fflush(stdout);
|
||||||
|
return buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint8_t *
|
||||||
|
yescrypt(const uint8_t * passwd, const uint8_t * setting)
|
||||||
|
{
|
||||||
|
static uint8_t buf[4 + 1 + 5 + 5 + BYTES2CHARS(32) + 1 + HASH_LEN + 1];
|
||||||
|
yescrypt_shared_t shared;
|
||||||
|
yescrypt_local_t local;
|
||||||
|
uint8_t * retval;
|
||||||
|
if (yescrypt_init_shared(&shared, NULL, 0,
|
||||||
|
0, 0, 0, YESCRYPT_SHARED_DEFAULTS, 0, NULL, 0))
|
||||||
|
return NULL;
|
||||||
|
if (yescrypt_init_local(&local)) {
|
||||||
|
yescrypt_free_shared(&shared);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
retval = yescrypt_r(&shared, &local,
|
||||||
|
passwd, 80, setting, buf, sizeof(buf));
|
||||||
|
// printf("hashse='%s'\n", (char *)retval);
|
||||||
|
if (yescrypt_free_local(&local)) {
|
||||||
|
yescrypt_free_shared(&shared);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
if (yescrypt_free_shared(&shared))
|
||||||
|
return NULL;
|
||||||
|
return retval;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
uint8_t *
|
||||||
|
yescrypt_gensalt_r(uint32_t N_log2, uint32_t r, uint32_t p,
|
||||||
|
yescrypt_flags_t flags,
|
||||||
|
const uint8_t * src, size_t srclen,
|
||||||
|
uint8_t * buf, size_t buflen)
|
||||||
|
{
|
||||||
|
uint8_t * dst;
|
||||||
|
size_t prefixlen = 3 + 1 + 5 + 5;
|
||||||
|
size_t saltlen = BYTES2CHARS(srclen);
|
||||||
|
size_t need;
|
||||||
|
|
||||||
|
if (p == 1)
|
||||||
|
flags &= ~YESCRYPT_PARALLEL_SMIX;
|
||||||
|
|
||||||
|
if (flags) {
|
||||||
|
if (flags & ~0x3f)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
prefixlen++;
|
||||||
|
if (flags != YESCRYPT_RW)
|
||||||
|
prefixlen++;
|
||||||
|
}
|
||||||
|
|
||||||
|
need = prefixlen + saltlen + 1;
|
||||||
|
if (need > buflen || need < saltlen || saltlen < srclen)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
if (N_log2 > 63 || ((uint64_t)r * (uint64_t)p >= (1U << 30)))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
dst = buf;
|
||||||
|
*dst++ = '$';
|
||||||
|
*dst++ = '7';
|
||||||
|
if (flags) {
|
||||||
|
*dst++ = 'X'; /* eXperimental, subject to change */
|
||||||
|
if (flags != YESCRYPT_RW)
|
||||||
|
*dst++ = itoa64[flags];
|
||||||
|
}
|
||||||
|
*dst++ = '$';
|
||||||
|
|
||||||
|
*dst++ = itoa64[N_log2];
|
||||||
|
|
||||||
|
dst = encode64_uint32(dst, buflen - (dst - buf), r, 30);
|
||||||
|
if (!dst) /* Can't happen */
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
dst = encode64_uint32(dst, buflen - (dst - buf), p, 30);
|
||||||
|
if (!dst) /* Can't happen */
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
dst = encode64(dst, buflen - (dst - buf), src, srclen);
|
||||||
|
if (!dst || dst >= buf + buflen) /* Can't happen */
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
*dst = 0; /* NUL termination */
|
||||||
|
|
||||||
|
return buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint8_t *
|
||||||
|
yescrypt_gensalt(uint32_t N_log2, uint32_t r, uint32_t p,
|
||||||
|
yescrypt_flags_t flags,
|
||||||
|
const uint8_t * src, size_t srclen)
|
||||||
|
{
|
||||||
|
static uint8_t buf[4 + 1 + 5 + 5 + BYTES2CHARS(32) + 1];
|
||||||
|
return yescrypt_gensalt_r(N_log2, r, p, flags, src, srclen,
|
||||||
|
buf, sizeof(buf));
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
yescrypt_bsty(const uint8_t * passwd, size_t passwdlen,
|
||||||
|
const uint8_t * salt, size_t saltlen, uint64_t N, uint32_t r, uint32_t p,
|
||||||
|
uint8_t * buf, size_t buflen)
|
||||||
|
{
|
||||||
|
static __thread int initialized = 0;
|
||||||
|
static __thread yescrypt_shared_t shared;
|
||||||
|
static __thread yescrypt_local_t local;
|
||||||
|
|
||||||
|
// static __declspec(thread) int initialized = 0;
|
||||||
|
// static __declspec(thread) yescrypt_shared_t shared;
|
||||||
|
// static __declspec(thread) yescrypt_local_t local;
|
||||||
|
|
||||||
|
int retval;
|
||||||
|
if (!initialized) {
|
||||||
|
/* "shared" could in fact be shared, but it's simpler to keep it private
|
||||||
|
* along with "local". It's dummy and tiny anyway. */
|
||||||
|
if (yescrypt_init_shared(&shared, NULL, 0,
|
||||||
|
0, 0, 0, YESCRYPT_SHARED_DEFAULTS, 0, NULL, 0))
|
||||||
|
return -1;
|
||||||
|
if (yescrypt_init_local(&local)) {
|
||||||
|
yescrypt_free_shared(&shared);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
initialized = 1;
|
||||||
|
}
|
||||||
|
retval = yescrypt_kdf(&shared, &local,
|
||||||
|
passwd, passwdlen, salt, saltlen, N, r, p, 0, YESCRYPT_FLAGS,
|
||||||
|
buf, buflen);
|
||||||
|
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Compute the 32-byte yescrypt hash of an 80-byte input.  The same 80 bytes
 * serve as both password and salt, with N = 2048, r = 8 and p = 1.
 *
 * input must point to at least 80 readable bytes and output to at least 32
 * writable bytes.
 *
 * The function cannot report errors, so if the underlying KDF fails the
 * output is filled with 0xff bytes instead of being left undefined.
 * NOTE(review): this assumes callers accept a hash only when it compares
 * at or below a target, so that an all-ones value is never accepted --
 * confirm against the callers.
 */
void yescrypt_hash(const unsigned char *input, unsigned char *output)
{
	if (yescrypt_bsty((const uint8_t *)input, 80,
	    (const uint8_t *)input, 80, 2048, 8, 1,
	    (uint8_t *)output, 32) != 0)
		memset(output, 0xff, 32);
}
|
13
example.bat
13
example.bat
@ -1,9 +1,12 @@
|
|||||||
setx GPU_FORCE_64BIT_PTR 0
|
rem setx GPU_MAX_HEAP_SIZE 100
|
||||||
setx GPU_MAX_HEAP_SIZE 100
|
rem setx GPU_USE_SYNC_OBJECTS 1
|
||||||
setx GPU_USE_SYNC_OBJECTS 1
|
rem setx GPU_MAX_ALLOC_PERCENT 100
|
||||||
setx GPU_MAX_ALLOC_PERCENT 100
|
|
||||||
del *.bin
|
del *.bin
|
||||||
|
|
||||||
sgminer.exe --no-submit-stale --kernel Lyra2RE -o stratum+tcp://92.27.201.170:9174 -u m -p 1 --gpu-platform 2 -I 19 --shaders 2816 -w 64 -g 2
|
@rem sgminer.exe --no-submit-stale --kernel Lyra2RE -o stratum+tcp://pool.verters.com:4444 -u djm34t.user -p password --gpu-platform 2
|
||||||
|
@rem sgminer.exe --no-submit-stale --kernel pluck -o stratum+tcp://sup.suprnova.cc:7777 -u djm34.2 -p password --gpu-platform 2 --thread-concurrency 8192 -w 4 -I 12
|
||||||
|
@rem sgminer.exe --no-submit-stale --kernel yescrypt -o stratum+tcp://mine2.bsty.nonce-pool.com:4095 -u djm34.1 -p password --gpu-platform 1 -w 32 --thread-concurrency 512 --text-only --debug
|
||||||
|
@rem sgminer.exe --no-submit-stale --kernel yescrypt -o stratum+tcp://mine2.bsty.nonce-pool.com:4095 -u djm34.1 -p password --gpu-platform 1 -w 32 --thread-concurrency 512 --text-only -D
|
||||||
|
|
||||||
|
sgminer.exe --no-submit-stale --kernel yescrypt -o stratum+tcp://mine2.bsty.nonce-pool.com:4095 -u djm34.1 -p password --gpu-platform 0 -w 16 -g 2
|
||||||
pause
|
pause
|
271
kernel/yescrypt.cl
Normal file
271
kernel/yescrypt.cl
Normal file
@ -0,0 +1,271 @@
|
|||||||
|
/*
|
||||||
|
* "yescrypt" kernel implementation.
|
||||||
|
*
|
||||||
|
* ==========================(LICENSE BEGIN)============================
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015 djm34
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files (the
|
||||||
|
* "Software"), to deal in the Software without restriction, including
|
||||||
|
* without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
* permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
* the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
* ===========================(LICENSE END)=============================
|
||||||
|
*
|
||||||
|
* @author djm34
|
||||||
|
*/
|
||||||
|
#if !defined(cl_khr_byte_addressable_store)
|
||||||
|
#error "Device does not support unaligned stores"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "yescrypt_essential.cl"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * search: one work-item evaluates one nonce (its global id).
 *
 *   input    80 bytes of header data; the last 32-bit word is replaced by
 *            the nonce
 *   output   result buffer; the word at index 0xFF is used as an atomic
 *            counter that selects the slot a found nonce is stored in
 *   padcache per-work-item scratch, 2048*128*sizeof(ulong) bytes each
 *   buff2    per-work-item scratch, 64*128*sizeof(ulong) bytes each
 *   target   a nonce is reported when SWAP32 of the last word of the final
 *            SHA-256 state is <= target
 *
 * Scratch slices are selected by get_global_id(0) % MAX_GLOBAL_THREADS.
 */
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search(__global const uchar* restrict input, __global uint* restrict output, __global uchar *padcache, __global uchar* buff2, const uint target)
{
	__global ulong16 *hashbuffer = (__global ulong16 *)(padcache + (2048 * 128 * sizeof(ulong) * (get_global_id(0) % MAX_GLOBAL_THREADS)));
	__global ulong16 *prevstate = (__global ulong16 *)(buff2 + (64 * 128 * sizeof(ulong) * (get_global_id(0) % MAX_GLOBAL_THREADS)));

	uint nonce = (get_global_id(0));
	uint data[20];
	uint16 in;
	uint8 state1, state2;
	uint8 sha256tokeep;

	ulong16 Bdev[8]; // will require an additional buffer

	/* Load the 80 input bytes (64 + 16) and byte-swap every word. */
	((uint16 *)data)[0] = ((__global const uint16 *)input)[0];
	((uint4 *)data)[4] = ((__global const uint4 *)input)[4];
	for (int i = 0; i < 20; i++) {
		data[i] = SWAP32(data[i]);
	}

	uint8 passwd = sha256_80(data, nonce);

	/* pbkdf: inner (pad1) and outer (pad2) states keyed with passwd */
	in.lo = pad1.lo ^ passwd;
	in.hi = pad1.hi;
	state1 = sha256_Transform(in, H256);

	in.lo = pad2.lo ^ passwd;
	in.hi = pad2.hi;
	state2 = sha256_Transform(in, H256);

	/* absorb the first 64 bytes of data into the inner state */
	in = ((uint16*)data)[0];
	state1 = sha256_Transform(in, state1);

	/* Fill Bdev: each iteration produces four 32-byte outputs, numbered
	 * 4*i+1 .. 4*i+4, and stores them shuffled in Bdev[i]. */
	for (int i = 0; i < 8; i++) {
		uint16 result;

		/* output 4*i+1 */
		in = pad3;
		in.s0 = data[16];
		in.s1 = data[17];
		in.s2 = data[18];
		in.s3 = nonce;
		in.s4 = 4 * i + 1;
		in.lo = sha256_Transform(in, state1);
		in.hi = pad4;
		result.lo = swapvec(sha256_Transform(in, state2));
		/* the very first output is reused as the key of the final pbkdf */
		if (i == 0) {
			sha256tokeep = result.lo;
		}

		/* output 4*i+2 */
		in = pad3;
		in.s0 = data[16];
		in.s1 = data[17];
		in.s2 = data[18];
		in.s3 = nonce;
		in.s4 = 4 * i + 2;
		in.lo = sha256_Transform(in, state1);
		in.hi = pad4;
		result.hi = swapvec(sha256_Transform(in, state2));

		Bdev[i].lo = as_ulong8(shuffle(result));

		/* output 4*i+3 */
		in = pad3;
		in.s0 = data[16];
		in.s1 = data[17];
		in.s2 = data[18];
		in.s3 = nonce;
		in.s4 = 4 * i + 3;
		in.lo = sha256_Transform(in, state1);
		in.hi = pad4;
		result.lo = swapvec(sha256_Transform(in, state2));

		/* output 4*i+4 */
		in = pad3;
		in.s0 = data[16];
		in.s1 = data[17];
		in.s2 = data[18];
		in.s3 = nonce;
		in.s4 = 4 * i + 4;
		in.lo = sha256_Transform(in, state1);
		in.hi = pad4;
		result.hi = swapvec(sha256_Transform(in, state2));

		Bdev[i].hi = as_ulong8(shuffle(result));
	}

	/* mixing1: fill prevstate[0..63] from Bdev[0] using the salsa blockmix */
	prevstate[0] = Bdev[0];
	Bdev[0] = blockmix_salsa8_small2(Bdev[0]);
	prevstate[1] = Bdev[0];
	Bdev[0] = blockmix_salsa8_small2(Bdev[0]);

	uint n = 1;
#pragma unroll 1
	for (uint i = 2; i < 64; i++) {
		prevstate[i] = Bdev[0];

		/* n doubles each time i reaches a power of two */
		if ((i & (i - 1)) == 0) {
			n = n << 1;
		}

		/* pick one of the n entries starting at index i - n */
		uint j = as_uint2(Bdev[0].hi.s0).x & (n - 1);
		j += i - n;

		Bdev[0] ^= prevstate[j];
		Bdev[0] = blockmix_salsa8_small2(Bdev[0]);
	}

	/* mixing1_2: fill the 2048 hashbuffer entries (8 ulong16 each) */
	for (int i = 0; i < 8; i++) {
		hashbuffer[i] = Bdev[i];
	}
	blockmix_pwxform((ulong8*)Bdev, prevstate);

	for (int i = 0; i < 8; i++) {
		hashbuffer[i + 8] = Bdev[i];
	}
	blockmix_pwxform((ulong8*)Bdev, prevstate);

	n = 1;
#pragma unroll 1
	for (int i = 2; i < 2048; i++) {
		for (int k = 0; k < 8; k++) {
			(hashbuffer + 8 * i)[k] = Bdev[k];
		}

		if ((i & (i - 1)) == 0) {
			n = n << 1;
		}

		uint j = as_uint2(Bdev[7].hi.s0).x & (n - 1);
		j += i - n;

		for (int k = 0; k < 8; k++) {
			Bdev[k] ^= (hashbuffer + 8 * j)[k];
		}

		blockmix_pwxform((ulong8*)Bdev, prevstate);
	}

	/* mix2_2: 684 data-dependent passes over hashbuffer; the entry that was
	 * read is written back on all but the last two passes */
#pragma unroll 1
	for (int z = 0; z < 684; z++) {
		uint j = as_uint2(Bdev[7].hi.s0).x & 2047;

		for (int k = 0; k < 8; k++) {
			Bdev[k] ^= (hashbuffer + 8 * j)[k];
		}

		if (z < 682) {
			for (int k = 0; k < 8; k++) {
				(hashbuffer + 8 * j)[k] = Bdev[k];
			}
		}

		blockmix_pwxform((ulong8*)Bdev, prevstate);
	}

	/* undo the shuffle applied when Bdev was filled */
	for (int i = 0; i < 8; i++) {
		Bdev[i].lo = as_ulong8(unshuffle(Bdev[i].lo));
		Bdev[i].hi = as_ulong8(unshuffle(Bdev[i].hi));
	}

	/* pbkdf final: keyed with the saved first output, run over all of Bdev */
	uint8 swpass = swapvec(sha256tokeep);
	in.lo = pad1.lo ^ swpass;
	in.hi = pad1.hi;
	state1 = sha256_Transform(in, H256);

	in.lo = pad2.lo ^ swpass;
	in.hi = pad2.hi;
	state2 = sha256_Transform(in, H256);

	for (int i = 0; i < 8; i++) {
		in = as_uint16(Bdev[i].lo);
		in = swapvec16(in);
		state1 = sha256_Transform(in, state1);
		in = as_uint16(Bdev[i].hi);
		in = swapvec16(in);
		state1 = sha256_Transform(in, state1);
	}
	in = pad5;
	state1 = sha256_Transform(in, state1);
	in.lo = state1;
	in.hi = pad4;
	uint8 res = sha256_Transform(in, state2);

	/* hmac keyed with res over the 80 data bytes, then a final sha */
	in.lo = pad1.lo ^ res;
	in.hi = pad1.hi;
	state1 = sha256_Transform(in, H256);
	in.lo = pad2.lo ^ res;
	in.hi = pad2.hi;
	state2 = sha256_Transform(in, H256);

	in = ((uint16*)data)[0];
	state1 = sha256_Transform(in, state1);

	in = padsha80;
	in.s0 = data[16];
	in.s1 = data[17];
	in.s2 = data[18];
	in.s3 = nonce;
	in.sf = 0x480;
	state1 = sha256_Transform(in, state1);

	in.lo = state1;
	in.hi = pad4;
	state1 = sha256_Transform(in, state2);

	in.lo = state1;
	in.hi = pad4;
	in.sf = 0x100;
	res = sha256_Transform(in, H256);

	if (SWAP32(res.s7) <= (target)) {
		output[atomic_inc(output + 0xFF)] = (nonce);
	}
}
|
746
kernel/yescrypt_essential.cl
Normal file
746
kernel/yescrypt_essential.cl
Normal file
@ -0,0 +1,746 @@
|
|||||||
|
/*
|
||||||
|
* "yescrypt" kernel implementation.
|
||||||
|
*
|
||||||
|
* ==========================(LICENSE BEGIN)============================
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015 djm34
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files (the
|
||||||
|
* "Software"), to deal in the Software without restriction, including
|
||||||
|
* without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
* permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
* the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
* ===========================(LICENSE END)=============================
|
||||||
|
*
|
||||||
|
* @author djm34
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Rotate x left by n bits. */
#define ROL32(x, n) rotate((x), (uint)(n))
/* Reverse the byte order of a 32-bit value. */
#define SWAP32(a) (as_uint(as_uchar4(a).wzyx))
#define HASH_MEMORY 4096

/*
 * One Salsa20 quarter-round on the four words a, b, c, d.  Requires a uint
 * variable named t in the enclosing scope.
 */
#define SALSA(a, b, c, d) do { \
	t = (a) + (d); (b) ^= ROL32(t, 7U); \
	t = (b) + (a); (c) ^= ROL32(t, 9U); \
	t = (c) + (b); (d) ^= ROL32(t, 13U); \
	t = (d) + (c); (a) ^= ROL32(t, 18U); \
} while (0)

/* Eight quarter-rounds using the s0/s5/sa/sf-led word indexing. */
#define SALSA_CORE(state) do { \
	SALSA((state).s0, (state).s4, (state).s8, (state).sc); \
	SALSA((state).s5, (state).s9, (state).sd, (state).s1); \
	SALSA((state).sa, (state).se, (state).s2, (state).s6); \
	SALSA((state).sf, (state).s3, (state).s7, (state).sb); \
	SALSA((state).s0, (state).s1, (state).s2, (state).s3); \
	SALSA((state).s5, (state).s6, (state).s7, (state).s4); \
	SALSA((state).sa, (state).sb, (state).s8, (state).s9); \
	SALSA((state).sf, (state).sc, (state).sd, (state).se); \
} while (0)

/* Eight quarter-rounds using the alternative word indexing that
 * salsa20_8() applies to its input. */
#define uSALSA_CORE(state) do { \
	SALSA((state).s0, (state).s4, (state).s8, (state).sc); \
	SALSA((state).s1, (state).s5, (state).s9, (state).sd); \
	SALSA((state).s2, (state).s6, (state).sa, (state).se); \
	SALSA((state).s3, (state).s7, (state).sb, (state).sf); \
	SALSA((state).s0, (state).sd, (state).sa, (state).s7); \
	SALSA((state).s1, (state).se, (state).sb, (state).s4); \
	SALSA((state).s2, (state).sf, (state).s8, (state).s5); \
	SALSA((state).s3, (state).sc, (state).s9, (state).s6); \
} while (0)

/* Word permutations of a 64-byte value viewed as uint16.  The kernel applies
 * shuffle when filling its working blocks and unshuffle before the final
 * hashing. */
#define unshuffle(state) (as_uint16(state).s0da741eb852fc963)
#define shuffle(state) (as_uint16(state).s05af49e38d27c16b)
|
||||||
|
|
||||||
|
static __constant uint16 pad1 =
|
||||||
|
{
|
||||||
|
0x36363636, 0x36363636, 0x36363636, 0x36363636,
|
||||||
|
0x36363636, 0x36363636, 0x36363636, 0x36363636,
|
||||||
|
0x36363636, 0x36363636, 0x36363636, 0x36363636,
|
||||||
|
0x36363636, 0x36363636, 0x36363636, 0x36363636
|
||||||
|
};
|
||||||
|
|
||||||
|
static __constant uint16 pad2 =
|
||||||
|
{
|
||||||
|
0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c,
|
||||||
|
0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c,
|
||||||
|
0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c,
|
||||||
|
0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c
|
||||||
|
};
|
||||||
|
|
||||||
|
static __constant uint16 pad5 =
|
||||||
|
{
|
||||||
|
0x00000001, 0x80000000, 0x00000000, 0x00000000,
|
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00002220
|
||||||
|
};
|
||||||
|
|
||||||
|
static __constant uint16 pad3 =
|
||||||
|
{
|
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||||
|
0x00000000, 0x80000000, 0x00000000, 0x00000000,
|
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x000004a0
|
||||||
|
};
|
||||||
|
|
||||||
|
static __constant uint16 padsha80 =
|
||||||
|
{
|
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||||
|
0x80000000, 0x00000000, 0x00000000, 0x00000000,
|
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00000280
|
||||||
|
};
|
||||||
|
|
||||||
|
static __constant uint8 pad4 =
|
||||||
|
{
|
||||||
|
0x80000000, 0x00000000, 0x00000000, 0x00000000,
|
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00000300
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* SHA-256 initial hash value H(0) (FIPS 180-4, section 5.3.3). */
static __constant uint8 H256 = {
	0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
	0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
};
|
||||||
|
|
||||||
|
/* Applies SWAP32 to each of the eight 32-bit lanes of `buf` and returns the
 * result; lane order is unchanged.
 * NOTE(review): SWAP32 is defined outside this section -- presumably a
 * 32-bit byte-order swap. */
inline uint8 swapvec(uint8 buf)
{
	return (uint8)(SWAP32(buf.s0), SWAP32(buf.s1), SWAP32(buf.s2), SWAP32(buf.s3),
	               SWAP32(buf.s4), SWAP32(buf.s5), SWAP32(buf.s6), SWAP32(buf.s7));
}
|
||||||
|
|
||||||
|
/* Sixteen-lane counterpart of swapvec(): applies SWAP32 to every 32-bit lane
 * of `buf`, keeping the lane order.
 * NOTE(review): SWAP32 is defined outside this section -- presumably a
 * 32-bit byte-order swap. */
inline uint16 swapvec16(uint16 buf)
{
	return (uint16)(SWAP32(buf.s0), SWAP32(buf.s1), SWAP32(buf.s2), SWAP32(buf.s3),
	                SWAP32(buf.s4), SWAP32(buf.s5), SWAP32(buf.s6), SWAP32(buf.s7),
	                SWAP32(buf.s8), SWAP32(buf.s9), SWAP32(buf.sa), SWAP32(buf.sb),
	                SWAP32(buf.sc), SWAP32(buf.sd), SWAP32(buf.se), SWAP32(buf.sf));
}
|
||||||
|
|
||||||
|
/* Runs uSALSA_CORE four times over a copy of `Bx`, adds the original input
 * back lane by lane (feed-forward) and returns the 64 bytes reinterpreted as
 * eight 64-bit words.
 * NOTE(review): uSALSA_CORE is defined outside this section; going by the
 * function name each invocation is presumably one Salsa20 double round. */
ulong8 salsa20_8(uint16 Bx)
{
	uint t;          /* not used directly here; kept because uSALSA_CORE may reference it -- TODO confirm */
	uint16 st = Bx;  /* working copy, the input is needed again for the feed-forward */

	uSALSA_CORE(st);
	uSALSA_CORE(st);
	uSALSA_CORE(st);
	uSALSA_CORE(st);

	const uint16 fed_forward = st + Bx;
	return as_ulong8(fed_forward);
}
|
||||||
|
|
||||||
|
/* Mixes a 128-byte block made of two 64-byte halves:
 *   out.lo = salsa20_8(in.hi ^ in.lo)
 *   out.hi = salsa20_8(out.lo ^ in.hi)
 * The halves are reinterpreted as uint16 for the salsa20_8 call. */
ulong16 blockmix_salsa8_small2(ulong16 Bin)
{
	const ulong8 first  = salsa20_8(as_uint16(Bin.hi ^ Bin.lo));
	const ulong8 second = salsa20_8(as_uint16(first ^ Bin.hi));

	ulong16 mixed;
	mixed.lo = first;
	mixed.hi = second;
	return mixed;
}
|
||||||
|
/*
|
||||||
|
uint16 salsa20_8_2(uint16 Bx)
|
||||||
|
{
|
||||||
|
uint t;
|
||||||
|
uint16 st = Bx;
|
||||||
|
uSALSA_CORE(st);
|
||||||
|
uSALSA_CORE(st);
|
||||||
|
uSALSA_CORE(st);
|
||||||
|
uSALSA_CORE(st);
|
||||||
|
return(st + Bx);
|
||||||
|
}
|
||||||
|
|
||||||
|
ulong16 blockmix_salsa8_small2(ulong16 Bin)
|
||||||
|
{
|
||||||
|
uint16 X = as_uint16(Bin.hi);
|
||||||
|
X ^= as_uint16(Bin.lo);
|
||||||
|
X = salsa20_8_2(as_uint16(X));
|
||||||
|
Bin.lo = as_ulong8(X);
|
||||||
|
X ^= as_uint16(Bin.hi);
|
||||||
|
X = salsa20_8_2(as_uint16(X));
|
||||||
|
Bin.hi = as_ulong8(X);
|
||||||
|
return(Bin);
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* Two independent 32x32 -> 64-bit multiply-adds:
 *   result.x = a.x * a.y + (b.x, b.y as one 64-bit value)
 *   result.y = a.z * a.w + (b.z, b.w as one 64-bit value)
 * `b` is reinterpreted with as_ulong2, the same reinterpretation the previous
 * version applied to its return value, so the word order is unchanged.
 *
 * The previous version built the high word as b.y + mad_hi(a.x, a.y, b.x),
 * which adds the whole low addend b.x to the high word instead of the carry
 * out of the low-word addition. The arithmetic is now done in 64 bits,
 * matching madd4long3(). */
inline ulong2 madd4long2(uint4 a, uint4 b)
{
	const ulong2 addend = as_ulong2(b);
	ulong2 result;

	result.x = (ulong)a.x * (ulong)a.y + addend.x;
	result.y = (ulong)a.z * (ulong)a.w + addend.y;
	return result;
}
|
||||||
|
|
||||||
|
/* Two independent 32x32 -> 64-bit multiply-adds (wrapping modulo 2^64):
 *   result.x = a.x * a.y + b.x
 *   result.y = a.z * a.w + b.y */
inline ulong2 madd4long3(uint4 a, ulong2 b)
{
	const ulong first  = (ulong)a.x * (ulong)a.y + b.x;
	const ulong second = (ulong)a.z * (ulong)a.w + b.y;

	return (ulong2)(first, second);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Six rounds of the lookup / multiply-add / xor step on one 16-byte lane.
 * Each round takes bits 4..11 of both 32-bit halves of lane.x as two 8-bit
 * indices. The first indexes the table at `prevstate`, the second the table
 * 32 ulong16 elements (256 ulong2 entries) further on. Both indices come
 * from the lane value before the multiply-add of that round. */
inline ulong2 pwxform_lane_old(ulong2 lane, __global ulong16 *prevstate)
{
	__global ulong2 *table0 = (__global ulong2*)(prevstate);
	__global ulong2 *table1 = (__global ulong2*)(prevstate + 32);

	for (int round = 0; round < 6; round++)
	{
		const uint2 idx = as_uint2((lane.x >> 4) & 0x000000FF000000FF);

		lane = madd4long3(as_uint4(lane), table0[idx.x]);
		lane ^= table1[idx.y];
	}
	return lane;
}

/* Transforms the four 16-byte lanes of a 64-byte block independently with
 * pwxform_lane_old() and returns the updated block. `prevstate` is only
 * read. Results are identical to the former four hand-copied loops. */
inline ulong8 block_pwxform_long_old(ulong8 Bout, __global ulong16 *prevstate)
{
	Bout.lo.lo = pwxform_lane_old(Bout.lo.lo, prevstate);
	Bout.lo.hi = pwxform_lane_old(Bout.lo.hi, prevstate);
	Bout.hi.lo = pwxform_lane_old(Bout.hi.lo, prevstate);
	Bout.hi.hi = pwxform_lane_old(Bout.hi.hi, prevstate);

	return(Bout);
}
|
||||||
|
|
||||||
|
/* Six rounds of the lookup / multiply-add / xor step on one 16-byte lane.
 * Each round takes bits 4..11 of both 32-bit halves of lane.x as two 8-bit
 * indices. The first indexes the table at `prevstate`, the second the table
 * 32 * 8 = 256 ulong2 entries further on. Both indices come from the lane
 * value before the multiply-add of that round. */
inline ulong2 pwxform_lane(ulong2 lane, __global ulong2 *prevstate)
{
	__global ulong2 *table0 = prevstate;
	__global ulong2 *table1 = prevstate + 32 * 8;

	for (int round = 0; round < 6; round++)
	{
		const uint2 idx = as_uint2((lane.x >> 4) & 0x000000FF000000FF);

		lane = madd4long3(as_uint4(lane), table0[idx.x]);
		lane ^= table1[idx.y];
	}
	return lane;
}

/* Transforms the four 16-byte lanes of a 64-byte block independently with
 * pwxform_lane() and returns the updated block. `prevstate` is only read.
 * Results are identical to the former four hand-copied loops. */
inline ulong8 block_pwxform_long(ulong8 Bout, __global ulong2 *prevstate)
{
	Bout.lo.lo = pwxform_lane(Bout.lo.lo, prevstate);
	Bout.lo.hi = pwxform_lane(Bout.lo.hi, prevstate);
	Bout.hi.lo = pwxform_lane(Bout.hi.lo, prevstate);
	Bout.hi.hi = pwxform_lane(Bout.hi.hi, prevstate);

	return(Bout);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* In-place mix of the sixteen 64-byte blocks Bin[0..15].
 * Each block is xored with its already-updated predecessor and then run
 * through block_pwxform_long_old(). Block 0 uses the original Bin[15] as its
 * predecessor. Finally Bin[15] is replaced by salsa20_8 of itself.
 * `prevstate` is passed through to block_pwxform_long_old() unchanged. */
inline void blockmix_pwxform( ulong8 *Bin, __global ulong16 *prevstate)
{
	ulong8 chained = Bin[15];   /* predecessor of block 0 */

#pragma unroll 1
	for (int blk = 0; blk < 16; blk++)
	{
		chained = block_pwxform_long_old(Bin[blk] ^ chained, prevstate);
		Bin[blk] = chained;
	}

	/* `chained` now holds the new Bin[15] */
	Bin[15] = salsa20_8(as_uint16(chained));
}
|
||||||
|
|
||||||
|
/*
 * SHA-256 building blocks (FIPS 180-4 notation in brackets).
 * NOTE(review): ROL32 is defined outside this section; the rotation amounts
 * below only match the standard if it is a 32-bit rotate-left (so that
 * ROL32(x, 25) equals a right rotation by 7, and so on).
 */

/* logical right shift */
#define SHR(x, n) ((x) >> n)

/* message-schedule functions [sigma0, sigma1] */
#define S0(x) (ROL32(x, 25) ^ ROL32(x, 14) ^ SHR(x, 3))
#define S1(x) (ROL32(x, 15) ^ ROL32(x, 13) ^ SHR(x, 10))

/* compression functions [Sigma0, Sigma1] */
#define S2(x) (ROL32(x, 30) ^ ROL32(x, 19) ^ ROL32(x, 10))
#define S3(x) (ROL32(x, 26) ^ ROL32(x, 21) ^ ROL32(x, 7))

/* F0: bitwise majority of its three arguments; F1: bitwise "x ? y : z" [Maj, Ch] */
#define F0(y, x, z) bitselect(z, y, z ^ x)
#define F1(x, y, z) bitselect(z, y, x)

/* One compression round. Updates d and h in place; needs a `uint temp1`
 * in the enclosing scope. x is the schedule word, K the round constant. */
#define P(a,b,c,d,e,f,g,h,x,K) \
{ \
	temp1 = h + S3(e) + F1(e,f,g) + (K + x); \
	d += temp1; h = temp1 + S2(a) + F0(a,b,c); \
}

/* Reduced round that only updates d (h is left untouched). */
#define PLAST(a,b,c,d,e,f,g,h,x,K) \
{ \
	d += h + S3(e) + F1(e,f,g) + (x + K); \
}

/* Message-schedule expansion on a rolling 16-word window W0..W15:
 * Rn overwrites Wn with the word 16 steps later and yields the new value. */
#define R0 (W0 = S1(W14) + W9 + S0(W1) + W0)
#define R1 (W1 = S1(W15) + W10 + S0(W2) + W1)
#define R2 (W2 = S1(W0) + W11 + S0(W3) + W2)
#define R3 (W3 = S1(W1) + W12 + S0(W4) + W3)
#define R4 (W4 = S1(W2) + W13 + S0(W5) + W4)
#define R5 (W5 = S1(W3) + W14 + S0(W6) + W5)
#define R6 (W6 = S1(W4) + W15 + S0(W7) + W6)
#define R7 (W7 = S1(W5) + W0 + S0(W8) + W7)
#define R8 (W8 = S1(W6) + W1 + S0(W9) + W8)
#define R9 (W9 = S1(W7) + W2 + S0(W10) + W9)
#define R10 (W10 = S1(W8) + W3 + S0(W11) + W10)
#define R11 (W11 = S1(W9) + W4 + S0(W12) + W11)
#define R12 (W12 = S1(W10) + W5 + S0(W13) + W12)
#define R13 (W13 = S1(W11) + W6 + S0(W14) + W13)
#define R14 (W14 = S1(W12) + W7 + S0(W15) + W14)
#define R15 (W15 = S1(W13) + W8 + S0(W0) + W15)

/* Same values as R14 / R15 but without the store; used for the last two
 * rounds, after which the window is no longer read. */
#define RD14 (S1(W12) + W7 + S0(W15) + W14)
#define RD15 (S1(W13) + W8 + S0(W0) + W15)
|
||||||
|
|
||||||
|
/* Generic SHA-256 compression function.
 *   data  - one 64-byte message block as sixteen 32-bit words
 *   state - chaining value entering the block
 * Returns the chaining value after the block (state plus the result of the
 * 64 rounds, lane by lane). */
inline uint8 sha256_Transform(uint16 data, uint8 state)
{
	uint temp1;   /* scratch required by the P() round macro */

	/* rolling message-schedule window, rewritten in place by R0..R15 */
	uint W0 = data.s0, W1 = data.s1, W2 = data.s2, W3 = data.s3;
	uint W4 = data.s4, W5 = data.s5, W6 = data.s6, W7 = data.s7;
	uint W8 = data.s8, W9 = data.s9, W10 = data.sA, W11 = data.sB;
	uint W12 = data.sC, W13 = data.sD, W14 = data.sE, W15 = data.sF;

	/* the eight working variables start from the chaining value */
	uint ha = state.s0, hb = state.s1, hc = state.s2, hd = state.s3;
	uint he = state.s4, hf = state.s5, hg = state.s6, hh = state.s7;

	/* Eight consecutive rounds; the working variables rotate by one position
	 * per round and are back in their starting roles after the eighth. */
#define SHA256_ROUNDS8(x0, k0, x1, k1, x2, k2, x3, k3, x4, k4, x5, k5, x6, k6, x7, k7) \
	P(ha, hb, hc, hd, he, hf, hg, hh, x0, k0); \
	P(hh, ha, hb, hc, hd, he, hf, hg, x1, k1); \
	P(hg, hh, ha, hb, hc, hd, he, hf, x2, k2); \
	P(hf, hg, hh, ha, hb, hc, hd, he, x3, k3); \
	P(he, hf, hg, hh, ha, hb, hc, hd, x4, k4); \
	P(hd, he, hf, hg, hh, ha, hb, hc, x5, k5); \
	P(hc, hd, he, hf, hg, hh, ha, hb, x6, k6); \
	P(hb, hc, hd, he, hf, hg, hh, ha, x7, k7)

	/* rounds 0..15 consume the message words as they are */
	SHA256_ROUNDS8(W0, 0x428A2F98, W1, 0x71374491, W2, 0xB5C0FBCF, W3, 0xE9B5DBA5,
	               W4, 0x3956C25B, W5, 0x59F111F1, W6, 0x923F82A4, W7, 0xAB1C5ED5);
	SHA256_ROUNDS8(W8, 0xD807AA98, W9, 0x12835B01, W10, 0x243185BE, W11, 0x550C7DC3,
	               W12, 0x72BE5D74, W13, 0x80DEB1FE, W14, 0x9BDC06A7, W15, 0xC19BF174);

	/* rounds 16..63 expand the schedule on the fly */
	SHA256_ROUNDS8(R0, 0xE49B69C1, R1, 0xEFBE4786, R2, 0x0FC19DC6, R3, 0x240CA1CC,
	               R4, 0x2DE92C6F, R5, 0x4A7484AA, R6, 0x5CB0A9DC, R7, 0x76F988DA);
	SHA256_ROUNDS8(R8, 0x983E5152, R9, 0xA831C66D, R10, 0xB00327C8, R11, 0xBF597FC7,
	               R12, 0xC6E00BF3, R13, 0xD5A79147, R14, 0x06CA6351, R15, 0x14292967);

	SHA256_ROUNDS8(R0, 0x27B70A85, R1, 0x2E1B2138, R2, 0x4D2C6DFC, R3, 0x53380D13,
	               R4, 0x650A7354, R5, 0x766A0ABB, R6, 0x81C2C92E, R7, 0x92722C85);
	SHA256_ROUNDS8(R8, 0xA2BFE8A1, R9, 0xA81A664B, R10, 0xC24B8B70, R11, 0xC76C51A3,
	               R12, 0xD192E819, R13, 0xD6990624, R14, 0xF40E3585, R15, 0x106AA070);

	SHA256_ROUNDS8(R0, 0x19A4C116, R1, 0x1E376C08, R2, 0x2748774C, R3, 0x34B0BCB5,
	               R4, 0x391C0CB3, R5, 0x4ED8AA4A, R6, 0x5B9CCA4F, R7, 0x682E6FF3);
	/* the last two rounds use RD14 / RD15: the window is not read afterwards */
	SHA256_ROUNDS8(R8, 0x748F82EE, R9, 0x78A5636F, R10, 0x84C87814, R11, 0x8CC70208,
	               R12, 0x90BEFFFA, R13, 0xA4506CEB, RD14, 0xBEF9A3F7, RD15, 0xC67178F2);

#undef SHA256_ROUNDS8

	/* feed-forward: add the incoming chaining value */
	return state + (uint8)(ha, hb, hc, hd, he, hf, hg, hh);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* SHA-256 compression of the first block of a message: compresses `data`
 * starting from the standard initial hash value and returns the resulting
 * chaining value.
 *
 * This used to be a full hand-copied duplicate of the 64 rounds in
 * sha256_Transform() with the initial value written out as literals. It now
 * delegates to that function with H256, which holds the same eight words,
 * so the result is bit-identical. */
static inline uint8 sha256_round1(uint16 data)
{
	return sha256_Transform(data, H256);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* SHA-256 compression of a follow-up block: compresses `data` starting from
 * the chaining value `buf` and returns the new chaining value.
 *
 * This used to be a full hand-copied duplicate of the 64 rounds in
 * sha256_Transform(), which computes exactly the same function (same rounds,
 * same constants, same feed-forward). It now delegates to it. */
static inline uint8 sha256_round2(uint16 data,uint8 buf)
{
	return sha256_Transform(data, buf);
}
|
||||||
|
|
||||||
|
/* SHA-256 over an 80-byte input held as 32-bit words, with the last word
 * replaced by `nonce`.
 *   data  - at least 19 readable words; data[0..15] form the first block,
 *           data[16..18] open the second block
 *   nonce - used as word 19 of the message
 * Returns the eight-word digest state after both blocks. No byte swapping is
 * done here; the words are consumed as given. */
static inline uint8 sha256_80(uint* data,uint nonce)
{
	/* vload16 only needs `data` to be aligned like a uint. The previous
	 * ((uint16*)data)[0] cast required 64-byte alignment of the pointer. */
	const uint8 midstate = sha256_round1(vload16(0, data));

	/* second block: three remaining words + nonce, then the constant padding
	 * and the 640-bit length already present in padsha80 */
	uint16 tail = padsha80;
	tail.s0 = data[16];
	tail.s1 = data[17];
	tail.s2 = data[18];
	tail.s3 = nonce;

	return(sha256_round2(tail, midstate));
}
|
||||||
|
|
4
miner.h
4
miner.h
@ -1136,8 +1136,8 @@ extern bool add_pool_details(struct pool *pool, bool live, char *url, char *user
|
|||||||
#define MAX_GPUDEVICES 16
|
#define MAX_GPUDEVICES 16
|
||||||
#define MAX_DEVICES 4096
|
#define MAX_DEVICES 4096
|
||||||
|
|
||||||
#define MIN_INTENSITY 8
|
#define MIN_INTENSITY 4
|
||||||
#define MIN_INTENSITY_STR "8"
|
#define MIN_INTENSITY_STR "4"
|
||||||
#define MAX_INTENSITY 31
|
#define MAX_INTENSITY 31
|
||||||
#define MAX_INTENSITY_STR "31"
|
#define MAX_INTENSITY_STR "31"
|
||||||
#define MIN_XINTENSITY 1
|
#define MIN_XINTENSITY 1
|
||||||
|
123
ocl.c
123
ocl.c
@ -36,6 +36,7 @@
|
|||||||
#include "ocl/binary_kernel.h"
|
#include "ocl/binary_kernel.h"
|
||||||
#include "algorithm/neoscrypt.h"
|
#include "algorithm/neoscrypt.h"
|
||||||
#include "algorithm/pluck.h"
|
#include "algorithm/pluck.h"
|
||||||
|
#include "algorithm/yescrypt.h"
|
||||||
|
|
||||||
/* FIXME: only here for global config vars, replace with configuration.h
|
/* FIXME: only here for global config vars, replace with configuration.h
|
||||||
* or similar as soon as config is in a struct instead of littered all
|
* or similar as soon as config is in a struct instead of littered all
|
||||||
@ -514,7 +515,90 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
|
|||||||
cgpu->thread_concurrency = glob_thread_count;
|
cgpu->thread_concurrency = glob_thread_count;
|
||||||
|
|
||||||
applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency));
|
applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency));
|
||||||
|
}
|
||||||
|
else if (!safe_cmp(cgpu->algorithm.name, "yescrypt") && !cgpu->opt_tc) {
|
||||||
|
size_t glob_thread_count;
|
||||||
|
long max_int;
|
||||||
|
unsigned char type = 0;
|
||||||
|
|
||||||
|
// determine which intensity type to use
|
||||||
|
if (cgpu->rawintensity > 0) {
|
||||||
|
glob_thread_count = cgpu->rawintensity;
|
||||||
|
max_int = glob_thread_count;
|
||||||
|
type = 2;
|
||||||
|
}
|
||||||
|
else if (cgpu->xintensity > 0) {
|
||||||
|
glob_thread_count = clState->compute_shaders * ((cgpu->algorithm.xintensity_shift) ? (1UL << (cgpu->algorithm.xintensity_shift + cgpu->xintensity)) : cgpu->xintensity);
|
||||||
|
max_int = cgpu->xintensity;
|
||||||
|
type = 1;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
glob_thread_count = 1UL << (cgpu->algorithm.intensity_shift + cgpu->intensity);
|
||||||
|
max_int = ((cgpu->dynamic) ? MAX_INTENSITY : cgpu->intensity);
|
||||||
|
}
|
||||||
|
|
||||||
|
glob_thread_count = ((glob_thread_count < cgpu->work_size) ? cgpu->work_size : glob_thread_count);
|
||||||
|
|
||||||
|
// if TC * scratchbuf size is too big for memory... reduce to max
|
||||||
|
if ((glob_thread_count * YESCRYPT_SCRATCHBUF_SIZE) >= (uint64_t)cgpu->max_alloc) {
|
||||||
|
|
||||||
|
/* Selected intensity will not run on this GPU. Not enough memory.
|
||||||
|
* Adapt the memory setting. */
|
||||||
|
// depending on intensity type used, reduce the intensity until it fits into the GPU max_alloc
|
||||||
|
switch (type) {
|
||||||
|
//raw intensity
|
||||||
|
case 2:
|
||||||
|
while ((glob_thread_count * YESCRYPT_SCRATCHBUF_SIZE) > (uint64_t)cgpu->max_alloc) {
|
||||||
|
--glob_thread_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
max_int = glob_thread_count;
|
||||||
|
cgpu->rawintensity = glob_thread_count;
|
||||||
|
break;
|
||||||
|
|
||||||
|
//x intensity
|
||||||
|
case 1:
|
||||||
|
glob_thread_count = cgpu->max_alloc / YESCRYPT_SCRATCHBUF_SIZE;
|
||||||
|
max_int = glob_thread_count / clState->compute_shaders;
|
||||||
|
|
||||||
|
while (max_int && ((clState->compute_shaders * (1UL << max_int)) > glob_thread_count)) {
|
||||||
|
--max_int;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check if max_intensity is >0. */
|
||||||
|
if (max_int < MIN_XINTENSITY) {
|
||||||
|
applog(LOG_ERR, "GPU %d: Max xintensity is below minimum.", gpu);
|
||||||
|
max_int = MIN_XINTENSITY;
|
||||||
|
}
|
||||||
|
|
||||||
|
cgpu->xintensity = max_int;
|
||||||
|
glob_thread_count = clState->compute_shaders * (1UL << max_int);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
glob_thread_count = cgpu->max_alloc / YESCRYPT_SCRATCHBUF_SIZE;
|
||||||
|
while (max_int && ((1UL << max_int) & glob_thread_count) == 0) {
|
||||||
|
--max_int;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check if max_intensity is >0. */
|
||||||
|
if (max_int < MIN_INTENSITY) {
|
||||||
|
applog(LOG_ERR, "GPU %d: Max intensity is below minimum.", gpu);
|
||||||
|
max_int = MIN_INTENSITY;
|
||||||
|
}
|
||||||
|
|
||||||
|
cgpu->intensity = max_int;
|
||||||
|
glob_thread_count = 1UL << max_int;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TC is glob thread count
|
||||||
|
cgpu->thread_concurrency = glob_thread_count;
|
||||||
|
|
||||||
|
applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency));
|
||||||
|
|
||||||
|
|
||||||
} else if (!cgpu->opt_tc) {
|
} else if (!cgpu->opt_tc) {
|
||||||
unsigned int sixtyfours;
|
unsigned int sixtyfours;
|
||||||
|
|
||||||
@ -611,7 +695,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
|
|||||||
snprintf(kernel_name, 9, "%s%d", "search", i + 1);
|
snprintf(kernel_name, 9, "%s%d", "search", i + 1);
|
||||||
clState->extra_kernels[i] = clCreateKernel(clState->program, kernel_name, &status);
|
clState->extra_kernels[i] = clCreateKernel(clState->program, kernel_name, &status);
|
||||||
if (status != CL_SUCCESS) {
|
if (status != CL_SUCCESS) {
|
||||||
applog(LOG_ERR, "Error %d: Creating ExtraKernel #%d from program. (clCreateKernel)", status, i);
|
applog(LOG_DEBUG, "Error %d: Creating ExtraKernel #%d from program. (clCreateKernel)", status, i);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -633,6 +717,20 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
|
|||||||
applog(LOG_DEBUG, "Neoscrypt buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize);
|
applog(LOG_DEBUG, "Neoscrypt buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize);
|
||||||
// scrypt/n-scrypt
|
// scrypt/n-scrypt
|
||||||
}
|
}
|
||||||
|
else if (!safe_cmp(algorithm->name, "yescrypt")) {
|
||||||
|
/* The scratch/pad-buffer needs 32kBytes memory per thread. */
|
||||||
|
bufsize = YESCRYPT_SCRATCHBUF_SIZE * cgpu->thread_concurrency;
|
||||||
|
|
||||||
|
/* This is the input buffer. For yescrypt this is guaranteed to be
|
||||||
|
* 80 bytes only. */
|
||||||
|
readbufsize = 80;
|
||||||
|
|
||||||
|
applog(LOG_DEBUG, "yescrypt buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize);
|
||||||
|
// scrypt/n-scrypt
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
else if (!safe_cmp(algorithm->name, "pluck")) {
|
else if (!safe_cmp(algorithm->name, "pluck")) {
|
||||||
/* The scratch/pad-buffer needs 32kBytes memory per thread. */
|
/* The scratch/pad-buffer needs 32kBytes memory per thread. */
|
||||||
bufsize = PLUCK_SCRATCHBUF_SIZE * cgpu->thread_concurrency;
|
bufsize = PLUCK_SCRATCHBUF_SIZE * cgpu->thread_concurrency;
|
||||||
@ -666,6 +764,29 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
|
|||||||
applog(LOG_WARNING, "Your settings come to %lu", (unsigned long)bufsize);
|
applog(LOG_WARNING, "Your settings come to %lu", (unsigned long)bufsize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (!safe_cmp(algorithm->name, "yescrypt")) {
|
||||||
|
// need additional buffers
|
||||||
|
clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, PLUCK_SECBUF_SIZE * cgpu->thread_concurrency, NULL, &status);
|
||||||
|
if (status != CL_SUCCESS && !clState->buffer1) {
|
||||||
|
applog(LOG_DEBUG, "Error %d: clCreateBuffer (buffer1), decrease TC or increase LG", status);
|
||||||
|
return NULL;}
|
||||||
|
|
||||||
|
clState->buffer2 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, 128 * 8 * 8 * cgpu->thread_concurrency, NULL, &status);
|
||||||
|
if (status != CL_SUCCESS && !clState->buffer2) {
|
||||||
|
applog(LOG_DEBUG, "Error %d: clCreateBuffer (buffer2), decrease TC or increase LG", status);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
clState->buffer3 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, 8 * 8 * 4 * cgpu->thread_concurrency, NULL, &status);
|
||||||
|
if (status != CL_SUCCESS && !clState->buffer3) {
|
||||||
|
applog(LOG_DEBUG, "Error %d: clCreateBuffer (buffer3), decrease TC or increase LG", status);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
/* This buffer is weird and might work to some degree even if
|
/* This buffer is weird and might work to some degree even if
|
||||||
* the create buffer call has apparently failed, so check if we
|
* the create buffer call has apparently failed, so check if we
|
||||||
* get anything back before we call it a failure. */
|
* get anything back before we call it a failure. */
|
||||||
|
3
ocl.h
3
ocl.h
@ -22,6 +22,9 @@ typedef struct __clState {
|
|||||||
cl_mem outputBuffer;
|
cl_mem outputBuffer;
|
||||||
cl_mem CLbuffer0;
|
cl_mem CLbuffer0;
|
||||||
cl_mem padbuffer8;
|
cl_mem padbuffer8;
|
||||||
|
cl_mem buffer1;
|
||||||
|
cl_mem buffer2;
|
||||||
|
cl_mem buffer3;
|
||||||
unsigned char cldata[80];
|
unsigned char cldata[80];
|
||||||
bool hasBitAlign;
|
bool hasBitAlign;
|
||||||
bool goffset;
|
bool goffset;
|
||||||
|
@ -7081,7 +7081,8 @@ bool test_nonce(struct work *work, uint32_t nonce)
|
|||||||
rebuild_nonce(work, nonce);
|
rebuild_nonce(work, nonce);
|
||||||
|
|
||||||
// for Neoscrypt, the diff1targ value is in work->target
|
// for Neoscrypt, the diff1targ value is in work->target
|
||||||
if (!safe_cmp(work->pool->algorithm.name, "neoscrypt") || !safe_cmp(work->pool->algorithm.name, "pluck")) {
|
if (!safe_cmp(work->pool->algorithm.name, "neoscrypt") || !safe_cmp(work->pool->algorithm.name, "pluck")
|
||||||
|
|| !safe_cmp(work->pool->algorithm.name, "yescrypt") ) {
|
||||||
diff1targ = ((uint32_t *)work->target)[7];
|
diff1targ = ((uint32_t *)work->target)[7];
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@ -8725,7 +8726,8 @@ int main(int argc, char *argv[])
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Default algorithm specified in algorithm.c ATM */
|
/* Default algorithm specified in algorithm.c ATM */
|
||||||
set_algorithm(&default_profile.algorithm, "scrypt");
|
/* changed to x11 which won't cause crash*/
|
||||||
|
set_algorithm(&default_profile.algorithm, "x11");
|
||||||
|
|
||||||
devcursor = 8;
|
devcursor = 8;
|
||||||
logstart = devcursor + 1;
|
logstart = devcursor + 1;
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
noinst_LIBRARIES = libsph.a
|
noinst_LIBRARIES = libsph.a
|
||||||
|
|
||||||
libsph_a_SOURCES = bmw.c echo.c jh.c luffa.c simd.c blake.c cubehash.c groestl.c keccak.c shavite.c skein.c sha2.c sha2big.c fugue.c hamsi.c panama.c shabal.c whirlpool.c
|
libsph_a_SOURCES = bmw.c echo.c jh.c luffa.c simd.c blake.c cubehash.c groestl.c keccak.c shavite.c skein.c sha2.c sha2big.c fugue.c hamsi.c panama.c shabal.c whirlpool.c sha256_Y.c
|
||||||
|
418
sph/sha256_Y.c
Normal file
418
sph/sha256_Y.c
Normal file
@ -0,0 +1,418 @@
|
|||||||
|
/*-
|
||||||
|
* Copyright 2005,2007,2009 Colin Percival
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "algorithm/sysendian.h"
|
||||||
|
|
||||||
|
#include "sph/sha256_Y.h"
|
||||||
|
|
||||||
|
/*
 * Serialize 32-bit words from src into the byte array dst in big-endian
 * order.  len is the size of dst in bytes; len / 4 words are written, so a
 * len that is not a multiple of 4 leaves the trailing bytes untouched.
 */
static void
be32enc_vect(unsigned char *dst, const uint32_t *src, size_t len)
{
	const uint32_t *const stop = src + len / 4;

	while (src != stop) {
		be32enc(dst, *src++);
		dst += 4;
	}
}
|
||||||
|
|
||||||
|
/*
 * Deserialize the big-endian byte array src into 32-bit words stored in
 * dst.  len is the size of src in bytes; len / 4 words are produced, so a
 * len that is not a multiple of 4 ignores the trailing bytes.
 */
static void
be32dec_vect(uint32_t *dst, const unsigned char *src, size_t len)
{
	size_t words = len / 4;

	while (words-- > 0) {
		*dst++ = be32dec(src);
		src += 4;
	}
}
|
||||||
|
|
||||||
|
/*
 * Elementary functions used by SHA256.
 *
 * Operands are expected to be 32-bit unsigned values.  Every argument is
 * parenthesized so that compound expressions (e.g. ROTR(a | b, n)) expand
 * with the intended precedence.  Arguments may be evaluated more than
 * once, so they must not have side effects.
 */
#define Ch(x, y, z)	(((x) & ((y) ^ (z))) ^ (z))
#define Maj(x, y, z)	(((x) & ((y) | (z))) | ((y) & (z)))
#define SHR(x, n)	((x) >> (n))
#define ROTR(x, n)	(((x) >> (n)) | ((x) << (32 - (n))))
#define S0(x)		(ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
#define S1(x)		(ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
#define s0(x)		(ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
#define s1(x)		(ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))

/*
 * SHA256 round function.
 *
 * Updates d and h in place from the working variables a..h and the
 * round input k.  Uses the caller's uint32_t temporaries t0 and t1, which
 * must be in scope.  Wrapped in do { } while (0) so that one invocation
 * is a single statement, safe under an unbraced if/else or loop.
 */
#define RND(a, b, c, d, e, f, g, h, k)			\
	do {						\
		t0 = (h) + S1(e) + Ch(e, f, g) + (k);	\
		t1 = S0(a) + Maj(a, b, c);		\
		(d) += t0;				\
		(h) = t0 + t1;				\
	} while (0)

/*
 * Adjusted round function for rotating state: instead of shifting the
 * eight working variables after each round, round i addresses them
 * through indices rotated by i (mod 8).  W[i] + k is the round input.
 */
#define RNDr(S, W, i, k)					\
	RND((S)[(64 - (i)) % 8], (S)[(65 - (i)) % 8],		\
	    (S)[(66 - (i)) % 8], (S)[(67 - (i)) % 8],		\
	    (S)[(68 - (i)) % 8], (S)[(69 - (i)) % 8],		\
	    (S)[(70 - (i)) % 8], (S)[(71 - (i)) % 8],		\
	    (W)[(i)] + (k))
|
||||||
|
|
||||||
|
/*
 * SHA256 block compression function.
 *
 * state - eight 32-bit chaining words, updated in place.
 * block - one 64-byte input block, read as sixteen big-endian words.
 */
static void
SHA256_Transform(uint32_t * state, const unsigned char block[64])
{
	/* Round constants, indexed by round number. */
	static const uint32_t K[64] = {
		0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
		0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
		0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
		0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
		0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
		0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
		0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
		0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
		0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
		0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
		0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
		0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
		0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
		0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
		0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
		0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
	};
	uint32_t sched[64];
	uint32_t work[8];
	uint32_t t0, t1;	/* scratch used inside the RND macro */
	int t;

	/* 1. Message schedule: 16 words from the block, 48 derived words. */
	be32dec_vect(sched, block, 64);
	for (t = 16; t < 64; t++) {
		sched[t] = s1(sched[t - 2]) + sched[t - 7] +
		    s0(sched[t - 15]) + sched[t - 16];
	}

	/* 2. Working variables start as a copy of the chaining value. */
	memcpy(work, state, sizeof(work));

	/*
	 * 3. The 64 rounds.  The braces are required: RNDr may expand to
	 * several statements.
	 */
	for (t = 0; t < 64; t++) {
		RNDr(work, sched, t, K[t]);
	}

	/* 4. Add the working variables back into the chaining value. */
	for (t = 0; t < 8; t++) {
		state[t] += work[t];
	}

	/* Clean the stack. */
	memset(sched, 0, sizeof(sched));
	memset(work, 0, sizeof(work));
	t0 = t1 = 0;
}
|
||||||
|
|
||||||
|
/*
 * Padding bytes fed to the hash by SHA256_Pad: a single 0x80 marker byte
 * followed by 63 zero bytes (the remaining elements are zero-initialized).
 * The table is only ever read, so it is declared const.
 */
static const unsigned char PAD[64] = { 0x80 };
|
||||||
|
|
||||||
|
/* Add padding and terminating bit-count. */
|
||||||
|
static void
|
||||||
|
SHA256_Pad(SHA256_CTX_Y * ctx)
|
||||||
|
{
|
||||||
|
unsigned char len[8];
|
||||||
|
uint32_t r, plen;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Convert length to a vector of bytes -- we do this now rather
|
||||||
|
* than later because the length will change after we pad.
|
||||||
|
*/
|
||||||
|
be32enc_vect(len, ctx->count, 8);
|
||||||
|
|
||||||
|
/* Add 1--64 bytes so that the resulting length is 56 mod 64 */
|
||||||
|
r = (ctx->count[1] >> 3) & 0x3f;
|
||||||
|
plen = (r < 56) ? (56 - r) : (120 - r);
|
||||||
|
SHA256_Update_Y(ctx, PAD, (size_t)plen);
|
||||||
|
|
||||||
|
/* Add the terminating bit-count */
|
||||||
|
SHA256_Update_Y(ctx, len, 8);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* SHA-256 initialization. Begins a SHA-256 operation. */
|
||||||
|
void
|
||||||
|
SHA256_Init_Y(SHA256_CTX_Y * ctx)
|
||||||
|
{
|
||||||
|
|
||||||
|
/* Zero bits processed so far */
|
||||||
|
ctx->count[0] = ctx->count[1] = 0;
|
||||||
|
|
||||||
|
/* Magic initialization constants */
|
||||||
|
ctx->state[0] = 0x6A09E667;
|
||||||
|
ctx->state[1] = 0xBB67AE85;
|
||||||
|
ctx->state[2] = 0x3C6EF372;
|
||||||
|
ctx->state[3] = 0xA54FF53A;
|
||||||
|
ctx->state[4] = 0x510E527F;
|
||||||
|
ctx->state[5] = 0x9B05688C;
|
||||||
|
ctx->state[6] = 0x1F83D9AB;
|
||||||
|
ctx->state[7] = 0x5BE0CD19;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Add bytes into the hash */
|
||||||
|
void
|
||||||
|
SHA256_Update_Y(SHA256_CTX_Y * ctx, const void *in, size_t len)
|
||||||
|
{
|
||||||
|
uint32_t bitlen[2];
|
||||||
|
uint32_t r;
|
||||||
|
const unsigned char *src = in;
|
||||||
|
|
||||||
|
/* Number of bytes left in the buffer from previous updates */
|
||||||
|
r = (ctx->count[1] >> 3) & 0x3f;
|
||||||
|
|
||||||
|
/* Convert the length into a number of bits */
|
||||||
|
bitlen[1] = ((uint32_t)len) << 3;
|
||||||
|
bitlen[0] = (uint32_t)(len >> 29);
|
||||||
|
|
||||||
|
/* Update number of bits */
|
||||||
|
if ((ctx->count[1] += bitlen[1]) < bitlen[1])
|
||||||
|
ctx->count[0]++;
|
||||||
|
ctx->count[0] += bitlen[0];
|
||||||
|
|
||||||
|
/* Handle the case where we don't need to perform any transforms */
|
||||||
|
if (len < 64 - r) {
|
||||||
|
|
||||||
|
memcpy(&ctx->buf[r], src, len);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Finish the current block */
|
||||||
|
memcpy(&ctx->buf[r], src, 64 - r);
|
||||||
|
|
||||||
|
SHA256_Transform(ctx->state, ctx->buf);
|
||||||
|
src += 64 - r;
|
||||||
|
len -= 64 - r;
|
||||||
|
|
||||||
|
/* Perform complete blocks */
|
||||||
|
|
||||||
|
while (len >= 64) {
|
||||||
|
SHA256_Transform(ctx->state, src);
|
||||||
|
src += 64;
|
||||||
|
len -= 64;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Copy left over data into buffer */
|
||||||
|
memcpy(ctx->buf, src, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SHA-256 finalization. Pads the input data, exports the hash value,
|
||||||
|
* and clears the context state.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
SHA256_Final_Y(unsigned char digest[32], SHA256_CTX_Y * ctx)
|
||||||
|
{
|
||||||
|
/* Add padding */
|
||||||
|
SHA256_Pad(ctx);
|
||||||
|
|
||||||
|
/* Write the hash */
|
||||||
|
be32enc_vect(digest, ctx->state, 32);
|
||||||
|
|
||||||
|
/* Clear the context state */
|
||||||
|
memset((void *)ctx, 0, sizeof(*ctx));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Initialize an HMAC-SHA256 operation with the given key. */
|
||||||
|
void
|
||||||
|
HMAC_SHA256_Init_Y(HMAC_SHA256_CTX_Y * ctx, const void * _K, size_t Klen)
|
||||||
|
{
|
||||||
|
unsigned char pad[64];
|
||||||
|
unsigned char khash[32];
|
||||||
|
const unsigned char * K = _K;
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
/* If Klen > 64, the key is really SHA256(K). */
|
||||||
|
if (Klen > 64) {
|
||||||
|
SHA256_Init_Y(&ctx->ictx);
|
||||||
|
SHA256_Update_Y(&ctx->ictx, K, Klen);
|
||||||
|
SHA256_Final_Y(khash, &ctx->ictx);
|
||||||
|
K = khash;
|
||||||
|
Klen = 32;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
|
||||||
|
SHA256_Init_Y(&ctx->ictx);
|
||||||
|
memset(pad, 0x36, 64);
|
||||||
|
for (i = 0; i < Klen; i++) {
|
||||||
|
pad[i] ^= K[i];
|
||||||
|
}
|
||||||
|
SHA256_Update_Y(&ctx->ictx, pad, 64);
|
||||||
|
|
||||||
|
/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
|
||||||
|
SHA256_Init_Y(&ctx->octx);
|
||||||
|
memset(pad, 0x5c, 64);
|
||||||
|
for (i = 0; i < Klen; i++)
|
||||||
|
{
|
||||||
|
pad[i] ^= K[i];
|
||||||
|
}
|
||||||
|
SHA256_Update_Y(&ctx->octx, pad, 64);
|
||||||
|
|
||||||
|
/* Clean the stack. */
|
||||||
|
memset(khash, 0, 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Add bytes to the HMAC-SHA256 operation. */
|
||||||
|
void
|
||||||
|
HMAC_SHA256_Update_Y(HMAC_SHA256_CTX_Y * ctx, const void *in, size_t len)
|
||||||
|
{
|
||||||
|
/* Feed data to the inner SHA256 operation. */
|
||||||
|
SHA256_Update_Y(&ctx->ictx, in, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Finish an HMAC-SHA256 operation. */
|
||||||
|
void
|
||||||
|
HMAC_SHA256_Final_Y(unsigned char digest[32], HMAC_SHA256_CTX_Y * ctx)
|
||||||
|
{
|
||||||
|
unsigned char ihash[32];
|
||||||
|
|
||||||
|
/* Finish the inner SHA256 operation. */
|
||||||
|
SHA256_Final_Y(ihash, &ctx->ictx);
|
||||||
|
|
||||||
|
/* Feed the inner hash to the outer SHA256 operation. */
|
||||||
|
SHA256_Update_Y(&ctx->octx, ihash, 32);
|
||||||
|
|
||||||
|
/* Finish the outer SHA256 operation. */
|
||||||
|
SHA256_Final_Y(digest, &ctx->octx);
|
||||||
|
|
||||||
|
/* Clean the stack. */
|
||||||
|
memset(ihash, 0, 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
|
||||||
|
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
|
||||||
|
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
|
||||||
|
*/
|
||||||
|
|
||||||
|
void
|
||||||
|
PBKDF2_SHA256(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
|
||||||
|
size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen)
|
||||||
|
{
|
||||||
|
HMAC_SHA256_CTX_Y PShctx, hctx;
|
||||||
|
size_t i;
|
||||||
|
uint8_t ivec[4];
|
||||||
|
uint8_t U[32];
|
||||||
|
uint8_t T[32];
|
||||||
|
uint64_t j;
|
||||||
|
int k;
|
||||||
|
size_t clen;
|
||||||
|
|
||||||
|
/* Compute HMAC state after processing P and S. */
|
||||||
|
HMAC_SHA256_Init_Y(&PShctx, passwd, passwdlen);
|
||||||
|
HMAC_SHA256_Update_Y(&PShctx, salt, saltlen);
|
||||||
|
|
||||||
|
/* Iterate through the blocks. */
|
||||||
|
for (i = 0; i * 32 < dkLen; i++) {
|
||||||
|
/* Generate INT(i + 1). */
|
||||||
|
be32enc(ivec, (uint32_t)(i + 1));
|
||||||
|
|
||||||
|
/* Compute U_1 = PRF(P, S || INT(i)). */
|
||||||
|
memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX_Y));
|
||||||
|
HMAC_SHA256_Update_Y(&hctx, ivec, 4);
|
||||||
|
HMAC_SHA256_Final_Y(U, &hctx);
|
||||||
|
|
||||||
|
/* T_i = U_1 ... */
|
||||||
|
memcpy(T, U, 32);
|
||||||
|
|
||||||
|
for (j = 2; j <= c; j++) {
|
||||||
|
/* Compute U_j. */
|
||||||
|
HMAC_SHA256_Init_Y(&hctx, passwd, passwdlen);
|
||||||
|
HMAC_SHA256_Update_Y(&hctx, U, 32);
|
||||||
|
HMAC_SHA256_Final_Y(U, &hctx);
|
||||||
|
|
||||||
|
/* ... xor U_j ... */
|
||||||
|
for (k = 0; k < 32; k++)
|
||||||
|
T[k] ^= U[k];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Copy as many bytes as necessary into buf. */
|
||||||
|
clen = dkLen - i * 32;
|
||||||
|
if (clen > 32)
|
||||||
|
clen = 32;
|
||||||
|
memcpy(&buf[i * 32], T, clen);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Clean PShctx, since we never called _Final on it. */
|
||||||
|
memset(&PShctx, 0, sizeof(HMAC_SHA256_CTX_Y));
|
||||||
|
}
|
63
sph/sha256_Y.h
Normal file
63
sph/sha256_Y.h
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
/*-
|
||||||
|
* Copyright 2005,2007,2009 Colin Percival
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* $FreeBSD: src/lib/libmd/sha256_Y.h,v 1.2 2006/01/17 15:35:56 phk Exp $
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * The guard is named after this file.  The previous guard, _SHA256_H_, is a
 * reserved identifier and is a name other SHA-256 headers may also use,
 * which would silently hide the declarations below.
 */
#ifndef SHA256_Y_H
#define SHA256_Y_H

#include <sys/types.h>

#include <stddef.h>	/* size_t */
#include <stdint.h>

/* State of a streaming SHA-256 computation. */
typedef struct SHA256Context {
	uint32_t state[8];	/* current hash state */
	uint32_t count[2];	/* bit count of the input so far */
	unsigned char buf[64];	/* buffer for a partial input block */
} SHA256_CTX_Y;

/* State of a streaming HMAC-SHA256 computation. */
typedef struct HMAC_SHA256Context {
	SHA256_CTX_Y ictx;	/* inner hash */
	SHA256_CTX_Y octx;	/* outer hash */
} HMAC_SHA256_CTX_Y;

/* Plain SHA-256: initialize, add bytes, write the 32-byte digest. */
void	SHA256_Init_Y(SHA256_CTX_Y *);
void	SHA256_Update_Y(SHA256_CTX_Y *, const void *, size_t);
void	SHA256_Final_Y(unsigned char [32], SHA256_CTX_Y *);

/* HMAC-SHA256: initialize with a key, add bytes, write the 32-byte MAC. */
void	HMAC_SHA256_Init_Y(HMAC_SHA256_CTX_Y *, const void *, size_t);
void	HMAC_SHA256_Update_Y(HMAC_SHA256_CTX_Y *, const void *, size_t);
void	HMAC_SHA256_Final_Y(unsigned char [32], HMAC_SHA256_CTX_Y *);

/**
 * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
 * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
 * write the output to buf.  The value dkLen must be at most 32 * (2^32 - 1).
 */
void	PBKDF2_SHA256(const uint8_t *, size_t, const uint8_t *, size_t,
    uint64_t, uint8_t *, size_t);

#endif /* !SHA256_Y_H */
|
Loading…
x
Reference in New Issue
Block a user