mirror of
https://github.com/GOSTSec/sgminer
synced 2025-01-10 23:08:07 +00:00
3eccf326a3
this is a temporary kernel a somewhat faster should be implemented soon
1365 lines
33 KiB
C
1365 lines
33 KiB
C
/*-
|
|
* Copyright 2009 Colin Percival
|
|
* Copyright 2013,2014 Alexander Peslyak
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*
|
|
* This file was originally written by Colin Percival as part of the Tarsnap
|
|
* online backup system.
|
|
*/
|
|
|
|
#ifdef __i386__
|
|
#warning "This implementation does not use SIMD, and thus it runs a lot slower than the SIMD-enabled implementation. Enable at least SSE2 in the C compiler and use yescrypt-best.c instead unless you're building this SIMD-less implementation on purpose (portability to older CPUs or testing)."
|
|
#elif defined(__x86_64__)
|
|
#warning "This implementation does not use SIMD, and thus it runs a lot slower than the SIMD-enabled implementation. Use yescrypt-best.c instead unless you're building this SIMD-less implementation on purpose (for testing only)."
|
|
#endif
|
|
|
|
#include <errno.h>
|
|
#include <stdint.h>
|
|
#include <stdlib.h>
|
|
#include "algorithm/yescrypt_core.h"
|
|
#include "sph/sha256_Y.h"
|
|
#include "algorithm/sysendian.h"
|
|
|
|
// #include "sph/yescrypt-platform.c"
|
|
#define HUGEPAGE_THRESHOLD (12 * 1024 * 1024)
|
|
|
|
#ifdef __x86_64__
|
|
#define HUGEPAGE_SIZE (2 * 1024 * 1024)
|
|
#else
|
|
#undef HUGEPAGE_SIZE
|
|
#endif
|
|
|
|
|
|
static void *
|
|
alloc_region(yescrypt_region_t * region, size_t size)
|
|
{
|
|
size_t base_size = size;
|
|
uint8_t * base, *aligned;
|
|
#ifdef MAP_ANON
|
|
int flags =
|
|
#ifdef MAP_NOCORE
|
|
MAP_NOCORE |
|
|
#endif
|
|
MAP_ANON | MAP_PRIVATE;
|
|
#if defined(MAP_HUGETLB) && defined(HUGEPAGE_SIZE)
|
|
size_t new_size = size;
|
|
const size_t hugepage_mask = (size_t)HUGEPAGE_SIZE - 1;
|
|
if (size >= HUGEPAGE_THRESHOLD && size + hugepage_mask >= size) {
|
|
flags |= MAP_HUGETLB;
|
|
/*
|
|
* Linux's munmap() fails on MAP_HUGETLB mappings if size is not a multiple of
|
|
* huge page size, so let's round up to huge page size here.
|
|
*/
|
|
new_size = size + hugepage_mask;
|
|
new_size &= ~hugepage_mask;
|
|
}
|
|
base = mmap(NULL, new_size, PROT_READ | PROT_WRITE, flags, -1, 0);
|
|
if (base != MAP_FAILED) {
|
|
base_size = new_size;
|
|
}
|
|
else
|
|
if (flags & MAP_HUGETLB) {
|
|
flags &= ~MAP_HUGETLB;
|
|
base = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0);
|
|
}
|
|
|
|
#else
|
|
base = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0);
|
|
#endif
|
|
if (base == MAP_FAILED)
|
|
base = NULL;
|
|
aligned = base;
|
|
#elif defined(HAVE_POSIX_MEMALIGN)
|
|
if ((errno = posix_memalign((void **)&base, 64, size)) != 0)
|
|
base = NULL;
|
|
aligned = base;
|
|
#else
|
|
base = aligned = NULL;
|
|
if (size + 63 < size) {
|
|
errno = ENOMEM;
|
|
}
|
|
else if ((base = malloc(size + 63)) != NULL) {
|
|
aligned = base + 63;
|
|
aligned -= (uintptr_t)aligned & 63;
|
|
}
|
|
#endif
|
|
region->base = base;
|
|
region->aligned = aligned;
|
|
region->base_size = base ? base_size : 0;
|
|
region->aligned_size = base ? size : 0;
|
|
return aligned;
|
|
}
|
|
|
|
static void init_region(yescrypt_region_t * region)
|
|
{
|
|
region->base = region->aligned = NULL;
|
|
region->base_size = region->aligned_size = 0;
|
|
}
|
|
|
|
static int
|
|
free_region(yescrypt_region_t * region)
|
|
{
|
|
if (region->base) {
|
|
#ifdef MAP_ANON
|
|
if (munmap(region->base, region->base_size))
|
|
return -1;
|
|
#else
|
|
free(region->base);
|
|
#endif
|
|
}
|
|
init_region(region);
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
yescrypt_init_shared(yescrypt_shared_t * shared,
|
|
const uint8_t * param, size_t paramlen,
|
|
uint64_t N, uint32_t r, uint32_t p,
|
|
yescrypt_init_shared_flags_t flags, uint32_t mask,
|
|
uint8_t * buf, size_t buflen)
|
|
{
|
|
yescrypt_shared1_t * shared1 = &shared->shared1;
|
|
yescrypt_shared_t dummy, half1, half2;
|
|
// yescrypt_shared_t * half2;
|
|
uint8_t salt[32];
|
|
|
|
if (flags & YESCRYPT_SHARED_PREALLOCATED) {
|
|
if (!shared1->aligned || !shared1->aligned_size)
|
|
return -1;
|
|
}
|
|
else {
|
|
init_region(shared1);
|
|
}
|
|
shared->mask1 = 1;
|
|
if (!param && !paramlen && !N && !r && !p && !buf && !buflen)
|
|
return 0;
|
|
|
|
init_region(&dummy.shared1);
|
|
dummy.mask1 = 1;
|
|
if (yescrypt_kdf(&dummy, shared1,
|
|
param, paramlen, NULL, 0, N, r, p, 0,
|
|
YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_1,
|
|
salt, sizeof(salt)))
|
|
goto out;
|
|
|
|
half1 = half2 = *shared;
|
|
half1.shared1.aligned_size /= 2;
|
|
half2.shared1.aligned_size = half1.shared1.aligned_size;
|
|
half2.shared1.aligned = (char*)half2.shared1.aligned + half1.shared1.aligned_size;
|
|
|
|
N /= 2;
|
|
|
|
if (p > 1 && yescrypt_kdf(&half1, &half2.shared1,
|
|
param, paramlen, salt, sizeof(salt), N, r, p, 0,
|
|
YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_2,
|
|
salt, sizeof(salt)))
|
|
goto out;
|
|
|
|
if (yescrypt_kdf(&half2, &half1.shared1,
|
|
param, paramlen, salt, sizeof(salt), N, r, p, 0,
|
|
YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_1,
|
|
salt, sizeof(salt)))
|
|
goto out;
|
|
|
|
if (yescrypt_kdf(&half1, &half2.shared1,
|
|
param, paramlen, salt, sizeof(salt), N, r, p, 0,
|
|
YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_1,
|
|
buf, buflen))
|
|
goto out;
|
|
|
|
shared->mask1 = mask;
|
|
|
|
return 0;
|
|
|
|
out:
|
|
if (!(flags & YESCRYPT_SHARED_PREALLOCATED))
|
|
free_region(shared1);
|
|
return -1;
|
|
}
|
|
|
|
int
|
|
yescrypt_free_shared(yescrypt_shared_t * shared)
|
|
{
|
|
return free_region(&shared->shared1);
|
|
}
|
|
|
|
int
|
|
yescrypt_init_local(yescrypt_local_t * local)
|
|
{
|
|
init_region(local);
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
yescrypt_free_local(yescrypt_local_t * local)
|
|
{
|
|
return free_region(local);
|
|
}
|
|
|
|
|
|
static void
|
|
blkcpy(uint64_t * dest, const uint64_t * src, size_t count)
|
|
{
|
|
do {
|
|
*dest++ = *src++; *dest++ = *src++;
|
|
*dest++ = *src++; *dest++ = *src++;
|
|
} while (count -= 4);
|
|
};
|
|
|
|
static void
|
|
blkxor(uint64_t * dest, const uint64_t * src, size_t count)
|
|
{
|
|
do {
|
|
*dest++ ^= *src++; *dest++ ^= *src++;
|
|
*dest++ ^= *src++; *dest++ ^= *src++;
|
|
} while (count -= 4);
|
|
};
|
|
|
|
typedef union {
|
|
uint32_t w[16];
|
|
uint64_t d[8];
|
|
} salsa20_blk_t;
|
|
|
|
static void
|
|
salsa20_simd_shuffle(const salsa20_blk_t * Bin, salsa20_blk_t * Bout)
|
|
{
|
|
#define COMBINE(out, in1, in2) \
|
|
Bout->d[out] = Bin->w[in1 * 2] | ((uint64_t)Bin->w[in2 * 2 + 1] << 32);
|
|
COMBINE(0, 0, 2)
|
|
COMBINE(1, 5, 7)
|
|
COMBINE(2, 2, 4)
|
|
COMBINE(3, 7, 1)
|
|
COMBINE(4, 4, 6)
|
|
COMBINE(5, 1, 3)
|
|
COMBINE(6, 6, 0)
|
|
COMBINE(7, 3, 5)
|
|
#undef COMBINE
|
|
}
|
|
|
|
static void
|
|
salsa20_simd_unshuffle(const salsa20_blk_t * Bin, salsa20_blk_t * Bout)
|
|
{
|
|
#define COMBINE(out, in1, in2) \
|
|
Bout->w[out * 2] = Bin->d[in1]; \
|
|
Bout->w[out * 2 + 1] = Bin->d[in2] >> 32;
|
|
COMBINE(0, 0, 6)
|
|
COMBINE(1, 5, 3)
|
|
COMBINE(2, 2, 0)
|
|
COMBINE(3, 7, 5)
|
|
COMBINE(4, 4, 2)
|
|
COMBINE(5, 1, 7)
|
|
COMBINE(6, 6, 4)
|
|
COMBINE(7, 3, 1)
|
|
#undef COMBINE
|
|
}
|
|
|
|
/**
|
|
* salsa20_8(B):
|
|
* Apply the salsa20/8 core to the provided block.
|
|
*/
|
|
|
|
static void
|
|
salsa20_8(uint64_t B[8])
|
|
{
|
|
size_t i;
|
|
salsa20_blk_t X;
|
|
|
|
#define x X.w
|
|
|
|
salsa20_simd_unshuffle((const salsa20_blk_t *)B, &X);
|
|
|
|
for (i = 0; i < 8; i += 2) {
|
|
#define R(a,b) (((a) << (b)) | ((a) >> (32 - (b))))
|
|
/* Operate on columns */
|
|
x[ 4] ^= R(x[ 0]+x[12], 7); x[ 8] ^= R(x[ 4]+x[ 0], 9);
|
|
x[12] ^= R(x[ 8]+x[ 4],13); x[ 0] ^= R(x[12]+x[ 8],18);
|
|
|
|
x[ 9] ^= R(x[ 5]+x[ 1], 7); x[13] ^= R(x[ 9]+x[ 5], 9);
|
|
x[ 1] ^= R(x[13]+x[ 9],13); x[ 5] ^= R(x[ 1]+x[13],18);
|
|
|
|
x[14] ^= R(x[10]+x[ 6], 7); x[ 2] ^= R(x[14]+x[10], 9);
|
|
x[ 6] ^= R(x[ 2]+x[14],13); x[10] ^= R(x[ 6]+x[ 2],18);
|
|
|
|
x[ 3] ^= R(x[15]+x[11], 7); x[ 7] ^= R(x[ 3]+x[15], 9);
|
|
x[11] ^= R(x[ 7]+x[ 3],13); x[15] ^= R(x[11]+x[ 7],18);
|
|
|
|
/* Operate on rows */
|
|
x[ 1] ^= R(x[ 0]+x[ 3], 7); x[ 2] ^= R(x[ 1]+x[ 0], 9);
|
|
x[ 3] ^= R(x[ 2]+x[ 1],13); x[ 0] ^= R(x[ 3]+x[ 2],18);
|
|
|
|
x[ 6] ^= R(x[ 5]+x[ 4], 7); x[ 7] ^= R(x[ 6]+x[ 5], 9);
|
|
x[ 4] ^= R(x[ 7]+x[ 6],13); x[ 5] ^= R(x[ 4]+x[ 7],18);
|
|
|
|
x[11] ^= R(x[10]+x[ 9], 7); x[ 8] ^= R(x[11]+x[10], 9);
|
|
x[ 9] ^= R(x[ 8]+x[11],13); x[10] ^= R(x[ 9]+x[ 8],18);
|
|
|
|
x[12] ^= R(x[15]+x[14], 7); x[13] ^= R(x[12]+x[15], 9);
|
|
x[14] ^= R(x[13]+x[12],13); x[15] ^= R(x[14]+x[13],18);
|
|
#undef R
|
|
}
|
|
#undef x
|
|
|
|
{
|
|
salsa20_blk_t Y;
|
|
salsa20_simd_shuffle(&X, &Y);
|
|
for (i = 0; i < 16; i += 4) {
|
|
((salsa20_blk_t *)B)->w[i] += Y.w[i];
|
|
((salsa20_blk_t *)B)->w[i + 1] += Y.w[i + 1];
|
|
((salsa20_blk_t *)B)->w[i + 2] += Y.w[i + 2];
|
|
((salsa20_blk_t *)B)->w[i + 3] += Y.w[i + 3];
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* blockmix_salsa8(Bin, Bout, X, r):
|
|
* Compute Bout = BlockMix_{salsa20/8, r}(Bin). The input Bin must be 128r
|
|
* bytes in length; the output Bout must also be the same size. The
|
|
* temporary space X must be 64 bytes.
|
|
*/
|
|
static void
|
|
blockmix_salsa8(const uint64_t * Bin, uint64_t * Bout, uint64_t * X, size_t r)
|
|
{
|
|
size_t i;
|
|
|
|
/* 1: X <-- B_{2r - 1} */
|
|
blkcpy(X, &Bin[(2 * r - 1) * 8], 8);
|
|
|
|
/* 2: for i = 0 to 2r - 1 do */
|
|
for (i = 0; i < 2 * r; i += 2) {
|
|
/* 3: X <-- H(X \xor B_i) */
|
|
blkxor(X, &Bin[i * 8], 8);
|
|
salsa20_8(X);
|
|
|
|
/* 4: Y_i <-- X */
|
|
/* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
|
|
blkcpy(&Bout[i * 4], X, 8);
|
|
|
|
/* 3: X <-- H(X \xor B_i) */
|
|
blkxor(X, &Bin[i * 8 + 8], 8);
|
|
salsa20_8(X);
|
|
|
|
/* 4: Y_i <-- X */
|
|
/* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
|
|
blkcpy(&Bout[i * 4 + r * 8], X, 8);
|
|
}
|
|
|
|
}
|
|
|
|
/* These are tunable */
|
|
#define S_BITS 8
|
|
#define S_SIMD 2
|
|
#define S_P 4
|
|
#define S_ROUNDS 6
|
|
|
|
/* Number of S-boxes. Not tunable, hard-coded in a few places. */
|
|
#define S_N 2
|
|
|
|
/* Derived values. Not tunable on their own. */
|
|
#define S_SIZE1 (1 << S_BITS)
|
|
#define S_MASK ((S_SIZE1 - 1) * S_SIMD * 8)
|
|
#define S_MASK2 (((uint64_t)S_MASK << 32) | S_MASK)
|
|
#define S_SIZE_ALL (S_N * S_SIZE1 * S_SIMD)
|
|
#define S_P_SIZE (S_P * S_SIMD)
|
|
#define S_MIN_R ((S_P * S_SIMD + 15) / 16)
|
|
|
|
/**
|
|
* pwxform(B):
|
|
* Transform the provided block using the provided S-boxes.
|
|
*/
|
|
|
|
static void
|
|
block_pwxform(uint64_t * B, const uint64_t * S)
|
|
{
|
|
uint64_t(*X)[S_SIMD] = (uint64_t(*)[S_SIMD])B;
|
|
const uint8_t *S0 = (const uint8_t *)S;
|
|
const uint8_t *S1 = (const uint8_t *)(S + S_SIZE1 * S_SIMD);
|
|
size_t i, j;
|
|
|
|
for (j = 0; j < S_P; j++) {
|
|
|
|
uint64_t *Xj = X[j];
|
|
uint64_t x0 = Xj[0];
|
|
uint64_t x1 = Xj[1];
|
|
|
|
for (i = 0; i < S_ROUNDS; i++) {
|
|
uint64_t x = x0 & S_MASK2;
|
|
const uint64_t *p0, *p1;
|
|
|
|
p0 = (const uint64_t *)(S0 + (uint32_t)x);
|
|
p1 = (const uint64_t *)(S1 + (x >> 32));
|
|
|
|
x0 = (uint64_t)(x0 >> 32) * (uint32_t)x0;
|
|
x0 += p0[0];
|
|
x0 ^= p1[0];
|
|
|
|
x1 = (uint64_t)(x1 >> 32) * (uint32_t)x1;
|
|
x1 += p0[1];
|
|
x1 ^= p1[1];
|
|
}
|
|
Xj[0] = x0;
|
|
Xj[1] = x1;
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
/**
|
|
* blockmix_pwxform(Bin, Bout, S, r):
|
|
* Compute Bout = BlockMix_pwxform{salsa20/8, S, r}(Bin). The input Bin must
|
|
* be 128r bytes in length; the output Bout must also be the same size.
|
|
*
|
|
* S lacks const qualifier to match blockmix_salsa8()'s prototype, which we
|
|
* need to refer to both functions via the same function pointers.
|
|
*/
|
|
static void
|
|
blockmix_pwxform(const uint64_t * Bin, uint64_t * Bout, uint64_t * S, size_t r)
|
|
{
|
|
size_t r1, r2, i;
|
|
// S_P_SIZE = 8;
|
|
/* Convert 128-byte blocks to (S_P_SIZE * 64-bit) blocks */
|
|
|
|
r1 = r * 128 / (S_P_SIZE * 8);
|
|
/* X <-- B_{r1 - 1} */
|
|
blkcpy(Bout, &Bin[(r1 - 1) * S_P_SIZE], S_P_SIZE);
|
|
|
|
/* X <-- X \xor B_i */
|
|
blkxor(Bout, Bin, S_P_SIZE);
|
|
|
|
/* X <-- H'(X) */
|
|
/* B'_i <-- X */
|
|
block_pwxform(Bout, S);
|
|
|
|
/* for i = 0 to r1 - 1 do */
|
|
for (i = 1; i < r1; i++) {
|
|
/* X <-- X \xor B_i */
|
|
blkcpy(&Bout[i * S_P_SIZE], &Bout[(i - 1) * S_P_SIZE],S_P_SIZE);
|
|
blkxor(&Bout[i * S_P_SIZE], &Bin[i * S_P_SIZE], S_P_SIZE);
|
|
|
|
/* X <-- H'(X) */
|
|
/* B'_i <-- X */
|
|
block_pwxform(&Bout[i * S_P_SIZE], S);
|
|
}
|
|
|
|
/* Handle partial blocks */
|
|
if (i * S_P_SIZE < r * 16) {
|
|
blkcpy(&Bout[i * S_P_SIZE], &Bin[i * S_P_SIZE],r * 16 - i * S_P_SIZE);
|
|
}
|
|
|
|
i = (r1 - 1) * S_P_SIZE / 8;
|
|
/* Convert 128-byte blocks to 64-byte blocks */
|
|
r2 = r * 2;
|
|
|
|
/* B'_i <-- H(B'_i) */
|
|
salsa20_8(&Bout[i * 8]);
|
|
|
|
|
|
i++;
|
|
/// not used yescrypt
|
|
|
|
for (; i < r2; i++) {
|
|
/* B'_i <-- H(B'_i \xor B'_{i-1}) */
|
|
blkxor(&Bout[i * 8], &Bout[(i - 1) * 8], 8);
|
|
salsa20_8(&Bout[i * 8]);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
* integerify(B, r):
|
|
* Return the result of parsing B_{2r-1} as a little-endian integer.
|
|
*/
|
|
static uint64_t
|
|
integerify(const uint64_t * B, size_t r)
|
|
{
|
|
/*
|
|
* Our 64-bit words are in host byte order, and word 6 holds the second 32-bit
|
|
* word of B_{2r-1} due to SIMD shuffling. The 64-bit value we return is also
|
|
* in host byte order, as it should be.
|
|
*/
|
|
const uint64_t * X = &B[(2 * r - 1) * 8];
|
|
uint32_t lo = X[0];
|
|
uint32_t hi = X[6] >> 32;
|
|
return ((uint64_t)hi << 32) + lo;
|
|
}
|
|
|
|
/**
|
|
* smix1(B, r, N, flags, V, NROM, shared, XY, S):
|
|
* Compute first loop of B = SMix_r(B, N). The input B must be 128r bytes in
|
|
* length; the temporary storage V must be 128rN bytes in length; the temporary
|
|
* storage XY must be 256r + 64 bytes in length. The value N must be even and
|
|
* no smaller than 2.
|
|
*/
|
|
static void
|
|
smix1(uint64_t * B, size_t r, uint64_t N, yescrypt_flags_t flags,
|
|
uint64_t * V, uint64_t NROM, const yescrypt_shared_t * shared,
|
|
uint64_t * XY, uint64_t * S)
|
|
{
|
|
void (*blockmix)(const uint64_t *, uint64_t *, uint64_t *, size_t) = (S ? blockmix_pwxform : blockmix_salsa8);
|
|
const uint64_t * VROM = shared->shared1.aligned;
|
|
uint32_t VROM_mask = shared->mask1;
|
|
size_t s = 16 * r;
|
|
uint64_t * X = V;
|
|
uint64_t * Y = &XY[s];
|
|
uint64_t * Z = S ? S : &XY[2 * s];
|
|
uint64_t n, i, j;
|
|
size_t k;
|
|
|
|
/* 1: X <-- B */
|
|
/* 3: V_i <-- X */
|
|
for (i = 0; i < 2 * r; i++) {
|
|
const salsa20_blk_t *src = (const salsa20_blk_t *)&B[i * 8];
|
|
salsa20_blk_t *tmp = (salsa20_blk_t *)Y;
|
|
salsa20_blk_t *dst = (salsa20_blk_t *)&X[i * 8];
|
|
for (k = 0; k < 16; k++)
|
|
tmp->w[k] = le32dec(&src->w[k]);
|
|
|
|
salsa20_simd_shuffle(tmp, dst);
|
|
}
|
|
|
|
/* 4: X <-- H(X) */
|
|
/* 3: V_i <-- X */
|
|
|
|
blockmix(X, Y, Z, r);
|
|
blkcpy(&V[s], Y, s);
|
|
X = XY;
|
|
|
|
if (NROM && (VROM_mask & 1)) {
|
|
if ((1 & VROM_mask) == 1) {
|
|
/* j <-- Integerify(X) mod NROM */
|
|
j = integerify(Y, r) & (NROM - 1);
|
|
|
|
/* X <-- H(X \xor VROM_j) */
|
|
blkxor(Y, &VROM[j * s], s);
|
|
}
|
|
|
|
blockmix(Y, X, Z, r);
|
|
|
|
|
|
/* 2: for i = 0 to N - 1 do */
|
|
for (n = 1, i = 2; i < N; i += 2) {
|
|
/* 3: V_i <-- X */
|
|
blkcpy(&V[i * s], X, s);
|
|
|
|
if ((i & (i - 1)) == 0)
|
|
n <<= 1;
|
|
|
|
/* j <-- Wrap(Integerify(X), i) */
|
|
j = integerify(X, r) & (n - 1);
|
|
j += i - n;
|
|
|
|
/* X <-- X \xor V_j */
|
|
blkxor(X, &V[j * s], s);
|
|
|
|
/* 4: X <-- H(X) */
|
|
blockmix(X, Y, Z, r);
|
|
|
|
/* 3: V_i <-- X */
|
|
blkcpy(&V[(i + 1) * s], Y, s);
|
|
|
|
j = integerify(Y, r);
|
|
if (((i + 1) & VROM_mask) == 1) {
|
|
/* j <-- Integerify(X) mod NROM */
|
|
j &= NROM - 1;
|
|
|
|
/* X <-- H(X \xor VROM_j) */
|
|
blkxor(Y, &VROM[j * s], s);
|
|
} else {
|
|
/* j <-- Wrap(Integerify(X), i) */
|
|
j &= n - 1;
|
|
j += i + 1 - n;
|
|
|
|
/* X <-- H(X \xor V_j) */
|
|
blkxor(Y, &V[j * s], s);
|
|
}
|
|
|
|
blockmix(Y, X, Z, r);
|
|
}
|
|
} else {
|
|
yescrypt_flags_t rw = flags & YESCRYPT_RW;
|
|
/* 4: X <-- H(X) */
|
|
blockmix(Y, X, Z, r);
|
|
|
|
/* 2: for i = 0 to N - 1 do */
|
|
for (n = 1, i = 2; i < N; i += 2) {
|
|
/* 3: V_i <-- X */
|
|
blkcpy(&V[i * s], X, s);
|
|
|
|
if (rw) {
|
|
if ((i & (i - 1)) == 0)
|
|
n <<= 1;
|
|
|
|
/* j <-- Wrap(Integerify(X), i) */
|
|
j = integerify(X, r) & (n - 1);
|
|
j += i - n;
|
|
|
|
/* X <-- X \xor V_j */
|
|
blkxor(X, &V[j * s], s);
|
|
}
|
|
|
|
/* 4: X <-- H(X) */
|
|
blockmix(X, Y, Z, r);
|
|
|
|
/* 3: V_i <-- X */
|
|
blkcpy(&V[(i + 1) * s], Y, s);
|
|
|
|
if (rw) {
|
|
/* j <-- Wrap(Integerify(X), i) */
|
|
j = integerify(Y, r) & (n - 1);
|
|
j += (i + 1) - n;
|
|
|
|
|
|
/* X <-- X \xor V_j */
|
|
blkxor(Y, &V[j * s], s);
|
|
}
|
|
|
|
/* 4: X <-- H(X) */
|
|
blockmix(Y, X, Z, r);
|
|
}
|
|
}
|
|
|
|
/* B' <-- X */
|
|
for (i = 0; i < 2 * r; i++) {
|
|
const salsa20_blk_t *src = (const salsa20_blk_t *)&X[i * 8];
|
|
salsa20_blk_t *tmp = (salsa20_blk_t *)Y;
|
|
salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 8];
|
|
for (k = 0; k < 16; k++)
|
|
le32enc(&tmp->w[k], src->w[k]);
|
|
salsa20_simd_unshuffle(tmp, dst);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
* smix2(B, r, N, Nloop, flags, V, NROM, shared, XY, S):
|
|
* Compute second loop of B = SMix_r(B, N). The input B must be 128r bytes in
|
|
* length; the temporary storage V must be 128rN bytes in length; the temporary
|
|
* storage XY must be 256r + 64 bytes in length. The value N must be a
|
|
* power of 2 greater than 1. The value Nloop must be even.
|
|
*/
|
|
static void
|
|
smix2(uint64_t * B, size_t r, uint64_t N, uint64_t Nloop,
|
|
yescrypt_flags_t flags,
|
|
uint64_t * V, uint64_t NROM, const yescrypt_shared_t * shared,
|
|
uint64_t * XY, uint64_t * S)
|
|
{
|
|
|
|
void (*blockmix)(const uint64_t *, uint64_t *, uint64_t *, size_t) =
|
|
(S ? blockmix_pwxform : blockmix_salsa8);
|
|
const uint64_t * VROM = shared->shared1.aligned;
|
|
uint32_t VROM_mask = shared->mask1 | 1;
|
|
size_t s = 16 * r;
|
|
yescrypt_flags_t rw = flags & YESCRYPT_RW;
|
|
uint64_t * X = XY;
|
|
uint64_t * Y = &XY[s];
|
|
uint64_t * Z = S ? S : &XY[2 * s];
|
|
uint64_t i, j;
|
|
size_t k;
|
|
|
|
if (Nloop == 0)
|
|
return;
|
|
|
|
/* X <-- B' */
|
|
for (i = 0; i < 2 * r; i++) {
|
|
const salsa20_blk_t *src = (const salsa20_blk_t *)&B[i * 8];
|
|
salsa20_blk_t *tmp = (salsa20_blk_t *)Y;
|
|
salsa20_blk_t *dst = (salsa20_blk_t *)&X[i * 8];
|
|
for (k = 0; k < 16; k++)
|
|
tmp->w[k] = le32dec(&src->w[k]);
|
|
salsa20_simd_shuffle(tmp, dst);
|
|
}
|
|
if (NROM) {
|
|
|
|
/* 6: for i = 0 to N - 1 do */
|
|
for (i = 0; i < Nloop; i += 2) {
|
|
/* 7: j <-- Integerify(X) mod N */
|
|
j = integerify(X, r) & (N - 1);
|
|
|
|
/* 8: X <-- H(X \xor V_j) */
|
|
blkxor(X, &V[j * s], s);
|
|
/* V_j <-- Xprev \xor V_j */
|
|
if (rw)
|
|
blkcpy(&V[j * s], X, s);
|
|
blockmix(X, Y, Z, r);
|
|
|
|
j = integerify(Y, r);
|
|
if (((i + 1) & VROM_mask) == 1) {
|
|
/* j <-- Integerify(X) mod NROM */
|
|
j &= NROM - 1;
|
|
|
|
/* X <-- H(X \xor VROM_j) */
|
|
blkxor(Y, &VROM[j * s], s);
|
|
} else {
|
|
/* 7: j <-- Integerify(X) mod N */
|
|
j &= N - 1;
|
|
|
|
/* 8: X <-- H(X \xor V_j) */
|
|
blkxor(Y, &V[j * s], s);
|
|
/* V_j <-- Xprev \xor V_j */
|
|
if (rw)
|
|
blkcpy(&V[j * s], Y, s);
|
|
}
|
|
|
|
blockmix(Y, X, Z, r);
|
|
}
|
|
} else {
|
|
|
|
/* 6: for i = 0 to N - 1 do */
|
|
i = Nloop / 2;
|
|
do {
|
|
/* 7: j <-- Integerify(X) mod N */
|
|
j = integerify(X, r) & (N - 1);
|
|
|
|
/* 8: X <-- H(X \xor V_j) */
|
|
blkxor(X, &V[j * s], s);
|
|
/* V_j <-- Xprev \xor V_j */
|
|
if (rw)
|
|
blkcpy(&V[j * s], X, s);
|
|
blockmix(X, Y, Z, r);
|
|
|
|
/* 7: j <-- Integerify(X) mod N */
|
|
j = integerify(Y, r) & (N - 1);
|
|
|
|
/* 8: X <-- H(X \xor V_j) */
|
|
blkxor(Y, &V[j * s], s);
|
|
/* V_j <-- Xprev \xor V_j */
|
|
if (rw)
|
|
blkcpy(&V[j * s], Y, s);
|
|
blockmix(Y, X, Z, r);
|
|
} while (--i);
|
|
}
|
|
|
|
/* 10: B' <-- X */
|
|
for (i = 0; i < 2 * r; i++) {
|
|
const salsa20_blk_t *src = (const salsa20_blk_t *)&X[i * 8];
|
|
salsa20_blk_t *tmp = (salsa20_blk_t *)Y;
|
|
salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 8];
|
|
for (k = 0; k < 16; k++)
|
|
le32enc(&tmp->w[k], src->w[k]);
|
|
salsa20_simd_unshuffle(tmp, dst);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
* p2floor(x):
|
|
* Largest power of 2 not greater than argument.
|
|
*/
|
|
static uint64_t
|
|
p2floor(uint64_t x)
|
|
{
|
|
uint64_t y;
|
|
while ((y = x & (x - 1)))
|
|
x = y;
|
|
return x;
|
|
}
|
|
|
|
/**
|
|
* smix(B, r, N, p, t, flags, V, NROM, shared, XY, S):
|
|
* Compute B = SMix_r(B, N). The input B must be 128rp bytes in length; the
|
|
* temporary storage V must be 128rN bytes in length; the temporary storage
|
|
* XY must be 256r+64 or (256r+64)*p bytes in length (the larger size is
|
|
* required with OpenMP-enabled builds). The value N must be a power of 2
|
|
* greater than 1.
|
|
*/
|
|
static void
|
|
smix(uint64_t * B, size_t r, uint64_t N, uint32_t p, uint32_t t,
|
|
yescrypt_flags_t flags,
|
|
uint64_t * V, uint64_t NROM, const yescrypt_shared_t * shared,
|
|
uint64_t * XY, uint64_t * S)
|
|
{
|
|
size_t s = 16 * r;
|
|
uint64_t Nchunk = N / p, Nloop_all, Nloop_rw;
|
|
uint32_t i;
|
|
|
|
Nloop_all = Nchunk;
|
|
if (flags & YESCRYPT_RW) {
|
|
if (t <= 1) {
|
|
if (t)
|
|
Nloop_all *= 2; /* 2/3 */
|
|
Nloop_all = (Nloop_all + 2) / 3; /* 1/3, round up */
|
|
} else {
|
|
Nloop_all *= t - 1;
|
|
}
|
|
} else if (t) {
|
|
if (t == 1)
|
|
Nloop_all += (Nloop_all + 1) / 2; /* 1.5, round up */
|
|
Nloop_all *= t;
|
|
}
|
|
|
|
Nloop_rw = 0;
|
|
if (flags & __YESCRYPT_INIT_SHARED)
|
|
Nloop_rw = Nloop_all;
|
|
else if (flags & YESCRYPT_RW)
|
|
Nloop_rw = Nloop_all / p;
|
|
|
|
Nchunk &= ~(uint64_t)1; /* round down to even */
|
|
Nloop_all++; Nloop_all &= ~(uint64_t)1; /* round up to even */
|
|
Nloop_rw &= ~(uint64_t)1; /* round down to even */
|
|
|
|
|
|
for (i = 0; i < p; i++) {
|
|
uint64_t Vchunk = i * Nchunk;
|
|
uint64_t * Bp = &B[i * s];
|
|
uint64_t * Vp = &V[Vchunk * s];
|
|
uint64_t * XYp = XY;
|
|
|
|
uint64_t Np = (i < p - 1) ? Nchunk : (N - Vchunk);
|
|
uint64_t * Sp = S ? &S[i * S_SIZE_ALL] : S;
|
|
|
|
if (Sp)
|
|
smix1(Bp, 1, S_SIZE_ALL / 16, flags & ~YESCRYPT_PWXFORM,Sp, NROM, shared, XYp, NULL);
|
|
|
|
|
|
|
|
if (!(flags & __YESCRYPT_INIT_SHARED_2))
|
|
smix1(Bp, r, Np, flags, Vp, NROM, shared, XYp, Sp);
|
|
|
|
|
|
smix2(Bp, r, p2floor(Np), Nloop_rw, flags, Vp, NROM, shared, XYp, Sp);
|
|
|
|
|
|
|
|
}
|
|
if (Nloop_all > Nloop_rw) {
|
|
|
|
for (i = 0; i < p; i++) {
|
|
uint64_t * Bp = &B[i * s];
|
|
|
|
uint64_t * XYp = XY;
|
|
|
|
uint64_t * Sp = S ? &S[i * S_SIZE_ALL] : S;
|
|
smix2(Bp, r, N, Nloop_all - Nloop_rw,flags & ~YESCRYPT_RW, V, NROM, shared, XYp, Sp);
|
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
static void
|
|
smix_old(uint64_t * B, size_t r, uint64_t N, uint32_t p, uint32_t t,
|
|
yescrypt_flags_t flags,
|
|
uint64_t * V, uint64_t NROM, const yescrypt_shared_t * shared,
|
|
uint64_t * XY, uint64_t * S)
|
|
{
|
|
size_t s = 16 * r;
|
|
uint64_t Nchunk = N / p, Nloop_all, Nloop_rw;
|
|
uint32_t i;
|
|
|
|
Nloop_all = Nchunk;
|
|
if (flags & YESCRYPT_RW) {
|
|
if (t <= 1) {
|
|
if (t)
|
|
Nloop_all *= 2; /* 2/3 */
|
|
Nloop_all = (Nloop_all + 2) / 3; /* 1/3, round up */
|
|
}
|
|
else {
|
|
Nloop_all *= t - 1;
|
|
}
|
|
}
|
|
else if (t) {
|
|
if (t == 1)
|
|
Nloop_all += (Nloop_all + 1) / 2; /* 1.5, round up */
|
|
Nloop_all *= t;
|
|
}
|
|
|
|
Nloop_rw = 0;
|
|
if (flags & __YESCRYPT_INIT_SHARED)
|
|
Nloop_rw = Nloop_all;
|
|
else if (flags & YESCRYPT_RW)
|
|
Nloop_rw = Nloop_all / p;
|
|
|
|
Nchunk &= ~(uint64_t)1; /* round down to even */
|
|
Nloop_all++; Nloop_all &= ~(uint64_t)1; /* round up to even */
|
|
Nloop_rw &= ~(uint64_t)1; /* round down to even */
|
|
|
|
|
|
for (i = 0; i < p; i++) {
|
|
uint64_t Vchunk = i * Nchunk;
|
|
uint64_t * Bp = &B[i * s];
|
|
uint64_t * Vp = &V[Vchunk * s];
|
|
uint64_t * XYp = XY;
|
|
|
|
uint64_t Np = (i < p - 1) ? Nchunk : (N - Vchunk);
|
|
uint64_t * Sp = S ? &S[i * S_SIZE_ALL] : S;
|
|
|
|
if (Sp) {
|
|
smix1(Bp, 1, S_SIZE_ALL / 16, flags & ~YESCRYPT_PWXFORM, Sp, NROM, shared, XYp, NULL);
|
|
|
|
|
|
}
|
|
if (!(flags & __YESCRYPT_INIT_SHARED_2)) {
|
|
smix1(Bp, r, Np, flags, Vp, NROM, shared, XYp, Sp);
|
|
}
|
|
|
|
|
|
smix2(Bp, r, p2floor(Np), Nloop_rw, flags, Vp, NROM, shared, XYp, Sp);
|
|
}
|
|
|
|
if (Nloop_all > Nloop_rw) {
|
|
|
|
for (i = 0; i < p; i++) {
|
|
uint64_t * Bp = &B[i * s];
|
|
|
|
uint64_t * XYp = XY;
|
|
|
|
uint64_t * Sp = S ? &S[i * S_SIZE_ALL] : S;
|
|
smix2(Bp, r, N, Nloop_all - Nloop_rw, flags & ~YESCRYPT_RW, V, NROM, shared, XYp, Sp);
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen,
|
|
* N, r, p, t, flags, buf, buflen):
|
|
* Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r,
|
|
* p, buflen), or a revision of scrypt as requested by flags and shared, and
|
|
* write the result into buf. The parameters r, p, and buflen must satisfy
|
|
* r * p < 2^30 and buflen <= (2^32 - 1) * 32. The parameter N must be a power
|
|
* of 2 greater than 1.
|
|
*
|
|
* t controls computation time while not affecting peak memory usage. shared
|
|
* and flags may request special modes as described in yescrypt.h. local is
|
|
* the thread-local data structure, allowing to preserve and reuse a memory
|
|
* allocation across calls, thereby reducing its overhead.
|
|
*
|
|
* Return 0 on success; or -1 on error.
|
|
*/
|
|
int
|
|
yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local,
|
|
const uint8_t * passwd, size_t passwdlen,
|
|
const uint8_t * salt, size_t saltlen,
|
|
uint64_t N, uint32_t r, uint32_t p, uint32_t t, yescrypt_flags_t flags,
|
|
uint8_t * buf, size_t buflen)
|
|
{
|
|
yescrypt_region_t tmp;
|
|
uint64_t NROM;
|
|
size_t B_size, V_size, XY_size, need;
|
|
uint64_t * B, * V, * XY, * S;
|
|
uint64_t sha256[4];
|
|
|
|
/*
|
|
* YESCRYPT_PARALLEL_SMIX is a no-op at p = 1 for its intended purpose,
|
|
* so don't let it have side-effects. Without this adjustment, it'd
|
|
* enable the SHA-256 password pre-hashing and output post-hashing,
|
|
* because any deviation from classic scrypt implies those.
|
|
*/
|
|
if (p == 1)
|
|
flags &= ~YESCRYPT_PARALLEL_SMIX;
|
|
|
|
/* Sanity-check parameters */
|
|
if (flags & ~YESCRYPT_KNOWN_FLAGS) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
#if SIZE_MAX > UINT32_MAX
|
|
if (buflen > (((uint64_t)(1) << 32) - 1) * 32) {
|
|
errno = EFBIG;
|
|
return -1;
|
|
}
|
|
#endif
|
|
if ((uint64_t)(r) * (uint64_t)(p) >= (1 << 30)) {
|
|
errno = EFBIG;
|
|
return -1;
|
|
}
|
|
if (((N & (N - 1)) != 0) || (N <= 1) || (r < 1) || (p < 1)) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
if ((flags & YESCRYPT_PARALLEL_SMIX) && (N / p <= 1)) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
#if S_MIN_R > 1
|
|
if ((flags & YESCRYPT_PWXFORM) && (r < S_MIN_R)) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
#endif
|
|
if ((p > SIZE_MAX / ((size_t)256 * r + 64)) ||
|
|
#if SIZE_MAX / 256 <= UINT32_MAX
|
|
(r > SIZE_MAX / 256) ||
|
|
#endif
|
|
(N > SIZE_MAX / 128 / r)) {
|
|
errno = ENOMEM;
|
|
return -1;
|
|
}
|
|
if (N > UINT64_MAX / ((uint64_t)t + 1)) {
|
|
errno = EFBIG;
|
|
return -1;
|
|
}
|
|
|
|
if ((flags & YESCRYPT_PWXFORM) &&
|
|
p > SIZE_MAX / (S_SIZE_ALL * sizeof(*S))) {
|
|
errno = ENOMEM;
|
|
return -1;
|
|
}
|
|
|
|
NROM = 0;
|
|
if (shared->shared1.aligned) {
|
|
NROM = shared->shared1.aligned_size / ((size_t)128 * r);
|
|
if (((NROM & (NROM - 1)) != 0) || (NROM <= 1) ||
|
|
!(flags & YESCRYPT_RW)) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
/* Allocate memory */
|
|
V = NULL;
|
|
V_size = (size_t)128 * r * N;
|
|
|
|
need = V_size;
|
|
if (flags & __YESCRYPT_INIT_SHARED) {
|
|
if (local->aligned_size < need) {
|
|
if (local->base || local->aligned ||
|
|
local->base_size || local->aligned_size) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
if (!alloc_region(local, need))
|
|
return -1;
|
|
}
|
|
V = (uint64_t *)local->aligned;
|
|
need = 0;
|
|
}
|
|
B_size = (size_t)128 * r * p;
|
|
need += B_size;
|
|
if (need < B_size) {
|
|
errno = ENOMEM;
|
|
return -1;
|
|
}
|
|
XY_size = (size_t)256 * r + 64;
|
|
|
|
need += XY_size;
|
|
if (need < XY_size) {
|
|
errno = ENOMEM;
|
|
return -1;
|
|
}
|
|
if (flags & YESCRYPT_PWXFORM) {
|
|
size_t S_size = S_SIZE_ALL * sizeof(*S);
|
|
|
|
if (flags & YESCRYPT_PARALLEL_SMIX)
|
|
S_size *= p;
|
|
|
|
need += S_size;
|
|
if (need < S_size) {
|
|
errno = ENOMEM;
|
|
return -1;
|
|
}
|
|
}
|
|
if (flags & __YESCRYPT_INIT_SHARED) {
|
|
if (!alloc_region(&tmp, need))
|
|
return -1;
|
|
B = (uint64_t *)tmp.aligned;
|
|
XY = (uint64_t *)((uint8_t *)B + B_size);
|
|
} else {
|
|
init_region(&tmp);
|
|
if (local->aligned_size < need) {
|
|
if (free_region(local))
|
|
return -1;
|
|
if (!alloc_region(local, need))
|
|
return -1;
|
|
}
|
|
B = (uint64_t *)local->aligned;
|
|
V = (uint64_t *)((uint8_t *)B + B_size);
|
|
XY = (uint64_t *)((uint8_t *)V + V_size);
|
|
}
|
|
S = NULL;
|
|
if (flags & YESCRYPT_PWXFORM)
|
|
S = (uint64_t *)((uint8_t *)XY + XY_size);
|
|
|
|
|
|
if (t || flags) {
|
|
SHA256_CTX_Y ctx;
|
|
SHA256_Init_Y(&ctx);
|
|
SHA256_Update_Y(&ctx, passwd, passwdlen);
|
|
SHA256_Final_Y((uint8_t *)sha256, &ctx);
|
|
passwd = (uint8_t *)sha256;
|
|
passwdlen = sizeof(sha256);
|
|
}
|
|
/* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */
|
|
PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, 1,(uint8_t *)B, B_size);
|
|
|
|
if (t || flags)
|
|
{
|
|
blkcpy(sha256, B, sizeof(sha256) / sizeof(sha256[0]));
|
|
}
|
|
if (p == 1 || (flags & YESCRYPT_PARALLEL_SMIX)) {
|
|
smix(B, r, N, p, t, flags, V, NROM, shared, XY, S);
|
|
} else {
|
|
uint32_t i;
|
|
/* 2: for i = 0 to p - 1 do */
|
|
for (i = 0; i < p; i++) {
|
|
/* 3: B_i <-- MF(B_i, N) */
|
|
smix(&B[(size_t)16 * r * i], r, N, 1, t, flags, V, NROM, shared, XY, S);
|
|
}
|
|
}
|
|
|
|
/* 5: DK <-- PBKDF2(P, B, 1, dkLen) */
|
|
|
|
PBKDF2_SHA256(passwd, passwdlen, (uint8_t *)B, B_size, 1, buf, buflen);
|
|
/*
|
|
* Except when computing classic scrypt, allow all computation so far
|
|
* to be performed on the client. The final steps below match those of
|
|
* SCRAM (RFC 5802), so that an extension of SCRAM (with the steps so
|
|
* far in place of SCRAM's use of PBKDF2 and with SHA-256 in place of
|
|
* SCRAM's use of SHA-1) would be usable with yescrypt hashes.
|
|
*/
|
|
if ((t || flags) && buflen == sizeof(sha256)) {
|
|
/* Compute ClientKey */
|
|
|
|
{
|
|
HMAC_SHA256_CTX_Y ctx;
|
|
HMAC_SHA256_Init_Y(&ctx, buf, buflen);
|
|
HMAC_SHA256_Update_Y(&ctx, salt, saltlen);
|
|
HMAC_SHA256_Final_Y((uint8_t *)sha256, &ctx);
|
|
}
|
|
/* Compute StoredKey */
|
|
{
|
|
SHA256_CTX_Y ctx;
|
|
SHA256_Init_Y(&ctx);
|
|
SHA256_Update_Y(&ctx, (uint8_t *)sha256, sizeof(sha256));
|
|
SHA256_Final_Y(buf, &ctx);
|
|
}
|
|
}
|
|
|
|
if (free_region(&tmp))
|
|
return -1;
|
|
|
|
/* Success! */
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
yescrypt_kdf_old(const yescrypt_shared_t * shared, yescrypt_local_t * local,
|
|
const uint8_t * passwd, size_t passwdlen,
|
|
const uint8_t * salt, size_t saltlen,
|
|
uint64_t N, uint32_t r, uint32_t p, uint32_t t, yescrypt_flags_t flags,
|
|
uint8_t * buf, size_t buflen)
|
|
{
|
|
yescrypt_region_t tmp;
|
|
uint64_t NROM;
|
|
size_t B_size, V_size, XY_size, need;
|
|
uint64_t * B, *V, *XY, *S;
|
|
uint64_t sha256[4];
|
|
|
|
/*
|
|
* YESCRYPT_PARALLEL_SMIX is a no-op at p = 1 for its intended purpose,
|
|
* so don't let it have side-effects. Without this adjustment, it'd
|
|
* enable the SHA-256 password pre-hashing and output post-hashing,
|
|
* because any deviation from classic scrypt implies those.
|
|
*/
|
|
if (p == 1)
|
|
flags &= ~YESCRYPT_PARALLEL_SMIX;
|
|
|
|
/* Sanity-check parameters */
|
|
if (flags & ~YESCRYPT_KNOWN_FLAGS) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
#if SIZE_MAX > UINT32_MAX
|
|
if (buflen > (((uint64_t)(1) << 32) - 1) * 32) {
|
|
errno = EFBIG;
|
|
return -1;
|
|
}
|
|
#endif
|
|
if ((uint64_t)(r)* (uint64_t)(p) >= (1 << 30)) {
|
|
errno = EFBIG;
|
|
return -1;
|
|
}
|
|
if (((N & (N - 1)) != 0) || (N <= 1) || (r < 1) || (p < 1)) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
if ((flags & YESCRYPT_PARALLEL_SMIX) && (N / p <= 1)) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
#if S_MIN_R > 1
|
|
if ((flags & YESCRYPT_PWXFORM) && (r < S_MIN_R)) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
#endif
|
|
if ((p > SIZE_MAX / ((size_t)256 * r + 64)) ||
|
|
#if SIZE_MAX / 256 <= UINT32_MAX
|
|
(r > SIZE_MAX / 256) ||
|
|
#endif
|
|
(N > SIZE_MAX / 128 / r)) {
|
|
errno = ENOMEM;
|
|
return -1;
|
|
}
|
|
if (N > UINT64_MAX / ((uint64_t)t + 1)) {
|
|
errno = EFBIG;
|
|
return -1;
|
|
}
|
|
|
|
if ((flags & YESCRYPT_PWXFORM) &&
|
|
p > SIZE_MAX / (S_SIZE_ALL * sizeof(*S))) {
|
|
errno = ENOMEM;
|
|
return -1;
|
|
}
|
|
|
|
NROM = 0;
|
|
if (shared->shared1.aligned) {
|
|
NROM = shared->shared1.aligned_size / ((size_t)128 * r);
|
|
if (((NROM & (NROM - 1)) != 0) || (NROM <= 1) ||
|
|
!(flags & YESCRYPT_RW)) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
/* Allocate memory */
|
|
V = NULL;
|
|
V_size = (size_t)128 * r * N;
|
|
|
|
need = V_size;
|
|
if (flags & __YESCRYPT_INIT_SHARED) {
|
|
if (local->aligned_size < need) {
|
|
if (local->base || local->aligned ||
|
|
local->base_size || local->aligned_size) {
|
|
errno = EINVAL;
|
|
return -1;
|
|
}
|
|
if (!alloc_region(local, need))
|
|
return -1;
|
|
}
|
|
V = (uint64_t *)local->aligned;
|
|
need = 0;
|
|
}
|
|
B_size = (size_t)128 * r * p;
|
|
need += B_size;
|
|
if (need < B_size) {
|
|
errno = ENOMEM;
|
|
return -1;
|
|
}
|
|
XY_size = (size_t)256 * r + 64;
|
|
|
|
need += XY_size;
|
|
if (need < XY_size) {
|
|
errno = ENOMEM;
|
|
return -1;
|
|
}
|
|
if (flags & YESCRYPT_PWXFORM) {
|
|
size_t S_size = S_SIZE_ALL * sizeof(*S);
|
|
|
|
if (flags & YESCRYPT_PARALLEL_SMIX)
|
|
S_size *= p;
|
|
|
|
need += S_size;
|
|
if (need < S_size) {
|
|
errno = ENOMEM;
|
|
return -1;
|
|
}
|
|
}
|
|
if (flags & __YESCRYPT_INIT_SHARED) {
|
|
if (!alloc_region(&tmp, need))
|
|
return -1;
|
|
B = (uint64_t *)tmp.aligned;
|
|
XY = (uint64_t *)((uint8_t *)B + B_size);
|
|
}
|
|
else {
|
|
init_region(&tmp);
|
|
if (local->aligned_size < need) {
|
|
if (free_region(local))
|
|
return -1;
|
|
if (!alloc_region(local, need))
|
|
return -1;
|
|
}
|
|
B = (uint64_t *)local->aligned;
|
|
V = (uint64_t *)((uint8_t *)B + B_size);
|
|
XY = (uint64_t *)((uint8_t *)V + V_size);
|
|
}
|
|
S = NULL;
|
|
if (flags & YESCRYPT_PWXFORM)
|
|
S = (uint64_t *)((uint8_t *)XY + XY_size);
|
|
|
|
|
|
if (t || flags) {
|
|
SHA256_CTX_Y ctx;
|
|
SHA256_Init_Y(&ctx);
|
|
SHA256_Update_Y(&ctx, passwd, passwdlen);
|
|
SHA256_Final_Y((uint8_t *)sha256, &ctx);
|
|
passwd = (uint8_t *)sha256;
|
|
passwdlen = sizeof(sha256);
|
|
}
|
|
|
|
/* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */
|
|
PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, 1, (uint8_t *)B, B_size);
|
|
|
|
|
|
if (t || flags)
|
|
{
|
|
blkcpy(sha256, B, sizeof(sha256) / sizeof(sha256[0]));
|
|
}
|
|
smix(B, r, N, p, t, flags, V, NROM, shared, XY, S);
|
|
|
|
|
|
/* 5: DK <-- PBKDF2(P, B, 1, dkLen) */
|
|
PBKDF2_SHA256(passwd, passwdlen, (uint8_t *)B, B_size, 1, buf, buflen);
|
|
|
|
/*
|
|
* Except when computing classic scrypt, allow all computation so far
|
|
* to be performed on the client. The final steps below match those of
|
|
* SCRAM (RFC 5802), so that an extension of SCRAM (with the steps so
|
|
* far in place of SCRAM's use of PBKDF2 and with SHA-256 in place of
|
|
* SCRAM's use of SHA-1) would be usable with yescrypt hashes.
|
|
*/
|
|
if ((t || flags) && buflen == sizeof(sha256)) {
|
|
/* Compute ClientKey */
|
|
|
|
{
|
|
HMAC_SHA256_CTX_Y ctx;
|
|
HMAC_SHA256_Init_Y(&ctx, buf, buflen);
|
|
HMAC_SHA256_Update_Y(&ctx, salt, saltlen);
|
|
HMAC_SHA256_Final_Y((uint8_t *)sha256, &ctx);
|
|
}
|
|
/* Compute StoredKey */
|
|
{
|
|
SHA256_CTX_Y ctx;
|
|
SHA256_Init_Y(&ctx);
|
|
SHA256_Update_Y(&ctx, (uint8_t *)sha256, sizeof(sha256));
|
|
SHA256_Final_Y(buf, &ctx);
|
|
}
|
|
}
|
|
|
|
if (free_region(&tmp))
|
|
return -1;
|
|
|
|
/* Success! */
|
|
return 0;
|
|
}
|
|
|