diff --git a/Makefile.am b/Makefile.am index 80207b7b..b7f32788 100644 --- a/Makefile.am +++ b/Makefile.am @@ -78,12 +78,10 @@ sgminer_SOURCES += algorithm/lyra2rev2.c algorithm/lyra2rev2.h sgminer_SOURCES += algorithm/pluck.c algorithm/pluck.h sgminer_SOURCES += algorithm/sia.c algorithm/sia.h sgminer_SOURCES += algorithm/credits.c algorithm/credits.h -sgminer_SOURCES += algorithm/yescrypt.h algorithm/yescrypt.c algorithm/yescrypt_core.h algorithm/yescrypt-opt.c algorithm/yescryptcommon.c algorithm/sysendian.h sgminer_SOURCES += algorithm/blake256.c algorithm/blake256.h sgminer_SOURCES += algorithm/blakecoin.c algorithm/blakecoin.h sgminer_SOURCES += algorithm/decred.c algorithm/decred.h sgminer_SOURCES += algorithm/pascal.c algorithm/pascal.h -sgminer_SOURCES += algorithm/lbry.c algorithm/lbry.h sgminer_SOURCES += algorithm/gostcoin.c algorithm/gostcoin.h bin_SCRIPTS = $(top_srcdir)/kernel/*.cl diff --git a/algorithm.c b/algorithm.c index 87e59b48..5c6282c8 100644 --- a/algorithm.c +++ b/algorithm.c @@ -36,14 +36,12 @@ #include "algorithm/lyra2re.h" #include "algorithm/lyra2rev2.h" #include "algorithm/pluck.h" -#include "algorithm/yescrypt.h" #include "algorithm/credits.h" #include "algorithm/blake256.h" #include "algorithm/blakecoin.h" #include "algorithm/sia.h" #include "algorithm/decred.h" #include "algorithm/pascal.h" -#include "algorithm/lbry.h" #include "algorithm/sibcoin.h" #include "algorithm/gostcoin.h" @@ -72,16 +70,13 @@ const char *algorithm_type_str[] = { "Neoscrypt", "WhirlpoolX", "Lyra2RE", - "Lyra2REV2" - "Pluck" - "Yescrypt", - "Yescrypt-multi", + "Lyra2REV2", + "Pluck", "Blakecoin", "Blake", "Sia", "Decred", "Vanilla", - "Lbry", "Sibcoin", "Gostcoin" }; @@ -284,100 +279,6 @@ static cl_int queue_credits_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_ return status; } -static cl_int queue_yescrypt_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads) -{ - cl_kernel *kernel = &clState->kernel; - unsigned int num = 0; - cl_uint le_target; - cl_int status = 0; - - -// le_target = (*(cl_uint *)(blk->work->device_target + 28)); - le_target = (cl_uint)le32toh(((uint32_t *)blk->work->/*device_*/target)[7]); -// le_target = (cl_uint)((uint32_t *)blk->work->target)[7]; - - -// memcpy(clState->cldata, blk->work->data, 80); - flip80(clState->cldata, blk->work->data); - status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); - - CL_SET_ARG(clState->CLbuffer0); - CL_SET_ARG(clState->outputBuffer); - CL_SET_ARG(clState->padbuffer8); - CL_SET_ARG(clState->buffer1); - CL_SET_ARG(clState->buffer2); - CL_SET_ARG(le_target); - - return status; -} - -static cl_int queue_yescrypt_multikernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads) -{ -// cl_kernel *kernel = &clState->kernel; - cl_kernel *kernel; - unsigned int num = 0; - cl_uint le_target; - cl_int status = 0; - - - // le_target = (*(cl_uint *)(blk->work->device_target + 28)); - le_target = (cl_uint)le32toh(((uint32_t *)blk->work->/*device_*/target)[7]); - memcpy(clState->cldata, blk->work->data, 80); -// flip80(clState->cldata, blk->work->data); - status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL); -//pbkdf and initial sha - kernel = &clState->kernel; - - CL_SET_ARG(clState->CLbuffer0); - CL_SET_ARG(clState->outputBuffer); - CL_SET_ARG(clState->padbuffer8); - CL_SET_ARG(clState->buffer1); - CL_SET_ARG(clState->buffer2); - CL_SET_ARG(clState->buffer3); - CL_SET_ARG(le_target); - -//inactive kernel - num = 0; - kernel = clState->extra_kernels; - CL_SET_ARG_N(0,clState->buffer1); - CL_SET_ARG_N(1,clState->buffer2); -// CL_SET_ARG_N(3, clState->buffer3); - -//mix2_2 - num = 0; - CL_NEXTKERNEL_SET_ARG_N(0, clState->padbuffer8); - CL_SET_ARG_N(1,clState->buffer1); - CL_SET_ARG_N(2,clState->buffer2); - //mix2_2 -//inactive kernel - num = 0; - CL_NEXTKERNEL_SET_ARG_N(0, clState->buffer1); - CL_SET_ARG_N(1, clState->buffer2); - //mix2_2 - - num = 0; - CL_NEXTKERNEL_SET_ARG_N(0, clState->padbuffer8); - CL_SET_ARG_N(1, clState->buffer1); - CL_SET_ARG_N(2, clState->buffer2); - - //inactive kernel - num = 0; - CL_NEXTKERNEL_SET_ARG_N(0, clState->buffer1); - CL_SET_ARG_N(1, clState->buffer2); - //mix2_2 - - -//pbkdf and finalization - num=0; - CL_NEXTKERNEL_SET_ARG(clState->CLbuffer0); - CL_SET_ARG(clState->outputBuffer); - CL_SET_ARG(clState->buffer2); - CL_SET_ARG(clState->buffer3); - CL_SET_ARG(le_target); - - return status; -} - static cl_int queue_maxcoin_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) { cl_kernel *kernel = &clState->kernel; @@ -1127,31 +1028,6 @@ static cl_int queue_decred_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_u return status; } -static cl_int queue_lbry_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) -{ - cl_kernel *kernel = &clState->kernel; - unsigned int num = 0; - cl_ulong le_target; - cl_int status = 0; - - le_target = *(cl_ulong *)(blk->work->target + 24); - flip112(clState->cldata, blk->work->data); - status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 112, clState->cldata, 0, NULL, NULL); - - CL_SET_ARG(clState->CLbuffer0); - CL_SET_ARG(clState->padbuffer8); - num = 0; - kernel = clState->extra_kernels; - CL_SET_ARG_0(clState->padbuffer8); - num = 0; - - CL_NEXTKERNEL_SET_ARG(clState->padbuffer8); - CL_SET_ARG(clState->outputBuffer); - CL_SET_ARG(le_target); - - return status; -} - static algorithm_settings_t algos[] = { // kernels starting from this will have difficulty calculated by using litecoin algorithm #define A_SCRYPT(a) \ @@ -1185,16 +1061,6 @@ static algorithm_settings_t algos[] = { A_DECRED("decred"), #undef A_DECRED -#define A_YESCRYPT(a) \ - { a, ALGO_YESCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, yescrypt_regenhash, NULL, NULL, queue_yescrypt_kernel, gen_hash, append_neoscrypt_compiler_options} - A_YESCRYPT("yescrypt"), -#undef A_YESCRYPT - -#define A_YESCRYPT_MULTI(a) \ - { a, ALGO_YESCRYPT_MULTI, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 6,-1,CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE , yescrypt_regenhash, NULL, NULL, queue_yescrypt_multikernel, gen_hash, append_neoscrypt_compiler_options} - A_YESCRYPT_MULTI("yescrypt-multi"), -#undef A_YESCRYPT_MULTI - // kernels starting from this will have difficulty calculated by using quarkcoin algorithm #define A_QUARK(a, b) \ { a, ALGO_QUARK, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, NULL, NULL, queue_sph_kernel, gen_hash, append_x11_compiler_options } @@ -1253,8 +1119,6 @@ static algorithm_settings_t algos[] = { { "sia", ALGO_SIA, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000FFFFUL, 0, 128, 0, sia_regenhash, NULL, NULL, queue_sia_kernel, NULL, NULL }, { "vanilla", ALGO_VANILLA, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x000000ffUL, 0, 128, 0, blakecoin_regenhash, blakecoin_midstate, blakecoin_prepare_work, queue_blake_kernel, gen_hash, NULL }, - { "lbry", ALGO_LBRY, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 2, 4 * 8 * 4194304, 0, lbry_regenhash, NULL, NULL, queue_lbry_kernel, gen_hash, NULL }, - { "pascal", ALGO_PASCAL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, pascal_regenhash, pascal_midstate, NULL, queue_pascal_kernel, NULL, NULL }, diff --git a/algorithm/lbry.c b/algorithm/lbry.c deleted file mode 100644 index 163c2e73..00000000 --- a/algorithm/lbry.c +++ /dev/null @@ -1,60 +0,0 @@ -#include "config.h" -#include "miner.h" - -#include -#include -#include - -#include "sph/sph_sha2.h" -#include "sph/sph_ripemd.h" - -typedef struct { - sph_sha256_context sha256; - sph_sha512_context sha512; - sph_ripemd160_context ripemd; -} lbryhash_context_holder; - -void lbryhash(void* output, const void* input) -{ - uint32_t hashA[16], hashB[16], hashC[16]; - lbryhash_context_holder ctx; - - sph_sha256_init(&ctx.sha256); - sph_sha512_init(&ctx.sha512); - sph_ripemd160_init(&ctx.ripemd); - - sph_sha256 (&ctx.sha256, input, 112); - sph_sha256_close(&ctx.sha256, hashA); - - sph_sha256 (&ctx.sha256, hashA, 32); - sph_sha256_close(&ctx.sha256, hashA); - - sph_sha512 (&ctx.sha512, hashA, 32); - sph_sha512_close(&ctx.sha512, hashA); - - sph_ripemd160 (&ctx.ripemd, hashA, 32); - sph_ripemd160_close(&ctx.ripemd, hashB); - - sph_ripemd160 (&ctx.ripemd, hashA+8, 32); - sph_ripemd160_close(&ctx.ripemd, hashC); - - sph_sha256 (&ctx.sha256, hashB, 20); - sph_sha256 (&ctx.sha256, hashC, 20); - sph_sha256_close(&ctx.sha256, hashA); - - sph_sha256 (&ctx.sha256, hashA, 32); - sph_sha256_close(&ctx.sha256, hashA); - - memcpy(output, hashA, 32); -} - -void lbry_regenhash(struct work *work) -{ - uint32_t data[28]; - uint32_t *nonce = (uint32_t *)(work->data + 108); - uint32_t *ohash = (uint32_t *)(work->hash); - - be32enc_vect(data, (const uint32_t *)work->data, 27); - data[27] = htobe32(*nonce); - lbryhash(ohash, data); -} diff --git a/algorithm/lbry.h b/algorithm/lbry.h deleted file mode 100644 index c9bbdbdf..00000000 --- a/algorithm/lbry.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef LBRY_H -#define LBRY_H - -#include "miner.h" - -extern void lbry_regenhash(struct work *work); - -#endif diff --git a/algorithm/yescrypt-opt.c b/algorithm/yescrypt-opt.c deleted file mode 100644 index 6adef7e7..00000000 --- a/algorithm/yescrypt-opt.c +++ /dev/null @@ -1,1364 +0,0 @@ -/*- - * Copyright 2009 Colin Percival - * Copyright 2013,2014 Alexander Peslyak - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * This file was originally written by Colin Percival as part of the Tarsnap - * online backup system. - */ - -#ifdef __i386__ -#warning "This implementation does not use SIMD, and thus it runs a lot slower than the SIMD-enabled implementation. Enable at least SSE2 in the C compiler and use yescrypt-best.c instead unless you're building this SIMD-less implementation on purpose (portability to older CPUs or testing)." -#elif defined(__x86_64__) -#warning "This implementation does not use SIMD, and thus it runs a lot slower than the SIMD-enabled implementation. Use yescrypt-best.c instead unless you're building this SIMD-less implementation on purpose (for testing only)." -#endif - -#include -#include -#include -#include "algorithm/yescrypt_core.h" -#include "sph/sha256_Y.h" -#include "algorithm/sysendian.h" - -// #include "sph/yescrypt-platform.c" -#define HUGEPAGE_THRESHOLD (12 * 1024 * 1024) - -#ifdef __x86_64__ -#define HUGEPAGE_SIZE (2 * 1024 * 1024) -#else -#undef HUGEPAGE_SIZE -#endif - - -static void * -alloc_region(yescrypt_region_t * region, size_t size) -{ - size_t base_size = size; - uint8_t * base, *aligned; -#ifdef MAP_ANON - int flags = -#ifdef MAP_NOCORE - MAP_NOCORE | -#endif - MAP_ANON | MAP_PRIVATE; -#if defined(MAP_HUGETLB) && defined(HUGEPAGE_SIZE) - size_t new_size = size; - const size_t hugepage_mask = (size_t)HUGEPAGE_SIZE - 1; - if (size >= HUGEPAGE_THRESHOLD && size + hugepage_mask >= size) { - flags |= MAP_HUGETLB; - /* - * Linux's munmap() fails on MAP_HUGETLB mappings if size is not a multiple of - * huge page size, so let's round up to huge page size here. - */ - new_size = size + hugepage_mask; - new_size &= ~hugepage_mask; - } - base = mmap(NULL, new_size, PROT_READ | PROT_WRITE, flags, -1, 0); - if (base != MAP_FAILED) { - base_size = new_size; - } - else - if (flags & MAP_HUGETLB) { - flags &= ~MAP_HUGETLB; - base = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0); - } - -#else - base = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0); -#endif - if (base == MAP_FAILED) - base = NULL; - aligned = base; -#elif defined(HAVE_POSIX_MEMALIGN) - if ((errno = posix_memalign((void **)&base, 64, size)) != 0) - base = NULL; - aligned = base; -#else - base = aligned = NULL; - if (size + 63 < size) { - errno = ENOMEM; - } - else if ((base = (uint8_t *)malloc(size + 63)) != NULL) { - aligned = base + 63; - aligned -= (uintptr_t)aligned & 63; - } -#endif - region->base = base; - region->aligned = aligned; - region->base_size = base ? base_size : 0; - region->aligned_size = base ? size : 0; - return aligned; -} - -static void init_region(yescrypt_region_t * region) -{ - region->base = region->aligned = NULL; - region->base_size = region->aligned_size = 0; -} - -static int -free_region(yescrypt_region_t * region) -{ - if (region->base) { -#ifdef MAP_ANON - if (munmap(region->base, region->base_size)) - return -1; -#else - free(region->base); -#endif - } - init_region(region); - return 0; -} - -int -yescrypt_init_shared(yescrypt_shared_t * shared, -const uint8_t * param, size_t paramlen, -uint64_t N, uint32_t r, uint32_t p, -yescrypt_init_shared_flags_t flags, uint32_t mask, -uint8_t * buf, size_t buflen) -{ - yescrypt_shared1_t * shared1 = &shared->shared1; - yescrypt_shared_t dummy, half1, half2; - // yescrypt_shared_t * half2; - uint8_t salt[32]; - - if (flags & YESCRYPT_SHARED_PREALLOCATED) { - if (!shared1->aligned || !shared1->aligned_size) - return -1; - } - else { - init_region(shared1); - } - shared->mask1 = 1; - if (!param && !paramlen && !N && !r && !p && !buf && !buflen) - return 0; - - init_region(&dummy.shared1); - dummy.mask1 = 1; - if (yescrypt_kdf(&dummy, shared1, - param, paramlen, NULL, 0, N, r, p, 0, - YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_1, - salt, sizeof(salt))) - goto out; - - half1 = half2 = *shared; - half1.shared1.aligned_size /= 2; - half2.shared1.aligned_size = half1.shared1.aligned_size; - half2.shared1.aligned = (char*)half2.shared1.aligned + half1.shared1.aligned_size; - - N /= 2; - - if (p > 1 && yescrypt_kdf(&half1, &half2.shared1, - param, paramlen, salt, sizeof(salt), N, r, p, 0, - YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_2, - salt, sizeof(salt))) - goto out; - - if (yescrypt_kdf(&half2, &half1.shared1, - param, paramlen, salt, sizeof(salt), N, r, p, 0, - YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_1, - salt, sizeof(salt))) - goto out; - - if (yescrypt_kdf(&half1, &half2.shared1, - param, paramlen, salt, sizeof(salt), N, r, p, 0, - YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_1, - buf, buflen)) - goto out; - - shared->mask1 = mask; - - return 0; - -out: - if (!(flags & YESCRYPT_SHARED_PREALLOCATED)) - free_region(shared1); - return -1; -} - -int -yescrypt_free_shared(yescrypt_shared_t * shared) -{ - return free_region(&shared->shared1); -} - -int -yescrypt_init_local(yescrypt_local_t * local) -{ - init_region(local); - return 0; -} - -int -yescrypt_free_local(yescrypt_local_t * local) -{ - return free_region(local); -} - - -static void -blkcpy(uint64_t * dest, const uint64_t * src, size_t count) -{ - do { - *dest++ = *src++; *dest++ = *src++; - *dest++ = *src++; *dest++ = *src++; - } while (count -= 4); -}; - -static void -blkxor(uint64_t * dest, const uint64_t * src, size_t count) -{ - do { - *dest++ ^= *src++; *dest++ ^= *src++; - *dest++ ^= *src++; *dest++ ^= *src++; - } while (count -= 4); -}; - -typedef union { - uint32_t w[16]; - uint64_t d[8]; -} salsa20_blk_t; - -static void -salsa20_simd_shuffle(const salsa20_blk_t * Bin, salsa20_blk_t * Bout) -{ -#define COMBINE(out, in1, in2) \ - Bout->d[out] = Bin->w[in1 * 2] | ((uint64_t)Bin->w[in2 * 2 + 1] << 32); - COMBINE(0, 0, 2) - COMBINE(1, 5, 7) - COMBINE(2, 2, 4) - COMBINE(3, 7, 1) - COMBINE(4, 4, 6) - COMBINE(5, 1, 3) - COMBINE(6, 6, 0) - COMBINE(7, 3, 5) -#undef COMBINE -} - -static void -salsa20_simd_unshuffle(const salsa20_blk_t * Bin, salsa20_blk_t * Bout) -{ -#define COMBINE(out, in1, in2) \ - Bout->w[out * 2] = Bin->d[in1]; \ - Bout->w[out * 2 + 1] = Bin->d[in2] >> 32; - COMBINE(0, 0, 6) - COMBINE(1, 5, 3) - COMBINE(2, 2, 0) - COMBINE(3, 7, 5) - COMBINE(4, 4, 2) - COMBINE(5, 1, 7) - COMBINE(6, 6, 4) - COMBINE(7, 3, 1) -#undef COMBINE -} - -/** - * salsa20_8(B): - * Apply the salsa20/8 core to the provided block. - */ - -static void -salsa20_8(uint64_t B[8]) -{ - size_t i; - salsa20_blk_t X; - -#define x X.w - - salsa20_simd_unshuffle((const salsa20_blk_t *)B, &X); - - for (i = 0; i < 8; i += 2) { -#define R(a,b) (((a) << (b)) | ((a) >> (32 - (b)))) - /* Operate on columns */ - x[ 4] ^= R(x[ 0]+x[12], 7); x[ 8] ^= R(x[ 4]+x[ 0], 9); - x[12] ^= R(x[ 8]+x[ 4],13); x[ 0] ^= R(x[12]+x[ 8],18); - - x[ 9] ^= R(x[ 5]+x[ 1], 7); x[13] ^= R(x[ 9]+x[ 5], 9); - x[ 1] ^= R(x[13]+x[ 9],13); x[ 5] ^= R(x[ 1]+x[13],18); - - x[14] ^= R(x[10]+x[ 6], 7); x[ 2] ^= R(x[14]+x[10], 9); - x[ 6] ^= R(x[ 2]+x[14],13); x[10] ^= R(x[ 6]+x[ 2],18); - - x[ 3] ^= R(x[15]+x[11], 7); x[ 7] ^= R(x[ 3]+x[15], 9); - x[11] ^= R(x[ 7]+x[ 3],13); x[15] ^= R(x[11]+x[ 7],18); - - /* Operate on rows */ - x[ 1] ^= R(x[ 0]+x[ 3], 7); x[ 2] ^= R(x[ 1]+x[ 0], 9); - x[ 3] ^= R(x[ 2]+x[ 1],13); x[ 0] ^= R(x[ 3]+x[ 2],18); - - x[ 6] ^= R(x[ 5]+x[ 4], 7); x[ 7] ^= R(x[ 6]+x[ 5], 9); - x[ 4] ^= R(x[ 7]+x[ 6],13); x[ 5] ^= R(x[ 4]+x[ 7],18); - - x[11] ^= R(x[10]+x[ 9], 7); x[ 8] ^= R(x[11]+x[10], 9); - x[ 9] ^= R(x[ 8]+x[11],13); x[10] ^= R(x[ 9]+x[ 8],18); - - x[12] ^= R(x[15]+x[14], 7); x[13] ^= R(x[12]+x[15], 9); - x[14] ^= R(x[13]+x[12],13); x[15] ^= R(x[14]+x[13],18); -#undef R - } -#undef x - - { - salsa20_blk_t Y; - salsa20_simd_shuffle(&X, &Y); - for (i = 0; i < 16; i += 4) { - ((salsa20_blk_t *)B)->w[i] += Y.w[i]; - ((salsa20_blk_t *)B)->w[i + 1] += Y.w[i + 1]; - ((salsa20_blk_t *)B)->w[i + 2] += Y.w[i + 2]; - ((salsa20_blk_t *)B)->w[i + 3] += Y.w[i + 3]; - } - } -} - -/** - * blockmix_salsa8(Bin, Bout, X, r): - * Compute Bout = BlockMix_{salsa20/8, r}(Bin). The input Bin must be 128r - * bytes in length; the output Bout must also be the same size. The - * temporary space X must be 64 bytes. - */ -static void -blockmix_salsa8(const uint64_t * Bin, uint64_t * Bout, uint64_t * X, size_t r) -{ - size_t i; - - /* 1: X <-- B_{2r - 1} */ - blkcpy(X, &Bin[(2 * r - 1) * 8], 8); - - /* 2: for i = 0 to 2r - 1 do */ - for (i = 0; i < 2 * r; i += 2) { - /* 3: X <-- H(X \xor B_i) */ - blkxor(X, &Bin[i * 8], 8); - salsa20_8(X); - - /* 4: Y_i <-- X */ - /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */ - blkcpy(&Bout[i * 4], X, 8); - - /* 3: X <-- H(X \xor B_i) */ - blkxor(X, &Bin[i * 8 + 8], 8); - salsa20_8(X); - - /* 4: Y_i <-- X */ - /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */ - blkcpy(&Bout[i * 4 + r * 8], X, 8); - } - -} - -/* These are tunable */ -#define S_BITS 8 -#define S_SIMD 2 -#define S_P 4 -#define S_ROUNDS 6 - -/* Number of S-boxes. Not tunable, hard-coded in a few places. */ -#define S_N 2 - -/* Derived values. Not tunable on their own. */ -#define S_SIZE1 (1 << S_BITS) -#define S_MASK ((S_SIZE1 - 1) * S_SIMD * 8) -#define S_MASK2 (((uint64_t)S_MASK << 32) | S_MASK) -#define S_SIZE_ALL (S_N * S_SIZE1 * S_SIMD) -#define S_P_SIZE (S_P * S_SIMD) -#define S_MIN_R ((S_P * S_SIMD + 15) / 16) - -/** - * pwxform(B): - * Transform the provided block using the provided S-boxes. - */ - -static void -block_pwxform(uint64_t * B, const uint64_t * S) -{ - uint64_t(*X)[S_SIMD] = (uint64_t(*)[S_SIMD])B; - const uint8_t *S0 = (const uint8_t *)S; - const uint8_t *S1 = (const uint8_t *)(S + S_SIZE1 * S_SIMD); - size_t i, j; - - for (j = 0; j < S_P; j++) { - - uint64_t *Xj = X[j]; - uint64_t x0 = Xj[0]; - uint64_t x1 = Xj[1]; - - for (i = 0; i < S_ROUNDS; i++) { - uint64_t x = x0 & S_MASK2; - const uint64_t *p0, *p1; - - p0 = (const uint64_t *)(S0 + (uint32_t)x); - p1 = (const uint64_t *)(S1 + (x >> 32)); - - x0 = (uint64_t)(x0 >> 32) * (uint32_t)x0; - x0 += p0[0]; - x0 ^= p1[0]; - - x1 = (uint64_t)(x1 >> 32) * (uint32_t)x1; - x1 += p0[1]; - x1 ^= p1[1]; - } - Xj[0] = x0; - Xj[1] = x1; - } - - - -} - - -/** - * blockmix_pwxform(Bin, Bout, S, r): - * Compute Bout = BlockMix_pwxform{salsa20/8, S, r}(Bin). The input Bin must - * be 128r bytes in length; the output Bout must also be the same size. - * - * S lacks const qualifier to match blockmix_salsa8()'s prototype, which we - * need to refer to both functions via the same function pointers. - */ -static void -blockmix_pwxform(const uint64_t * Bin, uint64_t * Bout, uint64_t * S, size_t r) -{ - size_t r1, r2, i; - // S_P_SIZE = 8; - /* Convert 128-byte blocks to (S_P_SIZE * 64-bit) blocks */ - - r1 = r * 128 / (S_P_SIZE * 8); - /* X <-- B_{r1 - 1} */ - blkcpy(Bout, &Bin[(r1 - 1) * S_P_SIZE], S_P_SIZE); - - /* X <-- X \xor B_i */ - blkxor(Bout, Bin, S_P_SIZE); - - /* X <-- H'(X) */ - /* B'_i <-- X */ - block_pwxform(Bout, S); - - /* for i = 0 to r1 - 1 do */ - for (i = 1; i < r1; i++) { - /* X <-- X \xor B_i */ - blkcpy(&Bout[i * S_P_SIZE], &Bout[(i - 1) * S_P_SIZE],S_P_SIZE); - blkxor(&Bout[i * S_P_SIZE], &Bin[i * S_P_SIZE], S_P_SIZE); - - /* X <-- H'(X) */ - /* B'_i <-- X */ - block_pwxform(&Bout[i * S_P_SIZE], S); - } - - /* Handle partial blocks */ - if (i * S_P_SIZE < r * 16) { - blkcpy(&Bout[i * S_P_SIZE], &Bin[i * S_P_SIZE],r * 16 - i * S_P_SIZE); -} - - i = (r1 - 1) * S_P_SIZE / 8; - /* Convert 128-byte blocks to 64-byte blocks */ - r2 = r * 2; - - /* B'_i <-- H(B'_i) */ - salsa20_8(&Bout[i * 8]); - - - i++; -/// not used yescrypt - - for (; i < r2; i++) { - /* B'_i <-- H(B'_i \xor B'_{i-1}) */ - blkxor(&Bout[i * 8], &Bout[(i - 1) * 8], 8); - salsa20_8(&Bout[i * 8]); - } -} - - - -/** - * integerify(B, r): - * Return the result of parsing B_{2r-1} as a little-endian integer. - */ -static uint64_t -integerify(const uint64_t * B, size_t r) -{ -/* - * Our 64-bit words are in host byte order, and word 6 holds the second 32-bit - * word of B_{2r-1} due to SIMD shuffling. The 64-bit value we return is also - * in host byte order, as it should be. - */ - const uint64_t * X = &B[(2 * r - 1) * 8]; - uint32_t lo = X[0]; - uint32_t hi = X[6] >> 32; - return ((uint64_t)hi << 32) + lo; -} - -/** - * smix1(B, r, N, flags, V, NROM, shared, XY, S): - * Compute first loop of B = SMix_r(B, N). The input B must be 128r bytes in - * length; the temporary storage V must be 128rN bytes in length; the temporary - * storage XY must be 256r + 64 bytes in length. The value N must be even and - * no smaller than 2. - */ -static void -smix1(uint64_t * B, size_t r, uint64_t N, yescrypt_flags_t flags, - uint64_t * V, uint64_t NROM, const yescrypt_shared_t * shared, - uint64_t * XY, uint64_t * S) -{ - void (*blockmix)(const uint64_t *, uint64_t *, uint64_t *, size_t) = (S ? blockmix_pwxform : blockmix_salsa8); - const uint64_t * VROM = (uint64_t *)shared->shared1.aligned; - uint32_t VROM_mask = shared->mask1; - size_t s = 16 * r; - uint64_t * X = V; - uint64_t * Y = &XY[s]; - uint64_t * Z = S ? S : &XY[2 * s]; - uint64_t n, i, j; - size_t k; - - /* 1: X <-- B */ - /* 3: V_i <-- X */ - for (i = 0; i < 2 * r; i++) { - const salsa20_blk_t *src = (const salsa20_blk_t *)&B[i * 8]; - salsa20_blk_t *tmp = (salsa20_blk_t *)Y; - salsa20_blk_t *dst = (salsa20_blk_t *)&X[i * 8]; - for (k = 0; k < 16; k++) - tmp->w[k] = le32dec(&src->w[k]); - - salsa20_simd_shuffle(tmp, dst); - } - - /* 4: X <-- H(X) */ - /* 3: V_i <-- X */ - - blockmix(X, Y, Z, r); - blkcpy(&V[s], Y, s); - X = XY; - - if (NROM && (VROM_mask & 1)) { - if ((1 & VROM_mask) == 1) { - /* j <-- Integerify(X) mod NROM */ - j = integerify(Y, r) & (NROM - 1); - - /* X <-- H(X \xor VROM_j) */ - blkxor(Y, &VROM[j * s], s); - } - - blockmix(Y, X, Z, r); - - - /* 2: for i = 0 to N - 1 do */ - for (n = 1, i = 2; i < N; i += 2) { - /* 3: V_i <-- X */ - blkcpy(&V[i * s], X, s); - - if ((i & (i - 1)) == 0) - n <<= 1; - - /* j <-- Wrap(Integerify(X), i) */ - j = integerify(X, r) & (n - 1); - j += i - n; - - /* X <-- X \xor V_j */ - blkxor(X, &V[j * s], s); - - /* 4: X <-- H(X) */ - blockmix(X, Y, Z, r); - - /* 3: V_i <-- X */ - blkcpy(&V[(i + 1) * s], Y, s); - - j = integerify(Y, r); - if (((i + 1) & VROM_mask) == 1) { - /* j <-- Integerify(X) mod NROM */ - j &= NROM - 1; - - /* X <-- H(X \xor VROM_j) */ - blkxor(Y, &VROM[j * s], s); - } else { - /* j <-- Wrap(Integerify(X), i) */ - j &= n - 1; - j += i + 1 - n; - - /* X <-- H(X \xor V_j) */ - blkxor(Y, &V[j * s], s); - } - - blockmix(Y, X, Z, r); - } - } else { - yescrypt_flags_t rw = flags & YESCRYPT_RW; - /* 4: X <-- H(X) */ - blockmix(Y, X, Z, r); - - /* 2: for i = 0 to N - 1 do */ - for (n = 1, i = 2; i < N; i += 2) { - /* 3: V_i <-- X */ - blkcpy(&V[i * s], X, s); - - if (rw) { - if ((i & (i - 1)) == 0) - n <<= 1; - - /* j <-- Wrap(Integerify(X), i) */ - j = integerify(X, r) & (n - 1); - j += i - n; - - /* X <-- X \xor V_j */ - blkxor(X, &V[j * s], s); - } - - /* 4: X <-- H(X) */ - blockmix(X, Y, Z, r); - - /* 3: V_i <-- X */ - blkcpy(&V[(i + 1) * s], Y, s); - - if (rw) { - /* j <-- Wrap(Integerify(X), i) */ - j = integerify(Y, r) & (n - 1); - j += (i + 1) - n; - - - /* X <-- X \xor V_j */ - blkxor(Y, &V[j * s], s); - } - - /* 4: X <-- H(X) */ - blockmix(Y, X, Z, r); - } - } - - /* B' <-- X */ - for (i = 0; i < 2 * r; i++) { - const salsa20_blk_t *src = (const salsa20_blk_t *)&X[i * 8]; - salsa20_blk_t *tmp = (salsa20_blk_t *)Y; - salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 8]; - for (k = 0; k < 16; k++) - le32enc(&tmp->w[k], src->w[k]); - salsa20_simd_unshuffle(tmp, dst); - } -} - - - -/** - * smix2(B, r, N, Nloop, flags, V, NROM, shared, XY, S): - * Compute second loop of B = SMix_r(B, N). The input B must be 128r bytes in - * length; the temporary storage V must be 128rN bytes in length; the temporary - * storage XY must be 256r + 64 bytes in length. The value N must be a - * power of 2 greater than 1. The value Nloop must be even. - */ -static void -smix2(uint64_t * B, size_t r, uint64_t N, uint64_t Nloop, - yescrypt_flags_t flags, - uint64_t * V, uint64_t NROM, const yescrypt_shared_t * shared, - uint64_t * XY, uint64_t * S) -{ - - void (*blockmix)(const uint64_t *, uint64_t *, uint64_t *, size_t) = - (S ? blockmix_pwxform : blockmix_salsa8); - const uint64_t * VROM = (uint64_t *)shared->shared1.aligned; - uint32_t VROM_mask = shared->mask1 | 1; - size_t s = 16 * r; - yescrypt_flags_t rw = flags & YESCRYPT_RW; - uint64_t * X = XY; - uint64_t * Y = &XY[s]; - uint64_t * Z = S ? S : &XY[2 * s]; - uint64_t i, j; - size_t k; - - if (Nloop == 0) - return; - - /* X <-- B' */ - for (i = 0; i < 2 * r; i++) { - const salsa20_blk_t *src = (const salsa20_blk_t *)&B[i * 8]; - salsa20_blk_t *tmp = (salsa20_blk_t *)Y; - salsa20_blk_t *dst = (salsa20_blk_t *)&X[i * 8]; - for (k = 0; k < 16; k++) - tmp->w[k] = le32dec(&src->w[k]); - salsa20_simd_shuffle(tmp, dst); - } - if (NROM) { - - /* 6: for i = 0 to N - 1 do */ - for (i = 0; i < Nloop; i += 2) { - /* 7: j <-- Integerify(X) mod N */ - j = integerify(X, r) & (N - 1); - - /* 8: X <-- H(X \xor V_j) */ - blkxor(X, &V[j * s], s); - /* V_j <-- Xprev \xor V_j */ - if (rw) - blkcpy(&V[j * s], X, s); - blockmix(X, Y, Z, r); - - j = integerify(Y, r); - if (((i + 1) & VROM_mask) == 1) { - /* j <-- Integerify(X) mod NROM */ - j &= NROM - 1; - - /* X <-- H(X \xor VROM_j) */ - blkxor(Y, &VROM[j * s], s); - } else { - /* 7: j <-- Integerify(X) mod N */ - j &= N - 1; - - /* 8: X <-- H(X \xor V_j) */ - blkxor(Y, &V[j * s], s); - /* V_j <-- Xprev \xor V_j */ - if (rw) - blkcpy(&V[j * s], Y, s); - } - - blockmix(Y, X, Z, r); - } - } else { - - /* 6: for i = 0 to N - 1 do */ - i = Nloop / 2; - do { - /* 7: j <-- Integerify(X) mod N */ - j = integerify(X, r) & (N - 1); - - /* 8: X <-- H(X \xor V_j) */ - blkxor(X, &V[j * s], s); - /* V_j <-- Xprev \xor V_j */ - if (rw) - blkcpy(&V[j * s], X, s); - blockmix(X, Y, Z, r); - - /* 7: j <-- Integerify(X) mod N */ - j = integerify(Y, r) & (N - 1); - - /* 8: X <-- H(X \xor V_j) */ - blkxor(Y, &V[j * s], s); - /* V_j <-- Xprev \xor V_j */ - if (rw) - blkcpy(&V[j * s], Y, s); - blockmix(Y, X, Z, r); - } while (--i); - } - - /* 10: B' <-- X */ - for (i = 0; i < 2 * r; i++) { - const salsa20_blk_t *src = (const salsa20_blk_t *)&X[i * 8]; - salsa20_blk_t *tmp = (salsa20_blk_t *)Y; - salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 8]; - for (k = 0; k < 16; k++) - le32enc(&tmp->w[k], src->w[k]); - salsa20_simd_unshuffle(tmp, dst); - } -} - - - - -/** - * p2floor(x): - * Largest power of 2 not greater than argument. - */ -static uint64_t -p2floor(uint64_t x) -{ - uint64_t y; - while ((y = x & (x - 1))) - x = y; - return x; -} - -/** - * smix(B, r, N, p, t, flags, V, NROM, shared, XY, S): - * Compute B = SMix_r(B, N). The input B must be 128rp bytes in length; the - * temporary storage V must be 128rN bytes in length; the temporary storage - * XY must be 256r+64 or (256r+64)*p bytes in length (the larger size is - * required with OpenMP-enabled builds). The value N must be a power of 2 - * greater than 1. - */ -static void -smix(uint64_t * B, size_t r, uint64_t N, uint32_t p, uint32_t t, - yescrypt_flags_t flags, - uint64_t * V, uint64_t NROM, const yescrypt_shared_t * shared, - uint64_t * XY, uint64_t * S) -{ - size_t s = 16 * r; - uint64_t Nchunk = N / p, Nloop_all, Nloop_rw; - uint32_t i; - - Nloop_all = Nchunk; - if (flags & YESCRYPT_RW) { - if (t <= 1) { - if (t) - Nloop_all *= 2; /* 2/3 */ - Nloop_all = (Nloop_all + 2) / 3; /* 1/3, round up */ - } else { - Nloop_all *= t - 1; - } - } else if (t) { - if (t == 1) - Nloop_all += (Nloop_all + 1) / 2; /* 1.5, round up */ - Nloop_all *= t; - } - - Nloop_rw = 0; - if (flags & __YESCRYPT_INIT_SHARED) - Nloop_rw = Nloop_all; - else if (flags & YESCRYPT_RW) - Nloop_rw = Nloop_all / p; - - Nchunk &= ~(uint64_t)1; /* round down to even */ - Nloop_all++; Nloop_all &= ~(uint64_t)1; /* round up to even */ - Nloop_rw &= ~(uint64_t)1; /* round down to even */ - - - for (i = 0; i < p; i++) { - uint64_t Vchunk = i * Nchunk; - uint64_t * Bp = &B[i * s]; - uint64_t * Vp = &V[Vchunk * s]; - uint64_t * XYp = XY; - - uint64_t Np = (i < p - 1) ? Nchunk : (N - Vchunk); - uint64_t * Sp = S ? &S[i * S_SIZE_ALL] : S; - - if (Sp) - smix1(Bp, 1, S_SIZE_ALL / 16, (yescrypt_flags_t)flags & ~YESCRYPT_PWXFORM,Sp, NROM, shared, XYp, NULL); - - - - if (!(flags & __YESCRYPT_INIT_SHARED_2)) - smix1(Bp, r, Np, flags, Vp, NROM, shared, XYp, Sp); - - - smix2(Bp, r, p2floor(Np), Nloop_rw, flags, Vp, NROM, shared, XYp, Sp); - - - - } - if (Nloop_all > Nloop_rw) { - - for (i = 0; i < p; i++) { - uint64_t * Bp = &B[i * s]; - - uint64_t * XYp = XY; - - uint64_t * Sp = S ? &S[i * S_SIZE_ALL] : S; - smix2(Bp, r, N, Nloop_all - Nloop_rw,flags & ~YESCRYPT_RW, V, NROM, shared, XYp, Sp); - - } - } - - - - -} - -static void -smix_old(uint64_t * B, size_t r, uint64_t N, uint32_t p, uint32_t t, -yescrypt_flags_t flags, -uint64_t * V, uint64_t NROM, const yescrypt_shared_t * shared, -uint64_t * XY, uint64_t * S) -{ - size_t s = 16 * r; - uint64_t Nchunk = N / p, Nloop_all, Nloop_rw; - uint32_t i; - - Nloop_all = Nchunk; - if (flags & YESCRYPT_RW) { - if (t <= 1) { - if (t) - Nloop_all *= 2; /* 2/3 */ - Nloop_all = (Nloop_all + 2) / 3; /* 1/3, round up */ - } - else { - Nloop_all *= t - 1; - } - } - else if (t) { - if (t == 1) - Nloop_all += (Nloop_all + 1) / 2; /* 1.5, round up */ - Nloop_all *= t; - } - - Nloop_rw = 0; - if (flags & __YESCRYPT_INIT_SHARED) - Nloop_rw = Nloop_all; - else if (flags & YESCRYPT_RW) - Nloop_rw = Nloop_all / p; - - Nchunk &= ~(uint64_t)1; /* round down to even */ - Nloop_all++; Nloop_all &= ~(uint64_t)1; /* round up to even */ - Nloop_rw &= ~(uint64_t)1; /* round down to even */ - - - for (i = 0; i < p; i++) { - uint64_t Vchunk = i * Nchunk; - uint64_t * Bp = &B[i * s]; - uint64_t * Vp = &V[Vchunk * s]; - uint64_t * XYp = XY; - - uint64_t Np = (i < p - 1) ? Nchunk : (N - Vchunk); - uint64_t * Sp = S ? &S[i * S_SIZE_ALL] : S; - - if (Sp) { - smix1(Bp, 1, S_SIZE_ALL / 16, flags & ~YESCRYPT_PWXFORM, Sp, NROM, shared, XYp, NULL); - - - } - if (!(flags & __YESCRYPT_INIT_SHARED_2)) { - smix1(Bp, r, Np, flags, Vp, NROM, shared, XYp, Sp); - } - - - smix2(Bp, r, p2floor(Np), Nloop_rw, flags, Vp, NROM, shared, XYp, Sp); - } - - if (Nloop_all > Nloop_rw) { - - for (i = 0; i < p; i++) { - uint64_t * Bp = &B[i * s]; - - uint64_t * XYp = XY; - - uint64_t * Sp = S ? &S[i * S_SIZE_ALL] : S; - smix2(Bp, r, N, Nloop_all - Nloop_rw, flags & ~YESCRYPT_RW, V, NROM, shared, XYp, Sp); - } - } -} - -/** - * yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen, - * N, r, p, t, flags, buf, buflen): - * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r, - * p, buflen), or a revision of scrypt as requested by flags and shared, and - * write the result into buf. The parameters r, p, and buflen must satisfy - * r * p < 2^30 and buflen <= (2^32 - 1) * 32. The parameter N must be a power - * of 2 greater than 1. - * - * t controls computation time while not affecting peak memory usage. shared - * and flags may request special modes as described in yescrypt.h. local is - * the thread-local data structure, allowing to preserve and reuse a memory - * allocation across calls, thereby reducing its overhead. - * - * Return 0 on success; or -1 on error. - */ -int -yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local, - const uint8_t * passwd, size_t passwdlen, - const uint8_t * salt, size_t saltlen, - uint64_t N, uint32_t r, uint32_t p, uint32_t t, yescrypt_flags_t flags, - uint8_t * buf, size_t buflen) -{ - yescrypt_region_t tmp; - uint64_t NROM; - size_t B_size, V_size, XY_size, need; - uint64_t * B, * V, * XY, * S; - uint64_t sha256[4]; - - /* - * YESCRYPT_PARALLEL_SMIX is a no-op at p = 1 for its intended purpose, - * so don't let it have side-effects. Without this adjustment, it'd - * enable the SHA-256 password pre-hashing and output post-hashing, - * because any deviation from classic scrypt implies those. - */ - if (p == 1) - flags &= ~YESCRYPT_PARALLEL_SMIX; - - /* Sanity-check parameters */ - if (flags & ~YESCRYPT_KNOWN_FLAGS) { - errno = EINVAL; - return -1; - } -#if SIZE_MAX > UINT32_MAX - if (buflen > (((uint64_t)(1) << 32) - 1) * 32) { - errno = EFBIG; - return -1; - } -#endif - if ((uint64_t)(r) * (uint64_t)(p) >= (1 << 30)) { - errno = EFBIG; - return -1; - } - if (((N & (N - 1)) != 0) || (N <= 1) || (r < 1) || (p < 1)) { - errno = EINVAL; - return -1; - } - if ((flags & YESCRYPT_PARALLEL_SMIX) && (N / p <= 1)) { - errno = EINVAL; - return -1; - } -#if S_MIN_R > 1 - if ((flags & YESCRYPT_PWXFORM) && (r < S_MIN_R)) { - errno = EINVAL; - return -1; - } -#endif - if ((p > SIZE_MAX / ((size_t)256 * r + 64)) || -#if SIZE_MAX / 256 <= UINT32_MAX - (r > SIZE_MAX / 256) || -#endif - (N > SIZE_MAX / 128 / r)) { - errno = ENOMEM; - return -1; - } - if (N > UINT64_MAX / ((uint64_t)t + 1)) { - errno = EFBIG; - return -1; - } - - if ((flags & YESCRYPT_PWXFORM) && - p > SIZE_MAX / (S_SIZE_ALL * sizeof(*S))) { - errno = ENOMEM; - return -1; - } - - NROM = 0; - if (shared->shared1.aligned) { - NROM = shared->shared1.aligned_size / ((size_t)128 * r); - if (((NROM & (NROM - 1)) != 0) || (NROM <= 1) || - !(flags & YESCRYPT_RW)) { - errno = EINVAL; - return -1; - } - } - - /* Allocate memory */ - V = NULL; - V_size = (size_t)128 * r * N; - - need = V_size; - if (flags & __YESCRYPT_INIT_SHARED) { - if (local->aligned_size < need) { - if (local->base || local->aligned || - local->base_size || local->aligned_size) { - errno = EINVAL; - return -1; - } - if (!alloc_region(local, need)) - return -1; - } - V = (uint64_t *)local->aligned; - need = 0; - } - B_size = (size_t)128 * r * p; - need += B_size; - if (need < B_size) { - errno = ENOMEM; - return -1; - } - XY_size = (size_t)256 * r + 64; - - need += XY_size; - if (need < XY_size) { - errno = ENOMEM; - return -1; - } - if (flags & YESCRYPT_PWXFORM) { - size_t S_size = S_SIZE_ALL * sizeof(*S); - - if (flags & YESCRYPT_PARALLEL_SMIX) - S_size *= p; - - need += S_size; - if (need < S_size) { - errno = ENOMEM; - return -1; - } - } - if (flags & __YESCRYPT_INIT_SHARED) { - if (!alloc_region(&tmp, need)) - return -1; - B = (uint64_t *)tmp.aligned; - XY = (uint64_t *)((uint8_t *)B + B_size); - } else { - init_region(&tmp); - if (local->aligned_size < need) { - if (free_region(local)) - return -1; - if (!alloc_region(local, need)) - return -1; - } - B = (uint64_t *)local->aligned; - V = (uint64_t *)((uint8_t *)B + B_size); - XY = (uint64_t *)((uint8_t *)V + V_size); - } - S = NULL; - if (flags & YESCRYPT_PWXFORM) - S = (uint64_t *)((uint8_t *)XY + XY_size); - - - if (t || flags) { - SHA256_CTX_Y ctx; - SHA256_Init_Y(&ctx); - SHA256_Update_Y(&ctx, passwd, passwdlen); - SHA256_Final_Y((uint8_t *)sha256, &ctx); - passwd = (uint8_t *)sha256; - passwdlen = sizeof(sha256); - } - /* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */ - PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, 1,(uint8_t *)B, B_size); - - if (t || flags) - { - blkcpy(sha256, B, sizeof(sha256) / sizeof(sha256[0])); - } - if (p == 1 || (flags & YESCRYPT_PARALLEL_SMIX)) { - smix(B, r, N, p, t, flags, V, NROM, shared, XY, S); - } else { - uint32_t i; - /* 2: for i = 0 to p - 1 do */ - for (i = 0; i < p; i++) { - /* 3: B_i <-- MF(B_i, N) */ - smix(&B[(size_t)16 * r * i], r, N, 1, t, flags, V, NROM, shared, XY, S); - } - } - - /* 5: DK <-- PBKDF2(P, B, 1, dkLen) */ - - PBKDF2_SHA256(passwd, passwdlen, (uint8_t *)B, B_size, 1, buf, buflen); - /* - * Except when computing classic scrypt, allow all computation so far - * to be performed on the client. The final steps below match those of - * SCRAM (RFC 5802), so that an extension of SCRAM (with the steps so - * far in place of SCRAM's use of PBKDF2 and with SHA-256 in place of - * SCRAM's use of SHA-1) would be usable with yescrypt hashes. - */ - if ((t || flags) && buflen == sizeof(sha256)) { - /* Compute ClientKey */ - - { - HMAC_SHA256_CTX_Y ctx; - HMAC_SHA256_Init_Y(&ctx, buf, buflen); - HMAC_SHA256_Update_Y(&ctx, salt, saltlen); - HMAC_SHA256_Final_Y((uint8_t *)sha256, &ctx); - } - /* Compute StoredKey */ - { - SHA256_CTX_Y ctx; - SHA256_Init_Y(&ctx); - SHA256_Update_Y(&ctx, (uint8_t *)sha256, sizeof(sha256)); - SHA256_Final_Y(buf, &ctx); - } - } - - if (free_region(&tmp)) - return -1; - - /* Success! */ - return 0; -} - -int -yescrypt_kdf_old(const yescrypt_shared_t * shared, yescrypt_local_t * local, -const uint8_t * passwd, size_t passwdlen, -const uint8_t * salt, size_t saltlen, -uint64_t N, uint32_t r, uint32_t p, uint32_t t, yescrypt_flags_t flags, -uint8_t * buf, size_t buflen) -{ - yescrypt_region_t tmp; - uint64_t NROM; - size_t B_size, V_size, XY_size, need; - uint64_t * B, *V, *XY, *S; - uint64_t sha256[4]; - - /* - * YESCRYPT_PARALLEL_SMIX is a no-op at p = 1 for its intended purpose, - * so don't let it have side-effects. Without this adjustment, it'd - * enable the SHA-256 password pre-hashing and output post-hashing, - * because any deviation from classic scrypt implies those. - */ - if (p == 1) - flags &= ~YESCRYPT_PARALLEL_SMIX; - - /* Sanity-check parameters */ - if (flags & ~YESCRYPT_KNOWN_FLAGS) { - errno = EINVAL; - return -1; - } -#if SIZE_MAX > UINT32_MAX - if (buflen > (((uint64_t)(1) << 32) - 1) * 32) { - errno = EFBIG; - return -1; - } -#endif - if ((uint64_t)(r)* (uint64_t)(p) >= (1 << 30)) { - errno = EFBIG; - return -1; - } - if (((N & (N - 1)) != 0) || (N <= 1) || (r < 1) || (p < 1)) { - errno = EINVAL; - return -1; - } - if ((flags & YESCRYPT_PARALLEL_SMIX) && (N / p <= 1)) { - errno = EINVAL; - return -1; - } -#if S_MIN_R > 1 - if ((flags & YESCRYPT_PWXFORM) && (r < S_MIN_R)) { - errno = EINVAL; - return -1; - } -#endif - if ((p > SIZE_MAX / ((size_t)256 * r + 64)) || -#if SIZE_MAX / 256 <= UINT32_MAX - (r > SIZE_MAX / 256) || -#endif - (N > SIZE_MAX / 128 / r)) { - errno = ENOMEM; - return -1; - } - if (N > UINT64_MAX / ((uint64_t)t + 1)) { - errno = EFBIG; - return -1; - } - - if ((flags & YESCRYPT_PWXFORM) && - p > SIZE_MAX / (S_SIZE_ALL * sizeof(*S))) { - errno = ENOMEM; - return -1; - } - - NROM = 0; - if (shared->shared1.aligned) { - NROM = shared->shared1.aligned_size / ((size_t)128 * r); - if (((NROM & (NROM - 1)) != 0) || (NROM <= 1) || - !(flags & YESCRYPT_RW)) { - errno = EINVAL; - return -1; - } - } - - /* Allocate memory */ - V = NULL; - V_size = (size_t)128 * r * N; - - need = V_size; - if (flags & __YESCRYPT_INIT_SHARED) { - if (local->aligned_size < need) { - if (local->base || local->aligned || - local->base_size || local->aligned_size) { - errno = EINVAL; - return -1; - } - if (!alloc_region(local, need)) - return -1; - } - V = (uint64_t *)local->aligned; - need = 0; - } - B_size = (size_t)128 * r * p; - need += B_size; - if (need < B_size) { - errno = ENOMEM; - return -1; - } - XY_size = (size_t)256 * r + 64; - - need += XY_size; - if (need < XY_size) { - errno = ENOMEM; - return -1; - } - if (flags & YESCRYPT_PWXFORM) { - size_t S_size = S_SIZE_ALL * sizeof(*S); - - if (flags & YESCRYPT_PARALLEL_SMIX) - S_size *= p; - - need += S_size; - if (need < S_size) { - errno = ENOMEM; - return -1; - } - } - if (flags & __YESCRYPT_INIT_SHARED) { - if (!alloc_region(&tmp, need)) - return -1; - B = (uint64_t *)tmp.aligned; - XY = (uint64_t *)((uint8_t *)B + B_size); - } - else { - init_region(&tmp); - if (local->aligned_size < need) { - if (free_region(local)) - return -1; - if (!alloc_region(local, need)) - return -1; - } - B = (uint64_t *)local->aligned; - V = (uint64_t *)((uint8_t *)B + B_size); - XY = (uint64_t *)((uint8_t *)V + V_size); - } - S = NULL; - if (flags & YESCRYPT_PWXFORM) - S = (uint64_t *)((uint8_t *)XY + XY_size); - - - if (t || flags) { - SHA256_CTX_Y ctx; - SHA256_Init_Y(&ctx); - SHA256_Update_Y(&ctx, passwd, passwdlen); - SHA256_Final_Y((uint8_t *)sha256, &ctx); - passwd = (uint8_t *)sha256; - passwdlen = sizeof(sha256); - } - - /* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */ - PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, 1, (uint8_t *)B, B_size); - - - if (t || flags) - { - blkcpy(sha256, B, sizeof(sha256) / sizeof(sha256[0])); - } - smix(B, r, N, p, t, flags, V, NROM, shared, XY, S); - - - /* 5: DK <-- PBKDF2(P, B, 1, dkLen) */ - PBKDF2_SHA256(passwd, passwdlen, (uint8_t *)B, B_size, 1, buf, buflen); - - /* - * Except when computing classic scrypt, allow all computation so far - * to be performed on the client. The final steps below match those of - * SCRAM (RFC 5802), so that an extension of SCRAM (with the steps so - * far in place of SCRAM's use of PBKDF2 and with SHA-256 in place of - * SCRAM's use of SHA-1) would be usable with yescrypt hashes. - */ - if ((t || flags) && buflen == sizeof(sha256)) { - /* Compute ClientKey */ - - { - HMAC_SHA256_CTX_Y ctx; - HMAC_SHA256_Init_Y(&ctx, buf, buflen); - HMAC_SHA256_Update_Y(&ctx, salt, saltlen); - HMAC_SHA256_Final_Y((uint8_t *)sha256, &ctx); - } - /* Compute StoredKey */ - { - SHA256_CTX_Y ctx; - SHA256_Init_Y(&ctx); - SHA256_Update_Y(&ctx, (uint8_t *)sha256, sizeof(sha256)); - SHA256_Final_Y(buf, &ctx); - } - } - - if (free_region(&tmp)) - return -1; - - /* Success! */ - return 0; -} - diff --git a/algorithm/yescrypt.c b/algorithm/yescrypt.c deleted file mode 100644 index 31e0c623..00000000 --- a/algorithm/yescrypt.c +++ /dev/null @@ -1,119 +0,0 @@ -/*- - * Copyright 2015 djm34 - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "config.h" -#include "miner.h" - -#include -#include -#include - -#include "algorithm/yescrypt_core.h" - -static const uint32_t diff1targ = 0x0000ffff; - -/* Used externally as confirmation of correct OCL code */ -int yescrypt_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce) -{ - uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]); - uint32_t data[20], ohash[8]; - - be32enc_vect(data, (const uint32_t *)pdata, 19); - data[19] = htobe32(nonce); - yescrypt_hash((unsigned char*)data,(unsigned char*)ohash); - - tmp_hash7 = be32toh(ohash[7]); - - applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx", - (long unsigned int)Htarg, - (long unsigned int)diff1targ, - (long unsigned int)tmp_hash7); - - if (tmp_hash7 > diff1targ) - return -1; - - if (tmp_hash7 > Htarg) - return 0; - - return 1; -} - -void yescrypt_regenhash(struct work *work) -{ - uint32_t data[20]; - uint32_t *nonce = (uint32_t *)(work->data + 76); - uint32_t *ohash = (uint32_t *)(work->hash); - - be32enc_vect(data, (const uint32_t *)work->data, 19); - data[19] = htobe32(*nonce); - - yescrypt_hash((unsigned char*)data, (unsigned char*)ohash); - -} - - -bool scanhash_yescrypt(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate, - unsigned char *pdata, unsigned char __maybe_unused *phash1, - unsigned char __maybe_unused *phash, const unsigned char *ptarget, - uint32_t max_nonce, uint32_t *last_nonce, uint32_t n) -{ - uint32_t *nonce = (uint32_t *)(pdata + 76); - uint32_t data[20]; - uint32_t tmp_hash7; - uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]); - bool ret = false; - - be32enc_vect(data, (const uint32_t *)pdata, 19); - - while (1) - { - uint32_t ostate[8]; - - *nonce = ++n; - data[19] = (n); - - yescrypt_hash((unsigned char*)data, (unsigned char*)ostate); - tmp_hash7 = (ostate[7]); - - applog(LOG_INFO, "data7 %08lx", (long unsigned int)data[7]); - - if (unlikely(tmp_hash7 <= Htarg)) - { - ((uint32_t *)pdata)[19] = htobe32(n); - *last_nonce = n; - ret = true; - break; - } - - if (unlikely((n >= max_nonce) || thr->work_restart)) - { - *last_nonce = n; - break; - } - } - - return ret; -} \ No newline at end of file diff --git a/algorithm/yescrypt.h b/algorithm/yescrypt.h deleted file mode 100644 index b51cb495..00000000 --- a/algorithm/yescrypt.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef YESCRYPT_H -#define YESCRYPT_H - -#include "miner.h" -#define YESCRYPT_SCRATCHBUF_SIZE (128 * 2048 * 8 ) //uchar -#define YESCRYP_SECBUF_SIZE (128*64*8) -extern int yescrypt_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce); -extern void yescrypt_regenhash(struct work *work); - -#endif /* YESCRYPT_H */ diff --git a/algorithm/yescrypt_core.h b/algorithm/yescrypt_core.h deleted file mode 100644 index 64b9a11f..00000000 --- a/algorithm/yescrypt_core.h +++ /dev/null @@ -1,376 +0,0 @@ -/*- - * Copyright 2009 Colin Percival - * Copyright 2013,2014 Alexander Peslyak - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * This file was originally written by Colin Percival as part of the Tarsnap - * online backup system. - */ -#ifndef _YESCRYPT_H_ -#define _YESCRYPT_H_ - -#include -#include /* for size_t */ -#include - -#ifdef __cplusplus -extern "C" { -#endif - - -//extern void yescrypt_hash_sp(const unsigned char *input, unsigned char *output); -extern void yescrypt_hash(const unsigned char *input, unsigned char *output); - - - -/** - * crypto_scrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen): - * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r, - * p, buflen) and write the result into buf. The parameters r, p, and buflen - * must satisfy r * p < 2^30 and buflen <= (2^32 - 1) * 32. The parameter N - * must be a power of 2 greater than 1. - * - * Return 0 on success; or -1 on error. - * - * MT-safe as long as buf is local to the thread. - */ -extern int crypto_scrypt(const uint8_t * __passwd, size_t __passwdlen, - const uint8_t * __salt, size_t __saltlen, - uint64_t __N, uint32_t __r, uint32_t __p, - uint8_t * __buf, size_t __buflen); - -/** - * Internal type used by the memory allocator. Please do not use it directly. - * Use yescrypt_shared_t and yescrypt_local_t as appropriate instead, since - * they might differ from each other in a future version. - */ -typedef struct { - void * base, * aligned; - size_t base_size, aligned_size; -} yescrypt_region_t; - -/** - * Types for shared (ROM) and thread-local (RAM) data structures. - */ -typedef yescrypt_region_t yescrypt_shared1_t; -typedef struct { - yescrypt_shared1_t shared1; - uint32_t mask1; -} yescrypt_shared_t; -typedef yescrypt_region_t yescrypt_local_t; - -/** - * Possible values for yescrypt_init_shared()'s flags argument. - */ -typedef enum { - YESCRYPT_SHARED_DEFAULTS = 0, - YESCRYPT_SHARED_PREALLOCATED = 0x100 -} yescrypt_init_shared_flags_t; - -/** - * Possible values for the flags argument of yescrypt_kdf(), - * yescrypt_gensalt_r(), yescrypt_gensalt(). These may be OR'ed together, - * except that YESCRYPT_WORM and YESCRYPT_RW are mutually exclusive. - * Please refer to the description of yescrypt_kdf() below for the meaning of - * these flags. - */ -typedef enum { -/* public */ - YESCRYPT_WORM = 0, - YESCRYPT_RW = 1, - YESCRYPT_PARALLEL_SMIX = 2, - YESCRYPT_PWXFORM = 4, -/* private */ - __YESCRYPT_INIT_SHARED_1 = 0x10000, - __YESCRYPT_INIT_SHARED_2 = 0x20000, - __YESCRYPT_INIT_SHARED = 0x30000 -} yescrypt_flags_t; - -#define YESCRYPT_KNOWN_FLAGS \ - (YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | YESCRYPT_PWXFORM | \ - __YESCRYPT_INIT_SHARED) - -/** - * yescrypt_init_shared(shared, param, paramlen, N, r, p, flags, mask, - * buf, buflen): - * Optionally allocate memory for and initialize the shared (ROM) data - * structure. The parameters N, r, and p must satisfy the same conditions as - * with crypto_scrypt(). param and paramlen specify a local parameter with - * which the ROM is seeded. If buf is not NULL, then it is used to return - * buflen bytes of message digest for the initialized ROM (the caller may use - * this to verify that the ROM has been computed in the same way that it was on - * a previous run). - * - * Return 0 on success; or -1 on error. - * - * If bit YESCRYPT_SHARED_PREALLOCATED in flags is set, then memory for the - * ROM is assumed to have been preallocated by the caller, with - * shared->shared1.aligned being the start address of the ROM and - * shared->shared1.aligned_size being its size (which must be consistent with - * N, r, and p). This may be used e.g. when the ROM is to be placed in a SysV - * shared memory segment allocated by the caller. - * - * mask controls the frequency of ROM accesses by yescrypt_kdf(). Normally it - * should be set to 1, to interleave RAM and ROM accesses, which works well - * when both regions reside in the machine's RAM anyway. Other values may be - * used e.g. when the ROM is memory-mapped from a disk file. Recommended mask - * values are powers of 2 minus 1 or minus 2. Here's the effect of some mask - * values: - * mask value ROM accesses in SMix 1st loop ROM accesses in SMix 2nd loop - * 0 0 1/2 - * 1 1/2 1/2 - * 2 0 1/4 - * 3 1/4 1/4 - * 6 0 1/8 - * 7 1/8 1/8 - * 14 0 1/16 - * 15 1/16 1/16 - * 1022 0 1/1024 - * 1023 1/1024 1/1024 - * - * Actual computation of the ROM contents may be avoided, if you don't intend - * to use a ROM but need a dummy shared structure, by calling this function - * with NULL, 0, 0, 0, 0, YESCRYPT_SHARED_DEFAULTS, 0, NULL, 0 for the - * arguments starting with param and on. - * - * MT-safe as long as shared is local to the thread. - */ -extern int yescrypt_init_shared(yescrypt_shared_t * __shared, - const uint8_t * __param, size_t __paramlen, - uint64_t __N, uint32_t __r, uint32_t __p, - yescrypt_init_shared_flags_t __flags, uint32_t __mask, - uint8_t * __buf, size_t __buflen); - -/** - * yescrypt_free_shared(shared): - * Free memory that had been allocated with yescrypt_init_shared(). - * - * Return 0 on success; or -1 on error. - * - * MT-safe as long as shared is local to the thread. - */ -extern int yescrypt_free_shared(yescrypt_shared_t * __shared); - -/** - * yescrypt_init_local(local): - * Initialize the thread-local (RAM) data structure. Actual memory allocation - * is currently fully postponed until a call to yescrypt_kdf() or yescrypt_r(). - * - * Return 0 on success; or -1 on error. - * - * MT-safe as long as local is local to the thread. - */ -extern int yescrypt_init_local(yescrypt_local_t * __local); - -/** - * yescrypt_free_local(local): - * Free memory that may have been allocated for an initialized thread-local - * (RAM) data structure. - * - * Return 0 on success; or -1 on error. - * - * MT-safe as long as local is local to the thread. - */ -extern int yescrypt_free_local(yescrypt_local_t * __local); - -/** - * yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen, - * N, r, p, t, flags, buf, buflen): - * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r, - * p, buflen), or a revision of scrypt as requested by flags and shared, and - * write the result into buf. The parameters N, r, p, and buflen must satisfy - * the same conditions as with crypto_scrypt(). t controls computation time - * while not affecting peak memory usage. shared and flags may request - * special modes as described below. local is the thread-local data - * structure, allowing to preserve and reuse a memory allocation across calls, - * thereby reducing its overhead. - * - * Return 0 on success; or -1 on error. - * - * t controls computation time. t = 0 is optimal in terms of achieving the - * highest area-time for ASIC attackers. Thus, higher computation time, if - * affordable, is best achieved by increasing N rather than by increasing t. - * However, if the higher memory usage (which goes along with higher N) is not - * affordable, or if fine-tuning of the time is needed (recall that N must be a - * power of 2), then t = 1 or above may be used to increase time while staying - * at the same peak memory usage. t = 1 increases the time by 25% and - * decreases the normalized area-time to 96% of optimal. (Of course, in - * absolute terms the area-time increases with higher t. It's just that it - * would increase slightly more with higher N*r rather than with higher t.) - * t = 2 increases the time by another 20% and decreases the normalized - * area-time to 89% of optimal. Thus, these two values are reasonable to use - * for fine-tuning. Values of t higher than 2 result in further increase in - * time while reducing the efficiency much further (e.g., down to around 50% of - * optimal for t = 5, which runs 3 to 4 times slower than t = 0, with exact - * numbers varying by the flags settings). - * - * Classic scrypt is available by setting t = 0 and flags to YESCRYPT_WORM and - * passing a dummy shared structure (see the description of - * yescrypt_init_shared() above for how to produce one). In this mode, the - * thread-local memory region (RAM) is first sequentially written to and then - * randomly read from. This algorithm is friendly towards time-memory - * tradeoffs (TMTO), available both to defenders (albeit not in this - * implementation) and to attackers. - * - * Setting YESCRYPT_RW adds extra random reads and writes to the thread-local - * memory region (RAM), which makes TMTO a lot less efficient. This may be - * used to slow down the kinds of attackers who would otherwise benefit from - * classic scrypt's efficient TMTO. Since classic scrypt's TMTO allows not - * only for the tradeoff, but also for a decrease of attacker's area-time (by - * up to a constant factor), setting YESCRYPT_RW substantially increases the - * cost of attacks in area-time terms as well. Yet another benefit of it is - * that optimal area-time is reached at an earlier time than with classic - * scrypt, and t = 0 actually corresponds to this earlier completion time, - * resulting in quicker hash computations (and thus in higher request rate - * capacity). Due to these properties, YESCRYPT_RW should almost always be - * set, except when compatibility with classic scrypt or TMTO-friendliness are - * desired. - * - * YESCRYPT_PARALLEL_SMIX moves parallelism that is present with p > 1 to a - * lower level as compared to where it is in classic scrypt. This reduces - * flexibility for efficient computation (for both attackers and defenders) by - * requiring that, short of resorting to TMTO, the full amount of memory be - * allocated as needed for the specified p, regardless of whether that - * parallelism is actually being fully made use of or not. (For comparison, a - * single instance of classic scrypt may be computed in less memory without any - * CPU time overhead, but in more real time, by not making full use of the - * parallelism.) This may be desirable when the defender has enough memory - * with sufficiently low latency and high bandwidth for efficient full parallel - * execution, yet the required memory size is high enough that some likely - * attackers might end up being forced to choose between using higher latency - * memory than they could use otherwise (waiting for data longer) or using TMTO - * (waiting for data more times per one hash computation). The area-time cost - * for other kinds of attackers (who would use the same memory type and TMTO - * factor or no TMTO either way) remains roughly the same, given the same - * running time for the defender. In the TMTO-friendly YESCRYPT_WORM mode, as - * long as the defender has enough memory that is just as fast as the smaller - * per-thread regions would be, doesn't expect to ever need greater - * flexibility (except possibly via TMTO), and doesn't need backwards - * compatibility with classic scrypt, there are no other serious drawbacks to - * this setting. In the YESCRYPT_RW mode, which is meant to discourage TMTO, - * this new approach to parallelization makes TMTO less inefficient. (This is - * an unfortunate side-effect of avoiding some random writes, as we have to in - * order to allow for parallel threads to access a common memory region without - * synchronization overhead.) Thus, in this mode this setting poses an extra - * tradeoff of its own (higher area-time cost for a subset of attackers vs. - * better TMTO resistance). Setting YESCRYPT_PARALLEL_SMIX also changes the - * way the running time is to be controlled from N*r*p (for classic scrypt) to - * N*r (in this modification). All of this applies only when p > 1. For - * p = 1, this setting is a no-op. - * - * Passing a real shared structure, with ROM contents previously computed by - * yescrypt_init_shared(), enables the use of ROM and requires YESCRYPT_RW for - * the thread-local RAM region. In order to allow for initialization of the - * ROM to be split into a separate program, the shared->shared1.aligned and - * shared->shared1.aligned_size fields may be set by the caller of - * yescrypt_kdf() manually rather than with yescrypt_init_shared(). - * - * local must be initialized with yescrypt_init_local(). - * - * MT-safe as long as local and buf are local to the thread. - */ -extern int yescrypt_kdf(const yescrypt_shared_t * __shared, - yescrypt_local_t * __local, - const uint8_t * __passwd, size_t __passwdlen, - const uint8_t * __salt, size_t __saltlen, - uint64_t __N, uint32_t __r, uint32_t __p, uint32_t __t, - yescrypt_flags_t __flags, - uint8_t * __buf, size_t __buflen); - -/** - * yescrypt_r(shared, local, passwd, passwdlen, setting, buf, buflen): - * Compute and encode an scrypt or enhanced scrypt hash of passwd given the - * parameters and salt value encoded in setting. If the shared structure is - * not dummy, a ROM is used and YESCRYPT_RW is required. Otherwise, whether to - * use the YESCRYPT_WORM (classic scrypt) or YESCRYPT_RW (time-memory tradeoff - * discouraging modification) is determined by the setting string. shared and - * local must be initialized as described above for yescrypt_kdf(). buf must - * be large enough (as indicated by buflen) to hold the encoded hash string. - * - * Return the encoded hash string on success; or NULL on error. - * - * MT-safe as long as local and buf are local to the thread. - */ -extern uint8_t * yescrypt_r(const yescrypt_shared_t * __shared, - yescrypt_local_t * __local, - const uint8_t * __passwd, size_t __passwdlen, - const uint8_t * __setting, - uint8_t * __buf, size_t __buflen); - -/** - * yescrypt(passwd, setting): - * Compute and encode an scrypt or enhanced scrypt hash of passwd given the - * parameters and salt value encoded in setting. Whether to use the - * YESCRYPT_WORM (classic scrypt) or YESCRYPT_RW (time-memory tradeoff - * discouraging modification) is determined by the setting string. - * - * Return the encoded hash string on success; or NULL on error. - * - * This is a crypt(3)-like interface, which is simpler to use than - * yescrypt_r(), but it is not MT-safe, it does not allow for the use of a ROM, - * and it is slower than yescrypt_r() for repeated calls because it allocates - * and frees memory on each call. - * - * MT-unsafe. - */ -extern uint8_t * yescrypt(const uint8_t * __passwd, const uint8_t * __setting); - -/** - * yescrypt_gensalt_r(N_log2, r, p, flags, src, srclen, buf, buflen): - * Generate a setting string for use with yescrypt_r() and yescrypt() by - * encoding into it the parameters N_log2 (which is to be set to base 2 - * logarithm of the desired value for N), r, p, flags, and a salt given by src - * (of srclen bytes). buf must be large enough (as indicated by buflen) to - * hold the setting string. - * - * Return the setting string on success; or NULL on error. - * - * MT-safe as long as buf is local to the thread. - */ -extern uint8_t * yescrypt_gensalt_r( - uint32_t __N_log2, uint32_t __r, uint32_t __p, - yescrypt_flags_t __flags, - const uint8_t * __src, size_t __srclen, - uint8_t * __buf, size_t __buflen); - -/** - * yescrypt_gensalt(N_log2, r, p, flags, src, srclen): - * Generate a setting string for use with yescrypt_r() and yescrypt(). This - * function is the same as yescrypt_gensalt_r() except that it uses a static - * buffer and thus is not MT-safe. - * - * Return the setting string on success; or NULL on error. - * - * MT-unsafe. - */ -extern uint8_t * yescrypt_gensalt( - uint32_t __N_log2, uint32_t __r, uint32_t __p, - yescrypt_flags_t __flags, - const uint8_t * __src, size_t __srclen); - -#ifdef __cplusplus -} -#endif - -#endif /* !_YESCRYPT_H_ */ diff --git a/algorithm/yescryptcommon.c b/algorithm/yescryptcommon.c deleted file mode 100644 index cf7067d0..00000000 --- a/algorithm/yescryptcommon.c +++ /dev/null @@ -1,360 +0,0 @@ -/*- - * Copyright 2013,2014 Alexander Peslyak - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include -#include -#include "algorithm/yescrypt_core.h" - -#define BYTES2CHARS(bytes) \ - ((((bytes) * 8) + 5) / 6) - -#define HASH_SIZE 32 /* bytes */ -#define HASH_LEN BYTES2CHARS(HASH_SIZE) /* base-64 chars */ -#define YESCRYPT_FLAGS (YESCRYPT_RW | YESCRYPT_PWXFORM) -static const char * const itoa64 = - "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; - -static uint8_t * encode64_uint32(uint8_t * dst, size_t dstlen, - uint32_t src, uint32_t srcbits) -{ - uint32_t bit; - - for (bit = 0; bit < srcbits; bit += 6) { - if (dstlen < 1) - return NULL; - *dst++ = itoa64[src & 0x3f]; - dstlen--; - src >>= 6; - } - - return dst; -} - -static uint8_t * encode64(uint8_t * dst, size_t dstlen, - const uint8_t * src, size_t srclen) -{ - size_t i; - - for (i = 0; i < srclen; ) { - uint8_t * dnext; - uint32_t value = 0, bits = 0; - do { - value |= (uint32_t)src[i++] << bits; - bits += 8; - } while (bits < 24 && i < srclen); - dnext = encode64_uint32(dst, dstlen, value, bits); - if (!dnext) - return NULL; - dstlen -= dnext - dst; - dst = dnext; - } - - return dst; -} - -static int decode64_one(uint32_t * dst, uint8_t src) -{ - const char * ptr = strchr(itoa64, src); - if (ptr) { - *dst = ptr - itoa64; - return 0; - } - *dst = 0; - return -1; -} - -static const uint8_t * decode64_uint32(uint32_t * dst, uint32_t dstbits, - const uint8_t * src) -{ - uint32_t bit; - uint32_t value; - - value = 0; - for (bit = 0; bit < dstbits; bit += 6) { - uint32_t one; - if (decode64_one(&one, *src)) { - *dst = 0; - return NULL; - } - src++; - value |= one << bit; - } - - *dst = value; - return src; -} - -uint8_t * -yescrypt_r(const yescrypt_shared_t * shared, yescrypt_local_t * local, - const uint8_t * passwd, size_t passwdlen, - const uint8_t * setting, - uint8_t * buf, size_t buflen) -{ - uint8_t hash[HASH_SIZE]; - const uint8_t * src, * salt; - uint8_t * dst; - size_t prefixlen, saltlen, need; - uint8_t version; - uint64_t N; - uint32_t r, p; - yescrypt_flags_t flags = YESCRYPT_WORM; - fflush(stdout); - if (setting[0] != '$' || setting[1] != '7') - { - fflush(stdout); - return NULL; - } - fflush(stdout); - src = setting + 2; - fflush(stdout); - switch ((version = *src)) { - case '$': - fflush(stdout); - break; - case 'X': - src++; - flags = YESCRYPT_RW; - fflush(stdout); - break; - default: - { - fflush(stdout); - return NULL; - } - } - - fflush(stdout); - if (*src != '$') { - uint32_t decoded_flags; - if (decode64_one(&decoded_flags, *src)) - - { - fflush(stdout); - return NULL; - } - flags = decoded_flags; - if (*++src != '$') - { - fflush(stdout); - return NULL; - } - } - src++; - - { - uint32_t N_log2; - if (decode64_one(&N_log2, *src)) - { - return NULL; - } - src++; - N = (uint64_t)1 << N_log2; - } - - src = decode64_uint32(&r, 30, src); - if (!src) - { - return NULL; - } - - src = decode64_uint32(&p, 30, src); - if (!src) - { - return NULL; - } - - prefixlen = src - setting; - - salt = src; - src = (uint8_t *)strrchr((char *)salt, '$'); - if (src) - saltlen = src - salt; - else - saltlen = strlen((char *)salt); - - need = prefixlen + saltlen + 1 + HASH_LEN + 1; - if (need > buflen || need < saltlen) - - { - fflush(stdout); - return NULL; - } - -fflush(stdout); - if (yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen, - N, r, p, 0, flags, hash, sizeof(hash))) - { - fflush(stdout); - return NULL; - } - - dst = buf; - memcpy(dst, setting, prefixlen + saltlen); - dst += prefixlen + saltlen; - *dst++ = '$'; - - dst = encode64(dst, buflen - (dst - buf), hash, sizeof(hash)); - /* Could zeroize hash[] here, but yescrypt_kdf() doesn't zeroize its - * memory allocations yet anyway. */ - if (!dst || dst >= buf + buflen) /* Can't happen */ - { - return NULL; - } - - *dst = 0; /* NUL termination */ - fflush(stdout); - return buf; -} - -uint8_t * -yescrypt(const uint8_t * passwd, const uint8_t * setting) -{ - static uint8_t buf[4 + 1 + 5 + 5 + BYTES2CHARS(32) + 1 + HASH_LEN + 1]; - yescrypt_shared_t shared; - yescrypt_local_t local; - uint8_t * retval; - if (yescrypt_init_shared(&shared, NULL, 0, - 0, 0, 0, YESCRYPT_SHARED_DEFAULTS, 0, NULL, 0)) - return NULL; - if (yescrypt_init_local(&local)) { - yescrypt_free_shared(&shared); - return NULL; - } - retval = yescrypt_r(&shared, &local, - passwd, 80, setting, buf, sizeof(buf)); - // printf("hashse='%s'\n", (char *)retval); - if (yescrypt_free_local(&local)) { - yescrypt_free_shared(&shared); - return NULL; - } - if (yescrypt_free_shared(&shared)) - return NULL; - return retval; - -} - -uint8_t * -yescrypt_gensalt_r(uint32_t N_log2, uint32_t r, uint32_t p, - yescrypt_flags_t flags, - const uint8_t * src, size_t srclen, - uint8_t * buf, size_t buflen) -{ - uint8_t * dst; - size_t prefixlen = 3 + 1 + 5 + 5; - size_t saltlen = BYTES2CHARS(srclen); - size_t need; - - if (p == 1) - flags &= ~YESCRYPT_PARALLEL_SMIX; - - if (flags) { - if (flags & ~0x3f) - return NULL; - - prefixlen++; - if (flags != YESCRYPT_RW) - prefixlen++; - } - - need = prefixlen + saltlen + 1; - if (need > buflen || need < saltlen || saltlen < srclen) - return NULL; - - if (N_log2 > 63 || ((uint64_t)r * (uint64_t)p >= (1U << 30))) - return NULL; - - dst = buf; - *dst++ = '$'; - *dst++ = '7'; - if (flags) { - *dst++ = 'X'; /* eXperimental, subject to change */ - if (flags != YESCRYPT_RW) - *dst++ = itoa64[flags]; - } - *dst++ = '$'; - - *dst++ = itoa64[N_log2]; - - dst = encode64_uint32(dst, buflen - (dst - buf), r, 30); - if (!dst) /* Can't happen */ - return NULL; - - dst = encode64_uint32(dst, buflen - (dst - buf), p, 30); - if (!dst) /* Can't happen */ - return NULL; - - dst = encode64(dst, buflen - (dst - buf), src, srclen); - if (!dst || dst >= buf + buflen) /* Can't happen */ - return NULL; - - *dst = 0; /* NUL termination */ - - return buf; -} - -uint8_t * -yescrypt_gensalt(uint32_t N_log2, uint32_t r, uint32_t p, - yescrypt_flags_t flags, - const uint8_t * src, size_t srclen) -{ - static uint8_t buf[4 + 1 + 5 + 5 + BYTES2CHARS(32) + 1]; - return yescrypt_gensalt_r(N_log2, r, p, flags, src, srclen, - buf, sizeof(buf)); -} - -static int -yescrypt_bsty(const uint8_t * passwd, size_t passwdlen, - const uint8_t * salt, size_t saltlen, uint64_t N, uint32_t r, uint32_t p, - uint8_t * buf, size_t buflen) -{ - static __thread int initialized = 0; - static __thread yescrypt_shared_t shared; - static __thread yescrypt_local_t local; - -// static __declspec(thread) int initialized = 0; -// static __declspec(thread) yescrypt_shared_t shared; -// static __declspec(thread) yescrypt_local_t local; - - int retval; - if (!initialized) { -/* "shared" could in fact be shared, but it's simpler to keep it private - * along with "local". It's dummy and tiny anyway. */ - if (yescrypt_init_shared(&shared, NULL, 0, - 0, 0, 0, YESCRYPT_SHARED_DEFAULTS, 0, NULL, 0)) - return -1; - if (yescrypt_init_local(&local)) { - yescrypt_free_shared(&shared); - return -1; - } - initialized = 1; - } - retval = yescrypt_kdf(&shared, &local, - passwd, passwdlen, salt, saltlen, N, r, p, 0, YESCRYPT_FLAGS, - buf, buflen); - - return retval; -} - -void yescrypt_hash(const unsigned char *input, unsigned char *output) -{ - - yescrypt_bsty((const uint8_t *)input, 80, (const uint8_t *) input, 80, 2048, 8, 1, (uint8_t *)output, 32); -} diff --git a/kernel/lbry.cl b/kernel/lbry.cl deleted file mode 100644 index fef4f90e..00000000 --- a/kernel/lbry.cl +++ /dev/null @@ -1,179 +0,0 @@ -#include "sha256.cl" -#include "wolf-sha512.cl" -#include "ripemd160.cl" - -#define SWAP32(x) as_uint(as_uchar4(x).s3210) -#define SWAP64(x) as_ulong(as_uchar8(x).s76543210) - - -__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) -__kernel void search(__global const uint *input, __global uint8 *ctx) -{ - // SHA256 takes 16 uints of input per block - we have 112 bytes to process - // 8 * 16 == 64, meaning two block transforms. - - uint SHA256Buf[16]; - uint gid = get_global_id(0); - - // Remember the last four is the nonce - so 108 bytes / 4 bytes per dword - #pragma unroll - for(int i = 0; i < 16; ++i) SHA256Buf[i] = SWAP32(input[i]); - - - - // SHA256 initialization constants - uint8 outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19); - - #pragma unroll - for(int i = 0; i < 3; ++i) - { - if(i == 1) - { - #pragma unroll - for(int i = 0; i < 11; ++i) SHA256Buf[i] = SWAP32(input[i + 16]); - SHA256Buf[11] = SWAP32(gid); - SHA256Buf[12] = 0x80000000; - SHA256Buf[13] = 0x00000000; - SHA256Buf[14] = 0x00000000; - SHA256Buf[15] = 0x00000380; - } - if(i == 2) - { - ((uint8 *)SHA256Buf)[0] = outbuf; - SHA256Buf[8] = 0x80000000; - #pragma unroll - for(int i = 9; i < 15; ++i) SHA256Buf[i] = 0x00000000; - SHA256Buf[15] = 0x00000100; - outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19); - } - outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf); - } - - /* - outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf); - #pragma unroll - for(int i = 0; i < 11; ++i) SHA256Buf[i] = SWAP32(input[i + 16]); - SHA256Buf[11] = SWAP32(gid); - SHA256Buf[12] = 0x80000000; - SHA256Buf[13] = 0x00000000; - SHA256Buf[14] = 0x00000000; - SHA256Buf[15] = 0x00000380; - - outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf); - ((uint8 *)SHA256Buf)[0] = outbuf; - SHA256Buf[8] = 0x80000000; - for(int i = 9; i < 15; ++i) SHA256Buf[i] = 0x00000000; - SHA256Buf[15] = 0x00000100; - outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19); - outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf); - */ - - - /* - - //outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf); - //outbuf = sha256_round(((uint16 *)SHA256Buf)[1], outbuf); - - // outbuf would normall be SWAP32'd here, but it'll need it again - // once we use it as input to the next SHA256, so it negates. - - ((uint8 *)SHA256Buf)[0] = outbuf; - SHA256Buf[8] = 0x80000000; - for(int i = 9; i < 15; ++i) SHA256Buf[i] = 0x00000000; - SHA256Buf[15] = 0x00000100; - - outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19); - outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf); - */ - - - - outbuf.s0 = SWAP32(outbuf.s0); - outbuf.s1 = SWAP32(outbuf.s1); - outbuf.s2 = SWAP32(outbuf.s2); - outbuf.s3 = SWAP32(outbuf.s3); - outbuf.s4 = SWAP32(outbuf.s4); - outbuf.s5 = SWAP32(outbuf.s5); - outbuf.s6 = SWAP32(outbuf.s6); - outbuf.s7 = SWAP32(outbuf.s7); - - ctx[get_global_id(0) - get_global_offset(0)] = outbuf; -} - -__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) -__kernel void search1(__global uint8 *ctx) -{ - ulong W[16] = { 0UL }, SHA512Out[8]; - uint SHA256Buf[16]; - uint8 outbuf = ctx[get_global_id(0) - get_global_offset(0)]; - - ((uint8 *)W)[0] = outbuf; - - for(int i = 0; i < 4; ++i) W[i] = SWAP64(W[i]); - - W[4] = 0x8000000000000000UL; - W[15] = 0x0000000000000100UL; - - for(int i = 0; i < 8; ++i) SHA512Out[i] = SHA512_INIT[i]; - - SHA512Block(W, SHA512Out); - - for(int i = 0; i < 8; ++i) SHA512Out[i] = SWAP64(SHA512Out[i]); - - uint RMD160_0[16] = { 0U }; - uint RMD160_1[16] = { 0U }; - uint RMD160_0_Out[5], RMD160_1_Out[5]; - - for(int i = 0; i < 4; ++i) - { - ((ulong *)RMD160_0)[i] = SHA512Out[i]; - ((ulong *)RMD160_1)[i] = SHA512Out[i + 4]; - } - - RMD160_0[8] = RMD160_1[8] = 0x00000080; - RMD160_0[14] = RMD160_1[14] = 0x00000100; - - for(int i = 0; i < 5; ++i) - { - RMD160_0_Out[i] = RMD160_IV[i]; - RMD160_1_Out[i] = RMD160_IV[i]; - } - - RIPEMD160_ROUND_BODY(RMD160_0, RMD160_0_Out); - RIPEMD160_ROUND_BODY(RMD160_1, RMD160_1_Out); - - for(int i = 0; i < 5; ++i) SHA256Buf[i] = SWAP32(RMD160_0_Out[i]); - for(int i = 5; i < 10; ++i) SHA256Buf[i] = SWAP32(RMD160_1_Out[i - 5]); - SHA256Buf[10] = 0x80000000; - - for(int i = 11; i < 15; ++i) SHA256Buf[i] = 0x00000000U; - - SHA256Buf[15] = 0x00000140; - - outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19); - outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf); - - ctx[get_global_id(0) - get_global_offset(0)] = outbuf; -} - -__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) -__kernel void search2(__global uint8 *ctx, __global uint *output, ulong target) -{ - uint SHA256Buf[16] = { 0U }; - uint gid = get_global_id(0); - uint8 outbuf = ctx[get_global_id(0) - get_global_offset(0)]; - - ((uint8 *)SHA256Buf)[0] = outbuf; - SHA256Buf[8] = 0x80000000; - for(int i = 9; i < 15; ++i) SHA256Buf[i] = 0x00000000; - SHA256Buf[15] = 0x00000100; - - outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19); - outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf); - - outbuf.s6 = SWAP32(outbuf.s6); - outbuf.s7 = SWAP32(outbuf.s7); - - if(as_ulong(outbuf.s67) <= target) - output[atomic_inc(output+0xFF)] = SWAP32(gid); -} diff --git a/kernel/yescrypt-multi.cl b/kernel/yescrypt-multi.cl deleted file mode 100644 index 3af7b28a..00000000 --- a/kernel/yescrypt-multi.cl +++ /dev/null @@ -1,314 +0,0 @@ -/* -* "yescrypt" kernel implementation. -* -* ==========================(LICENSE BEGIN)============================ -* -* Copyright (c) 2015 djm34 -* -* Permission is hereby granted, free of charge, to any person obtaining -* a copy of this software and associated documentation files (the -* "Software"), to deal in the Software without restriction, including -* without limitation the rights to use, copy, modify, merge, publish, -* distribute, sublicense, and/or sell copies of the Software, and to -* permit persons to whom the Software is furnished to do so, subject to -* the following conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -* -* ===========================(LICENSE END)============================= -* -* @author djm34 -*/ -#if !defined(cl_khr_byte_addressable_store) -#error "Device does not support unaligned stores" -#endif - -#include "yescrypt_essential.cl" - -__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) -__kernel void search(__global const uchar* restrict input, __global uint* restrict output, __global uchar *padcache, __global uchar* buff1, __global uchar* buff2, __global uchar* buff3, const uint target) -{ - - __global ulong16 *hashbuffer = (__global ulong16 *)(padcache + (2048 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); - __global ulong16 *prevstate = (__global ulong16 *)(buff1 + (64 * 128 * sizeof(ulong)*(get_global_id(0) % MAX_GLOBAL_THREADS))); - __global uint8 *sha256tokeep = (__global uint8 *)(buff3 + (8 * sizeof(uint)*(get_global_id(0) % MAX_GLOBAL_THREADS))); - __global ulong16 *Bdev = (__global ulong16 *)(buff2 + (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); - - - - uint nonce = (get_global_id(0)); - uint data[20]; - uint16 in; - uint8 state1, state2; -// uint8 sha256tokeep; - -// ulong16 Bdev[8]; // will require an additional buffer - ((uint16 *)data)[0] = ((__global const uint16 *)input)[0]; - ((uint4 *)data)[4] = ((__global const uint4 *)input)[4]; -// for (int i = 0; i<20; i++) { data[i] = SWAP32(data[i]); } - // if (nonce == 10) { printf("data %08x %08x\n", data[0], data[1]); } - uint8 passwd = sha256_80(data, nonce); - //pbkdf - in.lo = pad1.lo ^ passwd; - in.hi = pad1.hi; - state1 = sha256_Transform(in, H256); - - in.lo = pad2.lo ^ passwd; - in.hi = pad2.hi; - state2 = sha256_Transform(in, H256); - - in = ((uint16*)data)[0]; - state1 = sha256_Transform(in, state1); -#pragma unroll 1 - for (int i = 0; i<8; i++) - { - uint16 result; - in = pad3; - in.s0 = data[16]; - in.s1 = data[17]; - in.s2 = data[18]; - in.s3 = nonce; - in.s4 = 4 * i + 1; - in.lo = sha256_Transform(in, state1); - in.hi = pad4; - result.lo = swapvec(sha256_Transform(in, state2)); - if (i == 0) sha256tokeep[0] = result.lo; - in = pad3; - in.s0 = data[16]; - in.s1 = data[17]; - in.s2 = data[18]; - in.s3 = nonce; - in.s4 = 4 * i + 2; - in.lo = sha256_Transform(in, state1); - in.hi = pad4; - result.hi = swapvec(sha256_Transform(in, state2)); - Bdev[i].lo = as_ulong8(shuffle(result)); -// Bdev[i].lo = as_ulong8(result); - in = pad3; - in.s0 = data[16]; - in.s1 = data[17]; - in.s2 = data[18]; - in.s3 = nonce; - in.s4 = 4 * i + 3; - in.lo = sha256_Transform(in, state1); - in.hi = pad4; - result.lo = swapvec(sha256_Transform(in, state2)); - in = pad3; - in.s0 = data[16]; - in.s1 = data[17]; - in.s2 = data[18]; - in.s3 = nonce; - in.s4 = 4 * i + 4; - in.lo = sha256_Transform(in, state1); - in.hi = pad4; - result.hi = swapvec(sha256_Transform(in, state2)); - - - Bdev[i].hi = as_ulong8(shuffle(result)); -// Bdev[i].hi = as_ulong8(result); - } - - //mixing1 - - prevstate[0] = Bdev[0]; - Bdev[0] = blockmix_salsa8_small2(Bdev[0]); - prevstate[1] = Bdev[0]; - Bdev[0] = blockmix_salsa8_small2(Bdev[0]); - - uint n = 1; -#pragma unroll 1 - for (uint i = 2; i < 64; i++) - { - - prevstate[i] = Bdev[0]; - - if ((i&(i - 1)) == 0) n = n << 1; - - uint j = as_uint2(Bdev[0].hi.s0).x & (n - 1); - - j += i - n; - Bdev[0] ^= prevstate[j]; - - Bdev[0] = blockmix_salsa8_small2(Bdev[0]); - } - - -} - - - -__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) -__kernel void search1(__global uchar *buffer1, __global uchar *buffer2) -{ -} - - - - -__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) -__kernel void search2(__global uchar *padcache, __global uchar *buff1, __global uchar *buff2) -{ - - __global ulong16 *hashbuffer = (__global ulong16 *)(padcache + (2048 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); - __global ulong16* prevstate = (__global ulong16 *)(buff1 + (64 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); - __global ulong16 *Bdev = (__global ulong16 *)(buff2 + (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); - - - for (int i = 0; i<8; i++) - hashbuffer[i] = Bdev[i]; - - blockmix_pwxform((__global ulong8*)Bdev, prevstate); - - - for (int i = 0; i<8; i++) - hashbuffer[i + 8] = Bdev[i]; - - blockmix_pwxform((__global ulong8*)Bdev, prevstate); - int n = 1; -#pragma unroll 1 - for (int i = 2; i < 2048; i ++) - { - - for (int k = 0; k<8; k++) - (hashbuffer + 8 * i)[k] = Bdev[k]; - - - if ((i&(i - 1)) == 0) n = n << 1; - - uint j = as_uint2(Bdev[7].hi.s0).x & (n - 1); - j += i - n; - - for (int k = 0; k < 8; k++) - Bdev[k] ^= (hashbuffer + 8 * j)[k]; - - - blockmix_pwxform((__global ulong8*)Bdev, prevstate); - } -} - -/* -__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) -__kernel void search3(__global uchar *buffer1, __global uchar *buffer2) -{ -} -*/ - - -__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) -__kernel void search3(__global uchar *padcache, __global uchar *buff1, __global uchar *buff2) -{ - - __global ulong16 *hashbuffer = (__global ulong16 *)(padcache + (2048 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); - __global ulong16* prevstate = (__global ulong16 *)(buff1 + (64 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); - __global ulong16 *Bdev = (__global ulong16 *)(buff2 + (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); - - -#pragma unroll 1 - for (int z = 0; z < 684; z++) - { - - uint j = as_uint2(Bdev[7].hi.s0).x & 2047; - - - for (int k = 0; k < 8; k++) - Bdev[k] ^= (hashbuffer + 8 * j)[k]; - - if (z<682) - for (int k = 0; k<8; k++) - (hashbuffer + 8 * j)[k] = Bdev[k]; - - blockmix_pwxform((__global ulong8*)Bdev, prevstate); -//// - } - -} - -/* -__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) -__kernel void search5(__global uchar *buffer1, __global uchar *buffer2) -{ -} -*/ - -__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) -__kernel void search4(__global const uchar* restrict input, __global uint* restrict output, __global uchar *buff2,__global uchar* buff3, const uint target) -{ - - __global ulong16 *Bdev = (__global ulong16 *)(buff2 + (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); - __global uint8 *sha256tokeep = (__global uint8 *)(buff3 + (8 * sizeof(uint)*(get_global_id(0) % MAX_GLOBAL_THREADS))); - - uint nonce = (get_global_id(0)); - - - uint data[20]; - ((uint16 *)data)[0] = ((__global const uint16 *)input)[0]; - ((uint4 *)data)[4] = ((__global const uint4 *)input)[4]; -// for (int i = 0; i<20; i++) { data[i] = SWAP32(data[i]); } - uint8 swpass = swapvec(sha256tokeep[0]); - uint16 in; - uint8 state1,state2; - in.lo = pad1.lo ^ swpass; - in.hi = pad1.hi; - - - state1 = sha256_Transform(in, H256); - - in.lo = pad2.lo ^ swpass; - in.hi = pad2.hi; - state2 = sha256_Transform(in, H256); - -#pragma unroll 1 - for (int i = 0; i<8; i++) { - in = unshuffle(Bdev[i].lo); - in = swapvec16(in); - state1 = sha256_Transform(in, state1); - in = unshuffle(Bdev[i].hi); - in = swapvec16(in); - state1 = sha256_Transform(in, state1); - } - in = pad5; - state1 = sha256_Transform(in, state1); - in.lo = state1; - in.hi = pad4; - uint8 res = sha256_Transform(in, state2); - - //hmac and final sha - - in.lo = pad1.lo ^ res; - in.hi = pad1.hi; - state1 = sha256_Transform(in, H256); - in.lo = pad2.lo ^ res; - in.hi = pad2.hi; - state2 = sha256_Transform(in, H256); - in = ((uint16*)data)[0]; - state1 = sha256_Transform(in, state1); - in = padsha80; - in.s0 = data[16]; - in.s1 = data[17]; - in.s2 = data[18]; - in.s3 = get_global_id(0); - in.sf = 0x480; - state1 = sha256_Transform(in, state1); - in.lo = state1; - in.hi = pad4; - state1 = sha256_Transform(in, state2); - // state2 = H256; - in.lo = state1; - in.hi = pad4; - in.sf = 0x100; - res = sha256_Transform(in, H256); - - - if (SWAP32(res.s7) <= (target)) - output[atomic_inc(output + 0xFF)] = (nonce); - -} diff --git a/kernel/yescrypt.cl b/kernel/yescrypt.cl deleted file mode 100644 index 0a94ebca..00000000 --- a/kernel/yescrypt.cl +++ /dev/null @@ -1,253 +0,0 @@ -/* -* "yescrypt" kernel implementation. -* -* ==========================(LICENSE BEGIN)============================ -* -* Copyright (c) 2015 djm34 -* -* Permission is hereby granted, free of charge, to any person obtaining -* a copy of this software and associated documentation files (the -* "Software"), to deal in the Software without restriction, including -* without limitation the rights to use, copy, modify, merge, publish, -* distribute, sublicense, and/or sell copies of the Software, and to -* permit persons to whom the Software is furnished to do so, subject to -* the following conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -* -* ===========================(LICENSE END)============================= -* -* @author djm34 -*/ -#if !defined(cl_khr_byte_addressable_store) -#error "Device does not support unaligned stores" -#endif - -#include "yescrypt_essential.cl" - - -__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) -__kernel void search(__global const uchar* restrict input, __global uint* restrict output, __global uchar *padcache, __global uchar* buff1, __global uchar* buff2, const uint target) -{ - - __global ulong16 *hashbuffer = (__global ulong16 *)(padcache + (2048 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); - __global ulong16 *prevstate = (__global ulong16 *)(buff1 + (64 * 128 * sizeof(ulong)*(get_global_id(0) % MAX_GLOBAL_THREADS))); - __global ulong16 *Bdev = (__global ulong16 *)(buff2 + (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); - - - - uint nonce = (get_global_id(0)); - uint data[20]; - uint16 in; - uint8 state1, state2; - uint8 sha256tokeep; - - ((uint16 *)data)[0] = ((__global const uint16 *)input)[0]; - ((uint4 *)data)[4] = ((__global const uint4 *)input)[4]; - for (int i = 0; i<20; i++) { data[i] = SWAP32(data[i]); } - // if (nonce == 10) { printf("data %08x %08x\n", data[0], data[1]); } - uint8 passwd = sha256_80(data, nonce); - //pbkdf - in.lo = pad1.lo ^ passwd; - in.hi = pad1.hi; - state1 = sha256_Transform(in, H256); - - in.lo = pad2.lo ^ passwd; - in.hi = pad2.hi; - state2 = sha256_Transform(in, H256); - - in = ((uint16*)data)[0]; - state1 = sha256_Transform(in, state1); -#pragma unroll 1 - for (int i = 0; i<8; i++) - { - uint16 result; - in = pad3; - in.s0 = data[16]; - in.s1 = data[17]; - in.s2 = data[18]; - in.s3 = nonce; - in.s4 = 4 * i + 1; - in.lo = sha256_Transform(in, state1); - in.hi = pad4; - result.lo = swapvec(sha256_Transform(in, state2)); - if (i == 0) sha256tokeep = result.lo; - in = pad3; - in.s0 = data[16]; - in.s1 = data[17]; - in.s2 = data[18]; - in.s3 = nonce; - in.s4 = 4 * i + 2; - in.lo = sha256_Transform(in, state1); - in.hi = pad4; - result.hi = swapvec(sha256_Transform(in, state2)); - Bdev[i].lo = as_ulong8(shuffle(result)); - in = pad3; - in.s0 = data[16]; - in.s1 = data[17]; - in.s2 = data[18]; - in.s3 = nonce; - in.s4 = 4 * i + 3; - in.lo = sha256_Transform(in, state1); - in.hi = pad4; - result.lo = swapvec(sha256_Transform(in, state2)); - in = pad3; - in.s0 = data[16]; - in.s1 = data[17]; - in.s2 = data[18]; - in.s3 = nonce; - in.s4 = 4 * i + 4; - in.lo = sha256_Transform(in, state1); - in.hi = pad4; - result.hi = swapvec(sha256_Transform(in, state2)); - - - Bdev[i].hi = as_ulong8(shuffle(result)); - } - - //mixing1 - - prevstate[0] = Bdev[0]; - Bdev[0] = blockmix_salsa8_small2(Bdev[0]); - prevstate[1] = Bdev[0]; - Bdev[0] = blockmix_salsa8_small2(Bdev[0]); - - uint n = 1; -#pragma unroll 1 - for (uint i = 2; i < 64; i++) - { - - prevstate[i] = Bdev[0]; - - if ((i&(i - 1)) == 0) n = n << 1; - - uint j = as_uint2(Bdev[0].hi.s0).x & (n - 1); - - j += i - n; - Bdev[0] ^= prevstate[j]; - - Bdev[0] = blockmix_salsa8_small2(Bdev[0]); - } - - - for (int i = 0; i<8; i++) - hashbuffer[i] = Bdev[i]; - - blockmix_pwxform((__global ulong8*)Bdev, prevstate); - - - for (int i = 0; i<8; i++) - hashbuffer[i + 8] = Bdev[i]; - - blockmix_pwxform((__global ulong8*)Bdev, prevstate); - n = 1; -#pragma unroll 1 - for (int i = 2; i < 2048; i++) - { - - for (int k = 0; k<8; k++) - (hashbuffer + 8 * i)[k] = Bdev[k]; - - - if ((i&(i - 1)) == 0) n = n << 1; - - uint j = as_uint2(Bdev[7].hi.s0).x & (n - 1); - j += i - n; - - for (int k = 0; k < 8; k++) - Bdev[k] ^= (hashbuffer + 8 * j)[k]; - - - blockmix_pwxform((__global ulong8*)Bdev, prevstate); - } - - -#pragma unroll 1 - for (int z = 0; z < 684; z++) - { - - uint j = as_uint2(Bdev[7].hi.s0).x & 2047; - - - for (int k = 0; k < 8; k++) - Bdev[k] ^= (hashbuffer + 8 * j)[k]; - - if (z<682) - for (int k = 0; k<8; k++) - (hashbuffer + 8 * j)[k] = Bdev[k]; - - blockmix_pwxform((__global ulong8*)Bdev, prevstate); - //// - } - - - - uint8 swpass = swapvec(sha256tokeep); -// uint16 in; -// uint8 state1, state2; - in.lo = pad1.lo ^ swpass; - in.hi = pad1.hi; - - - state1 = sha256_Transform(in, H256); - - in.lo = pad2.lo ^ swpass; - in.hi = pad2.hi; - state2 = sha256_Transform(in, H256); - -#pragma unroll 1 - for (int i = 0; i<8; i++) { - in = unshuffle(Bdev[i].lo); - in = swapvec16(in); - state1 = sha256_Transform(in, state1); - in = unshuffle(Bdev[i].hi); - in = swapvec16(in); - state1 = sha256_Transform(in, state1); - } - in = pad5; - state1 = sha256_Transform(in, state1); - in.lo = state1; - in.hi = pad4; - uint8 res = sha256_Transform(in, state2); - - //hmac and final sha - - in.lo = pad1.lo ^ res; - in.hi = pad1.hi; - state1 = sha256_Transform(in, H256); - in.lo = pad2.lo ^ res; - in.hi = pad2.hi; - state2 = sha256_Transform(in, H256); - in = ((uint16*)data)[0]; - state1 = sha256_Transform(in, state1); - in = padsha80; - in.s0 = data[16]; - in.s1 = data[17]; - in.s2 = data[18]; - in.s3 = get_global_id(0); - in.sf = 0x480; - state1 = sha256_Transform(in, state1); - in.lo = state1; - in.hi = pad4; - state1 = sha256_Transform(in, state2); - // state2 = H256; - in.lo = state1; - in.hi = pad4; - in.sf = 0x100; - res = sha256_Transform(in, H256); - - - if (SWAP32(res.s7) <= (target)) - output[atomic_inc(output + 0xFF)] = (nonce); - -} - diff --git a/kernel/yescrypt_essential.cl b/kernel/yescrypt_essential.cl deleted file mode 100644 index ba1816a8..00000000 --- a/kernel/yescrypt_essential.cl +++ /dev/null @@ -1,760 +0,0 @@ -/* -* "yescrypt" kernel implementation. -* -* ==========================(LICENSE BEGIN)============================ -* -* Copyright (c) 2015 djm34 -* -* Permission is hereby granted, free of charge, to any person obtaining -* a copy of this software and associated documentation files (the -* "Software"), to deal in the Software without restriction, including -* without limitation the rights to use, copy, modify, merge, publish, -* distribute, sublicense, and/or sell copies of the Software, and to -* permit persons to whom the Software is furnished to do so, subject to -* the following conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -* -* ===========================(LICENSE END)============================= -* -* @author djm34 -*/ - -#define ROL32(x, n) rotate(x, (uint) n) -#define SWAP32(a) (as_uint(as_uchar4(a).wzyx)) -//#define ROL32(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) -#define HASH_MEMORY 4096 - - -#define SALSA(a,b,c,d) do { \ - t =a+d; b^=ROL32(t, 7U); \ - t =b+a; c^=ROL32(t, 9U); \ - t =c+b; d^=ROL32(t, 13U); \ - t =d+c; a^=ROL32(t, 18U); \ -} while(0) - - -#define SALSA_CORE(state) do { \ -\ -SALSA(state.s0,state.s4,state.s8,state.sc); \ -SALSA(state.s5,state.s9,state.sd,state.s1); \ -SALSA(state.sa,state.se,state.s2,state.s6); \ -SALSA(state.sf,state.s3,state.s7,state.sb); \ -SALSA(state.s0,state.s1,state.s2,state.s3); \ -SALSA(state.s5,state.s6,state.s7,state.s4); \ -SALSA(state.sa,state.sb,state.s8,state.s9); \ -SALSA(state.sf,state.sc,state.sd,state.se); \ - } while(0) - -#define uSALSA_CORE(state) do { \ -\ -SALSA(state.s0,state.s4,state.s8,state.sc); \ -SALSA(state.s1,state.s5,state.s9,state.sd); \ -SALSA(state.s2,state.s6,state.sa,state.se); \ -SALSA(state.s3,state.s7,state.sb,state.sf); \ -SALSA(state.s0,state.sd,state.sa,state.s7); \ -SALSA(state.s1,state.se,state.sb,state.s4); \ -SALSA(state.s2,state.sf,state.s8,state.s5); \ -SALSA(state.s3,state.sc,state.s9,state.s6); \ -} while(0) - - -#define unshuffle(state) (as_uint16(state).s0da741eb852fc963) - -#define shuffle(state) (as_uint16(state).s05af49e38d27c16b) - -static __constant uint16 pad1 = -{ - 0x36363636, 0x36363636, 0x36363636, 0x36363636, - 0x36363636, 0x36363636, 0x36363636, 0x36363636, - 0x36363636, 0x36363636, 0x36363636, 0x36363636, - 0x36363636, 0x36363636, 0x36363636, 0x36363636 -}; - -static __constant uint16 pad2 = -{ - 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, - 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, - 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, - 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c -}; - -static __constant uint16 pad5 = -{ - 0x00000001, 0x80000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00002220 -}; - -static __constant uint16 pad3 = -{ - 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x80000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x000004a0 -}; - -static __constant uint16 padsha80 = -{ - 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x80000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000280 -}; - -static __constant uint8 pad4 = -{ - 0x80000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000300 -}; - - - -static __constant uint8 H256 = { - 0x6A09E667, 0xBB67AE85, 0x3C6EF372, - 0xA54FF53A, 0x510E527F, 0x9B05688C, - 0x1F83D9AB, 0x5BE0CD19 -}; - -inline uint8 swapvec(uint8 buf) -{ - uint8 vec; - vec.s0 = SWAP32(buf.s0); - vec.s1 = SWAP32(buf.s1); - vec.s2 = SWAP32(buf.s2); - vec.s3 = SWAP32(buf.s3); - vec.s4 = SWAP32(buf.s4); - vec.s5 = SWAP32(buf.s5); - vec.s6 = SWAP32(buf.s6); - vec.s7 = SWAP32(buf.s7); - return vec; -} - - - -inline uint16 swapvec16(uint16 buf) -{ - uint16 vec; - vec.s0 = SWAP32(buf.s0); - vec.s1 = SWAP32(buf.s1); - vec.s2 = SWAP32(buf.s2); - vec.s3 = SWAP32(buf.s3); - vec.s4 = SWAP32(buf.s4); - vec.s5 = SWAP32(buf.s5); - vec.s6 = SWAP32(buf.s6); - vec.s7 = SWAP32(buf.s7); - vec.s8 = SWAP32(buf.s8); - vec.s9 = SWAP32(buf.s9); - vec.sa = SWAP32(buf.sa); - vec.sb = SWAP32(buf.sb); - vec.sc = SWAP32(buf.sc); - vec.sd = SWAP32(buf.sd); - vec.se = SWAP32(buf.se); - vec.sf = SWAP32(buf.sf); - return vec; -} - - ulong8 salsa20_8(uint16 Bx) -{ -uint t; - uint16 st = Bx; - uSALSA_CORE(st); - uSALSA_CORE(st); - uSALSA_CORE(st); - uSALSA_CORE(st); - return(as_ulong8(st + Bx)); -} - - ulong8 salsa20_8n(uint16 Bx) - { - uint t; - uint16 st = Bx; - SALSA_CORE(st); - SALSA_CORE(st); - SALSA_CORE(st); - SALSA_CORE(st); - return(as_ulong8(st + Bx)); - } - - - ulong16 blockmix_salsa8_small2(ulong16 Bin) -{ - ulong8 X = Bin.hi; - X ^= Bin.lo; - X = salsa20_8(as_uint16(X)); - Bin.lo = X; - X ^= Bin.hi; - X = salsa20_8(as_uint16(X)); - Bin.hi = X; - return(Bin); -} -/* - uint16 salsa20_8_2(uint16 Bx) - { - uint t; - uint16 st = Bx; - uSALSA_CORE(st); - uSALSA_CORE(st); - uSALSA_CORE(st); - uSALSA_CORE(st); - return(st + Bx); - } - - ulong16 blockmix_salsa8_small2(ulong16 Bin) - { - uint16 X = as_uint16(Bin.hi); - X ^= as_uint16(Bin.lo); - X = salsa20_8_2(as_uint16(X)); - Bin.lo = as_ulong8(X); - X ^= as_uint16(Bin.hi); - X = salsa20_8_2(as_uint16(X)); - Bin.hi = as_ulong8(X); - return(Bin); - } -*/ - - -inline ulong2 madd4long2(uint4 a, uint4 b) -{ - uint4 result; - result.x = a.x*a.y + b.x; - result.y = b.y + mad_hi(a.x, a.y, b.x); - result.z = a.z*a.w + b.z; - result.w = b.w + mad_hi(a.z, a.w, b.z); - return as_ulong2(result); -} - -inline ulong2 madd4long3(uint4 a, ulong2 b) -{ - ulong2 result; - result.x = (ulong)a.x*(ulong)a.y + b.x; - result.y = (ulong)a.z*(ulong)a.w + b.y; - return result; -} - - -inline ulong8 block_pwxform_long_old(ulong8 Bout, __global ulong16 *prevstate) -{ - - ulong2 vec = Bout.lo.lo; - - for (int i = 0; i < 6; i++) - { - ulong2 p0, p1; - uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF); - p0 = ((__global ulong2*)(prevstate ))[x.x]; - vec = madd4long3(as_uint4(vec), p0); - p1 = ((__global ulong2*)(prevstate + 32))[x.y]; - - vec ^= p1; - } - Bout.lo.lo = vec; - vec = Bout.lo.hi; - for (int i = 0; i < 6; i++) - { - - ulong2 p0, p1; - uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF); - p0 = ((__global ulong2*)(prevstate))[x.x]; - vec = madd4long3(as_uint4(vec), p0); - p1 = ((__global ulong2*)(prevstate + 32))[x.y]; - - vec ^= p1; - } - Bout.lo.hi = vec; - - vec = Bout.hi.lo; - for (int i = 0; i < 6; i++) - { - ulong2 p0, p1; - uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF); - p0 = ((__global ulong2*)(prevstate))[x.x]; - vec = madd4long3(as_uint4(vec), p0); - p1 = ((__global ulong2*)(prevstate + 32))[x.y]; - vec ^= p1; - } - Bout.hi.lo = vec; - vec = Bout.hi.hi; - for (int i = 0; i < 6; i++) - { - ulong2 p0, p1; - uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF); - p0 = ((__global ulong2*)(prevstate))[x.x]; - vec = madd4long3(as_uint4(vec), p0); - p1 = ((__global ulong2*)(prevstate + 32))[x.y]; - - vec ^= p1; - } - Bout.hi.hi = vec; - - return(Bout); -} - -inline ulong8 block_pwxform_long(ulong8 Bout, __global ulong2 *prevstate) -{ - - ulong2 vec = Bout.lo.lo; - - for (int i = 0; i < 6; i++) - { - ulong2 p0, p1; - uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF); - p0 = prevstate[x.x]; - vec = madd4long3(as_uint4(vec), p0); - p1 = (prevstate + 32*8)[x.y]; - - vec ^= p1; - } - Bout.lo.lo = vec; - vec = Bout.lo.hi; - for (int i = 0; i < 6; i++) - { - - ulong2 p0, p1; - uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF); - p0 = prevstate[x.x]; - vec = madd4long3(as_uint4(vec), p0); - p1 = (prevstate + 32 * 8)[x.y]; - - vec ^= p1; - } - Bout.lo.hi = vec; - - vec = Bout.hi.lo; - for (int i = 0; i < 6; i++) - { - ulong2 p0, p1; - uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF); - p0 = prevstate[x.x]; - vec = madd4long3(as_uint4(vec), p0); - p1 = (prevstate + 32 * 8)[x.y]; - vec ^= p1; - } - Bout.hi.lo = vec; - vec = Bout.hi.hi; - for (int i = 0; i < 6; i++) - { - ulong2 p0, p1; - uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF); - p0 = prevstate[x.x]; - vec = madd4long3(as_uint4(vec), p0); - p1 = (prevstate + 32 * 8)[x.y]; - - vec ^= p1; - } - Bout.hi.hi = vec; - - return(Bout); -} - - - - -inline void blockmix_pwxform(__global ulong8 *Bin, __global ulong16 *prevstate) -{ - Bin[0] ^= Bin[15]; - Bin[0] = block_pwxform_long_old(Bin[0], prevstate); -#pragma unroll 1 - for (int i = 1; i < 16; i++) - { - Bin[i] ^= Bin[i - 1]; - Bin[i] = block_pwxform_long_old(Bin[i], prevstate); - } - Bin[15] = salsa20_8(as_uint16(Bin[15])); -} - -#define SHR(x, n) ((x) >> n) - - -#define S0(x) (ROL32(x, 25) ^ ROL32(x, 14) ^ SHR(x, 3)) -#define S1(x) (ROL32(x, 15) ^ ROL32(x, 13) ^ SHR(x, 10)) - -#define S2(x) (ROL32(x, 30) ^ ROL32(x, 19) ^ ROL32(x, 10)) -#define S3(x) (ROL32(x, 26) ^ ROL32(x, 21) ^ ROL32(x, 7)) - -#define P(a,b,c,d,e,f,g,h,x,K) \ -{ \ - temp1 = h + S3(e) + F1(e,f,g) + (K + x); \ - d += temp1; h = temp1 + S2(a) + F0(a,b,c); \ -} - -#define PLAST(a,b,c,d,e,f,g,h,x,K) \ -{ \ - d += h + S3(e) + F1(e,f,g) + (x + K); \ -} - -#define F0(y, x, z) bitselect(z, y, z ^ x) -#define F1(x, y, z) bitselect(z, y, x) - -#define R0 (W0 = S1(W14) + W9 + S0(W1) + W0) -#define R1 (W1 = S1(W15) + W10 + S0(W2) + W1) -#define R2 (W2 = S1(W0) + W11 + S0(W3) + W2) -#define R3 (W3 = S1(W1) + W12 + S0(W4) + W3) -#define R4 (W4 = S1(W2) + W13 + S0(W5) + W4) -#define R5 (W5 = S1(W3) + W14 + S0(W6) + W5) -#define R6 (W6 = S1(W4) + W15 + S0(W7) + W6) -#define R7 (W7 = S1(W5) + W0 + S0(W8) + W7) -#define R8 (W8 = S1(W6) + W1 + S0(W9) + W8) -#define R9 (W9 = S1(W7) + W2 + S0(W10) + W9) -#define R10 (W10 = S1(W8) + W3 + S0(W11) + W10) -#define R11 (W11 = S1(W9) + W4 + S0(W12) + W11) -#define R12 (W12 = S1(W10) + W5 + S0(W13) + W12) -#define R13 (W13 = S1(W11) + W6 + S0(W14) + W13) -#define R14 (W14 = S1(W12) + W7 + S0(W15) + W14) -#define R15 (W15 = S1(W13) + W8 + S0(W0) + W15) - -#define RD14 (S1(W12) + W7 + S0(W15) + W14) -#define RD15 (S1(W13) + W8 + S0(W0) + W15) - -/// generic sha transform -inline uint8 sha256_Transform(uint16 data, uint8 state) -{ -uint temp1; - uint8 res = state; - uint W0 = data.s0; - uint W1 = data.s1; - uint W2 = data.s2; - uint W3 = data.s3; - uint W4 = data.s4; - uint W5 = data.s5; - uint W6 = data.s6; - uint W7 = data.s7; - uint W8 = data.s8; - uint W9 = data.s9; - uint W10 = data.sA; - uint W11 = data.sB; - uint W12 = data.sC; - uint W13 = data.sD; - uint W14 = data.sE; - uint W15 = data.sF; - -#define v0 res.s0 -#define v1 res.s1 -#define v2 res.s2 -#define v3 res.s3 -#define v4 res.s4 -#define v5 res.s5 -#define v6 res.s6 -#define v7 res.s7 - - P(v0, v1, v2, v3, v4, v5, v6, v7, W0, 0x428A2F98); - P(v7, v0, v1, v2, v3, v4, v5, v6, W1, 0x71374491); - P(v6, v7, v0, v1, v2, v3, v4, v5, W2, 0xB5C0FBCF); - P(v5, v6, v7, v0, v1, v2, v3, v4, W3, 0xE9B5DBA5); - P(v4, v5, v6, v7, v0, v1, v2, v3, W4, 0x3956C25B); - P(v3, v4, v5, v6, v7, v0, v1, v2, W5, 0x59F111F1); - P(v2, v3, v4, v5, v6, v7, v0, v1, W6, 0x923F82A4); - P(v1, v2, v3, v4, v5, v6, v7, v0, W7, 0xAB1C5ED5); - P(v0, v1, v2, v3, v4, v5, v6, v7, W8, 0xD807AA98); - P(v7, v0, v1, v2, v3, v4, v5, v6, W9, 0x12835B01); - P(v6, v7, v0, v1, v2, v3, v4, v5, W10, 0x243185BE); - P(v5, v6, v7, v0, v1, v2, v3, v4, W11, 0x550C7DC3); - P(v4, v5, v6, v7, v0, v1, v2, v3, W12, 0x72BE5D74); - P(v3, v4, v5, v6, v7, v0, v1, v2, W13, 0x80DEB1FE); - P(v2, v3, v4, v5, v6, v7, v0, v1, W14, 0x9BDC06A7); - P(v1, v2, v3, v4, v5, v6, v7, v0, W15, 0xC19BF174); - - P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0xE49B69C1); - P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0xEFBE4786); - P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x0FC19DC6); - P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x240CA1CC); - P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x2DE92C6F); - P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4A7484AA); - P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5CB0A9DC); - P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x76F988DA); - P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x983E5152); - P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA831C66D); - P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xB00327C8); - P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xBF597FC7); - P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xC6E00BF3); - P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD5A79147); - P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0x06CA6351); - P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x14292967); - - P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x27B70A85); - P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x2E1B2138); - P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x4D2C6DFC); - P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x53380D13); - P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x650A7354); - P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x766A0ABB); - P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x81C2C92E); - P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x92722C85); - P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0xA2BFE8A1); - P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA81A664B); - P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xC24B8B70); - P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xC76C51A3); - P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xD192E819); - P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD6990624); - P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0xF40E3585); - P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x106AA070); - - P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x19A4C116); - P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x1E376C08); - P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x2748774C); - P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x34B0BCB5); - P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x391C0CB3); - P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4ED8AA4A); - P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5B9CCA4F); - P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x682E6FF3); - P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x748F82EE); - P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0x78A5636F); - P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0x84C87814); - P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0x8CC70208); - P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0x90BEFFFA); - P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xA4506CEB); - P(v2, v3, v4, v5, v6, v7, v0, v1, RD14, 0xBEF9A3F7); - P(v1, v2, v3, v4, v5, v6, v7, v0, RD15, 0xC67178F2); -#undef v0 -#undef v1 -#undef v2 -#undef v3 -#undef v4 -#undef v5 -#undef v6 -#undef v7 - return (res+state); -} - - -static inline uint8 sha256_round1(uint16 data) -{ - uint temp1; - uint8 res; - uint W0 = data.s0; - uint W1 = data.s1; - uint W2 = data.s2; - uint W3 = data.s3; - uint W4 = data.s4; - uint W5 = data.s5; - uint W6 = data.s6; - uint W7 = data.s7; - uint W8 = data.s8; - uint W9 = data.s9; - uint W10 = data.sA; - uint W11 = data.sB; - uint W12 = data.sC; - uint W13 = data.sD; - uint W14 = data.sE; - uint W15 = data.sF; - - uint v0 = 0x6A09E667; - uint v1 = 0xBB67AE85; - uint v2 = 0x3C6EF372; - uint v3 = 0xA54FF53A; - uint v4 = 0x510E527F; - uint v5 = 0x9B05688C; - uint v6 = 0x1F83D9AB; - uint v7 = 0x5BE0CD19; - - P(v0, v1, v2, v3, v4, v5, v6, v7, W0, 0x428A2F98); - P(v7, v0, v1, v2, v3, v4, v5, v6, W1, 0x71374491); - P(v6, v7, v0, v1, v2, v3, v4, v5, W2, 0xB5C0FBCF); - P(v5, v6, v7, v0, v1, v2, v3, v4, W3, 0xE9B5DBA5); - P(v4, v5, v6, v7, v0, v1, v2, v3, W4, 0x3956C25B); - P(v3, v4, v5, v6, v7, v0, v1, v2, W5, 0x59F111F1); - P(v2, v3, v4, v5, v6, v7, v0, v1, W6, 0x923F82A4); - P(v1, v2, v3, v4, v5, v6, v7, v0, W7, 0xAB1C5ED5); - P(v0, v1, v2, v3, v4, v5, v6, v7, W8, 0xD807AA98); - P(v7, v0, v1, v2, v3, v4, v5, v6, W9, 0x12835B01); - P(v6, v7, v0, v1, v2, v3, v4, v5, W10, 0x243185BE); - P(v5, v6, v7, v0, v1, v2, v3, v4, W11, 0x550C7DC3); - P(v4, v5, v6, v7, v0, v1, v2, v3, W12, 0x72BE5D74); - P(v3, v4, v5, v6, v7, v0, v1, v2, W13, 0x80DEB1FE); - P(v2, v3, v4, v5, v6, v7, v0, v1, W14, 0x9BDC06A7); - P(v1, v2, v3, v4, v5, v6, v7, v0, W15, 0xC19BF174); - - P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0xE49B69C1); - P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0xEFBE4786); - P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x0FC19DC6); - P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x240CA1CC); - P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x2DE92C6F); - P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4A7484AA); - P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5CB0A9DC); - P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x76F988DA); - P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x983E5152); - P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA831C66D); - P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xB00327C8); - P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xBF597FC7); - P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xC6E00BF3); - P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD5A79147); - P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0x06CA6351); - P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x14292967); - - P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x27B70A85); - P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x2E1B2138); - P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x4D2C6DFC); - P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x53380D13); - P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x650A7354); - P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x766A0ABB); - P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x81C2C92E); - P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x92722C85); - P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0xA2BFE8A1); - P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA81A664B); - P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xC24B8B70); - P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xC76C51A3); - P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xD192E819); - P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD6990624); - P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0xF40E3585); - P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x106AA070); - - P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x19A4C116); - P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x1E376C08); - P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x2748774C); - P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x34B0BCB5); - P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x391C0CB3); - P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4ED8AA4A); - P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5B9CCA4F); - P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x682E6FF3); - P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x748F82EE); - P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0x78A5636F); - P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0x84C87814); - P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0x8CC70208); - P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0x90BEFFFA); - P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xA4506CEB); - P(v2, v3, v4, v5, v6, v7, v0, v1, RD14, 0xBEF9A3F7); - P(v1, v2, v3, v4, v5, v6, v7, v0, RD15, 0xC67178F2); - - res.s0 = v0 + 0x6A09E667; - res.s1 = v1 + 0xBB67AE85; - res.s2 = v2 + 0x3C6EF372; - res.s3 = v3 + 0xA54FF53A; - res.s4 = v4 + 0x510E527F; - res.s5 = v5 + 0x9B05688C; - res.s6 = v6 + 0x1F83D9AB; - res.s7 = v7 + 0x5BE0CD19; - return (res); -} - - -static inline uint8 sha256_round2(uint16 data,uint8 buf) -{ - uint temp1; - uint8 res; - uint W0 = data.s0; - uint W1 = data.s1; - uint W2 = data.s2; - uint W3 = data.s3; - uint W4 = data.s4; - uint W5 = data.s5; - uint W6 = data.s6; - uint W7 = data.s7; - uint W8 = data.s8; - uint W9 = data.s9; - uint W10 = data.sA; - uint W11 = data.sB; - uint W12 = data.sC; - uint W13 = data.sD; - uint W14 = data.sE; - uint W15 = data.sF; - - uint v0 = buf.s0; - uint v1 = buf.s1; - uint v2 = buf.s2; - uint v3 = buf.s3; - uint v4 = buf.s4; - uint v5 = buf.s5; - uint v6 = buf.s6; - uint v7 = buf.s7; - - P(v0, v1, v2, v3, v4, v5, v6, v7, W0, 0x428A2F98); - P(v7, v0, v1, v2, v3, v4, v5, v6, W1, 0x71374491); - P(v6, v7, v0, v1, v2, v3, v4, v5, W2, 0xB5C0FBCF); - P(v5, v6, v7, v0, v1, v2, v3, v4, W3, 0xE9B5DBA5); - P(v4, v5, v6, v7, v0, v1, v2, v3, W4, 0x3956C25B); - P(v3, v4, v5, v6, v7, v0, v1, v2, W5, 0x59F111F1); - P(v2, v3, v4, v5, v6, v7, v0, v1, W6, 0x923F82A4); - P(v1, v2, v3, v4, v5, v6, v7, v0, W7, 0xAB1C5ED5); - P(v0, v1, v2, v3, v4, v5, v6, v7, W8, 0xD807AA98); - P(v7, v0, v1, v2, v3, v4, v5, v6, W9, 0x12835B01); - P(v6, v7, v0, v1, v2, v3, v4, v5, W10, 0x243185BE); - P(v5, v6, v7, v0, v1, v2, v3, v4, W11, 0x550C7DC3); - P(v4, v5, v6, v7, v0, v1, v2, v3, W12, 0x72BE5D74); - P(v3, v4, v5, v6, v7, v0, v1, v2, W13, 0x80DEB1FE); - P(v2, v3, v4, v5, v6, v7, v0, v1, W14, 0x9BDC06A7); - P(v1, v2, v3, v4, v5, v6, v7, v0, W15, 0xC19BF174); - - P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0xE49B69C1); - P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0xEFBE4786); - P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x0FC19DC6); - P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x240CA1CC); - P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x2DE92C6F); - P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4A7484AA); - P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5CB0A9DC); - P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x76F988DA); - P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x983E5152); - P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA831C66D); - P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xB00327C8); - P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xBF597FC7); - P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xC6E00BF3); - P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD5A79147); - P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0x06CA6351); - P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x14292967); - - P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x27B70A85); - P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x2E1B2138); - P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x4D2C6DFC); - P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x53380D13); - P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x650A7354); - P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x766A0ABB); - P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x81C2C92E); - P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x92722C85); - P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0xA2BFE8A1); - P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA81A664B); - P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xC24B8B70); - P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xC76C51A3); - P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xD192E819); - P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD6990624); - P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0xF40E3585); - P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x106AA070); - - P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x19A4C116); - P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x1E376C08); - P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x2748774C); - P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x34B0BCB5); - P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x391C0CB3); - P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4ED8AA4A); - P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5B9CCA4F); - P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x682E6FF3); - P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x748F82EE); - P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0x78A5636F); - P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0x84C87814); - P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0x8CC70208); - P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0x90BEFFFA); - P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xA4506CEB); - P(v2, v3, v4, v5, v6, v7, v0, v1, RD14, 0xBEF9A3F7); - P(v1, v2, v3, v4, v5, v6, v7, v0, RD15, 0xC67178F2); - - res.s0 = (v0 + buf.s0); - res.s1 = (v1 + buf.s1); - res.s2 = (v2 + buf.s2); - res.s3 = (v3 + buf.s3); - res.s4 = (v4 + buf.s4); - res.s5 = (v5 + buf.s5); - res.s6 = (v6 + buf.s6); - res.s7 = (v7 + buf.s7); - return (res); -} - -static inline uint8 sha256_80(uint* data,uint nonce) -{ - -uint8 buf = sha256_round1( ((uint16*)data)[0]); -uint16 in = padsha80; -in.s0 = data[16]; -in.s1 = data[17]; -in.s2 = data[18]; -in.s3 = nonce; - -return(sha256_round2(in,buf)); -} - diff --git a/ocl.c b/ocl.c index 44920d2e..b0a644a2 100644 --- a/ocl.c +++ b/ocl.c @@ -36,7 +36,6 @@ #include "ocl/binary_kernel.h" #include "algorithm/neoscrypt.h" #include "algorithm/pluck.h" -#include "algorithm/yescrypt.h" #include "algorithm/lyra2rev2.h" /* FIXME: only here for global config vars, replace with configuration.h @@ -500,91 +499,6 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency)); } - // Yescrypt TC - else if ((cgpu->algorithm.type == ALGO_YESCRYPT || - algorithm->type == ALGO_YESCRYPT_MULTI) && !cgpu->opt_tc) { - size_t glob_thread_count; - long max_int; - unsigned char type = 0; - - // determine which intensity type to use - if (cgpu->rawintensity > 0) { - glob_thread_count = cgpu->rawintensity; - max_int = glob_thread_count; - type = 2; - } - else if (cgpu->xintensity > 0) { - glob_thread_count = clState->compute_shaders * ((cgpu->algorithm.xintensity_shift) ? (1UL << (cgpu->algorithm.xintensity_shift + cgpu->xintensity)) : cgpu->xintensity); - max_int = cgpu->xintensity; - type = 1; - } - else { - glob_thread_count = 1UL << (cgpu->algorithm.intensity_shift + cgpu->intensity); - max_int = ((cgpu->dynamic) ? MAX_INTENSITY : cgpu->intensity); - } - - glob_thread_count = ((glob_thread_count < cgpu->work_size) ? cgpu->work_size : glob_thread_count); - - // if TC * scratchbuf size is too big for memory... reduce to max - if ((glob_thread_count * YESCRYPT_SCRATCHBUF_SIZE) >= (uint64_t)cgpu->max_alloc) { - - /* Selected intensity will not run on this GPU. Not enough memory. - * Adapt the memory setting. */ - // depending on intensity type used, reduce the intensity until it fits into the GPU max_alloc - switch (type) { - //raw intensity - case 2: - while ((glob_thread_count * YESCRYPT_SCRATCHBUF_SIZE) > (uint64_t)cgpu->max_alloc) { - --glob_thread_count; - } - - max_int = glob_thread_count; - cgpu->rawintensity = glob_thread_count; - break; - - //x intensity - case 1: - glob_thread_count = cgpu->max_alloc / YESCRYPT_SCRATCHBUF_SIZE; - max_int = glob_thread_count / clState->compute_shaders; - - while (max_int && ((clState->compute_shaders * (1UL << max_int)) > glob_thread_count)) { - --max_int; - } - - /* Check if max_intensity is >0. */ - if (max_int < MIN_XINTENSITY) { - applog(LOG_ERR, "GPU %d: Max xintensity is below minimum.", gpu); - max_int = MIN_XINTENSITY; - } - - cgpu->xintensity = max_int; - glob_thread_count = clState->compute_shaders * (1UL << max_int); - break; - - default: - glob_thread_count = cgpu->max_alloc / YESCRYPT_SCRATCHBUF_SIZE; - while (max_int && ((1UL << max_int) & glob_thread_count) == 0) { - --max_int; - } - - /* Check if max_intensity is >0. */ - if (max_int < MIN_INTENSITY) { - applog(LOG_ERR, "GPU %d: Max intensity is below minimum.", gpu); - max_int = MIN_INTENSITY; - } - - cgpu->intensity = max_int; - glob_thread_count = 1UL << max_int; - break; - } - } - - // TC is glob thread count - cgpu->thread_concurrency = glob_thread_count; - - applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency)); - } - // Lyra2re v2 TC else if (cgpu->algorithm.type == ALGO_LYRA2REV2 && !cgpu->opt_tc) { size_t glob_thread_count; @@ -788,26 +702,12 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg applog(LOG_DEBUG, "pluck buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize); // scrypt/n-scrypt } - else if (algorithm->type == ALGO_YESCRYPT || algorithm->type == ALGO_YESCRYPT_MULTI) { - /* The scratch/pad-buffer needs 32kBytes memory per thread. */ - bufsize = YESCRYPT_SCRATCHBUF_SIZE * cgpu->thread_concurrency; - buf1size = PLUCK_SECBUF_SIZE * cgpu->thread_concurrency; - buf2size = 128 * 8 * 8 * cgpu->thread_concurrency; - buf3size= 8 * 8 * 4 * cgpu->thread_concurrency; - /* This is the input buffer. For yescrypt this is guaranteed to be - * 80 bytes only. */ - readbufsize = 80; - - applog(LOG_DEBUG, "yescrypt buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize); - // scrypt/n-scrypt - } else if (algorithm->type == ALGO_LYRA2REV2) { /* The scratch/pad-buffer needs 32kBytes memory per thread. */ bufsize = LYRA_SCRATCHBUF_SIZE * cgpu->thread_concurrency; buf1size = 4* 8 * cgpu->thread_concurrency; //matrix - /* This is the input buffer. For yescrypt this is guaranteed to be - * 80 bytes only. */ + /* This is the input buffer.*/ readbufsize = 80; applog(LOG_DEBUG, "lyra2REv2 buffer sizes: %lu RW, %lu RW", (unsigned long)bufsize, (unsigned long)buf1size); @@ -839,27 +739,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg applog(LOG_WARNING, "Your settings come to %lu", (unsigned long)bufsize); } - if (algorithm->type == ALGO_YESCRYPT || algorithm->type == ALGO_YESCRYPT_MULTI) { - // need additionnal buffers - clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf1size, NULL, &status); - if (status != CL_SUCCESS && !clState->buffer1) { - applog(LOG_DEBUG, "Error %d: clCreateBuffer (buffer1), decrease TC or increase LG", status); - return NULL; - } - - clState->buffer2 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf2size, NULL, &status); - if (status != CL_SUCCESS && !clState->buffer2) { - applog(LOG_DEBUG, "Error %d: clCreateBuffer (buffer2), decrease TC or increase LG", status); - return NULL; - } - - clState->buffer3 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf3size, NULL, &status); - if (status != CL_SUCCESS && !clState->buffer3) { - applog(LOG_DEBUG, "Error %d: clCreateBuffer (buffer3), decrease TC or increase LG", status); - return NULL; - } - } - else if (algorithm->type == ALGO_LYRA2REV2) { + if (algorithm->type == ALGO_LYRA2REV2) { // need additionnal buffers clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf1size, NULL, &status); if (status != CL_SUCCESS && !clState->buffer1) { diff --git a/sph/gost.c b/sph/gost.c index f46aeb58..696cc6fc 100644 --- a/sph/gost.c +++ b/sph/gost.c @@ -667,8 +667,8 @@ const unsigned char C[12][64] = { void AddModulo512(const void *a,const void *b,void *c) { - const unsigned char *A=a, *B=b; - unsigned char *C=c; + const unsigned char *A = (unsigned char*)a, *B = (unsigned char*) b; + unsigned char *C = (unsigned char*) c; int t = 0; #ifdef FULL_UNROLL #define ADDBYTE_8(i) t = A[i] + B[i] + (t >> 8); C[i] = t & 0xFF; @@ -751,8 +751,8 @@ void AddModulo512(const void *a,const void *b,void *c) void AddXor512(const void *a,const void *b,void *c) { - const unsigned long long *A=a, *B=b; - unsigned long long *C=c; + const unsigned long long *A = (unsigned long long *) a, *B = (unsigned long long *) b; + unsigned long long *C = (unsigned long long *) c; #ifdef FULL_UNROLL C[0] = A[0] ^ B[0]; C[1] = A[1] ^ B[1]; @@ -1051,7 +1051,7 @@ sph_gost256_init(void *cc) void sph_gost256(void *cc, const void *data, size_t len) { - hash_256(data, 8*len, cc); + hash_256((const unsigned char*)data, 8 * len, (unsigned char*)cc); } /* see sph_gost.h */ @@ -1080,7 +1080,7 @@ sph_gost512_init(void *cc) void sph_gost512(void *cc, const void *data, size_t len) { - hash_512(data, 8*len, cc); + hash_512((const unsigned char*)data, 8 * len, (unsigned char*)cc); } /* see sph_gost.h */ diff --git a/winbuild/dist/include/config.h b/winbuild/dist/include/config.h index 86931708..b91d6d47 100644 --- a/winbuild/dist/include/config.h +++ b/winbuild/dist/include/config.h @@ -1,82 +1,86 @@ -#ifndef __CONFIG_H__ -#define __CONFIG_H__ - -#define HAVE_STDINT_H - -#if defined(_MSC_VER) - -#define HAVE_LIBCURL 1 -#define CURL_HAS_KEEPALIVE 1 -#define HAVE_CURSES 1 -#define HAVE_ADL 1 - -#define STDC_HEADERS 1 -#define EXECV_2ND_ARG_TYPE char* const* - -#define HAVE_ALLOCA 1 -#define HAVE_ATTRIBUTE_COLD 1 -#define HAVE_ATTRIBUTE_CONST 1 -#define HAVE_ATTRIBUTE_NORETURN 1 -#define HAVE_ATTRIBUTE_PRINTF 1 -#define HAVE_ATTRIBUTE_UNUSED 1 -#define HAVE_ATTRIBUTE_USED 1 -#define HAVE_BUILTIN_CONSTANT_P 1 -#define HAVE_BUILTIN_TYPES_COMPATIBLE_P 1 -#define HAVE_DECL_MEMMEM 0 -#define HAVE_INTTYPES_H 1 -#define HAVE_LONG_LONG_INT 1 -#define HAVE_MEMORY_H 1 -#define HAVE_MPROTECT 1 -#define HAVE_RAW_DECL_MEMPCPY 1 -#define HAVE_RAW_DECL_STRNCAT 1 -#define HAVE_RAW_DECL_STRNLEN 1 -#define HAVE_RAW_DECL_STRPBRK 1 -#define HAVE_STDLIB_H 1 -#define HAVE_STRINGS_H 1 -#define HAVE_STRING_H 1 -#define HAVE_SYS_STAT_H 1 -#define HAVE_SYS_TYPES_H 1 -#define HAVE_UNISTD_H 1 -#define HAVE_UNSIGNED_LONG_LONG_INT 1 -#define HAVE_WARN_UNUSED_RESULT 1 -#define HAVE_WCHAR_H 1 -#define HAVE_WCHAR_T 1 - -#define PRIi64 "I64d" -#define PRIi32 "I32d" -#define PRIu32 "I32u" -#define PRIu64 "I64u" - -#define PATH_MAX MAX_PATH - -// Libraries to include -#pragma comment(lib, "winmm.lib") -#pragma comment(lib, "wsock32.lib") -#pragma comment(lib, "pthreadVC2.lib") -#pragma comment(lib, "OpenCL.lib") -#pragma comment(lib, "jansson.lib") - -#ifdef HAVE_LIBCURL -#define CURL_STATICLIB 1 -#pragma comment(lib, "libcurl_a.lib") -#endif - -#ifdef HAVE_CURSES -#pragma comment(lib, "pdcurses.lib") -#endif - -#endif - -#define VERSION "v5.2.0" -#define PACKAGE_NAME "sgminer" -#define PACKAGE_TARNAME "sgminer" -#define PACKAGE_VERSION "5.2.0" -#define PACKAGE_STRING "sgminer 5.2.0" -#define PACKAGE "sgminer" - -#define SGMINER_PREFIX "" - -#include "gitversion.h" -#include "winbuild.h" - -#endif +#ifndef __CONFIG_H__ +#define __CONFIG_H__ + +#define HAVE_STDINT_H + +#if defined(_MSC_VER) + +#define HAVE_LIBCURL 1 +#define CURL_HAS_KEEPALIVE 1 +#define HAVE_CURSES 1 +#define HAVE_ADL 1 +#define HAVE_NVML 1 + +#define STDC_HEADERS 1 +#define EXECV_2ND_ARG_TYPE char* const* + +#define HAVE_ALLOCA 1 +#define HAVE_ATTRIBUTE_COLD 1 +#define HAVE_ATTRIBUTE_CONST 1 +#define HAVE_ATTRIBUTE_NORETURN 1 +#define HAVE_ATTRIBUTE_PRINTF 1 +#define HAVE_ATTRIBUTE_UNUSED 1 +#define HAVE_ATTRIBUTE_USED 1 +#define HAVE_BUILTIN_CONSTANT_P 1 +#define HAVE_BUILTIN_TYPES_COMPATIBLE_P 1 +#define HAVE_DECL_MEMMEM 0 +#define HAVE_INTTYPES_H 1 +#define HAVE_LONG_LONG_INT 1 +#define HAVE_MEMORY_H 1 +#define HAVE_MPROTECT 1 +#define HAVE_RAW_DECL_MEMPCPY 1 +#define HAVE_RAW_DECL_STRNCAT 1 +#define HAVE_RAW_DECL_STRNLEN 1 +#define HAVE_RAW_DECL_STRPBRK 1 +#define HAVE_STDLIB_H 1 +#define HAVE_STRINGS_H 1 +#define HAVE_STRING_H 1 +#define HAVE_SYS_STAT_H 1 +#define HAVE_SYS_TYPES_H 1 +#define HAVE_UNISTD_H 1 +#define HAVE_UNSIGNED_LONG_LONG_INT 1 +#define HAVE_WARN_UNUSED_RESULT 1 +#define HAVE_WCHAR_H 1 +#define HAVE_WCHAR_T 1 + +#define PRIi64 "I64d" +#define PRIi32 "I32d" +#define PRIu32 "I32u" +#define PRIu64 "I64u" + +#define PATH_MAX MAX_PATH + +// Libraries to include +#pragma comment(lib, "winmm.lib") +#pragma comment(lib, "wsock32.lib") +#pragma comment(lib, "pthreadVC2.lib") +#pragma comment(lib, "OpenCL.lib") +#pragma comment(lib, "jansson.lib") + +#ifdef HAVE_LIBCURL +#define CURL_STATICLIB 1 +#pragma comment(lib, "ws2_32.lib") +#pragma comment(lib, "zlib.lib") +#pragma comment(lib, "openssl.lib") +#pragma comment(lib, "libcurl_a.lib") +#endif + +#ifdef HAVE_CURSES +#pragma comment(lib, "pdcurses.lib") +#endif + +#endif + +#define VERSION "5.3.9" +#define PACKAGE_NAME "sgminer" +#define PACKAGE_TARNAME "sgminer" +#define PACKAGE_VERSION "5.3.9" +#define PACKAGE_STRING "sgminer 5.3.9" +#define PACKAGE "sgminer" + +#define SGMINER_PREFIX "" + +//#include "gitversion.h" +#include "winbuild.h" + +#endif diff --git a/winbuild/sgminer.vcxproj b/winbuild/sgminer.vcxproj index 67679bb1..16cbb253 100644 --- a/winbuild/sgminer.vcxproj +++ b/winbuild/sgminer.vcxproj @@ -263,51 +263,60 @@ + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - + @@ -321,7 +330,6 @@ - @@ -330,52 +338,61 @@ + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - + @@ -388,7 +405,6 @@ - diff --git a/winbuild/sgminer.vcxproj.filters b/winbuild/sgminer.vcxproj.filters index 7866e29d..6e120dcf 100644 --- a/winbuild/sgminer.vcxproj.filters +++ b/winbuild/sgminer.vcxproj.filters @@ -86,6 +86,9 @@ Source Files\sph + + Source Files\sph + Source Files\sph @@ -137,6 +140,9 @@ Source Files\algorithm + + Source Files\algorithm + Source Files\algorithm @@ -310,6 +316,9 @@ Header Files\sph + + Header Files\sph + Header Files\sph @@ -343,6 +352,9 @@ Header Files\algorithm + + Header Files\algorithm + Header Files\algorithm