Browse Source

fixed windows building via msvc13

deleted lbry and yescrypt (cause they cant be builded in msvc)
pull/1/head
R4SAS 7 years ago
parent
commit
f85329baf6
  1. 2
      Makefile.am
  2. 140
      algorithm.c
  3. 60
      algorithm/lbry.c
  4. 8
      algorithm/lbry.h
  5. 1364
      algorithm/yescrypt-opt.c
  6. 119
      algorithm/yescrypt.c
  7. 10
      algorithm/yescrypt.h
  8. 376
      algorithm/yescrypt_core.h
  9. 360
      algorithm/yescryptcommon.c
  10. 179
      kernel/lbry.cl
  11. 314
      kernel/yescrypt-multi.cl
  12. 253
      kernel/yescrypt.cl
  13. 760
      kernel/yescrypt_essential.cl
  14. 124
      ocl.c
  15. 12
      sph/gost.c
  16. 12
      winbuild/dist/include/config.h
  17. 72
      winbuild/sgminer.vcxproj
  18. 12
      winbuild/sgminer.vcxproj.filters

2
Makefile.am

@ -78,12 +78,10 @@ sgminer_SOURCES += algorithm/lyra2rev2.c algorithm/lyra2rev2.h
sgminer_SOURCES += algorithm/pluck.c algorithm/pluck.h sgminer_SOURCES += algorithm/pluck.c algorithm/pluck.h
sgminer_SOURCES += algorithm/sia.c algorithm/sia.h sgminer_SOURCES += algorithm/sia.c algorithm/sia.h
sgminer_SOURCES += algorithm/credits.c algorithm/credits.h sgminer_SOURCES += algorithm/credits.c algorithm/credits.h
sgminer_SOURCES += algorithm/yescrypt.h algorithm/yescrypt.c algorithm/yescrypt_core.h algorithm/yescrypt-opt.c algorithm/yescryptcommon.c algorithm/sysendian.h
sgminer_SOURCES += algorithm/blake256.c algorithm/blake256.h sgminer_SOURCES += algorithm/blake256.c algorithm/blake256.h
sgminer_SOURCES += algorithm/blakecoin.c algorithm/blakecoin.h sgminer_SOURCES += algorithm/blakecoin.c algorithm/blakecoin.h
sgminer_SOURCES += algorithm/decred.c algorithm/decred.h sgminer_SOURCES += algorithm/decred.c algorithm/decred.h
sgminer_SOURCES += algorithm/pascal.c algorithm/pascal.h sgminer_SOURCES += algorithm/pascal.c algorithm/pascal.h
sgminer_SOURCES += algorithm/lbry.c algorithm/lbry.h
sgminer_SOURCES += algorithm/gostcoin.c algorithm/gostcoin.h sgminer_SOURCES += algorithm/gostcoin.c algorithm/gostcoin.h
bin_SCRIPTS = $(top_srcdir)/kernel/*.cl bin_SCRIPTS = $(top_srcdir)/kernel/*.cl

140
algorithm.c

@ -36,14 +36,12 @@
#include "algorithm/lyra2re.h" #include "algorithm/lyra2re.h"
#include "algorithm/lyra2rev2.h" #include "algorithm/lyra2rev2.h"
#include "algorithm/pluck.h" #include "algorithm/pluck.h"
#include "algorithm/yescrypt.h"
#include "algorithm/credits.h" #include "algorithm/credits.h"
#include "algorithm/blake256.h" #include "algorithm/blake256.h"
#include "algorithm/blakecoin.h" #include "algorithm/blakecoin.h"
#include "algorithm/sia.h" #include "algorithm/sia.h"
#include "algorithm/decred.h" #include "algorithm/decred.h"
#include "algorithm/pascal.h" #include "algorithm/pascal.h"
#include "algorithm/lbry.h"
#include "algorithm/sibcoin.h" #include "algorithm/sibcoin.h"
#include "algorithm/gostcoin.h" #include "algorithm/gostcoin.h"
@ -72,16 +70,13 @@ const char *algorithm_type_str[] = {
"Neoscrypt", "Neoscrypt",
"WhirlpoolX", "WhirlpoolX",
"Lyra2RE", "Lyra2RE",
"Lyra2REV2" "Lyra2REV2",
"Pluck" "Pluck",
"Yescrypt",
"Yescrypt-multi",
"Blakecoin", "Blakecoin",
"Blake", "Blake",
"Sia", "Sia",
"Decred", "Decred",
"Vanilla", "Vanilla",
"Lbry",
"Sibcoin", "Sibcoin",
"Gostcoin" "Gostcoin"
}; };
@ -284,100 +279,6 @@ static cl_int queue_credits_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_
return status; return status;
} }
static cl_int queue_yescrypt_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
cl_kernel *kernel = &clState->kernel;
unsigned int num = 0;
cl_uint le_target;
cl_int status = 0;
// le_target = (*(cl_uint *)(blk->work->device_target + 28));
le_target = (cl_uint)le32toh(((uint32_t *)blk->work->/*device_*/target)[7]);
// le_target = (cl_uint)((uint32_t *)blk->work->target)[7];
// memcpy(clState->cldata, blk->work->data, 80);
flip80(clState->cldata, blk->work->data);
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL);
CL_SET_ARG(clState->CLbuffer0);
CL_SET_ARG(clState->outputBuffer);
CL_SET_ARG(clState->padbuffer8);
CL_SET_ARG(clState->buffer1);
CL_SET_ARG(clState->buffer2);
CL_SET_ARG(le_target);
return status;
}
static cl_int queue_yescrypt_multikernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
// cl_kernel *kernel = &clState->kernel;
cl_kernel *kernel;
unsigned int num = 0;
cl_uint le_target;
cl_int status = 0;
// le_target = (*(cl_uint *)(blk->work->device_target + 28));
le_target = (cl_uint)le32toh(((uint32_t *)blk->work->/*device_*/target)[7]);
memcpy(clState->cldata, blk->work->data, 80);
// flip80(clState->cldata, blk->work->data);
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL);
//pbkdf and initial sha
kernel = &clState->kernel;
CL_SET_ARG(clState->CLbuffer0);
CL_SET_ARG(clState->outputBuffer);
CL_SET_ARG(clState->padbuffer8);
CL_SET_ARG(clState->buffer1);
CL_SET_ARG(clState->buffer2);
CL_SET_ARG(clState->buffer3);
CL_SET_ARG(le_target);
//inactive kernel
num = 0;
kernel = clState->extra_kernels;
CL_SET_ARG_N(0,clState->buffer1);
CL_SET_ARG_N(1,clState->buffer2);
// CL_SET_ARG_N(3, clState->buffer3);
//mix2_2
num = 0;
CL_NEXTKERNEL_SET_ARG_N(0, clState->padbuffer8);
CL_SET_ARG_N(1,clState->buffer1);
CL_SET_ARG_N(2,clState->buffer2);
//mix2_2
//inactive kernel
num = 0;
CL_NEXTKERNEL_SET_ARG_N(0, clState->buffer1);
CL_SET_ARG_N(1, clState->buffer2);
//mix2_2
num = 0;
CL_NEXTKERNEL_SET_ARG_N(0, clState->padbuffer8);
CL_SET_ARG_N(1, clState->buffer1);
CL_SET_ARG_N(2, clState->buffer2);
//inactive kernel
num = 0;
CL_NEXTKERNEL_SET_ARG_N(0, clState->buffer1);
CL_SET_ARG_N(1, clState->buffer2);
//mix2_2
//pbkdf and finalization
num=0;
CL_NEXTKERNEL_SET_ARG(clState->CLbuffer0);
CL_SET_ARG(clState->outputBuffer);
CL_SET_ARG(clState->buffer2);
CL_SET_ARG(clState->buffer3);
CL_SET_ARG(le_target);
return status;
}
static cl_int queue_maxcoin_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) static cl_int queue_maxcoin_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{ {
cl_kernel *kernel = &clState->kernel; cl_kernel *kernel = &clState->kernel;
@ -1127,31 +1028,6 @@ static cl_int queue_decred_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_u
return status; return status;
} }
static cl_int queue_lbry_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
cl_kernel *kernel = &clState->kernel;
unsigned int num = 0;
cl_ulong le_target;
cl_int status = 0;
le_target = *(cl_ulong *)(blk->work->target + 24);
flip112(clState->cldata, blk->work->data);
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 112, clState->cldata, 0, NULL, NULL);
CL_SET_ARG(clState->CLbuffer0);
CL_SET_ARG(clState->padbuffer8);
num = 0;
kernel = clState->extra_kernels;
CL_SET_ARG_0(clState->padbuffer8);
num = 0;
CL_NEXTKERNEL_SET_ARG(clState->padbuffer8);
CL_SET_ARG(clState->outputBuffer);
CL_SET_ARG(le_target);
return status;
}
static algorithm_settings_t algos[] = { static algorithm_settings_t algos[] = {
// kernels starting from this will have difficulty calculated by using litecoin algorithm // kernels starting from this will have difficulty calculated by using litecoin algorithm
#define A_SCRYPT(a) \ #define A_SCRYPT(a) \
@ -1185,16 +1061,6 @@ static algorithm_settings_t algos[] = {
A_DECRED("decred"), A_DECRED("decred"),
#undef A_DECRED #undef A_DECRED
#define A_YESCRYPT(a) \
{ a, ALGO_YESCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, yescrypt_regenhash, NULL, NULL, queue_yescrypt_kernel, gen_hash, append_neoscrypt_compiler_options}
A_YESCRYPT("yescrypt"),
#undef A_YESCRYPT
#define A_YESCRYPT_MULTI(a) \
{ a, ALGO_YESCRYPT_MULTI, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 6,-1,CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE , yescrypt_regenhash, NULL, NULL, queue_yescrypt_multikernel, gen_hash, append_neoscrypt_compiler_options}
A_YESCRYPT_MULTI("yescrypt-multi"),
#undef A_YESCRYPT_MULTI
// kernels starting from this will have difficulty calculated by using quarkcoin algorithm // kernels starting from this will have difficulty calculated by using quarkcoin algorithm
#define A_QUARK(a, b) \ #define A_QUARK(a, b) \
{ a, ALGO_QUARK, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, NULL, NULL, queue_sph_kernel, gen_hash, append_x11_compiler_options } { a, ALGO_QUARK, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, NULL, NULL, queue_sph_kernel, gen_hash, append_x11_compiler_options }
@ -1253,8 +1119,6 @@ static algorithm_settings_t algos[] = {
{ "sia", ALGO_SIA, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000FFFFUL, 0, 128, 0, sia_regenhash, NULL, NULL, queue_sia_kernel, NULL, NULL }, { "sia", ALGO_SIA, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000FFFFUL, 0, 128, 0, sia_regenhash, NULL, NULL, queue_sia_kernel, NULL, NULL },
{ "vanilla", ALGO_VANILLA, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x000000ffUL, 0, 128, 0, blakecoin_regenhash, blakecoin_midstate, blakecoin_prepare_work, queue_blake_kernel, gen_hash, NULL }, { "vanilla", ALGO_VANILLA, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x000000ffUL, 0, 128, 0, blakecoin_regenhash, blakecoin_midstate, blakecoin_prepare_work, queue_blake_kernel, gen_hash, NULL },
{ "lbry", ALGO_LBRY, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 2, 4 * 8 * 4194304, 0, lbry_regenhash, NULL, NULL, queue_lbry_kernel, gen_hash, NULL },
{ "pascal", ALGO_PASCAL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, pascal_regenhash, pascal_midstate, NULL, queue_pascal_kernel, NULL, NULL }, { "pascal", ALGO_PASCAL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, pascal_regenhash, pascal_midstate, NULL, queue_pascal_kernel, NULL, NULL },

60
algorithm/lbry.c

@ -1,60 +0,0 @@
#include "config.h"
#include "miner.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include "sph/sph_sha2.h"
#include "sph/sph_ripemd.h"
typedef struct {
sph_sha256_context sha256;
sph_sha512_context sha512;
sph_ripemd160_context ripemd;
} lbryhash_context_holder;
void lbryhash(void* output, const void* input)
{
uint32_t hashA[16], hashB[16], hashC[16];
lbryhash_context_holder ctx;
sph_sha256_init(&ctx.sha256);
sph_sha512_init(&ctx.sha512);
sph_ripemd160_init(&ctx.ripemd);
sph_sha256 (&ctx.sha256, input, 112);
sph_sha256_close(&ctx.sha256, hashA);
sph_sha256 (&ctx.sha256, hashA, 32);
sph_sha256_close(&ctx.sha256, hashA);
sph_sha512 (&ctx.sha512, hashA, 32);
sph_sha512_close(&ctx.sha512, hashA);
sph_ripemd160 (&ctx.ripemd, hashA, 32);
sph_ripemd160_close(&ctx.ripemd, hashB);
sph_ripemd160 (&ctx.ripemd, hashA+8, 32);
sph_ripemd160_close(&ctx.ripemd, hashC);
sph_sha256 (&ctx.sha256, hashB, 20);
sph_sha256 (&ctx.sha256, hashC, 20);
sph_sha256_close(&ctx.sha256, hashA);
sph_sha256 (&ctx.sha256, hashA, 32);
sph_sha256_close(&ctx.sha256, hashA);
memcpy(output, hashA, 32);
}
void lbry_regenhash(struct work *work)
{
uint32_t data[28];
uint32_t *nonce = (uint32_t *)(work->data + 108);
uint32_t *ohash = (uint32_t *)(work->hash);
be32enc_vect(data, (const uint32_t *)work->data, 27);
data[27] = htobe32(*nonce);
lbryhash(ohash, data);
}

8
algorithm/lbry.h

@ -1,8 +0,0 @@
#ifndef LBRY_H
#define LBRY_H
#include "miner.h"
extern void lbry_regenhash(struct work *work);
#endif

1364
algorithm/yescrypt-opt.c

File diff suppressed because it is too large Load Diff

119
algorithm/yescrypt.c

@ -1,119 +0,0 @@
/*-
* Copyright 2015 djm34
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#include "miner.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include "algorithm/yescrypt_core.h"
static const uint32_t diff1targ = 0x0000ffff;
/* Used externally as confirmation of correct OCL code */
int yescrypt_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
{
uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]);
uint32_t data[20], ohash[8];
be32enc_vect(data, (const uint32_t *)pdata, 19);
data[19] = htobe32(nonce);
yescrypt_hash((unsigned char*)data,(unsigned char*)ohash);
tmp_hash7 = be32toh(ohash[7]);
applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx",
(long unsigned int)Htarg,
(long unsigned int)diff1targ,
(long unsigned int)tmp_hash7);
if (tmp_hash7 > diff1targ)
return -1;
if (tmp_hash7 > Htarg)
return 0;
return 1;
}
void yescrypt_regenhash(struct work *work)
{
uint32_t data[20];
uint32_t *nonce = (uint32_t *)(work->data + 76);
uint32_t *ohash = (uint32_t *)(work->hash);
be32enc_vect(data, (const uint32_t *)work->data, 19);
data[19] = htobe32(*nonce);
yescrypt_hash((unsigned char*)data, (unsigned char*)ohash);
}
bool scanhash_yescrypt(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
unsigned char *pdata, unsigned char __maybe_unused *phash1,
unsigned char __maybe_unused *phash, const unsigned char *ptarget,
uint32_t max_nonce, uint32_t *last_nonce, uint32_t n)
{
uint32_t *nonce = (uint32_t *)(pdata + 76);
uint32_t data[20];
uint32_t tmp_hash7;
uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]);
bool ret = false;
be32enc_vect(data, (const uint32_t *)pdata, 19);
while (1)
{
uint32_t ostate[8];
*nonce = ++n;
data[19] = (n);
yescrypt_hash((unsigned char*)data, (unsigned char*)ostate);
tmp_hash7 = (ostate[7]);
applog(LOG_INFO, "data7 %08lx", (long unsigned int)data[7]);
if (unlikely(tmp_hash7 <= Htarg))
{
((uint32_t *)pdata)[19] = htobe32(n);
*last_nonce = n;
ret = true;
break;
}
if (unlikely((n >= max_nonce) || thr->work_restart))
{
*last_nonce = n;
break;
}
}
return ret;
}

10
algorithm/yescrypt.h

@ -1,10 +0,0 @@
#ifndef YESCRYPT_H
#define YESCRYPT_H
#include "miner.h"
#define YESCRYPT_SCRATCHBUF_SIZE (128 * 2048 * 8 ) //uchar
#define YESCRYP_SECBUF_SIZE (128*64*8)
extern int yescrypt_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce);
extern void yescrypt_regenhash(struct work *work);
#endif /* YESCRYPT_H */

376
algorithm/yescrypt_core.h

@ -1,376 +0,0 @@
/*-
* Copyright 2009 Colin Percival
* Copyright 2013,2014 Alexander Peslyak
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* This file was originally written by Colin Percival as part of the Tarsnap
* online backup system.
*/
#ifndef _YESCRYPT_H_
#define _YESCRYPT_H_
#include <stdint.h>
#include <stdlib.h> /* for size_t */
#include <errno.h>
#ifdef __cplusplus
extern "C" {
#endif
//extern void yescrypt_hash_sp(const unsigned char *input, unsigned char *output);
extern void yescrypt_hash(const unsigned char *input, unsigned char *output);
/**
* crypto_scrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen):
* Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r,
* p, buflen) and write the result into buf. The parameters r, p, and buflen
* must satisfy r * p < 2^30 and buflen <= (2^32 - 1) * 32. The parameter N
* must be a power of 2 greater than 1.
*
* Return 0 on success; or -1 on error.
*
* MT-safe as long as buf is local to the thread.
*/
extern int crypto_scrypt(const uint8_t * __passwd, size_t __passwdlen,
const uint8_t * __salt, size_t __saltlen,
uint64_t __N, uint32_t __r, uint32_t __p,
uint8_t * __buf, size_t __buflen);
/**
* Internal type used by the memory allocator. Please do not use it directly.
* Use yescrypt_shared_t and yescrypt_local_t as appropriate instead, since
* they might differ from each other in a future version.
*/
typedef struct {
void * base, * aligned;
size_t base_size, aligned_size;
} yescrypt_region_t;
/**
* Types for shared (ROM) and thread-local (RAM) data structures.
*/
typedef yescrypt_region_t yescrypt_shared1_t;
typedef struct {
yescrypt_shared1_t shared1;
uint32_t mask1;
} yescrypt_shared_t;
typedef yescrypt_region_t yescrypt_local_t;
/**
* Possible values for yescrypt_init_shared()'s flags argument.
*/
typedef enum {
YESCRYPT_SHARED_DEFAULTS = 0,
YESCRYPT_SHARED_PREALLOCATED = 0x100
} yescrypt_init_shared_flags_t;
/**
* Possible values for the flags argument of yescrypt_kdf(),
* yescrypt_gensalt_r(), yescrypt_gensalt(). These may be OR'ed together,
* except that YESCRYPT_WORM and YESCRYPT_RW are mutually exclusive.
* Please refer to the description of yescrypt_kdf() below for the meaning of
* these flags.
*/
typedef enum {
/* public */
YESCRYPT_WORM = 0,
YESCRYPT_RW = 1,
YESCRYPT_PARALLEL_SMIX = 2,
YESCRYPT_PWXFORM = 4,
/* private */
__YESCRYPT_INIT_SHARED_1 = 0x10000,
__YESCRYPT_INIT_SHARED_2 = 0x20000,
__YESCRYPT_INIT_SHARED = 0x30000
} yescrypt_flags_t;
#define YESCRYPT_KNOWN_FLAGS \
(YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | YESCRYPT_PWXFORM | \
__YESCRYPT_INIT_SHARED)
/**
* yescrypt_init_shared(shared, param, paramlen, N, r, p, flags, mask,
* buf, buflen):
* Optionally allocate memory for and initialize the shared (ROM) data
* structure. The parameters N, r, and p must satisfy the same conditions as
* with crypto_scrypt(). param and paramlen specify a local parameter with
* which the ROM is seeded. If buf is not NULL, then it is used to return
* buflen bytes of message digest for the initialized ROM (the caller may use
* this to verify that the ROM has been computed in the same way that it was on
* a previous run).
*
* Return 0 on success; or -1 on error.
*
* If bit YESCRYPT_SHARED_PREALLOCATED in flags is set, then memory for the
* ROM is assumed to have been preallocated by the caller, with
* shared->shared1.aligned being the start address of the ROM and
* shared->shared1.aligned_size being its size (which must be consistent with
* N, r, and p). This may be used e.g. when the ROM is to be placed in a SysV
* shared memory segment allocated by the caller.
*
* mask controls the frequency of ROM accesses by yescrypt_kdf(). Normally it
* should be set to 1, to interleave RAM and ROM accesses, which works well
* when both regions reside in the machine's RAM anyway. Other values may be
* used e.g. when the ROM is memory-mapped from a disk file. Recommended mask
* values are powers of 2 minus 1 or minus 2. Here's the effect of some mask
* values:
* mask value ROM accesses in SMix 1st loop ROM accesses in SMix 2nd loop
* 0 0 1/2
* 1 1/2 1/2
* 2 0 1/4
* 3 1/4 1/4
* 6 0 1/8
* 7 1/8 1/8
* 14 0 1/16
* 15 1/16 1/16
* 1022 0 1/1024
* 1023 1/1024 1/1024
*
* Actual computation of the ROM contents may be avoided, if you don't intend
* to use a ROM but need a dummy shared structure, by calling this function
* with NULL, 0, 0, 0, 0, YESCRYPT_SHARED_DEFAULTS, 0, NULL, 0 for the
* arguments starting with param and on.
*
* MT-safe as long as shared is local to the thread.
*/
extern int yescrypt_init_shared(yescrypt_shared_t * __shared,
const uint8_t * __param, size_t __paramlen,
uint64_t __N, uint32_t __r, uint32_t __p,
yescrypt_init_shared_flags_t __flags, uint32_t __mask,
uint8_t * __buf, size_t __buflen);
/**
* yescrypt_free_shared(shared):
* Free memory that had been allocated with yescrypt_init_shared().
*
* Return 0 on success; or -1 on error.
*
* MT-safe as long as shared is local to the thread.
*/
extern int yescrypt_free_shared(yescrypt_shared_t * __shared);
/**
* yescrypt_init_local(local):
* Initialize the thread-local (RAM) data structure. Actual memory allocation
* is currently fully postponed until a call to yescrypt_kdf() or yescrypt_r().
*
* Return 0 on success; or -1 on error.
*
* MT-safe as long as local is local to the thread.
*/
extern int yescrypt_init_local(yescrypt_local_t * __local);
/**
* yescrypt_free_local(local):
* Free memory that may have been allocated for an initialized thread-local
* (RAM) data structure.
*
* Return 0 on success; or -1 on error.
*
* MT-safe as long as local is local to the thread.
*/
extern int yescrypt_free_local(yescrypt_local_t * __local);
/**
* yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen,
* N, r, p, t, flags, buf, buflen):
* Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r,
* p, buflen), or a revision of scrypt as requested by flags and shared, and
* write the result into buf. The parameters N, r, p, and buflen must satisfy
* the same conditions as with crypto_scrypt(). t controls computation time
* while not affecting peak memory usage. shared and flags may request
* special modes as described below. local is the thread-local data
* structure, allowing to preserve and reuse a memory allocation across calls,
* thereby reducing its overhead.
*
* Return 0 on success; or -1 on error.
*
* t controls computation time. t = 0 is optimal in terms of achieving the
* highest area-time for ASIC attackers. Thus, higher computation time, if
* affordable, is best achieved by increasing N rather than by increasing t.
* However, if the higher memory usage (which goes along with higher N) is not
* affordable, or if fine-tuning of the time is needed (recall that N must be a
* power of 2), then t = 1 or above may be used to increase time while staying
* at the same peak memory usage. t = 1 increases the time by 25% and
* decreases the normalized area-time to 96% of optimal. (Of course, in
* absolute terms the area-time increases with higher t. It's just that it
* would increase slightly more with higher N*r rather than with higher t.)
* t = 2 increases the time by another 20% and decreases the normalized
* area-time to 89% of optimal. Thus, these two values are reasonable to use
* for fine-tuning. Values of t higher than 2 result in further increase in
* time while reducing the efficiency much further (e.g., down to around 50% of
* optimal for t = 5, which runs 3 to 4 times slower than t = 0, with exact
* numbers varying by the flags settings).
*
* Classic scrypt is available by setting t = 0 and flags to YESCRYPT_WORM and
* passing a dummy shared structure (see the description of
* yescrypt_init_shared() above for how to produce one). In this mode, the
* thread-local memory region (RAM) is first sequentially written to and then
* randomly read from. This algorithm is friendly towards time-memory
* tradeoffs (TMTO), available both to defenders (albeit not in this
* implementation) and to attackers.
*
* Setting YESCRYPT_RW adds extra random reads and writes to the thread-local
* memory region (RAM), which makes TMTO a lot less efficient. This may be
* used to slow down the kinds of attackers who would otherwise benefit from
* classic scrypt's efficient TMTO. Since classic scrypt's TMTO allows not
* only for the tradeoff, but also for a decrease of attacker's area-time (by
* up to a constant factor), setting YESCRYPT_RW substantially increases the
* cost of attacks in area-time terms as well. Yet another benefit of it is
* that optimal area-time is reached at an earlier time than with classic
* scrypt, and t = 0 actually corresponds to this earlier completion time,
* resulting in quicker hash computations (and thus in higher request rate
* capacity). Due to these properties, YESCRYPT_RW should almost always be
* set, except when compatibility with classic scrypt or TMTO-friendliness are
* desired.
*
* YESCRYPT_PARALLEL_SMIX moves parallelism that is present with p > 1 to a
* lower level as compared to where it is in classic scrypt. This reduces
* flexibility for efficient computation (for both attackers and defenders) by
* requiring that, short of resorting to TMTO, the full amount of memory be
* allocated as needed for the specified p, regardless of whether that
* parallelism is actually being fully made use of or not. (For comparison, a
* single instance of classic scrypt may be computed in less memory without any
* CPU time overhead, but in more real time, by not making full use of the
* parallelism.) This may be desirable when the defender has enough memory
* with sufficiently low latency and high bandwidth for efficient full parallel
* execution, yet the required memory size is high enough that some likely
* attackers might end up being forced to choose between using higher latency
* memory than they could use otherwise (waiting for data longer) or using TMTO
* (waiting for data more times per one hash computation). The area-time cost
* for other kinds of attackers (who would use the same memory type and TMTO
* factor or no TMTO either way) remains roughly the same, given the same
* running time for the defender. In the TMTO-friendly YESCRYPT_WORM mode, as
* long as the defender has enough memory that is just as fast as the smaller
* per-thread regions would be, doesn't expect to ever need greater
* flexibility (except possibly via TMTO), and doesn't need backwards
* compatibility with classic scrypt, there are no other serious drawbacks to
* this setting. In the YESCRYPT_RW mode, which is meant to discourage TMTO,
* this new approach to parallelization makes TMTO less inefficient. (This is
* an unfortunate side-effect of avoiding some random writes, as we have to in
* order to allow for parallel threads to access a common memory region without
* synchronization overhead.) Thus, in this mode this setting poses an extra
* tradeoff of its own (higher area-time cost for a subset of attackers vs.
* better TMTO resistance). Setting YESCRYPT_PARALLEL_SMIX also changes the
* way the running time is to be controlled from N*r*p (for classic scrypt) to
* N*r (in this modification). All of this applies only when p > 1. For
* p = 1, this setting is a no-op.
*
* Passing a real shared structure, with ROM contents previously computed by
* yescrypt_init_shared(), enables the use of ROM and requires YESCRYPT_RW for
* the thread-local RAM region. In order to allow for initialization of the
* ROM to be split into a separate program, the shared->shared1.aligned and
* shared->shared1.aligned_size fields may be set by the caller of
* yescrypt_kdf() manually rather than with yescrypt_init_shared().
*
* local must be initialized with yescrypt_init_local().
*
* MT-safe as long as local and buf are local to the thread.
*/
extern int yescrypt_kdf(const yescrypt_shared_t * __shared,
yescrypt_local_t * __local,
const uint8_t * __passwd, size_t __passwdlen,
const uint8_t * __salt, size_t __saltlen,
uint64_t __N, uint32_t __r, uint32_t __p, uint32_t __t,
yescrypt_flags_t __flags,
uint8_t * __buf, size_t __buflen);
/**
* yescrypt_r(shared, local, passwd, passwdlen, setting, buf, buflen):
* Compute and encode an scrypt or enhanced scrypt hash of passwd given the
* parameters and salt value encoded in setting. If the shared structure is
* not dummy, a ROM is used and YESCRYPT_RW is required. Otherwise, whether to
* use the YESCRYPT_WORM (classic scrypt) or YESCRYPT_RW (time-memory tradeoff
* discouraging modification) is determined by the setting string. shared and
* local must be initialized as described above for yescrypt_kdf(). buf must
* be large enough (as indicated by buflen) to hold the encoded hash string.
*
* Return the encoded hash string on success; or NULL on error.
*
* MT-safe as long as local and buf are local to the thread.
*/
extern uint8_t * yescrypt_r(const yescrypt_shared_t * __shared,
yescrypt_local_t * __local,
const uint8_t * __passwd, size_t __passwdlen,
const uint8_t * __setting,
uint8_t * __buf, size_t __buflen);
/**
* yescrypt(passwd, setting):
* Compute and encode an scrypt or enhanced scrypt hash of passwd given the
* parameters and salt value encoded in setting. Whether to use the
* YESCRYPT_WORM (classic scrypt) or YESCRYPT_RW (time-memory tradeoff
* discouraging modification) is determined by the setting string.
*
* Return the encoded hash string on success; or NULL on error.
*
* This is a crypt(3)-like interface, which is simpler to use than
* yescrypt_r(), but it is not MT-safe, it does not allow for the use of a ROM,
* and it is slower than yescrypt_r() for repeated calls because it allocates
* and frees memory on each call.
*
* MT-unsafe.
*/
extern uint8_t * yescrypt(const uint8_t * __passwd, const uint8_t * __setting);
/**
* yescrypt_gensalt_r(N_log2, r, p, flags, src, srclen, buf, buflen):
* Generate a setting string for use with yescrypt_r() and yescrypt() by
* encoding into it the parameters N_log2 (which is to be set to base 2
* logarithm of the desired value for N), r, p, flags, and a salt given by src
* (of srclen bytes). buf must be large enough (as indicated by buflen) to
* hold the setting string.
*
* Return the setting string on success; or NULL on error.
*
* MT-safe as long as buf is local to the thread.
*/
extern uint8_t * yescrypt_gensalt_r(
uint32_t __N_log2, uint32_t __r, uint32_t __p,
yescrypt_flags_t __flags,
const uint8_t * __src, size_t __srclen,
uint8_t * __buf, size_t __buflen);
/**
* yescrypt_gensalt(N_log2, r, p, flags, src, srclen):
* Generate a setting string for use with yescrypt_r() and yescrypt(). This
* function is the same as yescrypt_gensalt_r() except that it uses a static
* buffer and thus is not MT-safe.
*
* Return the setting string on success; or NULL on error.
*
* MT-unsafe.
*/
extern uint8_t * yescrypt_gensalt(
uint32_t __N_log2, uint32_t __r, uint32_t __p,
yescrypt_flags_t __flags,
const uint8_t * __src, size_t __srclen);
#ifdef __cplusplus
}
#endif
#endif /* !_YESCRYPT_H_ */

360
algorithm/yescryptcommon.c

@ -1,360 +0,0 @@
/*-
* Copyright 2013,2014 Alexander Peslyak
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "algorithm/yescrypt_core.h"
#define BYTES2CHARS(bytes) \
((((bytes) * 8) + 5) / 6)
#define HASH_SIZE 32 /* bytes */
#define HASH_LEN BYTES2CHARS(HASH_SIZE) /* base-64 chars */
#define YESCRYPT_FLAGS (YESCRYPT_RW | YESCRYPT_PWXFORM)
static const char * const itoa64 =
"./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
static uint8_t * encode64_uint32(uint8_t * dst, size_t dstlen,
uint32_t src, uint32_t srcbits)
{
uint32_t bit;
for (bit = 0; bit < srcbits; bit += 6) {
if (dstlen < 1)
return NULL;
*dst++ = itoa64[src & 0x3f];
dstlen--;
src >>= 6;
}
return dst;
}
static uint8_t * encode64(uint8_t * dst, size_t dstlen,
const uint8_t * src, size_t srclen)
{
size_t i;
for (i = 0; i < srclen; ) {
uint8_t * dnext;
uint32_t value = 0, bits = 0;
do {
value |= (uint32_t)src[i++] << bits;
bits += 8;
} while (bits < 24 && i < srclen);
dnext = encode64_uint32(dst, dstlen, value, bits);
if (!dnext)
return NULL;
dstlen -= dnext - dst;
dst = dnext;
}
return dst;
}
static int decode64_one(uint32_t * dst, uint8_t src)
{
const char * ptr = strchr(itoa64, src);
if (ptr) {
*dst = ptr - itoa64;
return 0;
}
*dst = 0;
return -1;
}
static const uint8_t * decode64_uint32(uint32_t * dst, uint32_t dstbits,
const uint8_t * src)
{
uint32_t bit;
uint32_t value;
value = 0;
for (bit = 0; bit < dstbits; bit += 6) {
uint32_t one;
if (decode64_one(&one, *src)) {
*dst = 0;
return NULL;
}
src++;
value |= one << bit;
}
*dst = value;
return src;
}
uint8_t *
yescrypt_r(const yescrypt_shared_t * shared, yescrypt_local_t * local,
const uint8_t * passwd, size_t passwdlen,
const uint8_t * setting,
uint8_t * buf, size_t buflen)
{
uint8_t hash[HASH_SIZE];
const uint8_t * src, * salt;
uint8_t * dst;
size_t prefixlen, saltlen, need;
uint8_t version;
uint64_t N;
uint32_t r, p;
yescrypt_flags_t flags = YESCRYPT_WORM;
fflush(stdout);
if (setting[0] != '$' || setting[1] != '7')
{
fflush(stdout);
return NULL;
}
fflush(stdout);
src = setting + 2;
fflush(stdout);
switch ((version = *src)) {
case '$':
fflush(stdout);
break;
case 'X':
src++;
flags = YESCRYPT_RW;
fflush(stdout);
break;
default:
{
fflush(stdout);
return NULL;
}
}
fflush(stdout);
if (*src != '$') {
uint32_t decoded_flags;
if (decode64_one(&decoded_flags, *src))
{
fflush(stdout);
return NULL;
}
flags = decoded_flags;
if (*++src != '$')
{
fflush(stdout);
return NULL;
}
}
src++;
{
uint32_t N_log2;
if (decode64_one(&N_log2, *src))
{
return NULL;
}
src++;
N = (uint64_t)1 << N_log2;
}
src = decode64_uint32(&r, 30, src);
if (!src)
{
return NULL;
}
src = decode64_uint32(&p, 30, src);
if (!src)
{
return NULL;
}
prefixlen = src - setting;
salt = src;
src = (uint8_t *)strrchr((char *)salt, '$');
if (src)
saltlen = src - salt;
else
saltlen = strlen((char *)salt);
need = prefixlen + saltlen + 1 + HASH_LEN + 1;
if (need > buflen || need < saltlen)
{
fflush(stdout);
return NULL;
}
fflush(stdout);
if (yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen,
N, r, p, 0, flags, hash, sizeof(hash)))
{
fflush(stdout);
return NULL;
}
dst = buf;
memcpy(dst, setting, prefixlen + saltlen);
dst += prefixlen + saltlen;
*dst++ = '$';
dst = encode64(dst, buflen - (dst - buf), hash, sizeof(hash));
/* Could zeroize hash[] here, but yescrypt_kdf() doesn't zeroize its
* memory allocations yet anyway. */
if (!dst || dst >= buf + buflen) /* Can't happen */
{
return NULL;
}
*dst = 0; /* NUL termination */
fflush(stdout);
return buf;
}
uint8_t *
yescrypt(const uint8_t * passwd, const uint8_t * setting)
{
static uint8_t buf[4 + 1 + 5 + 5 + BYTES2CHARS(32) + 1 + HASH_LEN + 1];
yescrypt_shared_t shared;
yescrypt_local_t local;
uint8_t * retval;
if (yescrypt_init_shared(&shared, NULL, 0,
0, 0, 0, YESCRYPT_SHARED_DEFAULTS, 0, NULL, 0))
return NULL;
if (yescrypt_init_local(&local)) {
yescrypt_free_shared(&shared);
return NULL;
}
retval = yescrypt_r(&shared, &local,
passwd, 80, setting, buf, sizeof(buf));
// printf("hashse='%s'\n", (char *)retval);
if (yescrypt_free_local(&local)) {
yescrypt_free_shared(&shared);
return NULL;
}
if (yescrypt_free_shared(&shared))
return NULL;
return retval;
}
uint8_t *
yescrypt_gensalt_r(uint32_t N_log2, uint32_t r, uint32_t p,
yescrypt_flags_t flags,
const uint8_t * src, size_t srclen,
uint8_t * buf, size_t buflen)
{
uint8_t * dst;
size_t prefixlen = 3 + 1 + 5 + 5;
size_t saltlen = BYTES2CHARS(srclen);
size_t need;
if (p == 1)
flags &= ~YESCRYPT_PARALLEL_SMIX;
if (flags) {
if (flags & ~0x3f)
return NULL;
prefixlen++;
if (flags != YESCRYPT_RW)
prefixlen++;
}
need = prefixlen + saltlen + 1;
if (need > buflen || need < saltlen || saltlen < srclen)
return NULL;
if (N_log2 > 63 || ((uint64_t)r * (uint64_t)p >= (1U << 30)))
return NULL;
dst = buf;
*dst++ = '$';
*dst++ = '7';
if (flags) {
*dst++ = 'X'; /* eXperimental, subject to change */
if (flags != YESCRYPT_RW)
*dst++ = itoa64[flags];
}
*dst++ = '$';
*dst++ = itoa64[N_log2];
dst = encode64_uint32(dst, buflen - (dst - buf), r, 30);
if (!dst) /* Can't happen */
return NULL;
dst = encode64_uint32(dst, buflen - (dst - buf), p, 30);
if (!dst) /* Can't happen */
return NULL;
dst = encode64(dst, buflen - (dst - buf), src, srclen);
if (!dst || dst >= buf + buflen) /* Can't happen */
return NULL;
*dst = 0; /* NUL termination */
return buf;
}
uint8_t *
yescrypt_gensalt(uint32_t N_log2, uint32_t r, uint32_t p,
yescrypt_flags_t flags,
const uint8_t * src, size_t srclen)
{
static uint8_t buf[4 + 1 + 5 + 5 + BYTES2CHARS(32) + 1];
return yescrypt_gensalt_r(N_log2, r, p, flags, src, srclen,
buf, sizeof(buf));
}
static int
yescrypt_bsty(const uint8_t * passwd, size_t passwdlen,
const uint8_t * salt, size_t saltlen, uint64_t N, uint32_t r, uint32_t p,
uint8_t * buf, size_t buflen)
{
static __thread int initialized = 0;
static __thread yescrypt_shared_t shared;
static __thread yescrypt_local_t local;
// static __declspec(thread) int initialized = 0;
// static __declspec(thread) yescrypt_shared_t shared;
// static __declspec(thread) yescrypt_local_t local;
int retval;
if (!initialized) {
/* "shared" could in fact be shared, but it's simpler to keep it private
* along with "local". It's dummy and tiny anyway. */
if (yescrypt_init_shared(&shared, NULL, 0,
0, 0, 0, YESCRYPT_SHARED_DEFAULTS, 0, NULL, 0))
return -1;
if (yescrypt_init_local(&local)) {
yescrypt_free_shared(&shared);
return -1;
}
initialized = 1;
}
retval = yescrypt_kdf(&shared, &local,
passwd, passwdlen, salt, saltlen, N, r, p, 0, YESCRYPT_FLAGS,
buf, buflen);
return retval;
}
void yescrypt_hash(const unsigned char *input, unsigned char *output)
{
yescrypt_bsty((const uint8_t *)input, 80, (const uint8_t *) input, 80, 2048, 8, 1, (uint8_t *)output, 32);
}

179
kernel/lbry.cl

@ -1,179 +0,0 @@
#include "sha256.cl"
#include "wolf-sha512.cl"
#include "ripemd160.cl"
#define SWAP32(x) as_uint(as_uchar4(x).s3210)
#define SWAP64(x) as_ulong(as_uchar8(x).s76543210)
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search(__global const uint *input, __global uint8 *ctx)
{
// SHA256 takes 16 uints of input per block - we have 112 bytes to process
// 8 * 16 == 64, meaning two block transforms.
uint SHA256Buf[16];
uint gid = get_global_id(0);
// Remember the last four is the nonce - so 108 bytes / 4 bytes per dword
#pragma unroll
for(int i = 0; i < 16; ++i) SHA256Buf[i] = SWAP32(input[i]);
// SHA256 initialization constants
uint8 outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19);
#pragma unroll
for(int i = 0; i < 3; ++i)
{
if(i == 1)
{
#pragma unroll
for(int i = 0; i < 11; ++i) SHA256Buf[i] = SWAP32(input[i + 16]);
SHA256Buf[11] = SWAP32(gid);
SHA256Buf[12] = 0x80000000;
SHA256Buf[13] = 0x00000000;
SHA256Buf[14] = 0x00000000;
SHA256Buf[15] = 0x00000380;
}
if(i == 2)
{
((uint8 *)SHA256Buf)[0] = outbuf;
SHA256Buf[8] = 0x80000000;
#pragma unroll
for(int i = 9; i < 15; ++i) SHA256Buf[i] = 0x00000000;
SHA256Buf[15] = 0x00000100;
outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19);
}
outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf);
}
/*
outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf);
#pragma unroll
for(int i = 0; i < 11; ++i) SHA256Buf[i] = SWAP32(input[i + 16]);
SHA256Buf[11] = SWAP32(gid);
SHA256Buf[12] = 0x80000000;
SHA256Buf[13] = 0x00000000;
SHA256Buf[14] = 0x00000000;
SHA256Buf[15] = 0x00000380;
outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf);
((uint8 *)SHA256Buf)[0] = outbuf;
SHA256Buf[8] = 0x80000000;
for(int i = 9; i < 15; ++i) SHA256Buf[i] = 0x00000000;
SHA256Buf[15] = 0x00000100;
outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19);
outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf);
*/
/*
//outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf);
//outbuf = sha256_round(((uint16 *)SHA256Buf)[1], outbuf);
// outbuf would normall be SWAP32'd here, but it'll need it again
// once we use it as input to the next SHA256, so it negates.
((uint8 *)SHA256Buf)[0] = outbuf;
SHA256Buf[8] = 0x80000000;
for(int i = 9; i < 15; ++i) SHA256Buf[i] = 0x00000000;
SHA256Buf[15] = 0x00000100;
outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19);
outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf);
*/
outbuf.s0 = SWAP32(outbuf.s0);
outbuf.s1 = SWAP32(outbuf.s1);
outbuf.s2 = SWAP32(outbuf.s2);
outbuf.s3 = SWAP32(outbuf.s3);
outbuf.s4 = SWAP32(outbuf.s4);
outbuf.s5 = SWAP32(outbuf.s5);
outbuf.s6 = SWAP32(outbuf.s6);
outbuf.s7 = SWAP32(outbuf.s7);
ctx[get_global_id(0) - get_global_offset(0)] = outbuf;
}
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search1(__global uint8 *ctx)
{
ulong W[16] = { 0UL }, SHA512Out[8];
uint SHA256Buf[16];
uint8 outbuf = ctx[get_global_id(0) - get_global_offset(0)];
((uint8 *)W)[0] = outbuf;
for(int i = 0; i < 4; ++i) W[i] = SWAP64(W[i]);
W[4] = 0x8000000000000000UL;
W[15] = 0x0000000000000100UL;
for(int i = 0; i < 8; ++i) SHA512Out[i] = SHA512_INIT[i];
SHA512Block(W, SHA512Out);
for(int i = 0; i < 8; ++i) SHA512Out[i] = SWAP64(SHA512Out[i]);
uint RMD160_0[16] = { 0U };
uint RMD160_1[16] = { 0U };
uint RMD160_0_Out[5], RMD160_1_Out[5];
for(int i = 0; i < 4; ++i)
{
((ulong *)RMD160_0)[i] = SHA512Out[i];
((ulong *)RMD160_1)[i] = SHA512Out[i + 4];
}
RMD160_0[8] = RMD160_1[8] = 0x00000080;
RMD160_0[14] = RMD160_1[14] = 0x00000100;
for(int i = 0; i < 5; ++i)
{
RMD160_0_Out[i] = RMD160_IV[i];
RMD160_1_Out[i] = RMD160_IV[i];
}
RIPEMD160_ROUND_BODY(RMD160_0, RMD160_0_Out);
RIPEMD160_ROUND_BODY(RMD160_1, RMD160_1_Out);
for(int i = 0; i < 5; ++i) SHA256Buf[i] = SWAP32(RMD160_0_Out[i]);
for(int i = 5; i < 10; ++i) SHA256Buf[i] = SWAP32(RMD160_1_Out[i - 5]);
SHA256Buf[10] = 0x80000000;
for(int i = 11; i < 15; ++i) SHA256Buf[i] = 0x00000000U;
SHA256Buf[15] = 0x00000140;
outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19);
outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf);
ctx[get_global_id(0) - get_global_offset(0)] = outbuf;
}
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search2(__global uint8 *ctx, __global uint *output, ulong target)
{
uint SHA256Buf[16] = { 0U };
uint gid = get_global_id(0);
uint8 outbuf = ctx[get_global_id(0) - get_global_offset(0)];
((uint8 *)SHA256Buf)[0] = outbuf;
SHA256Buf[8] = 0x80000000;
for(int i = 9; i < 15; ++i) SHA256Buf[i] = 0x00000000;
SHA256Buf[15] = 0x00000100;
outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19);
outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf);
outbuf.s6 = SWAP32(outbuf.s6);
outbuf.s7 = SWAP32(outbuf.s7);
if(as_ulong(outbuf.s67) <= target)
output[atomic_inc(output+0xFF)] = SWAP32(gid);
}

314
kernel/yescrypt-multi.cl

@ -1,314 +0,0 @@
/*
* "yescrypt" kernel implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2015 djm34
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author djm34
*/
#if !defined(cl_khr_byte_addressable_store)
#error "Device does not support unaligned stores"
#endif
#include "yescrypt_essential.cl"
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search(__global const uchar* restrict input, __global uint* restrict output, __global uchar *padcache, __global uchar* buff1, __global uchar* buff2, __global uchar* buff3, const uint target)
{
__global ulong16 *hashbuffer = (__global ulong16 *)(padcache + (2048 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
__global ulong16 *prevstate = (__global ulong16 *)(buff1 + (64 * 128 * sizeof(ulong)*(get_global_id(0) % MAX_GLOBAL_THREADS)));
__global uint8 *sha256tokeep = (__global uint8 *)(buff3 + (8 * sizeof(uint)*(get_global_id(0) % MAX_GLOBAL_THREADS)));
__global ulong16 *Bdev = (__global ulong16 *)(buff2 + (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
uint nonce = (get_global_id(0));
uint data[20];
uint16 in;
uint8 state1, state2;
// uint8 sha256tokeep;
// ulong16 Bdev[8]; // will require an additional buffer
((uint16 *)data)[0] = ((__global const uint16 *)input)[0];
((uint4 *)data)[4] = ((__global const uint4 *)input)[4];
// for (int i = 0; i<20; i++) { data[i] = SWAP32(data[i]); }
// if (nonce == 10) { printf("data %08x %08x\n", data[0], data[1]); }
uint8 passwd = sha256_80(data, nonce);
//pbkdf
in.lo = pad1.lo ^ passwd;
in.hi = pad1.hi;
state1 = sha256_Transform(in, H256);
in.lo = pad2.lo ^ passwd;
in.hi = pad2.hi;
state2 = sha256_Transform(in, H256);
in = ((uint16*)data)[0];
state1 = sha256_Transform(in, state1);
#pragma unroll 1
for (int i = 0; i<8; i++)
{
uint16 result;
in = pad3;
in.s0 = data[16];
in.s1 = data[17];
in.s2 = data[18];
in.s3 = nonce;
in.s4 = 4 * i + 1;
in.lo = sha256_Transform(in, state1);
in.hi = pad4;
result.lo = swapvec(sha256_Transform(in, state2));
if (i == 0) sha256tokeep[0] = result.lo;
in = pad3;
in.s0 = data[16];
in.s1 = data[17];
in.s2 = data[18];
in.s3 = nonce;
in.s4 = 4 * i + 2;
in.lo = sha256_Transform(in, state1);
in.hi = pad4;
result.hi = swapvec(sha256_Transform(in, state2));
Bdev[i].lo = as_ulong8(shuffle(result));
// Bdev[i].lo = as_ulong8(result);
in = pad3;
in.s0 = data[16];
in.s1 = data[17];
in.s2 = data[18];
in.s3 = nonce;
in.s4 = 4 * i + 3;
in.lo = sha256_Transform(in, state1);
in.hi = pad4;
result.lo = swapvec(sha256_Transform(in, state2));
in = pad3;
in.s0 = data[16];
in.s1 = data[17];
in.s2 = data[18];
in.s3 = nonce;
in.s4 = 4 * i + 4;
in.lo = sha256_Transform(in, state1);
in.hi = pad4;
result.hi = swapvec(sha256_Transform(in, state2));
Bdev[i].hi = as_ulong8(shuffle(result));
// Bdev[i].hi = as_ulong8(result);
}
//mixing1
prevstate[0] = Bdev[0];
Bdev[0] = blockmix_salsa8_small2(Bdev[0]);
prevstate[1] = Bdev[0];
Bdev[0] = blockmix_salsa8_small2(Bdev[0]);
uint n = 1;
#pragma unroll 1
for (uint i = 2; i < 64; i++)
{
prevstate[i] = Bdev[0];
if ((i&(i - 1)) == 0) n = n << 1;
uint j = as_uint2(Bdev[0].hi.s0).x & (n - 1);
j += i - n;
Bdev[0] ^= prevstate[j];
Bdev[0] = blockmix_salsa8_small2(Bdev[0]);
}
}
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search1(__global uchar *buffer1, __global uchar *buffer2)
{
}
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search2(__global uchar *padcache, __global uchar *buff1, __global uchar *buff2)
{
__global ulong16 *hashbuffer = (__global ulong16 *)(padcache + (2048 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
__global ulong16* prevstate = (__global ulong16 *)(buff1 + (64 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
__global ulong16 *Bdev = (__global ulong16 *)(buff2 + (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
for (int i = 0; i<8; i++)
hashbuffer[i] = Bdev[i];
blockmix_pwxform((__global ulong8*)Bdev, prevstate);
for (int i = 0; i<8; i++)
hashbuffer[i + 8] = Bdev[i];
blockmix_pwxform((__global ulong8*)Bdev, prevstate);
int n = 1;
#pragma unroll 1
for (int i = 2; i < 2048; i ++)
{
for (int k = 0; k<8; k++)
(hashbuffer + 8 * i)[k] = Bdev[k];
if ((i&(i - 1)) == 0) n = n << 1;
uint j = as_uint2(Bdev[7].hi.s0).x & (n - 1);
j += i - n;
for (int k = 0; k < 8; k++)
Bdev[k] ^= (hashbuffer + 8 * j)[k];
blockmix_pwxform((__global ulong8*)Bdev, prevstate);
}
}
/*
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search3(__global uchar *buffer1, __global uchar *buffer2)
{
}
*/
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search3(__global uchar *padcache, __global uchar *buff1, __global uchar *buff2)
{
__global ulong16 *hashbuffer = (__global ulong16 *)(padcache + (2048 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
__global ulong16* prevstate = (__global ulong16 *)(buff1 + (64 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
__global ulong16 *Bdev = (__global ulong16 *)(buff2 + (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
#pragma unroll 1
for (int z = 0; z < 684; z++)
{
uint j = as_uint2(Bdev[7].hi.s0).x & 2047;
for (int k = 0; k < 8; k++)
Bdev[k] ^= (hashbuffer + 8 * j)[k];
if (z<682)
for (int k = 0; k<8; k++)
(hashbuffer + 8 * j)[k] = Bdev[k];
blockmix_pwxform((__global ulong8*)Bdev, prevstate);
////
}
}
/*
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search5(__global uchar *buffer1, __global uchar *buffer2)
{
}
*/
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search4(__global const uchar* restrict input, __global uint* restrict output, __global uchar *buff2,__global uchar* buff3, const uint target)
{
__global ulong16 *Bdev = (__global ulong16 *)(buff2 + (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
__global uint8 *sha256tokeep = (__global uint8 *)(buff3 + (8 * sizeof(uint)*(get_global_id(0) % MAX_GLOBAL_THREADS)));
uint nonce = (get_global_id(0));
uint data[20];
((uint16 *)data)[0] = ((__global const uint16 *)input)[0];
((uint4 *)data)[4] = ((__global const uint4 *)input)[4];
// for (int i = 0; i<20; i++) { data[i] = SWAP32(data[i]); }
uint8 swpass = swapvec(sha256tokeep[0]);
uint16 in;
uint8 state1,state2;
in.lo = pad1.lo ^ swpass;
in.hi = pad1.hi;
state1 = sha256_Transform(in, H256);
in.lo = pad2.lo ^ swpass;
in.hi = pad2.hi;
state2 = sha256_Transform(in, H256);
#pragma unroll 1
for (int i = 0; i<8; i++) {
in = unshuffle(Bdev[i].lo);
in = swapvec16(in);
state1 = sha256_Transform(in, state1);
in = unshuffle(Bdev[i].hi);
in = swapvec16(in);
state1 = sha256_Transform(in, state1);
}
in = pad5;
state1 = sha256_Transform(in, state1);
in.lo = state1;
in.hi = pad4;
uint8 res = sha256_Transform(in, state2);
//hmac and final sha
in.lo = pad1.lo ^ res;
in.hi = pad1.hi;
state1 = sha256_Transform(in, H256);
in.lo = pad2.lo ^ res;
in.hi = pad2.hi;
state2 = sha256_Transform(in, H256);
in = ((uint16*)data)[0];
state1 = sha256_Transform(in, state1);
in = padsha80;
in.s0 = data[16];
in.s1 = data[17];
in.s2 = data[18];
in.s3 = get_global_id(0);
in.sf = 0x480;
state1 = sha256_Transform(in, state1);
in.lo = state1;
in.hi = pad4;
state1 = sha256_Transform(in, state2);
// state2 = H256;
in.lo = state1;
in.hi = pad4;
in.sf = 0x100;
res = sha256_Transform(in, H256);
if (SWAP32(res.s7) <= (target))
output[atomic_inc(output + 0xFF)] = (nonce);
}

253
kernel/yescrypt.cl

@ -1,253 +0,0 @@
/*
* "yescrypt" kernel implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2015 djm34
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author djm34
*/
#if !defined(cl_khr_byte_addressable_store)
#error "Device does not support unaligned stores"
#endif
#include "yescrypt_essential.cl"
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search(__global const uchar* restrict input, __global uint* restrict output, __global uchar *padcache, __global uchar* buff1, __global uchar* buff2, const uint target)
{
__global ulong16 *hashbuffer = (__global ulong16 *)(padcache + (2048 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
__global ulong16 *prevstate = (__global ulong16 *)(buff1 + (64 * 128 * sizeof(ulong)*(get_global_id(0) % MAX_GLOBAL_THREADS)));
__global ulong16 *Bdev = (__global ulong16 *)(buff2 + (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS)));
uint nonce = (get_global_id(0));
uint data[20];
uint16 in;
uint8 state1, state2;
uint8 sha256tokeep;
((uint16 *)data)[0] = ((__global const uint16 *)input)[0];
((uint4 *)data)[4] = ((__global const uint4 *)input)[4];
for (int i = 0; i<20; i++) { data[i] = SWAP32(data[i]); }
// if (nonce == 10) { printf("data %08x %08x\n", data[0], data[1]); }
uint8 passwd = sha256_80(data, nonce);
//pbkdf
in.lo = pad1.lo ^ passwd;
in.hi = pad1.hi;
state1 = sha256_Transform(in, H256);
in.lo = pad2.lo ^ passwd;
in.hi = pad2.hi;
state2 = sha256_Transform(in, H256);
in = ((uint16*)data)[0];
state1 = sha256_Transform(in, state1);
#pragma unroll 1
for (int i = 0; i<8; i++)
{
uint16 result;
in = pad3;
in.s0 = data[16];
in.s1 = data[17];
in.s2 = data[18];
in.s3 = nonce;
in.s4 = 4 * i + 1;
in.lo = sha256_Transform(in, state1);
in.hi = pad4;
result.lo = swapvec(sha256_Transform(in, state2));
if (i == 0) sha256tokeep = result.lo;
in = pad3;
in.s0 = data[16];
in.s1 = data[17];
in.s2 = data[18];
in.s3 = nonce;
in.s4 = 4 * i + 2;
in.lo = sha256_Transform(in, state1);
in.hi = pad4;
result.hi = swapvec(sha256_Transform(in, state2));
Bdev[i].lo = as_ulong8(shuffle(result));
in = pad3;
in.s0 = data[16];
in.s1 = data[17];
in.s2 = data[18];
in.s3 = nonce;
in.s4 = 4 * i + 3;
in.lo = sha256_Transform(in, state1);
in.hi = pad4;
result.lo = swapvec(sha256_Transform(in, state2));
in = pad3;
in.s0 = data[16];
in.s1 = data[17];
in.s2 = data[18];
in.s3 = nonce;
in.s4 = 4 * i + 4;
in.lo = sha256_Transform(in, state1);
in.hi = pad4;
result.hi = swapvec(sha256_Transform(in, state2));
Bdev[i].hi = as_ulong8(shuffle(result));
}
//mixing1
prevstate[0] = Bdev[0];
Bdev[0] = blockmix_salsa8_small2(Bdev[0]);
prevstate[1] = Bdev[0];
Bdev[0] = blockmix_salsa8_small2(Bdev[0]);
uint n = 1;
#pragma unroll 1
for (uint i = 2; i < 64; i++)
{
prevstate[i] = Bdev[0];
if ((i&(i - 1)) == 0) n = n << 1;
uint j = as_uint2(Bdev[0].hi.s0).x & (n - 1);
j += i - n;
Bdev[0] ^= prevstate[j];
Bdev[0] = blockmix_salsa8_small2(Bdev[0]);
}
for (int i = 0; i<8; i++)
hashbuffer[i] = Bdev[i];
blockmix_pwxform((__global ulong8*)Bdev, prevstate);
for (int i = 0; i<8; i++)
hashbuffer[i + 8] = Bdev[i];
blockmix_pwxform((__global ulong8*)Bdev, prevstate);
n = 1;
#pragma unroll 1
for (int i = 2; i < 2048; i++)
{
for (int k = 0; k<8; k++)
(hashbuffer + 8 * i)[k] = Bdev[k];
if ((i&(i - 1)) == 0) n = n << 1;
uint j = as_uint2(Bdev[7].hi.s0).x & (n - 1);
j += i - n;
for (int k = 0; k < 8; k++)
Bdev[k] ^= (hashbuffer + 8 * j)[k];
blockmix_pwxform((__global ulong8*)Bdev, prevstate);
}
#pragma unroll 1
for (int z = 0; z < 684; z++)
{
uint j = as_uint2(Bdev[7].hi.s0).x & 2047;
for (int k = 0; k < 8; k++)
Bdev[k] ^= (hashbuffer + 8 * j)[k];
if (z<682)
for (int k = 0; k<8; k++)
(hashbuffer + 8 * j)[k] = Bdev[k];
blockmix_pwxform((__global ulong8*)Bdev, prevstate);
////
}
uint8 swpass = swapvec(sha256tokeep);
// uint16 in;
// uint8 state1, state2;
in.lo = pad1.lo ^ swpass;
in.hi = pad1.hi;
state1 = sha256_Transform(in, H256);
in.lo = pad2.lo ^ swpass;
in.hi = pad2.hi;
state2 = sha256_Transform(in, H256);
#pragma unroll 1
for (int i = 0; i<8; i++) {
in = unshuffle(Bdev[i].lo);
in = swapvec16(in);
state1 = sha256_Transform(in, state1);
in = unshuffle(Bdev[i].hi);
in = swapvec16(in);
state1 = sha256_Transform(in, state1);
}
in = pad5;
state1 = sha256_Transform(in, state1);
in.lo = state1;
in.hi = pad4;
uint8 res = sha256_Transform(in, state2);
//hmac and final sha
in.lo = pad1.lo ^ res;
in.hi = pad1.hi;
state1 = sha256_Transform(in, H256);
in.lo = pad2.lo ^ res;
in.hi = pad2.hi;
state2 = sha256_Transform(in, H256);
in = ((uint16*)data)[0];
state1 = sha256_Transform(in, state1);
in = padsha80;
in.s0 = data[16];
in.s1 = data[17];
in.s2 = data[18];
in.s3 = get_global_id(0);
in.sf = 0x480;
state1 = sha256_Transform(in, state1);
in.lo = state1;
in.hi = pad4;
state1 = sha256_Transform(in, state2);
// state2 = H256;
in.lo = state1;
in.hi = pad4;
in.sf = 0x100;
res = sha256_Transform(in, H256);
if (SWAP32(res.s7) <= (target))
output[atomic_inc(output + 0xFF)] = (nonce);
}

760
kernel/yescrypt_essential.cl

@ -1,760 +0,0 @@
/*
* "yescrypt" kernel implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2015 djm34
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author djm34
*/
#define ROL32(x, n) rotate(x, (uint) n)
#define SWAP32(a) (as_uint(as_uchar4(a).wzyx))
//#define ROL32(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
#define HASH_MEMORY 4096
#define SALSA(a,b,c,d) do { \
t =a+d; b^=ROL32(t, 7U); \
t =b+a; c^=ROL32(t, 9U); \
t =c+b; d^=ROL32(t, 13U); \
t =d+c; a^=ROL32(t, 18U); \
} while(0)
#define SALSA_CORE(state) do { \
\
SALSA(state.s0,state.s4,state.s8,state.sc); \
SALSA(state.s5,state.s9,state.sd,state.s1); \
SALSA(state.sa,state.se,state.s2,state.s6); \
SALSA(state.sf,state.s3,state.s7,state.sb); \
SALSA(state.s0,state.s1,state.s2,state.s3); \
SALSA(state.s5,state.s6,state.s7,state.s4); \
SALSA(state.sa,state.sb,state.s8,state.s9); \
SALSA(state.sf,state.sc,state.sd,state.se); \
} while(0)
#define uSALSA_CORE(state) do { \
\
SALSA(state.s0,state.s4,state.s8,state.sc); \
SALSA(state.s1,state.s5,state.s9,state.sd); \
SALSA(state.s2,state.s6,state.sa,state.se); \
SALSA(state.s3,state.s7,state.sb,state.sf); \
SALSA(state.s0,state.sd,state.sa,state.s7); \
SALSA(state.s1,state.se,state.sb,state.s4); \
SALSA(state.s2,state.sf,state.s8,state.s5); \
SALSA(state.s3,state.sc,state.s9,state.s6); \
} while(0)
#define unshuffle(state) (as_uint16(state).s0da741eb852fc963)
#define shuffle(state) (as_uint16(state).s05af49e38d27c16b)
static __constant uint16 pad1 =
{
0x36363636, 0x36363636, 0x36363636, 0x36363636,
0x36363636, 0x36363636, 0x36363636, 0x36363636,
0x36363636, 0x36363636, 0x36363636, 0x36363636,
0x36363636, 0x36363636, 0x36363636, 0x36363636
};
static __constant uint16 pad2 =
{
0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c,
0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c,
0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c,
0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c, 0x5c5c5c5c
};
static __constant uint16 pad5 =
{
0x00000001, 0x80000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00002220
};
static __constant uint16 pad3 =
{
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x80000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x000004a0
};
static __constant uint16 padsha80 =
{
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x80000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000280
};
static __constant uint8 pad4 =
{
0x80000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000300
};
static __constant uint8 H256 = {
0x6A09E667, 0xBB67AE85, 0x3C6EF372,
0xA54FF53A, 0x510E527F, 0x9B05688C,
0x1F83D9AB, 0x5BE0CD19
};
inline uint8 swapvec(uint8 buf)
{
uint8 vec;
vec.s0 = SWAP32(buf.s0);
vec.s1 = SWAP32(buf.s1);
vec.s2 = SWAP32(buf.s2);
vec.s3 = SWAP32(buf.s3);
vec.s4 = SWAP32(buf.s4);
vec.s5 = SWAP32(buf.s5);
vec.s6 = SWAP32(buf.s6);
vec.s7 = SWAP32(buf.s7);
return vec;
}
inline uint16 swapvec16(uint16 buf)
{
uint16 vec;
vec.s0 = SWAP32(buf.s0);
vec.s1 = SWAP32(buf.s1);
vec.s2 = SWAP32(buf.s2);
vec.s3 = SWAP32(buf.s3);
vec.s4 = SWAP32(buf.s4);
vec.s5 = SWAP32(buf.s5);
vec.s6 = SWAP32(buf.s6);
vec.s7 = SWAP32(buf.s7);
vec.s8 = SWAP32(buf.s8);
vec.s9 = SWAP32(buf.s9);
vec.sa = SWAP32(buf.sa);
vec.sb = SWAP32(buf.sb);
vec.sc = SWAP32(buf.sc);
vec.sd = SWAP32(buf.sd);
vec.se = SWAP32(buf.se);
vec.sf = SWAP32(buf.sf);
return vec;
}
ulong8 salsa20_8(uint16 Bx)
{
uint t;
uint16 st = Bx;
uSALSA_CORE(st);
uSALSA_CORE(st);
uSALSA_CORE(st);
uSALSA_CORE(st);
return(as_ulong8(st + Bx));
}
ulong8 salsa20_8n(uint16 Bx)
{
uint t;
uint16 st = Bx;
SALSA_CORE(st);
SALSA_CORE(st);
SALSA_CORE(st);
SALSA_CORE(st);
return(as_ulong8(st + Bx));
}
ulong16 blockmix_salsa8_small2(ulong16 Bin)
{
ulong8 X = Bin.hi;
X ^= Bin.lo;
X = salsa20_8(as_uint16(X));
Bin.lo = X;
X ^= Bin.hi;
X = salsa20_8(as_uint16(X));
Bin.hi = X;
return(Bin);
}
/*
uint16 salsa20_8_2(uint16 Bx)
{
uint t;
uint16 st = Bx;
uSALSA_CORE(st);
uSALSA_CORE(st);
uSALSA_CORE(st);
uSALSA_CORE(st);
return(st + Bx);
}
ulong16 blockmix_salsa8_small2(ulong16 Bin)
{
uint16 X = as_uint16(Bin.hi);
X ^= as_uint16(Bin.lo);
X = salsa20_8_2(as_uint16(X));
Bin.lo = as_ulong8(X);
X ^= as_uint16(Bin.hi);
X = salsa20_8_2(as_uint16(X));
Bin.hi = as_ulong8(X);
return(Bin);
}
*/
inline ulong2 madd4long2(uint4 a, uint4 b)
{
uint4 result;
result.x = a.x*a.y + b.x;
result.y = b.y + mad_hi(a.x, a.y, b.x);
result.z = a.z*a.w + b.z;
result.w = b.w + mad_hi(a.z, a.w, b.z);
return as_ulong2(result);
}
inline ulong2 madd4long3(uint4 a, ulong2 b)
{
ulong2 result;
result.x = (ulong)a.x*(ulong)a.y + b.x;
result.y = (ulong)a.z*(ulong)a.w + b.y;
return result;
}
inline ulong8 block_pwxform_long_old(ulong8 Bout, __global ulong16 *prevstate)
{
ulong2 vec = Bout.lo.lo;
for (int i = 0; i < 6; i++)
{
ulong2 p0, p1;
uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF);
p0 = ((__global ulong2*)(prevstate ))[x.x];
vec = madd4long3(as_uint4(vec), p0);
p1 = ((__global ulong2*)(prevstate + 32))[x.y];
vec ^= p1;
}
Bout.lo.lo = vec;
vec = Bout.lo.hi;
for (int i = 0; i < 6; i++)
{
ulong2 p0, p1;
uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF);
p0 = ((__global ulong2*)(prevstate))[x.x];
vec = madd4long3(as_uint4(vec), p0);
p1 = ((__global ulong2*)(prevstate + 32))[x.y];
vec ^= p1;
}
Bout.lo.hi = vec;
vec = Bout.hi.lo;
for (int i = 0; i < 6; i++)
{
ulong2 p0, p1;
uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF);
p0 = ((__global ulong2*)(prevstate))[x.x];
vec = madd4long3(as_uint4(vec), p0);
p1 = ((__global ulong2*)(prevstate + 32))[x.y];
vec ^= p1;
}
Bout.hi.lo = vec;
vec = Bout.hi.hi;
for (int i = 0; i < 6; i++)
{
ulong2 p0, p1;
uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF);
p0 = ((__global ulong2*)(prevstate))[x.x];
vec = madd4long3(as_uint4(vec), p0);
p1 = ((__global ulong2*)(prevstate + 32))[x.y];
vec ^= p1;
}
Bout.hi.hi = vec;
return(Bout);
}
inline ulong8 block_pwxform_long(ulong8 Bout, __global ulong2 *prevstate)
{
ulong2 vec = Bout.lo.lo;
for (int i = 0; i < 6; i++)
{
ulong2 p0, p1;
uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF);
p0 = prevstate[x.x];
vec = madd4long3(as_uint4(vec), p0);
p1 = (prevstate + 32*8)[x.y];
vec ^= p1;
}
Bout.lo.lo = vec;
vec = Bout.lo.hi;
for (int i = 0; i < 6; i++)
{
ulong2 p0, p1;
uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF);
p0 = prevstate[x.x];
vec = madd4long3(as_uint4(vec), p0);
p1 = (prevstate + 32 * 8)[x.y];
vec ^= p1;
}
Bout.lo.hi = vec;
vec = Bout.hi.lo;
for (int i = 0; i < 6; i++)
{
ulong2 p0, p1;
uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF);
p0 = prevstate[x.x];
vec = madd4long3(as_uint4(vec), p0);
p1 = (prevstate + 32 * 8)[x.y];
vec ^= p1;
}
Bout.hi.lo = vec;
vec = Bout.hi.hi;
for (int i = 0; i < 6; i++)
{
ulong2 p0, p1;
uint2 x = as_uint2((vec.x >> 4) & 0x000000FF000000FF);
p0 = prevstate[x.x];
vec = madd4long3(as_uint4(vec), p0);
p1 = (prevstate + 32 * 8)[x.y];
vec ^= p1;
}
Bout.hi.hi = vec;
return(Bout);
}
inline void blockmix_pwxform(__global ulong8 *Bin, __global ulong16 *prevstate)
{
Bin[0] ^= Bin[15];
Bin[0] = block_pwxform_long_old(Bin[0], prevstate);
#pragma unroll 1
for (int i = 1; i < 16; i++)
{
Bin[i] ^= Bin[i - 1];
Bin[i] = block_pwxform_long_old(Bin[i], prevstate);
}
Bin[15] = salsa20_8(as_uint16(Bin[15]));
}
#define SHR(x, n) ((x) >> n)
#define S0(x) (ROL32(x, 25) ^ ROL32(x, 14) ^ SHR(x, 3))
#define S1(x) (ROL32(x, 15) ^ ROL32(x, 13) ^ SHR(x, 10))
#define S2(x) (ROL32(x, 30) ^ ROL32(x, 19) ^ ROL32(x, 10))
#define S3(x) (ROL32(x, 26) ^ ROL32(x, 21) ^ ROL32(x, 7))
#define P(a,b,c,d,e,f,g,h,x,K) \
{ \
temp1 = h + S3(e) + F1(e,f,g) + (K + x); \
d += temp1; h = temp1 + S2(a) + F0(a,b,c); \
}
#define PLAST(a,b,c,d,e,f,g,h,x,K) \
{ \
d += h + S3(e) + F1(e,f,g) + (x + K); \
}
#define F0(y, x, z) bitselect(z, y, z ^ x)
#define F1(x, y, z) bitselect(z, y, x)
#define R0 (W0 = S1(W14) + W9 + S0(W1) + W0)
#define R1 (W1 = S1(W15) + W10 + S0(W2) + W1)
#define R2 (W2 = S1(W0) + W11 + S0(W3) + W2)
#define R3 (W3 = S1(W1) + W12 + S0(W4) + W3)
#define R4 (W4 = S1(W2) + W13 + S0(W5) + W4)
#define R5 (W5 = S1(W3) + W14 + S0(W6) + W5)
#define R6 (W6 = S1(W4) + W15 + S0(W7) + W6)
#define R7 (W7 = S1(W5) + W0 + S0(W8) + W7)
#define R8 (W8 = S1(W6) + W1 + S0(W9) + W8)
#define R9 (W9 = S1(W7) + W2 + S0(W10) + W9)
#define R10 (W10 = S1(W8) + W3 + S0(W11) + W10)
#define R11 (W11 = S1(W9) + W4 + S0(W12) + W11)
#define R12 (W12 = S1(W10) + W5 + S0(W13) + W12)
#define R13 (W13 = S1(W11) + W6 + S0(W14) + W13)
#define R14 (W14 = S1(W12) + W7 + S0(W15) + W14)
#define R15 (W15 = S1(W13) + W8 + S0(W0) + W15)
#define RD14 (S1(W12) + W7 + S0(W15) + W14)
#define RD15 (S1(W13) + W8 + S0(W0) + W15)
/// generic sha transform
inline uint8 sha256_Transform(uint16 data, uint8 state)
{
uint temp1;
uint8 res = state;
uint W0 = data.s0;
uint W1 = data.s1;
uint W2 = data.s2;
uint W3 = data.s3;
uint W4 = data.s4;
uint W5 = data.s5;
uint W6 = data.s6;
uint W7 = data.s7;
uint W8 = data.s8;
uint W9 = data.s9;
uint W10 = data.sA;
uint W11 = data.sB;
uint W12 = data.sC;
uint W13 = data.sD;
uint W14 = data.sE;
uint W15 = data.sF;
#define v0 res.s0
#define v1 res.s1
#define v2 res.s2
#define v3 res.s3
#define v4 res.s4
#define v5 res.s5
#define v6 res.s6
#define v7 res.s7
P(v0, v1, v2, v3, v4, v5, v6, v7, W0, 0x428A2F98);
P(v7, v0, v1, v2, v3, v4, v5, v6, W1, 0x71374491);
P(v6, v7, v0, v1, v2, v3, v4, v5, W2, 0xB5C0FBCF);
P(v5, v6, v7, v0, v1, v2, v3, v4, W3, 0xE9B5DBA5);
P(v4, v5, v6, v7, v0, v1, v2, v3, W4, 0x3956C25B);
P(v3, v4, v5, v6, v7, v0, v1, v2, W5, 0x59F111F1);
P(v2, v3, v4, v5, v6, v7, v0, v1, W6, 0x923F82A4);
P(v1, v2, v3, v4, v5, v6, v7, v0, W7, 0xAB1C5ED5);
P(v0, v1, v2, v3, v4, v5, v6, v7, W8, 0xD807AA98);
P(v7, v0, v1, v2, v3, v4, v5, v6, W9, 0x12835B01);
P(v6, v7, v0, v1, v2, v3, v4, v5, W10, 0x243185BE);
P(v5, v6, v7, v0, v1, v2, v3, v4, W11, 0x550C7DC3);
P(v4, v5, v6, v7, v0, v1, v2, v3, W12, 0x72BE5D74);
P(v3, v4, v5, v6, v7, v0, v1, v2, W13, 0x80DEB1FE);
P(v2, v3, v4, v5, v6, v7, v0, v1, W14, 0x9BDC06A7);
P(v1, v2, v3, v4, v5, v6, v7, v0, W15, 0xC19BF174);
P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0xE49B69C1);
P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0xEFBE4786);
P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x0FC19DC6);
P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x240CA1CC);
P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x2DE92C6F);
P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4A7484AA);
P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5CB0A9DC);
P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x76F988DA);
P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x983E5152);
P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA831C66D);
P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xB00327C8);
P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xBF597FC7);
P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xC6E00BF3);
P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD5A79147);
P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0x06CA6351);
P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x14292967);
P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x27B70A85);
P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x2E1B2138);
P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x4D2C6DFC);
P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x53380D13);
P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x650A7354);
P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x766A0ABB);
P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x81C2C92E);
P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x92722C85);
P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0xA2BFE8A1);
P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA81A664B);
P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xC24B8B70);
P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xC76C51A3);
P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xD192E819);
P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD6990624);
P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0xF40E3585);
P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x106AA070);
P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x19A4C116);
P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x1E376C08);
P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x2748774C);
P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x34B0BCB5);
P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x391C0CB3);
P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4ED8AA4A);
P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5B9CCA4F);
P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x682E6FF3);
P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x748F82EE);
P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0x78A5636F);
P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0x84C87814);
P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0x8CC70208);
P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0x90BEFFFA);
P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xA4506CEB);
P(v2, v3, v4, v5, v6, v7, v0, v1, RD14, 0xBEF9A3F7);
P(v1, v2, v3, v4, v5, v6, v7, v0, RD15, 0xC67178F2);
#undef v0
#undef v1
#undef v2
#undef v3
#undef v4
#undef v5
#undef v6
#undef v7
return (res+state);
}
static inline uint8 sha256_round1(uint16 data)
{
uint temp1;
uint8 res;
uint W0 = data.s0;
uint W1 = data.s1;
uint W2 = data.s2;
uint W3 = data.s3;
uint W4 = data.s4;
uint W5 = data.s5;
uint W6 = data.s6;
uint W7 = data.s7;
uint W8 = data.s8;
uint W9 = data.s9;
uint W10 = data.sA;
uint W11 = data.sB;
uint W12 = data.sC;
uint W13 = data.sD;
uint W14 = data.sE;
uint W15 = data.sF;
uint v0 = 0x6A09E667;
uint v1 = 0xBB67AE85;
uint v2 = 0x3C6EF372;
uint v3 = 0xA54FF53A;
uint v4 = 0x510E527F;
uint v5 = 0x9B05688C;
uint v6 = 0x1F83D9AB;
uint v7 = 0x5BE0CD19;
P(v0, v1, v2, v3, v4, v5, v6, v7, W0, 0x428A2F98);
P(v7, v0, v1, v2, v3, v4, v5, v6, W1, 0x71374491);
P(v6, v7, v0, v1, v2, v3, v4, v5, W2, 0xB5C0FBCF);
P(v5, v6, v7, v0, v1, v2, v3, v4, W3, 0xE9B5DBA5);
P(v4, v5, v6, v7, v0, v1, v2, v3, W4, 0x3956C25B);
P(v3, v4, v5, v6, v7, v0, v1, v2, W5, 0x59F111F1);
P(v2, v3, v4, v5, v6, v7, v0, v1, W6, 0x923F82A4);
P(v1, v2, v3, v4, v5, v6, v7, v0, W7, 0xAB1C5ED5);
P(v0, v1, v2, v3, v4, v5, v6, v7, W8, 0xD807AA98);
P(v7, v0, v1, v2, v3, v4, v5, v6, W9, 0x12835B01);
P(v6, v7, v0, v1, v2, v3, v4, v5, W10, 0x243185BE);
P(v5, v6, v7, v0, v1, v2, v3, v4, W11, 0x550C7DC3);
P(v4, v5, v6, v7, v0, v1, v2, v3, W12, 0x72BE5D74);
P(v3, v4, v5, v6, v7, v0, v1, v2, W13, 0x80DEB1FE);
P(v2, v3, v4, v5, v6, v7, v0, v1, W14, 0x9BDC06A7);
P(v1, v2, v3, v4, v5, v6, v7, v0, W15, 0xC19BF174);
P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0xE49B69C1);
P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0xEFBE4786);
P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x0FC19DC6);
P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x240CA1CC);
P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x2DE92C6F);
P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4A7484AA);
P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5CB0A9DC);
P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x76F988DA);
P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x983E5152);
P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA831C66D);
P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xB00327C8);
P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xBF597FC7);
P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xC6E00BF3);
P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD5A79147);
P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0x06CA6351);
P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x14292967);
P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x27B70A85);
P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x2E1B2138);
P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x4D2C6DFC);
P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x53380D13);
P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x650A7354);
P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x766A0ABB);
P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x81C2C92E);
P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x92722C85);
P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0xA2BFE8A1);
P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA81A664B);
P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xC24B8B70);
P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xC76C51A3);
P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xD192E819);
P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD6990624);
P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0xF40E3585);
P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x106AA070);
P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x19A4C116);
P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x1E376C08);
P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x2748774C);
P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x34B0BCB5);
P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x391C0CB3);
P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4ED8AA4A);
P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5B9CCA4F);
P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x682E6FF3);
P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x748F82EE);
P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0x78A5636F);
P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0x84C87814);
P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0x8CC70208);
P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0x90BEFFFA);
P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xA4506CEB);
P(v2, v3, v4, v5, v6, v7, v0, v1, RD14, 0xBEF9A3F7);
P(v1, v2, v3, v4, v5, v6, v7, v0, RD15, 0xC67178F2);
res.s0 = v0 + 0x6A09E667;
res.s1 = v1 + 0xBB67AE85;
res.s2 = v2 + 0x3C6EF372;
res.s3 = v3 + 0xA54FF53A;
res.s4 = v4 + 0x510E527F;
res.s5 = v5 + 0x9B05688C;
res.s6 = v6 + 0x1F83D9AB;
res.s7 = v7 + 0x5BE0CD19;
return (res);
}
static inline uint8 sha256_round2(uint16 data,uint8 buf)
{
uint temp1;
uint8 res;
uint W0 = data.s0;
uint W1 = data.s1;
uint W2 = data.s2;
uint W3 = data.s3;
uint W4 = data.s4;
uint W5 = data.s5;
uint W6 = data.s6;
uint W7 = data.s7;
uint W8 = data.s8;
uint W9 = data.s9;
uint W10 = data.sA;
uint W11 = data.sB;
uint W12 = data.sC;
uint W13 = data.sD;
uint W14 = data.sE;
uint W15 = data.sF;
uint v0 = buf.s0;
uint v1 = buf.s1;
uint v2 = buf.s2;
uint v3 = buf.s3;
uint v4 = buf.s4;
uint v5 = buf.s5;
uint v6 = buf.s6;
uint v7 = buf.s7;
P(v0, v1, v2, v3, v4, v5, v6, v7, W0, 0x428A2F98);
P(v7, v0, v1, v2, v3, v4, v5, v6, W1, 0x71374491);
P(v6, v7, v0, v1, v2, v3, v4, v5, W2, 0xB5C0FBCF);
P(v5, v6, v7, v0, v1, v2, v3, v4, W3, 0xE9B5DBA5);
P(v4, v5, v6, v7, v0, v1, v2, v3, W4, 0x3956C25B);
P(v3, v4, v5, v6, v7, v0, v1, v2, W5, 0x59F111F1);
P(v2, v3, v4, v5, v6, v7, v0, v1, W6, 0x923F82A4);
P(v1, v2, v3, v4, v5, v6, v7, v0, W7, 0xAB1C5ED5);
P(v0, v1, v2, v3, v4, v5, v6, v7, W8, 0xD807AA98);
P(v7, v0, v1, v2, v3, v4, v5, v6, W9, 0x12835B01);
P(v6, v7, v0, v1, v2, v3, v4, v5, W10, 0x243185BE);
P(v5, v6, v7, v0, v1, v2, v3, v4, W11, 0x550C7DC3);
P(v4, v5, v6, v7, v0, v1, v2, v3, W12, 0x72BE5D74);
P(v3, v4, v5, v6, v7, v0, v1, v2, W13, 0x80DEB1FE);
P(v2, v3, v4, v5, v6, v7, v0, v1, W14, 0x9BDC06A7);
P(v1, v2, v3, v4, v5, v6, v7, v0, W15, 0xC19BF174);
P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0xE49B69C1);
P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0xEFBE4786);
P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x0FC19DC6);
P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x240CA1CC);
P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x2DE92C6F);
P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4A7484AA);
P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5CB0A9DC);
P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x76F988DA);
P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x983E5152);
P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA831C66D);
P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xB00327C8);
P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xBF597FC7);
P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xC6E00BF3);
P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD5A79147);
P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0x06CA6351);
P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x14292967);
P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x27B70A85);
P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x2E1B2138);
P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x4D2C6DFC);
P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x53380D13);
P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x650A7354);
P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x766A0ABB);
P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x81C2C92E);
P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x92722C85);
P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0xA2BFE8A1);
P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA81A664B);
P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xC24B8B70);
P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xC76C51A3);
P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xD192E819);
P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD6990624);
P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0xF40E3585);
P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x106AA070);
P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x19A4C116);
P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x1E376C08);
P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x2748774C);
P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x34B0BCB5);
P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x391C0CB3);
P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4ED8AA4A);
P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5B9CCA4F);
P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x682E6FF3);
P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x748F82EE);
P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0x78A5636F);
P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0x84C87814);
P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0x8CC70208);
P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0x90BEFFFA);
P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xA4506CEB);
P(v2, v3, v4, v5, v6, v7, v0, v1, RD14, 0xBEF9A3F7);
P(v1, v2, v3, v4, v5, v6, v7, v0, RD15, 0xC67178F2);
res.s0 = (v0 + buf.s0);
res.s1 = (v1 + buf.s1);
res.s2 = (v2 + buf.s2);
res.s3 = (v3 + buf.s3);
res.s4 = (v4 + buf.s4);
res.s5 = (v5 + buf.s5);
res.s6 = (v6 + buf.s6);
res.s7 = (v7 + buf.s7);
return (res);
}
static inline uint8 sha256_80(uint* data,uint nonce)
{
uint8 buf = sha256_round1( ((uint16*)data)[0]);
uint16 in = padsha80;
in.s0 = data[16];
in.s1 = data[17];
in.s2 = data[18];
in.s3 = nonce;
return(sha256_round2(in,buf));
}

124
ocl.c

@ -36,7 +36,6 @@
#include "ocl/binary_kernel.h" #include "ocl/binary_kernel.h"
#include "algorithm/neoscrypt.h" #include "algorithm/neoscrypt.h"
#include "algorithm/pluck.h" #include "algorithm/pluck.h"
#include "algorithm/yescrypt.h"
#include "algorithm/lyra2rev2.h" #include "algorithm/lyra2rev2.h"
/* FIXME: only here for global config vars, replace with configuration.h /* FIXME: only here for global config vars, replace with configuration.h
@ -500,91 +499,6 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency)); applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency));
} }
// Yescrypt TC
else if ((cgpu->algorithm.type == ALGO_YESCRYPT ||
algorithm->type == ALGO_YESCRYPT_MULTI) && !cgpu->opt_tc) {
size_t glob_thread_count;
long max_int;
unsigned char type = 0;
// determine which intensity type to use
if (cgpu->rawintensity > 0) {
glob_thread_count = cgpu->rawintensity;
max_int = glob_thread_count;
type = 2;
}
else if (cgpu->xintensity > 0) {
glob_thread_count = clState->compute_shaders * ((cgpu->algorithm.xintensity_shift) ? (1UL << (cgpu->algorithm.xintensity_shift + cgpu->xintensity)) : cgpu->xintensity);
max_int = cgpu->xintensity;
type = 1;
}
else {
glob_thread_count = 1UL << (cgpu->algorithm.intensity_shift + cgpu->intensity);
max_int = ((cgpu->dynamic) ? MAX_INTENSITY : cgpu->intensity);
}
glob_thread_count = ((glob_thread_count < cgpu->work_size) ? cgpu->work_size : glob_thread_count);
// if TC * scratchbuf size is too big for memory... reduce to max
if ((glob_thread_count * YESCRYPT_SCRATCHBUF_SIZE) >= (uint64_t)cgpu->max_alloc) {
/* Selected intensity will not run on this GPU. Not enough memory.
* Adapt the memory setting. */
// depending on intensity type used, reduce the intensity until it fits into the GPU max_alloc
switch (type) {
//raw intensity
case 2:
while ((glob_thread_count * YESCRYPT_SCRATCHBUF_SIZE) > (uint64_t)cgpu->max_alloc) {
--glob_thread_count;
}
max_int = glob_thread_count;
cgpu->rawintensity = glob_thread_count;
break;
//x intensity
case 1:
glob_thread_count = cgpu->max_alloc / YESCRYPT_SCRATCHBUF_SIZE;
max_int = glob_thread_count / clState->compute_shaders;
while (max_int && ((clState->compute_shaders * (1UL << max_int)) > glob_thread_count)) {
--max_int;
}
/* Check if max_intensity is >0. */
if (max_int < MIN_XINTENSITY) {
applog(LOG_ERR, "GPU %d: Max xintensity is below minimum.", gpu);
max_int = MIN_XINTENSITY;
}
cgpu->xintensity = max_int;
glob_thread_count = clState->compute_shaders * (1UL << max_int);
break;
default:
glob_thread_count = cgpu->max_alloc / YESCRYPT_SCRATCHBUF_SIZE;
while (max_int && ((1UL << max_int) & glob_thread_count) == 0) {
--max_int;
}
/* Check if max_intensity is >0. */
if (max_int < MIN_INTENSITY) {
applog(LOG_ERR, "GPU %d: Max intensity is below minimum.", gpu);
max_int = MIN_INTENSITY;
}
cgpu->intensity = max_int;
glob_thread_count = 1UL << max_int;
break;
}
}
// TC is glob thread count
cgpu->thread_concurrency = glob_thread_count;
applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency));
}
// Lyra2re v2 TC // Lyra2re v2 TC
else if (cgpu->algorithm.type == ALGO_LYRA2REV2 && !cgpu->opt_tc) { else if (cgpu->algorithm.type == ALGO_LYRA2REV2 && !cgpu->opt_tc) {
size_t glob_thread_count; size_t glob_thread_count;
@ -788,26 +702,12 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
applog(LOG_DEBUG, "pluck buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize); applog(LOG_DEBUG, "pluck buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize);
// scrypt/n-scrypt // scrypt/n-scrypt
} }
else if (algorithm->type == ALGO_YESCRYPT || algorithm->type == ALGO_YESCRYPT_MULTI) {
/* The scratch/pad-buffer needs 32kBytes memory per thread. */
bufsize = YESCRYPT_SCRATCHBUF_SIZE * cgpu->thread_concurrency;
buf1size = PLUCK_SECBUF_SIZE * cgpu->thread_concurrency;
buf2size = 128 * 8 * 8 * cgpu->thread_concurrency;
buf3size= 8 * 8 * 4 * cgpu->thread_concurrency;
/* This is the input buffer. For yescrypt this is guaranteed to be
* 80 bytes only. */
readbufsize = 80;
applog(LOG_DEBUG, "yescrypt buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize);
// scrypt/n-scrypt
}
else if (algorithm->type == ALGO_LYRA2REV2) { else if (algorithm->type == ALGO_LYRA2REV2) {
/* The scratch/pad-buffer needs 32kBytes memory per thread. */ /* The scratch/pad-buffer needs 32kBytes memory per thread. */
bufsize = LYRA_SCRATCHBUF_SIZE * cgpu->thread_concurrency; bufsize = LYRA_SCRATCHBUF_SIZE * cgpu->thread_concurrency;
buf1size = 4* 8 * cgpu->thread_concurrency; //matrix buf1size = 4* 8 * cgpu->thread_concurrency; //matrix
/* This is the input buffer. For yescrypt this is guaranteed to be /* This is the input buffer.*/
* 80 bytes only. */
readbufsize = 80; readbufsize = 80;
applog(LOG_DEBUG, "lyra2REv2 buffer sizes: %lu RW, %lu RW", (unsigned long)bufsize, (unsigned long)buf1size); applog(LOG_DEBUG, "lyra2REv2 buffer sizes: %lu RW, %lu RW", (unsigned long)bufsize, (unsigned long)buf1size);
@ -839,27 +739,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
applog(LOG_WARNING, "Your settings come to %lu", (unsigned long)bufsize); applog(LOG_WARNING, "Your settings come to %lu", (unsigned long)bufsize);
} }
if (algorithm->type == ALGO_YESCRYPT || algorithm->type == ALGO_YESCRYPT_MULTI) { if (algorithm->type == ALGO_LYRA2REV2) {
// need additionnal buffers
clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf1size, NULL, &status);
if (status != CL_SUCCESS && !clState->buffer1) {
applog(LOG_DEBUG, "Error %d: clCreateBuffer (buffer1), decrease TC or increase LG", status);
return NULL;
}
clState->buffer2 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf2size, NULL, &status);
if (status != CL_SUCCESS && !clState->buffer2) {
applog(LOG_DEBUG, "Error %d: clCreateBuffer (buffer2), decrease TC or increase LG", status);
return NULL;
}
clState->buffer3 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf3size, NULL, &status);
if (status != CL_SUCCESS && !clState->buffer3) {
applog(LOG_DEBUG, "Error %d: clCreateBuffer (buffer3), decrease TC or increase LG", status);
return NULL;
}
}
else if (algorithm->type == ALGO_LYRA2REV2) {
// need additionnal buffers // need additionnal buffers
clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf1size, NULL, &status); clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf1size, NULL, &status);
if (status != CL_SUCCESS && !clState->buffer1) { if (status != CL_SUCCESS && !clState->buffer1) {

12
sph/gost.c

@ -667,8 +667,8 @@ const unsigned char C[12][64] = {
void AddModulo512(const void *a,const void *b,void *c) void AddModulo512(const void *a,const void *b,void *c)
{ {
const unsigned char *A=a, *B=b; const unsigned char *A = (unsigned char*)a, *B = (unsigned char*) b;
unsigned char *C=c; unsigned char *C = (unsigned char*) c;
int t = 0; int t = 0;
#ifdef FULL_UNROLL #ifdef FULL_UNROLL
#define ADDBYTE_8(i) t = A[i] + B[i] + (t >> 8); C[i] = t & 0xFF; #define ADDBYTE_8(i) t = A[i] + B[i] + (t >> 8); C[i] = t & 0xFF;
@ -751,8 +751,8 @@ void AddModulo512(const void *a,const void *b,void *c)
void AddXor512(const void *a,const void *b,void *c) void AddXor512(const void *a,const void *b,void *c)
{ {
const unsigned long long *A=a, *B=b; const unsigned long long *A = (unsigned long long *) a, *B = (unsigned long long *) b;
unsigned long long *C=c; unsigned long long *C = (unsigned long long *) c;
#ifdef FULL_UNROLL #ifdef FULL_UNROLL
C[0] = A[0] ^ B[0]; C[0] = A[0] ^ B[0];
C[1] = A[1] ^ B[1]; C[1] = A[1] ^ B[1];
@ -1051,7 +1051,7 @@ sph_gost256_init(void *cc)
void void
sph_gost256(void *cc, const void *data, size_t len) sph_gost256(void *cc, const void *data, size_t len)
{ {
hash_256(data, 8*len, cc); hash_256((const unsigned char*)data, 8 * len, (unsigned char*)cc);
} }
/* see sph_gost.h */ /* see sph_gost.h */
@ -1080,7 +1080,7 @@ sph_gost512_init(void *cc)
void void
sph_gost512(void *cc, const void *data, size_t len) sph_gost512(void *cc, const void *data, size_t len)
{ {
hash_512(data, 8*len, cc); hash_512((const unsigned char*)data, 8 * len, (unsigned char*)cc);
} }
/* see sph_gost.h */ /* see sph_gost.h */

12
winbuild/dist/include/config.h vendored

@ -9,6 +9,7 @@
#define CURL_HAS_KEEPALIVE 1 #define CURL_HAS_KEEPALIVE 1
#define HAVE_CURSES 1 #define HAVE_CURSES 1
#define HAVE_ADL 1 #define HAVE_ADL 1
#define HAVE_NVML 1
#define STDC_HEADERS 1 #define STDC_HEADERS 1
#define EXECV_2ND_ARG_TYPE char* const* #define EXECV_2ND_ARG_TYPE char* const*
@ -58,6 +59,9 @@
#ifdef HAVE_LIBCURL #ifdef HAVE_LIBCURL
#define CURL_STATICLIB 1 #define CURL_STATICLIB 1
#pragma comment(lib, "ws2_32.lib")
#pragma comment(lib, "zlib.lib")
#pragma comment(lib, "openssl.lib")
#pragma comment(lib, "libcurl_a.lib") #pragma comment(lib, "libcurl_a.lib")
#endif #endif
@ -67,16 +71,16 @@
#endif #endif
#define VERSION "v5.2.0" #define VERSION "5.3.9"
#define PACKAGE_NAME "sgminer" #define PACKAGE_NAME "sgminer"
#define PACKAGE_TARNAME "sgminer" #define PACKAGE_TARNAME "sgminer"
#define PACKAGE_VERSION "5.2.0" #define PACKAGE_VERSION "5.3.9"
#define PACKAGE_STRING "sgminer 5.2.0" #define PACKAGE_STRING "sgminer 5.3.9"
#define PACKAGE "sgminer" #define PACKAGE "sgminer"
#define SGMINER_PREFIX "" #define SGMINER_PREFIX ""
#include "gitversion.h" //#include "gitversion.h"
#include "winbuild.h" #include "winbuild.h"
#endif #endif

72
winbuild/sgminer.vcxproj

@ -263,51 +263,60 @@
<ClCompile Include="..\algorithm.c" /> <ClCompile Include="..\algorithm.c" />
<ClCompile Include="..\algorithm\animecoin.c" /> <ClCompile Include="..\algorithm\animecoin.c" />
<ClCompile Include="..\algorithm\bitblock.c" /> <ClCompile Include="..\algorithm\bitblock.c" />
<ClCompile Include="..\algorithm\blake256.c" />
<ClCompile Include="..\algorithm\blakecoin.c" />
<ClCompile Include="..\algorithm\credits.c" /> <ClCompile Include="..\algorithm\credits.c" />
<ClCompile Include="..\algorithm\darkcoin.c" />
<ClCompile Include="..\algorithm\decred.c" />
<ClCompile Include="..\algorithm\fresh.c" />
<ClCompile Include="..\algorithm\fuguecoin.c" />
<ClCompile Include="..\algorithm\gostcoin.c" />
<ClCompile Include="..\algorithm\groestlcoin.c" />
<ClCompile Include="..\algorithm\inkcoin.c" />
<ClCompile Include="..\algorithm\lyra2.c" /> <ClCompile Include="..\algorithm\lyra2.c" />
<ClCompile Include="..\algorithm\lyra2re.c" /> <ClCompile Include="..\algorithm\lyra2re.c" />
<ClCompile Include="..\algorithm\lyra2rev2.c" /> <ClCompile Include="..\algorithm\lyra2rev2.c" />
<ClCompile Include="..\algorithm\marucoin.c" />
<ClCompile Include="..\algorithm\maxcoin.c" />
<ClCompile Include="..\algorithm\myriadcoin-groestl.c" />
<ClCompile Include="..\algorithm\neoscrypt.c" /> <ClCompile Include="..\algorithm\neoscrypt.c" />
<ClCompile Include="..\algorithm\pascal.c" />
<ClCompile Include="..\algorithm\pluck.c" /> <ClCompile Include="..\algorithm\pluck.c" />
<ClCompile Include="..\algorithm\quarkcoin.c" />
<ClCompile Include="..\algorithm\qubitcoin.c" />
<ClCompile Include="..\algorithm\scrypt.c" />
<ClCompile Include="..\algorithm\sia.c" />
<ClCompile Include="..\algorithm\sibcoin.c" />
<ClCompile Include="..\algorithm\sifcoin.c" />
<ClCompile Include="..\algorithm\sponge.c" /> <ClCompile Include="..\algorithm\sponge.c" />
<ClCompile Include="..\algorithm\talkcoin.c" /> <ClCompile Include="..\algorithm\talkcoin.c" />
<ClCompile Include="..\algorithm\twecoin.c" />
<ClCompile Include="..\algorithm\whirlcoin.c" />
<ClCompile Include="..\algorithm\whirlpoolx.c" /> <ClCompile Include="..\algorithm\whirlpoolx.c" />
<ClCompile Include="..\algorithm\x14.c" /> <ClCompile Include="..\algorithm\x14.c" />
<ClCompile Include="..\algorithm\fresh.c" />
<ClCompile Include="..\algorithm\whirlcoin.c" />
<ClCompile Include="..\api.c" /> <ClCompile Include="..\api.c" />
<ClCompile Include="..\ccan\opt\helpers.c" /> <ClCompile Include="..\ccan\opt\helpers.c" />
<ClCompile Include="..\ccan\opt\opt.c" /> <ClCompile Include="..\ccan\opt\opt.c" />
<ClCompile Include="..\ccan\opt\parse.c" /> <ClCompile Include="..\ccan\opt\parse.c" />
<ClCompile Include="..\ccan\opt\usage.c" /> <ClCompile Include="..\ccan\opt\usage.c" />
<ClCompile Include="..\algorithm\darkcoin.c" />
<ClCompile Include="..\config_parser.c" /> <ClCompile Include="..\config_parser.c" />
<ClCompile Include="..\driver-opencl.c" /> <ClCompile Include="..\driver-opencl.c" />
<ClCompile Include="..\events.c" /> <ClCompile Include="..\events.c" />
<ClCompile Include="..\findnonce.c" /> <ClCompile Include="..\findnonce.c" />
<ClCompile Include="..\algorithm\fuguecoin.c" />
<ClCompile Include="..\algorithm\groestlcoin.c" />
<ClCompile Include="..\hexdump.c" /> <ClCompile Include="..\hexdump.c" />
<ClCompile Include="..\algorithm\inkcoin.c" />
<ClCompile Include="..\logging.c" /> <ClCompile Include="..\logging.c" />
<ClCompile Include="..\algorithm\marucoin.c" />
<ClCompile Include="..\algorithm\maxcoin.c" />
<ClCompile Include="..\algorithm\myriadcoin-groestl.c" />
<ClCompile Include="..\ocl.c" /> <ClCompile Include="..\ocl.c" />
<ClCompile Include="..\ocl\binary_kernel.c" /> <ClCompile Include="..\ocl\binary_kernel.c" />
<ClCompile Include="..\ocl\build_kernel.c" /> <ClCompile Include="..\ocl\build_kernel.c" />
<ClCompile Include="..\pool.c" /> <ClCompile Include="..\pool.c" />
<ClCompile Include="..\algorithm\quarkcoin.c" />
<ClCompile Include="..\algorithm\qubitcoin.c" />
<ClCompile Include="..\algorithm\scrypt.c" />
<ClCompile Include="..\sgminer.c" /> <ClCompile Include="..\sgminer.c" />
<ClCompile Include="..\algorithm\sifcoin.c" />
<ClCompile Include="..\sph\aes_helper.c" /> <ClCompile Include="..\sph\aes_helper.c" />
<ClCompile Include="..\sph\blake.c" /> <ClCompile Include="..\sph\blake.c" />
<ClCompile Include="..\sph\bmw.c" /> <ClCompile Include="..\sph\bmw.c" />
<ClCompile Include="..\sph\cubehash.c" /> <ClCompile Include="..\sph\cubehash.c" />
<ClCompile Include="..\sph\echo.c" /> <ClCompile Include="..\sph\echo.c" />
<ClCompile Include="..\sph\fugue.c" /> <ClCompile Include="..\sph\fugue.c" />
<ClCompile Include="..\sph\gost.c" />
<ClCompile Include="..\sph\groestl.c" /> <ClCompile Include="..\sph\groestl.c" />
<ClCompile Include="..\sph\hamsi.c" /> <ClCompile Include="..\sph\hamsi.c" />
<ClCompile Include="..\sph\hamsi_helper.c" /> <ClCompile Include="..\sph\hamsi_helper.c" />
@ -321,7 +330,6 @@
<ClCompile Include="..\sph\shavite.c" /> <ClCompile Include="..\sph\shavite.c" />
<ClCompile Include="..\sph\simd.c" /> <ClCompile Include="..\sph\simd.c" />
<ClCompile Include="..\sph\skein.c" /> <ClCompile Include="..\sph\skein.c" />
<ClCompile Include="..\algorithm\twecoin.c" />
<ClCompile Include="..\sph\whirlpool.c" /> <ClCompile Include="..\sph\whirlpool.c" />
<ClCompile Include="..\util.c" /> <ClCompile Include="..\util.c" />
</ItemGroup> </ItemGroup>
@ -330,52 +338,61 @@
<ClInclude Include="..\algorithm.h" /> <ClInclude Include="..\algorithm.h" />
<ClInclude Include="..\algorithm\animecoin.h" /> <ClInclude Include="..\algorithm\animecoin.h" />
<ClInclude Include="..\algorithm\bitblock.h" /> <ClInclude Include="..\algorithm\bitblock.h" />
<ClInclude Include="..\algorithm\blake256.h" />
<ClInclude Include="..\algorithm\blakecoin.h" />
<ClInclude Include="..\algorithm\credits.h" /> <ClInclude Include="..\algorithm\credits.h" />
<ClInclude Include="..\algorithm\darkcoin.h" />
<ClInclude Include="..\algorithm\decred.h" />
<ClInclude Include="..\algorithm\fresh.h" />
<ClInclude Include="..\algorithm\fuguecoin.h" />
<ClInclude Include="..\algorithm\gostcoin.h" />
<ClInclude Include="..\algorithm\groestlcoin.h" />
<ClInclude Include="..\algorithm\inkcoin.h" />
<ClInclude Include="..\algorithm\lyra2.h" /> <ClInclude Include="..\algorithm\lyra2.h" />
<ClInclude Include="..\algorithm\lyra2re.h" /> <ClInclude Include="..\algorithm\lyra2re.h" />
<ClInclude Include="..\algorithm\lyra2rev2.h" /> <ClInclude Include="..\algorithm\lyra2rev2.h" />
<ClInclude Include="..\algorithm\marucoin.h" />
<ClInclude Include="..\algorithm\maxcoin.h" />
<ClInclude Include="..\algorithm\myriadcoin-groestl.h" />
<ClInclude Include="..\algorithm\neoscrypt.h" /> <ClInclude Include="..\algorithm\neoscrypt.h" />
<ClInclude Include="..\algorithm\pascal.h" />
<ClInclude Include="..\algorithm\pluck.h" /> <ClInclude Include="..\algorithm\pluck.h" />
<ClInclude Include="..\algorithm\quarkcoin.h" />
<ClInclude Include="..\algorithm\qubitcoin.h" />
<ClInclude Include="..\algorithm\scrypt.h" />
<ClInclude Include="..\algorithm\sia.h" />
<ClInclude Include="..\algorithm\sibcoin.h" />
<ClInclude Include="..\algorithm\sifcoin.h" />
<ClInclude Include="..\algorithm\sponge.h" /> <ClInclude Include="..\algorithm\sponge.h" />
<ClInclude Include="..\algorithm\sysendian.h" /> <ClInclude Include="..\algorithm\sysendian.h" />
<ClInclude Include="..\algorithm\talkcoin.h" /> <ClInclude Include="..\algorithm\talkcoin.h" />
<ClInclude Include="..\algorithm\twecoin.h" />
<ClInclude Include="..\algorithm\whirlcoin.h" />
<ClInclude Include="..\algorithm\whirlpoolx.h" /> <ClInclude Include="..\algorithm\whirlpoolx.h" />
<ClInclude Include="..\algorithm\x14.h" /> <ClInclude Include="..\algorithm\x14.h" />
<ClInclude Include="..\algorithm\fresh.h" />
<ClInclude Include="..\algorithm\whirlcoin.h" />
<ClInclude Include="..\api.h" /> <ClInclude Include="..\api.h" />
<ClInclude Include="..\arg-nonnull.h" /> <ClInclude Include="..\arg-nonnull.h" />
<ClInclude Include="..\bench_block.h" /> <ClInclude Include="..\bench_block.h" />
<ClInclude Include="..\c++defs.h" /> <ClInclude Include="..\c++defs.h" />
<ClInclude Include="..\compat.h" /> <ClInclude Include="..\compat.h" />
<ClInclude Include="..\algorithm\darkcoin.h" />
<ClInclude Include="..\config_parser.h" /> <ClInclude Include="..\config_parser.h" />
<ClInclude Include="..\driver-opencl.h" /> <ClInclude Include="..\driver-opencl.h" />
<ClInclude Include="..\elist.h" /> <ClInclude Include="..\elist.h" />
<ClInclude Include="..\events.h" /> <ClInclude Include="..\events.h" />
<ClInclude Include="..\findnonce.h" /> <ClInclude Include="..\findnonce.h" />
<ClInclude Include="..\algorithm\fuguecoin.h" />
<ClInclude Include="..\algorithm\groestlcoin.h" />
<ClInclude Include="..\algorithm\inkcoin.h" />
<ClInclude Include="..\logging.h" /> <ClInclude Include="..\logging.h" />
<ClInclude Include="..\algorithm\marucoin.h" />
<ClInclude Include="..\algorithm\maxcoin.h" />
<ClInclude Include="..\miner.h" /> <ClInclude Include="..\miner.h" />
<ClInclude Include="..\algorithm\myriadcoin-groestl.h" />
<ClInclude Include="..\ocl.h" /> <ClInclude Include="..\ocl.h" />
<ClInclude Include="..\ocl\binary_kernel.h" /> <ClInclude Include="..\ocl\binary_kernel.h" />
<ClInclude Include="..\ocl\build_kernel.h" /> <ClInclude Include="..\ocl\build_kernel.h" />
<ClInclude Include="..\pool.h" /> <ClInclude Include="..\pool.h" />
<ClInclude Include="..\algorithm\quarkcoin.h" />
<ClInclude Include="..\algorithm\qubitcoin.h" />
<ClInclude Include="..\algorithm\scrypt.h" />
<ClInclude Include="..\algorithm\sifcoin.h" />
<ClInclude Include="..\sph\sha256_Y.h" /> <ClInclude Include="..\sph\sha256_Y.h" />
<ClInclude Include="..\sph\sph_blake.h" /> <ClInclude Include="..\sph\sph_blake.h" />
<ClInclude Include="..\sph\sph_bmw.h" /> <ClInclude Include="..\sph\sph_bmw.h" />
<ClInclude Include="..\sph\sph_cubehash.h" /> <ClInclude Include="..\sph\sph_cubehash.h" />
<ClInclude Include="..\sph\sph_echo.h" /> <ClInclude Include="..\sph\sph_echo.h" />
<ClInclude Include="..\sph\sph_fugue.h" /> <ClInclude Include="..\sph\sph_fugue.h" />
<ClInclude Include="..\sph\sph_gost.h" />
<ClInclude Include="..\sph\sph_groestl.h" /> <ClInclude Include="..\sph\sph_groestl.h" />
<ClInclude Include="..\sph\sph_hamsi.h" /> <ClInclude Include="..\sph\sph_hamsi.h" />
<ClInclude Include="..\sph\sph_jh.h" /> <ClInclude Include="..\sph\sph_jh.h" />
@ -388,7 +405,6 @@
<ClInclude Include="..\sph\sph_simd.h" /> <ClInclude Include="..\sph\sph_simd.h" />
<ClInclude Include="..\sph\sph_skein.h" /> <ClInclude Include="..\sph\sph_skein.h" />
<ClInclude Include="..\sph\sph_types.h" /> <ClInclude Include="..\sph\sph_types.h" />
<ClInclude Include="..\algorithm\twecoin.h" />
<ClInclude Include="..\sph\sph_whirlpool.h" /> <ClInclude Include="..\sph\sph_whirlpool.h" />
<ClInclude Include="..\uthash.h" /> <ClInclude Include="..\uthash.h" />
<ClInclude Include="..\util.h" /> <ClInclude Include="..\util.h" />

12
winbuild/sgminer.vcxproj.filters

@ -86,6 +86,9 @@
<ClCompile Include="..\sph\fugue.c"> <ClCompile Include="..\sph\fugue.c">
<Filter>Source Files\sph</Filter> <Filter>Source Files\sph</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="..\sph\gost.c">
<Filter>Source Files\sph</Filter>
</ClCompile>
<ClCompile Include="..\sph\groestl.c"> <ClCompile Include="..\sph\groestl.c">
<Filter>Source Files\sph</Filter> <Filter>Source Files\sph</Filter>
</ClCompile> </ClCompile>
@ -137,6 +140,9 @@
<ClCompile Include="..\algorithm\fuguecoin.c"> <ClCompile Include="..\algorithm\fuguecoin.c">
<Filter>Source Files\algorithm</Filter> <Filter>Source Files\algorithm</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="..\algorithm\gostcoin.c">
<Filter>Source Files\algorithm</Filter>
</ClCompile>
<ClCompile Include="..\algorithm\groestlcoin.c"> <ClCompile Include="..\algorithm\groestlcoin.c">
<Filter>Source Files\algorithm</Filter> <Filter>Source Files\algorithm</Filter>
</ClCompile> </ClCompile>
@ -310,6 +316,9 @@
<ClInclude Include="..\sph\sph_hamsi.h"> <ClInclude Include="..\sph\sph_hamsi.h">
<Filter>Header Files\sph</Filter> <Filter>Header Files\sph</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="..\sph\sph_gost.h">
<Filter>Header Files\sph</Filter>
</ClInclude>
<ClInclude Include="..\sph\sph_groestl.h"> <ClInclude Include="..\sph\sph_groestl.h">
<Filter>Header Files\sph</Filter> <Filter>Header Files\sph</Filter>
</ClInclude> </ClInclude>
@ -343,6 +352,9 @@
<ClInclude Include="..\algorithm\fuguecoin.h"> <ClInclude Include="..\algorithm\fuguecoin.h">
<Filter>Header Files\algorithm</Filter> <Filter>Header Files\algorithm</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="..\algorithm\gostcoin.h">
<Filter>Header Files\algorithm</Filter>
</ClInclude>
<ClInclude Include="..\algorithm\groestlcoin.h"> <ClInclude Include="..\algorithm\groestlcoin.h">
<Filter>Header Files\algorithm</Filter> <Filter>Header Files\algorithm</Filter>
</ClInclude> </ClInclude>

Loading…
Cancel
Save