diff --git a/Makefile.am b/Makefile.am index 7047bf10..0cda408e 100644 --- a/Makefile.am +++ b/Makefile.am @@ -73,7 +73,6 @@ sgminer_SOURCES += algorithm/whirlcoin.c algorithm/whirlcoin.h sgminer_SOURCES += algorithm/neoscrypt.c algorithm/neoscrypt.h sgminer_SOURCES += algorithm/whirlpoolx.c algorithm/whirlpoolx.h sgminer_SOURCES += algorithm/lyra2re.c algorithm/lyra2re.h algorithm/lyra2.c algorithm/lyra2.h algorithm/sponge.c algorithm/sponge.h -sgminer_SOURCES += algorithm/lyra2re_old.c algorithm/lyra2re_old.h sgminer_SOURCES += algorithm/pluck.c algorithm/pluck.h sgminer_SOURCES += algorithm/credits.c algorithm/credits.h sgminer_SOURCES += algorithm/yescrypt.h algorithm/yescrypt.c algorithm/yescrypt_core.h algorithm/yescrypt-opt.c algorithm/yescryptcommon.c algorithm/sysendian.h diff --git a/algorithm.c b/algorithm.c index 6acab924..07c7753c 100644 --- a/algorithm.c +++ b/algorithm.c @@ -33,9 +33,9 @@ #include "algorithm/neoscrypt.h" #include "algorithm/whirlpoolx.h" #include "algorithm/lyra2re.h" -#include "algorithm/lyra2re_old.h" +#include "algorithm/lyra2rev2.h" #include "algorithm/pluck.h" -#include "algorithm/yescrypt.h" +//#include "algorithm/yescrypt.h" #include "algorithm/credits.h" #include "compat.h" @@ -43,6 +43,7 @@ #include #include +bool opt_lyra; const char *algorithm_type_str[] = { "Unknown", "Credits", @@ -62,7 +63,7 @@ const char *algorithm_type_str[] = { "Neoscrypt", "WhirlpoolX", "Lyra2RE", - "Lyra2REv2" + "Lyra2REV2" "Pluck" "Yescrypt", "Yescrypt-multi" @@ -216,6 +217,7 @@ static cl_int queue_credits_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_ return status; } +#if 0 static cl_int queue_yescrypt_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads) { cl_kernel *kernel = &clState->kernel; @@ -309,6 +311,7 @@ static cl_int queue_yescrypt_multikernel(_clState *clState, dev_blk_ctx *blk, __ return status; } +#endif static cl_int queue_maxcoin_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) { @@ -764,40 +767,43 @@ static cl_int queue_whirlcoin_kernel(struct __clState *clState, struct _dev_blk_ static cl_int queue_whirlpoolx_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) { - uint64_t midblock[8], key[8] = { 0 }, tmp[8] = { 0 }; + cl_kernel *kernel = &clState->kernel; + unsigned int num = 0; cl_ulong le_target; cl_int status; le_target = *(cl_ulong *)(blk->work->device_target + 24); flip80(clState->cldata, blk->work->data); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL); - memcpy(midblock, clState->cldata, 64); - - // midblock = n, key = h - for (int i = 0; i < 10; ++i) { - tmp[0] = WHIRLPOOL_ROUND_CONSTANTS[i]; - whirlpool_round(key, tmp); - tmp[0] = 0; - whirlpool_round(midblock, tmp); - - for (int x = 0; x < 8; ++x) { - midblock[x] ^= key[x]; - } - } - - for (int i = 0; i < 8; ++i) { - midblock[i] ^= ((uint64_t *)(clState->cldata))[i]; - } - - status = clSetKernelArg(clState->kernel, 0, sizeof(cl_ulong8), (cl_ulong8 *)&midblock); - status |= clSetKernelArg(clState->kernel, 1, sizeof(cl_ulong), (void *)(((uint64_t *)clState->cldata) + 8)); - status |= clSetKernelArg(clState->kernel, 2, sizeof(cl_ulong), (void *)(((uint64_t *)clState->cldata) + 9)); - status |= clSetKernelArg(clState->kernel, 3, sizeof(cl_mem), (void *)&clState->outputBuffer); - status |= clSetKernelArg(clState->kernel, 4, sizeof(cl_ulong), (void *)&le_target); + CL_SET_ARG(clState->CLbuffer0); + CL_SET_ARG(clState->outputBuffer); + CL_SET_ARG(le_target); return status; } +typedef struct _algorithm_settings_t { + const char *name; /* Human-readable identifier */ + algorithm_type_t type; //common algorithm type + const char *kernelfile; /* alternate kernel file */ + double diff_multiplier1; + double diff_multiplier2; + double share_diff_multiplier; + uint32_t xintensity_shift; + uint32_t intensity_shift; + uint32_t found_idx; + unsigned long long diff_numerator; + uint32_t diff1targ; + size_t n_extra_kernels; + long rw_buffer_size; + cl_command_queue_properties cq_properties; + void (*regenhash)(struct work *); + cl_int (*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint); + void (*gen_hash)(const unsigned char *, unsigned int, unsigned char *); + void (*set_compile_options)(build_kernel_data *, struct cgpu_info *, algorithm_t *); +} algorithm_settings_t; + static cl_int queue_lyra2RE_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) { cl_kernel *kernel; @@ -842,7 +848,7 @@ static cl_int queue_lyra2RE_kernel(struct __clState *clState, struct _dev_blk_ct return status; } -static cl_int queue_lyra2REv2_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) +static cl_int queue_lyra2rev2_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) { cl_kernel *kernel; unsigned int num; @@ -945,6 +951,7 @@ static algorithm_settings_t algos[] = { +#if 0 #define A_YESCRYPT(a) \ { a, ALGO_YESCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, yescrypt_regenhash, queue_yescrypt_kernel, gen_hash, append_neoscrypt_compiler_options} A_YESCRYPT("yescrypt"), @@ -955,6 +962,7 @@ static algorithm_settings_t algos[] = { A_YESCRYPT_MULTI("yescrypt-multi"), #undef A_YESCRYPT_MULTI +#endif // kernels starting from this will have difficulty calculated by using quarkcoin algorithm #define A_QUARK(a, b) \ @@ -992,10 +1000,8 @@ static algorithm_settings_t algos[] = { { "fresh", ALGO_FRESH, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 4 * 16 * 4194304, 0, fresh_regenhash, queue_fresh_kernel, gen_hash, NULL }, - { "lyra2re", ALGO_LYRA2RE, "", 1, 128, 128, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 2 * 8 * 4194304, 0, lyra2reold_regenhash, queue_lyra2RE_kernel, gen_hash, NULL }, - - { "lyra2rev2", ALGO_LYRA2REv2, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 6, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, lyra2re_regenhash, queue_lyra2REv2_kernel, gen_hash, append_neoscrypt_compiler_options }, - + { "lyra2re", ALGO_LYRA2RE, "", 1, 128, 128, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 2 * 8 * 4194304, 0, lyra2re_regenhash, queue_lyra2RE_kernel, gen_hash, NULL }, + { "lyra2rev2", ALGO_LYRA2REV2, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 6, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, lyra2rev2_regenhash, queue_lyra2rev2_kernel, gen_hash, append_neoscrypt_compiler_options }, // kernels starting from this will have difficulty calculated by using fuguecoin algorithm #define A_FUGUE(a, b, c) \ @@ -1006,7 +1012,7 @@ static algorithm_settings_t algos[] = { #undef A_FUGUE { "whirlcoin", ALGO_WHIRL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 3, 8 * 16 * 4194304, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, whirlcoin_regenhash, queue_whirlcoin_kernel, sha256, NULL }, - { "whirlpoolx", ALGO_WHIRLPOOLX, "", 1, 1, 1, 0, 0, 0xFFU, 0xFFFFULL, 0x0000FFFFUL, 0, 0, 0, whirlpoolx_regenhash, queue_whirlpoolx_kernel, gen_hash, NULL }, + { "whirlpoolx", ALGO_WHIRLPOOLX, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000FFFFUL, 0, 0, 0, whirlpoolx_regenhash, queue_whirlpoolx_kernel, gen_hash, NULL }, // Terminator (do not remove) { NULL, ALGO_UNK, "", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL } @@ -1079,7 +1085,10 @@ static const char *lookup_algorithm_alias(const char *lookup_alias, uint8_t *nfa ALGO_ALIAS("nist5", "talkcoin-mod"); ALGO_ALIAS("keccak", "maxcoin"); ALGO_ALIAS("whirlpool", "whirlcoin"); + ALGO_ALIAS("Lyra2RE", "lyra2re"); ALGO_ALIAS("lyra2", "lyra2re"); + ALGO_ALIAS("Lyra2REv2", "lyra2rev2"); + ALGO_ALIAS("lyra2rev2", "lyra2rev2"); ALGO_ALIAS("lyra2v2", "lyra2rev2"); #undef ALGO_ALIAS @@ -1107,6 +1116,7 @@ void set_algorithm(algorithm_t* algo, const char* newname_alias) if ((old_nfactor > 0) && (old_nfactor != nfactor)) nfactor = old_nfactor; + if (algo->type == ALGO_LYRA2RE || algo->type == ALGO_LYRA2REV2) { opt_lyra = true; } set_algorithm_nfactor(algo, nfactor); //reapply kernelfile if was set diff --git a/algorithm.h b/algorithm.h index 8b7185a4..a278934e 100644 --- a/algorithm.h +++ b/algorithm.h @@ -9,7 +9,7 @@ #include #include -#include "ocl/build_kernel.h" // For the build_kernel_data type +//#include "ocl/build_kernel.h" // For the build_kernel_data type typedef enum { ALGO_UNK, @@ -30,7 +30,7 @@ typedef enum { ALGO_NEOSCRYPT, ALGO_WHIRLPOOLX, ALGO_LYRA2RE, - ALGO_LYRA2REv2, + ALGO_LYRA2REV2, ALGO_PLUCK, ALGO_YESCRYPT, ALGO_YESCRYPT_MULTI, @@ -72,28 +72,6 @@ typedef struct _algorithm_t { void(*set_compile_options)(struct _build_kernel_data *, struct cgpu_info *, struct _algorithm_t *); } algorithm_t; -typedef struct _algorithm_settings_t -{ - const char *name; - algorithm_type_t type; - const char *kernelfile; - double diff_multiplier1; - double diff_multiplier2; - double share_diff_multiplier; - uint32_t xintensity_shift; - uint32_t intensity_shift; - uint32_t found_idx; - unsigned long long diff_numerator; - uint32_t diff1targ; - size_t n_extra_kernels; - long rw_buffer_size; - cl_command_queue_properties cq_properties; - void (*regenhash)(struct work *); - cl_int (*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint); - void (*gen_hash)(const unsigned char *, unsigned int, unsigned char *); - void (*set_compile_options)(build_kernel_data *, struct cgpu_info *, algorithm_t *); -} algorithm_settings_t; - /* Set default parameters based on name. */ void set_algorithm(algorithm_t* algo, const char* name); diff --git a/algorithm/lyra2.c b/algorithm/lyra2.c index 42640e76..6944b22f 100644 --- a/algorithm/lyra2.c +++ b/algorithm/lyra2.c @@ -58,19 +58,15 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void * //========== Initializing the Memory Matrix and pointers to it =============// //Tries to allocate enough space for the whole memory matrix - - const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols; - const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8; - i = (int64_t) ((int64_t) nRows * (int64_t) ROW_LEN_BYTES); - uint64_t *wholeMatrix = malloc(i); + uint64_t *wholeMatrix = (uint64_t*)malloc(i); if (wholeMatrix == NULL) { return -1; } memset(wholeMatrix, 0, i); //Allocates pointers to each row of the matrix - uint64_t **memMatrix = malloc(nRows * sizeof (uint64_t*)); + uint64_t **memMatrix = (uint64_t**)malloc(nRows * sizeof (uint64_t*)); if (memMatrix == NULL) { return -1; } @@ -122,7 +118,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void * //======================= Initializing the Sponge State ====================// //Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c) - uint64_t *state = malloc(16 * sizeof (uint64_t)); + uint64_t *state = (uint64_t*)malloc(16 * sizeof (uint64_t)); if (state == NULL) { return -1; } @@ -134,16 +130,16 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void * ptrWord = wholeMatrix; for (i = 0; i < nBlocksInput; i++) { absorbBlockBlake2Safe(state, ptrWord); //absorbs each block of pad(pwd || salt || basil) - ptrWord += BLOCK_LEN_BLAKE2_SAFE_INT64; //goes to next block of pad(pwd || salt || basil) + ptrWord += BLOCK_LEN_BLAKE2_SAFE_BYTES; //goes to next block of pad(pwd || salt || basil) } //Initializes M[0] and M[1] - reducedSqueezeRow0(state, memMatrix[0], nCols); //The locally copied password is most likely overwritten here - reducedDuplexRow1(state, memMatrix[0], memMatrix[1], nCols); + reducedSqueezeRow0(state, memMatrix[0]); //The locally copied password is most likely overwritten here + reducedDuplexRow1(state, memMatrix[0], memMatrix[1]); do { //M[row] = rand; //M[row*] = M[row*] XOR rotW(rand) - reducedDuplexRowSetup(state, memMatrix[prev], memMatrix[rowa], memMatrix[row], nCols); + reducedDuplexRowSetup(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]); //updates the value of row* (deterministically picked during Setup)) @@ -176,7 +172,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void * //------------------------------------------------------------------------------------------ //Performs a reduced-round duplexing operation over M[row*] XOR M[prev], updating both M[row*] and M[row] - reducedDuplexRow(state, memMatrix[prev], memMatrix[rowa], memMatrix[row], nCols); + reducedDuplexRow(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]); //update prev: it now points to the last row ever computed prev = row; @@ -196,7 +192,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void * absorbBlock(state, memMatrix[rowa]); //Squeezes the key - squeeze(state, K, kLen); + squeeze(state, (unsigned char*)K, kLen); //==========================================================================/ //========================= Freeing the memory =============================// diff --git a/algorithm/lyra2.h b/algorithm/lyra2.h index 798e6af1..13c7dbd3 100644 --- a/algorithm/lyra2.h +++ b/algorithm/lyra2.h @@ -37,6 +37,14 @@ typedef unsigned char byte; #define BLOCK_LEN_BYTES (BLOCK_LEN_INT64 * 8) //Block length, in bytes #endif +#ifndef N_COLS + #define N_COLS 8 //Number of columns in the memory matrix: fixed to 64 by default +#endif + +#define ROW_LEN_INT64 (BLOCK_LEN_INT64 * N_COLS) //Total length of a row: N_COLS blocks +#define ROW_LEN_BYTES (ROW_LEN_INT64 * 8) //Number of bytes per row + + int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols); #endif /* LYRA2_H_ */ diff --git a/algorithm/lyra2re.c b/algorithm/lyra2re.c index cfc5adbb..61f2b34f 100644 --- a/algorithm/lyra2re.c +++ b/algorithm/lyra2re.c @@ -36,8 +36,6 @@ #include "sph/sph_groestl.h" #include "sph/sph_skein.h" #include "sph/sph_keccak.h" -#include "sph/sph_bmw.h" -#include "sph/sph_cubehash.h" #include "lyra2.h" /* @@ -57,10 +55,9 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) inline void lyra2rehash(void *state, const void *input) { sph_blake256_context ctx_blake; - sph_bmw256_context ctx_bmw; + sph_groestl256_context ctx_groestl; sph_keccak256_context ctx_keccak; sph_skein256_context ctx_skein; - sph_cubehash256_context ctx_cube; uint32_t hashA[8], hashB[8]; @@ -72,23 +69,17 @@ inline void lyra2rehash(void *state, const void *input) sph_keccak256 (&ctx_keccak,hashA, 32); sph_keccak256_close(&ctx_keccak, hashB); - sph_cubehash256_init(&ctx_cube); - sph_cubehash256(&ctx_cube, hashB, 32); - sph_cubehash256_close(&ctx_cube, hashA); + LYRA2(hashA, 32, hashB, 32, hashB, 32, 1, 8, 8); - LYRA2(hashB, 32, hashA, 32, hashA, 32, 1, 4, 4); sph_skein256_init(&ctx_skein); - sph_skein256 (&ctx_skein, hashB, 32); - sph_skein256_close(&ctx_skein, hashA); + sph_skein256 (&ctx_skein, hashA, 32); + sph_skein256_close(&ctx_skein, hashB); - sph_cubehash256_init(&ctx_cube); - sph_cubehash256(&ctx_cube, hashA, 32); - sph_cubehash256_close(&ctx_cube, hashB); - sph_bmw256_init(&ctx_bmw); - sph_bmw256 (&ctx_bmw, hashB, 32); - sph_bmw256_close(&ctx_bmw, hashA); + sph_groestl256_init(&ctx_groestl); + sph_groestl256 (&ctx_groestl, hashB, 32); + sph_groestl256_close(&ctx_groestl, hashA); memcpy(state, hashA, 32); } diff --git a/algorithm/lyra2re.h b/algorithm/lyra2re.h index 8bc52ac4..8a58e747 100644 --- a/algorithm/lyra2re.h +++ b/algorithm/lyra2re.h @@ -2,8 +2,6 @@ #define LYRA2RE_H #include "miner.h" -#define LYRA_SCRATCHBUF_SIZE (1536) // matrix size [12][4][4] uint64_t or equivalent -#define LYRA_SECBUF_SIZE (4) // (not used) extern int lyra2re_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce); diff --git a/algorithm/lyra2re_old.h b/algorithm/lyra2re_old.h deleted file mode 100644 index 0788dfb3..00000000 --- a/algorithm/lyra2re_old.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef LYRA2REOLD_H -#define LYRA2REOLD_H - -#include "miner.h" - -extern int lyra2reold_test(unsigned char *pdata, const unsigned char *ptarget, - uint32_t nonce); -extern void lyra2reold_regenhash(struct work *work); - -#endif /* LYRA2RE_H */ diff --git a/algorithm/lyra2re_old.c b/algorithm/lyra2rev2.c similarity index 77% rename from algorithm/lyra2re_old.c rename to algorithm/lyra2rev2.c index 31a0a1ab..467de6d9 100644 --- a/algorithm/lyra2re_old.c +++ b/algorithm/lyra2rev2.c @@ -36,7 +36,9 @@ #include "sph/sph_groestl.h" #include "sph/sph_skein.h" #include "sph/sph_keccak.h" -#include "lyra2.h" +#include "sph/sph_bmw.h" +#include "sph/sph_cubehash.h" +#include "lyra2v2.h" /* * Encode a length len/4 vector of (uint32_t) into a length len vector of @@ -52,13 +54,13 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) } -inline void lyra2rehash_old(void *state, const void *input) +inline void lyra2rev2hash(void *state, const void *input) { sph_blake256_context ctx_blake; - sph_groestl256_context ctx_groestl; + sph_bmw256_context ctx_bmw; sph_keccak256_context ctx_keccak; sph_skein256_context ctx_skein; - + sph_cubehash256_context ctx_cube; uint32_t hashA[8], hashB[8]; sph_blake256_init(&ctx_blake); @@ -69,32 +71,41 @@ inline void lyra2rehash_old(void *state, const void *input) sph_keccak256 (&ctx_keccak,hashA, 32); sph_keccak256_close(&ctx_keccak, hashB); - LYRA2(hashA, 32, hashB, 32, hashB, 32, 1, 8, 8); + sph_cubehash256_init(&ctx_cube); + sph_cubehash256(&ctx_cube, hashB, 32); + sph_cubehash256_close(&ctx_cube, hashA); + + LYRA2V2(hashB, 32, hashA, 32, hashA, 32, 1, 4, 4); + + sph_skein256_init(&ctx_skein); + sph_skein256 (&ctx_skein, hashB, 32); + sph_skein256_close(&ctx_skein, hashA); - sph_skein256_init(&ctx_skein); - sph_skein256 (&ctx_skein, hashA, 32); - sph_skein256_close(&ctx_skein, hashB); + sph_cubehash256_init(&ctx_cube); + sph_cubehash256(&ctx_cube, hashA, 32); + sph_cubehash256_close(&ctx_cube, hashB); + sph_bmw256_init(&ctx_bmw); + sph_bmw256 (&ctx_bmw, hashB, 32); + sph_bmw256_close(&ctx_bmw, hashA); - sph_groestl256_init(&ctx_groestl); - sph_groestl256 (&ctx_groestl, hashB, 32); - sph_groestl256_close(&ctx_groestl, hashA); +//printf("cpu hash %08x %08x %08x %08x\n",hashA[0],hashA[1],hashA[2],hashA[3]); - memcpy(state, hashA, 32); + memcpy(state, hashA, 32); } static const uint32_t diff1targ = 0x0000ffff; /* Used externally as confirmation of correct OCL code */ -int lyra2reold_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce) +int lyra2rev2_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce) { uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]); uint32_t data[20], ohash[8]; be32enc_vect(data, (const uint32_t *)pdata, 19); data[19] = htobe32(nonce); - lyra2rehash_old(ohash, data); + lyra2rev2hash(ohash, data); tmp_hash7 = be32toh(ohash[7]); applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx", @@ -108,7 +119,7 @@ int lyra2reold_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t return 1; } -void lyra2reold_regenhash(struct work *work) +void lyra2rev2_regenhash(struct work *work) { uint32_t data[20]; uint32_t *nonce = (uint32_t *)(work->data + 76); @@ -116,10 +127,10 @@ void lyra2reold_regenhash(struct work *work) be32enc_vect(data, (const uint32_t *)work->data, 19); data[19] = htobe32(*nonce); - lyra2rehash_old(ohash, data); + lyra2rev2hash(ohash, data); } -bool scanhash_lyra2reold(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate, +bool scanhash_lyra2rev2(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate, unsigned char *pdata, unsigned char __maybe_unused *phash1, unsigned char __maybe_unused *phash, const unsigned char *ptarget, uint32_t max_nonce, uint32_t *last_nonce, uint32_t n) @@ -137,7 +148,7 @@ bool scanhash_lyra2reold(struct thr_info *thr, const unsigned char __maybe_unuse *nonce = ++n; data[19] = (n); - lyra2rehash_old(ostate, data); + lyra2rev2hash(ostate, data); tmp_hash7 = (ostate[7]); applog(LOG_INFO, "data7 %08lx", diff --git a/algorithm/lyra2rev2.h b/algorithm/lyra2rev2.h new file mode 100644 index 00000000..1a31f76f --- /dev/null +++ b/algorithm/lyra2rev2.h @@ -0,0 +1,11 @@ +#ifndef LYRA2REV2_H +#define LYRA2REV2_H + +#include "miner.h" +#define LYRA_SCRATCHBUF_SIZE (1536) // matrix size [12][4][4] uint64_t or equivalent +#define LYRA_SECBUF_SIZE (4) // (not used) +extern int lyra2rev2_test(unsigned char *pdata, const unsigned char *ptarget, + uint32_t nonce); +extern void lyra2rev2_regenhash(struct work *work); + +#endif /* LYRA2REV2_H */ diff --git a/algorithm/lyra2v2.c b/algorithm/lyra2v2.c new file mode 100644 index 00000000..affea346 --- /dev/null +++ b/algorithm/lyra2v2.c @@ -0,0 +1,213 @@ +/** + * Implementation of the Lyra2 Password Hashing Scheme (PHS). + * + * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014. + * + * This software is hereby placed in the public domain. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include +#include +#include "lyra2v2.h" +#include "spongev2.h" + +/** + * Executes Lyra2 based on the G function from Blake2b. This version supports salts and passwords + * whose combined length is smaller than the size of the memory matrix, (i.e., (nRows x nCols x b) bits, + * where "b" is the underlying sponge's bitrate). In this implementation, the "basil" is composed by all + * integer parameters (treated as type "unsigned int") in the order they are provided, plus the value + * of nCols, (i.e., basil = kLen || pwdlen || saltlen || timeCost || nRows || nCols). + * + * @param K The derived key to be output by the algorithm + * @param kLen Desired key length + * @param pwd User password + * @param pwdlen Password length + * @param salt Salt + * @param saltlen Salt length + * @param timeCost Parameter to determine the processing time (T) + * @param nRows Number or rows of the memory matrix (R) + * @param nCols Number of columns of the memory matrix (C) + * + * @return 0 if the key is generated correctly; -1 if there is an error (usually due to lack of memory for allocation) + */ +int LYRA2V2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols) { + + //============================= Basic variables ============================// + int64_t row = 2; //index of row to be processed + int64_t prev = 1; //index of prev (last row ever computed/modified) + int64_t rowa = 0; //index of row* (a previous row, deterministically picked during Setup and randomly picked while Wandering) + int64_t tau; //Time Loop iterator + int64_t step = 1; //Visitation step (used during Setup and Wandering phases) + int64_t window = 2; //Visitation window (used to define which rows can be revisited during Setup) + int64_t gap = 1; //Modifier to the step, assuming the values 1 or -1 + int64_t i; //auxiliary iteration counter + //==========================================================================/ + + //========== Initializing the Memory Matrix and pointers to it =============// + //Tries to allocate enough space for the whole memory matrix + + + const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols; + const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8; + + i = (int64_t) ((int64_t) nRows * (int64_t) ROW_LEN_BYTES); + uint64_t *wholeMatrix = (uint64_t*) malloc(i); + if (wholeMatrix == NULL) { + return -1; + } + memset(wholeMatrix, 0, i); + + //Allocates pointers to each row of the matrix + uint64_t **memMatrix = (uint64_t**) malloc(nRows * sizeof(uint64_t*)); + if (memMatrix == NULL) { + return -1; + } + //Places the pointers in the correct positions + uint64_t *ptrWord = wholeMatrix; + for (i = 0; i < nRows; i++) { + memMatrix[i] = ptrWord; + ptrWord += ROW_LEN_INT64; + } + //==========================================================================/ + + //============= Getting the password + salt + basil padded with 10*1 ===============// + //OBS.:The memory matrix will temporarily hold the password: not for saving memory, + //but this ensures that the password copied locally will be overwritten as soon as possible + + //First, we clean enough blocks for the password, salt, basil and padding + uint64_t nBlocksInput = ((saltlen + pwdlen + 6 * sizeof (uint64_t)) / BLOCK_LEN_BLAKE2_SAFE_BYTES) + 1; + byte *ptrByte = (byte*) wholeMatrix; + memset(ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES); + + //Prepends the password + memcpy(ptrByte, pwd, pwdlen); + ptrByte += pwdlen; + + //Concatenates the salt + memcpy(ptrByte, salt, saltlen); + ptrByte += saltlen; + + //Concatenates the basil: every integer passed as parameter, in the order they are provided by the interface + memcpy(ptrByte, &kLen, sizeof (uint64_t)); + ptrByte += sizeof (uint64_t); + memcpy(ptrByte, &pwdlen, sizeof (uint64_t)); + ptrByte += sizeof (uint64_t); + memcpy(ptrByte, &saltlen, sizeof (uint64_t)); + ptrByte += sizeof (uint64_t); + memcpy(ptrByte, &timeCost, sizeof (uint64_t)); + ptrByte += sizeof (uint64_t); + memcpy(ptrByte, &nRows, sizeof (uint64_t)); + ptrByte += sizeof (uint64_t); + memcpy(ptrByte, &nCols, sizeof (uint64_t)); + ptrByte += sizeof (uint64_t); + + //Now comes the padding + *ptrByte = 0x80; //first byte of padding: right after the password + ptrByte = (byte*) wholeMatrix; //resets the pointer to the start of the memory matrix + ptrByte += nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - 1; //sets the pointer to the correct position: end of incomplete block + *ptrByte ^= 0x01; //last byte of padding: at the end of the last incomplete block + //==========================================================================/ + + //======================= Initializing the Sponge State ====================// + //Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c) + uint64_t *state = (uint64_t*) malloc(16 * sizeof(uint64_t)); + if (state == NULL) { + return -1; + } + initStatev2(state); + //==========================================================================/ + + //================================ Setup Phase =============================// + //Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits + ptrWord = wholeMatrix; + for (i = 0; i < nBlocksInput; i++) { + absorbBlockBlake2Safev2(state, ptrWord); //absorbs each block of pad(pwd || salt || basil) + ptrWord += BLOCK_LEN_BLAKE2_SAFE_INT64; //goes to next block of pad(pwd || salt || basil) + } + + //Initializes M[0] and M[1] + reducedSqueezeRow0v2(state, memMatrix[0], nCols); //The locally copied password is most likely overwritten here + reducedDuplexRow1v2(state, memMatrix[0], memMatrix[1], nCols); + + do { + //M[row] = rand; //M[row*] = M[row*] XOR rotW(rand) + reducedDuplexRowSetupv2(state, memMatrix[prev], memMatrix[rowa], memMatrix[row], nCols); + + + //updates the value of row* (deterministically picked during Setup)) + rowa = (rowa + step) & (window - 1); + //update prev: it now points to the last row ever computed + prev = row; + //updates row: goes to the next row to be computed + row++; + + //Checks if all rows in the window where visited. + if (rowa == 0) { + step = window + gap; //changes the step: approximately doubles its value + window *= 2; //doubles the size of the re-visitation window + gap = -gap; //inverts the modifier to the step + } + + } while (row < nRows); + //==========================================================================/ + + //============================ Wandering Phase =============================// + row = 0; //Resets the visitation to the first row of the memory matrix + for (tau = 1; tau <= timeCost; tau++) { + //Step is approximately half the number of all rows of the memory matrix for an odd tau; otherwise, it is -1 + step = (tau % 2 == 0) ? -1 : nRows / 2 - 1; + do { + //Selects a pseudorandom index row* + //------------------------------------------------------------------------------------------ + //rowa = ((unsigned int)state[0]) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2) + rowa = ((uint64_t) (state[0])) % nRows; //(USE THIS FOR THE "GENERIC" CASE) + //------------------------------------------------------------------------------------------ + + //Performs a reduced-round duplexing operation over M[row*] XOR M[prev], updating both M[row*] and M[row] + reducedDuplexRowv2(state, memMatrix[prev], memMatrix[rowa], memMatrix[row], nCols); + + //update prev: it now points to the last row ever computed + prev = row; + + //updates row: goes to the next row to be computed + //------------------------------------------------------------------------------------------ + //row = (row + step) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2) + row = (row + step) % nRows; //(USE THIS FOR THE "GENERIC" CASE) + //------------------------------------------------------------------------------------------ + + } while (row != 0); + } + //==========================================================================/ + + //============================ Wrap-up Phase ===============================// + //Absorbs the last block of the memory matrix + absorbBlockv2(state, memMatrix[rowa]); + + //Squeezes the key + squeezev2(state, (unsigned char*)K, kLen); + //==========================================================================/ + + //========================= Freeing the memory =============================// + free(memMatrix); + free(wholeMatrix); + + //Wiping out the sponge's internal state before freeing it + memset(state, 0, 16 * sizeof (uint64_t)); + free(state); + //==========================================================================/ + + return 0; +} diff --git a/algorithm/lyra2v2.h b/algorithm/lyra2v2.h new file mode 100644 index 00000000..24b2b95e --- /dev/null +++ b/algorithm/lyra2v2.h @@ -0,0 +1,42 @@ +/** + * Header file for the Lyra2 Password Hashing Scheme (PHS). + * + * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014. + * + * This software is hereby placed in the public domain. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef LYRA2V2_H_ +#define LYRA2V2_H_ + +#include + +typedef unsigned char byte; + +//Block length required so Blake2's Initialization Vector (IV) is not overwritten (THIS SHOULD NOT BE MODIFIED) +#define BLOCK_LEN_BLAKE2_SAFE_INT64 8 //512 bits (=64 bytes, =8 uint64_t) +#define BLOCK_LEN_BLAKE2_SAFE_BYTES (BLOCK_LEN_BLAKE2_SAFE_INT64 * 8) //same as above, in bytes + + +#ifdef BLOCK_LEN_BITS + #define BLOCK_LEN_INT64 (BLOCK_LEN_BITS/64) //Block length: 768 bits (=96 bytes, =12 uint64_t) + #define BLOCK_LEN_BYTES (BLOCK_LEN_BITS/8) //Block length, in bytes +#else //default block lenght: 768 bits + #define BLOCK_LEN_INT64 12 //Block length: 768 bits (=96 bytes, =12 uint64_t) + #define BLOCK_LEN_BYTES (BLOCK_LEN_INT64 * 8) //Block length, in bytes +#endif + +int LYRA2V2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols); + +#endif /* LYRA2_H_ */ diff --git a/algorithm/sponge.c b/algorithm/sponge.c index c788952a..e717a508 100644 --- a/algorithm/sponge.c +++ b/algorithm/sponge.c @@ -158,11 +158,11 @@ void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in) { * @param state The current state of the sponge * @param rowOut Row to receive the data squeezed */ -void reducedSqueezeRow0(uint64_t* state, uint64_t* rowOut, uint64_t nCols) { - uint64_t* ptrWord = rowOut + (nCols-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to M[0][C-1] +void reducedSqueezeRow0(uint64_t* state, uint64_t* rowOut) { + uint64_t* ptrWord = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to M[0][C-1] int i; //M[row][C-1-col] = H.reduced_squeeze() - for (i = 0; i < nCols; i++) { + for (i = 0; i < N_COLS; i++) { ptrWord[0] = state[0]; ptrWord[1] = state[1]; ptrWord[2] = state[2]; @@ -193,12 +193,12 @@ void reducedSqueezeRow0(uint64_t* state, uint64_t* rowOut, uint64_t nCols) { * @param rowIn Row to feed the sponge * @param rowOut Row to receive the sponge's output */ -void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, uint64_t nCols) { +void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut) { uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev - uint64_t* ptrWordOut = rowOut + (nCols-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row + uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row int i; - for (i = 0; i < nCols; i++) { + for (i = 0; i < N_COLS; i++) { //Absorbing "M[prev][col]" state[0] ^= (ptrWordIn[0]); @@ -253,13 +253,13 @@ void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, uint6 * @param rowOut Row receiving the output * */ -void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols) { +void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) { uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row* - uint64_t* ptrWordOut = rowOut + (nCols-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row + uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row int i; - for (i = 0; i < nCols; i++) { + for (i = 0; i < N_COLS; i++) { //Absorbing "M[prev] [+] M[row*]" state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]); state[1] ^= (ptrWordIn[1] + ptrWordInOut[1]); @@ -327,13 +327,13 @@ void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, * @param rowOut Row receiving the output * */ -void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols) { +void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) { uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row* uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row int i; - for (i = 0; i < nCols; i++) { + for (i = 0; i < N_COLS; i++) { //Absorbing "M[prev] [+] M[row*]" state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]); diff --git a/algorithm/sponge.h b/algorithm/sponge.h index 19822979..3fcff0d7 100644 --- a/algorithm/sponge.h +++ b/algorithm/sponge.h @@ -78,16 +78,16 @@ void initState(uint64_t state[/*16*/]); //---- Squeezes void squeeze(uint64_t *state, unsigned char *out, unsigned int len); -void reducedSqueezeRow0(uint64_t* state, uint64_t* row, uint64_t nCols); +void reducedSqueezeRow0(uint64_t* state, uint64_t* row); //---- Absorbs void absorbBlock(uint64_t *state, const uint64_t *in); void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in); //---- Duplexes -void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, uint64_t nCols); -void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols); -void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols); +void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut); +void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); +void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); //---- Misc void printArray(unsigned char *array, unsigned int size, char *name); diff --git a/algorithm/spongev2.c b/algorithm/spongev2.c new file mode 100644 index 00000000..6a80055b --- /dev/null +++ b/algorithm/spongev2.c @@ -0,0 +1,745 @@ +/** + * A simple implementation of Blake2b's internal permutation + * in the form of a sponge. + * + * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014. + * + * This software is hereby placed in the public domain. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include +#include "spongev2.h" +#include "lyra2v2.h" + + + +/** + * Initializes the Sponge State. The first 512 bits are set to zeros and the remainder + * receive Blake2b's IV as per Blake2b's specification. Note: Even though sponges + * typically have their internal state initialized with zeros, Blake2b's G function + * has a fixed point: if the internal state and message are both filled with zeros. the + * resulting permutation will always be a block filled with zeros; this happens because + * Blake2b does not use the constants originally employed in Blake2 inside its G function, + * relying on the IV for avoiding possible fixed points. + * + * @param state The 1024-bit array to be initialized + */ +inline void initStatev2(uint64_t state[/*16*/]) { + //First 512 bis are zeros + memset(state, 0, 64); + //Remainder BLOCK_LEN_BLAKE2_SAFE_BYTES are reserved to the IV + state[8] = blake2b_IV[0]; + state[9] = blake2b_IV[1]; + state[10] = blake2b_IV[2]; + state[11] = blake2b_IV[3]; + state[12] = blake2b_IV[4]; + state[13] = blake2b_IV[5]; + state[14] = blake2b_IV[6]; + state[15] = blake2b_IV[7]; +} + +/** + * Execute Blake2b's G function, with all 12 rounds. + * + * @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function + */ +inline static void blake2bLyra(uint64_t *v) { + ROUND_LYRA(0); + ROUND_LYRA(1); + ROUND_LYRA(2); + ROUND_LYRA(3); + ROUND_LYRA(4); + ROUND_LYRA(5); + ROUND_LYRA(6); + ROUND_LYRA(7); + ROUND_LYRA(8); + ROUND_LYRA(9); + ROUND_LYRA(10); + ROUND_LYRA(11); +} + +/** + * Executes a reduced version of Blake2b's G function with only one round + * @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function + */ +inline static void reducedBlake2bLyra(uint64_t *v) { + ROUND_LYRA(0); +} + +/** + * Performs a squeeze operation, using Blake2b's G function as the + * internal permutation + * + * @param state The current state of the sponge + * @param out Array that will receive the data squeezed + * @param len The number of bytes to be squeezed into the "out" array + */ +inline void squeezev2(uint64_t *state, byte *out, unsigned int len) { + int fullBlocks = len / BLOCK_LEN_BYTES; + byte *ptr = out; + int i; + //Squeezes full blocks + for (i = 0; i < fullBlocks; i++) { + memcpy(ptr, state, BLOCK_LEN_BYTES); + blake2bLyra(state); + ptr += BLOCK_LEN_BYTES; + } + + //Squeezes remaining bytes + memcpy(ptr, state, (len % BLOCK_LEN_BYTES)); +} + +/** + * Performs an absorb operation for a single block (BLOCK_LEN_INT64 words + * of type uint64_t), using Blake2b's G function as the internal permutation + * + * @param state The current state of the sponge + * @param in The block to be absorbed (BLOCK_LEN_INT64 words) + */ +inline void absorbBlockv2(uint64_t *state, const uint64_t *in) { + //XORs the first BLOCK_LEN_INT64 words of "in" with the current state + state[0] ^= in[0]; + state[1] ^= in[1]; + state[2] ^= in[2]; + state[3] ^= in[3]; + state[4] ^= in[4]; + state[5] ^= in[5]; + state[6] ^= in[6]; + state[7] ^= in[7]; + state[8] ^= in[8]; + state[9] ^= in[9]; + state[10] ^= in[10]; + state[11] ^= in[11]; + + //Applies the transformation f to the sponge's state + blake2bLyra(state); +} + +/** + * Performs an absorb operation for a single block (BLOCK_LEN_BLAKE2_SAFE_INT64 + * words of type uint64_t), using Blake2b's G function as the internal permutation + * + * @param state The current state of the sponge + * @param in The block to be absorbed (BLOCK_LEN_BLAKE2_SAFE_INT64 words) + */ +inline void absorbBlockBlake2Safev2(uint64_t *state, const uint64_t *in) { + //XORs the first BLOCK_LEN_BLAKE2_SAFE_INT64 words of "in" with the current state + + state[0] ^= in[0]; + state[1] ^= in[1]; + state[2] ^= in[2]; + state[3] ^= in[3]; + state[4] ^= in[4]; + state[5] ^= in[5]; + state[6] ^= in[6]; + state[7] ^= in[7]; + + + //Applies the transformation f to the sponge's state + blake2bLyra(state); + +} + +/** + * Performs a reduced squeeze operation for a single row, from the highest to + * the lowest index, using the reduced-round Blake2b's G function as the + * internal permutation + * + * @param state The current state of the sponge + * @param rowOut Row to receive the data squeezed + */ +inline void reducedSqueezeRow0v2(uint64_t* state, uint64_t* rowOut, uint64_t nCols) { + uint64_t* ptrWord = rowOut + (nCols-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to M[0][C-1] + int i; + //M[row][C-1-col] = H.reduced_squeeze() + for (i = 0; i < nCols; i++) { + ptrWord[0] = state[0]; + ptrWord[1] = state[1]; + ptrWord[2] = state[2]; + ptrWord[3] = state[3]; + ptrWord[4] = state[4]; + ptrWord[5] = state[5]; + ptrWord[6] = state[6]; + ptrWord[7] = state[7]; + ptrWord[8] = state[8]; + ptrWord[9] = state[9]; + ptrWord[10] = state[10]; + ptrWord[11] = state[11]; + + //Goes to next block (column) that will receive the squeezed data + ptrWord -= BLOCK_LEN_INT64; + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + } +} + +/** + * Performs a reduced duplex operation for a single row, from the highest to + * the lowest index, using the reduced-round Blake2b's G function as the + * internal permutation + * + * @param state The current state of the sponge + * @param rowIn Row to feed the sponge + * @param rowOut Row to receive the sponge's output + */ +inline void reducedDuplexRow1v2(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, uint64_t nCols) { + uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev + uint64_t* ptrWordOut = rowOut + (nCols-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row + int i; + + for (i = 0; i < nCols; i++) { + + //Absorbing "M[prev][col]" + state[0] ^= (ptrWordIn[0]); + state[1] ^= (ptrWordIn[1]); + state[2] ^= (ptrWordIn[2]); + state[3] ^= (ptrWordIn[3]); + state[4] ^= (ptrWordIn[4]); + state[5] ^= (ptrWordIn[5]); + state[6] ^= (ptrWordIn[6]); + state[7] ^= (ptrWordIn[7]); + state[8] ^= (ptrWordIn[8]); + state[9] ^= (ptrWordIn[9]); + state[10] ^= (ptrWordIn[10]); + state[11] ^= (ptrWordIn[11]); + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + + //M[row][C-1-col] = M[prev][col] XOR rand + ptrWordOut[0] = ptrWordIn[0] ^ state[0]; + ptrWordOut[1] = ptrWordIn[1] ^ state[1]; + ptrWordOut[2] = ptrWordIn[2] ^ state[2]; + ptrWordOut[3] = ptrWordIn[3] ^ state[3]; + ptrWordOut[4] = ptrWordIn[4] ^ state[4]; + ptrWordOut[5] = ptrWordIn[5] ^ state[5]; + ptrWordOut[6] = ptrWordIn[6] ^ state[6]; + ptrWordOut[7] = ptrWordIn[7] ^ state[7]; + ptrWordOut[8] = ptrWordIn[8] ^ state[8]; + ptrWordOut[9] = ptrWordIn[9] ^ state[9]; + ptrWordOut[10] = ptrWordIn[10] ^ state[10]; + ptrWordOut[11] = ptrWordIn[11] ^ state[11]; + + + //Input: next column (i.e., next block in sequence) + ptrWordIn += BLOCK_LEN_INT64; + //Output: goes to previous column + ptrWordOut -= BLOCK_LEN_INT64; + } +} + +/** + * Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e., + * the wordwise addition of two columns, ignoring carries between words). The + * output of this operation, "rand", is then used to make + * "M[rowOut][(N_COLS-1)-col] = M[rowIn][col] XOR rand" and + * "M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)", where rotW is a 64-bit + * rotation to the left and N_COLS is a system parameter. + * + * @param state The current state of the sponge + * @param rowIn Row used only as input + * @param rowInOut Row used as input and to receive output after rotation + * @param rowOut Row receiving the output + * + */ +inline void reducedDuplexRowSetupv2(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols) { + uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev + uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row* + uint64_t* ptrWordOut = rowOut + (nCols-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row + int i; + + for (i = 0; i < nCols; i++) { + //Absorbing "M[prev] [+] M[row*]" + state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]); + state[1] ^= (ptrWordIn[1] + ptrWordInOut[1]); + state[2] ^= (ptrWordIn[2] + ptrWordInOut[2]); + state[3] ^= (ptrWordIn[3] + ptrWordInOut[3]); + state[4] ^= (ptrWordIn[4] + ptrWordInOut[4]); + state[5] ^= (ptrWordIn[5] + ptrWordInOut[5]); + state[6] ^= (ptrWordIn[6] + ptrWordInOut[6]); + state[7] ^= (ptrWordIn[7] + ptrWordInOut[7]); + state[8] ^= (ptrWordIn[8] + ptrWordInOut[8]); + state[9] ^= (ptrWordIn[9] + ptrWordInOut[9]); + state[10] ^= (ptrWordIn[10] + ptrWordInOut[10]); + state[11] ^= (ptrWordIn[11] + ptrWordInOut[11]); + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + + //M[row][col] = M[prev][col] XOR rand + ptrWordOut[0] = ptrWordIn[0] ^ state[0]; + ptrWordOut[1] = ptrWordIn[1] ^ state[1]; + ptrWordOut[2] = ptrWordIn[2] ^ state[2]; + ptrWordOut[3] = ptrWordIn[3] ^ state[3]; + ptrWordOut[4] = ptrWordIn[4] ^ state[4]; + ptrWordOut[5] = ptrWordIn[5] ^ state[5]; + ptrWordOut[6] = ptrWordIn[6] ^ state[6]; + ptrWordOut[7] = ptrWordIn[7] ^ state[7]; + ptrWordOut[8] = ptrWordIn[8] ^ state[8]; + ptrWordOut[9] = ptrWordIn[9] ^ state[9]; + ptrWordOut[10] = ptrWordIn[10] ^ state[10]; + ptrWordOut[11] = ptrWordIn[11] ^ state[11]; + + //M[row*][col] = M[row*][col] XOR rotW(rand) + ptrWordInOut[0] ^= state[11]; + ptrWordInOut[1] ^= state[0]; + ptrWordInOut[2] ^= state[1]; + ptrWordInOut[3] ^= state[2]; + ptrWordInOut[4] ^= state[3]; + ptrWordInOut[5] ^= state[4]; + ptrWordInOut[6] ^= state[5]; + ptrWordInOut[7] ^= state[6]; + ptrWordInOut[8] ^= state[7]; + ptrWordInOut[9] ^= state[8]; + ptrWordInOut[10] ^= state[9]; + ptrWordInOut[11] ^= state[10]; + + //Inputs: next column (i.e., next block in sequence) + ptrWordInOut += BLOCK_LEN_INT64; + ptrWordIn += BLOCK_LEN_INT64; + //Output: goes to previous column + ptrWordOut -= BLOCK_LEN_INT64; + } +} + +/** + * Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e., + * the wordwise addition of two columns, ignoring carries between words). The + * output of this operation, "rand", is then used to make + * "M[rowOut][col] = M[rowOut][col] XOR rand" and + * "M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)", where rotW is a 64-bit + * rotation to the left. + * + * @param state The current state of the sponge + * @param rowIn Row used only as input + * @param rowInOut Row used as input and to receive output after rotation + * @param rowOut Row receiving the output + * + */ +inline void reducedDuplexRowv2(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols) { + uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row* + uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev + uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row + int i; + + for (i = 0; i < nCols; i++) { + + //Absorbing "M[prev] [+] M[row*]" + state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]); + state[1] ^= (ptrWordIn[1] + ptrWordInOut[1]); + state[2] ^= (ptrWordIn[2] + ptrWordInOut[2]); + state[3] ^= (ptrWordIn[3] + ptrWordInOut[3]); + state[4] ^= (ptrWordIn[4] + ptrWordInOut[4]); + state[5] ^= (ptrWordIn[5] + ptrWordInOut[5]); + state[6] ^= (ptrWordIn[6] + ptrWordInOut[6]); + state[7] ^= (ptrWordIn[7] + ptrWordInOut[7]); + state[8] ^= (ptrWordIn[8] + ptrWordInOut[8]); + state[9] ^= (ptrWordIn[9] + ptrWordInOut[9]); + state[10] ^= (ptrWordIn[10] + ptrWordInOut[10]); + state[11] ^= (ptrWordIn[11] + ptrWordInOut[11]); + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + + //M[rowOut][col] = M[rowOut][col] XOR rand + ptrWordOut[0] ^= state[0]; + ptrWordOut[1] ^= state[1]; + ptrWordOut[2] ^= state[2]; + ptrWordOut[3] ^= state[3]; + ptrWordOut[4] ^= state[4]; + ptrWordOut[5] ^= state[5]; + ptrWordOut[6] ^= state[6]; + ptrWordOut[7] ^= state[7]; + ptrWordOut[8] ^= state[8]; + ptrWordOut[9] ^= state[9]; + ptrWordOut[10] ^= state[10]; + ptrWordOut[11] ^= state[11]; + + //M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand) + ptrWordInOut[0] ^= state[11]; + ptrWordInOut[1] ^= state[0]; + ptrWordInOut[2] ^= state[1]; + ptrWordInOut[3] ^= state[2]; + ptrWordInOut[4] ^= state[3]; + ptrWordInOut[5] ^= state[4]; + ptrWordInOut[6] ^= state[5]; + ptrWordInOut[7] ^= state[6]; + ptrWordInOut[8] ^= state[7]; + ptrWordInOut[9] ^= state[8]; + ptrWordInOut[10] ^= state[9]; + ptrWordInOut[11] ^= state[10]; + + //Goes to next block + ptrWordOut += BLOCK_LEN_INT64; + ptrWordInOut += BLOCK_LEN_INT64; + ptrWordIn += BLOCK_LEN_INT64; + } +} + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +/** + * Performs a duplex operation over "M[rowInOut] [+] M[rowIn]", writing the output "rand" + * on M[rowOut] and making "M[rowInOut] = M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit + * rotation to the left. + * + * @param state The current state of the sponge + * @param rowIn Row used only as input + * @param rowInOut Row used as input and to receive output after rotation + * @param rowOut Row receiving the output + * + */ +/* +inline void reducedDuplexRowSetupOLD(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) { + uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev + uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row* + uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row + int i; + for (i = 0; i < N_COLS; i++) { + + //Absorbing "M[rowInOut] XOR M[rowIn]" + state[0] ^= ptrWordInOut[0] ^ ptrWordIn[0]; + state[1] ^= ptrWordInOut[1] ^ ptrWordIn[1]; + state[2] ^= ptrWordInOut[2] ^ ptrWordIn[2]; + state[3] ^= ptrWordInOut[3] ^ ptrWordIn[3]; + state[4] ^= ptrWordInOut[4] ^ ptrWordIn[4]; + state[5] ^= ptrWordInOut[5] ^ ptrWordIn[5]; + state[6] ^= ptrWordInOut[6] ^ ptrWordIn[6]; + state[7] ^= ptrWordInOut[7] ^ ptrWordIn[7]; + state[8] ^= ptrWordInOut[8] ^ ptrWordIn[8]; + state[9] ^= ptrWordInOut[9] ^ ptrWordIn[9]; + state[10] ^= ptrWordInOut[10] ^ ptrWordIn[10]; + state[11] ^= ptrWordInOut[11] ^ ptrWordIn[11]; + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + + //M[row][col] = rand + ptrWordOut[0] = state[0]; + ptrWordOut[1] = state[1]; + ptrWordOut[2] = state[2]; + ptrWordOut[3] = state[3]; + ptrWordOut[4] = state[4]; + ptrWordOut[5] = state[5]; + ptrWordOut[6] = state[6]; + ptrWordOut[7] = state[7]; + ptrWordOut[8] = state[8]; + ptrWordOut[9] = state[9]; + ptrWordOut[10] = state[10]; + ptrWordOut[11] = state[11]; + + + //M[row*][col] = M[row*][col] XOR rotW(rand) + ptrWordInOut[0] ^= state[10]; + ptrWordInOut[1] ^= state[11]; + ptrWordInOut[2] ^= state[0]; + ptrWordInOut[3] ^= state[1]; + ptrWordInOut[4] ^= state[2]; + ptrWordInOut[5] ^= state[3]; + ptrWordInOut[6] ^= state[4]; + ptrWordInOut[7] ^= state[5]; + ptrWordInOut[8] ^= state[6]; + ptrWordInOut[9] ^= state[7]; + ptrWordInOut[10] ^= state[8]; + ptrWordInOut[11] ^= state[9]; + + //Goes to next column (i.e., next block in sequence) + ptrWordInOut += BLOCK_LEN_INT64; + ptrWordIn += BLOCK_LEN_INT64; + ptrWordOut += BLOCK_LEN_INT64; + } +} +*/ + +/** + * Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", writing the output "rand" + * on M[rowOut] and making "M[rowInOut] = M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit + * rotation to the left. + * + * @param state The current state of the sponge + * @param rowIn Row used only as input + * @param rowInOut Row used as input and to receive output after rotation + * @param rowOut Row receiving the output + * + */ +/* +inline void reducedDuplexRowSetupv5(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) { + uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev + uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row* + uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row + int i; + for (i = 0; i < N_COLS; i++) { + + //Absorbing "M[rowInOut] XOR M[rowIn]" + state[0] ^= ptrWordInOut[0] + ptrWordIn[0]; + state[1] ^= ptrWordInOut[1] + ptrWordIn[1]; + state[2] ^= ptrWordInOut[2] + ptrWordIn[2]; + state[3] ^= ptrWordInOut[3] + ptrWordIn[3]; + state[4] ^= ptrWordInOut[4] + ptrWordIn[4]; + state[5] ^= ptrWordInOut[5] + ptrWordIn[5]; + state[6] ^= ptrWordInOut[6] + ptrWordIn[6]; + state[7] ^= ptrWordInOut[7] + ptrWordIn[7]; + state[8] ^= ptrWordInOut[8] + ptrWordIn[8]; + state[9] ^= ptrWordInOut[9] + ptrWordIn[9]; + state[10] ^= ptrWordInOut[10] + ptrWordIn[10]; + state[11] ^= ptrWordInOut[11] + ptrWordIn[11]; + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + + + //M[row*][col] = M[row*][col] XOR rotW(rand) + ptrWordInOut[0] ^= state[10]; + ptrWordInOut[1] ^= state[11]; + ptrWordInOut[2] ^= state[0]; + ptrWordInOut[3] ^= state[1]; + ptrWordInOut[4] ^= state[2]; + ptrWordInOut[5] ^= state[3]; + ptrWordInOut[6] ^= state[4]; + ptrWordInOut[7] ^= state[5]; + ptrWordInOut[8] ^= state[6]; + ptrWordInOut[9] ^= state[7]; + ptrWordInOut[10] ^= state[8]; + ptrWordInOut[11] ^= state[9]; + + + //M[row][col] = rand + ptrWordOut[0] = state[0] ^ ptrWordIn[0]; + ptrWordOut[1] = state[1] ^ ptrWordIn[1]; + ptrWordOut[2] = state[2] ^ ptrWordIn[2]; + ptrWordOut[3] = state[3] ^ ptrWordIn[3]; + ptrWordOut[4] = state[4] ^ ptrWordIn[4]; + ptrWordOut[5] = state[5] ^ ptrWordIn[5]; + ptrWordOut[6] = state[6] ^ ptrWordIn[6]; + ptrWordOut[7] = state[7] ^ ptrWordIn[7]; + ptrWordOut[8] = state[8] ^ ptrWordIn[8]; + ptrWordOut[9] = state[9] ^ ptrWordIn[9]; + ptrWordOut[10] = state[10] ^ ptrWordIn[10]; + ptrWordOut[11] = state[11] ^ ptrWordIn[11]; + + //Goes to next column (i.e., next block in sequence) + ptrWordInOut += BLOCK_LEN_INT64; + ptrWordIn += BLOCK_LEN_INT64; + ptrWordOut += BLOCK_LEN_INT64; + } +} +*/ + +/** + * Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", writing the output "rand" + * on M[rowOut] and making "M[rowInOut] = M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit + * rotation to the left. + * + * @param state The current state of the sponge + * @param rowIn Row used only as input + * @param rowInOut Row used as input and to receive output after rotation + * @param rowOut Row receiving the output + * + */ +/* +inline void reducedDuplexRowSetupv5c(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) { + uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev + uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row* + uint64_t* ptrWordOut = rowOut; + int i; + + for (i = 0; i < N_COLS / 2; i++) { + //Absorbing "M[rowInOut] XOR M[rowIn]" + state[0] ^= ptrWordInOut[0] + ptrWordIn[0]; + state[1] ^= ptrWordInOut[1] + ptrWordIn[1]; + state[2] ^= ptrWordInOut[2] + ptrWordIn[2]; + state[3] ^= ptrWordInOut[3] + ptrWordIn[3]; + state[4] ^= ptrWordInOut[4] + ptrWordIn[4]; + state[5] ^= ptrWordInOut[5] + ptrWordIn[5]; + state[6] ^= ptrWordInOut[6] + ptrWordIn[6]; + state[7] ^= ptrWordInOut[7] + ptrWordIn[7]; + state[8] ^= ptrWordInOut[8] + ptrWordIn[8]; + state[9] ^= ptrWordInOut[9] + ptrWordIn[9]; + state[10] ^= ptrWordInOut[10] + ptrWordIn[10]; + state[11] ^= ptrWordInOut[11] + ptrWordIn[11]; + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + + + //M[row*][col] = M[row*][col] XOR rotW(rand) + ptrWordInOut[0] ^= state[10]; + ptrWordInOut[1] ^= state[11]; + ptrWordInOut[2] ^= state[0]; + ptrWordInOut[3] ^= state[1]; + ptrWordInOut[4] ^= state[2]; + ptrWordInOut[5] ^= state[3]; + ptrWordInOut[6] ^= state[4]; + ptrWordInOut[7] ^= state[5]; + ptrWordInOut[8] ^= state[6]; + ptrWordInOut[9] ^= state[7]; + ptrWordInOut[10] ^= state[8]; + ptrWordInOut[11] ^= state[9]; + + + //M[row][col] = rand + ptrWordOut[0] = state[0] ^ ptrWordIn[0]; + ptrWordOut[1] = state[1] ^ ptrWordIn[1]; + ptrWordOut[2] = state[2] ^ ptrWordIn[2]; + ptrWordOut[3] = state[3] ^ ptrWordIn[3]; + ptrWordOut[4] = state[4] ^ ptrWordIn[4]; + ptrWordOut[5] = state[5] ^ ptrWordIn[5]; + ptrWordOut[6] = state[6] ^ ptrWordIn[6]; + ptrWordOut[7] = state[7] ^ ptrWordIn[7]; + ptrWordOut[8] = state[8] ^ ptrWordIn[8]; + ptrWordOut[9] = state[9] ^ ptrWordIn[9]; + ptrWordOut[10] = state[10] ^ ptrWordIn[10]; + ptrWordOut[11] = state[11] ^ ptrWordIn[11]; + + //Goes to next column (i.e., next block in sequence) + ptrWordInOut += BLOCK_LEN_INT64; + ptrWordIn += BLOCK_LEN_INT64; + ptrWordOut += 2 * BLOCK_LEN_INT64; + } + + ptrWordOut = rowOut + BLOCK_LEN_INT64; + for (i = 0; i < N_COLS / 2; i++) { + //Absorbing "M[rowInOut] XOR M[rowIn]" + state[0] ^= ptrWordInOut[0] + ptrWordIn[0]; + state[1] ^= ptrWordInOut[1] + ptrWordIn[1]; + state[2] ^= ptrWordInOut[2] + ptrWordIn[2]; + state[3] ^= ptrWordInOut[3] + ptrWordIn[3]; + state[4] ^= ptrWordInOut[4] + ptrWordIn[4]; + state[5] ^= ptrWordInOut[5] + ptrWordIn[5]; + state[6] ^= ptrWordInOut[6] + ptrWordIn[6]; + state[7] ^= ptrWordInOut[7] + ptrWordIn[7]; + state[8] ^= ptrWordInOut[8] + ptrWordIn[8]; + state[9] ^= ptrWordInOut[9] + ptrWordIn[9]; + state[10] ^= ptrWordInOut[10] + ptrWordIn[10]; + state[11] ^= ptrWordInOut[11] + ptrWordIn[11]; + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + + + //M[row*][col] = M[row*][col] XOR rotW(rand) + ptrWordInOut[0] ^= state[10]; + ptrWordInOut[1] ^= state[11]; + ptrWordInOut[2] ^= state[0]; + ptrWordInOut[3] ^= state[1]; + ptrWordInOut[4] ^= state[2]; + ptrWordInOut[5] ^= state[3]; + ptrWordInOut[6] ^= state[4]; + ptrWordInOut[7] ^= state[5]; + ptrWordInOut[8] ^= state[6]; + ptrWordInOut[9] ^= state[7]; + ptrWordInOut[10] ^= state[8]; + ptrWordInOut[11] ^= state[9]; + + + //M[row][col] = rand + ptrWordOut[0] = state[0] ^ ptrWordIn[0]; + ptrWordOut[1] = state[1] ^ ptrWordIn[1]; + ptrWordOut[2] = state[2] ^ ptrWordIn[2]; + ptrWordOut[3] = state[3] ^ ptrWordIn[3]; + ptrWordOut[4] = state[4] ^ ptrWordIn[4]; + ptrWordOut[5] = state[5] ^ ptrWordIn[5]; + ptrWordOut[6] = state[6] ^ ptrWordIn[6]; + ptrWordOut[7] = state[7] ^ ptrWordIn[7]; + ptrWordOut[8] = state[8] ^ ptrWordIn[8]; + ptrWordOut[9] = state[9] ^ ptrWordIn[9]; + ptrWordOut[10] = state[10] ^ ptrWordIn[10]; + ptrWordOut[11] = state[11] ^ ptrWordIn[11]; + + //Goes to next column (i.e., next block in sequence) + ptrWordInOut += BLOCK_LEN_INT64; + ptrWordIn += BLOCK_LEN_INT64; + ptrWordOut += 2 * BLOCK_LEN_INT64; + } +} +*/ + +/** + * Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", using the output "rand" + * to make "M[rowOut][col] = M[rowOut][col] XOR rand" and "M[rowInOut] = M[rowInOut] XOR rotW(rand)", + * where rotW is a 64-bit rotation to the left. + * + * @param state The current state of the sponge + * @param rowIn Row used only as input + * @param rowInOut Row used as input and to receive output after rotation + * @param rowOut Row receiving the output + * + */ +/* +inline void reducedDuplexRowd(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) { + uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row* + uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev + uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row + int i; + for (i = 0; i < N_COLS; i++) { + + //Absorbing "M[rowInOut] XOR M[rowIn]" + state[0] ^= ptrWordInOut[0] + ptrWordIn[0]; + state[1] ^= ptrWordInOut[1] + ptrWordIn[1]; + state[2] ^= ptrWordInOut[2] + ptrWordIn[2]; + state[3] ^= ptrWordInOut[3] + ptrWordIn[3]; + state[4] ^= ptrWordInOut[4] + ptrWordIn[4]; + state[5] ^= ptrWordInOut[5] + ptrWordIn[5]; + state[6] ^= ptrWordInOut[6] + ptrWordIn[6]; + state[7] ^= ptrWordInOut[7] + ptrWordIn[7]; + state[8] ^= ptrWordInOut[8] + ptrWordIn[8]; + state[9] ^= ptrWordInOut[9] + ptrWordIn[9]; + state[10] ^= ptrWordInOut[10] + ptrWordIn[10]; + state[11] ^= ptrWordInOut[11] + ptrWordIn[11]; + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + + //M[rowOut][col] = M[rowOut][col] XOR rand + ptrWordOut[0] ^= state[0]; + ptrWordOut[1] ^= state[1]; + ptrWordOut[2] ^= state[2]; + ptrWordOut[3] ^= state[3]; + ptrWordOut[4] ^= state[4]; + ptrWordOut[5] ^= state[5]; + ptrWordOut[6] ^= state[6]; + ptrWordOut[7] ^= state[7]; + ptrWordOut[8] ^= state[8]; + ptrWordOut[9] ^= state[9]; + ptrWordOut[10] ^= state[10]; + ptrWordOut[11] ^= state[11]; + + //M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand) + + + //Goes to next block + ptrWordOut += BLOCK_LEN_INT64; + ptrWordInOut += BLOCK_LEN_INT64; + ptrWordIn += BLOCK_LEN_INT64; + } +} +*/ + +/** + Prints an array of unsigned chars + */ +void printArrayv2(unsigned char *array, unsigned int size, char *name) { + int i; + printf("%s: ", name); + for (i = 0; i < size; i++) { + printf("%2x|", array[i]); + } + printf("\n"); +} + +//////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/algorithm/spongev2.h b/algorithm/spongev2.h new file mode 100644 index 00000000..ed42356d --- /dev/null +++ b/algorithm/spongev2.h @@ -0,0 +1,108 @@ +/** + * Header file for Blake2b's internal permutation in the form of a sponge. + * This code is based on the original Blake2b's implementation provided by + * Samuel Neves (https://blake2.net/) + * + * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014. + * + * This software is hereby placed in the public domain. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef SPONGE_H_ +#define SPONGE_H_ + +#include + +#if defined(__GNUC__) +#define ALIGN __attribute__ ((aligned(32))) +#elif defined(_MSC_VER) +#define ALIGN __declspec(align(32)) +#else +#define ALIGN +#endif + + +/*Blake2b IV Array*/ +static const uint64_t blake2b_IV[8] = +{ + 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, + 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, + 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, + 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL +}; + +/*Blake2b's rotation*/ +static inline uint64_t rotr64( const uint64_t w, const unsigned c ){ + return ( w >> c ) | ( w << ( 64 - c ) ); +} + +/*Blake2b's G function*/ +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b; \ + d = rotr64(d ^ a, 32); \ + c = c + d; \ + b = rotr64(b ^ c, 24); \ + a = a + b; \ + d = rotr64(d ^ a, 16); \ + c = c + d; \ + b = rotr64(b ^ c, 63); \ + } while(0) + + +/*One Round of the Blake2b's compression function*/ +#define ROUND_LYRA(r) \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[ 5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); + + +//---- Housekeeping +extern void initStatev2(uint64_t state[/*16*/]); + +//---- Squeezes +extern void squeezev2(uint64_t *state, unsigned char *out, unsigned int len); +extern void reducedSqueezeRow0v2(uint64_t* state, uint64_t* row, uint64_t nCols); + +//---- Absorbs +extern void absorbBlockv2(uint64_t *state, const uint64_t *in); +extern void absorbBlockBlake2Safev2(uint64_t *state, const uint64_t *in); + +//---- Duplexes +extern void reducedDuplexRow1v2(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, uint64_t nCols); +extern void reducedDuplexRowSetupv2(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols); +extern void reducedDuplexRowv2(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols); + +//---- Misc +void printArrayv2(unsigned char *array, unsigned int size, char *name); + +//////////////////////////////////////////////////////////////////////////////////////////////// + + +////TESTS//// +//void reducedDuplexRowc(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); +//void reducedDuplexRowd(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); +//void reducedDuplexRowSetupv4(uint64_t *state, uint64_t *rowIn1, uint64_t *rowIn2, uint64_t *rowOut1, uint64_t *rowOut2); +//void reducedDuplexRowSetupv5(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); +//void reducedDuplexRowSetupv5c(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); +//void reducedDuplexRowSetupv5d(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); +///////////// + + +#endif /* SPONGE_H_ */ diff --git a/algorithm/whirlpoolx.c b/algorithm/whirlpoolx.c index c419057a..b93a7432 100644 --- a/algorithm/whirlpoolx.c +++ b/algorithm/whirlpoolx.c @@ -34,7 +34,7 @@ #include #include -#include "whirlpoolx.h" +#include "sph/sph_whirlpool.h" /* * Encode a length len/4 vector of (uint32_t) into a length len vector of @@ -50,124 +50,16 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) } -void whirlpool_compress(uint8_t state[64], const uint8_t block[64]) -{ - const int NUM_ROUNDS = 10; - uint64_t tempState[8]; - uint64_t tempBlock[8]; - int i; - - // Initialization - for (i = 0; i < 8; i++) { - tempState[i] = - (uint64_t)state[i << 3] - | (uint64_t)state[(i << 3) + 1] << 8 - | (uint64_t)state[(i << 3) + 2] << 16 - | (uint64_t)state[(i << 3) + 3] << 24 - | (uint64_t)state[(i << 3) + 4] << 32 - | (uint64_t)state[(i << 3) + 5] << 40 - | (uint64_t)state[(i << 3) + 6] << 48 - | (uint64_t)state[(i << 3) + 7] << 56; - tempBlock[i] = ( - (uint64_t)block[i << 3] - | (uint64_t)block[(i << 3) + 1] << 8 - | (uint64_t)block[(i << 3) + 2] << 16 - | (uint64_t)block[(i << 3) + 3] << 24 - | (uint64_t)block[(i << 3) + 4] << 32 - | (uint64_t)block[(i << 3) + 5] << 40 - | (uint64_t)block[(i << 3) + 6] << 48 - | (uint64_t)block[(i << 3) + 7] << 56) ^ tempState[i]; - } - - // Hashing rounds - uint64_t rcon[8]; - memset(rcon + 1, 0, sizeof(rcon[0]) * 7); - for (i = 0; i < NUM_ROUNDS; i++) { - rcon[0] = WHIRLPOOL_ROUND_CONSTANTS[i]; - whirlpool_round(tempState, rcon); - whirlpool_round(tempBlock, tempState); - } - - // Final combining - for (i = 0; i < 64; i++) - state[i] ^= block[i] ^ (uint8_t)(tempBlock[i >> 3] >> ((i & 7) << 3)); -} - - - - - -void whirlpool_round(uint64_t block[8], const uint64_t key[8]) { - uint64_t a = block[0]; - uint64_t b = block[1]; - uint64_t c = block[2]; - uint64_t d = block[3]; - uint64_t e = block[4]; - uint64_t f = block[5]; - uint64_t g = block[6]; - uint64_t h = block[7]; - - uint64_t r; - #define DOROW(i, s, t, u, v, w, x, y, z) \ - r = MAGIC_TABLE[(uint8_t)s]; r = (r << 56) | (r >> 8); \ - r ^= MAGIC_TABLE[(uint8_t)(t >> 8)]; r = (r << 56) | (r >> 8); \ - r ^= MAGIC_TABLE[(uint8_t)(u >> 16)]; r = (r << 56) | (r >> 8); \ - r ^= MAGIC_TABLE[(uint8_t)(v >> 24)]; r = (r << 56) | (r >> 8); \ - r ^= MAGIC_TABLE[(uint8_t)(w >> 32)]; r = (r << 56) | (r >> 8); \ - r ^= MAGIC_TABLE[(uint8_t)(x >> 40)]; r = (r << 56) | (r >> 8); \ - r ^= MAGIC_TABLE[(uint8_t)(y >> 48)]; r = (r << 56) | (r >> 8); \ - r ^= MAGIC_TABLE[(uint8_t)(z >> 56)]; r = (r << 56) | (r >> 8); \ - block[i] = r ^ key[i]; - - DOROW(0, a, h, g, f, e, d, c, b) - DOROW(1, b, a, h, g, f, e, d, c) - DOROW(2, c, b, a, h, g, f, e, d) - DOROW(3, d, c, b, a, h, g, f, e) - DOROW(4, e, d, c, b, a, h, g, f) - DOROW(5, f, e, d, c, b, a, h, g) - DOROW(6, g, f, e, d, c, b, a, h) - DOROW(7, h, g, f, e, d, c, b, a) -} - -void whirlpool_hash(const uint8_t *message, uint32_t len, uint8_t hash[64]) { - memset(hash, 0, 64); - - uint32_t i; - for (i = 0; len - i >= 64; i += 64) - whirlpool_compress(hash, message + i); - - uint8_t block[64]; - uint32_t rem = len - i; - memcpy(block, message + i, rem); - - block[rem] = 0x80; - rem++; - if (64 - rem >= 32) - memset(block + rem, 0, 56 - rem); - else { - memset(block + rem, 0, 64 - rem); - whirlpool_compress(hash, block); - memset(block, 0, 56); - } - - uint64_t longLen = ((uint64_t)len) << 3; - for (i = 0; i < 8; i++) - block[64 - 1 - i] = (uint8_t)(longLen >> (i * 8)); - whirlpool_compress(hash, block); -} - void whirlpoolx_hash(void *state, const void *input) { - //sph_whirlpool1_context ctx; + sph_whirlpool1_context ctx; - //sph_whirlpool1_init(&ctx); + sph_whirlpool1_init(&ctx); uint8_t digest[64]; - //sph_whirlpool(&ctx, input, 80); - //sph_whirlpool_close(&ctx, digest); - - whirlpool_hash((uint8_t *)input, 80, digest); + sph_whirlpool(&ctx, input, 80); + sph_whirlpool_close(&ctx, digest); uint8_t digest_xored[32]; diff --git a/algorithm/whirlpoolx.h b/algorithm/whirlpoolx.h index 2a7659c6..27c9ec1a 100644 --- a/algorithm/whirlpoolx.h +++ b/algorithm/whirlpoolx.h @@ -1,58 +1,10 @@ #ifndef WHIRLPOOLX_H #define WHIRLPOOLX_H -#include #include "miner.h" -// The combined effect of gamma (SubBytes) and theta (MixRows) -static uint64_t MAGIC_TABLE[256] = { - UINT64_C(0xD83078C018601818), UINT64_C(0x2646AF05238C2323), UINT64_C(0xB891F97EC63FC6C6), UINT64_C(0xFBCD6F13E887E8E8), UINT64_C(0xCB13A14C87268787), UINT64_C(0x116D62A9B8DAB8B8), UINT64_C(0x0902050801040101), UINT64_C(0x0D9E6E424F214F4F), - UINT64_C(0x9B6CEEAD36D83636), UINT64_C(0xFF510459A6A2A6A6), UINT64_C(0x0CB9BDDED26FD2D2), UINT64_C(0x0EF706FBF5F3F5F5), UINT64_C(0x96F280EF79F97979), UINT64_C(0x30DECE5F6FA16F6F), UINT64_C(0x6D3FEFFC917E9191), UINT64_C(0xF8A407AA52555252), - UINT64_C(0x47C0FD27609D6060), UINT64_C(0x35657689BCCABCBC), UINT64_C(0x372BCDAC9B569B9B), UINT64_C(0x8A018C048E028E8E), UINT64_C(0xD25B1571A3B6A3A3), UINT64_C(0x6C183C600C300C0C), UINT64_C(0x84F68AFF7BF17B7B), UINT64_C(0x806AE1B535D43535), - UINT64_C(0xF53A69E81D741D1D), UINT64_C(0xB3DD4753E0A7E0E0), UINT64_C(0x21B3ACF6D77BD7D7), UINT64_C(0x9C99ED5EC22FC2C2), UINT64_C(0x435C966D2EB82E2E), UINT64_C(0x29967A624B314B4B), UINT64_C(0x5DE121A3FEDFFEFE), UINT64_C(0xD5AE168257415757), - UINT64_C(0xBD2A41A815541515), UINT64_C(0xE8EEB69F77C17777), UINT64_C(0x926EEBA537DC3737), UINT64_C(0x9ED7567BE5B3E5E5), UINT64_C(0x1323D98C9F469F9F), UINT64_C(0x23FD17D3F0E7F0F0), UINT64_C(0x20947F6A4A354A4A), UINT64_C(0x44A9959EDA4FDADA), - UINT64_C(0xA2B025FA587D5858), UINT64_C(0xCF8FCA06C903C9C9), UINT64_C(0x7C528D5529A42929), UINT64_C(0x5A1422500A280A0A), UINT64_C(0x507F4FE1B1FEB1B1), UINT64_C(0xC95D1A69A0BAA0A0), UINT64_C(0x14D6DA7F6BB16B6B), UINT64_C(0xD917AB5C852E8585), - UINT64_C(0x3C677381BDCEBDBD), UINT64_C(0x8FBA34D25D695D5D), UINT64_C(0x9020508010401010), UINT64_C(0x07F503F3F4F7F4F4), UINT64_C(0xDD8BC016CB0BCBCB), UINT64_C(0xD37CC6ED3EF83E3E), UINT64_C(0x2D0A112805140505), UINT64_C(0x78CEE61F67816767), - UINT64_C(0x97D55373E4B7E4E4), UINT64_C(0x024EBB25279C2727), UINT64_C(0x7382583241194141), UINT64_C(0xA70B9D2C8B168B8B), UINT64_C(0xF6530151A7A6A7A7), UINT64_C(0xB2FA94CF7DE97D7D), UINT64_C(0x4937FBDC956E9595), UINT64_C(0x56AD9F8ED847D8D8), - UINT64_C(0x70EB308BFBCBFBFB), UINT64_C(0xCDC17123EE9FEEEE), UINT64_C(0xBBF891C77CED7C7C), UINT64_C(0x71CCE31766856666), UINT64_C(0x7BA78EA6DD53DDDD), UINT64_C(0xAF2E4BB8175C1717), UINT64_C(0x458E460247014747), UINT64_C(0x1A21DC849E429E9E), - UINT64_C(0xD489C51ECA0FCACA), UINT64_C(0x585A99752DB42D2D), UINT64_C(0x2E637991BFC6BFBF), UINT64_C(0x3F0E1B38071C0707), UINT64_C(0xAC472301AD8EADAD), UINT64_C(0xB0B42FEA5A755A5A), UINT64_C(0xEF1BB56C83368383), UINT64_C(0xB666FF8533CC3333), - UINT64_C(0x5CC6F23F63916363), UINT64_C(0x12040A1002080202), UINT64_C(0x93493839AA92AAAA), UINT64_C(0xDEE2A8AF71D97171), UINT64_C(0xC68DCF0EC807C8C8), UINT64_C(0xD1327DC819641919), UINT64_C(0x3B92707249394949), UINT64_C(0x5FAF9A86D943D9D9), - UINT64_C(0x31F91DC3F2EFF2F2), UINT64_C(0xA8DB484BE3ABE3E3), UINT64_C(0xB9B62AE25B715B5B), UINT64_C(0xBC0D9234881A8888), UINT64_C(0x3E29C8A49A529A9A), UINT64_C(0x0B4CBE2D26982626), UINT64_C(0xBF64FA8D32C83232), UINT64_C(0x597D4AE9B0FAB0B0), - UINT64_C(0xF2CF6A1BE983E9E9), UINT64_C(0x771E33780F3C0F0F), UINT64_C(0x33B7A6E6D573D5D5), UINT64_C(0xF41DBA74803A8080), UINT64_C(0x27617C99BEC2BEBE), UINT64_C(0xEB87DE26CD13CDCD), UINT64_C(0x8968E4BD34D03434), UINT64_C(0x3290757A483D4848), - UINT64_C(0x54E324ABFFDBFFFF), UINT64_C(0x8DF48FF77AF57A7A), UINT64_C(0x643DEAF4907A9090), UINT64_C(0x9DBE3EC25F615F5F), UINT64_C(0x3D40A01D20802020), UINT64_C(0x0FD0D56768BD6868), UINT64_C(0xCA3472D01A681A1A), UINT64_C(0xB7412C19AE82AEAE), - UINT64_C(0x7D755EC9B4EAB4B4), UINT64_C(0xCEA8199A544D5454), UINT64_C(0x7F3BE5EC93769393), UINT64_C(0x2F44AA0D22882222), UINT64_C(0x63C8E907648D6464), UINT64_C(0x2AFF12DBF1E3F1F1), UINT64_C(0xCCE6A2BF73D17373), UINT64_C(0x82245A9012481212), - UINT64_C(0x7A805D3A401D4040), UINT64_C(0x4810284008200808), UINT64_C(0x959BE856C32BC3C3), UINT64_C(0xDFC57B33EC97ECEC), UINT64_C(0x4DAB9096DB4BDBDB), UINT64_C(0xC05F1F61A1BEA1A1), UINT64_C(0x9107831C8D0E8D8D), UINT64_C(0xC87AC9F53DF43D3D), - UINT64_C(0x5B33F1CC97669797), UINT64_C(0x0000000000000000), UINT64_C(0xF983D436CF1BCFCF), UINT64_C(0x6E5687452BAC2B2B), UINT64_C(0xE1ECB39776C57676), UINT64_C(0xE619B06482328282), UINT64_C(0x28B1A9FED67FD6D6), UINT64_C(0xC33677D81B6C1B1B), - UINT64_C(0x74775BC1B5EEB5B5), UINT64_C(0xBE432911AF86AFAF), UINT64_C(0x1DD4DF776AB56A6A), UINT64_C(0xEAA00DBA505D5050), UINT64_C(0x578A4C1245094545), UINT64_C(0x38FB18CBF3EBF3F3), UINT64_C(0xAD60F09D30C03030), UINT64_C(0xC4C3742BEF9BEFEF), - UINT64_C(0xDA7EC3E53FFC3F3F), UINT64_C(0xC7AA1C9255495555), UINT64_C(0xDB591079A2B2A2A2), UINT64_C(0xE9C96503EA8FEAEA), UINT64_C(0x6ACAEC0F65896565), UINT64_C(0x036968B9BAD2BABA), UINT64_C(0x4A5E93652FBC2F2F), UINT64_C(0x8E9DE74EC027C0C0), - UINT64_C(0x60A181BEDE5FDEDE), UINT64_C(0xFC386CE01C701C1C), UINT64_C(0x46E72EBBFDD3FDFD), UINT64_C(0x1F9A64524D294D4D), UINT64_C(0x7639E0E492729292), UINT64_C(0xFAEABC8F75C97575), UINT64_C(0x360C1E3006180606), UINT64_C(0xAE0998248A128A8A), - UINT64_C(0x4B7940F9B2F2B2B2), UINT64_C(0x85D15963E6BFE6E6), UINT64_C(0x7E1C36700E380E0E), UINT64_C(0xE73E63F81F7C1F1F), UINT64_C(0x55C4F73762956262), UINT64_C(0x3AB5A3EED477D4D4), UINT64_C(0x814D3229A89AA8A8), UINT64_C(0x5231F4C496629696), - UINT64_C(0x62EF3A9BF9C3F9F9), UINT64_C(0xA397F666C533C5C5), UINT64_C(0x104AB13525942525), UINT64_C(0xABB220F259795959), UINT64_C(0xD015AE54842A8484), UINT64_C(0xC5E4A7B772D57272), UINT64_C(0xEC72DDD539E43939), UINT64_C(0x1698615A4C2D4C4C), - UINT64_C(0x94BC3BCA5E655E5E), UINT64_C(0x9FF085E778FD7878), UINT64_C(0xE570D8DD38E03838), UINT64_C(0x980586148C0A8C8C), UINT64_C(0x17BFB2C6D163D1D1), UINT64_C(0xE4570B41A5AEA5A5), UINT64_C(0xA1D94D43E2AFE2E2), UINT64_C(0x4EC2F82F61996161), - UINT64_C(0x427B45F1B3F6B3B3), UINT64_C(0x3442A51521842121), UINT64_C(0x0825D6949C4A9C9C), UINT64_C(0xEE3C66F01E781E1E), UINT64_C(0x6186522243114343), UINT64_C(0xB193FC76C73BC7C7), UINT64_C(0x4FE52BB3FCD7FCFC), UINT64_C(0x2408142004100404), - UINT64_C(0xE3A208B251595151), UINT64_C(0x252FC7BC995E9999), UINT64_C(0x22DAC44F6DA96D6D), UINT64_C(0x651A39680D340D0D), UINT64_C(0x79E93583FACFFAFA), UINT64_C(0x69A384B6DF5BDFDF), UINT64_C(0xA9FC9BD77EE57E7E), UINT64_C(0x1948B43D24902424), - UINT64_C(0xFE76D7C53BEC3B3B), UINT64_C(0x9A4B3D31AB96ABAB), UINT64_C(0xF081D13ECE1FCECE), UINT64_C(0x9922558811441111), UINT64_C(0x8303890C8F068F8F), UINT64_C(0x049C6B4A4E254E4E), UINT64_C(0x667351D1B7E6B7B7), UINT64_C(0xE0CB600BEB8BEBEB), - UINT64_C(0xC178CCFD3CF03C3C), UINT64_C(0xFD1FBF7C813E8181), UINT64_C(0x4035FED4946A9494), UINT64_C(0x1CF30CEBF7FBF7F7), UINT64_C(0x186F67A1B9DEB9B9), UINT64_C(0x8B265F98134C1313), UINT64_C(0x51589C7D2CB02C2C), UINT64_C(0x05BBB8D6D36BD3D3), - UINT64_C(0x8CD35C6BE7BBE7E7), UINT64_C(0x39DCCB576EA56E6E), UINT64_C(0xAA95F36EC437C4C4), UINT64_C(0x1B060F18030C0303), UINT64_C(0xDCAC138A56455656), UINT64_C(0x5E88491A440D4444), UINT64_C(0xA0FE9EDF7FE17F7F), UINT64_C(0x884F3721A99EA9A9), - UINT64_C(0x6754824D2AA82A2A), UINT64_C(0x0A6B6DB1BBD6BBBB), UINT64_C(0x879FE246C123C1C1), UINT64_C(0xF1A602A253515353), UINT64_C(0x72A58BAEDC57DCDC), UINT64_C(0x531627580B2C0B0B), UINT64_C(0x0127D39C9D4E9D9D), UINT64_C(0x2BD8C1476CAD6C6C), - UINT64_C(0xA462F59531C43131), UINT64_C(0xF3E8B98774CD7474), UINT64_C(0x15F109E3F6FFF6F6), UINT64_C(0x4C8C430A46054646), UINT64_C(0xA5452609AC8AACAC), UINT64_C(0xB50F973C891E8989), UINT64_C(0xB42844A014501414), UINT64_C(0xBADF425BE1A3E1E1), - UINT64_C(0xA62C4EB016581616), UINT64_C(0xF774D2CD3AE83A3A), UINT64_C(0x06D2D06F69B96969), UINT64_C(0x41122D4809240909), UINT64_C(0xD7E0ADA770DD7070), UINT64_C(0x6F7154D9B6E2B6B6), UINT64_C(0x1EBDB7CED067D0D0), UINT64_C(0xD6C77E3BED93EDED), - UINT64_C(0xE285DB2ECC17CCCC), UINT64_C(0x6884572A42154242), UINT64_C(0x2C2DC2B4985A9898), UINT64_C(0xED550E49A4AAA4A4), UINT64_C(0x7550885D28A02828), UINT64_C(0x86B831DA5C6D5C5C), UINT64_C(0x6BED3F93F8C7F8F8), UINT64_C(0xC211A44486228686), -}; - -static uint64_t WHIRLPOOL_ROUND_CONSTANTS[32] = { - UINT64_C(0x4F01B887E8C62318), UINT64_C(0x52916F79F5D2A636), UINT64_C(0x357B0CA38E9BBC60), UINT64_C(0x57FE4B2EC2D7E01D), - UINT64_C(0xDA4AF09FE5377715), UINT64_C(0x856BA0B10A29C958), UINT64_C(0x67053ECBF4105DBD), UINT64_C(0xD8957DA78B4127E4), - UINT64_C(0x9E4717DD667CEEFB), UINT64_C(0x33835AAD07BF2DCA), UINT64_C(0xD94919C871AA0263), UINT64_C(0xB032269A885BE3F2), - UINT64_C(0x4834CDBE80D50FE9), UINT64_C(0xAE1A68205F907AFF), UINT64_C(0x1273F164229354B4), UINT64_C(0x3D8DA1DBECC30840), - UINT64_C(0x1BD682762BCF0097), UINT64_C(0xEF30F345506AAFB5), UINT64_C(0xC02FBA65EAA2553F), UINT64_C(0x8A0675924DFD1CDE), - UINT64_C(0x96A8D4621F0EE6B2), UINT64_C(0x4C3972845925C5F9), UINT64_C(0x61E2A5D18C38785E), UINT64_C(0x04FCC7431E9C21B3), - UINT64_C(0x247EDFFA0D6D9951), UINT64_C(0xEBB74E8F11CEAB3B), UINT64_C(0xD32C13B9F794813C), UINT64_C(0xA97F445603C46EE7), - UINT64_C(0x6C9D0BDC53C1BB2A), UINT64_C(0xE11489AC46F67431), UINT64_C(0xEDD0B67009693A16), UINT64_C(0x86F85C28A49842CC), -}; extern int whirlpoolx_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce); extern void whirlpoolx_regenhash(struct work *work); -extern void whirlpool_round(uint64_t block[8], const uint64_t key[8]); #endif /* WHIRLPOOLX_H */ \ No newline at end of file diff --git a/algorithm/yescrypt-opt.c b/algorithm/yescrypt-opt.c index b54be469..6adef7e7 100644 --- a/algorithm/yescrypt-opt.c +++ b/algorithm/yescrypt-opt.c @@ -99,7 +99,7 @@ alloc_region(yescrypt_region_t * region, size_t size) if (size + 63 < size) { errno = ENOMEM; } - else if ((base = malloc(size + 63)) != NULL) { + else if ((base = (uint8_t *)malloc(size + 63)) != NULL) { aligned = base + 63; aligned -= (uintptr_t)aligned & 63; } @@ -520,7 +520,7 @@ smix1(uint64_t * B, size_t r, uint64_t N, yescrypt_flags_t flags, uint64_t * XY, uint64_t * S) { void (*blockmix)(const uint64_t *, uint64_t *, uint64_t *, size_t) = (S ? blockmix_pwxform : blockmix_salsa8); - const uint64_t * VROM = shared->shared1.aligned; + const uint64_t * VROM = (uint64_t *)shared->shared1.aligned; uint32_t VROM_mask = shared->mask1; size_t s = 16 * r; uint64_t * X = V; @@ -671,7 +671,7 @@ smix2(uint64_t * B, size_t r, uint64_t N, uint64_t Nloop, void (*blockmix)(const uint64_t *, uint64_t *, uint64_t *, size_t) = (S ? blockmix_pwxform : blockmix_salsa8); - const uint64_t * VROM = shared->shared1.aligned; + const uint64_t * VROM = (uint64_t *)shared->shared1.aligned; uint32_t VROM_mask = shared->mask1 | 1; size_t s = 16 * r; yescrypt_flags_t rw = flags & YESCRYPT_RW; @@ -835,7 +835,7 @@ smix(uint64_t * B, size_t r, uint64_t N, uint32_t p, uint32_t t, uint64_t * Sp = S ? &S[i * S_SIZE_ALL] : S; if (Sp) - smix1(Bp, 1, S_SIZE_ALL / 16, flags & ~YESCRYPT_PWXFORM,Sp, NROM, shared, XYp, NULL); + smix1(Bp, 1, S_SIZE_ALL / 16, (yescrypt_flags_t)flags & ~YESCRYPT_PWXFORM,Sp, NROM, shared, XYp, NULL); diff --git a/api.c b/api.c index 08701b8f..996c0ea2 100644 --- a/api.c +++ b/api.c @@ -1334,7 +1334,7 @@ static void apiversion(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __m io_open = io_add(io_data, isjson ? COMSTR JSON_VERSION : _VERSION COMSTR); root = api_add_string(root, "Miner", PACKAGE " " VERSION, false); - root = api_add_string(root, "CGMiner", CGMINER_VERSION, false); + root = api_add_string(root, "SGMiner", VERSION, false); root = api_add_const(root, "API", APIVERSION, false); root = print_data(root, buf, isjson, false); diff --git a/driver-opencl.c b/driver-opencl.c index 0e45e555..4a9d0693 100644 --- a/driver-opencl.c +++ b/driver-opencl.c @@ -1366,7 +1366,7 @@ static bool opencl_thread_init(struct thr_info *thr) static bool opencl_prepare_work(struct thr_info __maybe_unused *thr, struct work *work) { - if (work->pool->algorithm.type == ALGO_LYRA2RE || work->pool->algorithm.type == ALGO_LYRA2REv2) { + if (work->pool->algorithm.type == ALGO_LYRA2RE || work->pool->algorithm.type == ALGO_LYRA2REV2) { work->blk.work = work; precalc_hash_blake256(&work->blk, 0, (uint32_t *)(work->data)); } diff --git a/kernel/lyra2rev2.cl b/kernel/lyra2rev2.cl index 0fe0440d..9694ae72 100644 --- a/kernel/lyra2rev2.cl +++ b/kernel/lyra2rev2.cl @@ -31,8 +31,8 @@ // typedef unsigned int uint; #pragma OPENCL EXTENSION cl_amd_printf : enable -#ifndef LYRA2RE_CL -#define LYRA2RE_CL +#ifndef LYRA2REV2_CL +#define LYRA2REV2_CL #if __ENDIAN_LITTLE__ #define SPH_LITTLE_ENDIAN 1 @@ -90,7 +90,7 @@ static inline sph_u64 ror64(sph_u64 vw, unsigned a) { //#define SPH_ROTR64(l,n) ror64(l,n) #define memshift 3 #include "blake256.cl" -#include "lyra2v2.cl" +#include "Lyra2v2.cl" #include "keccak1600.cl" #include "skein256.cl" #include "cubehash.cl" @@ -522,4 +522,4 @@ __kernel void search6(__global uchar* hashes, __global uint* output, const ulong } -#endif // LYRA2RE_CL \ No newline at end of file +#endif // LYRA2REV2_CL \ No newline at end of file diff --git a/kernel/neoscrypt.cl b/kernel/neoscrypt.cl index 7939d7ed..c71fc9c5 100644 --- a/kernel/neoscrypt.cl +++ b/kernel/neoscrypt.cl @@ -1,5 +1,4 @@ -/* NeoScrypt(128, 2, 1) with Salsa20/20 and ChaCha20/20 */ -/* Adapted and improved for 14.x drivers by Wolf9466 (Wolf`) */ +// NeoScrypt(128, 2, 1) with Salsa20/20 and ChaCha20/20 // Stupid AMD compiler ignores the unroll pragma in these two #define SALSA_SMALL_UNROLL 3 @@ -351,74 +350,71 @@ uint16 salsa_small_scalar_rnd(uint16 X) return(X + st); } -#define CHACHA_CORE_PARALLEL(state) do { \ - state[0] += state[1]; state[3] = rotate(state[3] ^ state[0], (uint4)(16U, 16U, 16U, 16U)); \ - state[2] += state[3]; state[1] = rotate(state[1] ^ state[2], (uint4)(12U, 12U, 12U, 12U)); \ - state[0] += state[1]; state[3] = rotate(state[3] ^ state[0], (uint4)(8U, 8U, 8U, 8U)); \ - state[2] += state[3]; state[1] = rotate(state[1] ^ state[2], (uint4)(7U, 7U, 7U, 7U)); \ - \ - state[0] += state[1].yzwx; state[3].wxyz = rotate(state[3].wxyz ^ state[0], (uint4)(16U, 16U, 16U, 16U)); \ - state[2].zwxy += state[3].wxyz; state[1].yzwx = rotate(state[1].yzwx ^ state[2].zwxy, (uint4)(12U, 12U, 12U, 12U)); \ - state[0] += state[1].yzwx; state[3].wxyz = rotate(state[3].wxyz ^ state[0], (uint4)(8U, 8U, 8U, 8U)); \ - state[2].zwxy += state[3].wxyz; state[1].yzwx = rotate(state[1].yzwx ^ state[2].zwxy, (uint4)(7U, 7U, 7U, 7U)); \ +#define CHACHA_CORE(state) do { \ + state.s0 += state.s4; state.sc = as_uint(as_ushort2(state.sc ^ state.s0).s10); state.s8 += state.sc; state.s4 = rotate(state.s4 ^ state.s8, 12U); state.s0 += state.s4; state.sc = rotate(state.sc ^ state.s0, 8U); state.s8 += state.sc; state.s4 = rotate(state.s4 ^ state.s8, 7U); \ + state.s1 += state.s5; state.sd = as_uint(as_ushort2(state.sd ^ state.s1).s10); state.s9 += state.sd; state.s5 = rotate(state.s5 ^ state.s9, 12U); state.s1 += state.s5; state.sd = rotate(state.sd ^ state.s1, 8U); state.s9 += state.sd; state.s5 = rotate(state.s5 ^ state.s9, 7U); \ + state.s2 += state.s6; state.se = as_uint(as_ushort2(state.se ^ state.s2).s10); state.sa += state.se; state.s6 = rotate(state.s6 ^ state.sa, 12U); state.s2 += state.s6; state.se = rotate(state.se ^ state.s2, 8U); state.sa += state.se; state.s6 = rotate(state.s6 ^ state.sa, 7U); \ + state.s3 += state.s7; state.sf = as_uint(as_ushort2(state.sf ^ state.s3).s10); state.sb += state.sf; state.s7 = rotate(state.s7 ^ state.sb, 12U); state.s3 += state.s7; state.sf = rotate(state.sf ^ state.s3, 8U); state.sb += state.sf; state.s7 = rotate(state.s7 ^ state.sb, 7U); \ + state.s0 += state.s5; state.sf = as_uint(as_ushort2(state.sf ^ state.s0).s10); state.sa += state.sf; state.s5 = rotate(state.s5 ^ state.sa, 12U); state.s0 += state.s5; state.sf = rotate(state.sf ^ state.s0, 8U); state.sa += state.sf; state.s5 = rotate(state.s5 ^ state.sa, 7U); \ + state.s1 += state.s6; state.sc = as_uint(as_ushort2(state.sc ^ state.s1).s10); state.sb += state.sc; state.s6 = rotate(state.s6 ^ state.sb, 12U); state.s1 += state.s6; state.sc = rotate(state.sc ^ state.s1, 8U); state.sb += state.sc; state.s6 = rotate(state.s6 ^ state.sb, 7U); \ + state.s2 += state.s7; state.sd = as_uint(as_ushort2(state.sd ^ state.s2).s10); state.s8 += state.sd; state.s7 = rotate(state.s7 ^ state.s8, 12U); state.s2 += state.s7; state.sd = rotate(state.sd ^ state.s2, 8U); state.s8 += state.sd; state.s7 = rotate(state.s7 ^ state.s8, 7U); \ + state.s3 += state.s4; state.se = as_uint(as_ushort2(state.se ^ state.s3).s10); state.s9 += state.se; state.s4 = rotate(state.s4 ^ state.s9, 12U); state.s3 += state.s4; state.se = rotate(state.se ^ state.s3, 8U); state.s9 += state.se; state.s4 = rotate(state.s4 ^ state.s9, 7U); \ } while(0) -uint16 chacha_small_parallel_rnd(uint16 X) -{ - uint4 t, st[4]; - - ((uint16 *)st)[0] = X; - +uint16 chacha_small_scalar_rnd(uint16 X) +{ + uint16 st = X; + #if CHACHA_SMALL_UNROLL == 1 for(int i = 0; i < 10; ++i) { - CHACHA_CORE_PARALLEL(st); + CHACHA_CORE(st); } #elif CHACHA_SMALL_UNROLL == 2 for(int i = 0; i < 5; ++i) { - CHACHA_CORE_PARALLEL(st); - CHACHA_CORE_PARALLEL(st); + CHACHA_CORE(st); + CHACHA_CORE(st); } #elif CHACHA_SMALL_UNROLL == 3 for(int i = 0; i < 4; ++i) { - CHACHA_CORE_PARALLEL(st); + CHACHA_CORE(st); if(i == 3) break; - CHACHA_CORE_PARALLEL(st); - CHACHA_CORE_PARALLEL(st); + CHACHA_CORE(st); + CHACHA_CORE(st); } #elif CHACHA_SMALL_UNROLL == 4 for(int i = 0; i < 3; ++i) { - CHACHA_CORE_PARALLEL(st); - CHACHA_CORE_PARALLEL(st); + CHACHA_CORE(st); + CHACHA_CORE(st); if(i == 2) break; - CHACHA_CORE_PARALLEL(st); - CHACHA_CORE_PARALLEL(st); + CHACHA_CORE(st); + CHACHA_CORE(st); } #else for(int i = 0; i < 2; ++i) { - CHACHA_CORE_PARALLEL(st); - CHACHA_CORE_PARALLEL(st); - CHACHA_CORE_PARALLEL(st); - CHACHA_CORE_PARALLEL(st); - CHACHA_CORE_PARALLEL(st); + CHACHA_CORE(st); + CHACHA_CORE(st); + CHACHA_CORE(st); + CHACHA_CORE(st); + CHACHA_CORE(st); } #endif - return(X + ((uint16 *)st)[0]); + return(X + st); } void neoscrypt_blkmix(uint16 *XV, bool alg) @@ -443,10 +439,10 @@ void neoscrypt_blkmix(uint16 *XV, bool alg) } else { - XV[0] = chacha_small_parallel_rnd(XV[0]); XV[1] ^= XV[0]; - XV[1] = chacha_small_parallel_rnd(XV[1]); XV[2] ^= XV[1]; - XV[2] = chacha_small_parallel_rnd(XV[2]); XV[3] ^= XV[2]; - XV[3] = chacha_small_parallel_rnd(XV[3]); + XV[0] = chacha_small_scalar_rnd(XV[0]); XV[1] ^= XV[0]; + XV[1] = chacha_small_scalar_rnd(XV[1]); XV[2] ^= XV[1]; + XV[2] = chacha_small_scalar_rnd(XV[2]); XV[3] ^= XV[2]; + XV[3] = chacha_small_scalar_rnd(XV[3]); } XV[1] ^= XV[2]; @@ -454,7 +450,7 @@ void neoscrypt_blkmix(uint16 *XV, bool alg) XV[1] ^= XV[2]; } -void ScratchpadStore(__global void *V, void *X, uchar idx) +void ScratchpadStore(__global void *V, const void *X, uchar idx) { ((__global ulong16 *)V)[idx << 1] = ((ulong16 *)X)[0]; ((__global ulong16 *)V)[(idx << 1) + 1] = ((ulong16 *)X)[1]; @@ -466,20 +462,34 @@ void ScratchpadMix(void *X, const __global void *V, uchar idx) ((ulong16 *)X)[1] ^= ((__global ulong16 *)V)[(idx << 1) + 1]; } -void SMix(uint16 *X, __global uint16 *V, bool flag) +void ScratchpadLoad(void *X, const __global void *V, uchar idx) { + ((ulong16 *)X)[0] = ((__global ulong16 *)V)[idx << 1]; + ((ulong16 *)X)[1] = ((__global ulong16 *)V)[(idx << 1) + 1]; +} + +void SMix(uint16 *X, __global uint16 *V, bool flag) +{ #pragma unroll 1 - for(int i = 0; i < 128; ++i) + for(int i = 0; i < 64; ++i) { ScratchpadStore(V, X, i); neoscrypt_blkmix(X, flag); + neoscrypt_blkmix(X, flag); } #pragma unroll 1 for(int i = 0; i < 128; ++i) { + uint16 tmp[4]; const uint idx = convert_uchar(((uint *)X)[48] & 0x7F); - ScratchpadMix(X, V, idx); + + ScratchpadLoad(tmp, V, idx >> 1); + + if(idx & 1) neoscrypt_blkmix(tmp, flag); + + ((ulong16 *)X)[0] ^= ((ulong16 *)tmp)[0]; + ((ulong16 *)X)[1] ^= ((ulong16 *)tmp)[1]; neoscrypt_blkmix(X, flag); } } @@ -492,7 +502,8 @@ __kernel void search(__global const uchar* restrict input, __global uint* restri // X = CONSTANT_r * 2 * BLOCK_SIZE(64); Z is a copy of X for ChaCha uint16 X[4], Z[4]; /* V = CONSTANT_N * CONSTANT_r * 2 * BLOCK_SIZE */ - __global ulong16 *V = (__global ulong16 *)(padcache + (0x8000 * (get_global_id(0) % MAX_GLOBAL_THREADS))); + //__global ulong16 *V = (__global ulong16 *)(padcache + (0x8000 * (get_global_id(0) % MAX_GLOBAL_THREADS))); + __global ulong16 *V = (__global ulong16 *)(padcache + (0x4000 * (get_global_id(0) % MAX_GLOBAL_THREADS))); uchar outbuf[32]; uchar data[PASSWORD_LEN]; diff --git a/kernel/whirlpoolx.cl b/kernel/whirlpoolx.cl index 89a0a3f0..362fa346 100644 --- a/kernel/whirlpoolx.cl +++ b/kernel/whirlpoolx.cl @@ -1,315 +1,1264 @@ +/* + * whirlcoin kernel implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2014 phm + * Copyright (c) 2014 djm34 + * Copyright (c) 2014 uray + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author djm34, uray + */ #ifndef WHIRLPOOLX_CL #define WHIRLPOOLX_CL -/* - Where are the other tables? You'll probably feel stupid when I tell you, but the T1 - T7 - tables are all copies of the T0 table, with every ulong rotated left by the table number - of bytes. Basically, T1 is T0 rotated left 8 bits, T2 is T0 rotated left 16 bits, and so on. - Makes one hell of a lot more sense to create them dynamically (and/or rotate instead), but - few things in the stock miners make sense. -*/ - -__constant static const ulong T0_C[256] = -{ - 0xD83078C018601818UL, 0x2646AF05238C2323UL, 0xB891F97EC63FC6C6UL, 0xFBCD6F13E887E8E8UL, - 0xCB13A14C87268787UL, 0x116D62A9B8DAB8B8UL, 0x0902050801040101UL, 0x0D9E6E424F214F4FUL, - 0x9B6CEEAD36D83636UL, 0xFF510459A6A2A6A6UL, 0x0CB9BDDED26FD2D2UL, 0x0EF706FBF5F3F5F5UL, - 0x96F280EF79F97979UL, 0x30DECE5F6FA16F6FUL, 0x6D3FEFFC917E9191UL, 0xF8A407AA52555252UL, - 0x47C0FD27609D6060UL, 0x35657689BCCABCBCUL, 0x372BCDAC9B569B9BUL, 0x8A018C048E028E8EUL, - 0xD25B1571A3B6A3A3UL, 0x6C183C600C300C0CUL, 0x84F68AFF7BF17B7BUL, 0x806AE1B535D43535UL, - 0xF53A69E81D741D1DUL, 0xB3DD4753E0A7E0E0UL, 0x21B3ACF6D77BD7D7UL, 0x9C99ED5EC22FC2C2UL, - 0x435C966D2EB82E2EUL, 0x29967A624B314B4BUL, 0x5DE121A3FEDFFEFEUL, 0xD5AE168257415757UL, - 0xBD2A41A815541515UL, 0xE8EEB69F77C17777UL, 0x926EEBA537DC3737UL, 0x9ED7567BE5B3E5E5UL, - 0x1323D98C9F469F9FUL, 0x23FD17D3F0E7F0F0UL, 0x20947F6A4A354A4AUL, 0x44A9959EDA4FDADAUL, - 0xA2B025FA587D5858UL, 0xCF8FCA06C903C9C9UL, 0x7C528D5529A42929UL, 0x5A1422500A280A0AUL, - 0x507F4FE1B1FEB1B1UL, 0xC95D1A69A0BAA0A0UL, 0x14D6DA7F6BB16B6BUL, 0xD917AB5C852E8585UL, - 0x3C677381BDCEBDBDUL, 0x8FBA34D25D695D5DUL, 0x9020508010401010UL, 0x07F503F3F4F7F4F4UL, - 0xDD8BC016CB0BCBCBUL, 0xD37CC6ED3EF83E3EUL, 0x2D0A112805140505UL, 0x78CEE61F67816767UL, - 0x97D55373E4B7E4E4UL, 0x024EBB25279C2727UL, 0x7382583241194141UL, 0xA70B9D2C8B168B8BUL, - 0xF6530151A7A6A7A7UL, 0xB2FA94CF7DE97D7DUL, 0x4937FBDC956E9595UL, 0x56AD9F8ED847D8D8UL, - 0x70EB308BFBCBFBFBUL, 0xCDC17123EE9FEEEEUL, 0xBBF891C77CED7C7CUL, 0x71CCE31766856666UL, - 0x7BA78EA6DD53DDDDUL, 0xAF2E4BB8175C1717UL, 0x458E460247014747UL, 0x1A21DC849E429E9EUL, - 0xD489C51ECA0FCACAUL, 0x585A99752DB42D2DUL, 0x2E637991BFC6BFBFUL, 0x3F0E1B38071C0707UL, - 0xAC472301AD8EADADUL, 0xB0B42FEA5A755A5AUL, 0xEF1BB56C83368383UL, 0xB666FF8533CC3333UL, - 0x5CC6F23F63916363UL, 0x12040A1002080202UL, 0x93493839AA92AAAAUL, 0xDEE2A8AF71D97171UL, - 0xC68DCF0EC807C8C8UL, 0xD1327DC819641919UL, 0x3B92707249394949UL, 0x5FAF9A86D943D9D9UL, - 0x31F91DC3F2EFF2F2UL, 0xA8DB484BE3ABE3E3UL, 0xB9B62AE25B715B5BUL, 0xBC0D9234881A8888UL, - 0x3E29C8A49A529A9AUL, 0x0B4CBE2D26982626UL, 0xBF64FA8D32C83232UL, 0x597D4AE9B0FAB0B0UL, - 0xF2CF6A1BE983E9E9UL, 0x771E33780F3C0F0FUL, 0x33B7A6E6D573D5D5UL, 0xF41DBA74803A8080UL, - 0x27617C99BEC2BEBEUL, 0xEB87DE26CD13CDCDUL, 0x8968E4BD34D03434UL, 0x3290757A483D4848UL, - 0x54E324ABFFDBFFFFUL, 0x8DF48FF77AF57A7AUL, 0x643DEAF4907A9090UL, 0x9DBE3EC25F615F5FUL, - 0x3D40A01D20802020UL, 0x0FD0D56768BD6868UL, 0xCA3472D01A681A1AUL, 0xB7412C19AE82AEAEUL, - 0x7D755EC9B4EAB4B4UL, 0xCEA8199A544D5454UL, 0x7F3BE5EC93769393UL, 0x2F44AA0D22882222UL, - 0x63C8E907648D6464UL, 0x2AFF12DBF1E3F1F1UL, 0xCCE6A2BF73D17373UL, 0x82245A9012481212UL, - 0x7A805D3A401D4040UL, 0x4810284008200808UL, 0x959BE856C32BC3C3UL, 0xDFC57B33EC97ECECUL, - 0x4DAB9096DB4BDBDBUL, 0xC05F1F61A1BEA1A1UL, 0x9107831C8D0E8D8DUL, 0xC87AC9F53DF43D3DUL, - 0x5B33F1CC97669797UL, 0x0000000000000000UL, 0xF983D436CF1BCFCFUL, 0x6E5687452BAC2B2BUL, - 0xE1ECB39776C57676UL, 0xE619B06482328282UL, 0x28B1A9FED67FD6D6UL, 0xC33677D81B6C1B1BUL, - 0x74775BC1B5EEB5B5UL, 0xBE432911AF86AFAFUL, 0x1DD4DF776AB56A6AUL, 0xEAA00DBA505D5050UL, - 0x578A4C1245094545UL, 0x38FB18CBF3EBF3F3UL, 0xAD60F09D30C03030UL, 0xC4C3742BEF9BEFEFUL, - 0xDA7EC3E53FFC3F3FUL, 0xC7AA1C9255495555UL, 0xDB591079A2B2A2A2UL, 0xE9C96503EA8FEAEAUL, - 0x6ACAEC0F65896565UL, 0x036968B9BAD2BABAUL, 0x4A5E93652FBC2F2FUL, 0x8E9DE74EC027C0C0UL, - 0x60A181BEDE5FDEDEUL, 0xFC386CE01C701C1CUL, 0x46E72EBBFDD3FDFDUL, 0x1F9A64524D294D4DUL, - 0x7639E0E492729292UL, 0xFAEABC8F75C97575UL, 0x360C1E3006180606UL, 0xAE0998248A128A8AUL, - 0x4B7940F9B2F2B2B2UL, 0x85D15963E6BFE6E6UL, 0x7E1C36700E380E0EUL, 0xE73E63F81F7C1F1FUL, - 0x55C4F73762956262UL, 0x3AB5A3EED477D4D4UL, 0x814D3229A89AA8A8UL, 0x5231F4C496629696UL, - 0x62EF3A9BF9C3F9F9UL, 0xA397F666C533C5C5UL, 0x104AB13525942525UL, 0xABB220F259795959UL, - 0xD015AE54842A8484UL, 0xC5E4A7B772D57272UL, 0xEC72DDD539E43939UL, 0x1698615A4C2D4C4CUL, - 0x94BC3BCA5E655E5EUL, 0x9FF085E778FD7878UL, 0xE570D8DD38E03838UL, 0x980586148C0A8C8CUL, - 0x17BFB2C6D163D1D1UL, 0xE4570B41A5AEA5A5UL, 0xA1D94D43E2AFE2E2UL, 0x4EC2F82F61996161UL, - 0x427B45F1B3F6B3B3UL, 0x3442A51521842121UL, 0x0825D6949C4A9C9CUL, 0xEE3C66F01E781E1EUL, - 0x6186522243114343UL, 0xB193FC76C73BC7C7UL, 0x4FE52BB3FCD7FCFCUL, 0x2408142004100404UL, - 0xE3A208B251595151UL, 0x252FC7BC995E9999UL, 0x22DAC44F6DA96D6DUL, 0x651A39680D340D0DUL, - 0x79E93583FACFFAFAUL, 0x69A384B6DF5BDFDFUL, 0xA9FC9BD77EE57E7EUL, 0x1948B43D24902424UL, - 0xFE76D7C53BEC3B3BUL, 0x9A4B3D31AB96ABABUL, 0xF081D13ECE1FCECEUL, 0x9922558811441111UL, - 0x8303890C8F068F8FUL, 0x049C6B4A4E254E4EUL, 0x667351D1B7E6B7B7UL, 0xE0CB600BEB8BEBEBUL, - 0xC178CCFD3CF03C3CUL, 0xFD1FBF7C813E8181UL, 0x4035FED4946A9494UL, 0x1CF30CEBF7FBF7F7UL, - 0x186F67A1B9DEB9B9UL, 0x8B265F98134C1313UL, 0x51589C7D2CB02C2CUL, 0x05BBB8D6D36BD3D3UL, - 0x8CD35C6BE7BBE7E7UL, 0x39DCCB576EA56E6EUL, 0xAA95F36EC437C4C4UL, 0x1B060F18030C0303UL, - 0xDCAC138A56455656UL, 0x5E88491A440D4444UL, 0xA0FE9EDF7FE17F7FUL, 0x884F3721A99EA9A9UL, - 0x6754824D2AA82A2AUL, 0x0A6B6DB1BBD6BBBBUL, 0x879FE246C123C1C1UL, 0xF1A602A253515353UL, - 0x72A58BAEDC57DCDCUL, 0x531627580B2C0B0BUL, 0x0127D39C9D4E9D9DUL, 0x2BD8C1476CAD6C6CUL, - 0xA462F59531C43131UL, 0xF3E8B98774CD7474UL, 0x15F109E3F6FFF6F6UL, 0x4C8C430A46054646UL, - 0xA5452609AC8AACACUL, 0xB50F973C891E8989UL, 0xB42844A014501414UL, 0xBADF425BE1A3E1E1UL, - 0xA62C4EB016581616UL, 0xF774D2CD3AE83A3AUL, 0x06D2D06F69B96969UL, 0x41122D4809240909UL, - 0xD7E0ADA770DD7070UL, 0x6F7154D9B6E2B6B6UL, 0x1EBDB7CED067D0D0UL, 0xD6C77E3BED93EDEDUL, - 0xE285DB2ECC17CCCCUL, 0x6884572A42154242UL, 0x2C2DC2B4985A9898UL, 0xED550E49A4AAA4A4UL, - 0x7550885D28A02828UL, 0x86B831DA5C6D5C5CUL, 0x6BED3F93F8C7F8F8UL, 0xC211A44486228686UL +#if __ENDIAN_LITTLE__ +#define SPH_LITTLE_ENDIAN 1 +#else +#define SPH_BIG_ENDIAN 1 +#endif + +#define SPH_UPTR sph_u64 + +typedef unsigned int sph_u32; +typedef int sph_s32; +#ifndef __OPENCL_VERSION__ +typedef unsigned long long sph_u64 __attribute__ ((aligned (128))); +typedef long long sph_s64; +#else +typedef unsigned long sph_u64; +typedef long sph_s64; +#endif + +#define SPH_64 1 +#define SPH_64_TRUE 1 + +#define SPH_C32(x) ((sph_u32)(x ## U)) +#define SPH_C64(x) ((sph_u64)(x ## UL)) + +__constant static const sph_u64 plain_T0[256] __attribute__ ((aligned (128))) = { + SPH_C64(0xD83078C018601818), SPH_C64(0x2646AF05238C2323), + SPH_C64(0xB891F97EC63FC6C6), SPH_C64(0xFBCD6F13E887E8E8), + SPH_C64(0xCB13A14C87268787), SPH_C64(0x116D62A9B8DAB8B8), + SPH_C64(0x0902050801040101), SPH_C64(0x0D9E6E424F214F4F), + SPH_C64(0x9B6CEEAD36D83636), SPH_C64(0xFF510459A6A2A6A6), + SPH_C64(0x0CB9BDDED26FD2D2), SPH_C64(0x0EF706FBF5F3F5F5), + SPH_C64(0x96F280EF79F97979), SPH_C64(0x30DECE5F6FA16F6F), + SPH_C64(0x6D3FEFFC917E9191), SPH_C64(0xF8A407AA52555252), + SPH_C64(0x47C0FD27609D6060), SPH_C64(0x35657689BCCABCBC), + SPH_C64(0x372BCDAC9B569B9B), SPH_C64(0x8A018C048E028E8E), + SPH_C64(0xD25B1571A3B6A3A3), SPH_C64(0x6C183C600C300C0C), + SPH_C64(0x84F68AFF7BF17B7B), SPH_C64(0x806AE1B535D43535), + SPH_C64(0xF53A69E81D741D1D), SPH_C64(0xB3DD4753E0A7E0E0), + SPH_C64(0x21B3ACF6D77BD7D7), SPH_C64(0x9C99ED5EC22FC2C2), + SPH_C64(0x435C966D2EB82E2E), SPH_C64(0x29967A624B314B4B), + SPH_C64(0x5DE121A3FEDFFEFE), SPH_C64(0xD5AE168257415757), + SPH_C64(0xBD2A41A815541515), SPH_C64(0xE8EEB69F77C17777), + SPH_C64(0x926EEBA537DC3737), SPH_C64(0x9ED7567BE5B3E5E5), + SPH_C64(0x1323D98C9F469F9F), SPH_C64(0x23FD17D3F0E7F0F0), + SPH_C64(0x20947F6A4A354A4A), SPH_C64(0x44A9959EDA4FDADA), + SPH_C64(0xA2B025FA587D5858), SPH_C64(0xCF8FCA06C903C9C9), + SPH_C64(0x7C528D5529A42929), SPH_C64(0x5A1422500A280A0A), + SPH_C64(0x507F4FE1B1FEB1B1), SPH_C64(0xC95D1A69A0BAA0A0), + SPH_C64(0x14D6DA7F6BB16B6B), SPH_C64(0xD917AB5C852E8585), + SPH_C64(0x3C677381BDCEBDBD), SPH_C64(0x8FBA34D25D695D5D), + SPH_C64(0x9020508010401010), SPH_C64(0x07F503F3F4F7F4F4), + SPH_C64(0xDD8BC016CB0BCBCB), SPH_C64(0xD37CC6ED3EF83E3E), + SPH_C64(0x2D0A112805140505), SPH_C64(0x78CEE61F67816767), + SPH_C64(0x97D55373E4B7E4E4), SPH_C64(0x024EBB25279C2727), + SPH_C64(0x7382583241194141), SPH_C64(0xA70B9D2C8B168B8B), + SPH_C64(0xF6530151A7A6A7A7), SPH_C64(0xB2FA94CF7DE97D7D), + SPH_C64(0x4937FBDC956E9595), SPH_C64(0x56AD9F8ED847D8D8), + SPH_C64(0x70EB308BFBCBFBFB), SPH_C64(0xCDC17123EE9FEEEE), + SPH_C64(0xBBF891C77CED7C7C), SPH_C64(0x71CCE31766856666), + SPH_C64(0x7BA78EA6DD53DDDD), SPH_C64(0xAF2E4BB8175C1717), + SPH_C64(0x458E460247014747), SPH_C64(0x1A21DC849E429E9E), + SPH_C64(0xD489C51ECA0FCACA), SPH_C64(0x585A99752DB42D2D), + SPH_C64(0x2E637991BFC6BFBF), SPH_C64(0x3F0E1B38071C0707), + SPH_C64(0xAC472301AD8EADAD), SPH_C64(0xB0B42FEA5A755A5A), + SPH_C64(0xEF1BB56C83368383), SPH_C64(0xB666FF8533CC3333), + SPH_C64(0x5CC6F23F63916363), SPH_C64(0x12040A1002080202), + SPH_C64(0x93493839AA92AAAA), SPH_C64(0xDEE2A8AF71D97171), + SPH_C64(0xC68DCF0EC807C8C8), SPH_C64(0xD1327DC819641919), + SPH_C64(0x3B92707249394949), SPH_C64(0x5FAF9A86D943D9D9), + SPH_C64(0x31F91DC3F2EFF2F2), SPH_C64(0xA8DB484BE3ABE3E3), + SPH_C64(0xB9B62AE25B715B5B), SPH_C64(0xBC0D9234881A8888), + SPH_C64(0x3E29C8A49A529A9A), SPH_C64(0x0B4CBE2D26982626), + SPH_C64(0xBF64FA8D32C83232), SPH_C64(0x597D4AE9B0FAB0B0), + SPH_C64(0xF2CF6A1BE983E9E9), SPH_C64(0x771E33780F3C0F0F), + SPH_C64(0x33B7A6E6D573D5D5), SPH_C64(0xF41DBA74803A8080), + SPH_C64(0x27617C99BEC2BEBE), SPH_C64(0xEB87DE26CD13CDCD), + SPH_C64(0x8968E4BD34D03434), SPH_C64(0x3290757A483D4848), + SPH_C64(0x54E324ABFFDBFFFF), SPH_C64(0x8DF48FF77AF57A7A), + SPH_C64(0x643DEAF4907A9090), SPH_C64(0x9DBE3EC25F615F5F), + SPH_C64(0x3D40A01D20802020), SPH_C64(0x0FD0D56768BD6868), + SPH_C64(0xCA3472D01A681A1A), SPH_C64(0xB7412C19AE82AEAE), + SPH_C64(0x7D755EC9B4EAB4B4), SPH_C64(0xCEA8199A544D5454), + SPH_C64(0x7F3BE5EC93769393), SPH_C64(0x2F44AA0D22882222), + SPH_C64(0x63C8E907648D6464), SPH_C64(0x2AFF12DBF1E3F1F1), + SPH_C64(0xCCE6A2BF73D17373), SPH_C64(0x82245A9012481212), + SPH_C64(0x7A805D3A401D4040), SPH_C64(0x4810284008200808), + SPH_C64(0x959BE856C32BC3C3), SPH_C64(0xDFC57B33EC97ECEC), + SPH_C64(0x4DAB9096DB4BDBDB), SPH_C64(0xC05F1F61A1BEA1A1), + SPH_C64(0x9107831C8D0E8D8D), SPH_C64(0xC87AC9F53DF43D3D), + SPH_C64(0x5B33F1CC97669797), SPH_C64(0x0000000000000000), + SPH_C64(0xF983D436CF1BCFCF), SPH_C64(0x6E5687452BAC2B2B), + SPH_C64(0xE1ECB39776C57676), SPH_C64(0xE619B06482328282), + SPH_C64(0x28B1A9FED67FD6D6), SPH_C64(0xC33677D81B6C1B1B), + SPH_C64(0x74775BC1B5EEB5B5), SPH_C64(0xBE432911AF86AFAF), + SPH_C64(0x1DD4DF776AB56A6A), SPH_C64(0xEAA00DBA505D5050), + SPH_C64(0x578A4C1245094545), SPH_C64(0x38FB18CBF3EBF3F3), + SPH_C64(0xAD60F09D30C03030), SPH_C64(0xC4C3742BEF9BEFEF), + SPH_C64(0xDA7EC3E53FFC3F3F), SPH_C64(0xC7AA1C9255495555), + SPH_C64(0xDB591079A2B2A2A2), SPH_C64(0xE9C96503EA8FEAEA), + SPH_C64(0x6ACAEC0F65896565), SPH_C64(0x036968B9BAD2BABA), + SPH_C64(0x4A5E93652FBC2F2F), SPH_C64(0x8E9DE74EC027C0C0), + SPH_C64(0x60A181BEDE5FDEDE), SPH_C64(0xFC386CE01C701C1C), + SPH_C64(0x46E72EBBFDD3FDFD), SPH_C64(0x1F9A64524D294D4D), + SPH_C64(0x7639E0E492729292), SPH_C64(0xFAEABC8F75C97575), + SPH_C64(0x360C1E3006180606), SPH_C64(0xAE0998248A128A8A), + SPH_C64(0x4B7940F9B2F2B2B2), SPH_C64(0x85D15963E6BFE6E6), + SPH_C64(0x7E1C36700E380E0E), SPH_C64(0xE73E63F81F7C1F1F), + SPH_C64(0x55C4F73762956262), SPH_C64(0x3AB5A3EED477D4D4), + SPH_C64(0x814D3229A89AA8A8), SPH_C64(0x5231F4C496629696), + SPH_C64(0x62EF3A9BF9C3F9F9), SPH_C64(0xA397F666C533C5C5), + SPH_C64(0x104AB13525942525), SPH_C64(0xABB220F259795959), + SPH_C64(0xD015AE54842A8484), SPH_C64(0xC5E4A7B772D57272), + SPH_C64(0xEC72DDD539E43939), SPH_C64(0x1698615A4C2D4C4C), + SPH_C64(0x94BC3BCA5E655E5E), SPH_C64(0x9FF085E778FD7878), + SPH_C64(0xE570D8DD38E03838), SPH_C64(0x980586148C0A8C8C), + SPH_C64(0x17BFB2C6D163D1D1), SPH_C64(0xE4570B41A5AEA5A5), + SPH_C64(0xA1D94D43E2AFE2E2), SPH_C64(0x4EC2F82F61996161), + SPH_C64(0x427B45F1B3F6B3B3), SPH_C64(0x3442A51521842121), + SPH_C64(0x0825D6949C4A9C9C), SPH_C64(0xEE3C66F01E781E1E), + SPH_C64(0x6186522243114343), SPH_C64(0xB193FC76C73BC7C7), + SPH_C64(0x4FE52BB3FCD7FCFC), SPH_C64(0x2408142004100404), + SPH_C64(0xE3A208B251595151), SPH_C64(0x252FC7BC995E9999), + SPH_C64(0x22DAC44F6DA96D6D), SPH_C64(0x651A39680D340D0D), + SPH_C64(0x79E93583FACFFAFA), SPH_C64(0x69A384B6DF5BDFDF), + SPH_C64(0xA9FC9BD77EE57E7E), SPH_C64(0x1948B43D24902424), + SPH_C64(0xFE76D7C53BEC3B3B), SPH_C64(0x9A4B3D31AB96ABAB), + SPH_C64(0xF081D13ECE1FCECE), SPH_C64(0x9922558811441111), + SPH_C64(0x8303890C8F068F8F), SPH_C64(0x049C6B4A4E254E4E), + SPH_C64(0x667351D1B7E6B7B7), SPH_C64(0xE0CB600BEB8BEBEB), + SPH_C64(0xC178CCFD3CF03C3C), SPH_C64(0xFD1FBF7C813E8181), + SPH_C64(0x4035FED4946A9494), SPH_C64(0x1CF30CEBF7FBF7F7), + SPH_C64(0x186F67A1B9DEB9B9), SPH_C64(0x8B265F98134C1313), + SPH_C64(0x51589C7D2CB02C2C), SPH_C64(0x05BBB8D6D36BD3D3), + SPH_C64(0x8CD35C6BE7BBE7E7), SPH_C64(0x39DCCB576EA56E6E), + SPH_C64(0xAA95F36EC437C4C4), SPH_C64(0x1B060F18030C0303), + SPH_C64(0xDCAC138A56455656), SPH_C64(0x5E88491A440D4444), + SPH_C64(0xA0FE9EDF7FE17F7F), SPH_C64(0x884F3721A99EA9A9), + SPH_C64(0x6754824D2AA82A2A), SPH_C64(0x0A6B6DB1BBD6BBBB), + SPH_C64(0x879FE246C123C1C1), SPH_C64(0xF1A602A253515353), + SPH_C64(0x72A58BAEDC57DCDC), SPH_C64(0x531627580B2C0B0B), + SPH_C64(0x0127D39C9D4E9D9D), SPH_C64(0x2BD8C1476CAD6C6C), + SPH_C64(0xA462F59531C43131), SPH_C64(0xF3E8B98774CD7474), + SPH_C64(0x15F109E3F6FFF6F6), SPH_C64(0x4C8C430A46054646), + SPH_C64(0xA5452609AC8AACAC), SPH_C64(0xB50F973C891E8989), + SPH_C64(0xB42844A014501414), SPH_C64(0xBADF425BE1A3E1E1), + SPH_C64(0xA62C4EB016581616), SPH_C64(0xF774D2CD3AE83A3A), + SPH_C64(0x06D2D06F69B96969), SPH_C64(0x41122D4809240909), + SPH_C64(0xD7E0ADA770DD7070), SPH_C64(0x6F7154D9B6E2B6B6), + SPH_C64(0x1EBDB7CED067D0D0), SPH_C64(0xD6C77E3BED93EDED), + SPH_C64(0xE285DB2ECC17CCCC), SPH_C64(0x6884572A42154242), + SPH_C64(0x2C2DC2B4985A9898), SPH_C64(0xED550E49A4AAA4A4), + SPH_C64(0x7550885D28A02828), SPH_C64(0x86B831DA5C6D5C5C), + SPH_C64(0x6BED3F93F8C7F8F8), SPH_C64(0xC211A44486228686) }; -__constant static const ulong ROUND_CONSTANTS[10] = -{ - 0x4F01B887E8C62318UL, 0x52916F79F5D2A636UL, 0x357B0CA38E9BBC60UL, 0x57FE4B2EC2D7E01DUL, - 0xDA4AF09FE5377715UL, 0x856BA0B10A29C958UL, 0x67053ECBF4105DBDUL, 0xD8957DA78B4127E4UL, - 0x9E4717DD667CEEFBUL, 0x33835AAD07BF2DCAUL +#if !SPH_SMALL_FOOTPRINT_WHIRLPOOL + +__constant static const sph_u64 plain_T1[256] __attribute__ ((aligned (128))) = { + SPH_C64(0x3078C018601818D8), SPH_C64(0x46AF05238C232326), + SPH_C64(0x91F97EC63FC6C6B8), SPH_C64(0xCD6F13E887E8E8FB), + SPH_C64(0x13A14C87268787CB), SPH_C64(0x6D62A9B8DAB8B811), + SPH_C64(0x0205080104010109), SPH_C64(0x9E6E424F214F4F0D), + SPH_C64(0x6CEEAD36D836369B), SPH_C64(0x510459A6A2A6A6FF), + SPH_C64(0xB9BDDED26FD2D20C), SPH_C64(0xF706FBF5F3F5F50E), + SPH_C64(0xF280EF79F9797996), SPH_C64(0xDECE5F6FA16F6F30), + SPH_C64(0x3FEFFC917E91916D), SPH_C64(0xA407AA52555252F8), + SPH_C64(0xC0FD27609D606047), SPH_C64(0x657689BCCABCBC35), + SPH_C64(0x2BCDAC9B569B9B37), SPH_C64(0x018C048E028E8E8A), + SPH_C64(0x5B1571A3B6A3A3D2), SPH_C64(0x183C600C300C0C6C), + SPH_C64(0xF68AFF7BF17B7B84), SPH_C64(0x6AE1B535D4353580), + SPH_C64(0x3A69E81D741D1DF5), SPH_C64(0xDD4753E0A7E0E0B3), + SPH_C64(0xB3ACF6D77BD7D721), SPH_C64(0x99ED5EC22FC2C29C), + SPH_C64(0x5C966D2EB82E2E43), SPH_C64(0x967A624B314B4B29), + SPH_C64(0xE121A3FEDFFEFE5D), SPH_C64(0xAE168257415757D5), + SPH_C64(0x2A41A815541515BD), SPH_C64(0xEEB69F77C17777E8), + SPH_C64(0x6EEBA537DC373792), SPH_C64(0xD7567BE5B3E5E59E), + SPH_C64(0x23D98C9F469F9F13), SPH_C64(0xFD17D3F0E7F0F023), + SPH_C64(0x947F6A4A354A4A20), SPH_C64(0xA9959EDA4FDADA44), + SPH_C64(0xB025FA587D5858A2), SPH_C64(0x8FCA06C903C9C9CF), + SPH_C64(0x528D5529A429297C), SPH_C64(0x1422500A280A0A5A), + SPH_C64(0x7F4FE1B1FEB1B150), SPH_C64(0x5D1A69A0BAA0A0C9), + SPH_C64(0xD6DA7F6BB16B6B14), SPH_C64(0x17AB5C852E8585D9), + SPH_C64(0x677381BDCEBDBD3C), SPH_C64(0xBA34D25D695D5D8F), + SPH_C64(0x2050801040101090), SPH_C64(0xF503F3F4F7F4F407), + SPH_C64(0x8BC016CB0BCBCBDD), SPH_C64(0x7CC6ED3EF83E3ED3), + SPH_C64(0x0A1128051405052D), SPH_C64(0xCEE61F6781676778), + SPH_C64(0xD55373E4B7E4E497), SPH_C64(0x4EBB25279C272702), + SPH_C64(0x8258324119414173), SPH_C64(0x0B9D2C8B168B8BA7), + SPH_C64(0x530151A7A6A7A7F6), SPH_C64(0xFA94CF7DE97D7DB2), + SPH_C64(0x37FBDC956E959549), SPH_C64(0xAD9F8ED847D8D856), + SPH_C64(0xEB308BFBCBFBFB70), SPH_C64(0xC17123EE9FEEEECD), + SPH_C64(0xF891C77CED7C7CBB), SPH_C64(0xCCE3176685666671), + SPH_C64(0xA78EA6DD53DDDD7B), SPH_C64(0x2E4BB8175C1717AF), + SPH_C64(0x8E46024701474745), SPH_C64(0x21DC849E429E9E1A), + SPH_C64(0x89C51ECA0FCACAD4), SPH_C64(0x5A99752DB42D2D58), + SPH_C64(0x637991BFC6BFBF2E), SPH_C64(0x0E1B38071C07073F), + SPH_C64(0x472301AD8EADADAC), SPH_C64(0xB42FEA5A755A5AB0), + SPH_C64(0x1BB56C83368383EF), SPH_C64(0x66FF8533CC3333B6), + SPH_C64(0xC6F23F639163635C), SPH_C64(0x040A100208020212), + SPH_C64(0x493839AA92AAAA93), SPH_C64(0xE2A8AF71D97171DE), + SPH_C64(0x8DCF0EC807C8C8C6), SPH_C64(0x327DC819641919D1), + SPH_C64(0x927072493949493B), SPH_C64(0xAF9A86D943D9D95F), + SPH_C64(0xF91DC3F2EFF2F231), SPH_C64(0xDB484BE3ABE3E3A8), + SPH_C64(0xB62AE25B715B5BB9), SPH_C64(0x0D9234881A8888BC), + SPH_C64(0x29C8A49A529A9A3E), SPH_C64(0x4CBE2D269826260B), + SPH_C64(0x64FA8D32C83232BF), SPH_C64(0x7D4AE9B0FAB0B059), + SPH_C64(0xCF6A1BE983E9E9F2), SPH_C64(0x1E33780F3C0F0F77), + SPH_C64(0xB7A6E6D573D5D533), SPH_C64(0x1DBA74803A8080F4), + SPH_C64(0x617C99BEC2BEBE27), SPH_C64(0x87DE26CD13CDCDEB), + SPH_C64(0x68E4BD34D0343489), SPH_C64(0x90757A483D484832), + SPH_C64(0xE324ABFFDBFFFF54), SPH_C64(0xF48FF77AF57A7A8D), + SPH_C64(0x3DEAF4907A909064), SPH_C64(0xBE3EC25F615F5F9D), + SPH_C64(0x40A01D208020203D), SPH_C64(0xD0D56768BD68680F), + SPH_C64(0x3472D01A681A1ACA), SPH_C64(0x412C19AE82AEAEB7), + SPH_C64(0x755EC9B4EAB4B47D), SPH_C64(0xA8199A544D5454CE), + SPH_C64(0x3BE5EC937693937F), SPH_C64(0x44AA0D228822222F), + SPH_C64(0xC8E907648D646463), SPH_C64(0xFF12DBF1E3F1F12A), + SPH_C64(0xE6A2BF73D17373CC), SPH_C64(0x245A901248121282), + SPH_C64(0x805D3A401D40407A), SPH_C64(0x1028400820080848), + SPH_C64(0x9BE856C32BC3C395), SPH_C64(0xC57B33EC97ECECDF), + SPH_C64(0xAB9096DB4BDBDB4D), SPH_C64(0x5F1F61A1BEA1A1C0), + SPH_C64(0x07831C8D0E8D8D91), SPH_C64(0x7AC9F53DF43D3DC8), + SPH_C64(0x33F1CC976697975B), SPH_C64(0x0000000000000000), + SPH_C64(0x83D436CF1BCFCFF9), SPH_C64(0x5687452BAC2B2B6E), + SPH_C64(0xECB39776C57676E1), SPH_C64(0x19B06482328282E6), + SPH_C64(0xB1A9FED67FD6D628), SPH_C64(0x3677D81B6C1B1BC3), + SPH_C64(0x775BC1B5EEB5B574), SPH_C64(0x432911AF86AFAFBE), + SPH_C64(0xD4DF776AB56A6A1D), SPH_C64(0xA00DBA505D5050EA), + SPH_C64(0x8A4C124509454557), SPH_C64(0xFB18CBF3EBF3F338), + SPH_C64(0x60F09D30C03030AD), SPH_C64(0xC3742BEF9BEFEFC4), + SPH_C64(0x7EC3E53FFC3F3FDA), SPH_C64(0xAA1C9255495555C7), + SPH_C64(0x591079A2B2A2A2DB), SPH_C64(0xC96503EA8FEAEAE9), + SPH_C64(0xCAEC0F658965656A), SPH_C64(0x6968B9BAD2BABA03), + SPH_C64(0x5E93652FBC2F2F4A), SPH_C64(0x9DE74EC027C0C08E), + SPH_C64(0xA181BEDE5FDEDE60), SPH_C64(0x386CE01C701C1CFC), + SPH_C64(0xE72EBBFDD3FDFD46), SPH_C64(0x9A64524D294D4D1F), + SPH_C64(0x39E0E49272929276), SPH_C64(0xEABC8F75C97575FA), + SPH_C64(0x0C1E300618060636), SPH_C64(0x0998248A128A8AAE), + SPH_C64(0x7940F9B2F2B2B24B), SPH_C64(0xD15963E6BFE6E685), + SPH_C64(0x1C36700E380E0E7E), SPH_C64(0x3E63F81F7C1F1FE7), + SPH_C64(0xC4F7376295626255), SPH_C64(0xB5A3EED477D4D43A), + SPH_C64(0x4D3229A89AA8A881), SPH_C64(0x31F4C49662969652), + SPH_C64(0xEF3A9BF9C3F9F962), SPH_C64(0x97F666C533C5C5A3), + SPH_C64(0x4AB1352594252510), SPH_C64(0xB220F259795959AB), + SPH_C64(0x15AE54842A8484D0), SPH_C64(0xE4A7B772D57272C5), + SPH_C64(0x72DDD539E43939EC), SPH_C64(0x98615A4C2D4C4C16), + SPH_C64(0xBC3BCA5E655E5E94), SPH_C64(0xF085E778FD78789F), + SPH_C64(0x70D8DD38E03838E5), SPH_C64(0x0586148C0A8C8C98), + SPH_C64(0xBFB2C6D163D1D117), SPH_C64(0x570B41A5AEA5A5E4), + SPH_C64(0xD94D43E2AFE2E2A1), SPH_C64(0xC2F82F619961614E), + SPH_C64(0x7B45F1B3F6B3B342), SPH_C64(0x42A5152184212134), + SPH_C64(0x25D6949C4A9C9C08), SPH_C64(0x3C66F01E781E1EEE), + SPH_C64(0x8652224311434361), SPH_C64(0x93FC76C73BC7C7B1), + SPH_C64(0xE52BB3FCD7FCFC4F), SPH_C64(0x0814200410040424), + SPH_C64(0xA208B251595151E3), SPH_C64(0x2FC7BC995E999925), + SPH_C64(0xDAC44F6DA96D6D22), SPH_C64(0x1A39680D340D0D65), + SPH_C64(0xE93583FACFFAFA79), SPH_C64(0xA384B6DF5BDFDF69), + SPH_C64(0xFC9BD77EE57E7EA9), SPH_C64(0x48B43D2490242419), + SPH_C64(0x76D7C53BEC3B3BFE), SPH_C64(0x4B3D31AB96ABAB9A), + SPH_C64(0x81D13ECE1FCECEF0), SPH_C64(0x2255881144111199), + SPH_C64(0x03890C8F068F8F83), SPH_C64(0x9C6B4A4E254E4E04), + SPH_C64(0x7351D1B7E6B7B766), SPH_C64(0xCB600BEB8BEBEBE0), + SPH_C64(0x78CCFD3CF03C3CC1), SPH_C64(0x1FBF7C813E8181FD), + SPH_C64(0x35FED4946A949440), SPH_C64(0xF30CEBF7FBF7F71C), + SPH_C64(0x6F67A1B9DEB9B918), SPH_C64(0x265F98134C13138B), + SPH_C64(0x589C7D2CB02C2C51), SPH_C64(0xBBB8D6D36BD3D305), + SPH_C64(0xD35C6BE7BBE7E78C), SPH_C64(0xDCCB576EA56E6E39), + SPH_C64(0x95F36EC437C4C4AA), SPH_C64(0x060F18030C03031B), + SPH_C64(0xAC138A56455656DC), SPH_C64(0x88491A440D44445E), + SPH_C64(0xFE9EDF7FE17F7FA0), SPH_C64(0x4F3721A99EA9A988), + SPH_C64(0x54824D2AA82A2A67), SPH_C64(0x6B6DB1BBD6BBBB0A), + SPH_C64(0x9FE246C123C1C187), SPH_C64(0xA602A253515353F1), + SPH_C64(0xA58BAEDC57DCDC72), SPH_C64(0x1627580B2C0B0B53), + SPH_C64(0x27D39C9D4E9D9D01), SPH_C64(0xD8C1476CAD6C6C2B), + SPH_C64(0x62F59531C43131A4), SPH_C64(0xE8B98774CD7474F3), + SPH_C64(0xF109E3F6FFF6F615), SPH_C64(0x8C430A460546464C), + SPH_C64(0x452609AC8AACACA5), SPH_C64(0x0F973C891E8989B5), + SPH_C64(0x2844A014501414B4), SPH_C64(0xDF425BE1A3E1E1BA), + SPH_C64(0x2C4EB016581616A6), SPH_C64(0x74D2CD3AE83A3AF7), + SPH_C64(0xD2D06F69B9696906), SPH_C64(0x122D480924090941), + SPH_C64(0xE0ADA770DD7070D7), SPH_C64(0x7154D9B6E2B6B66F), + SPH_C64(0xBDB7CED067D0D01E), SPH_C64(0xC77E3BED93EDEDD6), + SPH_C64(0x85DB2ECC17CCCCE2), SPH_C64(0x84572A4215424268), + SPH_C64(0x2DC2B4985A98982C), SPH_C64(0x550E49A4AAA4A4ED), + SPH_C64(0x50885D28A0282875), SPH_C64(0xB831DA5C6D5C5C86), + SPH_C64(0xED3F93F8C7F8F86B), SPH_C64(0x11A44486228686C2) }; -/* - That BYTE macro was criminal. AMD has an instruction that is quite useful for this purpose - Bitfield Extract. - The AMD OpenCL compiler is often VERY stupid, and cannot be relied on to compile ridiculous code into clever - instructions like BFE. However, remember two things about the amd_bfe built-in function: One, while it's preferable - to convoluted multiplications (*shudder*), bitshifts, and AND masks, as it compiles to one instruction - it requires - the OPENCL_EXTENSION pragma to enable cl_amd_media_ops2 (example below), and two, it can only work on uints and below, - not ulongs. As you can see, for the extraction of bits from the high 32, I shift the upper 32 bits down and cast to - uint to fix this. -*/ +__constant static const sph_u64 plain_T2[256] __attribute__ ((aligned (128))) = { + SPH_C64(0x78C018601818D830), SPH_C64(0xAF05238C23232646), + SPH_C64(0xF97EC63FC6C6B891), SPH_C64(0x6F13E887E8E8FBCD), + SPH_C64(0xA14C87268787CB13), SPH_C64(0x62A9B8DAB8B8116D), + SPH_C64(0x0508010401010902), SPH_C64(0x6E424F214F4F0D9E), + SPH_C64(0xEEAD36D836369B6C), SPH_C64(0x0459A6A2A6A6FF51), + SPH_C64(0xBDDED26FD2D20CB9), SPH_C64(0x06FBF5F3F5F50EF7), + SPH_C64(0x80EF79F9797996F2), SPH_C64(0xCE5F6FA16F6F30DE), + SPH_C64(0xEFFC917E91916D3F), SPH_C64(0x07AA52555252F8A4), + SPH_C64(0xFD27609D606047C0), SPH_C64(0x7689BCCABCBC3565), + SPH_C64(0xCDAC9B569B9B372B), SPH_C64(0x8C048E028E8E8A01), + SPH_C64(0x1571A3B6A3A3D25B), SPH_C64(0x3C600C300C0C6C18), + SPH_C64(0x8AFF7BF17B7B84F6), SPH_C64(0xE1B535D43535806A), + SPH_C64(0x69E81D741D1DF53A), SPH_C64(0x4753E0A7E0E0B3DD), + SPH_C64(0xACF6D77BD7D721B3), SPH_C64(0xED5EC22FC2C29C99), + SPH_C64(0x966D2EB82E2E435C), SPH_C64(0x7A624B314B4B2996), + SPH_C64(0x21A3FEDFFEFE5DE1), SPH_C64(0x168257415757D5AE), + SPH_C64(0x41A815541515BD2A), SPH_C64(0xB69F77C17777E8EE), + SPH_C64(0xEBA537DC3737926E), SPH_C64(0x567BE5B3E5E59ED7), + SPH_C64(0xD98C9F469F9F1323), SPH_C64(0x17D3F0E7F0F023FD), + SPH_C64(0x7F6A4A354A4A2094), SPH_C64(0x959EDA4FDADA44A9), + SPH_C64(0x25FA587D5858A2B0), SPH_C64(0xCA06C903C9C9CF8F), + SPH_C64(0x8D5529A429297C52), SPH_C64(0x22500A280A0A5A14), + SPH_C64(0x4FE1B1FEB1B1507F), SPH_C64(0x1A69A0BAA0A0C95D), + SPH_C64(0xDA7F6BB16B6B14D6), SPH_C64(0xAB5C852E8585D917), + SPH_C64(0x7381BDCEBDBD3C67), SPH_C64(0x34D25D695D5D8FBA), + SPH_C64(0x5080104010109020), SPH_C64(0x03F3F4F7F4F407F5), + SPH_C64(0xC016CB0BCBCBDD8B), SPH_C64(0xC6ED3EF83E3ED37C), + SPH_C64(0x1128051405052D0A), SPH_C64(0xE61F6781676778CE), + SPH_C64(0x5373E4B7E4E497D5), SPH_C64(0xBB25279C2727024E), + SPH_C64(0x5832411941417382), SPH_C64(0x9D2C8B168B8BA70B), + SPH_C64(0x0151A7A6A7A7F653), SPH_C64(0x94CF7DE97D7DB2FA), + SPH_C64(0xFBDC956E95954937), SPH_C64(0x9F8ED847D8D856AD), + SPH_C64(0x308BFBCBFBFB70EB), SPH_C64(0x7123EE9FEEEECDC1), + SPH_C64(0x91C77CED7C7CBBF8), SPH_C64(0xE3176685666671CC), + SPH_C64(0x8EA6DD53DDDD7BA7), SPH_C64(0x4BB8175C1717AF2E), + SPH_C64(0x460247014747458E), SPH_C64(0xDC849E429E9E1A21), + SPH_C64(0xC51ECA0FCACAD489), SPH_C64(0x99752DB42D2D585A), + SPH_C64(0x7991BFC6BFBF2E63), SPH_C64(0x1B38071C07073F0E), + SPH_C64(0x2301AD8EADADAC47), SPH_C64(0x2FEA5A755A5AB0B4), + SPH_C64(0xB56C83368383EF1B), SPH_C64(0xFF8533CC3333B666), + SPH_C64(0xF23F639163635CC6), SPH_C64(0x0A10020802021204), + SPH_C64(0x3839AA92AAAA9349), SPH_C64(0xA8AF71D97171DEE2), + SPH_C64(0xCF0EC807C8C8C68D), SPH_C64(0x7DC819641919D132), + SPH_C64(0x7072493949493B92), SPH_C64(0x9A86D943D9D95FAF), + SPH_C64(0x1DC3F2EFF2F231F9), SPH_C64(0x484BE3ABE3E3A8DB), + SPH_C64(0x2AE25B715B5BB9B6), SPH_C64(0x9234881A8888BC0D), + SPH_C64(0xC8A49A529A9A3E29), SPH_C64(0xBE2D269826260B4C), + SPH_C64(0xFA8D32C83232BF64), SPH_C64(0x4AE9B0FAB0B0597D), + SPH_C64(0x6A1BE983E9E9F2CF), SPH_C64(0x33780F3C0F0F771E), + SPH_C64(0xA6E6D573D5D533B7), SPH_C64(0xBA74803A8080F41D), + SPH_C64(0x7C99BEC2BEBE2761), SPH_C64(0xDE26CD13CDCDEB87), + SPH_C64(0xE4BD34D034348968), SPH_C64(0x757A483D48483290), + SPH_C64(0x24ABFFDBFFFF54E3), SPH_C64(0x8FF77AF57A7A8DF4), + SPH_C64(0xEAF4907A9090643D), SPH_C64(0x3EC25F615F5F9DBE), + SPH_C64(0xA01D208020203D40), SPH_C64(0xD56768BD68680FD0), + SPH_C64(0x72D01A681A1ACA34), SPH_C64(0x2C19AE82AEAEB741), + SPH_C64(0x5EC9B4EAB4B47D75), SPH_C64(0x199A544D5454CEA8), + SPH_C64(0xE5EC937693937F3B), SPH_C64(0xAA0D228822222F44), + SPH_C64(0xE907648D646463C8), SPH_C64(0x12DBF1E3F1F12AFF), + SPH_C64(0xA2BF73D17373CCE6), SPH_C64(0x5A90124812128224), + SPH_C64(0x5D3A401D40407A80), SPH_C64(0x2840082008084810), + SPH_C64(0xE856C32BC3C3959B), SPH_C64(0x7B33EC97ECECDFC5), + SPH_C64(0x9096DB4BDBDB4DAB), SPH_C64(0x1F61A1BEA1A1C05F), + SPH_C64(0x831C8D0E8D8D9107), SPH_C64(0xC9F53DF43D3DC87A), + SPH_C64(0xF1CC976697975B33), SPH_C64(0x0000000000000000), + SPH_C64(0xD436CF1BCFCFF983), SPH_C64(0x87452BAC2B2B6E56), + SPH_C64(0xB39776C57676E1EC), SPH_C64(0xB06482328282E619), + SPH_C64(0xA9FED67FD6D628B1), SPH_C64(0x77D81B6C1B1BC336), + SPH_C64(0x5BC1B5EEB5B57477), SPH_C64(0x2911AF86AFAFBE43), + SPH_C64(0xDF776AB56A6A1DD4), SPH_C64(0x0DBA505D5050EAA0), + SPH_C64(0x4C1245094545578A), SPH_C64(0x18CBF3EBF3F338FB), + SPH_C64(0xF09D30C03030AD60), SPH_C64(0x742BEF9BEFEFC4C3), + SPH_C64(0xC3E53FFC3F3FDA7E), SPH_C64(0x1C9255495555C7AA), + SPH_C64(0x1079A2B2A2A2DB59), SPH_C64(0x6503EA8FEAEAE9C9), + SPH_C64(0xEC0F658965656ACA), SPH_C64(0x68B9BAD2BABA0369), + SPH_C64(0x93652FBC2F2F4A5E), SPH_C64(0xE74EC027C0C08E9D), + SPH_C64(0x81BEDE5FDEDE60A1), SPH_C64(0x6CE01C701C1CFC38), + SPH_C64(0x2EBBFDD3FDFD46E7), SPH_C64(0x64524D294D4D1F9A), + SPH_C64(0xE0E4927292927639), SPH_C64(0xBC8F75C97575FAEA), + SPH_C64(0x1E3006180606360C), SPH_C64(0x98248A128A8AAE09), + SPH_C64(0x40F9B2F2B2B24B79), SPH_C64(0x5963E6BFE6E685D1), + SPH_C64(0x36700E380E0E7E1C), SPH_C64(0x63F81F7C1F1FE73E), + SPH_C64(0xF7376295626255C4), SPH_C64(0xA3EED477D4D43AB5), + SPH_C64(0x3229A89AA8A8814D), SPH_C64(0xF4C4966296965231), + SPH_C64(0x3A9BF9C3F9F962EF), SPH_C64(0xF666C533C5C5A397), + SPH_C64(0xB13525942525104A), SPH_C64(0x20F259795959ABB2), + SPH_C64(0xAE54842A8484D015), SPH_C64(0xA7B772D57272C5E4), + SPH_C64(0xDDD539E43939EC72), SPH_C64(0x615A4C2D4C4C1698), + SPH_C64(0x3BCA5E655E5E94BC), SPH_C64(0x85E778FD78789FF0), + SPH_C64(0xD8DD38E03838E570), SPH_C64(0x86148C0A8C8C9805), + SPH_C64(0xB2C6D163D1D117BF), SPH_C64(0x0B41A5AEA5A5E457), + SPH_C64(0x4D43E2AFE2E2A1D9), SPH_C64(0xF82F619961614EC2), + SPH_C64(0x45F1B3F6B3B3427B), SPH_C64(0xA515218421213442), + SPH_C64(0xD6949C4A9C9C0825), SPH_C64(0x66F01E781E1EEE3C), + SPH_C64(0x5222431143436186), SPH_C64(0xFC76C73BC7C7B193), + SPH_C64(0x2BB3FCD7FCFC4FE5), SPH_C64(0x1420041004042408), + SPH_C64(0x08B251595151E3A2), SPH_C64(0xC7BC995E9999252F), + SPH_C64(0xC44F6DA96D6D22DA), SPH_C64(0x39680D340D0D651A), + SPH_C64(0x3583FACFFAFA79E9), SPH_C64(0x84B6DF5BDFDF69A3), + SPH_C64(0x9BD77EE57E7EA9FC), SPH_C64(0xB43D249024241948), + SPH_C64(0xD7C53BEC3B3BFE76), SPH_C64(0x3D31AB96ABAB9A4B), + SPH_C64(0xD13ECE1FCECEF081), SPH_C64(0x5588114411119922), + SPH_C64(0x890C8F068F8F8303), SPH_C64(0x6B4A4E254E4E049C), + SPH_C64(0x51D1B7E6B7B76673), SPH_C64(0x600BEB8BEBEBE0CB), + SPH_C64(0xCCFD3CF03C3CC178), SPH_C64(0xBF7C813E8181FD1F), + SPH_C64(0xFED4946A94944035), SPH_C64(0x0CEBF7FBF7F71CF3), + SPH_C64(0x67A1B9DEB9B9186F), SPH_C64(0x5F98134C13138B26), + SPH_C64(0x9C7D2CB02C2C5158), SPH_C64(0xB8D6D36BD3D305BB), + SPH_C64(0x5C6BE7BBE7E78CD3), SPH_C64(0xCB576EA56E6E39DC), + SPH_C64(0xF36EC437C4C4AA95), SPH_C64(0x0F18030C03031B06), + SPH_C64(0x138A56455656DCAC), SPH_C64(0x491A440D44445E88), + SPH_C64(0x9EDF7FE17F7FA0FE), SPH_C64(0x3721A99EA9A9884F), + SPH_C64(0x824D2AA82A2A6754), SPH_C64(0x6DB1BBD6BBBB0A6B), + SPH_C64(0xE246C123C1C1879F), SPH_C64(0x02A253515353F1A6), + SPH_C64(0x8BAEDC57DCDC72A5), SPH_C64(0x27580B2C0B0B5316), + SPH_C64(0xD39C9D4E9D9D0127), SPH_C64(0xC1476CAD6C6C2BD8), + SPH_C64(0xF59531C43131A462), SPH_C64(0xB98774CD7474F3E8), + SPH_C64(0x09E3F6FFF6F615F1), SPH_C64(0x430A460546464C8C), + SPH_C64(0x2609AC8AACACA545), SPH_C64(0x973C891E8989B50F), + SPH_C64(0x44A014501414B428), SPH_C64(0x425BE1A3E1E1BADF), + SPH_C64(0x4EB016581616A62C), SPH_C64(0xD2CD3AE83A3AF774), + SPH_C64(0xD06F69B9696906D2), SPH_C64(0x2D48092409094112), + SPH_C64(0xADA770DD7070D7E0), SPH_C64(0x54D9B6E2B6B66F71), + SPH_C64(0xB7CED067D0D01EBD), SPH_C64(0x7E3BED93EDEDD6C7), + SPH_C64(0xDB2ECC17CCCCE285), SPH_C64(0x572A421542426884), + SPH_C64(0xC2B4985A98982C2D), SPH_C64(0x0E49A4AAA4A4ED55), + SPH_C64(0x885D28A028287550), SPH_C64(0x31DA5C6D5C5C86B8), + SPH_C64(0x3F93F8C7F8F86BED), SPH_C64(0xA44486228686C211) +}; -#pragma OPENCL EXTENSION cl_amd_media_ops2 : enable +__constant static const sph_u64 plain_T3[256] __attribute__ ((aligned (128))) = { + SPH_C64(0xC018601818D83078), SPH_C64(0x05238C23232646AF), + SPH_C64(0x7EC63FC6C6B891F9), SPH_C64(0x13E887E8E8FBCD6F), + SPH_C64(0x4C87268787CB13A1), SPH_C64(0xA9B8DAB8B8116D62), + SPH_C64(0x0801040101090205), SPH_C64(0x424F214F4F0D9E6E), + SPH_C64(0xAD36D836369B6CEE), SPH_C64(0x59A6A2A6A6FF5104), + SPH_C64(0xDED26FD2D20CB9BD), SPH_C64(0xFBF5F3F5F50EF706), + SPH_C64(0xEF79F9797996F280), SPH_C64(0x5F6FA16F6F30DECE), + SPH_C64(0xFC917E91916D3FEF), SPH_C64(0xAA52555252F8A407), + SPH_C64(0x27609D606047C0FD), SPH_C64(0x89BCCABCBC356576), + SPH_C64(0xAC9B569B9B372BCD), SPH_C64(0x048E028E8E8A018C), + SPH_C64(0x71A3B6A3A3D25B15), SPH_C64(0x600C300C0C6C183C), + SPH_C64(0xFF7BF17B7B84F68A), SPH_C64(0xB535D43535806AE1), + SPH_C64(0xE81D741D1DF53A69), SPH_C64(0x53E0A7E0E0B3DD47), + SPH_C64(0xF6D77BD7D721B3AC), SPH_C64(0x5EC22FC2C29C99ED), + SPH_C64(0x6D2EB82E2E435C96), SPH_C64(0x624B314B4B29967A), + SPH_C64(0xA3FEDFFEFE5DE121), SPH_C64(0x8257415757D5AE16), + SPH_C64(0xA815541515BD2A41), SPH_C64(0x9F77C17777E8EEB6), + SPH_C64(0xA537DC3737926EEB), SPH_C64(0x7BE5B3E5E59ED756), + SPH_C64(0x8C9F469F9F1323D9), SPH_C64(0xD3F0E7F0F023FD17), + SPH_C64(0x6A4A354A4A20947F), SPH_C64(0x9EDA4FDADA44A995), + SPH_C64(0xFA587D5858A2B025), SPH_C64(0x06C903C9C9CF8FCA), + SPH_C64(0x5529A429297C528D), SPH_C64(0x500A280A0A5A1422), + SPH_C64(0xE1B1FEB1B1507F4F), SPH_C64(0x69A0BAA0A0C95D1A), + SPH_C64(0x7F6BB16B6B14D6DA), SPH_C64(0x5C852E8585D917AB), + SPH_C64(0x81BDCEBDBD3C6773), SPH_C64(0xD25D695D5D8FBA34), + SPH_C64(0x8010401010902050), SPH_C64(0xF3F4F7F4F407F503), + SPH_C64(0x16CB0BCBCBDD8BC0), SPH_C64(0xED3EF83E3ED37CC6), + SPH_C64(0x28051405052D0A11), SPH_C64(0x1F6781676778CEE6), + SPH_C64(0x73E4B7E4E497D553), SPH_C64(0x25279C2727024EBB), + SPH_C64(0x3241194141738258), SPH_C64(0x2C8B168B8BA70B9D), + SPH_C64(0x51A7A6A7A7F65301), SPH_C64(0xCF7DE97D7DB2FA94), + SPH_C64(0xDC956E95954937FB), SPH_C64(0x8ED847D8D856AD9F), + SPH_C64(0x8BFBCBFBFB70EB30), SPH_C64(0x23EE9FEEEECDC171), + SPH_C64(0xC77CED7C7CBBF891), SPH_C64(0x176685666671CCE3), + SPH_C64(0xA6DD53DDDD7BA78E), SPH_C64(0xB8175C1717AF2E4B), + SPH_C64(0x0247014747458E46), SPH_C64(0x849E429E9E1A21DC), + SPH_C64(0x1ECA0FCACAD489C5), SPH_C64(0x752DB42D2D585A99), + SPH_C64(0x91BFC6BFBF2E6379), SPH_C64(0x38071C07073F0E1B), + SPH_C64(0x01AD8EADADAC4723), SPH_C64(0xEA5A755A5AB0B42F), + SPH_C64(0x6C83368383EF1BB5), SPH_C64(0x8533CC3333B666FF), + SPH_C64(0x3F639163635CC6F2), SPH_C64(0x100208020212040A), + SPH_C64(0x39AA92AAAA934938), SPH_C64(0xAF71D97171DEE2A8), + SPH_C64(0x0EC807C8C8C68DCF), SPH_C64(0xC819641919D1327D), + SPH_C64(0x72493949493B9270), SPH_C64(0x86D943D9D95FAF9A), + SPH_C64(0xC3F2EFF2F231F91D), SPH_C64(0x4BE3ABE3E3A8DB48), + SPH_C64(0xE25B715B5BB9B62A), SPH_C64(0x34881A8888BC0D92), + SPH_C64(0xA49A529A9A3E29C8), SPH_C64(0x2D269826260B4CBE), + SPH_C64(0x8D32C83232BF64FA), SPH_C64(0xE9B0FAB0B0597D4A), + SPH_C64(0x1BE983E9E9F2CF6A), SPH_C64(0x780F3C0F0F771E33), + SPH_C64(0xE6D573D5D533B7A6), SPH_C64(0x74803A8080F41DBA), + SPH_C64(0x99BEC2BEBE27617C), SPH_C64(0x26CD13CDCDEB87DE), + SPH_C64(0xBD34D034348968E4), SPH_C64(0x7A483D4848329075), + SPH_C64(0xABFFDBFFFF54E324), SPH_C64(0xF77AF57A7A8DF48F), + SPH_C64(0xF4907A9090643DEA), SPH_C64(0xC25F615F5F9DBE3E), + SPH_C64(0x1D208020203D40A0), SPH_C64(0x6768BD68680FD0D5), + SPH_C64(0xD01A681A1ACA3472), SPH_C64(0x19AE82AEAEB7412C), + SPH_C64(0xC9B4EAB4B47D755E), SPH_C64(0x9A544D5454CEA819), + SPH_C64(0xEC937693937F3BE5), SPH_C64(0x0D228822222F44AA), + SPH_C64(0x07648D646463C8E9), SPH_C64(0xDBF1E3F1F12AFF12), + SPH_C64(0xBF73D17373CCE6A2), SPH_C64(0x901248121282245A), + SPH_C64(0x3A401D40407A805D), SPH_C64(0x4008200808481028), + SPH_C64(0x56C32BC3C3959BE8), SPH_C64(0x33EC97ECECDFC57B), + SPH_C64(0x96DB4BDBDB4DAB90), SPH_C64(0x61A1BEA1A1C05F1F), + SPH_C64(0x1C8D0E8D8D910783), SPH_C64(0xF53DF43D3DC87AC9), + SPH_C64(0xCC976697975B33F1), SPH_C64(0x0000000000000000), + SPH_C64(0x36CF1BCFCFF983D4), SPH_C64(0x452BAC2B2B6E5687), + SPH_C64(0x9776C57676E1ECB3), SPH_C64(0x6482328282E619B0), + SPH_C64(0xFED67FD6D628B1A9), SPH_C64(0xD81B6C1B1BC33677), + SPH_C64(0xC1B5EEB5B574775B), SPH_C64(0x11AF86AFAFBE4329), + SPH_C64(0x776AB56A6A1DD4DF), SPH_C64(0xBA505D5050EAA00D), + SPH_C64(0x1245094545578A4C), SPH_C64(0xCBF3EBF3F338FB18), + SPH_C64(0x9D30C03030AD60F0), SPH_C64(0x2BEF9BEFEFC4C374), + SPH_C64(0xE53FFC3F3FDA7EC3), SPH_C64(0x9255495555C7AA1C), + SPH_C64(0x79A2B2A2A2DB5910), SPH_C64(0x03EA8FEAEAE9C965), + SPH_C64(0x0F658965656ACAEC), SPH_C64(0xB9BAD2BABA036968), + SPH_C64(0x652FBC2F2F4A5E93), SPH_C64(0x4EC027C0C08E9DE7), + SPH_C64(0xBEDE5FDEDE60A181), SPH_C64(0xE01C701C1CFC386C), + SPH_C64(0xBBFDD3FDFD46E72E), SPH_C64(0x524D294D4D1F9A64), + SPH_C64(0xE4927292927639E0), SPH_C64(0x8F75C97575FAEABC), + SPH_C64(0x3006180606360C1E), SPH_C64(0x248A128A8AAE0998), + SPH_C64(0xF9B2F2B2B24B7940), SPH_C64(0x63E6BFE6E685D159), + SPH_C64(0x700E380E0E7E1C36), SPH_C64(0xF81F7C1F1FE73E63), + SPH_C64(0x376295626255C4F7), SPH_C64(0xEED477D4D43AB5A3), + SPH_C64(0x29A89AA8A8814D32), SPH_C64(0xC4966296965231F4), + SPH_C64(0x9BF9C3F9F962EF3A), SPH_C64(0x66C533C5C5A397F6), + SPH_C64(0x3525942525104AB1), SPH_C64(0xF259795959ABB220), + SPH_C64(0x54842A8484D015AE), SPH_C64(0xB772D57272C5E4A7), + SPH_C64(0xD539E43939EC72DD), SPH_C64(0x5A4C2D4C4C169861), + SPH_C64(0xCA5E655E5E94BC3B), SPH_C64(0xE778FD78789FF085), + SPH_C64(0xDD38E03838E570D8), SPH_C64(0x148C0A8C8C980586), + SPH_C64(0xC6D163D1D117BFB2), SPH_C64(0x41A5AEA5A5E4570B), + SPH_C64(0x43E2AFE2E2A1D94D), SPH_C64(0x2F619961614EC2F8), + SPH_C64(0xF1B3F6B3B3427B45), SPH_C64(0x15218421213442A5), + SPH_C64(0x949C4A9C9C0825D6), SPH_C64(0xF01E781E1EEE3C66), + SPH_C64(0x2243114343618652), SPH_C64(0x76C73BC7C7B193FC), + SPH_C64(0xB3FCD7FCFC4FE52B), SPH_C64(0x2004100404240814), + SPH_C64(0xB251595151E3A208), SPH_C64(0xBC995E9999252FC7), + SPH_C64(0x4F6DA96D6D22DAC4), SPH_C64(0x680D340D0D651A39), + SPH_C64(0x83FACFFAFA79E935), SPH_C64(0xB6DF5BDFDF69A384), + SPH_C64(0xD77EE57E7EA9FC9B), SPH_C64(0x3D249024241948B4), + SPH_C64(0xC53BEC3B3BFE76D7), SPH_C64(0x31AB96ABAB9A4B3D), + SPH_C64(0x3ECE1FCECEF081D1), SPH_C64(0x8811441111992255), + SPH_C64(0x0C8F068F8F830389), SPH_C64(0x4A4E254E4E049C6B), + SPH_C64(0xD1B7E6B7B7667351), SPH_C64(0x0BEB8BEBEBE0CB60), + SPH_C64(0xFD3CF03C3CC178CC), SPH_C64(0x7C813E8181FD1FBF), + SPH_C64(0xD4946A94944035FE), SPH_C64(0xEBF7FBF7F71CF30C), + SPH_C64(0xA1B9DEB9B9186F67), SPH_C64(0x98134C13138B265F), + SPH_C64(0x7D2CB02C2C51589C), SPH_C64(0xD6D36BD3D305BBB8), + SPH_C64(0x6BE7BBE7E78CD35C), SPH_C64(0x576EA56E6E39DCCB), + SPH_C64(0x6EC437C4C4AA95F3), SPH_C64(0x18030C03031B060F), + SPH_C64(0x8A56455656DCAC13), SPH_C64(0x1A440D44445E8849), + SPH_C64(0xDF7FE17F7FA0FE9E), SPH_C64(0x21A99EA9A9884F37), + SPH_C64(0x4D2AA82A2A675482), SPH_C64(0xB1BBD6BBBB0A6B6D), + SPH_C64(0x46C123C1C1879FE2), SPH_C64(0xA253515353F1A602), + SPH_C64(0xAEDC57DCDC72A58B), SPH_C64(0x580B2C0B0B531627), + SPH_C64(0x9C9D4E9D9D0127D3), SPH_C64(0x476CAD6C6C2BD8C1), + SPH_C64(0x9531C43131A462F5), SPH_C64(0x8774CD7474F3E8B9), + SPH_C64(0xE3F6FFF6F615F109), SPH_C64(0x0A460546464C8C43), + SPH_C64(0x09AC8AACACA54526), SPH_C64(0x3C891E8989B50F97), + SPH_C64(0xA014501414B42844), SPH_C64(0x5BE1A3E1E1BADF42), + SPH_C64(0xB016581616A62C4E), SPH_C64(0xCD3AE83A3AF774D2), + SPH_C64(0x6F69B9696906D2D0), SPH_C64(0x480924090941122D), + SPH_C64(0xA770DD7070D7E0AD), SPH_C64(0xD9B6E2B6B66F7154), + SPH_C64(0xCED067D0D01EBDB7), SPH_C64(0x3BED93EDEDD6C77E), + SPH_C64(0x2ECC17CCCCE285DB), SPH_C64(0x2A42154242688457), + SPH_C64(0xB4985A98982C2DC2), SPH_C64(0x49A4AAA4A4ED550E), + SPH_C64(0x5D28A02828755088), SPH_C64(0xDA5C6D5C5C86B831), + SPH_C64(0x93F8C7F8F86BED3F), SPH_C64(0x4486228686C211A4) +}; -/* - Note that while the compiler is pretty much clinically brain-dead half the time, it CAN do very basic things reliably. - This is why I reduced the complexity of my BYTELO and BYTEHI macros to a new BYTE one (I left the former two for demonstration.) - It resolves the two usages of the ternary operator into constants at compile time, after it inlines the macros, because every time - I use BYTE, the y argument is known at compile time. Therefore, while it's a bit less easy to read, it's more compact to use one macro. -*/ +__constant static const sph_u64 plain_T4[256] __attribute__ ((aligned (128))) = { + SPH_C64(0x18601818D83078C0), SPH_C64(0x238C23232646AF05), + SPH_C64(0xC63FC6C6B891F97E), SPH_C64(0xE887E8E8FBCD6F13), + SPH_C64(0x87268787CB13A14C), SPH_C64(0xB8DAB8B8116D62A9), + SPH_C64(0x0104010109020508), SPH_C64(0x4F214F4F0D9E6E42), + SPH_C64(0x36D836369B6CEEAD), SPH_C64(0xA6A2A6A6FF510459), + SPH_C64(0xD26FD2D20CB9BDDE), SPH_C64(0xF5F3F5F50EF706FB), + SPH_C64(0x79F9797996F280EF), SPH_C64(0x6FA16F6F30DECE5F), + SPH_C64(0x917E91916D3FEFFC), SPH_C64(0x52555252F8A407AA), + SPH_C64(0x609D606047C0FD27), SPH_C64(0xBCCABCBC35657689), + SPH_C64(0x9B569B9B372BCDAC), SPH_C64(0x8E028E8E8A018C04), + SPH_C64(0xA3B6A3A3D25B1571), SPH_C64(0x0C300C0C6C183C60), + SPH_C64(0x7BF17B7B84F68AFF), SPH_C64(0x35D43535806AE1B5), + SPH_C64(0x1D741D1DF53A69E8), SPH_C64(0xE0A7E0E0B3DD4753), + SPH_C64(0xD77BD7D721B3ACF6), SPH_C64(0xC22FC2C29C99ED5E), + SPH_C64(0x2EB82E2E435C966D), SPH_C64(0x4B314B4B29967A62), + SPH_C64(0xFEDFFEFE5DE121A3), SPH_C64(0x57415757D5AE1682), + SPH_C64(0x15541515BD2A41A8), SPH_C64(0x77C17777E8EEB69F), + SPH_C64(0x37DC3737926EEBA5), SPH_C64(0xE5B3E5E59ED7567B), + SPH_C64(0x9F469F9F1323D98C), SPH_C64(0xF0E7F0F023FD17D3), + SPH_C64(0x4A354A4A20947F6A), SPH_C64(0xDA4FDADA44A9959E), + SPH_C64(0x587D5858A2B025FA), SPH_C64(0xC903C9C9CF8FCA06), + SPH_C64(0x29A429297C528D55), SPH_C64(0x0A280A0A5A142250), + SPH_C64(0xB1FEB1B1507F4FE1), SPH_C64(0xA0BAA0A0C95D1A69), + SPH_C64(0x6BB16B6B14D6DA7F), SPH_C64(0x852E8585D917AB5C), + SPH_C64(0xBDCEBDBD3C677381), SPH_C64(0x5D695D5D8FBA34D2), + SPH_C64(0x1040101090205080), SPH_C64(0xF4F7F4F407F503F3), + SPH_C64(0xCB0BCBCBDD8BC016), SPH_C64(0x3EF83E3ED37CC6ED), + SPH_C64(0x051405052D0A1128), SPH_C64(0x6781676778CEE61F), + SPH_C64(0xE4B7E4E497D55373), SPH_C64(0x279C2727024EBB25), + SPH_C64(0x4119414173825832), SPH_C64(0x8B168B8BA70B9D2C), + SPH_C64(0xA7A6A7A7F6530151), SPH_C64(0x7DE97D7DB2FA94CF), + SPH_C64(0x956E95954937FBDC), SPH_C64(0xD847D8D856AD9F8E), + SPH_C64(0xFBCBFBFB70EB308B), SPH_C64(0xEE9FEEEECDC17123), + SPH_C64(0x7CED7C7CBBF891C7), SPH_C64(0x6685666671CCE317), + SPH_C64(0xDD53DDDD7BA78EA6), SPH_C64(0x175C1717AF2E4BB8), + SPH_C64(0x47014747458E4602), SPH_C64(0x9E429E9E1A21DC84), + SPH_C64(0xCA0FCACAD489C51E), SPH_C64(0x2DB42D2D585A9975), + SPH_C64(0xBFC6BFBF2E637991), SPH_C64(0x071C07073F0E1B38), + SPH_C64(0xAD8EADADAC472301), SPH_C64(0x5A755A5AB0B42FEA), + SPH_C64(0x83368383EF1BB56C), SPH_C64(0x33CC3333B666FF85), + SPH_C64(0x639163635CC6F23F), SPH_C64(0x0208020212040A10), + SPH_C64(0xAA92AAAA93493839), SPH_C64(0x71D97171DEE2A8AF), + SPH_C64(0xC807C8C8C68DCF0E), SPH_C64(0x19641919D1327DC8), + SPH_C64(0x493949493B927072), SPH_C64(0xD943D9D95FAF9A86), + SPH_C64(0xF2EFF2F231F91DC3), SPH_C64(0xE3ABE3E3A8DB484B), + SPH_C64(0x5B715B5BB9B62AE2), SPH_C64(0x881A8888BC0D9234), + SPH_C64(0x9A529A9A3E29C8A4), SPH_C64(0x269826260B4CBE2D), + SPH_C64(0x32C83232BF64FA8D), SPH_C64(0xB0FAB0B0597D4AE9), + SPH_C64(0xE983E9E9F2CF6A1B), SPH_C64(0x0F3C0F0F771E3378), + SPH_C64(0xD573D5D533B7A6E6), SPH_C64(0x803A8080F41DBA74), + SPH_C64(0xBEC2BEBE27617C99), SPH_C64(0xCD13CDCDEB87DE26), + SPH_C64(0x34D034348968E4BD), SPH_C64(0x483D48483290757A), + SPH_C64(0xFFDBFFFF54E324AB), SPH_C64(0x7AF57A7A8DF48FF7), + SPH_C64(0x907A9090643DEAF4), SPH_C64(0x5F615F5F9DBE3EC2), + SPH_C64(0x208020203D40A01D), SPH_C64(0x68BD68680FD0D567), + SPH_C64(0x1A681A1ACA3472D0), SPH_C64(0xAE82AEAEB7412C19), + SPH_C64(0xB4EAB4B47D755EC9), SPH_C64(0x544D5454CEA8199A), + SPH_C64(0x937693937F3BE5EC), SPH_C64(0x228822222F44AA0D), + SPH_C64(0x648D646463C8E907), SPH_C64(0xF1E3F1F12AFF12DB), + SPH_C64(0x73D17373CCE6A2BF), SPH_C64(0x1248121282245A90), + SPH_C64(0x401D40407A805D3A), SPH_C64(0x0820080848102840), + SPH_C64(0xC32BC3C3959BE856), SPH_C64(0xEC97ECECDFC57B33), + SPH_C64(0xDB4BDBDB4DAB9096), SPH_C64(0xA1BEA1A1C05F1F61), + SPH_C64(0x8D0E8D8D9107831C), SPH_C64(0x3DF43D3DC87AC9F5), + SPH_C64(0x976697975B33F1CC), SPH_C64(0x0000000000000000), + SPH_C64(0xCF1BCFCFF983D436), SPH_C64(0x2BAC2B2B6E568745), + SPH_C64(0x76C57676E1ECB397), SPH_C64(0x82328282E619B064), + SPH_C64(0xD67FD6D628B1A9FE), SPH_C64(0x1B6C1B1BC33677D8), + SPH_C64(0xB5EEB5B574775BC1), SPH_C64(0xAF86AFAFBE432911), + SPH_C64(0x6AB56A6A1DD4DF77), SPH_C64(0x505D5050EAA00DBA), + SPH_C64(0x45094545578A4C12), SPH_C64(0xF3EBF3F338FB18CB), + SPH_C64(0x30C03030AD60F09D), SPH_C64(0xEF9BEFEFC4C3742B), + SPH_C64(0x3FFC3F3FDA7EC3E5), SPH_C64(0x55495555C7AA1C92), + SPH_C64(0xA2B2A2A2DB591079), SPH_C64(0xEA8FEAEAE9C96503), + SPH_C64(0x658965656ACAEC0F), SPH_C64(0xBAD2BABA036968B9), + SPH_C64(0x2FBC2F2F4A5E9365), SPH_C64(0xC027C0C08E9DE74E), + SPH_C64(0xDE5FDEDE60A181BE), SPH_C64(0x1C701C1CFC386CE0), + SPH_C64(0xFDD3FDFD46E72EBB), SPH_C64(0x4D294D4D1F9A6452), + SPH_C64(0x927292927639E0E4), SPH_C64(0x75C97575FAEABC8F), + SPH_C64(0x06180606360C1E30), SPH_C64(0x8A128A8AAE099824), + SPH_C64(0xB2F2B2B24B7940F9), SPH_C64(0xE6BFE6E685D15963), + SPH_C64(0x0E380E0E7E1C3670), SPH_C64(0x1F7C1F1FE73E63F8), + SPH_C64(0x6295626255C4F737), SPH_C64(0xD477D4D43AB5A3EE), + SPH_C64(0xA89AA8A8814D3229), SPH_C64(0x966296965231F4C4), + SPH_C64(0xF9C3F9F962EF3A9B), SPH_C64(0xC533C5C5A397F666), + SPH_C64(0x25942525104AB135), SPH_C64(0x59795959ABB220F2), + SPH_C64(0x842A8484D015AE54), SPH_C64(0x72D57272C5E4A7B7), + SPH_C64(0x39E43939EC72DDD5), SPH_C64(0x4C2D4C4C1698615A), + SPH_C64(0x5E655E5E94BC3BCA), SPH_C64(0x78FD78789FF085E7), + SPH_C64(0x38E03838E570D8DD), SPH_C64(0x8C0A8C8C98058614), + SPH_C64(0xD163D1D117BFB2C6), SPH_C64(0xA5AEA5A5E4570B41), + SPH_C64(0xE2AFE2E2A1D94D43), SPH_C64(0x619961614EC2F82F), + SPH_C64(0xB3F6B3B3427B45F1), SPH_C64(0x218421213442A515), + SPH_C64(0x9C4A9C9C0825D694), SPH_C64(0x1E781E1EEE3C66F0), + SPH_C64(0x4311434361865222), SPH_C64(0xC73BC7C7B193FC76), + SPH_C64(0xFCD7FCFC4FE52BB3), SPH_C64(0x0410040424081420), + SPH_C64(0x51595151E3A208B2), SPH_C64(0x995E9999252FC7BC), + SPH_C64(0x6DA96D6D22DAC44F), SPH_C64(0x0D340D0D651A3968), + SPH_C64(0xFACFFAFA79E93583), SPH_C64(0xDF5BDFDF69A384B6), + SPH_C64(0x7EE57E7EA9FC9BD7), SPH_C64(0x249024241948B43D), + SPH_C64(0x3BEC3B3BFE76D7C5), SPH_C64(0xAB96ABAB9A4B3D31), + SPH_C64(0xCE1FCECEF081D13E), SPH_C64(0x1144111199225588), + SPH_C64(0x8F068F8F8303890C), SPH_C64(0x4E254E4E049C6B4A), + SPH_C64(0xB7E6B7B7667351D1), SPH_C64(0xEB8BEBEBE0CB600B), + SPH_C64(0x3CF03C3CC178CCFD), SPH_C64(0x813E8181FD1FBF7C), + SPH_C64(0x946A94944035FED4), SPH_C64(0xF7FBF7F71CF30CEB), + SPH_C64(0xB9DEB9B9186F67A1), SPH_C64(0x134C13138B265F98), + SPH_C64(0x2CB02C2C51589C7D), SPH_C64(0xD36BD3D305BBB8D6), + SPH_C64(0xE7BBE7E78CD35C6B), SPH_C64(0x6EA56E6E39DCCB57), + SPH_C64(0xC437C4C4AA95F36E), SPH_C64(0x030C03031B060F18), + SPH_C64(0x56455656DCAC138A), SPH_C64(0x440D44445E88491A), + SPH_C64(0x7FE17F7FA0FE9EDF), SPH_C64(0xA99EA9A9884F3721), + SPH_C64(0x2AA82A2A6754824D), SPH_C64(0xBBD6BBBB0A6B6DB1), + SPH_C64(0xC123C1C1879FE246), SPH_C64(0x53515353F1A602A2), + SPH_C64(0xDC57DCDC72A58BAE), SPH_C64(0x0B2C0B0B53162758), + SPH_C64(0x9D4E9D9D0127D39C), SPH_C64(0x6CAD6C6C2BD8C147), + SPH_C64(0x31C43131A462F595), SPH_C64(0x74CD7474F3E8B987), + SPH_C64(0xF6FFF6F615F109E3), SPH_C64(0x460546464C8C430A), + SPH_C64(0xAC8AACACA5452609), SPH_C64(0x891E8989B50F973C), + SPH_C64(0x14501414B42844A0), SPH_C64(0xE1A3E1E1BADF425B), + SPH_C64(0x16581616A62C4EB0), SPH_C64(0x3AE83A3AF774D2CD), + SPH_C64(0x69B9696906D2D06F), SPH_C64(0x0924090941122D48), + SPH_C64(0x70DD7070D7E0ADA7), SPH_C64(0xB6E2B6B66F7154D9), + SPH_C64(0xD067D0D01EBDB7CE), SPH_C64(0xED93EDEDD6C77E3B), + SPH_C64(0xCC17CCCCE285DB2E), SPH_C64(0x421542426884572A), + SPH_C64(0x985A98982C2DC2B4), SPH_C64(0xA4AAA4A4ED550E49), + SPH_C64(0x28A028287550885D), SPH_C64(0x5C6D5C5C86B831DA), + SPH_C64(0xF8C7F8F86BED3F93), SPH_C64(0x86228686C211A444) +}; -//#define BYTELO(x, y) (amd_bfe((uint)(x), (y), 8U)) -//#define BYTEHI(x, y) (amd_bfe((uint)((x) >> 32), (y) - 32U, 8U)) +__constant static const sph_u64 plain_T5[256] __attribute__ ((aligned (128))) = { + SPH_C64(0x601818D83078C018), SPH_C64(0x8C23232646AF0523), + SPH_C64(0x3FC6C6B891F97EC6), SPH_C64(0x87E8E8FBCD6F13E8), + SPH_C64(0x268787CB13A14C87), SPH_C64(0xDAB8B8116D62A9B8), + SPH_C64(0x0401010902050801), SPH_C64(0x214F4F0D9E6E424F), + SPH_C64(0xD836369B6CEEAD36), SPH_C64(0xA2A6A6FF510459A6), + SPH_C64(0x6FD2D20CB9BDDED2), SPH_C64(0xF3F5F50EF706FBF5), + SPH_C64(0xF9797996F280EF79), SPH_C64(0xA16F6F30DECE5F6F), + SPH_C64(0x7E91916D3FEFFC91), SPH_C64(0x555252F8A407AA52), + SPH_C64(0x9D606047C0FD2760), SPH_C64(0xCABCBC35657689BC), + SPH_C64(0x569B9B372BCDAC9B), SPH_C64(0x028E8E8A018C048E), + SPH_C64(0xB6A3A3D25B1571A3), SPH_C64(0x300C0C6C183C600C), + SPH_C64(0xF17B7B84F68AFF7B), SPH_C64(0xD43535806AE1B535), + SPH_C64(0x741D1DF53A69E81D), SPH_C64(0xA7E0E0B3DD4753E0), + SPH_C64(0x7BD7D721B3ACF6D7), SPH_C64(0x2FC2C29C99ED5EC2), + SPH_C64(0xB82E2E435C966D2E), SPH_C64(0x314B4B29967A624B), + SPH_C64(0xDFFEFE5DE121A3FE), SPH_C64(0x415757D5AE168257), + SPH_C64(0x541515BD2A41A815), SPH_C64(0xC17777E8EEB69F77), + SPH_C64(0xDC3737926EEBA537), SPH_C64(0xB3E5E59ED7567BE5), + SPH_C64(0x469F9F1323D98C9F), SPH_C64(0xE7F0F023FD17D3F0), + SPH_C64(0x354A4A20947F6A4A), SPH_C64(0x4FDADA44A9959EDA), + SPH_C64(0x7D5858A2B025FA58), SPH_C64(0x03C9C9CF8FCA06C9), + SPH_C64(0xA429297C528D5529), SPH_C64(0x280A0A5A1422500A), + SPH_C64(0xFEB1B1507F4FE1B1), SPH_C64(0xBAA0A0C95D1A69A0), + SPH_C64(0xB16B6B14D6DA7F6B), SPH_C64(0x2E8585D917AB5C85), + SPH_C64(0xCEBDBD3C677381BD), SPH_C64(0x695D5D8FBA34D25D), + SPH_C64(0x4010109020508010), SPH_C64(0xF7F4F407F503F3F4), + SPH_C64(0x0BCBCBDD8BC016CB), SPH_C64(0xF83E3ED37CC6ED3E), + SPH_C64(0x1405052D0A112805), SPH_C64(0x81676778CEE61F67), + SPH_C64(0xB7E4E497D55373E4), SPH_C64(0x9C2727024EBB2527), + SPH_C64(0x1941417382583241), SPH_C64(0x168B8BA70B9D2C8B), + SPH_C64(0xA6A7A7F6530151A7), SPH_C64(0xE97D7DB2FA94CF7D), + SPH_C64(0x6E95954937FBDC95), SPH_C64(0x47D8D856AD9F8ED8), + SPH_C64(0xCBFBFB70EB308BFB), SPH_C64(0x9FEEEECDC17123EE), + SPH_C64(0xED7C7CBBF891C77C), SPH_C64(0x85666671CCE31766), + SPH_C64(0x53DDDD7BA78EA6DD), SPH_C64(0x5C1717AF2E4BB817), + SPH_C64(0x014747458E460247), SPH_C64(0x429E9E1A21DC849E), + SPH_C64(0x0FCACAD489C51ECA), SPH_C64(0xB42D2D585A99752D), + SPH_C64(0xC6BFBF2E637991BF), SPH_C64(0x1C07073F0E1B3807), + SPH_C64(0x8EADADAC472301AD), SPH_C64(0x755A5AB0B42FEA5A), + SPH_C64(0x368383EF1BB56C83), SPH_C64(0xCC3333B666FF8533), + SPH_C64(0x9163635CC6F23F63), SPH_C64(0x08020212040A1002), + SPH_C64(0x92AAAA93493839AA), SPH_C64(0xD97171DEE2A8AF71), + SPH_C64(0x07C8C8C68DCF0EC8), SPH_C64(0x641919D1327DC819), + SPH_C64(0x3949493B92707249), SPH_C64(0x43D9D95FAF9A86D9), + SPH_C64(0xEFF2F231F91DC3F2), SPH_C64(0xABE3E3A8DB484BE3), + SPH_C64(0x715B5BB9B62AE25B), SPH_C64(0x1A8888BC0D923488), + SPH_C64(0x529A9A3E29C8A49A), SPH_C64(0x9826260B4CBE2D26), + SPH_C64(0xC83232BF64FA8D32), SPH_C64(0xFAB0B0597D4AE9B0), + SPH_C64(0x83E9E9F2CF6A1BE9), SPH_C64(0x3C0F0F771E33780F), + SPH_C64(0x73D5D533B7A6E6D5), SPH_C64(0x3A8080F41DBA7480), + SPH_C64(0xC2BEBE27617C99BE), SPH_C64(0x13CDCDEB87DE26CD), + SPH_C64(0xD034348968E4BD34), SPH_C64(0x3D48483290757A48), + SPH_C64(0xDBFFFF54E324ABFF), SPH_C64(0xF57A7A8DF48FF77A), + SPH_C64(0x7A9090643DEAF490), SPH_C64(0x615F5F9DBE3EC25F), + SPH_C64(0x8020203D40A01D20), SPH_C64(0xBD68680FD0D56768), + SPH_C64(0x681A1ACA3472D01A), SPH_C64(0x82AEAEB7412C19AE), + SPH_C64(0xEAB4B47D755EC9B4), SPH_C64(0x4D5454CEA8199A54), + SPH_C64(0x7693937F3BE5EC93), SPH_C64(0x8822222F44AA0D22), + SPH_C64(0x8D646463C8E90764), SPH_C64(0xE3F1F12AFF12DBF1), + SPH_C64(0xD17373CCE6A2BF73), SPH_C64(0x48121282245A9012), + SPH_C64(0x1D40407A805D3A40), SPH_C64(0x2008084810284008), + SPH_C64(0x2BC3C3959BE856C3), SPH_C64(0x97ECECDFC57B33EC), + SPH_C64(0x4BDBDB4DAB9096DB), SPH_C64(0xBEA1A1C05F1F61A1), + SPH_C64(0x0E8D8D9107831C8D), SPH_C64(0xF43D3DC87AC9F53D), + SPH_C64(0x6697975B33F1CC97), SPH_C64(0x0000000000000000), + SPH_C64(0x1BCFCFF983D436CF), SPH_C64(0xAC2B2B6E5687452B), + SPH_C64(0xC57676E1ECB39776), SPH_C64(0x328282E619B06482), + SPH_C64(0x7FD6D628B1A9FED6), SPH_C64(0x6C1B1BC33677D81B), + SPH_C64(0xEEB5B574775BC1B5), SPH_C64(0x86AFAFBE432911AF), + SPH_C64(0xB56A6A1DD4DF776A), SPH_C64(0x5D5050EAA00DBA50), + SPH_C64(0x094545578A4C1245), SPH_C64(0xEBF3F338FB18CBF3), + SPH_C64(0xC03030AD60F09D30), SPH_C64(0x9BEFEFC4C3742BEF), + SPH_C64(0xFC3F3FDA7EC3E53F), SPH_C64(0x495555C7AA1C9255), + SPH_C64(0xB2A2A2DB591079A2), SPH_C64(0x8FEAEAE9C96503EA), + SPH_C64(0x8965656ACAEC0F65), SPH_C64(0xD2BABA036968B9BA), + SPH_C64(0xBC2F2F4A5E93652F), SPH_C64(0x27C0C08E9DE74EC0), + SPH_C64(0x5FDEDE60A181BEDE), SPH_C64(0x701C1CFC386CE01C), + SPH_C64(0xD3FDFD46E72EBBFD), SPH_C64(0x294D4D1F9A64524D), + SPH_C64(0x7292927639E0E492), SPH_C64(0xC97575FAEABC8F75), + SPH_C64(0x180606360C1E3006), SPH_C64(0x128A8AAE0998248A), + SPH_C64(0xF2B2B24B7940F9B2), SPH_C64(0xBFE6E685D15963E6), + SPH_C64(0x380E0E7E1C36700E), SPH_C64(0x7C1F1FE73E63F81F), + SPH_C64(0x95626255C4F73762), SPH_C64(0x77D4D43AB5A3EED4), + SPH_C64(0x9AA8A8814D3229A8), SPH_C64(0x6296965231F4C496), + SPH_C64(0xC3F9F962EF3A9BF9), SPH_C64(0x33C5C5A397F666C5), + SPH_C64(0x942525104AB13525), SPH_C64(0x795959ABB220F259), + SPH_C64(0x2A8484D015AE5484), SPH_C64(0xD57272C5E4A7B772), + SPH_C64(0xE43939EC72DDD539), SPH_C64(0x2D4C4C1698615A4C), + SPH_C64(0x655E5E94BC3BCA5E), SPH_C64(0xFD78789FF085E778), + SPH_C64(0xE03838E570D8DD38), SPH_C64(0x0A8C8C980586148C), + SPH_C64(0x63D1D117BFB2C6D1), SPH_C64(0xAEA5A5E4570B41A5), + SPH_C64(0xAFE2E2A1D94D43E2), SPH_C64(0x9961614EC2F82F61), + SPH_C64(0xF6B3B3427B45F1B3), SPH_C64(0x8421213442A51521), + SPH_C64(0x4A9C9C0825D6949C), SPH_C64(0x781E1EEE3C66F01E), + SPH_C64(0x1143436186522243), SPH_C64(0x3BC7C7B193FC76C7), + SPH_C64(0xD7FCFC4FE52BB3FC), SPH_C64(0x1004042408142004), + SPH_C64(0x595151E3A208B251), SPH_C64(0x5E9999252FC7BC99), + SPH_C64(0xA96D6D22DAC44F6D), SPH_C64(0x340D0D651A39680D), + SPH_C64(0xCFFAFA79E93583FA), SPH_C64(0x5BDFDF69A384B6DF), + SPH_C64(0xE57E7EA9FC9BD77E), SPH_C64(0x9024241948B43D24), + SPH_C64(0xEC3B3BFE76D7C53B), SPH_C64(0x96ABAB9A4B3D31AB), + SPH_C64(0x1FCECEF081D13ECE), SPH_C64(0x4411119922558811), + SPH_C64(0x068F8F8303890C8F), SPH_C64(0x254E4E049C6B4A4E), + SPH_C64(0xE6B7B7667351D1B7), SPH_C64(0x8BEBEBE0CB600BEB), + SPH_C64(0xF03C3CC178CCFD3C), SPH_C64(0x3E8181FD1FBF7C81), + SPH_C64(0x6A94944035FED494), SPH_C64(0xFBF7F71CF30CEBF7), + SPH_C64(0xDEB9B9186F67A1B9), SPH_C64(0x4C13138B265F9813), + SPH_C64(0xB02C2C51589C7D2C), SPH_C64(0x6BD3D305BBB8D6D3), + SPH_C64(0xBBE7E78CD35C6BE7), SPH_C64(0xA56E6E39DCCB576E), + SPH_C64(0x37C4C4AA95F36EC4), SPH_C64(0x0C03031B060F1803), + SPH_C64(0x455656DCAC138A56), SPH_C64(0x0D44445E88491A44), + SPH_C64(0xE17F7FA0FE9EDF7F), SPH_C64(0x9EA9A9884F3721A9), + SPH_C64(0xA82A2A6754824D2A), SPH_C64(0xD6BBBB0A6B6DB1BB), + SPH_C64(0x23C1C1879FE246C1), SPH_C64(0x515353F1A602A253), + SPH_C64(0x57DCDC72A58BAEDC), SPH_C64(0x2C0B0B531627580B), + SPH_C64(0x4E9D9D0127D39C9D), SPH_C64(0xAD6C6C2BD8C1476C), + SPH_C64(0xC43131A462F59531), SPH_C64(0xCD7474F3E8B98774), + SPH_C64(0xFFF6F615F109E3F6), SPH_C64(0x0546464C8C430A46), + SPH_C64(0x8AACACA5452609AC), SPH_C64(0x1E8989B50F973C89), + SPH_C64(0x501414B42844A014), SPH_C64(0xA3E1E1BADF425BE1), + SPH_C64(0x581616A62C4EB016), SPH_C64(0xE83A3AF774D2CD3A), + SPH_C64(0xB9696906D2D06F69), SPH_C64(0x24090941122D4809), + SPH_C64(0xDD7070D7E0ADA770), SPH_C64(0xE2B6B66F7154D9B6), + SPH_C64(0x67D0D01EBDB7CED0), SPH_C64(0x93EDEDD6C77E3BED), + SPH_C64(0x17CCCCE285DB2ECC), SPH_C64(0x1542426884572A42), + SPH_C64(0x5A98982C2DC2B498), SPH_C64(0xAAA4A4ED550E49A4), + SPH_C64(0xA028287550885D28), SPH_C64(0x6D5C5C86B831DA5C), + SPH_C64(0xC7F8F86BED3F93F8), SPH_C64(0x228686C211A44486) +}; -#define BYTE(x, y) (amd_bfe((uint)((x) >> ((y >= 32U) ? 32U : 0U)), (y) - (((y) >= 32) ? 32U : 0), 8U)) +__constant static const sph_u64 plain_T6[256] __attribute__ ((aligned (128))) = { + SPH_C64(0x1818D83078C01860), SPH_C64(0x23232646AF05238C), + SPH_C64(0xC6C6B891F97EC63F), SPH_C64(0xE8E8FBCD6F13E887), + SPH_C64(0x8787CB13A14C8726), SPH_C64(0xB8B8116D62A9B8DA), + SPH_C64(0x0101090205080104), SPH_C64(0x4F4F0D9E6E424F21), + SPH_C64(0x36369B6CEEAD36D8), SPH_C64(0xA6A6FF510459A6A2), + SPH_C64(0xD2D20CB9BDDED26F), SPH_C64(0xF5F50EF706FBF5F3), + SPH_C64(0x797996F280EF79F9), SPH_C64(0x6F6F30DECE5F6FA1), + SPH_C64(0x91916D3FEFFC917E), SPH_C64(0x5252F8A407AA5255), + SPH_C64(0x606047C0FD27609D), SPH_C64(0xBCBC35657689BCCA), + SPH_C64(0x9B9B372BCDAC9B56), SPH_C64(0x8E8E8A018C048E02), + SPH_C64(0xA3A3D25B1571A3B6), SPH_C64(0x0C0C6C183C600C30), + SPH_C64(0x7B7B84F68AFF7BF1), SPH_C64(0x3535806AE1B535D4), + SPH_C64(0x1D1DF53A69E81D74), SPH_C64(0xE0E0B3DD4753E0A7), + SPH_C64(0xD7D721B3ACF6D77B), SPH_C64(0xC2C29C99ED5EC22F), + SPH_C64(0x2E2E435C966D2EB8), SPH_C64(0x4B4B29967A624B31), + SPH_C64(0xFEFE5DE121A3FEDF), SPH_C64(0x5757D5AE16825741), + SPH_C64(0x1515BD2A41A81554), SPH_C64(0x7777E8EEB69F77C1), + SPH_C64(0x3737926EEBA537DC), SPH_C64(0xE5E59ED7567BE5B3), + SPH_C64(0x9F9F1323D98C9F46), SPH_C64(0xF0F023FD17D3F0E7), + SPH_C64(0x4A4A20947F6A4A35), SPH_C64(0xDADA44A9959EDA4F), + SPH_C64(0x5858A2B025FA587D), SPH_C64(0xC9C9CF8FCA06C903), + SPH_C64(0x29297C528D5529A4), SPH_C64(0x0A0A5A1422500A28), + SPH_C64(0xB1B1507F4FE1B1FE), SPH_C64(0xA0A0C95D1A69A0BA), + SPH_C64(0x6B6B14D6DA7F6BB1), SPH_C64(0x8585D917AB5C852E), + SPH_C64(0xBDBD3C677381BDCE), SPH_C64(0x5D5D8FBA34D25D69), + SPH_C64(0x1010902050801040), SPH_C64(0xF4F407F503F3F4F7), + SPH_C64(0xCBCBDD8BC016CB0B), SPH_C64(0x3E3ED37CC6ED3EF8), + SPH_C64(0x05052D0A11280514), SPH_C64(0x676778CEE61F6781), + SPH_C64(0xE4E497D55373E4B7), SPH_C64(0x2727024EBB25279C), + SPH_C64(0x4141738258324119), SPH_C64(0x8B8BA70B9D2C8B16), + SPH_C64(0xA7A7F6530151A7A6), SPH_C64(0x7D7DB2FA94CF7DE9), + SPH_C64(0x95954937FBDC956E), SPH_C64(0xD8D856AD9F8ED847), + SPH_C64(0xFBFB70EB308BFBCB), SPH_C64(0xEEEECDC17123EE9F), + SPH_C64(0x7C7CBBF891C77CED), SPH_C64(0x666671CCE3176685), + SPH_C64(0xDDDD7BA78EA6DD53), SPH_C64(0x1717AF2E4BB8175C), + SPH_C64(0x4747458E46024701), SPH_C64(0x9E9E1A21DC849E42), + SPH_C64(0xCACAD489C51ECA0F), SPH_C64(0x2D2D585A99752DB4), + SPH_C64(0xBFBF2E637991BFC6), SPH_C64(0x07073F0E1B38071C), + SPH_C64(0xADADAC472301AD8E), SPH_C64(0x5A5AB0B42FEA5A75), + SPH_C64(0x8383EF1BB56C8336), SPH_C64(0x3333B666FF8533CC), + SPH_C64(0x63635CC6F23F6391), SPH_C64(0x020212040A100208), + SPH_C64(0xAAAA93493839AA92), SPH_C64(0x7171DEE2A8AF71D9), + SPH_C64(0xC8C8C68DCF0EC807), SPH_C64(0x1919D1327DC81964), + SPH_C64(0x49493B9270724939), SPH_C64(0xD9D95FAF9A86D943), + SPH_C64(0xF2F231F91DC3F2EF), SPH_C64(0xE3E3A8DB484BE3AB), + SPH_C64(0x5B5BB9B62AE25B71), SPH_C64(0x8888BC0D9234881A), + SPH_C64(0x9A9A3E29C8A49A52), SPH_C64(0x26260B4CBE2D2698), + SPH_C64(0x3232BF64FA8D32C8), SPH_C64(0xB0B0597D4AE9B0FA), + SPH_C64(0xE9E9F2CF6A1BE983), SPH_C64(0x0F0F771E33780F3C), + SPH_C64(0xD5D533B7A6E6D573), SPH_C64(0x8080F41DBA74803A), + SPH_C64(0xBEBE27617C99BEC2), SPH_C64(0xCDCDEB87DE26CD13), + SPH_C64(0x34348968E4BD34D0), SPH_C64(0x48483290757A483D), + SPH_C64(0xFFFF54E324ABFFDB), SPH_C64(0x7A7A8DF48FF77AF5), + SPH_C64(0x9090643DEAF4907A), SPH_C64(0x5F5F9DBE3EC25F61), + SPH_C64(0x20203D40A01D2080), SPH_C64(0x68680FD0D56768BD), + SPH_C64(0x1A1ACA3472D01A68), SPH_C64(0xAEAEB7412C19AE82), + SPH_C64(0xB4B47D755EC9B4EA), SPH_C64(0x5454CEA8199A544D), + SPH_C64(0x93937F3BE5EC9376), SPH_C64(0x22222F44AA0D2288), + SPH_C64(0x646463C8E907648D), SPH_C64(0xF1F12AFF12DBF1E3), + SPH_C64(0x7373CCE6A2BF73D1), SPH_C64(0x121282245A901248), + SPH_C64(0x40407A805D3A401D), SPH_C64(0x0808481028400820), + SPH_C64(0xC3C3959BE856C32B), SPH_C64(0xECECDFC57B33EC97), + SPH_C64(0xDBDB4DAB9096DB4B), SPH_C64(0xA1A1C05F1F61A1BE), + SPH_C64(0x8D8D9107831C8D0E), SPH_C64(0x3D3DC87AC9F53DF4), + SPH_C64(0x97975B33F1CC9766), SPH_C64(0x0000000000000000), + SPH_C64(0xCFCFF983D436CF1B), SPH_C64(0x2B2B6E5687452BAC), + SPH_C64(0x7676E1ECB39776C5), SPH_C64(0x8282E619B0648232), + SPH_C64(0xD6D628B1A9FED67F), SPH_C64(0x1B1BC33677D81B6C), + SPH_C64(0xB5B574775BC1B5EE), SPH_C64(0xAFAFBE432911AF86), + SPH_C64(0x6A6A1DD4DF776AB5), SPH_C64(0x5050EAA00DBA505D), + SPH_C64(0x4545578A4C124509), SPH_C64(0xF3F338FB18CBF3EB), + SPH_C64(0x3030AD60F09D30C0), SPH_C64(0xEFEFC4C3742BEF9B), + SPH_C64(0x3F3FDA7EC3E53FFC), SPH_C64(0x5555C7AA1C925549), + SPH_C64(0xA2A2DB591079A2B2), SPH_C64(0xEAEAE9C96503EA8F), + SPH_C64(0x65656ACAEC0F6589), SPH_C64(0xBABA036968B9BAD2), + SPH_C64(0x2F2F4A5E93652FBC), SPH_C64(0xC0C08E9DE74EC027), + SPH_C64(0xDEDE60A181BEDE5F), SPH_C64(0x1C1CFC386CE01C70), + SPH_C64(0xFDFD46E72EBBFDD3), SPH_C64(0x4D4D1F9A64524D29), + SPH_C64(0x92927639E0E49272), SPH_C64(0x7575FAEABC8F75C9), + SPH_C64(0x0606360C1E300618), SPH_C64(0x8A8AAE0998248A12), + SPH_C64(0xB2B24B7940F9B2F2), SPH_C64(0xE6E685D15963E6BF), + SPH_C64(0x0E0E7E1C36700E38), SPH_C64(0x1F1FE73E63F81F7C), + SPH_C64(0x626255C4F7376295), SPH_C64(0xD4D43AB5A3EED477), + SPH_C64(0xA8A8814D3229A89A), SPH_C64(0x96965231F4C49662), + SPH_C64(0xF9F962EF3A9BF9C3), SPH_C64(0xC5C5A397F666C533), + SPH_C64(0x2525104AB1352594), SPH_C64(0x5959ABB220F25979), + SPH_C64(0x8484D015AE54842A), SPH_C64(0x7272C5E4A7B772D5), + SPH_C64(0x3939EC72DDD539E4), SPH_C64(0x4C4C1698615A4C2D), + SPH_C64(0x5E5E94BC3BCA5E65), SPH_C64(0x78789FF085E778FD), + SPH_C64(0x3838E570D8DD38E0), SPH_C64(0x8C8C980586148C0A), + SPH_C64(0xD1D117BFB2C6D163), SPH_C64(0xA5A5E4570B41A5AE), + SPH_C64(0xE2E2A1D94D43E2AF), SPH_C64(0x61614EC2F82F6199), + SPH_C64(0xB3B3427B45F1B3F6), SPH_C64(0x21213442A5152184), + SPH_C64(0x9C9C0825D6949C4A), SPH_C64(0x1E1EEE3C66F01E78), + SPH_C64(0x4343618652224311), SPH_C64(0xC7C7B193FC76C73B), + SPH_C64(0xFCFC4FE52BB3FCD7), SPH_C64(0x0404240814200410), + SPH_C64(0x5151E3A208B25159), SPH_C64(0x9999252FC7BC995E), + SPH_C64(0x6D6D22DAC44F6DA9), SPH_C64(0x0D0D651A39680D34), + SPH_C64(0xFAFA79E93583FACF), SPH_C64(0xDFDF69A384B6DF5B), + SPH_C64(0x7E7EA9FC9BD77EE5), SPH_C64(0x24241948B43D2490), + SPH_C64(0x3B3BFE76D7C53BEC), SPH_C64(0xABAB9A4B3D31AB96), + SPH_C64(0xCECEF081D13ECE1F), SPH_C64(0x1111992255881144), + SPH_C64(0x8F8F8303890C8F06), SPH_C64(0x4E4E049C6B4A4E25), + SPH_C64(0xB7B7667351D1B7E6), SPH_C64(0xEBEBE0CB600BEB8B), + SPH_C64(0x3C3CC178CCFD3CF0), SPH_C64(0x8181FD1FBF7C813E), + SPH_C64(0x94944035FED4946A), SPH_C64(0xF7F71CF30CEBF7FB), + SPH_C64(0xB9B9186F67A1B9DE), SPH_C64(0x13138B265F98134C), + SPH_C64(0x2C2C51589C7D2CB0), SPH_C64(0xD3D305BBB8D6D36B), + SPH_C64(0xE7E78CD35C6BE7BB), SPH_C64(0x6E6E39DCCB576EA5), + SPH_C64(0xC4C4AA95F36EC437), SPH_C64(0x03031B060F18030C), + SPH_C64(0x5656DCAC138A5645), SPH_C64(0x44445E88491A440D), + SPH_C64(0x7F7FA0FE9EDF7FE1), SPH_C64(0xA9A9884F3721A99E), + SPH_C64(0x2A2A6754824D2AA8), SPH_C64(0xBBBB0A6B6DB1BBD6), + SPH_C64(0xC1C1879FE246C123), SPH_C64(0x5353F1A602A25351), + SPH_C64(0xDCDC72A58BAEDC57), SPH_C64(0x0B0B531627580B2C), + SPH_C64(0x9D9D0127D39C9D4E), SPH_C64(0x6C6C2BD8C1476CAD), + SPH_C64(0x3131A462F59531C4), SPH_C64(0x7474F3E8B98774CD), + SPH_C64(0xF6F615F109E3F6FF), SPH_C64(0x46464C8C430A4605), + SPH_C64(0xACACA5452609AC8A), SPH_C64(0x8989B50F973C891E), + SPH_C64(0x1414B42844A01450), SPH_C64(0xE1E1BADF425BE1A3), + SPH_C64(0x1616A62C4EB01658), SPH_C64(0x3A3AF774D2CD3AE8), + SPH_C64(0x696906D2D06F69B9), SPH_C64(0x090941122D480924), + SPH_C64(0x7070D7E0ADA770DD), SPH_C64(0xB6B66F7154D9B6E2), + SPH_C64(0xD0D01EBDB7CED067), SPH_C64(0xEDEDD6C77E3BED93), + SPH_C64(0xCCCCE285DB2ECC17), SPH_C64(0x42426884572A4215), + SPH_C64(0x98982C2DC2B4985A), SPH_C64(0xA4A4ED550E49A4AA), + SPH_C64(0x28287550885D28A0), SPH_C64(0x5C5C86B831DA5C6D), + SPH_C64(0xF8F86BED3F93F8C7), SPH_C64(0x8686C211A4448622) +}; -/* - Macro here to differentiate between the round implementations for Hawaii and Tonga versus all of the earlier cards; I'm most interested - in making sure it works well for Tahiti and Pitcairn, though. More on why they're different below. -*/ - -#if defined(__Hawaii__) || defined(__Tonga__) - - #define W_ROUND(in, i0, i1, i2, i3, i4, i5, i6, i7) (T0[BYTE(in.s ## i0, 0U)] ^ T1[BYTE(in.s ## i1, 8U)] ^ T2[BYTE(in.s ## i2, 16U)] ^ T3[BYTE(in.s ## i3, 24U)] ^ \ - rotate(T0[BYTE(in.s ## i4, 32U)], 32UL) ^ rotate(T0[BYTE(in.s ## i5, 40U)], 40UL) ^ rotate(T0[BYTE(in.s ## i6, 48U)], 48UL) ^ \ - rotate(T0[BYTE(in.s ## i7, 56U)], 56UL)) - +__constant static const sph_u64 plain_T7[256] __attribute__ ((aligned (128))) = { + SPH_C64(0x18D83078C0186018), SPH_C64(0x232646AF05238C23), + SPH_C64(0xC6B891F97EC63FC6), SPH_C64(0xE8FBCD6F13E887E8), + SPH_C64(0x87CB13A14C872687), SPH_C64(0xB8116D62A9B8DAB8), + SPH_C64(0x0109020508010401), SPH_C64(0x4F0D9E6E424F214F), + SPH_C64(0x369B6CEEAD36D836), SPH_C64(0xA6FF510459A6A2A6), + SPH_C64(0xD20CB9BDDED26FD2), SPH_C64(0xF50EF706FBF5F3F5), + SPH_C64(0x7996F280EF79F979), SPH_C64(0x6F30DECE5F6FA16F), + SPH_C64(0x916D3FEFFC917E91), SPH_C64(0x52F8A407AA525552), + SPH_C64(0x6047C0FD27609D60), SPH_C64(0xBC35657689BCCABC), + SPH_C64(0x9B372BCDAC9B569B), SPH_C64(0x8E8A018C048E028E), + SPH_C64(0xA3D25B1571A3B6A3), SPH_C64(0x0C6C183C600C300C), + SPH_C64(0x7B84F68AFF7BF17B), SPH_C64(0x35806AE1B535D435), + SPH_C64(0x1DF53A69E81D741D), SPH_C64(0xE0B3DD4753E0A7E0), + SPH_C64(0xD721B3ACF6D77BD7), SPH_C64(0xC29C99ED5EC22FC2), + SPH_C64(0x2E435C966D2EB82E), SPH_C64(0x4B29967A624B314B), + SPH_C64(0xFE5DE121A3FEDFFE), SPH_C64(0x57D5AE1682574157), + SPH_C64(0x15BD2A41A8155415), SPH_C64(0x77E8EEB69F77C177), + SPH_C64(0x37926EEBA537DC37), SPH_C64(0xE59ED7567BE5B3E5), + SPH_C64(0x9F1323D98C9F469F), SPH_C64(0xF023FD17D3F0E7F0), + SPH_C64(0x4A20947F6A4A354A), SPH_C64(0xDA44A9959EDA4FDA), + SPH_C64(0x58A2B025FA587D58), SPH_C64(0xC9CF8FCA06C903C9), + SPH_C64(0x297C528D5529A429), SPH_C64(0x0A5A1422500A280A), + SPH_C64(0xB1507F4FE1B1FEB1), SPH_C64(0xA0C95D1A69A0BAA0), + SPH_C64(0x6B14D6DA7F6BB16B), SPH_C64(0x85D917AB5C852E85), + SPH_C64(0xBD3C677381BDCEBD), SPH_C64(0x5D8FBA34D25D695D), + SPH_C64(0x1090205080104010), SPH_C64(0xF407F503F3F4F7F4), + SPH_C64(0xCBDD8BC016CB0BCB), SPH_C64(0x3ED37CC6ED3EF83E), + SPH_C64(0x052D0A1128051405), SPH_C64(0x6778CEE61F678167), + SPH_C64(0xE497D55373E4B7E4), SPH_C64(0x27024EBB25279C27), + SPH_C64(0x4173825832411941), SPH_C64(0x8BA70B9D2C8B168B), + SPH_C64(0xA7F6530151A7A6A7), SPH_C64(0x7DB2FA94CF7DE97D), + SPH_C64(0x954937FBDC956E95), SPH_C64(0xD856AD9F8ED847D8), + SPH_C64(0xFB70EB308BFBCBFB), SPH_C64(0xEECDC17123EE9FEE), + SPH_C64(0x7CBBF891C77CED7C), SPH_C64(0x6671CCE317668566), + SPH_C64(0xDD7BA78EA6DD53DD), SPH_C64(0x17AF2E4BB8175C17), + SPH_C64(0x47458E4602470147), SPH_C64(0x9E1A21DC849E429E), + SPH_C64(0xCAD489C51ECA0FCA), SPH_C64(0x2D585A99752DB42D), + SPH_C64(0xBF2E637991BFC6BF), SPH_C64(0x073F0E1B38071C07), + SPH_C64(0xADAC472301AD8EAD), SPH_C64(0x5AB0B42FEA5A755A), + SPH_C64(0x83EF1BB56C833683), SPH_C64(0x33B666FF8533CC33), + SPH_C64(0x635CC6F23F639163), SPH_C64(0x0212040A10020802), + SPH_C64(0xAA93493839AA92AA), SPH_C64(0x71DEE2A8AF71D971), + SPH_C64(0xC8C68DCF0EC807C8), SPH_C64(0x19D1327DC8196419), + SPH_C64(0x493B927072493949), SPH_C64(0xD95FAF9A86D943D9), + SPH_C64(0xF231F91DC3F2EFF2), SPH_C64(0xE3A8DB484BE3ABE3), + SPH_C64(0x5BB9B62AE25B715B), SPH_C64(0x88BC0D9234881A88), + SPH_C64(0x9A3E29C8A49A529A), SPH_C64(0x260B4CBE2D269826), + SPH_C64(0x32BF64FA8D32C832), SPH_C64(0xB0597D4AE9B0FAB0), + SPH_C64(0xE9F2CF6A1BE983E9), SPH_C64(0x0F771E33780F3C0F), + SPH_C64(0xD533B7A6E6D573D5), SPH_C64(0x80F41DBA74803A80), + SPH_C64(0xBE27617C99BEC2BE), SPH_C64(0xCDEB87DE26CD13CD), + SPH_C64(0x348968E4BD34D034), SPH_C64(0x483290757A483D48), + SPH_C64(0xFF54E324ABFFDBFF), SPH_C64(0x7A8DF48FF77AF57A), + SPH_C64(0x90643DEAF4907A90), SPH_C64(0x5F9DBE3EC25F615F), + SPH_C64(0x203D40A01D208020), SPH_C64(0x680FD0D56768BD68), + SPH_C64(0x1ACA3472D01A681A), SPH_C64(0xAEB7412C19AE82AE), + SPH_C64(0xB47D755EC9B4EAB4), SPH_C64(0x54CEA8199A544D54), + SPH_C64(0x937F3BE5EC937693), SPH_C64(0x222F44AA0D228822), + SPH_C64(0x6463C8E907648D64), SPH_C64(0xF12AFF12DBF1E3F1), + SPH_C64(0x73CCE6A2BF73D173), SPH_C64(0x1282245A90124812), + SPH_C64(0x407A805D3A401D40), SPH_C64(0x0848102840082008), + SPH_C64(0xC3959BE856C32BC3), SPH_C64(0xECDFC57B33EC97EC), + SPH_C64(0xDB4DAB9096DB4BDB), SPH_C64(0xA1C05F1F61A1BEA1), + SPH_C64(0x8D9107831C8D0E8D), SPH_C64(0x3DC87AC9F53DF43D), + SPH_C64(0x975B33F1CC976697), SPH_C64(0x0000000000000000), + SPH_C64(0xCFF983D436CF1BCF), SPH_C64(0x2B6E5687452BAC2B), + SPH_C64(0x76E1ECB39776C576), SPH_C64(0x82E619B064823282), + SPH_C64(0xD628B1A9FED67FD6), SPH_C64(0x1BC33677D81B6C1B), + SPH_C64(0xB574775BC1B5EEB5), SPH_C64(0xAFBE432911AF86AF), + SPH_C64(0x6A1DD4DF776AB56A), SPH_C64(0x50EAA00DBA505D50), + SPH_C64(0x45578A4C12450945), SPH_C64(0xF338FB18CBF3EBF3), + SPH_C64(0x30AD60F09D30C030), SPH_C64(0xEFC4C3742BEF9BEF), + SPH_C64(0x3FDA7EC3E53FFC3F), SPH_C64(0x55C7AA1C92554955), + SPH_C64(0xA2DB591079A2B2A2), SPH_C64(0xEAE9C96503EA8FEA), + SPH_C64(0x656ACAEC0F658965), SPH_C64(0xBA036968B9BAD2BA), + SPH_C64(0x2F4A5E93652FBC2F), SPH_C64(0xC08E9DE74EC027C0), + SPH_C64(0xDE60A181BEDE5FDE), SPH_C64(0x1CFC386CE01C701C), + SPH_C64(0xFD46E72EBBFDD3FD), SPH_C64(0x4D1F9A64524D294D), + SPH_C64(0x927639E0E4927292), SPH_C64(0x75FAEABC8F75C975), + SPH_C64(0x06360C1E30061806), SPH_C64(0x8AAE0998248A128A), + SPH_C64(0xB24B7940F9B2F2B2), SPH_C64(0xE685D15963E6BFE6), + SPH_C64(0x0E7E1C36700E380E), SPH_C64(0x1FE73E63F81F7C1F), + SPH_C64(0x6255C4F737629562), SPH_C64(0xD43AB5A3EED477D4), + SPH_C64(0xA8814D3229A89AA8), SPH_C64(0x965231F4C4966296), + SPH_C64(0xF962EF3A9BF9C3F9), SPH_C64(0xC5A397F666C533C5), + SPH_C64(0x25104AB135259425), SPH_C64(0x59ABB220F2597959), + SPH_C64(0x84D015AE54842A84), SPH_C64(0x72C5E4A7B772D572), + SPH_C64(0x39EC72DDD539E439), SPH_C64(0x4C1698615A4C2D4C), + SPH_C64(0x5E94BC3BCA5E655E), SPH_C64(0x789FF085E778FD78), + SPH_C64(0x38E570D8DD38E038), SPH_C64(0x8C980586148C0A8C), + SPH_C64(0xD117BFB2C6D163D1), SPH_C64(0xA5E4570B41A5AEA5), + SPH_C64(0xE2A1D94D43E2AFE2), SPH_C64(0x614EC2F82F619961), + SPH_C64(0xB3427B45F1B3F6B3), SPH_C64(0x213442A515218421), + SPH_C64(0x9C0825D6949C4A9C), SPH_C64(0x1EEE3C66F01E781E), + SPH_C64(0x4361865222431143), SPH_C64(0xC7B193FC76C73BC7), + SPH_C64(0xFC4FE52BB3FCD7FC), SPH_C64(0x0424081420041004), + SPH_C64(0x51E3A208B2515951), SPH_C64(0x99252FC7BC995E99), + SPH_C64(0x6D22DAC44F6DA96D), SPH_C64(0x0D651A39680D340D), + SPH_C64(0xFA79E93583FACFFA), SPH_C64(0xDF69A384B6DF5BDF), + SPH_C64(0x7EA9FC9BD77EE57E), SPH_C64(0x241948B43D249024), + SPH_C64(0x3BFE76D7C53BEC3B), SPH_C64(0xAB9A4B3D31AB96AB), + SPH_C64(0xCEF081D13ECE1FCE), SPH_C64(0x1199225588114411), + SPH_C64(0x8F8303890C8F068F), SPH_C64(0x4E049C6B4A4E254E), + SPH_C64(0xB7667351D1B7E6B7), SPH_C64(0xEBE0CB600BEB8BEB), + SPH_C64(0x3CC178CCFD3CF03C), SPH_C64(0x81FD1FBF7C813E81), + SPH_C64(0x944035FED4946A94), SPH_C64(0xF71CF30CEBF7FBF7), + SPH_C64(0xB9186F67A1B9DEB9), SPH_C64(0x138B265F98134C13), + SPH_C64(0x2C51589C7D2CB02C), SPH_C64(0xD305BBB8D6D36BD3), + SPH_C64(0xE78CD35C6BE7BBE7), SPH_C64(0x6E39DCCB576EA56E), + SPH_C64(0xC4AA95F36EC437C4), SPH_C64(0x031B060F18030C03), + SPH_C64(0x56DCAC138A564556), SPH_C64(0x445E88491A440D44), + SPH_C64(0x7FA0FE9EDF7FE17F), SPH_C64(0xA9884F3721A99EA9), + SPH_C64(0x2A6754824D2AA82A), SPH_C64(0xBB0A6B6DB1BBD6BB), + SPH_C64(0xC1879FE246C123C1), SPH_C64(0x53F1A602A2535153), + SPH_C64(0xDC72A58BAEDC57DC), SPH_C64(0x0B531627580B2C0B), + SPH_C64(0x9D0127D39C9D4E9D), SPH_C64(0x6C2BD8C1476CAD6C), + SPH_C64(0x31A462F59531C431), SPH_C64(0x74F3E8B98774CD74), + SPH_C64(0xF615F109E3F6FFF6), SPH_C64(0x464C8C430A460546), + SPH_C64(0xACA5452609AC8AAC), SPH_C64(0x89B50F973C891E89), + SPH_C64(0x14B42844A0145014), SPH_C64(0xE1BADF425BE1A3E1), + SPH_C64(0x16A62C4EB0165816), SPH_C64(0x3AF774D2CD3AE83A), + SPH_C64(0x6906D2D06F69B969), SPH_C64(0x0941122D48092409), + SPH_C64(0x70D7E0ADA770DD70), SPH_C64(0xB66F7154D9B6E2B6), + SPH_C64(0xD01EBDB7CED067D0), SPH_C64(0xEDD6C77E3BED93ED), + SPH_C64(0xCCE285DB2ECC17CC), SPH_C64(0x426884572A421542), + SPH_C64(0x982C2DC2B4985A98), SPH_C64(0xA4ED550E49A4AAA4), + SPH_C64(0x287550885D28A028), SPH_C64(0x5C86B831DA5C6D5C), + SPH_C64(0xF86BED3F93F8C7F8), SPH_C64(0x86C211A444862286) +}; +#endif +__constant static const sph_u64 rc[10] __attribute__ ((aligned (128))) = { + SPH_C64(0x4F01B887E8C62318), + SPH_C64(0x52916F79F5D2A636), + SPH_C64(0x357B0CA38E9BBC60), + SPH_C64(0x57FE4B2EC2D7E01D), + SPH_C64(0xDA4AF09FE5377715), + SPH_C64(0x856BA0B10A29C958), + SPH_C64(0x67053ECBF4105DBD), + SPH_C64(0xD8957DA78B4127E4), + SPH_C64(0x9E4717DD667CEEFB), + SPH_C64(0x33835AAD07BF2DCA) +}; + + +/* ====================================================================== */ + +#define BYTE(x, n) ((unsigned)((x) >> (8 * (n))) & 0xFF) + +#define ROUND_ELT(in, i0, i1, i2, i3, i4, i5, i6, i7) \ + ( plain_T0[BYTE(in[i0], 0)] \ + ^ plain_T1[BYTE(in[i1], 1)] \ + ^ plain_T2[BYTE(in[i2], 2)] \ + ^ plain_T3[BYTE(in[i3], 3)] \ + ^ plain_T4[BYTE(in[i4], 4)] \ + ^ plain_T5[BYTE(in[i5], 5)] \ + ^ plain_T6[BYTE(in[i6], 6)] \ + ^ plain_T7[BYTE(in[i7], 7)]) + +#define SPH_T32(x) (as_uint(x)) +#define SPH_ROTL32(x, n) rotate(as_uint(x), as_uint(n)) +#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) +#define SPH_T64(x) (as_ulong(x)) +#define SPH_ROTL64(x, n) rotate(as_ulong(x), (n) & 0xFFFFFFFFFFFFFFFFUL) +#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) + +#define SWAP4(x) (SPH_ROTL32(as_uint(x) & 0x00FF00FF, 24U)|SPH_ROTL32(as_uint(x) & 0xFF00FF00, 8U)) +#define SWAP8(x) as_ulong(as_uchar8(x).s76543210) + +#if SPH_BIG_ENDIAN + #define DEC64E(x) (x) + #define DEC64BE(x) (*(const __global sph_u64 *) (x)); + #define DEC32LE(x) SWAP4(*(const __global sph_u32 *) (x)); + #define DEC64LE(x) SWAP8(*(const __global sph_u64 *) (x)); #else - - #define W_ROUND(in, i0, i1, i2, i3, i4, i5, i6, i7) (T0[BYTE(in.s ## i0, 0U)] ^ T1[BYTE(in.s ## i1, 8U)] ^ rotate(T0[BYTE(in.s ## i2, 16U)], 16UL) ^ rotate(T0[BYTE(in.s ## i3, 24U)], 24UL) ^ \ - rotate(T0[BYTE(in.s ## i4, 32U)], 32UL) ^ rotate(T0[BYTE(in.s ## i5, 40U)], 40UL) ^ rotate(T0[BYTE(in.s ## i6, 48U)], 48UL) ^ \ - rotate(T0[BYTE(in.s ## i7, 56U)], 56UL)) - + #define DEC64E(x) SWAP8(x) + #define DEC64BE(x) SWAP8(*(const __global sph_u64 *) (x)); + #define DEC32LE(x) (*(const __global sph_u32 *) (x)); + #define DEC64LE(x) (*(const __global sph_u64 *) (x)); #endif -/* - The kernel parameters probably look odd, and the reason for that is likely another thing that will make you feel - like you should have thought of it before now - the first execution of Whirlpool is actually constant! It does - not depend on the value of the nonce in any way. So, I precompute it every time there's new work, and pass it to - the kernel. Simple, easy increase - almost makes me wonder if it was an intentional oversight... - - Anyways, since we consumed the first part of the block making the midstate (Whirlpool consumes 64 bytes, or a - ulong8, remember), this leaves us with one ulong, a uint, and our nonce, which is the global ID. So, input is - therefore our input is the first ulong after the eight consumed by the midstate hash operation, then the low - 32 bits of the second. This would be ulongs number 8 and the low half 9, if you had the whole thing in a ulong - array. The nonce (global ID) goes into where the high part of 9 would go, and then the input must be terminated - with a '1' bit. Since it's supposed to be a big-endian '1' bit, I use the little-endian representation, that being - 0x80. After that, the input must be padded with zeros, and the last block terminated by the length of the input that - was processed, as a 64-bit big-endian integer. Note, this includes all previous whole blocks that have been processed; - many hash functions work this way, see "Merkle–Damgard construction" on Google for more information on this type of - hash function construction. Long story short, it enhances security versus just padding to the end of the block with - zeros, or some other constant, and it defines a system for padding to the end of a block (even with an odd number of - bits) so that everyone who hashes the same thing gets the same hash. - - In case you didn't figure it out, the pointer to the block data was replaced by two constant ulongs containing the - values of the block data, indexes 8 and 9, when indexed 64 bits at a time. Those are input0 and input1. -*/ +void whirlpool_round(sph_u64* n, sph_u64* h) +{ + sph_u64 t0, t1, t2, t3, t4, t5, t6, t7; + +#pragma unroll 10 + + for (unsigned r = 0; r < 10; r ++) + { + t0 = (ROUND_ELT(h, 0, 7, 6, 5, 4, 3, 2, 1) ^ rc[r]); + t1 = (ROUND_ELT(h, 1, 0, 7, 6, 5, 4, 3, 2) ^ 0 ); + t2 = (ROUND_ELT(h, 2, 1, 0, 7, 6, 5, 4, 3) ^ 0 ); + t3 = (ROUND_ELT(h, 3, 2, 1, 0, 7, 6, 5, 4) ^ 0 ); + t4 = (ROUND_ELT(h, 4, 3, 2, 1, 0, 7, 6, 5) ^ 0 ); + t5 = (ROUND_ELT(h, 5, 4, 3, 2, 1, 0, 7, 6) ^ 0 ); + t6 = (ROUND_ELT(h, 6, 5, 4, 3, 2, 1, 0, 7) ^ 0 ); + t7 = (ROUND_ELT(h, 7, 6, 5, 4, 3, 2, 1, 0) ^ 0 ); + + h[0] = t0; + h[1] = t1; + h[2] = t2; + h[3] = t3; + h[4] = t4; + h[5] = t5; + h[6] = t6; + h[7] = t7; + + t0 = ROUND_ELT(n, 0, 7, 6, 5, 4, 3, 2, 1) ^ h[0]; + t1 = ROUND_ELT(n, 1, 0, 7, 6, 5, 4, 3, 2) ^ h[1]; + t2 = ROUND_ELT(n, 2, 1, 0, 7, 6, 5, 4, 3) ^ h[2]; + t3 = ROUND_ELT(n, 3, 2, 1, 0, 7, 6, 5, 4) ^ h[3]; + t4 = ROUND_ELT(n, 4, 3, 2, 1, 0, 7, 6, 5) ^ h[4]; + t5 = ROUND_ELT(n, 5, 4, 3, 2, 1, 0, 7, 6) ^ h[5]; + t6 = ROUND_ELT(n, 6, 5, 4, 3, 2, 1, 0, 7) ^ h[6]; + t7 = ROUND_ELT(n, 7, 6, 5, 4, 3, 2, 1, 0) ^ h[7]; + + n[0] = t0; + n[1] = t1; + n[2] = t2; + n[3] = t3; + n[4] = t4; + n[5] = t5; + n[6] = t6; + n[7] = t7; + } +} __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) -__kernel void search(const ulong8 midstate, const ulong input0, const ulong input1, __global uint *output, const ulong target) +__kernel void search(__global unsigned char* block, __global uint* output, const ulong target) { - /* - Note that if you don't specify a type label, the variables automatically go in private memory if they can, - but I'm being explicit here, so there's no question. - - Also, I find ulong8 to be a lot cleaner here, as we often operate with 8 ulongs at a time. Why the hell would - you loop the XOR operations and shit constantly? Besides the fact it's ugly as hell, it's also more complex... - and this compiler is bad even with simple code, sometimes. - */ - - __private uint gid = get_global_id(0); - __private ulong8 n, h = midstate; - __local ulong T0[256], T1[256]; - - /* - Hawaii and Tonga both either have more LDS than their earlier brothers (speaking specifically of Tahiti and Pitcairn based GPUs), - or they allow more waves in flight with more LDS in use. Either way, GPUs based on Hawaii and newer chips, such as Tonga, seem - to benefit from more LDS usage here, as it doesn't seem to hurt the amount of waves they may have in flight at a time. - */ - - - #if defined(__Hawaii__) || defined(__Tonga__) - - __local ulong T2[256], T3[256]; - - #endif - - #if WORKSIZE == 256 - - __private uint lid = get_local_id(0); - - T0[lid] = T0_C[lid]; - T1[lid] = rotate(T0_C[lid], 8UL); - - #if defined(__Hawaii__) || defined(__Tonga__) - - T2[lid] = rotate(T0_C[lid], 16UL); - T3[lid] = rotate(T0_C[lid], 24UL); - - #endif - - #else - - for(uint lid = get_local_id(0); lid < 256; lid += WORKSIZE) - { - T0[lid] = T0_C[lid]; - T1[lid] = rotate(T0_C[lid], 8UL); - - #if defined(__Hawaii__) || defined(__Tonga__) - - T2[lid] = rotate(T0_C[lid], 16UL); - T3[lid] = rotate(T0_C[lid], 24UL); - - #endif - } - - #endif - - mem_fence(CLK_LOCAL_MEM_FENCE); - - n = (ulong8)(input0, (input1 & 0x00000000FFFFFFFF) | ((ulong)gid << 32), 0x0000000000000080, 0, 0, 0, 0, 0x8002000000000000) ^ h; - - /* - - // Just for fun, this loop could also be written like so: - - #pragma unroll 2 - for(int i = 0; i < 20; ++i) - { - ulong8 t; - - t.s0 = W_ROUND(((i & 1) ? n : h), 0, 7, 6, 5, 4, 3, 2, 1) ^ ((i & 1) ? 0 : ROUND_CONSTANTS[i >> 1]); - t.s1 = W_ROUND(((i & 1) ? n : h), 1, 0, 7, 6, 5, 4, 3, 2); - t.s2 = W_ROUND(((i & 1) ? n : h), 2, 1, 0, 7, 6, 5, 4, 3); - t.s3 = W_ROUND(((i & 1) ? n : h), 3, 2, 1, 0, 7, 6, 5, 4); - t.s4 = W_ROUND(((i & 1) ? n : h), 4, 3, 2, 1, 0, 7, 6, 5); - t.s5 = W_ROUND(((i & 1) ? n : h), 5, 4, 3, 2, 1, 0, 7, 6); - t.s6 = W_ROUND(((i & 1) ? n : h), 6, 5, 4, 3, 2, 1, 0, 7); - t.s7 = W_ROUND(((i & 1) ? n : h), 7, 6, 5, 4, 3, 2, 1, 0); - - h = ((i & 1) ? h : t); - n = ((i & 1) ? h ^ t : n); - } - - // On second thought, that might be cleaner looking with if statements... meh. - - */ - - /* - Whirlpool is actually based on a block cipher that is designed much like Rijndael (AES), but is unlikely to be used, - in my opinion, for encryption purposes - due to the rather large size of the state, and as such, has much larger - tables to deal with when trying to make an efficient implementation. Basically, in the Whirlpool specification, - it shows how it is based off of a block cipher they named W, which I've renamed ROUND_ELT here, as I find it more - appropriate. It works VERY much like Rijndael internally, therefore, it can be put into tables quite easily. As for the - key schedule, that differs substantially from Rijndael - each round key is simply an execution of the W round function - on the key. Here, the round keys are calculated each round - that is, they are generated as needed, rather than in a - seperate loop. This is why there are technically two iterations of Whirlpool in the loop below, one to calculate the - key for the round, another to calculate the state, and then they are XOR'd in the AddRoundKey step - The SubBytes, - ShiftColumns, and MixRows steps having been computed using the tables in LDS. Whirlpool, like Rijndael, can be computed - without the use of tables, but for some odd reason, it seems almost no one on the internet has ever done it. Even the - official reference implementations of Whirlpool are devoid of an implmentation that does not rely on precomputed tables. - I have found one that doesn't, and then rewrote it to bitslice the S-box used in SubBytes and greatly simplify the - finite field multiplications used in MixRows, to do Whirlpool with exactly zero table lookups. It's fucking awesome, - but sadly quite slow on GPU. Should be the shit on FPGA, though. It will be located at the following URL when I get - around to cleaning it up and shit: - - https://ottrbutt.com/miner/wpl_bitslice_final.c - - However, you can see the messy, yet fully functional version now at this URL: - - https://ottrbutt.com/miner/wpltest.c - */ - - // This loop is rolled up for a reason, by the way. I know what you're thinking - unrolling helped last time! Go ahead, try it. - - #pragma unroll 1 - for(int i = 0; i < 10; ++i) - { - ulong8 t; - - t.s0 = W_ROUND(h, 0, 7, 6, 5, 4, 3, 2, 1) ^ ROUND_CONSTANTS[i]; - t.s1 = W_ROUND(h, 1, 0, 7, 6, 5, 4, 3, 2); - t.s2 = W_ROUND(h, 2, 1, 0, 7, 6, 5, 4, 3); - t.s3 = W_ROUND(h, 3, 2, 1, 0, 7, 6, 5, 4); - t.s4 = W_ROUND(h, 4, 3, 2, 1, 0, 7, 6, 5); - t.s5 = W_ROUND(h, 5, 4, 3, 2, 1, 0, 7, 6); - t.s6 = W_ROUND(h, 6, 5, 4, 3, 2, 1, 0, 7); - t.s7 = W_ROUND(h, 7, 6, 5, 4, 3, 2, 1, 0); - - h = t; - - t.s0 = W_ROUND(n, 0, 7, 6, 5, 4, 3, 2, 1); - t.s1 = W_ROUND(n, 1, 0, 7, 6, 5, 4, 3, 2); - t.s2 = W_ROUND(n, 2, 1, 0, 7, 6, 5, 4, 3); - t.s3 = W_ROUND(n, 3, 2, 1, 0, 7, 6, 5, 4); - t.s4 = W_ROUND(n, 4, 3, 2, 1, 0, 7, 6, 5); - t.s5 = W_ROUND(n, 5, 4, 3, 2, 1, 0, 7, 6); - t.s6 = W_ROUND(n, 6, 5, 4, 3, 2, 1, 0, 7); - t.s7 = W_ROUND(n, 7, 6, 5, 4, 3, 2, 1, 0); - - n = t ^ h; - } - - /* - The end of Whirlpool would have me XOR the input (in the midstate variable) with the current state (in the n variable), but as - we only need the third ulong to tell if this nonce is a winner, we may as well only XOR what we need to. The compiler will most - likely apply this optimization by itself, but I prefer to ensure the compiler doesn't fuck my code up, at least, as much as I - reasonably can. - - You can not use atomic_inc() here if you like, but it's cleaner to do so, as two shares may be found at the same time, doing - God knows what to the output array. It's unlikely, but possible, so I use atomic_inc() whenever I make miners. - - The original SWAP4 macro was rather stupid - their little rotate trick will be faster on CPU, but GPUs tend to prefer vector - operations, even if they don't have hardware vectors, like AMD's GCN cards (7xxx and up, in case you haven't done your homework.) - Therefore, explicit OpenCL cast to uchar4, reverse bytes, and explicit cast back to uint should be quicker, not that it matters much. - */ - - if((midstate.s3 ^ n.s3 ^ midstate.s5 ^ n.s5) <= target) output[atomic_inc(output+0xFF)] = as_uint(as_uchar4(gid).s3210); + uint gid = get_global_id(0); + + sph_u64 n[8]; + sph_u64 h[8]; + sph_u64 state[8]; + + h[0] = h[1] = h[2] = h[3] = h[4] = h[5] = h[6] = h[7] = 0; + + n[0] = h[0] ^ DEC64LE(block + 0); + n[1] = h[1] ^ DEC64LE(block + 8); + n[2] = h[2] ^ DEC64LE(block + 16); + n[3] = h[3] ^ DEC64LE(block + 24); + n[4] = h[4] ^ DEC64LE(block + 32); + n[5] = h[5] ^ DEC64LE(block + 40); + n[6] = h[6] ^ DEC64LE(block + 48); + n[7] = h[7] ^ DEC64LE(block + 56); + + whirlpool_round(n, h); + + h[0] = state[0] = n[0] ^ DEC64LE(block + 0); + h[1] = state[1] = n[1] ^ DEC64LE(block + 8); + h[2] = state[2] = n[2] ^ DEC64LE(block + 16); + h[3] = state[3] = n[3] ^ DEC64LE(block + 24); + h[4] = state[4] = n[4] ^ DEC64LE(block + 32); + h[5] = state[5] = n[5] ^ DEC64LE(block + 40); + h[6] = state[6] = n[6] ^ DEC64LE(block + 48); + h[7] = state[7] = n[7] ^ DEC64LE(block + 56); + + n[0] = DEC64LE(block + 64); + n[1] = DEC64LE(block + 72); + n[1] &= 0x00000000FFFFFFFF; + n[1] ^= ((sph_u64) gid) << 32; + n[3] = n[4] = n[5] = n[6] = 0; + n[2] = 0x0000000000000080; + n[7] = 0x8002000000000000; + + n[0] ^= h[0]; + n[1] ^= h[1]; + n[2] ^= h[2]; + n[3] ^= h[3]; + n[4] ^= h[4]; + n[5] ^= h[5]; + n[6] ^= h[6]; + n[7] ^= h[7]; + + whirlpool_round(n, h); + + h[3] = state[3] ^ n[3] ^ state[5] ^ n[5]; + + bool result = (h[3] <= target); + + if (result) + output[atomic_inc(output+0xFF)] = SWAP4(gid); } #endif // WHIRLPOOLX_CL \ No newline at end of file diff --git a/miner.h b/miner.h index ecaa6a13..cafe7513 100644 --- a/miner.h +++ b/miner.h @@ -3,17 +3,6 @@ #include "config.h" -#if defined(USE_GIT_VERSION) && defined(GIT_VERSION) -#undef VERSION -#define VERSION GIT_VERSION -#endif - -#ifdef BUILD_NUMBER -#define CGMINER_VERSION VERSION "-" BUILD_NUMBER -#else -#define CGMINER_VERSION VERSION -#endif - #include "algorithm.h" #include @@ -1045,6 +1034,7 @@ extern bool opt_protocol; extern bool have_longpoll; extern char *opt_kernel_path; extern char *opt_socks_proxy; +extern bool opt_lyra; #if defined(unix) || defined(__APPLE__) extern char *opt_stderr_cmd; @@ -1165,8 +1155,8 @@ extern struct pool *add_pool(void); extern bool add_pool_details(struct pool *pool, bool live, char *url, char *user, char *pass, char *name, char *desc, char *profile, char *algo); #define MAX_GPUDEVICES 16 -#define MAX_DEVICES 4096 - +//#define MAX_DEVICES 4096 +#define MAX_DEVICES 8192 #define MIN_INTENSITY 4 #define MIN_INTENSITY_STR "4" #define MAX_INTENSITY 31 diff --git a/ocl.c b/ocl.c index cb00790f..c2b0eb95 100644 --- a/ocl.c +++ b/ocl.c @@ -36,8 +36,8 @@ #include "ocl/binary_kernel.h" #include "algorithm/neoscrypt.h" #include "algorithm/pluck.h" -#include "algorithm/yescrypt.h" -#include "algorithm/lyra2re.h" +//#include "algorithm/yescrypt.h" +#include "algorithm/lyra2rev2.h" /* FIXME: only here for global config vars, replace with configuration.h * or similar as soon as config is in a struct instead of littered all @@ -500,6 +500,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency)); } +#if 0 // Yescrypt TC else if ((cgpu->algorithm.type == ALGO_YESCRYPT || algorithm->type == ALGO_YESCRYPT_MULTI) && !cgpu->opt_tc) { @@ -584,9 +585,10 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency)); } +#endif - // Lyra2re v2 TC - else if (cgpu->algorithm.type == ALGO_LYRA2REv2 && !cgpu->opt_tc) { + // Lyra2REv2 TC + else if (cgpu->algorithm.type == ALGO_LYRA2REV2 /*&& !cgpu->opt_tc*/) { size_t glob_thread_count; long max_int; unsigned char type = 0; @@ -784,6 +786,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg applog(LOG_DEBUG, "pluck buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize); // scrypt/n-scrypt } +#if 0 else if (algorithm->type == ALGO_YESCRYPT || algorithm->type == ALGO_YESCRYPT_MULTI) { /* The scratch/pad-buffer needs 32kBytes memory per thread. */ bufsize = YESCRYPT_SCRATCHBUF_SIZE * cgpu->thread_concurrency; @@ -797,7 +800,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg applog(LOG_DEBUG, "yescrypt buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize); // scrypt/n-scrypt } - else if (algorithm->type == ALGO_LYRA2REv2) { +#endif + else if (algorithm->type == ALGO_LYRA2REV2) { /* The scratch/pad-buffer needs 32kBytes memory per thread. */ bufsize = LYRA_SCRATCHBUF_SIZE * cgpu->thread_concurrency; buf1size = 4* 8 * cgpu->thread_concurrency; //matrix @@ -835,6 +839,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg applog(LOG_WARNING, "Your settings come to %lu", (unsigned long)bufsize); } +#if 0 if (algorithm->type == ALGO_YESCRYPT || algorithm->type == ALGO_YESCRYPT_MULTI) { // need additionnal buffers clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf1size, NULL, &status); @@ -855,7 +860,9 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg return NULL; } } - else if (algorithm->type == ALGO_LYRA2REv2) { + else +#endif + if (algorithm->type == ALGO_LYRA2REV2) { // need additionnal buffers clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf1size, NULL, &status); if (status != CL_SUCCESS && !clState->buffer1) { diff --git a/ocl.h b/ocl.h index 8d6467f1..9a5baa48 100644 --- a/ocl.h +++ b/ocl.h @@ -10,7 +10,8 @@ #include #endif -#include "algorithm.h" +//#include "algorithm.h" +#include "miner.h" typedef struct __clState { cl_context context; diff --git a/ocl/binary_kernel.c b/ocl/binary_kernel.c index 4fd77875..3843b459 100644 --- a/ocl/binary_kernel.c +++ b/ocl/binary_kernel.c @@ -1,7 +1,5 @@ #include "binary_kernel.h" -#include "miner.h" #include -#include cl_program load_opencl_binary_kernel(build_kernel_data *data) { diff --git a/ocl/build_kernel.c b/ocl/build_kernel.c index 2e1b7383..47be753b 100644 --- a/ocl/build_kernel.c +++ b/ocl/build_kernel.c @@ -1,6 +1,4 @@ -#include #include "build_kernel.h" -#include "miner.h" static char *file_contents(const char *filename, int *length) { diff --git a/ocl/build_kernel.h b/ocl/build_kernel.h index 89fb8db8..dad9392a 100644 --- a/ocl/build_kernel.h +++ b/ocl/build_kernel.h @@ -1,6 +1,7 @@ #ifndef BUILD_KERNEL_H #define BUILD_KERNEL_H +#include "ocl.h" #include #include "logging.h" diff --git a/sgminer.c b/sgminer.c index 3c5042c3..cc913682 100644 --- a/sgminer.c +++ b/sgminer.c @@ -68,6 +68,10 @@ char *curly = ":D"; #include #endif +#if defined(USE_GIT_VERSION) && defined(GIT_VERSION) +#undef VERSION +#define VERSION GIT_VERSION +#endif static char packagename[256]; @@ -2148,7 +2152,7 @@ static void gen_gbt_work(struct pool *pool, struct work *work) } // Neoscrypt doesn't calc_midstate() - if (pool->algorithm.type == ALGO_NEOSCRYPT) { + if (pool->algorithm.type != ALGO_NEOSCRYPT) { calc_midstate(work); } local_work++; @@ -2567,7 +2571,7 @@ static void curses_print_status(void) unsigned short int line = 0; wattron(statuswin, A_BOLD); - cg_mvwprintw(statuswin, line, 0, PACKAGE " " CGMINER_VERSION " - Started: %s", datestamp); + cg_mvwprintw(statuswin, line, 0, PACKAGE " " VERSION " - Started: %s", datestamp); curses_print_uptime(&launch_time); wattroff(statuswin, A_BOLD); @@ -5574,7 +5578,7 @@ static void *stratum_sthread(void *userdata) applog(LOG_DEBUG, "stratum_sthread() algorithm = %s", pool->algorithm.name); // Neoscrypt is little endian - if (!pool->algorithm.type == ALGO_NEOSCRYPT) { + if (pool->algorithm.type == ALGO_NEOSCRYPT) { nonce = htobe32(*((uint32_t *)(work->data + 76))); //*((uint32_t *)nonce2) = htole32(work->nonce2); } @@ -6078,7 +6082,7 @@ static void gen_stratum_work(struct pool *pool, struct work *work) applog(LOG_DEBUG, "[THR%d] gen_stratum_work() - algorithm = %s", work->thr_id, pool->algorithm.name); // Different for Neoscrypt because of Little Endian - if (!pool->algorithm.type == ALGO_NEOSCRYPT) { + if (pool->algorithm.type == ALGO_NEOSCRYPT) { /* Incoming data is in little endian. */ memcpy(merkle_root, merkle_sha, 32); @@ -6140,7 +6144,7 @@ static void gen_stratum_work(struct pool *pool, struct work *work) } // For Neoscrypt use set_target_neoscrypt() function - if (!pool->algorithm.type == ALGO_NEOSCRYPT) { + if (pool->algorithm.type == ALGO_NEOSCRYPT) { set_target_neoscrypt(work->target, work->sdiff, work->thr_id); } else { calc_midstate(work); @@ -6238,7 +6242,7 @@ static void apply_initial_gpu_settings(struct pool *pool) //thread-concurrency // neoscrypt - if not specified set TC to 0 so that TC will be calculated by intensity settings - if (!pool->algorithm.type == ALGO_NEOSCRYPT) { + if (pool->algorithm.type == ALGO_NEOSCRYPT) { opt = ((empty_string(pool->thread_concurrency))?"0":get_pool_setting(pool->thread_concurrency, default_profile.thread_concurrency)); } // otherwise use pool/profile setting or default to default profile setting @@ -6562,7 +6566,7 @@ static void apply_switcher_options(unsigned long options, struct pool *pool) if(opt_isset(options, SWITCHER_APPLY_TC)) { // neoscrypt - if not specified set TC to 0 so that TC will be calculated by intensity settings - if (!pool->algorithm.type == ALGO_NEOSCRYPT) { + if (pool->algorithm.type == ALGO_NEOSCRYPT) { opt = ((empty_string(pool->thread_concurrency))?"0":get_pool_setting(pool->thread_concurrency, default_profile.thread_concurrency)); } // otherwise use pool/profile setting or default to default profile setting @@ -8700,7 +8704,7 @@ int main(int argc, char *argv[]) /* We use the getq mutex as the staged lock */ stgd_lock = &getq->mutex; - snprintf(packagename, sizeof(packagename), "%s %s", PACKAGE, CGMINER_VERSION); + snprintf(packagename, sizeof(packagename), "%s %s", PACKAGE, VERSION); #ifndef WIN32 signal(SIGPIPE, SIG_IGN); @@ -8734,7 +8738,7 @@ int main(int argc, char *argv[]) #endif /* Default algorithm specified in algorithm.c ATM */ - set_algorithm(&default_profile.algorithm, "scrypt"); + set_algorithm(&default_profile.algorithm, "x11"); devcursor = 8; logstart = devcursor + 1; diff --git a/util.c b/util.c index 3f76aa13..8cfcf20d 100644 --- a/util.c +++ b/util.c @@ -1791,7 +1791,7 @@ static bool send_version(struct pool *pool, json_t *val) if (!id) return false; - sprintf(s, "{\"id\": %d, \"result\": \""PACKAGE"/"CGMINER_VERSION"\", \"error\": null}", id); + sprintf(s, "{\"id\": %d, \"result\": \""PACKAGE"/"VERSION"\", \"error\": null}", id); if (!stratum_send(pool, s, strlen(s))) return false; @@ -2480,9 +2480,9 @@ resend: sprintf(s, "{\"id\": %d, \"method\": \"mining.subscribe\", \"params\": []}", swork_id++); } else { if (pool->sessionid) - sprintf(s, "{\"id\": %d, \"method\": \"mining.subscribe\", \"params\": [\""PACKAGE"/"CGMINER_VERSION"\", \"%s\"]}", swork_id++, pool->sessionid); + sprintf(s, "{\"id\": %d, \"method\": \"mining.subscribe\", \"params\": [\""PACKAGE"/"VERSION"\", \"%s\"]}", swork_id++, pool->sessionid); else - sprintf(s, "{\"id\": %d, \"method\": \"mining.subscribe\", \"params\": [\""PACKAGE"/"CGMINER_VERSION"\"]}", swork_id++); + sprintf(s, "{\"id\": %d, \"method\": \"mining.subscribe\", \"params\": [\""PACKAGE"/"VERSION"\"]}", swork_id++); } if (__stratum_send(pool, s, strlen(s)) != SEND_OK) { diff --git a/winbuild/dist/include/config.h b/winbuild/dist/include/config.h index 86931708..95606b29 100644 --- a/winbuild/dist/include/config.h +++ b/winbuild/dist/include/config.h @@ -67,11 +67,11 @@ #endif -#define VERSION "v5.2.0" +#define VERSION "5.2.1" #define PACKAGE_NAME "sgminer" #define PACKAGE_TARNAME "sgminer" -#define PACKAGE_VERSION "5.2.0" -#define PACKAGE_STRING "sgminer 5.2.0" +#define PACKAGE_VERSION "5.2.1" +#define PACKAGE_STRING "sgminer 5.2.1" #define PACKAGE "sgminer" #define SGMINER_PREFIX "" diff --git a/winbuild/sgminer.vcxproj b/winbuild/sgminer.vcxproj index 6ec8a4cd..6e48c725 100644 --- a/winbuild/sgminer.vcxproj +++ b/winbuild/sgminer.vcxproj @@ -115,16 +115,16 @@ - xcopy /Y /E /I "$(ProjectDir)..\kernel" "$(OutDir)\kernel" +REM xcopy /Y /E /I "$(ProjectDir)..\kernel" "$(OutDir)\kernel" del /f "$(OutDir)*.exe" - del /f "$(OutDir)*.dll" +REM del /f "$(OutDir)*.dll" - echo #define USE_GIT_VERSION 1 > "$(ProjectDir)dist\include\gitversion.h" - FOR /F "tokens=*" %%i IN ('call git describe "--abbrev=4" --dirty') DO echo #define GIT_VERSION "%%i" >> "$(ProjectDir)dist\include\gitversion.h" +REM echo #define USE_GIT_VERSION 1 > "$(ProjectDir)dist\include\gitversion.h" +REM FOR /F "tokens=*" %%i IN ('call git describe "--abbrev=4" --dirty') DO echo #define GIT_VERSION "%%i" >> "$(ProjectDir)dist\include\gitversion.h" exit 0 @@ -200,16 +200,16 @@ - xcopy /Y /E /I "$(ProjectDir)..\kernel" "$(OutDir)\kernel" +REM xcopy /Y /E /I "$(ProjectDir)..\kernel" "$(OutDir)\kernel" del /f "$(OutDir)*.exe" - del /f "$(OutDir)*.dll" +REM del /f "$(OutDir)*.dll" - echo #define USE_GIT_VERSION 1 > "$(ProjectDir)dist\include\gitversion.h" - FOR /F "tokens=*" %%i IN ('call git describe "--abbrev=4" --dirty') DO echo #define GIT_VERSION "%%i" >> "$(ProjectDir)dist\include\gitversion.h" +REM echo #define USE_GIT_VERSION 1 > "$(ProjectDir)dist\include\gitversion.h" +REM FOR /F "tokens=*" %%i IN ('call git describe "--abbrev=4" --dirty') DO echo #define GIT_VERSION "%%i" >> "$(ProjectDir)dist\include\gitversion.h" exit 0 @@ -263,11 +263,15 @@ + + + + @@ -328,11 +332,16 @@ + + + + + @@ -365,6 +374,7 @@ + diff --git a/winbuild/sgminer.vcxproj.filters b/winbuild/sgminer.vcxproj.filters index 02c26210..a664fd36 100644 --- a/winbuild/sgminer.vcxproj.filters +++ b/winbuild/sgminer.vcxproj.filters @@ -218,6 +218,18 @@ Source Files\algorithm + + Source Files\algorithm + + + Source Files\algorithm + + + Source Files\algorithm + + + Source Files\algorithm + @@ -415,6 +427,24 @@ Header Files\algorithm + + Header Files\algorithm + + + Header Files\algorithm + + + Header Files\algorithm + + + Header Files\algorithm + + + Header Files\sph + + + Header Files\algorithm +