Browse Source

Merge pull request #307 from sgminer-dev/v5_0-x15

V5 0 x15
djm34
ystarnaud 11 years ago
parent
commit
f3a773f279
  1. 2
      Makefile.am
  2. 208
      algorithm.c
  3. 2
      algorithm.h
  4. 253
      algorithm/bitblock.c
  5. 10
      algorithm/bitblock.h
  6. 247
      algorithm/x14.c
  7. 10
      algorithm/x14.h
  8. 1462
      kernel/bitblock.cl
  9. 1166
      kernel/bitblockold.cl
  10. 126
      kernel/darkcoin-mod.cl
  11. 16
      kernel/fugue.cl
  12. 13
      kernel/fuguecoin.cl
  13. 23
      kernel/hamsi.cl
  14. 515
      kernel/hamsi_helper_big.cl
  15. 516
      kernel/marucoin-mod.cl
  16. 51
      kernel/marucoin-modold.cl
  17. 350
      kernel/shabal.cl
  18. 1167
      kernel/whirlpool.cl
  19. 1341
      kernel/x14.cl
  20. 1078
      kernel/x14old.cl
  21. 1
      miner.h
  22. 6
      sgminer.c
  23. 2
      sph/Makefile.am
  24. 806
      sph/shabal.c
  25. 344
      sph/sph_shabal.h
  26. 218
      sph/sph_whirlpool.h
  27. 3480
      sph/whirlpool.c
  28. 16
      winbuild/sgminer.vcxproj
  29. 28
      winbuild/sgminer.vcxproj.filters

2
Makefile.am

@ -62,6 +62,8 @@ sgminer_SOURCES += algorithm/twecoin.c algorithm/twecoin.h @@ -62,6 +62,8 @@ sgminer_SOURCES += algorithm/twecoin.c algorithm/twecoin.h
sgminer_SOURCES += algorithm/marucoin.c algorithm/marucoin.h
sgminer_SOURCES += algorithm/maxcoin.c algorithm/maxcoin.h
sgminer_SOURCES += algorithm/talkcoin.c algorithm/talkcoin.h
sgminer_SOURCES += algorithm/bitblock.c algorithm/bitblock.h
sgminer_SOURCES += algorithm/x14.c algorithm/x14.h
bin_SCRIPTS = $(top_srcdir)/kernel/*.cl

208
algorithm.c

@ -26,6 +26,8 @@ @@ -26,6 +26,8 @@
#include "algorithm/marucoin.h"
#include "algorithm/maxcoin.h"
#include "algorithm/talkcoin.h"
#include "algorithm/bitblock.h"
#include "algorithm/x14.h"
#include "compat.h"
@ -39,6 +41,8 @@ const char *algorithm_type_str[] = { @@ -39,6 +41,8 @@ const char *algorithm_type_str[] = {
"NScrypt",
"X11",
"X13",
"X14",
"X15",
"Keccak",
"Quarkcoin",
"Twecoin",
@ -90,11 +94,11 @@ static void append_scrypt_compiler_options(struct _build_kernel_data *data, stru @@ -90,11 +94,11 @@ static void append_scrypt_compiler_options(struct _build_kernel_data *data, stru
static void append_hamsi_compiler_options(struct _build_kernel_data *data, struct cgpu_info *cgpu, struct _algorithm_t *algorithm)
{
char buf[255];
sprintf(buf, " -D SPH_HAMSI_EXPAND_BIG=%d",
opt_hamsi_expand_big);
sprintf(buf, " -D SPH_HAMSI_EXPAND_BIG=%d -D SPH_HAMSI_SHORT=%d ",
opt_hamsi_expand_big, ((opt_hamsi_short)?1:0));
strcat(data->compiler_options, buf);
sprintf(buf, "big%u", (unsigned int)opt_hamsi_expand_big);
sprintf(buf, "big%u%s", (unsigned int)opt_hamsi_expand_big, ((opt_hamsi_short)?"hs":""));
strcat(data->binary_filename, buf);
}
@ -197,6 +201,103 @@ static cl_int queue_darkcoin_mod_kernel(struct __clState *clState, struct _dev_b @@ -197,6 +201,103 @@ static cl_int queue_darkcoin_mod_kernel(struct __clState *clState, struct _dev_b
return status;
}
static cl_int queue_bitblock_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
cl_kernel *kernel;
unsigned int num;
cl_ulong le_target;
cl_int status = 0;
le_target = *(cl_ulong *)(blk->work->device_target + 24);
flip80(clState->cldata, blk->work->data);
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL);
// blake - search
kernel = &clState->kernel;
num = 0;
CL_SET_ARG(clState->CLbuffer0);
CL_SET_ARG(clState->padbuffer8);
// bmw - search1
kernel = clState->extra_kernels;
CL_SET_ARG_0(clState->padbuffer8);
// groestl - search2
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// skein - search3
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// jh - search4
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// keccak - search5
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// luffa - search6
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// cubehash - search7
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// shavite - search8
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// simd - search9
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// echo - search10
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// hamsi - search11
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// fugue - search12
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// hamsi - search11
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// fugue - search12
num = 0;
CL_NEXTKERNEL_SET_ARG(clState->padbuffer8);
CL_SET_ARG(clState->outputBuffer);
CL_SET_ARG(le_target);
return status;
}
static cl_int queue_bitblockold_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
cl_kernel *kernel;
unsigned int num;
cl_ulong le_target;
cl_int status = 0;
le_target = *(cl_ulong *)(blk->work->device_target + 24);
flip80(clState->cldata, blk->work->data);
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL);
// blake - search
kernel = &clState->kernel;
num = 0;
CL_SET_ARG(clState->CLbuffer0);
CL_SET_ARG(clState->padbuffer8);
// bmw - search1
kernel = clState->extra_kernels;
CL_SET_ARG_0(clState->padbuffer8);
// groestl - search2
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// skein - search3
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// jh - search4
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// keccak - search5
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// luffa - search6
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// cubehash - search7
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// shavite - search8
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// simd - search9
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// combined echo, hamsi, fugue - shabal - whirlpool - search10
num = 0;
CL_NEXTKERNEL_SET_ARG(clState->padbuffer8);
CL_SET_ARG(clState->outputBuffer);
CL_SET_ARG(le_target);
return status;
}
static cl_int queue_marucoin_mod_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
cl_kernel *kernel;
@ -321,6 +422,100 @@ static cl_int queue_talkcoin_mod_kernel(struct __clState *clState, struct _dev_b @@ -321,6 +422,100 @@ static cl_int queue_talkcoin_mod_kernel(struct __clState *clState, struct _dev_b
return status;
}
static cl_int queue_x14_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
cl_kernel *kernel;
unsigned int num;
cl_ulong le_target;
cl_int status = 0;
le_target = *(cl_ulong *)(blk->work->device_target + 24);
flip80(clState->cldata, blk->work->data);
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL);
// blake - search
kernel = &clState->kernel;
num = 0;
CL_SET_ARG(clState->CLbuffer0);
CL_SET_ARG(clState->padbuffer8);
// bmw - search1
kernel = clState->extra_kernels;
CL_SET_ARG_0(clState->padbuffer8);
// groestl - search2
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// skein - search3
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// jh - search4
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// keccak - search5
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// luffa - search6
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// cubehash - search7
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// shavite - search8
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// simd - search9
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// echo - search10
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// hamsi - search11
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// fugue - search12
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// shabal - search13
num = 0;
CL_NEXTKERNEL_SET_ARG(clState->padbuffer8);
CL_SET_ARG(clState->outputBuffer);
CL_SET_ARG(le_target);
return status;
}
static cl_int queue_x14_old_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
cl_kernel *kernel;
unsigned int num;
cl_ulong le_target;
cl_int status = 0;
le_target = *(cl_ulong *)(blk->work->device_target + 24);
flip80(clState->cldata, blk->work->data);
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL);
// blake - search
kernel = &clState->kernel;
num = 0;
CL_SET_ARG(clState->CLbuffer0);
CL_SET_ARG(clState->padbuffer8);
// bmw - search1
kernel = clState->extra_kernels;
CL_SET_ARG_0(clState->padbuffer8);
// groestl - search2
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// skein - search3
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// jh - search4
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// keccak - search5
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// luffa - search6
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// cubehash - search7
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// shavite - search8
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// simd - search9
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// combined echo, hamsi, fugue - shabal - search10
num = 0;
CL_NEXTKERNEL_SET_ARG(clState->padbuffer8);
CL_SET_ARG(clState->outputBuffer);
CL_SET_ARG(le_target);
return status;
}
typedef struct _algorithm_settings_t {
const char *name; /* Human-readable identifier */
algorithm_type_t type; //common algorithm type
@ -379,8 +574,13 @@ static algorithm_settings_t algos[] = { @@ -379,8 +574,13 @@ static algorithm_settings_t algos[] = {
{ "marucoin-mod", ALGO_X13, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 12, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_kernel, gen_hash, append_hamsi_compiler_options},
{ "marucoin-modold", ALGO_X13, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_old_kernel, gen_hash, append_hamsi_compiler_options},
{ "talkcoin-mod", ALGO_NIST, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 4, 8 * 16 * 4194304, 0, talkcoin_regenhash, queue_talkcoin_mod_kernel, gen_hash, NULL},
{ "x14", ALGO_X14, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 13, 8 * 16 * 4194304, 0, x14_regenhash, queue_x14_kernel, gen_hash, append_hamsi_compiler_options},
{ "x14old", ALGO_X14, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, x14_regenhash, queue_x14_old_kernel, gen_hash, append_hamsi_compiler_options},
{ "bitblock", ALGO_X15, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 14, 4 * 16 * 4194304, 0, bitblock_regenhash, queue_bitblock_kernel, gen_hash, append_hamsi_compiler_options},
{ "bitblockold", ALGO_X15, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 10, 4 * 16 * 4194304, 0, bitblock_regenhash, queue_bitblockold_kernel, gen_hash, append_hamsi_compiler_options},
{ "talkcoin-mod", ALGO_NIST, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 4, 8 * 16 * 4194304, 0, talkcoin_regenhash, queue_talkcoin_mod_kernel, gen_hash, NULL},
// kernels starting from this will have difficulty calculated by using fuguecoin algorithm
#define A_FUGUE(a, b) \
{ a, ALGO_FUGUE, 1, 256, 256, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, sha256, NULL}

2
algorithm.h

@ -16,6 +16,8 @@ typedef enum { @@ -16,6 +16,8 @@ typedef enum {
ALGO_NSCRYPT,
ALGO_X11,
ALGO_X13,
ALGO_X14,
ALGO_X15,
ALGO_KECCAK,
ALGO_QUARK,
ALGO_TWE,

253
algorithm/bitblock.c

@ -0,0 +1,253 @@ @@ -0,0 +1,253 @@
/*-
* Copyright 2009 Colin Percival, 2011 ArtForz
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* This file was originally written by Colin Percival as part of the Tarsnap
* online backup system.
*/
#include "config.h"
#include "miner.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include "sph/sph_blake.h"
#include "sph/sph_bmw.h"
#include "sph/sph_groestl.h"
#include "sph/sph_jh.h"
#include "sph/sph_keccak.h"
#include "sph/sph_skein.h"
#include "sph/sph_luffa.h"
#include "sph/sph_cubehash.h"
#include "sph/sph_shavite.h"
#include "sph/sph_simd.h"
#include "sph/sph_echo.h"
#include "sph/sph_hamsi.h"
#include "sph/sph_fugue.h"
#include "sph/sph_shabal.h"
#include "sph/sph_whirlpool.h"
/* Move init out of loop, so init once externally, and then use one single memcpy with that bigger memory block */
typedef struct {
sph_blake512_context blake1;
sph_bmw512_context bmw1;
sph_groestl512_context groestl1;
sph_skein512_context skein1;
sph_jh512_context jh1;
sph_keccak512_context keccak1;
sph_luffa512_context luffa1;
sph_cubehash512_context cubehash1;
sph_shavite512_context shavite1;
sph_simd512_context simd1;
sph_echo512_context echo1;
sph_hamsi512_context hamsi1;
sph_fugue512_context fugue1;
sph_shabal512_context shabal1;
sph_whirlpool_context whilpool1;
} Xhash_context_holder;
static Xhash_context_holder base_contexts;
void init_Bhash_contexts()
{
sph_blake512_init(&base_contexts.blake1);
sph_bmw512_init(&base_contexts.bmw1);
sph_groestl512_init(&base_contexts.groestl1);
sph_skein512_init(&base_contexts.skein1);
sph_jh512_init(&base_contexts.jh1);
sph_keccak512_init(&base_contexts.keccak1);
sph_luffa512_init(&base_contexts.luffa1);
sph_cubehash512_init(&base_contexts.cubehash1);
sph_shavite512_init(&base_contexts.shavite1);
sph_simd512_init(&base_contexts.simd1);
sph_echo512_init(&base_contexts.echo1);
sph_hamsi512_init(&base_contexts.hamsi1);
sph_fugue512_init(&base_contexts.fugue1);
sph_shabal512_init(&base_contexts.shabal1);
sph_whirlpool_init(&base_contexts.whilpool1);
}
/*
* Encode a length len/4 vector of (uint32_t) into a length len vector of
* (unsigned char) in big-endian form. Assumes len is a multiple of 4.
*/
static inline void
be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
{
uint32_t i;
for (i = 0; i < len; i++)
dst[i] = htobe32(src[i]);
}
inline void bitblockhash(void *state, const void *input)
{
init_Bhash_contexts();
Xhash_context_holder ctx;
uint32_t hashA[16], hashB[16];
//blake-bmw-groestl-sken-jh-meccak-luffa-cubehash-shivite-simd-echo
memcpy(&ctx, &base_contexts, sizeof(base_contexts));
sph_blake512 (&ctx.blake1, input, 80);
sph_blake512_close (&ctx.blake1, hashA);
sph_bmw512 (&ctx.bmw1, hashA, 64);
sph_bmw512_close(&ctx.bmw1, hashB);
sph_groestl512 (&ctx.groestl1, hashB, 64);
sph_groestl512_close(&ctx.groestl1, hashA);
sph_skein512 (&ctx.skein1, hashA, 64);
sph_skein512_close(&ctx.skein1, hashB);
sph_jh512 (&ctx.jh1, hashB, 64);
sph_jh512_close(&ctx.jh1, hashA);
sph_keccak512 (&ctx.keccak1, hashA, 64);
sph_keccak512_close(&ctx.keccak1, hashB);
sph_luffa512 (&ctx.luffa1, hashB, 64);
sph_luffa512_close (&ctx.luffa1, hashA);
sph_cubehash512 (&ctx.cubehash1, hashA, 64);
sph_cubehash512_close(&ctx.cubehash1, hashB);
sph_shavite512 (&ctx.shavite1, hashB, 64);
sph_shavite512_close(&ctx.shavite1, hashA);
sph_simd512 (&ctx.simd1, hashA, 64);
sph_simd512_close(&ctx.simd1, hashB);
sph_echo512 (&ctx.echo1, hashB, 64);
sph_echo512_close(&ctx.echo1, hashA);
sph_hamsi512 (&ctx.hamsi1, hashA, 64);
sph_hamsi512_close(&ctx.hamsi1, hashB);
sph_fugue512 (&ctx.fugue1, hashB, 64);
sph_fugue512_close(&ctx.fugue1, hashA);
sph_shabal512 (&ctx.shabal1, (const unsigned char*)hashA, 64);
sph_shabal512_close(&ctx.shabal1, hashB);
sph_whirlpool(&ctx.whilpool1, hashB, 64);
sph_whirlpool_close(&ctx.whilpool1, hashA);
memcpy(state, hashA, 32);
}
static const uint32_t diff1targ = 0x0000ffff;
/* Used externally as confirmation of correct OCL code */
int bitblock_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
{
uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]);
uint32_t data[20], ohash[8];
be32enc_vect(data, (const uint32_t *)pdata, 19);
data[19] = htobe32(nonce);
bitblockhash(ohash, data);
tmp_hash7 = be32toh(ohash[7]);
applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx",
(long unsigned int)Htarg,
(long unsigned int)diff1targ,
(long unsigned int)tmp_hash7);
if (tmp_hash7 > diff1targ)
return -1;
if (tmp_hash7 > Htarg)
return 0;
return 1;
}
void bitblock_regenhash(struct work *work)
{
uint32_t data[20];
uint32_t *nonce = (uint32_t *)(work->data + 76);
uint32_t *ohash = (uint32_t *)(work->hash);
be32enc_vect(data, (const uint32_t *)work->data, 19);
data[19] = htobe32(*nonce);
bitblockhash(ohash, data);
}
static inline void be32enc(void *pp, uint32_t x)
{
uint8_t *p = (uint8_t *)pp;
p[3] = x & 0xff;
p[2] = (x >> 8) & 0xff;
p[1] = (x >> 16) & 0xff;
p[0] = (x >> 24) & 0xff;
}
bool scanhash_bitblock(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
unsigned char *pdata, unsigned char __maybe_unused *phash1,
unsigned char __maybe_unused *phash, const unsigned char *ptarget,
uint32_t max_nonce, uint32_t *last_nonce, uint32_t n)
{
uint32_t *nonce = (uint32_t *)(pdata + 76);
uint32_t data[20];
uint32_t tmp_hash7;
uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]);
bool ret = false;
be32enc_vect(data, (const uint32_t *)pdata, 19);
while(1) {
uint32_t ostate[8];
*nonce = ++n;
data[19] = (n);
bitblockhash(ostate, data);
tmp_hash7 = (ostate[7]);
applog(LOG_INFO, "data7 %08lx",
(long unsigned int)data[7]);
if (unlikely(tmp_hash7 <= Htarg)) {
((uint32_t *)pdata)[19] = htobe32(n);
*last_nonce = n;
ret = true;
break;
}
if (unlikely((n >= max_nonce) || thr->work_restart)) {
*last_nonce = n;
break;
}
}
return ret;
}

10
algorithm/bitblock.h

@ -0,0 +1,10 @@ @@ -0,0 +1,10 @@
#ifndef BITBLOCK_H
#define BITBLOCK_H
#include "miner.h"
extern int bitblock_test(unsigned char *pdata, const unsigned char *ptarget,
uint32_t nonce);
extern void bitblock_regenhash(struct work *work);
#endif /* BITBLOCK_H */

247
algorithm/x14.c

@ -0,0 +1,247 @@ @@ -0,0 +1,247 @@
/*-
* Copyright 2009 Colin Percival, 2011 ArtForz
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* This file was originally written by Colin Percival as part of the Tarsnap
* online backup system.
*/
#include "config.h"
#include "miner.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include "sph/sph_blake.h"
#include "sph/sph_bmw.h"
#include "sph/sph_groestl.h"
#include "sph/sph_jh.h"
#include "sph/sph_keccak.h"
#include "sph/sph_skein.h"
#include "sph/sph_luffa.h"
#include "sph/sph_cubehash.h"
#include "sph/sph_shavite.h"
#include "sph/sph_simd.h"
#include "sph/sph_echo.h"
#include "sph/sph_hamsi.h"
#include "sph/sph_fugue.h"
#include "sph/sph_shabal.h"
/* Move init out of loop, so init once externally, and then use one single memcpy with that bigger memory block */
typedef struct {
sph_blake512_context blake1;
sph_bmw512_context bmw1;
sph_groestl512_context groestl1;
sph_skein512_context skein1;
sph_jh512_context jh1;
sph_keccak512_context keccak1;
sph_luffa512_context luffa1;
sph_cubehash512_context cubehash1;
sph_shavite512_context shavite1;
sph_simd512_context simd1;
sph_echo512_context echo1;
sph_hamsi512_context hamsi1;
sph_fugue512_context fugue1;
sph_shabal512_context shabal1;
} Xhash_context_holder;
static Xhash_context_holder base_contexts;
void init_X14hash_contexts()
{
sph_blake512_init(&base_contexts.blake1);
sph_bmw512_init(&base_contexts.bmw1);
sph_groestl512_init(&base_contexts.groestl1);
sph_skein512_init(&base_contexts.skein1);
sph_jh512_init(&base_contexts.jh1);
sph_keccak512_init(&base_contexts.keccak1);
sph_luffa512_init(&base_contexts.luffa1);
sph_cubehash512_init(&base_contexts.cubehash1);
sph_shavite512_init(&base_contexts.shavite1);
sph_simd512_init(&base_contexts.simd1);
sph_echo512_init(&base_contexts.echo1);
sph_hamsi512_init(&base_contexts.hamsi1);
sph_fugue512_init(&base_contexts.fugue1);
sph_shabal512_init(&base_contexts.shabal1);
}
/*
* Encode a length len/4 vector of (uint32_t) into a length len vector of
* (unsigned char) in big-endian form. Assumes len is a multiple of 4.
*/
static inline void be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
{
uint32_t i;
for (i = 0; i < len; i++)
dst[i] = htobe32(src[i]);
}
inline void x14hash(void *state, const void *input)
{
init_X14hash_contexts();
Xhash_context_holder ctx;
uint32_t hashA[16], hashB[16];
//blake-bmw-groestl-sken-jh-meccak-luffa-cubehash-shivite-simd-echo
memcpy(&ctx, &base_contexts, sizeof(base_contexts));
sph_blake512 (&ctx.blake1, input, 80);
sph_blake512_close (&ctx.blake1, hashA);
sph_bmw512 (&ctx.bmw1, hashA, 64);
sph_bmw512_close(&ctx.bmw1, hashB);
sph_groestl512 (&ctx.groestl1, hashB, 64);
sph_groestl512_close(&ctx.groestl1, hashA);
sph_skein512 (&ctx.skein1, hashA, 64);
sph_skein512_close(&ctx.skein1, hashB);
sph_jh512 (&ctx.jh1, hashB, 64);
sph_jh512_close(&ctx.jh1, hashA);
sph_keccak512 (&ctx.keccak1, hashA, 64);
sph_keccak512_close(&ctx.keccak1, hashB);
sph_luffa512 (&ctx.luffa1, hashB, 64);
sph_luffa512_close (&ctx.luffa1, hashA);
sph_cubehash512 (&ctx.cubehash1, hashA, 64);
sph_cubehash512_close(&ctx.cubehash1, hashB);
sph_shavite512 (&ctx.shavite1, hashB, 64);
sph_shavite512_close(&ctx.shavite1, hashA);
sph_simd512 (&ctx.simd1, hashA, 64);
sph_simd512_close(&ctx.simd1, hashB);
sph_echo512 (&ctx.echo1, hashB, 64);
sph_echo512_close(&ctx.echo1, hashA);
sph_hamsi512 (&ctx.hamsi1, hashA, 64);
sph_hamsi512_close(&ctx.hamsi1, hashB);
sph_fugue512 (&ctx.fugue1, hashB, 64);
sph_fugue512_close(&ctx.fugue1, hashA);
sph_shabal512 (&ctx.shabal1, (const unsigned char*)hashA, 64);
sph_shabal512_close(&ctx.shabal1, hashB);
memcpy(state, hashB, 32);
}
static const uint32_t diff1targ = 0x0000ffff;
/* Used externally as confirmation of correct OCL code */
int x14_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
{
uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]);
uint32_t data[20], ohash[8];
be32enc_vect(data, (const uint32_t *)pdata, 19);
data[19] = htobe32(nonce);
x14hash(ohash, data);
tmp_hash7 = be32toh(ohash[7]);
applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx",
(long unsigned int)Htarg,
(long unsigned int)diff1targ,
(long unsigned int)tmp_hash7);
if (tmp_hash7 > diff1targ)
return -1;
if (tmp_hash7 > Htarg)
return 0;
return 1;
}
void x14_regenhash(struct work *work)
{
uint32_t data[20];
uint32_t *nonce = (uint32_t *)(work->data + 76);
uint32_t *ohash = (uint32_t *)(work->hash);
be32enc_vect(data, (const uint32_t *)work->data, 19);
data[19] = htobe32(*nonce);
x14hash(ohash, data);
}
static inline void be32enc(void *pp, uint32_t x)
{
uint8_t *p = (uint8_t *)pp;
p[3] = x & 0xff;
p[2] = (x >> 8) & 0xff;
p[1] = (x >> 16) & 0xff;
p[0] = (x >> 24) & 0xff;
}
bool scanhash_x14(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
unsigned char *pdata, unsigned char __maybe_unused *phash1,
unsigned char __maybe_unused *phash, const unsigned char *ptarget,
uint32_t max_nonce, uint32_t *last_nonce, uint32_t n)
{
uint32_t *nonce = (uint32_t *)(pdata + 76);
uint32_t data[20];
uint32_t tmp_hash7;
uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]);
bool ret = false;
be32enc_vect(data, (const uint32_t *)pdata, 19);
while(1)
{
uint32_t ostate[8];
*nonce = ++n;
data[19] = (n);
x14hash(ostate, data);
tmp_hash7 = (ostate[7]);
applog(LOG_INFO, "data7 %08lx", (long unsigned int)data[7]);
if(unlikely(tmp_hash7 <= Htarg))
{
((uint32_t *)pdata)[19] = htobe32(n);
*last_nonce = n;
ret = true;
break;
}
if (unlikely((n >= max_nonce) || thr->work_restart))
{
*last_nonce = n;
break;
}
}
return ret;
}

10
algorithm/x14.h

@ -0,0 +1,10 @@ @@ -0,0 +1,10 @@
#ifndef X14_H
#define X14_H
#include "miner.h"
extern int x14_test(unsigned char *pdata, const unsigned char *ptarget,
uint32_t nonce);
extern void x14_regenhash(struct work *work);
#endif /* X14_H */

1462
kernel/bitblock.cl

File diff suppressed because it is too large Load Diff

1166
kernel/bitblockold.cl

File diff suppressed because it is too large Load Diff

126
kernel/darkcoin-mod.cl

@ -95,8 +95,6 @@ @@ -95,8 +95,6 @@
#include "shavite.cl"
#include "simd.cl"
#include "echo.cl"
#include "hamsi.cl"
#include "fugue.cl"
#define SWAP4(x) as_uint(as_uchar4(x).wzyx)
#define SWAP8(x) as_ulong(as_uchar8(x).s76543210)
@ -181,7 +179,7 @@ __kernel void search(__global unsigned char* block, __global hash_t* hashes) @@ -181,7 +179,7 @@ __kernel void search(__global unsigned char* block, __global hash_t* hashes)
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search1(__global hash_t* hashes)
{
uint gid = get_global_id(0);
uint gid = get_global_id(0);
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
// bmw
@ -456,93 +454,69 @@ __kernel void search2(__global hash_t* hashes) @@ -456,93 +454,69 @@ __kernel void search2(__global hash_t* hashes)
uint gid = get_global_id(0);
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
#if !SPH_SMALL_FOOTPRINT_GROESTL
__local sph_u64 T0_C[256], T1_C[256], T2_C[256], T3_C[256];
__local sph_u64 T4_C[256], T5_C[256], T6_C[256], T7_C[256];
#else
__local sph_u64 T0_C[256], T4_C[256];
#endif
__local sph_u64 T0_L[256], T1_L[256], T2_L[256], T3_L[256], T4_L[256], T5_L[256], T6_L[256], T7_L[256];
int init = get_local_id(0);
int step = get_local_size(0);
for (int i = init; i < 256; i += step)
{
T0_C[i] = T0[i];
T4_C[i] = T4[i];
#if !SPH_SMALL_FOOTPRINT_GROESTL
T1_C[i] = T1[i];
T2_C[i] = T2[i];
T3_C[i] = T3[i];
T5_C[i] = T5[i];
T6_C[i] = T6[i];
T7_C[i] = T7[i];
#endif
T0_L[i] = T0[i];
T4_L[i] = T4[i];
T1_L[i] = T1[i];
T2_L[i] = T2[i];
T3_L[i] = T3[i];
T5_L[i] = T5[i];
T6_L[i] = T6[i];
T7_L[i] = T7[i];
}
barrier(CLK_LOCAL_MEM_FENCE); // groestl
#define T0 T0_C
#define T1 T1_C
#define T2 T2_C
#define T3 T3_C
#define T4 T4_C
#define T5 T5_C
#define T6 T6_C
#define T7 T7_C
sph_u64 H[16];
for (unsigned int u = 0; u < 15; u ++)
H[u] = 0;
#if USE_LE
H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40);
#else
H[15] = (sph_u64)512;
#endif
barrier(CLK_LOCAL_MEM_FENCE);
#define T0 T0_L
#define T1 T1_L
#define T2 T2_L
#define T3 T3_L
#define T4 T4_L
#define T5 T5_L
#define T6 T6_L
#define T7 T7_L
// groestl
sph_u64 H[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x0002000000000000};
sph_u64 g[16], m[16];
m[0] = DEC64E(hash->h8[0]);
m[1] = DEC64E(hash->h8[1]);
m[2] = DEC64E(hash->h8[2]);
m[3] = DEC64E(hash->h8[3]);
m[4] = DEC64E(hash->h8[4]);
m[5] = DEC64E(hash->h8[5]);
m[6] = DEC64E(hash->h8[6]);
m[7] = DEC64E(hash->h8[7]);
for (unsigned int u = 0; u < 16; u ++)
g[u] = m[u] ^ H[u];
m[8] = 0x80; g[8] = m[8] ^ H[8];
m[9] = 0; g[9] = m[9] ^ H[9];
m[10] = 0; g[10] = m[10] ^ H[10];
m[11] = 0; g[11] = m[11] ^ H[11];
m[12] = 0; g[12] = m[12] ^ H[12];
m[13] = 0; g[13] = m[13] ^ H[13];
m[14] = 0; g[14] = m[14] ^ H[14];
m[15] = 0x100000000000000; g[15] = m[15] ^ H[15];
g[0] = m[0] = DEC64E(hash->h8[0]);
g[1] = m[1] = DEC64E(hash->h8[1]);
g[2] = m[2] = DEC64E(hash->h8[2]);
g[3] = m[3] = DEC64E(hash->h8[3]);
g[4] = m[4] = DEC64E(hash->h8[4]);
g[5] = m[5] = DEC64E(hash->h8[5]);
g[6] = m[6] = DEC64E(hash->h8[6]);
g[7] = m[7] = DEC64E(hash->h8[7]);
g[8] = m[8] = 0x80;
g[9] = m[9] = 0;
g[10] = m[10] = 0;
g[11] = m[11] = 0;
g[12] = m[12] = 0;
g[13] = m[13] = 0;
g[14] = m[14] = 0;
g[15] = 0x102000000000000;
m[15] = 0x100000000000000;
PERM_BIG_P(g);
PERM_BIG_Q(m);
for (unsigned int u = 0; u < 16; u ++)
H[u] ^= g[u] ^ m[u];
sph_u64 xH[16];
for (unsigned int u = 0; u < 16; u ++)
xH[u] = H[u];
xH[u] = H[u] ^= g[u] ^ m[u];
PERM_BIG_P(xH);
for (unsigned int u = 0; u < 16; u ++)
H[u] ^= xH[u];
for (unsigned int u = 0; u < 8; u ++)
hash->h8[u] = DEC64E(H[u + 8]);
barrier(CLK_GLOBAL_MEM_FENCE);
for (unsigned int u = 8; u < 16; u ++)
hash->h8[u-8] = DEC64E(H[u] ^ xH[u]);
barrier(CLK_GLOBAL_MEM_FENCE);
}
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
@ -863,7 +837,7 @@ __kernel void search8(__global hash_t* hashes) @@ -863,7 +837,7 @@ __kernel void search8(__global hash_t* hashes)
sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17;
sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F;
sph_u32 sc_count0 = (64 << 3), sc_count1 = 0, sc_count2 = 0, sc_count3 = 0;
sph_u32 sc_count0 = 0x200, sc_count1 = 0, sc_count2 = 0, sc_count3 = 0;
rk00 = hash->h4[0];
rk01 = hash->h4[1];

16
kernel/fugue.cl

@ -30,26 +30,26 @@ @@ -30,26 +30,26 @@
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
__constant static const sph_u32 IV224[] = {
__constant const sph_u32 IV224[] = {
SPH_C32(0xf4c9120d), SPH_C32(0x6286f757), SPH_C32(0xee39e01c),
SPH_C32(0xe074e3cb), SPH_C32(0xa1127c62), SPH_C32(0x9a43d215),
SPH_C32(0xbd8d679a)
};
__constant static const sph_u32 IV256[] = {
__constant const sph_u32 IV256[] = {
SPH_C32(0xe952bdde), SPH_C32(0x6671135f), SPH_C32(0xe0d4f668),
SPH_C32(0xd2b0b594), SPH_C32(0xf96c621d), SPH_C32(0xfbf929de),
SPH_C32(0x9149e899), SPH_C32(0x34f8c248)
};
__constant static const sph_u32 IV384[] = {
__constant const sph_u32 IV384[] = {
SPH_C32(0xaa61ec0d), SPH_C32(0x31252e1f), SPH_C32(0xa01db4c7),
SPH_C32(0x00600985), SPH_C32(0x215ef44a), SPH_C32(0x741b5e9c),
SPH_C32(0xfa693e9a), SPH_C32(0x473eb040), SPH_C32(0xe502ae8a),
SPH_C32(0xa99c25e0), SPH_C32(0xbc95517c), SPH_C32(0x5c1095a1)
};
__constant static const sph_u32 IV512[] = {
__constant const sph_u32 IV512[] = {
SPH_C32(0x8807a57e), SPH_C32(0xe616af75), SPH_C32(0xc5d3e4db),
SPH_C32(0xac9ab027), SPH_C32(0xd915f117), SPH_C32(0xb6eecc54),
SPH_C32(0x06e8020b), SPH_C32(0x4a92efd1), SPH_C32(0xaac6e2c9),
@ -58,7 +58,7 @@ __constant static const sph_u32 IV512[] = { @@ -58,7 +58,7 @@ __constant static const sph_u32 IV512[] = {
SPH_C32(0xe13e3567)
};
__constant static const sph_u32 mixtab0[] = {
__constant const sph_u32 mixtab0_c[] = {
SPH_C32(0x63633297), SPH_C32(0x7c7c6feb), SPH_C32(0x77775ec7),
SPH_C32(0x7b7b7af7), SPH_C32(0xf2f2e8e5), SPH_C32(0x6b6b0ab7),
SPH_C32(0x6f6f16a7), SPH_C32(0xc5c56d39), SPH_C32(0x303090c0),
@ -147,7 +147,7 @@ __constant static const sph_u32 mixtab0[] = { @@ -147,7 +147,7 @@ __constant static const sph_u32 mixtab0[] = {
SPH_C32(0x16166258)
};
__constant static const sph_u32 mixtab1[] = {
__constant const sph_u32 mixtab1_c[] = {
SPH_C32(0x97636332), SPH_C32(0xeb7c7c6f), SPH_C32(0xc777775e),
SPH_C32(0xf77b7b7a), SPH_C32(0xe5f2f2e8), SPH_C32(0xb76b6b0a),
SPH_C32(0xa76f6f16), SPH_C32(0x39c5c56d), SPH_C32(0xc0303090),
@ -236,7 +236,7 @@ __constant static const sph_u32 mixtab1[] = { @@ -236,7 +236,7 @@ __constant static const sph_u32 mixtab1[] = {
SPH_C32(0x58161662)
};
__constant static const sph_u32 mixtab2[] = {
__constant const sph_u32 mixtab2_c[] = {
SPH_C32(0x32976363), SPH_C32(0x6feb7c7c), SPH_C32(0x5ec77777),
SPH_C32(0x7af77b7b), SPH_C32(0xe8e5f2f2), SPH_C32(0x0ab76b6b),
SPH_C32(0x16a76f6f), SPH_C32(0x6d39c5c5), SPH_C32(0x90c03030),
@ -325,7 +325,7 @@ __constant static const sph_u32 mixtab2[] = { @@ -325,7 +325,7 @@ __constant static const sph_u32 mixtab2[] = {
SPH_C32(0x62581616)
};
__constant static const sph_u32 mixtab3[] = {
__constant const sph_u32 mixtab3_c[] = {
SPH_C32(0x63329763), SPH_C32(0x7c6feb7c), SPH_C32(0x775ec777),
SPH_C32(0x7b7af77b), SPH_C32(0xf2e8e5f2), SPH_C32(0x6b0ab76b),
SPH_C32(0x6f16a76f), SPH_C32(0xc56d39c5), SPH_C32(0x3090c030),

13
kernel/fuguecoin.cl

@ -79,6 +79,19 @@ __kernel void search(__global unsigned char* input, volatile __global uint* outp @@ -79,6 +79,19 @@ __kernel void search(__global unsigned char* input, volatile __global uint* outp
{
uint gid = get_global_id(0);
//mixtab
__local sph_u32 mixtab0[256], mixtab1[256], mixtab2[256], mixtab3[256];
int init = get_local_id(0);
int step = get_local_size(0);
for (int i = init; i < 256; i += step)
{
mixtab0[i] = mixtab0_c[i];
mixtab1[i] = mixtab1_c[i];
mixtab2[i] = mixtab2_c[i];
mixtab3[i] = mixtab3_c[i];
}
barrier(CLK_GLOBAL_MEM_FENCE);
sph_u32 S00 = 0, S01 = 0, S02 = 0, S03 = 0, S04 = 0, S05 = 0, S06 = 0, S07 = 0, S08 = 0, S09 = 0; \
sph_u32 S10 = 0, S11 = 0, S12 = 0, S13 = 0, S14 = 0, S15 = 0, S16 = 0, S17 = 0, S18 = 0, S19 = 0; \
sph_u32 S20 = 0, S21 = 0, S22 = IV256[0], S23 = IV256[1], S24 = IV256[2], S25 = IV256[3], S26 = IV256[4], S27 = IV256[5], S28 = IV256[6], S29 = IV256[7];

23
kernel/hamsi.cl

@ -88,22 +88,31 @@ @@ -88,22 +88,31 @@
*/
#if !defined SPH_HAMSI_EXPAND_SMALL
#if SPH_SMALL_FOOTPRINT_HAMSI
#define SPH_HAMSI_EXPAND_SMALL 4
#else
#define SPH_HAMSI_EXPAND_SMALL 8
#endif
#if SPH_SMALL_FOOTPRINT_HAMSI
#define SPH_HAMSI_EXPAND_SMALL 4
#else
#define SPH_HAMSI_EXPAND_SMALL 8
#endif
#endif
#if !defined SPH_HAMSI_EXPAND_BIG
#define SPH_HAMSI_EXPAND_BIG 8
#define SPH_HAMSI_EXPAND_BIG 8
#endif
#ifdef _MSC_VER
#pragma warning (disable: 4146)
#endif
#include "hamsi_helper.cl"
//temp fix for shortened implementation of X15
#ifdef SPH_HAMSI_SHORT
#if SPH_HAMSI_SHORT == 1 && SPH_HAMSI_EXPAND_BIG == 1
#include "hamsi_helper_big.cl"
#else
#include "hamsi_helper.cl"
#endif
#else
#include "hamsi_helper.cl"
#endif
__constant static const sph_u32 HAMSI_IV224[] = {
SPH_C32(0xc3967a67), SPH_C32(0xc3bc6c20), SPH_C32(0x4bc3bcc3),

515
kernel/hamsi_helper_big.cl

@ -0,0 +1,515 @@ @@ -0,0 +1,515 @@
/* $Id: hamsi_helper.c 202 2010-05-31 15:46:48Z tp $ */
/*
* Helper code for Hamsi (input block expansion). This code is
* automatically generated and includes precomputed tables for
* expansion code which handles 2 to 8 bits at a time.
*
* This file is included from hamsi.c, and is not meant to be compiled
* independently.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#if SPH_HAMSI_EXPAND_BIG == 1
/* Note: this table lists bits within each byte from least
siginificant to most significant. */
__constant const sph_u32 T512[64][16] = {
{ SPH_C32(0xef0b0270), SPH_C32(0x3afd0000), SPH_C32(0x5dae0000),
SPH_C32(0x69490000), SPH_C32(0x9b0f3c06), SPH_C32(0x4405b5f9),
SPH_C32(0x66140a51), SPH_C32(0x924f5d0a), SPH_C32(0xc96b0030),
SPH_C32(0xe7250000), SPH_C32(0x2f840000), SPH_C32(0x264f0000),
SPH_C32(0x08695bf9), SPH_C32(0x6dfcf137), SPH_C32(0x509f6984),
SPH_C32(0x9e69af68) },
{ SPH_C32(0xc96b0030), SPH_C32(0xe7250000), SPH_C32(0x2f840000),
SPH_C32(0x264f0000), SPH_C32(0x08695bf9), SPH_C32(0x6dfcf137),
SPH_C32(0x509f6984), SPH_C32(0x9e69af68), SPH_C32(0x26600240),
SPH_C32(0xddd80000), SPH_C32(0x722a0000), SPH_C32(0x4f060000),
SPH_C32(0x936667ff), SPH_C32(0x29f944ce), SPH_C32(0x368b63d5),
SPH_C32(0x0c26f262) },
{ SPH_C32(0x145a3c00), SPH_C32(0xb9e90000), SPH_C32(0x61270000),
SPH_C32(0xf1610000), SPH_C32(0xce613d6c), SPH_C32(0xb0493d78),
SPH_C32(0x47a96720), SPH_C32(0xe18e24c5), SPH_C32(0x23671400),
SPH_C32(0xc8b90000), SPH_C32(0xf4c70000), SPH_C32(0xfb750000),
SPH_C32(0x73cd2465), SPH_C32(0xf8a6a549), SPH_C32(0x02c40a3f),
SPH_C32(0xdc24e61f) },
{ SPH_C32(0x23671400), SPH_C32(0xc8b90000), SPH_C32(0xf4c70000),
SPH_C32(0xfb750000), SPH_C32(0x73cd2465), SPH_C32(0xf8a6a549),
SPH_C32(0x02c40a3f), SPH_C32(0xdc24e61f), SPH_C32(0x373d2800),
SPH_C32(0x71500000), SPH_C32(0x95e00000), SPH_C32(0x0a140000),
SPH_C32(0xbdac1909), SPH_C32(0x48ef9831), SPH_C32(0x456d6d1f),
SPH_C32(0x3daac2da) },
{ SPH_C32(0x54285c00), SPH_C32(0xeaed0000), SPH_C32(0xc5d60000),
SPH_C32(0xa1c50000), SPH_C32(0xb3a26770), SPH_C32(0x94a5c4e1),
SPH_C32(0x6bb0419d), SPH_C32(0x551b3782), SPH_C32(0x9cbb1800),
SPH_C32(0xb0d30000), SPH_C32(0x92510000), SPH_C32(0xed930000),
SPH_C32(0x593a4345), SPH_C32(0xe114d5f4), SPH_C32(0x430633da),
SPH_C32(0x78cace29) },
{ SPH_C32(0x9cbb1800), SPH_C32(0xb0d30000), SPH_C32(0x92510000),
SPH_C32(0xed930000), SPH_C32(0x593a4345), SPH_C32(0xe114d5f4),
SPH_C32(0x430633da), SPH_C32(0x78cace29), SPH_C32(0xc8934400),
SPH_C32(0x5a3e0000), SPH_C32(0x57870000), SPH_C32(0x4c560000),
SPH_C32(0xea982435), SPH_C32(0x75b11115), SPH_C32(0x28b67247),
SPH_C32(0x2dd1f9ab) },
{ SPH_C32(0x29449c00), SPH_C32(0x64e70000), SPH_C32(0xf24b0000),
SPH_C32(0xc2f30000), SPH_C32(0x0ede4e8f), SPH_C32(0x56c23745),
SPH_C32(0xf3e04259), SPH_C32(0x8d0d9ec4), SPH_C32(0x466d0c00),
SPH_C32(0x08620000), SPH_C32(0xdd5d0000), SPH_C32(0xbadd0000),
SPH_C32(0x6a927942), SPH_C32(0x441f2b93), SPH_C32(0x218ace6f),
SPH_C32(0xbf2c0be2) },
{ SPH_C32(0x466d0c00), SPH_C32(0x08620000), SPH_C32(0xdd5d0000),
SPH_C32(0xbadd0000), SPH_C32(0x6a927942), SPH_C32(0x441f2b93),
SPH_C32(0x218ace6f), SPH_C32(0xbf2c0be2), SPH_C32(0x6f299000),
SPH_C32(0x6c850000), SPH_C32(0x2f160000), SPH_C32(0x782e0000),
SPH_C32(0x644c37cd), SPH_C32(0x12dd1cd6), SPH_C32(0xd26a8c36),
SPH_C32(0x32219526) },
{ SPH_C32(0xf6800005), SPH_C32(0x3443c000), SPH_C32(0x24070000),
SPH_C32(0x8f3d0000), SPH_C32(0x21373bfb), SPH_C32(0x0ab8d5ae),
SPH_C32(0xcdc58b19), SPH_C32(0xd795ba31), SPH_C32(0xa67f0001),
SPH_C32(0x71378000), SPH_C32(0x19fc0000), SPH_C32(0x96db0000),
SPH_C32(0x3a8b6dfd), SPH_C32(0xebcaaef3), SPH_C32(0x2c6d478f),
SPH_C32(0xac8e6c88) },
{ SPH_C32(0xa67f0001), SPH_C32(0x71378000), SPH_C32(0x19fc0000),
SPH_C32(0x96db0000), SPH_C32(0x3a8b6dfd), SPH_C32(0xebcaaef3),
SPH_C32(0x2c6d478f), SPH_C32(0xac8e6c88), SPH_C32(0x50ff0004),
SPH_C32(0x45744000), SPH_C32(0x3dfb0000), SPH_C32(0x19e60000),
SPH_C32(0x1bbc5606), SPH_C32(0xe1727b5d), SPH_C32(0xe1a8cc96),
SPH_C32(0x7b1bd6b9) },
{ SPH_C32(0xf7750009), SPH_C32(0xcf3cc000), SPH_C32(0xc3d60000),
SPH_C32(0x04920000), SPH_C32(0x029519a9), SPH_C32(0xf8e836ba),
SPH_C32(0x7a87f14e), SPH_C32(0x9e16981a), SPH_C32(0xd46a0000),
SPH_C32(0x8dc8c000), SPH_C32(0xa5af0000), SPH_C32(0x4a290000),
SPH_C32(0xfc4e427a), SPH_C32(0xc9b4866c), SPH_C32(0x98369604),
SPH_C32(0xf746c320) },
{ SPH_C32(0xd46a0000), SPH_C32(0x8dc8c000), SPH_C32(0xa5af0000),
SPH_C32(0x4a290000), SPH_C32(0xfc4e427a), SPH_C32(0xc9b4866c),
SPH_C32(0x98369604), SPH_C32(0xf746c320), SPH_C32(0x231f0009),
SPH_C32(0x42f40000), SPH_C32(0x66790000), SPH_C32(0x4ebb0000),
SPH_C32(0xfedb5bd3), SPH_C32(0x315cb0d6), SPH_C32(0xe2b1674a),
SPH_C32(0x69505b3a) },
{ SPH_C32(0x774400f0), SPH_C32(0xf15a0000), SPH_C32(0xf5b20000),
SPH_C32(0x34140000), SPH_C32(0x89377e8c), SPH_C32(0x5a8bec25),
SPH_C32(0x0bc3cd1e), SPH_C32(0xcf3775cb), SPH_C32(0xf46c0050),
SPH_C32(0x96180000), SPH_C32(0x14a50000), SPH_C32(0x031f0000),
SPH_C32(0x42947eb8), SPH_C32(0x66bf7e19), SPH_C32(0x9ca470d2),
SPH_C32(0x8a341574) },
{ SPH_C32(0xf46c0050), SPH_C32(0x96180000), SPH_C32(0x14a50000),
SPH_C32(0x031f0000), SPH_C32(0x42947eb8), SPH_C32(0x66bf7e19),
SPH_C32(0x9ca470d2), SPH_C32(0x8a341574), SPH_C32(0x832800a0),
SPH_C32(0x67420000), SPH_C32(0xe1170000), SPH_C32(0x370b0000),
SPH_C32(0xcba30034), SPH_C32(0x3c34923c), SPH_C32(0x9767bdcc),
SPH_C32(0x450360bf) },
{ SPH_C32(0xe8870170), SPH_C32(0x9d720000), SPH_C32(0x12db0000),
SPH_C32(0xd4220000), SPH_C32(0xf2886b27), SPH_C32(0xa921e543),
SPH_C32(0x4ef8b518), SPH_C32(0x618813b1), SPH_C32(0xb4370060),
SPH_C32(0x0c4c0000), SPH_C32(0x56c20000), SPH_C32(0x5cae0000),
SPH_C32(0x94541f3f), SPH_C32(0x3b3ef825), SPH_C32(0x1b365f3d),
SPH_C32(0xf3d45758) },
{ SPH_C32(0xb4370060), SPH_C32(0x0c4c0000), SPH_C32(0x56c20000),
SPH_C32(0x5cae0000), SPH_C32(0x94541f3f), SPH_C32(0x3b3ef825),
SPH_C32(0x1b365f3d), SPH_C32(0xf3d45758), SPH_C32(0x5cb00110),
SPH_C32(0x913e0000), SPH_C32(0x44190000), SPH_C32(0x888c0000),
SPH_C32(0x66dc7418), SPH_C32(0x921f1d66), SPH_C32(0x55ceea25),
SPH_C32(0x925c44e9) },
{ SPH_C32(0x0c720000), SPH_C32(0x49e50f00), SPH_C32(0x42790000),
SPH_C32(0x5cea0000), SPH_C32(0x33aa301a), SPH_C32(0x15822514),
SPH_C32(0x95a34b7b), SPH_C32(0xb44b0090), SPH_C32(0xfe220000),
SPH_C32(0xa7580500), SPH_C32(0x25d10000), SPH_C32(0xf7600000),
SPH_C32(0x893178da), SPH_C32(0x1fd4f860), SPH_C32(0x4ed0a315),
SPH_C32(0xa123ff9f) },
{ SPH_C32(0xfe220000), SPH_C32(0xa7580500), SPH_C32(0x25d10000),
SPH_C32(0xf7600000), SPH_C32(0x893178da), SPH_C32(0x1fd4f860),
SPH_C32(0x4ed0a315), SPH_C32(0xa123ff9f), SPH_C32(0xf2500000),
SPH_C32(0xeebd0a00), SPH_C32(0x67a80000), SPH_C32(0xab8a0000),
SPH_C32(0xba9b48c0), SPH_C32(0x0a56dd74), SPH_C32(0xdb73e86e),
SPH_C32(0x1568ff0f) },
{ SPH_C32(0x45180000), SPH_C32(0xa5b51700), SPH_C32(0xf96a0000),
SPH_C32(0x3b480000), SPH_C32(0x1ecc142c), SPH_C32(0x231395d6),
SPH_C32(0x16bca6b0), SPH_C32(0xdf33f4df), SPH_C32(0xb83d0000),
SPH_C32(0x16710600), SPH_C32(0x379a0000), SPH_C32(0xf5b10000),
SPH_C32(0x228161ac), SPH_C32(0xae48f145), SPH_C32(0x66241616),
SPH_C32(0xc5c1eb3e) },
{ SPH_C32(0xb83d0000), SPH_C32(0x16710600), SPH_C32(0x379a0000),
SPH_C32(0xf5b10000), SPH_C32(0x228161ac), SPH_C32(0xae48f145),
SPH_C32(0x66241616), SPH_C32(0xc5c1eb3e), SPH_C32(0xfd250000),
SPH_C32(0xb3c41100), SPH_C32(0xcef00000), SPH_C32(0xcef90000),
SPH_C32(0x3c4d7580), SPH_C32(0x8d5b6493), SPH_C32(0x7098b0a6),
SPH_C32(0x1af21fe1) },
{ SPH_C32(0x75a40000), SPH_C32(0xc28b2700), SPH_C32(0x94a40000),
SPH_C32(0x90f50000), SPH_C32(0xfb7857e0), SPH_C32(0x49ce0bae),
SPH_C32(0x1767c483), SPH_C32(0xaedf667e), SPH_C32(0xd1660000),
SPH_C32(0x1bbc0300), SPH_C32(0x9eec0000), SPH_C32(0xf6940000),
SPH_C32(0x03024527), SPH_C32(0xcf70fcf2), SPH_C32(0xb4431b17),
SPH_C32(0x857f3c2b) },
{ SPH_C32(0xd1660000), SPH_C32(0x1bbc0300), SPH_C32(0x9eec0000),
SPH_C32(0xf6940000), SPH_C32(0x03024527), SPH_C32(0xcf70fcf2),
SPH_C32(0xb4431b17), SPH_C32(0x857f3c2b), SPH_C32(0xa4c20000),
SPH_C32(0xd9372400), SPH_C32(0x0a480000), SPH_C32(0x66610000),
SPH_C32(0xf87a12c7), SPH_C32(0x86bef75c), SPH_C32(0xa324df94),
SPH_C32(0x2ba05a55) },
{ SPH_C32(0x75c90003), SPH_C32(0x0e10c000), SPH_C32(0xd1200000),
SPH_C32(0xbaea0000), SPH_C32(0x8bc42f3e), SPH_C32(0x8758b757),
SPH_C32(0xbb28761d), SPH_C32(0x00b72e2b), SPH_C32(0xeecf0001),
SPH_C32(0x6f564000), SPH_C32(0xf33e0000), SPH_C32(0xa79e0000),
SPH_C32(0xbdb57219), SPH_C32(0xb711ebc5), SPH_C32(0x4a3b40ba),
SPH_C32(0xfeabf254) },
{ SPH_C32(0xeecf0001), SPH_C32(0x6f564000), SPH_C32(0xf33e0000),
SPH_C32(0xa79e0000), SPH_C32(0xbdb57219), SPH_C32(0xb711ebc5),
SPH_C32(0x4a3b40ba), SPH_C32(0xfeabf254), SPH_C32(0x9b060002),
SPH_C32(0x61468000), SPH_C32(0x221e0000), SPH_C32(0x1d740000),
SPH_C32(0x36715d27), SPH_C32(0x30495c92), SPH_C32(0xf11336a7),
SPH_C32(0xfe1cdc7f) },
{ SPH_C32(0x86790000), SPH_C32(0x3f390002), SPH_C32(0xe19ae000),
SPH_C32(0x98560000), SPH_C32(0x9565670e), SPH_C32(0x4e88c8ea),
SPH_C32(0xd3dd4944), SPH_C32(0x161ddab9), SPH_C32(0x30b70000),
SPH_C32(0xe5d00000), SPH_C32(0xf4f46000), SPH_C32(0x42c40000),
SPH_C32(0x63b83d6a), SPH_C32(0x78ba9460), SPH_C32(0x21afa1ea),
SPH_C32(0xb0a51834) },
{ SPH_C32(0x30b70000), SPH_C32(0xe5d00000), SPH_C32(0xf4f46000),
SPH_C32(0x42c40000), SPH_C32(0x63b83d6a), SPH_C32(0x78ba9460),
SPH_C32(0x21afa1ea), SPH_C32(0xb0a51834), SPH_C32(0xb6ce0000),
SPH_C32(0xdae90002), SPH_C32(0x156e8000), SPH_C32(0xda920000),
SPH_C32(0xf6dd5a64), SPH_C32(0x36325c8a), SPH_C32(0xf272e8ae),
SPH_C32(0xa6b8c28d) },
{ SPH_C32(0x14190000), SPH_C32(0x23ca003c), SPH_C32(0x50df0000),
SPH_C32(0x44b60000), SPH_C32(0x1b6c67b0), SPH_C32(0x3cf3ac75),
SPH_C32(0x61e610b0), SPH_C32(0xdbcadb80), SPH_C32(0xe3430000),
SPH_C32(0x3a4e0014), SPH_C32(0xf2c60000), SPH_C32(0xaa4e0000),
SPH_C32(0xdb1e42a6), SPH_C32(0x256bbe15), SPH_C32(0x123db156),
SPH_C32(0x3a4e99d7) },
{ SPH_C32(0xe3430000), SPH_C32(0x3a4e0014), SPH_C32(0xf2c60000),
SPH_C32(0xaa4e0000), SPH_C32(0xdb1e42a6), SPH_C32(0x256bbe15),
SPH_C32(0x123db156), SPH_C32(0x3a4e99d7), SPH_C32(0xf75a0000),
SPH_C32(0x19840028), SPH_C32(0xa2190000), SPH_C32(0xeef80000),
SPH_C32(0xc0722516), SPH_C32(0x19981260), SPH_C32(0x73dba1e6),
SPH_C32(0xe1844257) },
{ SPH_C32(0x54500000), SPH_C32(0x0671005c), SPH_C32(0x25ae0000),
SPH_C32(0x6a1e0000), SPH_C32(0x2ea54edf), SPH_C32(0x664e8512),
SPH_C32(0xbfba18c3), SPH_C32(0x7e715d17), SPH_C32(0xbc8d0000),
SPH_C32(0xfc3b0018), SPH_C32(0x19830000), SPH_C32(0xd10b0000),
SPH_C32(0xae1878c4), SPH_C32(0x42a69856), SPH_C32(0x0012da37),
SPH_C32(0x2c3b504e) },
{ SPH_C32(0xbc8d0000), SPH_C32(0xfc3b0018), SPH_C32(0x19830000),
SPH_C32(0xd10b0000), SPH_C32(0xae1878c4), SPH_C32(0x42a69856),
SPH_C32(0x0012da37), SPH_C32(0x2c3b504e), SPH_C32(0xe8dd0000),
SPH_C32(0xfa4a0044), SPH_C32(0x3c2d0000), SPH_C32(0xbb150000),
SPH_C32(0x80bd361b), SPH_C32(0x24e81d44), SPH_C32(0xbfa8c2f4),
SPH_C32(0x524a0d59) },
{ SPH_C32(0x69510000), SPH_C32(0xd4e1009c), SPH_C32(0xc3230000),
SPH_C32(0xac2f0000), SPH_C32(0xe4950bae), SPH_C32(0xcea415dc),
SPH_C32(0x87ec287c), SPH_C32(0xbce1a3ce), SPH_C32(0xc6730000),
SPH_C32(0xaf8d000c), SPH_C32(0xa4c10000), SPH_C32(0x218d0000),
SPH_C32(0x23111587), SPH_C32(0x7913512f), SPH_C32(0x1d28ac88),
SPH_C32(0x378dd173) },
{ SPH_C32(0xc6730000), SPH_C32(0xaf8d000c), SPH_C32(0xa4c10000),
SPH_C32(0x218d0000), SPH_C32(0x23111587), SPH_C32(0x7913512f),
SPH_C32(0x1d28ac88), SPH_C32(0x378dd173), SPH_C32(0xaf220000),
SPH_C32(0x7b6c0090), SPH_C32(0x67e20000), SPH_C32(0x8da20000),
SPH_C32(0xc7841e29), SPH_C32(0xb7b744f3), SPH_C32(0x9ac484f4),
SPH_C32(0x8b6c72bd) },
{ SPH_C32(0xcc140000), SPH_C32(0xa5630000), SPH_C32(0x5ab90780),
SPH_C32(0x3b500000), SPH_C32(0x4bd013ff), SPH_C32(0x879b3418),
SPH_C32(0x694348c1), SPH_C32(0xca5a87fe), SPH_C32(0x819e0000),
SPH_C32(0xec570000), SPH_C32(0x66320280), SPH_C32(0x95f30000),
SPH_C32(0x5da92802), SPH_C32(0x48f43cbc), SPH_C32(0xe65aa22d),
SPH_C32(0x8e67b7fa) },
{ SPH_C32(0x819e0000), SPH_C32(0xec570000), SPH_C32(0x66320280),
SPH_C32(0x95f30000), SPH_C32(0x5da92802), SPH_C32(0x48f43cbc),
SPH_C32(0xe65aa22d), SPH_C32(0x8e67b7fa), SPH_C32(0x4d8a0000),
SPH_C32(0x49340000), SPH_C32(0x3c8b0500), SPH_C32(0xaea30000),
SPH_C32(0x16793bfd), SPH_C32(0xcf6f08a4), SPH_C32(0x8f19eaec),
SPH_C32(0x443d3004) },
{ SPH_C32(0x78230000), SPH_C32(0x12fc0000), SPH_C32(0xa93a0b80),
SPH_C32(0x90a50000), SPH_C32(0x713e2879), SPH_C32(0x7ee98924),
SPH_C32(0xf08ca062), SPH_C32(0x636f8bab), SPH_C32(0x02af0000),
SPH_C32(0xb7280000), SPH_C32(0xba1c0300), SPH_C32(0x56980000),
SPH_C32(0xba8d45d3), SPH_C32(0x8048c667), SPH_C32(0xa95c149a),
SPH_C32(0xf4f6ea7b) },
{ SPH_C32(0x02af0000), SPH_C32(0xb7280000), SPH_C32(0xba1c0300),
SPH_C32(0x56980000), SPH_C32(0xba8d45d3), SPH_C32(0x8048c667),
SPH_C32(0xa95c149a), SPH_C32(0xf4f6ea7b), SPH_C32(0x7a8c0000),
SPH_C32(0xa5d40000), SPH_C32(0x13260880), SPH_C32(0xc63d0000),
SPH_C32(0xcbb36daa), SPH_C32(0xfea14f43), SPH_C32(0x59d0b4f8),
SPH_C32(0x979961d0) },
{ SPH_C32(0xac480000), SPH_C32(0x1ba60000), SPH_C32(0x45fb1380),
SPH_C32(0x03430000), SPH_C32(0x5a85316a), SPH_C32(0x1fb250b6),
SPH_C32(0xfe72c7fe), SPH_C32(0x91e478f6), SPH_C32(0x1e4e0000),
SPH_C32(0xdecf0000), SPH_C32(0x6df80180), SPH_C32(0x77240000),
SPH_C32(0xec47079e), SPH_C32(0xf4a0694e), SPH_C32(0xcda31812),
SPH_C32(0x98aa496e) },
{ SPH_C32(0x1e4e0000), SPH_C32(0xdecf0000), SPH_C32(0x6df80180),
SPH_C32(0x77240000), SPH_C32(0xec47079e), SPH_C32(0xf4a0694e),
SPH_C32(0xcda31812), SPH_C32(0x98aa496e), SPH_C32(0xb2060000),
SPH_C32(0xc5690000), SPH_C32(0x28031200), SPH_C32(0x74670000),
SPH_C32(0xb6c236f4), SPH_C32(0xeb1239f8), SPH_C32(0x33d1dfec),
SPH_C32(0x094e3198) },
{ SPH_C32(0xaec30000), SPH_C32(0x9c4f0001), SPH_C32(0x79d1e000),
SPH_C32(0x2c150000), SPH_C32(0x45cc75b3), SPH_C32(0x6650b736),
SPH_C32(0xab92f78f), SPH_C32(0xa312567b), SPH_C32(0xdb250000),
SPH_C32(0x09290000), SPH_C32(0x49aac000), SPH_C32(0x81e10000),
SPH_C32(0xcafe6b59), SPH_C32(0x42793431), SPH_C32(0x43566b76),
SPH_C32(0xe86cba2e) },
{ SPH_C32(0xdb250000), SPH_C32(0x09290000), SPH_C32(0x49aac000),
SPH_C32(0x81e10000), SPH_C32(0xcafe6b59), SPH_C32(0x42793431),
SPH_C32(0x43566b76), SPH_C32(0xe86cba2e), SPH_C32(0x75e60000),
SPH_C32(0x95660001), SPH_C32(0x307b2000), SPH_C32(0xadf40000),
SPH_C32(0x8f321eea), SPH_C32(0x24298307), SPH_C32(0xe8c49cf9),
SPH_C32(0x4b7eec55) },
{ SPH_C32(0x58430000), SPH_C32(0x807e0000), SPH_C32(0x78330001),
SPH_C32(0xc66b3800), SPH_C32(0xe7375cdc), SPH_C32(0x79ad3fdd),
SPH_C32(0xac73fe6f), SPH_C32(0x3a4479b1), SPH_C32(0x1d5a0000),
SPH_C32(0x2b720000), SPH_C32(0x488d0000), SPH_C32(0xaf611800),
SPH_C32(0x25cb2ec5), SPH_C32(0xc879bfd0), SPH_C32(0x81a20429),
SPH_C32(0x1e7536a6) },
{ SPH_C32(0x1d5a0000), SPH_C32(0x2b720000), SPH_C32(0x488d0000),
SPH_C32(0xaf611800), SPH_C32(0x25cb2ec5), SPH_C32(0xc879bfd0),
SPH_C32(0x81a20429), SPH_C32(0x1e7536a6), SPH_C32(0x45190000),
SPH_C32(0xab0c0000), SPH_C32(0x30be0001), SPH_C32(0x690a2000),
SPH_C32(0xc2fc7219), SPH_C32(0xb1d4800d), SPH_C32(0x2dd1fa46),
SPH_C32(0x24314f17) },
{ SPH_C32(0xa53b0000), SPH_C32(0x14260000), SPH_C32(0x4e30001e),
SPH_C32(0x7cae0000), SPH_C32(0x8f9e0dd5), SPH_C32(0x78dfaa3d),
SPH_C32(0xf73168d8), SPH_C32(0x0b1b4946), SPH_C32(0x07ed0000),
SPH_C32(0xb2500000), SPH_C32(0x8774000a), SPH_C32(0x970d0000),
SPH_C32(0x437223ae), SPH_C32(0x48c76ea4), SPH_C32(0xf4786222),
SPH_C32(0x9075b1ce) },
{ SPH_C32(0x07ed0000), SPH_C32(0xb2500000), SPH_C32(0x8774000a),
SPH_C32(0x970d0000), SPH_C32(0x437223ae), SPH_C32(0x48c76ea4),
SPH_C32(0xf4786222), SPH_C32(0x9075b1ce), SPH_C32(0xa2d60000),
SPH_C32(0xa6760000), SPH_C32(0xc9440014), SPH_C32(0xeba30000),
SPH_C32(0xccec2e7b), SPH_C32(0x3018c499), SPH_C32(0x03490afa),
SPH_C32(0x9b6ef888) },
{ SPH_C32(0x88980000), SPH_C32(0x1f940000), SPH_C32(0x7fcf002e),
SPH_C32(0xfb4e0000), SPH_C32(0xf158079a), SPH_C32(0x61ae9167),
SPH_C32(0xa895706c), SPH_C32(0xe6107494), SPH_C32(0x0bc20000),
SPH_C32(0xdb630000), SPH_C32(0x7e88000c), SPH_C32(0x15860000),
SPH_C32(0x91fd48f3), SPH_C32(0x7581bb43), SPH_C32(0xf460449e),
SPH_C32(0xd8b61463) },
{ SPH_C32(0x0bc20000), SPH_C32(0xdb630000), SPH_C32(0x7e88000c),
SPH_C32(0x15860000), SPH_C32(0x91fd48f3), SPH_C32(0x7581bb43),
SPH_C32(0xf460449e), SPH_C32(0xd8b61463), SPH_C32(0x835a0000),
SPH_C32(0xc4f70000), SPH_C32(0x01470022), SPH_C32(0xeec80000),
SPH_C32(0x60a54f69), SPH_C32(0x142f2a24), SPH_C32(0x5cf534f2),
SPH_C32(0x3ea660f7) },
{ SPH_C32(0x52500000), SPH_C32(0x29540000), SPH_C32(0x6a61004e),
SPH_C32(0xf0ff0000), SPH_C32(0x9a317eec), SPH_C32(0x452341ce),
SPH_C32(0xcf568fe5), SPH_C32(0x5303130f), SPH_C32(0x538d0000),
SPH_C32(0xa9fc0000), SPH_C32(0x9ef70006), SPH_C32(0x56ff0000),
SPH_C32(0x0ae4004e), SPH_C32(0x92c5cdf9), SPH_C32(0xa9444018),
SPH_C32(0x7f975691) },
{ SPH_C32(0x538d0000), SPH_C32(0xa9fc0000), SPH_C32(0x9ef70006),
SPH_C32(0x56ff0000), SPH_C32(0x0ae4004e), SPH_C32(0x92c5cdf9),
SPH_C32(0xa9444018), SPH_C32(0x7f975691), SPH_C32(0x01dd0000),
SPH_C32(0x80a80000), SPH_C32(0xf4960048), SPH_C32(0xa6000000),
SPH_C32(0x90d57ea2), SPH_C32(0xd7e68c37), SPH_C32(0x6612cffd),
SPH_C32(0x2c94459e) },
{ SPH_C32(0xe6280000), SPH_C32(0x4c4b0000), SPH_C32(0xa8550000),
SPH_C32(0xd3d002e0), SPH_C32(0xd86130b8), SPH_C32(0x98a7b0da),
SPH_C32(0x289506b4), SPH_C32(0xd75a4897), SPH_C32(0xf0c50000),
SPH_C32(0x59230000), SPH_C32(0x45820000), SPH_C32(0xe18d00c0),
SPH_C32(0x3b6d0631), SPH_C32(0xc2ed5699), SPH_C32(0xcbe0fe1c),
SPH_C32(0x56a7b19f) },
{ SPH_C32(0xf0c50000), SPH_C32(0x59230000), SPH_C32(0x45820000),
SPH_C32(0xe18d00c0), SPH_C32(0x3b6d0631), SPH_C32(0xc2ed5699),
SPH_C32(0xcbe0fe1c), SPH_C32(0x56a7b19f), SPH_C32(0x16ed0000),
SPH_C32(0x15680000), SPH_C32(0xedd70000), SPH_C32(0x325d0220),
SPH_C32(0xe30c3689), SPH_C32(0x5a4ae643), SPH_C32(0xe375f8a8),
SPH_C32(0x81fdf908) },
{ SPH_C32(0xb4310000), SPH_C32(0x77330000), SPH_C32(0xb15d0000),
SPH_C32(0x7fd004e0), SPH_C32(0x78a26138), SPH_C32(0xd116c35d),
SPH_C32(0xd256d489), SPH_C32(0x4e6f74de), SPH_C32(0xe3060000),
SPH_C32(0xbdc10000), SPH_C32(0x87130000), SPH_C32(0xbff20060),
SPH_C32(0x2eba0a1a), SPH_C32(0x8db53751), SPH_C32(0x73c5ab06),
SPH_C32(0x5bd61539) },
{ SPH_C32(0xe3060000), SPH_C32(0xbdc10000), SPH_C32(0x87130000),
SPH_C32(0xbff20060), SPH_C32(0x2eba0a1a), SPH_C32(0x8db53751),
SPH_C32(0x73c5ab06), SPH_C32(0x5bd61539), SPH_C32(0x57370000),
SPH_C32(0xcaf20000), SPH_C32(0x364e0000), SPH_C32(0xc0220480),
SPH_C32(0x56186b22), SPH_C32(0x5ca3f40c), SPH_C32(0xa1937f8f),
SPH_C32(0x15b961e7) },
{ SPH_C32(0x02f20000), SPH_C32(0xa2810000), SPH_C32(0x873f0000),
SPH_C32(0xe36c7800), SPH_C32(0x1e1d74ef), SPH_C32(0x073d2bd6),
SPH_C32(0xc4c23237), SPH_C32(0x7f32259e), SPH_C32(0xbadd0000),
SPH_C32(0x13ad0000), SPH_C32(0xb7e70000), SPH_C32(0xf7282800),
SPH_C32(0xdf45144d), SPH_C32(0x361ac33a), SPH_C32(0xea5a8d14),
SPH_C32(0x2a2c18f0) },
{ SPH_C32(0xbadd0000), SPH_C32(0x13ad0000), SPH_C32(0xb7e70000),
SPH_C32(0xf7282800), SPH_C32(0xdf45144d), SPH_C32(0x361ac33a),
SPH_C32(0xea5a8d14), SPH_C32(0x2a2c18f0), SPH_C32(0xb82f0000),
SPH_C32(0xb12c0000), SPH_C32(0x30d80000), SPH_C32(0x14445000),
SPH_C32(0xc15860a2), SPH_C32(0x3127e8ec), SPH_C32(0x2e98bf23),
SPH_C32(0x551e3d6e) },
{ SPH_C32(0x1e6c0000), SPH_C32(0xc4420000), SPH_C32(0x8a2e0000),
SPH_C32(0xbcb6b800), SPH_C32(0x2c4413b6), SPH_C32(0x8bfdd3da),
SPH_C32(0x6a0c1bc8), SPH_C32(0xb99dc2eb), SPH_C32(0x92560000),
SPH_C32(0x1eda0000), SPH_C32(0xea510000), SPH_C32(0xe8b13000),
SPH_C32(0xa93556a5), SPH_C32(0xebfb6199), SPH_C32(0xb15c2254),
SPH_C32(0x33c5244f) },
{ SPH_C32(0x92560000), SPH_C32(0x1eda0000), SPH_C32(0xea510000),
SPH_C32(0xe8b13000), SPH_C32(0xa93556a5), SPH_C32(0xebfb6199),
SPH_C32(0xb15c2254), SPH_C32(0x33c5244f), SPH_C32(0x8c3a0000),
SPH_C32(0xda980000), SPH_C32(0x607f0000), SPH_C32(0x54078800),
SPH_C32(0x85714513), SPH_C32(0x6006b243), SPH_C32(0xdb50399c),
SPH_C32(0x8a58e6a4) },
{ SPH_C32(0x033d0000), SPH_C32(0x08b30000), SPH_C32(0xf33a0000),
SPH_C32(0x3ac20007), SPH_C32(0x51298a50), SPH_C32(0x6b6e661f),
SPH_C32(0x0ea5cfe3), SPH_C32(0xe6da7ffe), SPH_C32(0xa8da0000),
SPH_C32(0x96be0000), SPH_C32(0x5c1d0000), SPH_C32(0x07da0002),
SPH_C32(0x7d669583), SPH_C32(0x1f98708a), SPH_C32(0xbb668808),
SPH_C32(0xda878000) },
{ SPH_C32(0xa8da0000), SPH_C32(0x96be0000), SPH_C32(0x5c1d0000),
SPH_C32(0x07da0002), SPH_C32(0x7d669583), SPH_C32(0x1f98708a),
SPH_C32(0xbb668808), SPH_C32(0xda878000), SPH_C32(0xabe70000),
SPH_C32(0x9e0d0000), SPH_C32(0xaf270000), SPH_C32(0x3d180005),
SPH_C32(0x2c4f1fd3), SPH_C32(0x74f61695), SPH_C32(0xb5c347eb),
SPH_C32(0x3c5dfffe) },
{ SPH_C32(0x01930000), SPH_C32(0xe7820000), SPH_C32(0xedfb0000),
SPH_C32(0xcf0c000b), SPH_C32(0x8dd08d58), SPH_C32(0xbca3b42e),
SPH_C32(0x063661e1), SPH_C32(0x536f9e7b), SPH_C32(0x92280000),
SPH_C32(0xdc850000), SPH_C32(0x57fa0000), SPH_C32(0x56dc0003),
SPH_C32(0xbae92316), SPH_C32(0x5aefa30c), SPH_C32(0x90cef752),
SPH_C32(0x7b1675d7) },
{ SPH_C32(0x92280000), SPH_C32(0xdc850000), SPH_C32(0x57fa0000),
SPH_C32(0x56dc0003), SPH_C32(0xbae92316), SPH_C32(0x5aefa30c),
SPH_C32(0x90cef752), SPH_C32(0x7b1675d7), SPH_C32(0x93bb0000),
SPH_C32(0x3b070000), SPH_C32(0xba010000), SPH_C32(0x99d00008),
SPH_C32(0x3739ae4e), SPH_C32(0xe64c1722), SPH_C32(0x96f896b3),
SPH_C32(0x2879ebac) },
{ SPH_C32(0x5fa80000), SPH_C32(0x56030000), SPH_C32(0x43ae0000),
SPH_C32(0x64f30013), SPH_C32(0x257e86bf), SPH_C32(0x1311944e),
SPH_C32(0x541e95bf), SPH_C32(0x8ea4db69), SPH_C32(0x00440000),
SPH_C32(0x7f480000), SPH_C32(0xda7c0000), SPH_C32(0x2a230001),
SPH_C32(0x3badc9cc), SPH_C32(0xa9b69c87), SPH_C32(0x030a9e60),
SPH_C32(0xbe0a679e) },
{ SPH_C32(0x00440000), SPH_C32(0x7f480000), SPH_C32(0xda7c0000),
SPH_C32(0x2a230001), SPH_C32(0x3badc9cc), SPH_C32(0xa9b69c87),
SPH_C32(0x030a9e60), SPH_C32(0xbe0a679e), SPH_C32(0x5fec0000),
SPH_C32(0x294b0000), SPH_C32(0x99d20000), SPH_C32(0x4ed00012),
SPH_C32(0x1ed34f73), SPH_C32(0xbaa708c9), SPH_C32(0x57140bdf),
SPH_C32(0x30aebcf7) },
{ SPH_C32(0xee930000), SPH_C32(0xd6070000), SPH_C32(0x92c10000),
SPH_C32(0x2b9801e0), SPH_C32(0x9451287c), SPH_C32(0x3b6cfb57),
SPH_C32(0x45312374), SPH_C32(0x201f6a64), SPH_C32(0x7b280000),
SPH_C32(0x57420000), SPH_C32(0xa9e50000), SPH_C32(0x634300a0),
SPH_C32(0x9edb442f), SPH_C32(0x6d9995bb), SPH_C32(0x27f83b03),
SPH_C32(0xc7ff60f0) },
{ SPH_C32(0x7b280000), SPH_C32(0x57420000), SPH_C32(0xa9e50000),
SPH_C32(0x634300a0), SPH_C32(0x9edb442f), SPH_C32(0x6d9995bb),
SPH_C32(0x27f83b03), SPH_C32(0xc7ff60f0), SPH_C32(0x95bb0000),
SPH_C32(0x81450000), SPH_C32(0x3b240000), SPH_C32(0x48db0140),
SPH_C32(0x0a8a6c53), SPH_C32(0x56f56eec), SPH_C32(0x62c91877),
SPH_C32(0xe7e00a94) }
};
#define INPUT_BIG do { \
__constant const sph_u32 *tp = &T512[0][0]; \
unsigned u, v; \
m0 = 0; \
m1 = 0; \
m2 = 0; \
m3 = 0; \
m4 = 0; \
m5 = 0; \
m6 = 0; \
m7 = 0; \
m8 = 0; \
m9 = 0; \
mA = 0; \
mB = 0; \
mC = 0; \
mD = 0; \
mE = 0; \
mF = 0; \
for (u = 0; u < 8; u ++) { \
unsigned db = buf(u); \
for (v = 0; v < 8; v ++, db >>= 1) { \
sph_u32 dm = SPH_T32(-(sph_u32)(db & 1)); \
m0 ^= dm & *tp ++; \
m1 ^= dm & *tp ++; \
m2 ^= dm & *tp ++; \
m3 ^= dm & *tp ++; \
m4 ^= dm & *tp ++; \
m5 ^= dm & *tp ++; \
m6 ^= dm & *tp ++; \
m7 ^= dm & *tp ++; \
m8 ^= dm & *tp ++; \
m9 ^= dm & *tp ++; \
mA ^= dm & *tp ++; \
mB ^= dm & *tp ++; \
mC ^= dm & *tp ++; \
mD ^= dm & *tp ++; \
mE ^= dm & *tp ++; \
mF ^= dm & *tp ++; \
} \
} \
} while (0)
#define INPUT_BIG_LOCAL do { \
__local sph_u32 *tp = &(T512_L[0]); \
unsigned u, v; \
m0 = 0; \
m1 = 0; \
m2 = 0; \
m3 = 0; \
m4 = 0; \
m5 = 0; \
m6 = 0; \
m7 = 0; \
m8 = 0; \
m9 = 0; \
mA = 0; \
mB = 0; \
mC = 0; \
mD = 0; \
mE = 0; \
mF = 0; \
for (u = 0; u < 8; u ++) { \
unsigned db = buf(u); \
for (v = 0; v < 8; v ++, db >>= 1) { \
sph_u32 dm = SPH_T32(-(sph_u32)(db & 1)); \
m0 ^= dm & *tp ++; \
m1 ^= dm & *tp ++; \
m2 ^= dm & *tp ++; \
m3 ^= dm & *tp ++; \
m4 ^= dm & *tp ++; \
m5 ^= dm & *tp ++; \
m6 ^= dm & *tp ++; \
m7 ^= dm & *tp ++; \
m8 ^= dm & *tp ++; \
m9 ^= dm & *tp ++; \
mA ^= dm & *tp ++; \
mB ^= dm & *tp ++; \
mC ^= dm & *tp ++; \
mD ^= dm & *tp ++; \
mE ^= dm & *tp ++; \
mF ^= dm & *tp ++; \
} \
} \
} while (0)
#endif

516
kernel/marucoin-mod.cl

@ -33,6 +33,8 @@ @@ -33,6 +33,8 @@
#ifndef X13MOD_CL
#define X13MOD_CL
#define DEBUG(x)
#if __ENDIAN_LITTLE__
#define SPH_LITTLE_ENDIAN 1
#else
@ -54,15 +56,15 @@ typedef int sph_s32; @@ -54,15 +56,15 @@ typedef int sph_s32;
#define SPH_64 1
#define SPH_64_TRUE 1
#define SPH_C32(x) ((sph_u32)(x ## U))
#define SPH_T32(x) ((as_uint(x)) & SPH_C32(0xFFFFFFFF))
#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n)))
#define SPH_C32(x) ((sph_u32)(x ## U))
#define SPH_T32(x) (as_uint(x))
#define SPH_ROTL32(x, n) rotate(as_uint(x), as_uint(n))
#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n)))
#define SPH_C64(x) ((sph_u64)(x ## UL))
#define SPH_T64(x) ((as_ulong(x)) & SPH_C64(0xFFFFFFFFFFFFFFFF))
#define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n))))
#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n)))
#define SPH_C64(x) ((sph_u64)(x ## UL))
#define SPH_T64(x) (as_ulong(x))
#define SPH_ROTL64(x, n) rotate(as_ulong(x), (n) & 0xFFFFFFFFFFFFFFFFUL)
#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n)))
#define SPH_ECHO_64 1
#define SPH_KECCAK_64 1
@ -74,9 +76,9 @@ typedef int sph_s32; @@ -74,9 +76,9 @@ typedef int sph_s32;
#define SPH_SMALL_FOOTPRINT_GROESTL 0
#define SPH_GROESTL_BIG_ENDIAN 0
#define SPH_CUBEHASH_UNROLL 0
#define SPH_KECCAK_UNROLL 0
#define SPH_KECCAK_UNROLL 1
#if !defined SPH_HAMSI_EXPAND_BIG
#define SPH_HAMSI_EXPAND_BIG 4
#define SPH_HAMSI_EXPAND_BIG 1
#endif
#include "blake.cl"
@ -121,10 +123,10 @@ typedef union { @@ -121,10 +123,10 @@ typedef union {
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search(__global unsigned char* block, __global hash_t* hashes)
{
uint gid = get_global_id(0);
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
// blake
uint gid = get_global_id(0);
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
// blake
sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B);
sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1);
sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F);
@ -134,11 +136,12 @@ __kernel void search(__global unsigned char* block, __global hash_t* hashes) @@ -134,11 +136,12 @@ __kernel void search(__global unsigned char* block, __global hash_t* hashes)
if ((T0 = SPH_T64(T0 + 1024)) < 1024)
T1 = SPH_T64(T1 + 1);
sph_u64 M0, M1, M2, M3, M4, M5, M6, M7;
sph_u64 M8, M9, MA, MB, MC, MD, ME, MF;
sph_u64 V0, V1, V2, V3, V4, V5, V6, V7;
sph_u64 V8, V9, VA, VB, VC, VD, VE, VF;
M0 = DEC64BE(block + 0);
M1 = DEC64BE(block + 8);
M2 = DEC64BE(block + 16);
@ -169,25 +172,25 @@ __kernel void search(__global unsigned char* block, __global hash_t* hashes) @@ -169,25 +172,25 @@ __kernel void search(__global unsigned char* block, __global hash_t* hashes)
hash->h8[6] = H6;
hash->h8[7] = H7;
barrier(CLK_GLOBAL_MEM_FENCE);
barrier(CLK_GLOBAL_MEM_FENCE);
}
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search1(__global hash_t* hashes)
{
uint gid = get_global_id(0);
uint gid = get_global_id(0);
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
// bmw
sph_u64 BMW_H[16];
#pragma unroll 16
#pragma unroll 16
for(unsigned u = 0; u < 16; u++)
BMW_H[u] = BMW_IV512[u];
sph_u64 mv[16],q[32];
sph_u64 tmp;
sph_u64 tmp;
mv[0] = SWAP8(hash->h8[0]);
mv[1] = SWAP8(hash->h8[1]);
mv[2] = SWAP8(hash->h8[2]);
@ -206,7 +209,7 @@ __kernel void search1(__global hash_t* hashes) @@ -206,7 +209,7 @@ __kernel void search1(__global hash_t* hashes)
mv[15] = SPH_C64(512);
tmp = (mv[5] ^ BMW_H[5]) - (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]) + (mv[14] ^ BMW_H[14]);
q[0] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[1];
q[0] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[1];
tmp = (mv[6] ^ BMW_H[6]) - (mv[8] ^ BMW_H[8]) + (mv[11] ^ BMW_H[11]) + (mv[14] ^ BMW_H[14]) - (mv[15] ^ BMW_H[15]);
q[1] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[2];
tmp = (mv[0] ^ BMW_H[0]) + (mv[7] ^ BMW_H[7]) + (mv[9] ^ BMW_H[9]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]);
@ -216,7 +219,7 @@ __kernel void search1(__global hash_t* hashes) @@ -216,7 +219,7 @@ __kernel void search1(__global hash_t* hashes)
tmp = (mv[1] ^ BMW_H[1]) + (mv[2] ^ BMW_H[2]) + (mv[9] ^ BMW_H[9]) - (mv[11] ^ BMW_H[11]) - (mv[14] ^ BMW_H[14]);
q[4] = (SHR(tmp, 1) ^ tmp) + BMW_H[5];
tmp = (mv[3] ^ BMW_H[3]) - (mv[2] ^ BMW_H[2]) + (mv[10] ^ BMW_H[10]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]);
q[5] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[6];
q[5] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[6];
tmp = (mv[4] ^ BMW_H[4]) - (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) - (mv[11] ^ BMW_H[11]) + (mv[13] ^ BMW_H[13]);
q[6] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[7];
tmp = (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[5] ^ BMW_H[5]) - (mv[12] ^ BMW_H[12]) - (mv[14] ^ BMW_H[14]);
@ -226,7 +229,7 @@ __kernel void search1(__global hash_t* hashes) @@ -226,7 +229,7 @@ __kernel void search1(__global hash_t* hashes)
tmp = (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) + (mv[6] ^ BMW_H[6]) - (mv[7] ^ BMW_H[7]) + (mv[14] ^ BMW_H[14]);
q[9] = (SHR(tmp, 1) ^ tmp) + BMW_H[10];
tmp = (mv[8] ^ BMW_H[8]) - (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[7] ^ BMW_H[7]) + (mv[15] ^ BMW_H[15]);
q[10] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[11];
q[10] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[11];
tmp = (mv[8] ^ BMW_H[8]) - (mv[0] ^ BMW_H[0]) - (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) + (mv[9] ^ BMW_H[9]);
q[11] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[12];
tmp = (mv[1] ^ BMW_H[1]) + (mv[3] ^ BMW_H[3]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[10] ^ BMW_H[10]);
@ -237,63 +240,63 @@ __kernel void search1(__global hash_t* hashes) @@ -237,63 +240,63 @@ __kernel void search1(__global hash_t* hashes)
q[14] = (SHR(tmp, 1) ^ tmp) + BMW_H[15];
tmp = (mv[12] ^ BMW_H[12]) - (mv[4] ^ BMW_H[4]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[13] ^ BMW_H[13]);
q[15] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[0];
#pragma unroll 2
#pragma unroll 2
for(int i=0;i<2;i++)
{
q[i+16] =
(SHR(q[i], 1) ^ SHL(q[i], 2) ^ SPH_ROTL64(q[i], 13) ^ SPH_ROTL64(q[i], 43)) +
(SHR(q[i+1], 2) ^ SHL(q[i+1], 1) ^ SPH_ROTL64(q[i+1], 19) ^ SPH_ROTL64(q[i+1], 53)) +
(SHR(q[i+2], 2) ^ SHL(q[i+2], 2) ^ SPH_ROTL64(q[i+2], 28) ^ SPH_ROTL64(q[i+2], 59)) +
(SHR(q[i+3], 1) ^ SHL(q[i+3], 3) ^ SPH_ROTL64(q[i+3], 4) ^ SPH_ROTL64(q[i+3], 37)) +
(SHR(q[i+4], 1) ^ SHL(q[i+4], 2) ^ SPH_ROTL64(q[i+4], 13) ^ SPH_ROTL64(q[i+4], 43)) +
(SHR(q[i+5], 2) ^ SHL(q[i+5], 1) ^ SPH_ROTL64(q[i+5], 19) ^ SPH_ROTL64(q[i+5], 53)) +
(SHR(q[i+6], 2) ^ SHL(q[i+6], 2) ^ SPH_ROTL64(q[i+6], 28) ^ SPH_ROTL64(q[i+6], 59)) +
(SHR(q[i+7], 1) ^ SHL(q[i+7], 3) ^ SPH_ROTL64(q[i+7], 4) ^ SPH_ROTL64(q[i+7], 37)) +
(SHR(q[i+8], 1) ^ SHL(q[i+8], 2) ^ SPH_ROTL64(q[i+8], 13) ^ SPH_ROTL64(q[i+8], 43)) +
(SHR(q[i+9], 2) ^ SHL(q[i+9], 1) ^ SPH_ROTL64(q[i+9], 19) ^ SPH_ROTL64(q[i+9], 53)) +
(SHR(q[i+10], 2) ^ SHL(q[i+10], 2) ^ SPH_ROTL64(q[i+10], 28) ^ SPH_ROTL64(q[i+10], 59)) +
(SHR(q[i+11], 1) ^ SHL(q[i+11], 3) ^ SPH_ROTL64(q[i+11], 4) ^ SPH_ROTL64(q[i+11], 37)) +
(SHR(q[i+12], 1) ^ SHL(q[i+12], 2) ^ SPH_ROTL64(q[i+12], 13) ^ SPH_ROTL64(q[i+12], 43)) +
(SHR(q[i+13], 2) ^ SHL(q[i+13], 1) ^ SPH_ROTL64(q[i+13], 19) ^ SPH_ROTL64(q[i+13], 53)) +
(SHR(q[i+14], 2) ^ SHL(q[i+14], 2) ^ SPH_ROTL64(q[i+14], 28) ^ SPH_ROTL64(q[i+14], 59)) +
(SHR(q[i+15], 1) ^ SHL(q[i+15], 3) ^ SPH_ROTL64(q[i+15], 4) ^ SPH_ROTL64(q[i+15], 37)) +
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]);
(SHR(q[i], 1) ^ SHL(q[i], 2) ^ SPH_ROTL64(q[i], 13) ^ SPH_ROTL64(q[i], 43)) +
(SHR(q[i+1], 2) ^ SHL(q[i+1], 1) ^ SPH_ROTL64(q[i+1], 19) ^ SPH_ROTL64(q[i+1], 53)) +
(SHR(q[i+2], 2) ^ SHL(q[i+2], 2) ^ SPH_ROTL64(q[i+2], 28) ^ SPH_ROTL64(q[i+2], 59)) +
(SHR(q[i+3], 1) ^ SHL(q[i+3], 3) ^ SPH_ROTL64(q[i+3], 4) ^ SPH_ROTL64(q[i+3], 37)) +
(SHR(q[i+4], 1) ^ SHL(q[i+4], 2) ^ SPH_ROTL64(q[i+4], 13) ^ SPH_ROTL64(q[i+4], 43)) +
(SHR(q[i+5], 2) ^ SHL(q[i+5], 1) ^ SPH_ROTL64(q[i+5], 19) ^ SPH_ROTL64(q[i+5], 53)) +
(SHR(q[i+6], 2) ^ SHL(q[i+6], 2) ^ SPH_ROTL64(q[i+6], 28) ^ SPH_ROTL64(q[i+6], 59)) +
(SHR(q[i+7], 1) ^ SHL(q[i+7], 3) ^ SPH_ROTL64(q[i+7], 4) ^ SPH_ROTL64(q[i+7], 37)) +
(SHR(q[i+8], 1) ^ SHL(q[i+8], 2) ^ SPH_ROTL64(q[i+8], 13) ^ SPH_ROTL64(q[i+8], 43)) +
(SHR(q[i+9], 2) ^ SHL(q[i+9], 1) ^ SPH_ROTL64(q[i+9], 19) ^ SPH_ROTL64(q[i+9], 53)) +
(SHR(q[i+10], 2) ^ SHL(q[i+10], 2) ^ SPH_ROTL64(q[i+10], 28) ^ SPH_ROTL64(q[i+10], 59)) +
(SHR(q[i+11], 1) ^ SHL(q[i+11], 3) ^ SPH_ROTL64(q[i+11], 4) ^ SPH_ROTL64(q[i+11], 37)) +
(SHR(q[i+12], 1) ^ SHL(q[i+12], 2) ^ SPH_ROTL64(q[i+12], 13) ^ SPH_ROTL64(q[i+12], 43)) +
(SHR(q[i+13], 2) ^ SHL(q[i+13], 1) ^ SPH_ROTL64(q[i+13], 19) ^ SPH_ROTL64(q[i+13], 53)) +
(SHR(q[i+14], 2) ^ SHL(q[i+14], 2) ^ SPH_ROTL64(q[i+14], 28) ^ SPH_ROTL64(q[i+14], 59)) +
(SHR(q[i+15], 1) ^ SHL(q[i+15], 3) ^ SPH_ROTL64(q[i+15], 4) ^ SPH_ROTL64(q[i+15], 37)) +
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]);
}
#pragma unroll 4
#pragma unroll 4
for(int i=2;i<6;i++)
{
q[i+16] = CONST_EXP2 +
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]);
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]);
}
#pragma unroll 3
#pragma unroll 3
for(int i=6;i<9;i++)
{
q[i+16] = CONST_EXP2 +
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i+7]);
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i+7]);
}
#pragma unroll 4
#pragma unroll 4
for(int i=9;i<13;i++)
{
q[i+16] = CONST_EXP2 +
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]);
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]);
}
#pragma unroll 3
#pragma unroll 3
for(int i=13;i<16;i++)
{
q[i+16] = CONST_EXP2 +
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i-13], (i-13)+1) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]);
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i-13], (i-13)+1) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]);
}
sph_u64 XL64 = q[16]^q[17]^q[18]^q[19]^q[20]^q[21]^q[22]^q[23];
sph_u64 XH64 = XL64^q[24]^q[25]^q[26]^q[27]^q[28]^q[29]^q[30]^q[31];
@ -315,7 +318,7 @@ __kernel void search1(__global hash_t* hashes) @@ -315,7 +318,7 @@ __kernel void search1(__global hash_t* hashes)
BMW_H[14] = SPH_ROTL64(BMW_H[2],15) + ( XH64 ^ q[30] ^ mv[14]) + (SHR(XL64,7) ^ q[21] ^ q[14]);
BMW_H[15] = SPH_ROTL64(BMW_H[3],16) + ( XH64 ^ q[31] ^ mv[15]) + (SHR(XL64,2) ^ q[22] ^ q[15]);
#pragma unroll 16
#pragma unroll 16
for(int i=0;i<16;i++)
{
mv[i] = BMW_H[i];
@ -323,7 +326,7 @@ __kernel void search1(__global hash_t* hashes) @@ -323,7 +326,7 @@ __kernel void search1(__global hash_t* hashes)
}
tmp = (mv[5] ^ BMW_H[5]) - (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]) + (mv[14] ^ BMW_H[14]);
q[0] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[1];
q[0] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[1];
tmp = (mv[6] ^ BMW_H[6]) - (mv[8] ^ BMW_H[8]) + (mv[11] ^ BMW_H[11]) + (mv[14] ^ BMW_H[14]) - (mv[15] ^ BMW_H[15]);
q[1] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[2];
tmp = (mv[0] ^ BMW_H[0]) + (mv[7] ^ BMW_H[7]) + (mv[9] ^ BMW_H[9]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]);
@ -333,7 +336,7 @@ __kernel void search1(__global hash_t* hashes) @@ -333,7 +336,7 @@ __kernel void search1(__global hash_t* hashes)
tmp = (mv[1] ^ BMW_H[1]) + (mv[2] ^ BMW_H[2]) + (mv[9] ^ BMW_H[9]) - (mv[11] ^ BMW_H[11]) - (mv[14] ^ BMW_H[14]);
q[4] = (SHR(tmp, 1) ^ tmp) + BMW_H[5];
tmp = (mv[3] ^ BMW_H[3]) - (mv[2] ^ BMW_H[2]) + (mv[10] ^ BMW_H[10]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]);
q[5] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[6];
q[5] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[6];
tmp = (mv[4] ^ BMW_H[4]) - (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) - (mv[11] ^ BMW_H[11]) + (mv[13] ^ BMW_H[13]);
q[6] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[7];
tmp = (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[5] ^ BMW_H[5]) - (mv[12] ^ BMW_H[12]) - (mv[14] ^ BMW_H[14]);
@ -343,7 +346,7 @@ __kernel void search1(__global hash_t* hashes) @@ -343,7 +346,7 @@ __kernel void search1(__global hash_t* hashes)
tmp = (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) + (mv[6] ^ BMW_H[6]) - (mv[7] ^ BMW_H[7]) + (mv[14] ^ BMW_H[14]);
q[9] = (SHR(tmp, 1) ^ tmp) + BMW_H[10];
tmp = (mv[8] ^ BMW_H[8]) - (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[7] ^ BMW_H[7]) + (mv[15] ^ BMW_H[15]);
q[10] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[11];
q[10] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[11];
tmp = (mv[8] ^ BMW_H[8]) - (mv[0] ^ BMW_H[0]) - (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) + (mv[9] ^ BMW_H[9]);
q[11] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[12];
tmp = (mv[1] ^ BMW_H[1]) + (mv[3] ^ BMW_H[3]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[10] ^ BMW_H[10]);
@ -354,65 +357,66 @@ __kernel void search1(__global hash_t* hashes) @@ -354,65 +357,66 @@ __kernel void search1(__global hash_t* hashes)
q[14] = (SHR(tmp, 1) ^ tmp) + BMW_H[15];
tmp = (mv[12] ^ BMW_H[12]) - (mv[4] ^ BMW_H[4]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[13] ^ BMW_H[13]);
q[15] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[0];
#pragma unroll 2
#pragma unroll 2
for(int i=0;i<2;i++)
{
q[i+16] =
(SHR(q[i], 1) ^ SHL(q[i], 2) ^ SPH_ROTL64(q[i], 13) ^ SPH_ROTL64(q[i], 43)) +
(SHR(q[i+1], 2) ^ SHL(q[i+1], 1) ^ SPH_ROTL64(q[i+1], 19) ^ SPH_ROTL64(q[i+1], 53)) +
(SHR(q[i+2], 2) ^ SHL(q[i+2], 2) ^ SPH_ROTL64(q[i+2], 28) ^ SPH_ROTL64(q[i+2], 59)) +
(SHR(q[i+3], 1) ^ SHL(q[i+3], 3) ^ SPH_ROTL64(q[i+3], 4) ^ SPH_ROTL64(q[i+3], 37)) +
(SHR(q[i+4], 1) ^ SHL(q[i+4], 2) ^ SPH_ROTL64(q[i+4], 13) ^ SPH_ROTL64(q[i+4], 43)) +
(SHR(q[i+5], 2) ^ SHL(q[i+5], 1) ^ SPH_ROTL64(q[i+5], 19) ^ SPH_ROTL64(q[i+5], 53)) +
(SHR(q[i+6], 2) ^ SHL(q[i+6], 2) ^ SPH_ROTL64(q[i+6], 28) ^ SPH_ROTL64(q[i+6], 59)) +
(SHR(q[i+7], 1) ^ SHL(q[i+7], 3) ^ SPH_ROTL64(q[i+7], 4) ^ SPH_ROTL64(q[i+7], 37)) +
(SHR(q[i+8], 1) ^ SHL(q[i+8], 2) ^ SPH_ROTL64(q[i+8], 13) ^ SPH_ROTL64(q[i+8], 43)) +
(SHR(q[i+9], 2) ^ SHL(q[i+9], 1) ^ SPH_ROTL64(q[i+9], 19) ^ SPH_ROTL64(q[i+9], 53)) +
(SHR(q[i+10], 2) ^ SHL(q[i+10], 2) ^ SPH_ROTL64(q[i+10], 28) ^ SPH_ROTL64(q[i+10], 59)) +
(SHR(q[i+11], 1) ^ SHL(q[i+11], 3) ^ SPH_ROTL64(q[i+11], 4) ^ SPH_ROTL64(q[i+11], 37)) +
(SHR(q[i+12], 1) ^ SHL(q[i+12], 2) ^ SPH_ROTL64(q[i+12], 13) ^ SPH_ROTL64(q[i+12], 43)) +
(SHR(q[i+13], 2) ^ SHL(q[i+13], 1) ^ SPH_ROTL64(q[i+13], 19) ^ SPH_ROTL64(q[i+13], 53)) +
(SHR(q[i+14], 2) ^ SHL(q[i+14], 2) ^ SPH_ROTL64(q[i+14], 28) ^ SPH_ROTL64(q[i+14], 59)) +
(SHR(q[i+15], 1) ^ SHL(q[i+15], 3) ^ SPH_ROTL64(q[i+15], 4) ^ SPH_ROTL64(q[i+15], 37)) +
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]);
(SHR(q[i], 1) ^ SHL(q[i], 2) ^ SPH_ROTL64(q[i], 13) ^ SPH_ROTL64(q[i], 43)) +
(SHR(q[i+1], 2) ^ SHL(q[i+1], 1) ^ SPH_ROTL64(q[i+1], 19) ^ SPH_ROTL64(q[i+1], 53)) +
(SHR(q[i+2], 2) ^ SHL(q[i+2], 2) ^ SPH_ROTL64(q[i+2], 28) ^ SPH_ROTL64(q[i+2], 59)) +
(SHR(q[i+3], 1) ^ SHL(q[i+3], 3) ^ SPH_ROTL64(q[i+3], 4) ^ SPH_ROTL64(q[i+3], 37)) +
(SHR(q[i+4], 1) ^ SHL(q[i+4], 2) ^ SPH_ROTL64(q[i+4], 13) ^ SPH_ROTL64(q[i+4], 43)) +
(SHR(q[i+5], 2) ^ SHL(q[i+5], 1) ^ SPH_ROTL64(q[i+5], 19) ^ SPH_ROTL64(q[i+5], 53)) +
(SHR(q[i+6], 2) ^ SHL(q[i+6], 2) ^ SPH_ROTL64(q[i+6], 28) ^ SPH_ROTL64(q[i+6], 59)) +
(SHR(q[i+7], 1) ^ SHL(q[i+7], 3) ^ SPH_ROTL64(q[i+7], 4) ^ SPH_ROTL64(q[i+7], 37)) +
(SHR(q[i+8], 1) ^ SHL(q[i+8], 2) ^ SPH_ROTL64(q[i+8], 13) ^ SPH_ROTL64(q[i+8], 43)) +
(SHR(q[i+9], 2) ^ SHL(q[i+9], 1) ^ SPH_ROTL64(q[i+9], 19) ^ SPH_ROTL64(q[i+9], 53)) +
(SHR(q[i+10], 2) ^ SHL(q[i+10], 2) ^ SPH_ROTL64(q[i+10], 28) ^ SPH_ROTL64(q[i+10], 59)) +
(SHR(q[i+11], 1) ^ SHL(q[i+11], 3) ^ SPH_ROTL64(q[i+11], 4) ^ SPH_ROTL64(q[i+11], 37)) +
(SHR(q[i+12], 1) ^ SHL(q[i+12], 2) ^ SPH_ROTL64(q[i+12], 13) ^ SPH_ROTL64(q[i+12], 43)) +
(SHR(q[i+13], 2) ^ SHL(q[i+13], 1) ^ SPH_ROTL64(q[i+13], 19) ^ SPH_ROTL64(q[i+13], 53)) +
(SHR(q[i+14], 2) ^ SHL(q[i+14], 2) ^ SPH_ROTL64(q[i+14], 28) ^ SPH_ROTL64(q[i+14], 59)) +
(SHR(q[i+15], 1) ^ SHL(q[i+15], 3) ^ SPH_ROTL64(q[i+15], 4) ^ SPH_ROTL64(q[i+15], 37)) +
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]);
}
#pragma unroll 4
#pragma unroll 4
for(int i=2;i<6;i++)
{
q[i+16] = CONST_EXP2 +
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]);
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]);
}
#pragma unroll 3
#pragma unroll 3
for(int i=6;i<9;i++)
{
q[i+16] = CONST_EXP2 +
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i+7]);
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i+7]);
}
#pragma unroll 4
#pragma unroll 4
for(int i=9;i<13;i++)
{
q[i+16] = CONST_EXP2 +
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]);
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]);
}
#pragma unroll 3
#pragma unroll 3
for(int i=13;i<16;i++)
{
q[i+16] = CONST_EXP2 +
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i-13], (i-13)+1) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]);
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i-13], (i-13)+1) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]);
}
XL64 = q[16]^q[17]^q[18]^q[19]^q[20]^q[21]^q[22]^q[23];
XH64 = XL64^q[24]^q[25]^q[26]^q[27]^q[28]^q[29]^q[30]^q[31];
XH64 = XL64^q[24]^q[25]^q[26]^q[27]^q[28]^q[29]^q[30]^q[31];
BMW_H[0] = (SHL(XH64, 5) ^ SHR(q[16],5) ^ mv[0]) + ( XL64 ^ q[24] ^ q[0]);
BMW_H[1] = (SHR(XH64, 7) ^ SHL(q[17],8) ^ mv[1]) + ( XL64 ^ q[25] ^ q[1]);
BMW_H[2] = (SHR(XH64, 5) ^ SHL(q[18],5) ^ mv[2]) + ( XL64 ^ q[26] ^ q[2]);
@ -439,8 +443,8 @@ __kernel void search1(__global hash_t* hashes) @@ -439,8 +443,8 @@ __kernel void search1(__global hash_t* hashes)
hash->h8[5] = SWAP8(BMW_H[13]);
hash->h8[6] = SWAP8(BMW_H[14]);
hash->h8[7] = SWAP8(BMW_H[15]);
barrier(CLK_GLOBAL_MEM_FENCE);
barrier(CLK_GLOBAL_MEM_FENCE);
}
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
@ -449,94 +453,67 @@ __kernel void search2(__global hash_t* hashes) @@ -449,94 +453,67 @@ __kernel void search2(__global hash_t* hashes)
uint gid = get_global_id(0);
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
#if !SPH_SMALL_FOOTPRINT_GROESTL
__local sph_u64 T0_C[256], T1_C[256], T2_C[256], T3_C[256];
__local sph_u64 T4_C[256], T5_C[256], T6_C[256], T7_C[256];
#else
__local sph_u64 T0_C[256], T4_C[256];
#endif
__local sph_u64 T0_L[256], T1_L[256], T2_L[256], T3_L[256], T4_L[256], T5_L[256], T6_L[256], T7_L[256];
int init = get_local_id(0);
int step = get_local_size(0);
for (int i = init; i < 256; i += step)
{
T0_C[i] = T0[i];
T4_C[i] = T4[i];
#if !SPH_SMALL_FOOTPRINT_GROESTL
T1_C[i] = T1[i];
T2_C[i] = T2[i];
T3_C[i] = T3[i];
T5_C[i] = T5[i];
T6_C[i] = T6[i];
T7_C[i] = T7[i];
#endif
T0_L[i] = T0[i];
T4_L[i] = T4[i];
T1_L[i] = T1[i];
T2_L[i] = T2[i];
T3_L[i] = T3[i];
T5_L[i] = T5[i];
T6_L[i] = T6[i];
T7_L[i] = T7[i];
}
barrier(CLK_LOCAL_MEM_FENCE); // groestl
#define T0 T0_C
#define T1 T1_C
#define T2 T2_C
#define T3 T3_C
#define T4 T4_C
#define T5 T5_C
#define T6 T6_C
#define T7 T7_C
barrier(CLK_LOCAL_MEM_FENCE);
#define T0 T0_L
#define T1 T1_L
#define T2 T2_L
#define T3 T3_L
#define T4 T4_L
#define T5 T5_L
#define T6 T6_L
#define T7 T7_L
// groestl
sph_u64 H[16];
for (unsigned int u = 0; u < 15; u ++)
H[u] = 0;
#if USE_LE
H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40);
#else
H[15] = (sph_u64)512;
#endif
sph_u64 H[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x0002000000000000};
sph_u64 g[16], m[16];
m[0] = DEC64E(hash->h8[0]);
m[1] = DEC64E(hash->h8[1]);
m[2] = DEC64E(hash->h8[2]);
m[3] = DEC64E(hash->h8[3]);
m[4] = DEC64E(hash->h8[4]);
m[5] = DEC64E(hash->h8[5]);
m[6] = DEC64E(hash->h8[6]);
m[7] = DEC64E(hash->h8[7]);
for (unsigned int u = 0; u < 16; u ++)
g[u] = m[u] ^ H[u];
m[8] = 0x80; g[8] = m[8] ^ H[8];
m[9] = 0; g[9] = m[9] ^ H[9];
m[10] = 0; g[10] = m[10] ^ H[10];
m[11] = 0; g[11] = m[11] ^ H[11];
m[12] = 0; g[12] = m[12] ^ H[12];
m[13] = 0; g[13] = m[13] ^ H[13];
m[14] = 0; g[14] = m[14] ^ H[14];
m[15] = 0x100000000000000; g[15] = m[15] ^ H[15];
g[0] = m[0] = DEC64E(hash->h8[0]);
g[1] = m[1] = DEC64E(hash->h8[1]);
g[2] = m[2] = DEC64E(hash->h8[2]);
g[3] = m[3] = DEC64E(hash->h8[3]);
g[4] = m[4] = DEC64E(hash->h8[4]);
g[5] = m[5] = DEC64E(hash->h8[5]);
g[6] = m[6] = DEC64E(hash->h8[6]);
g[7] = m[7] = DEC64E(hash->h8[7]);
g[8] = m[8] = 0x80;
g[9] = m[9] = 0;
g[10] = m[10] = 0;
g[11] = m[11] = 0;
g[12] = m[12] = 0;
g[13] = m[13] = 0;
g[14] = m[14] = 0;
g[15] = 0x102000000000000;
m[15] = 0x100000000000000;
PERM_BIG_P(g);
PERM_BIG_Q(m);
for (unsigned int u = 0; u < 16; u ++)
H[u] ^= g[u] ^ m[u];
sph_u64 xH[16];
for (unsigned int u = 0; u < 16; u ++)
xH[u] = H[u];
xH[u] = H[u] ^= g[u] ^ m[u];
PERM_BIG_P(xH);
for (unsigned int u = 0; u < 16; u ++)
H[u] ^= xH[u];
for (unsigned int u = 0; u < 8; u ++)
hash->h8[u] = DEC64E(H[u + 8]);
for (unsigned int u = 8; u < 16; u ++)
hash->h8[u-8] = DEC64E(H[u] ^ xH[u]);
barrier(CLK_GLOBAL_MEM_FENCE);
}
@ -561,10 +538,14 @@ __kernel void search3(__global hash_t* hashes) @@ -561,10 +538,14 @@ __kernel void search3(__global hash_t* hashes)
m5 = SWAP8(hash->h8[5]);
m6 = SWAP8(hash->h8[6]);
m7 = SWAP8(hash->h8[7]);
UBI_BIG(480, 64);
bcount = 0;
m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0;
UBI_BIG(510, 8);
hash->h8[0] = SWAP8(h0);
hash->h8[1] = SWAP8(h1);
hash->h8[2] = SWAP8(h2);
@ -574,7 +555,7 @@ __kernel void search3(__global hash_t* hashes) @@ -574,7 +555,7 @@ __kernel void search3(__global hash_t* hashes)
hash->h8[6] = SWAP8(h6);
hash->h8[7] = SWAP8(h7);
barrier(CLK_GLOBAL_MEM_FENCE);
barrier(CLK_GLOBAL_MEM_FENCE);
}
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
@ -583,7 +564,7 @@ __kernel void search4(__global hash_t* hashes) @@ -583,7 +564,7 @@ __kernel void search4(__global hash_t* hashes)
uint gid = get_global_id(0);
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
// jh
// jh
sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7);
sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b);
@ -612,7 +593,7 @@ __kernel void search4(__global hash_t* hashes) @@ -612,7 +593,7 @@ __kernel void search4(__global hash_t* hashes)
h6l ^= DEC64E(hash->h8[5]);
h7h ^= DEC64E(hash->h8[6]);
h7l ^= DEC64E(hash->h8[7]);
h0h ^= 0x80;
h3l ^= 0x2000000000000;
}
@ -630,7 +611,7 @@ __kernel void search4(__global hash_t* hashes) @@ -630,7 +611,7 @@ __kernel void search4(__global hash_t* hashes)
hash->h8[6] = DEC64E(h7h);
hash->h8[7] = DEC64E(h7l);
barrier(CLK_GLOBAL_MEM_FENCE);
barrier(CLK_GLOBAL_MEM_FENCE);
}
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
@ -642,7 +623,7 @@ __kernel void search5(__global hash_t* hashes) @@ -642,7 +623,7 @@ __kernel void search5(__global hash_t* hashes)
// keccak
sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0;
sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0;
sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0;
sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0;
sph_u64 a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0;
sph_u64 a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0;
@ -664,6 +645,7 @@ __kernel void search5(__global hash_t* hashes) @@ -664,6 +645,7 @@ __kernel void search5(__global hash_t* hashes)
a21 ^= SWAP8(hash->h8[7]);
a31 ^= 0x8000000000000001;
KECCAK_F_1600;
// Finalize the "lane complement"
a10 = ~a10;
a20 = ~a20;
@ -677,7 +659,7 @@ __kernel void search5(__global hash_t* hashes) @@ -677,7 +659,7 @@ __kernel void search5(__global hash_t* hashes)
hash->h8[6] = SWAP8(a11);
hash->h8[7] = SWAP8(a21);
barrier(CLK_GLOBAL_MEM_FENCE);
barrier(CLK_GLOBAL_MEM_FENCE);
}
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
@ -740,7 +722,7 @@ __kernel void search6(__global hash_t* hashes) @@ -740,7 +722,7 @@ __kernel void search6(__global hash_t* hashes)
hash->h4[6] = V07 ^ V17 ^ V27 ^ V37 ^ V47;
}
}
hash->h4[9] = V00 ^ V10 ^ V20 ^ V30 ^ V40;
hash->h4[8] = V01 ^ V11 ^ V21 ^ V31 ^ V41;
hash->h4[11] = V02 ^ V12 ^ V22 ^ V32 ^ V42;
@ -750,7 +732,7 @@ __kernel void search6(__global hash_t* hashes) @@ -750,7 +732,7 @@ __kernel void search6(__global hash_t* hashes)
hash->h4[15] = V06 ^ V16 ^ V26 ^ V36 ^ V46;
hash->h4[14] = V07 ^ V17 ^ V27 ^ V37 ^ V47;
barrier(CLK_GLOBAL_MEM_FENCE);
barrier(CLK_GLOBAL_MEM_FENCE);
}
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
@ -794,7 +776,7 @@ __kernel void search7(__global hash_t* hashes) @@ -794,7 +776,7 @@ __kernel void search7(__global hash_t* hashes)
x6 ^= SWAP4(hash->h4[15]);
x7 ^= SWAP4(hash->h4[14]);
}
else if(i == 1)
else if(i == 1)
x0 ^= 0x80;
else if (i == 2)
xv ^= SPH_C32(1);
@ -817,7 +799,7 @@ __kernel void search7(__global hash_t* hashes) @@ -817,7 +799,7 @@ __kernel void search7(__global hash_t* hashes)
hash->h4[14] = xe;
hash->h4[15] = xf;
barrier(CLK_GLOBAL_MEM_FENCE);
barrier(CLK_GLOBAL_MEM_FENCE);
}
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
@ -825,11 +807,12 @@ __kernel void search8(__global hash_t* hashes) @@ -825,11 +807,12 @@ __kernel void search8(__global hash_t* hashes)
{
uint gid = get_global_id(0);
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
__local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256];
int init = get_local_id(0);
int step = get_local_size(0);
for (int i = init; i < 256; i += step)
{
AES0[i] = AES0_C[i];
@ -837,7 +820,7 @@ __kernel void search8(__global hash_t* hashes) @@ -837,7 +820,7 @@ __kernel void search8(__global hash_t* hashes)
AES2[i] = AES2_C[i];
AES3[i] = AES3_C[i];
}
barrier(CLK_LOCAL_MEM_FENCE);
// shavite
@ -853,7 +836,7 @@ __kernel void search8(__global hash_t* hashes) @@ -853,7 +836,7 @@ __kernel void search8(__global hash_t* hashes)
sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17;
sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F;
sph_u32 sc_count0 = (64 << 3), sc_count1 = 0, sc_count2 = 0, sc_count3 = 0;
sph_u32 sc_count0 = 0x200, sc_count1 = 0, sc_count2 = 0, sc_count3 = 0;
rk00 = hash->h4[0];
rk01 = hash->h4[1];
@ -896,7 +879,7 @@ __kernel void search8(__global hash_t* hashes) @@ -896,7 +879,7 @@ __kernel void search8(__global hash_t* hashes)
hash->h4[14] = hE;
hash->h4[15] = hF;
barrier(CLK_GLOBAL_MEM_FENCE);
barrier(CLK_GLOBAL_MEM_FENCE);
}
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
@ -908,10 +891,8 @@ __kernel void search9(__global hash_t* hashes) @@ -908,10 +891,8 @@ __kernel void search9(__global hash_t* hashes)
// simd
s32 q[256];
unsigned char x[128];
for(unsigned int i = 0; i < 64; i++)
x[i] = hash->h1[i];
for(unsigned int i = 64; i < 128; i++)
x[i] = 0;
@ -921,14 +902,15 @@ __kernel void search9(__global hash_t* hashes) @@ -921,14 +902,15 @@ __kernel void search9(__global hash_t* hashes)
u32 D0 = C32(0x09254899), D1 = C32(0xD699C7BC), D2 = C32(0x9019B6DC), D3 = C32(0x2B9022E4), D4 = C32(0x8FA14956), D5 = C32(0x21BF9BD3), D6 = C32(0xB94D0943), D7 = C32(0x6FFDDC22);
FFT256(0, 1, 0, ll1);
for (int i = 0; i < 256; i ++) {
s32 tq;
tq = q[i] + yoff_b_n[i];
tq = REDS2(tq);
tq = REDS1(tq);
tq = REDS1(tq);
q[i] = (tq <= 128 ? tq : tq - 257);
for (int i = 0; i < 256; i ++)
{
s32 tq;
tq = q[i] + yoff_b_n[i];
tq = REDS2(tq);
tq = REDS1(tq);
tq = REDS1(tq);
q[i] = (tq <= 128 ? tq : tq - 257);
}
A0 ^= hash->h4[0];
@ -954,21 +936,24 @@ __kernel void search9(__global hash_t* hashes) @@ -954,21 +936,24 @@ __kernel void search9(__global hash_t* hashes)
ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25);
STEP_BIG(
C32(0x0BA16B95), C32(0x72F999AD), C32(0x9FECC2AE), C32(0xBA3264FC),
C32(0x5E894929), C32(0x8E9F30E5), C32(0x2F1DAA37), C32(0xF0F2C558),
IF, 4, 13, PP8_4_);
C32(0x0BA16B95), C32(0x72F999AD), C32(0x9FECC2AE), C32(0xBA3264FC),
C32(0x5E894929), C32(0x8E9F30E5), C32(0x2F1DAA37), C32(0xF0F2C558),
IF, 4, 13, PP8_4_);
STEP_BIG(
C32(0xAC506643), C32(0xA90635A5), C32(0xE25B878B), C32(0xAAB7878F),
C32(0x88817F7A), C32(0x0A02892B), C32(0x559A7550), C32(0x598F657E),
IF, 13, 10, PP8_5_);
C32(0xAC506643), C32(0xA90635A5), C32(0xE25B878B), C32(0xAAB7878F),
C32(0x88817F7A), C32(0x0A02892B), C32(0x559A7550), C32(0x598F657E),
IF, 13, 10, PP8_5_);
STEP_BIG(
C32(0x7EEF60A1), C32(0x6B70E3E8), C32(0x9C1714D1), C32(0xB958E2A8),
C32(0xAB02675E), C32(0xED1C014F), C32(0xCD8D65BB), C32(0xFDB7A257),
IF, 10, 25, PP8_6_);
C32(0x7EEF60A1), C32(0x6B70E3E8), C32(0x9C1714D1), C32(0xB958E2A8),
C32(0xAB02675E), C32(0xED1C014F), C32(0xCD8D65BB), C32(0xFDB7A257),
IF, 10, 25, PP8_6_);
STEP_BIG(
C32(0x09254899), C32(0xD699C7BC), C32(0x9019B6DC), C32(0x2B9022E4),
C32(0x8FA14956), C32(0x21BF9BD3), C32(0xB94D0943), C32(0x6FFDDC22),
IF, 25, 4, PP8_0_);
C32(0x09254899), C32(0xD699C7BC), C32(0x9019B6DC), C32(0x2B9022E4),
C32(0x8FA14956), C32(0x21BF9BD3), C32(0xB94D0943), C32(0x6FFDDC22),
IF, 25, 4, PP8_0_);
u32 COPY_A0 = A0, COPY_A1 = A1, COPY_A2 = A2, COPY_A3 = A3, COPY_A4 = A4, COPY_A5 = A5, COPY_A6 = A6, COPY_A7 = A7;
u32 COPY_B0 = B0, COPY_B1 = B1, COPY_B2 = B2, COPY_B3 = B3, COPY_B4 = B4, COPY_B5 = B5, COPY_B6 = B6, COPY_B7 = B7;
@ -983,22 +968,27 @@ __kernel void search9(__global hash_t* hashes) @@ -983,22 +968,27 @@ __kernel void search9(__global hash_t* hashes)
ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7);
ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5);
ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25);
STEP_BIG(
COPY_A0, COPY_A1, COPY_A2, COPY_A3,
COPY_A4, COPY_A5, COPY_A6, COPY_A7,
IF, 4, 13, PP8_4_);
COPY_A0, COPY_A1, COPY_A2, COPY_A3,
COPY_A4, COPY_A5, COPY_A6, COPY_A7,
IF, 4, 13, PP8_4_);
STEP_BIG(
COPY_B0, COPY_B1, COPY_B2, COPY_B3,
COPY_B4, COPY_B5, COPY_B6, COPY_B7,
IF, 13, 10, PP8_5_);
COPY_B0, COPY_B1, COPY_B2, COPY_B3,
COPY_B4, COPY_B5, COPY_B6, COPY_B7,
IF, 13, 10, PP8_5_);
STEP_BIG(
COPY_C0, COPY_C1, COPY_C2, COPY_C3,
COPY_C4, COPY_C5, COPY_C6, COPY_C7,
IF, 10, 25, PP8_6_);
COPY_C0, COPY_C1, COPY_C2, COPY_C3,
COPY_C4, COPY_C5, COPY_C6, COPY_C7,
IF, 10, 25, PP8_6_);
STEP_BIG(
COPY_D0, COPY_D1, COPY_D2, COPY_D3,
COPY_D4, COPY_D5, COPY_D6, COPY_D7,
IF, 25, 4, PP8_0_);
COPY_D0, COPY_D1, COPY_D2, COPY_D3,
COPY_D4, COPY_D5, COPY_D6, COPY_D7,
IF, 25, 4, PP8_0_);
#undef q
hash->h4[0] = A0;
@ -1018,7 +1008,7 @@ __kernel void search9(__global hash_t* hashes) @@ -1018,7 +1008,7 @@ __kernel void search9(__global hash_t* hashes)
hash->h4[14] = B6;
hash->h4[15] = B7;
barrier(CLK_GLOBAL_MEM_FENCE);
barrier(CLK_GLOBAL_MEM_FENCE);
}
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
@ -1026,8 +1016,7 @@ __kernel void search10(__global hash_t* hashes) @@ -1026,8 +1016,7 @@ __kernel void search10(__global hash_t* hashes)
{
uint gid = get_global_id(0);
uint offset = get_global_offset(0);
hash_t hash;
__global hash_t *hashp = &(hashes[gid-offset]);
__global hash_t *hash = &(hashes[gid-offset]);
__local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256];
@ -1045,7 +1034,7 @@ __kernel void search10(__global hash_t* hashes) @@ -1045,7 +1034,7 @@ __kernel void search10(__global hash_t* hashes)
barrier(CLK_LOCAL_MEM_FENCE);
for (int i = 0; i < 8; i++)
hash.h8[i] = hashes[gid-offset].h8[i];
hash->h8[i] = hashes[gid-offset].h8[i];
// echo
sph_u64 W00, W01, W10, W11, W20, W21, W30, W31, W40, W41, W50, W51, W60, W61, W70, W71, W80, W81, W90, W91, WA0, WA1, WB0, WB1, WC0, WC1, WD0, WD1, WE0, WE1, WF0, WF1;
@ -1074,14 +1063,14 @@ __kernel void search10(__global hash_t* hashes) @@ -1074,14 +1063,14 @@ __kernel void search10(__global hash_t* hashes)
W61 = Vb61;
W70 = Vb70;
W71 = Vb71;
W80 = hash.h8[0];
W81 = hash.h8[1];
W90 = hash.h8[2];
W91 = hash.h8[3];
WA0 = hash.h8[4];
WA1 = hash.h8[5];
WB0 = hash.h8[6];
WB1 = hash.h8[7];
W80 = hash->h8[0];
W81 = hash->h8[1];
W90 = hash->h8[2];
W91 = hash->h8[3];
WA0 = hash->h8[4];
WA1 = hash->h8[5];
WB0 = hash->h8[6];
WB1 = hash->h8[7];
WC0 = 0x80;
WC1 = 0;
WD0 = 0;
@ -1094,14 +1083,14 @@ __kernel void search10(__global hash_t* hashes) @@ -1094,14 +1083,14 @@ __kernel void search10(__global hash_t* hashes)
for (unsigned u = 0; u < 10; u ++)
BIG_ROUND;
hashp->h8[0] = hash.h8[0] ^ Vb00 ^ W00 ^ W80;
hashp->h8[1] = hash.h8[1] ^ Vb01 ^ W01 ^ W81;
hashp->h8[2] = hash.h8[2] ^ Vb10 ^ W10 ^ W90;
hashp->h8[3] = hash.h8[3] ^ Vb11 ^ W11 ^ W91;
hashp->h8[4] = hash.h8[4] ^ Vb20 ^ W20 ^ WA0;
hashp->h8[5] = hash.h8[5] ^ Vb21 ^ W21 ^ WA1;
hashp->h8[6] = hash.h8[6] ^ Vb30 ^ W30 ^ WB0;
hashp->h8[7] = hash.h8[7] ^ Vb31 ^ W31 ^ WB1;
hash->h8[0] ^= Vb00 ^ W00 ^ W80;
hash->h8[1] ^= Vb01 ^ W01 ^ W81;
hash->h8[2] ^= Vb10 ^ W10 ^ W90;
hash->h8[3] ^= Vb11 ^ W11 ^ W91;
hash->h8[4] ^= Vb20 ^ W20 ^ WA0;
hash->h8[5] ^= Vb21 ^ W21 ^ WA1;
hash->h8[6] ^= Vb30 ^ W30 ^ WB0;
hash->h8[7] ^= Vb31 ^ W31 ^ WB1;
barrier(CLK_GLOBAL_MEM_FENCE);
}
@ -1110,9 +1099,22 @@ __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) @@ -1110,9 +1099,22 @@ __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search11(__global hash_t* hashes)
{
uint gid = get_global_id(0);
uint offset = get_global_offset(0);
__global hash_t *hash = &(hashes[gid-offset]);
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
#ifdef INPUT_BIG_LOCAL
__local sph_u32 T512_L[1024];
__constant const sph_u32 *T512_C = &T512[0][0];
int init = get_local_id(0);
int step = get_local_size(0);
for (int i = init; i < 1024; i += step)
T512_L[i] = T512_C[i];
barrier(CLK_LOCAL_MEM_FENCE);
#else
#define INPUT_BIG_LOCAL INPUT_BIG
#endif
sph_u32 c0 = HAMSI_IV512[0], c1 = HAMSI_IV512[1], c2 = HAMSI_IV512[2], c3 = HAMSI_IV512[3];
sph_u32 c4 = HAMSI_IV512[4], c5 = HAMSI_IV512[5], c6 = HAMSI_IV512[6], c7 = HAMSI_IV512[7];
sph_u32 c8 = HAMSI_IV512[8], c9 = HAMSI_IV512[9], cA = HAMSI_IV512[10], cB = HAMSI_IV512[11];
@ -1122,28 +1124,31 @@ __kernel void search11(__global hash_t* hashes) @@ -1122,28 +1124,31 @@ __kernel void search11(__global hash_t* hashes)
sph_u32 h[16] = { c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, cA, cB, cC, cD, cE, cF };
#define buf(u) hash->h1[i + u]
for(int i = 0; i < 64; i += 8)
{
INPUT_BIG;
INPUT_BIG_LOCAL;
P_BIG;
T_BIG;
}
#undef buf
#undef buf
#define buf(u) (u == 0 ? 0x80 : 0)
INPUT_BIG;
INPUT_BIG_LOCAL;
P_BIG;
T_BIG;
#undef buf
#undef buf
#define buf(u) (u == 6 ? 2 : 0)
INPUT_BIG;
INPUT_BIG_LOCAL;
PF_BIG;
T_BIG;
for(unsigned u = 0; u < 16; u ++)
hash->h4[u] = h[u];
for (unsigned u = 0; u < 16; u ++)
hash->h4[u] = h[u];
barrier(CLK_GLOBAL_MEM_FENCE);
}
@ -1153,14 +1158,27 @@ __kernel void search12(__global hash_t* hashes, __global uint* output, const ulo @@ -1153,14 +1158,27 @@ __kernel void search12(__global hash_t* hashes, __global uint* output, const ulo
uint gid = get_global_id(0);
uint offset = get_global_offset(0);
__global hash_t *hash = &(hashes[gid-offset]);
//mixtab
__local sph_u32 mixtab0[256], mixtab1[256], mixtab2[256], mixtab3[256];
int init = get_local_id(0);
int step = get_local_size(0);
for (int i = init; i < 256; i += step)
{
mixtab0[i] = mixtab0_c[i];
mixtab1[i] = mixtab1_c[i];
mixtab2[i] = mixtab2_c[i];
mixtab3[i] = mixtab3_c[i];
}
barrier(CLK_GLOBAL_MEM_FENCE);
// fugue
sph_u32 S00, S01, S02, S03, S04, S05, S06, S07, S08, S09;
sph_u32 S10, S11, S12, S13, S14, S15, S16, S17, S18, S19;
sph_u32 S20, S21, S22, S23, S24, S25, S26, S27, S28, S29;
sph_u32 S30, S31, S32, S33, S34, S35;
ulong fc_bit_count = (sph_u64) 64 << 3;
ulong fc_bit_count = (sph_u64) 0x200;
S00 = S01 = S02 = S03 = S04 = S05 = S06 = S07 = S08 = S09 = S10 = S11 = S12 = S13 = S14 = S15 = S16 = S17 = S18 = S19 = 0;
S20 = SPH_C32(0x8807a57e); S21 = SPH_C32(0xe616af75); S22 = SPH_C32(0xc5d3e4db); S23 = SPH_C32(0xac9ab027);

51
kernel/marucoin-modold.cl

@ -30,8 +30,8 @@ @@ -30,8 +30,8 @@
* @author phm <phm@inbox.com>
*/
#ifndef X13MOD_CL
#define X13MOD_CL
#ifndef MARUCOIN_MOD_CL
#define MARUCOIN_MOD_CL
#if __ENDIAN_LITTLE__
#define SPH_LITTLE_ENDIAN 1
@ -55,12 +55,12 @@ typedef long sph_s64; @@ -55,12 +55,12 @@ typedef long sph_s64;
#define SPH_64_TRUE 1
#define SPH_C32(x) ((sph_u32)(x ## U))
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
#define SPH_T32(x) (as_uint(x))
#define SPH_ROTL32(x, n) rotate(as_uint(x), as_uint(n))
#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n)))
#define SPH_C64(x) ((sph_u64)(x ## UL))
#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
#define SPH_T64(x) (as_ulong(x))
#define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n))))
#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n)))
@ -74,9 +74,9 @@ typedef long sph_s64; @@ -74,9 +74,9 @@ typedef long sph_s64;
#define SPH_SMALL_FOOTPRINT_GROESTL 0
#define SPH_GROESTL_BIG_ENDIAN 0
#define SPH_CUBEHASH_UNROLL 0
#define SPH_KECCAK_UNROLL 0
#if !defined SPH_HAMSI_EXPAND_BIG
#define SPH_HAMSI_EXPAND_BIG 4
#define SPH_KECCAK_UNROLL 1
#ifndef SPH_HAMSI_EXPAND_BIG
#define SPH_HAMSI_EXPAND_BIG 4
#endif
#include "blake.cl"
@ -794,6 +794,31 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo @@ -794,6 +794,31 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
}
barrier(CLK_LOCAL_MEM_FENCE);
#ifdef INPUT_BIG_LOCAL
__local sph_u32 T512_L[1024];
__constant const sph_u32 *T512_C = &T512[0][0];
for (int i = init; i < 1024; i += step)
T512_L[i] = T512_C[i];
barrier(CLK_LOCAL_MEM_FENCE);
#else
#define INPUT_BIG_LOCAL INPUT_BIG
#endif
// mixtab
__local sph_u32 mixtab0[256], mixtab1[256], mixtab2[256], mixtab3[256];
for (int i = init; i < 256; i += step)
{
mixtab0[i] = mixtab0_c[i];
mixtab1[i] = mixtab1_c[i];
mixtab2[i] = mixtab2_c[i];
mixtab3[i] = mixtab3_c[i];
}
barrier(CLK_LOCAL_MEM_FENCE);
for (int i = 0; i < 8; i++) {
hash.h8[i] = hashes[gid-offset].h8[i];
@ -875,18 +900,18 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo @@ -875,18 +900,18 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
#define buf(u) hash.h1[i + u]
for(int i = 0; i < 64; i += 8) {
INPUT_BIG;
INPUT_BIG_LOCAL;
P_BIG;
T_BIG;
}
#undef buf
#define buf(u) (u == 0 ? 0x80 : 0)
INPUT_BIG;
INPUT_BIG_LOCAL;
P_BIG;
T_BIG;
#undef buf
#define buf(u) (u == 6 ? 2 : 0)
INPUT_BIG;
INPUT_BIG_LOCAL;
PF_BIG;
T_BIG;
@ -976,7 +1001,7 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo @@ -976,7 +1001,7 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
hash.h4[15] = SWAP4(S30);
}
bool result = (hash.h8[3] <= target);
if (result)
output[atomic_inc(output+0xFF)] = SWAP4(gid);
@ -984,4 +1009,4 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo @@ -984,4 +1009,4 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
barrier(CLK_GLOBAL_MEM_FENCE);
}
#endif // X13MOD_CL
#endif // MARUCOIN_MOD_CL

350
kernel/shabal.cl

@ -0,0 +1,350 @@ @@ -0,0 +1,350 @@
/* $Id: shabal.c 175 2010-05-07 16:03:20Z tp $ */
/*
* Shabal implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
/*
* Part of this code was automatically generated (the part between
* the "BEGIN" and "END" markers).
*/
#define sM 16
#define C32 SPH_C32
#define T32 SPH_T32
#define O1 13
#define O2 9
#define O3 6
/*
* We copy the state into local variables, so that the compiler knows
* that it can optimize them at will.
*/
/* BEGIN -- automatically generated code. */
#define INPUT_BLOCK_ADD do { \
B0 = T32(B0 + M0); \
B1 = T32(B1 + M1); \
B2 = T32(B2 + M2); \
B3 = T32(B3 + M3); \
B4 = T32(B4 + M4); \
B5 = T32(B5 + M5); \
B6 = T32(B6 + M6); \
B7 = T32(B7 + M7); \
B8 = T32(B8 + M8); \
B9 = T32(B9 + M9); \
BA = T32(BA + MA); \
BB = T32(BB + MB); \
BC = T32(BC + MC); \
BD = T32(BD + MD); \
BE = T32(BE + ME); \
BF = T32(BF + MF); \
} while (0)
#define INPUT_BLOCK_SUB do { \
C0 = T32(C0 - M0); \
C1 = T32(C1 - M1); \
C2 = T32(C2 - M2); \
C3 = T32(C3 - M3); \
C4 = T32(C4 - M4); \
C5 = T32(C5 - M5); \
C6 = T32(C6 - M6); \
C7 = T32(C7 - M7); \
C8 = T32(C8 - M8); \
C9 = T32(C9 - M9); \
CA = T32(CA - MA); \
CB = T32(CB - MB); \
CC = T32(CC - MC); \
CD = T32(CD - MD); \
CE = T32(CE - ME); \
CF = T32(CF - MF); \
} while (0)
#define XOR_W do { \
A00 ^= Wlow; \
A01 ^= Whigh; \
} while (0)
#define SWAP(v1, v2) do { \
sph_u32 tmp = (v1); \
(v1) = (v2); \
(v2) = tmp; \
} while (0)
#define SWAP_BC do { \
SWAP(B0, C0); \
SWAP(B1, C1); \
SWAP(B2, C2); \
SWAP(B3, C3); \
SWAP(B4, C4); \
SWAP(B5, C5); \
SWAP(B6, C6); \
SWAP(B7, C7); \
SWAP(B8, C8); \
SWAP(B9, C9); \
SWAP(BA, CA); \
SWAP(BB, CB); \
SWAP(BC, CC); \
SWAP(BD, CD); \
SWAP(BE, CE); \
SWAP(BF, CF); \
} while (0)
#define PERM_ELT(xa0, xa1, xb0, xb1, xb2, xb3, xc, xm) do { \
xa0 = T32((xa0 \
^ (((xa1 << 15) | (xa1 >> 17)) * 5U) \
^ xc) * 3U) \
^ xb1 ^ (xb2 & ~xb3) ^ xm; \
xb0 = T32(~(((xb0 << 1) | (xb0 >> 31)) ^ xa0)); \
} while (0)
#define PERM_STEP_0 do { \
PERM_ELT(A00, A0B, B0, BD, B9, B6, C8, M0); \
PERM_ELT(A01, A00, B1, BE, BA, B7, C7, M1); \
PERM_ELT(A02, A01, B2, BF, BB, B8, C6, M2); \
PERM_ELT(A03, A02, B3, B0, BC, B9, C5, M3); \
PERM_ELT(A04, A03, B4, B1, BD, BA, C4, M4); \
PERM_ELT(A05, A04, B5, B2, BE, BB, C3, M5); \
PERM_ELT(A06, A05, B6, B3, BF, BC, C2, M6); \
PERM_ELT(A07, A06, B7, B4, B0, BD, C1, M7); \
PERM_ELT(A08, A07, B8, B5, B1, BE, C0, M8); \
PERM_ELT(A09, A08, B9, B6, B2, BF, CF, M9); \
PERM_ELT(A0A, A09, BA, B7, B3, B0, CE, MA); \
PERM_ELT(A0B, A0A, BB, B8, B4, B1, CD, MB); \
PERM_ELT(A00, A0B, BC, B9, B5, B2, CC, MC); \
PERM_ELT(A01, A00, BD, BA, B6, B3, CB, MD); \
PERM_ELT(A02, A01, BE, BB, B7, B4, CA, ME); \
PERM_ELT(A03, A02, BF, BC, B8, B5, C9, MF); \
} while (0)
#define PERM_STEP_1 do { \
PERM_ELT(A04, A03, B0, BD, B9, B6, C8, M0); \
PERM_ELT(A05, A04, B1, BE, BA, B7, C7, M1); \
PERM_ELT(A06, A05, B2, BF, BB, B8, C6, M2); \
PERM_ELT(A07, A06, B3, B0, BC, B9, C5, M3); \
PERM_ELT(A08, A07, B4, B1, BD, BA, C4, M4); \
PERM_ELT(A09, A08, B5, B2, BE, BB, C3, M5); \
PERM_ELT(A0A, A09, B6, B3, BF, BC, C2, M6); \
PERM_ELT(A0B, A0A, B7, B4, B0, BD, C1, M7); \
PERM_ELT(A00, A0B, B8, B5, B1, BE, C0, M8); \
PERM_ELT(A01, A00, B9, B6, B2, BF, CF, M9); \
PERM_ELT(A02, A01, BA, B7, B3, B0, CE, MA); \
PERM_ELT(A03, A02, BB, B8, B4, B1, CD, MB); \
PERM_ELT(A04, A03, BC, B9, B5, B2, CC, MC); \
PERM_ELT(A05, A04, BD, BA, B6, B3, CB, MD); \
PERM_ELT(A06, A05, BE, BB, B7, B4, CA, ME); \
PERM_ELT(A07, A06, BF, BC, B8, B5, C9, MF); \
} while (0)
#define PERM_STEP_2 do { \
PERM_ELT(A08, A07, B0, BD, B9, B6, C8, M0); \
PERM_ELT(A09, A08, B1, BE, BA, B7, C7, M1); \
PERM_ELT(A0A, A09, B2, BF, BB, B8, C6, M2); \
PERM_ELT(A0B, A0A, B3, B0, BC, B9, C5, M3); \
PERM_ELT(A00, A0B, B4, B1, BD, BA, C4, M4); \
PERM_ELT(A01, A00, B5, B2, BE, BB, C3, M5); \
PERM_ELT(A02, A01, B6, B3, BF, BC, C2, M6); \
PERM_ELT(A03, A02, B7, B4, B0, BD, C1, M7); \
PERM_ELT(A04, A03, B8, B5, B1, BE, C0, M8); \
PERM_ELT(A05, A04, B9, B6, B2, BF, CF, M9); \
PERM_ELT(A06, A05, BA, B7, B3, B0, CE, MA); \
PERM_ELT(A07, A06, BB, B8, B4, B1, CD, MB); \
PERM_ELT(A08, A07, BC, B9, B5, B2, CC, MC); \
PERM_ELT(A09, A08, BD, BA, B6, B3, CB, MD); \
PERM_ELT(A0A, A09, BE, BB, B7, B4, CA, ME); \
PERM_ELT(A0B, A0A, BF, BC, B8, B5, C9, MF); \
} while (0)
#define APPLY_P do { \
B0 = T32(B0 << 17) | (B0 >> 15); \
B1 = T32(B1 << 17) | (B1 >> 15); \
B2 = T32(B2 << 17) | (B2 >> 15); \
B3 = T32(B3 << 17) | (B3 >> 15); \
B4 = T32(B4 << 17) | (B4 >> 15); \
B5 = T32(B5 << 17) | (B5 >> 15); \
B6 = T32(B6 << 17) | (B6 >> 15); \
B7 = T32(B7 << 17) | (B7 >> 15); \
B8 = T32(B8 << 17) | (B8 >> 15); \
B9 = T32(B9 << 17) | (B9 >> 15); \
BA = T32(BA << 17) | (BA >> 15); \
BB = T32(BB << 17) | (BB >> 15); \
BC = T32(BC << 17) | (BC >> 15); \
BD = T32(BD << 17) | (BD >> 15); \
BE = T32(BE << 17) | (BE >> 15); \
BF = T32(BF << 17) | (BF >> 15); \
PERM_STEP_0; \
PERM_STEP_1; \
PERM_STEP_2; \
A0B = T32(A0B + C6); \
A0A = T32(A0A + C5); \
A09 = T32(A09 + C4); \
A08 = T32(A08 + C3); \
A07 = T32(A07 + C2); \
A06 = T32(A06 + C1); \
A05 = T32(A05 + C0); \
A04 = T32(A04 + CF); \
A03 = T32(A03 + CE); \
A02 = T32(A02 + CD); \
A01 = T32(A01 + CC); \
A00 = T32(A00 + CB); \
A0B = T32(A0B + CA); \
A0A = T32(A0A + C9); \
A09 = T32(A09 + C8); \
A08 = T32(A08 + C7); \
A07 = T32(A07 + C6); \
A06 = T32(A06 + C5); \
A05 = T32(A05 + C4); \
A04 = T32(A04 + C3); \
A03 = T32(A03 + C2); \
A02 = T32(A02 + C1); \
A01 = T32(A01 + C0); \
A00 = T32(A00 + CF); \
A0B = T32(A0B + CE); \
A0A = T32(A0A + CD); \
A09 = T32(A09 + CC); \
A08 = T32(A08 + CB); \
A07 = T32(A07 + CA); \
A06 = T32(A06 + C9); \
A05 = T32(A05 + C8); \
A04 = T32(A04 + C7); \
A03 = T32(A03 + C6); \
A02 = T32(A02 + C5); \
A01 = T32(A01 + C4); \
A00 = T32(A00 + C3); \
} while (0)
#define INCR_W do { \
if ((Wlow = T32(Wlow + 1)) == 0) \
Whigh = T32(Whigh + 1); \
} while (0)
__constant static const sph_u32 A_init_192[] = {
C32(0xFD749ED4), C32(0xB798E530), C32(0x33904B6F), C32(0x46BDA85E),
C32(0x076934B4), C32(0x454B4058), C32(0x77F74527), C32(0xFB4CF465),
C32(0x62931DA9), C32(0xE778C8DB), C32(0x22B3998E), C32(0xAC15CFB9)
};
__constant static const sph_u32 B_init_192[] = {
C32(0x58BCBAC4), C32(0xEC47A08E), C32(0xAEE933B2), C32(0xDFCBC824),
C32(0xA7944804), C32(0xBF65BDB0), C32(0x5A9D4502), C32(0x59979AF7),
C32(0xC5CEA54E), C32(0x4B6B8150), C32(0x16E71909), C32(0x7D632319),
C32(0x930573A0), C32(0xF34C63D1), C32(0xCAF914B4), C32(0xFDD6612C)
};
__constant static const sph_u32 C_init_192[] = {
C32(0x61550878), C32(0x89EF2B75), C32(0xA1660C46), C32(0x7EF3855B),
C32(0x7297B58C), C32(0x1BC67793), C32(0x7FB1C723), C32(0xB66FC640),
C32(0x1A48B71C), C32(0xF0976D17), C32(0x088CE80A), C32(0xA454EDF3),
C32(0x1C096BF4), C32(0xAC76224B), C32(0x5215781C), C32(0xCD5D2669)
};
__constant static const sph_u32 A_init_224[] = {
C32(0xA5201467), C32(0xA9B8D94A), C32(0xD4CED997), C32(0x68379D7B),
C32(0xA7FC73BA), C32(0xF1A2546B), C32(0x606782BF), C32(0xE0BCFD0F),
C32(0x2F25374E), C32(0x069A149F), C32(0x5E2DFF25), C32(0xFAECF061)
};
__constant static const sph_u32 B_init_224[] = {
C32(0xEC9905D8), C32(0xF21850CF), C32(0xC0A746C8), C32(0x21DAD498),
C32(0x35156EEB), C32(0x088C97F2), C32(0x26303E40), C32(0x8A2D4FB5),
C32(0xFEEE44B6), C32(0x8A1E9573), C32(0x7B81111A), C32(0xCBC139F0),
C32(0xA3513861), C32(0x1D2C362E), C32(0x918C580E), C32(0xB58E1B9C)
};
__constant static const sph_u32 C_init_224[] = {
C32(0xE4B573A1), C32(0x4C1A0880), C32(0x1E907C51), C32(0x04807EFD),
C32(0x3AD8CDE5), C32(0x16B21302), C32(0x02512C53), C32(0x2204CB18),
C32(0x99405F2D), C32(0xE5B648A1), C32(0x70AB1D43), C32(0xA10C25C2),
C32(0x16F1AC05), C32(0x38BBEB56), C32(0x9B01DC60), C32(0xB1096D83)
};
__constant static const sph_u32 A_init_256[] = {
C32(0x52F84552), C32(0xE54B7999), C32(0x2D8EE3EC), C32(0xB9645191),
C32(0xE0078B86), C32(0xBB7C44C9), C32(0xD2B5C1CA), C32(0xB0D2EB8C),
C32(0x14CE5A45), C32(0x22AF50DC), C32(0xEFFDBC6B), C32(0xEB21B74A)
};
__constant static const sph_u32 B_init_256[] = {
C32(0xB555C6EE), C32(0x3E710596), C32(0xA72A652F), C32(0x9301515F),
C32(0xDA28C1FA), C32(0x696FD868), C32(0x9CB6BF72), C32(0x0AFE4002),
C32(0xA6E03615), C32(0x5138C1D4), C32(0xBE216306), C32(0xB38B8890),
C32(0x3EA8B96B), C32(0x3299ACE4), C32(0x30924DD4), C32(0x55CB34A5)
};
__constant static const sph_u32 C_init_256[] = {
C32(0xB405F031), C32(0xC4233EBA), C32(0xB3733979), C32(0xC0DD9D55),
C32(0xC51C28AE), C32(0xA327B8E1), C32(0x56C56167), C32(0xED614433),
C32(0x88B59D60), C32(0x60E2CEBA), C32(0x758B4B8B), C32(0x83E82A7F),
C32(0xBC968828), C32(0xE6E00BF7), C32(0xBA839E55), C32(0x9B491C60)
};
__constant static const sph_u32 A_init_384[] = {
C32(0xC8FCA331), C32(0xE55C504E), C32(0x003EBF26), C32(0xBB6B8D83),
C32(0x7B0448C1), C32(0x41B82789), C32(0x0A7C9601), C32(0x8D659CFF),
C32(0xB6E2673E), C32(0xCA54C77B), C32(0x1460FD7E), C32(0x3FCB8F2D)
};
__constant static const sph_u32 B_init_384[] = {
C32(0x527291FC), C32(0x2A16455F), C32(0x78E627E5), C32(0x944F169F),
C32(0x1CA6F016), C32(0xA854EA25), C32(0x8DB98ABE), C32(0xF2C62641),
C32(0x30117DCB), C32(0xCF5C4309), C32(0x93711A25), C32(0xF9F671B8),
C32(0xB01D2116), C32(0x333F4B89), C32(0xB285D165), C32(0x86829B36)
};
__constant static const sph_u32 C_init_384[] = {
C32(0xF764B11A), C32(0x76172146), C32(0xCEF6934D), C32(0xC6D28399),
C32(0xFE095F61), C32(0x5E6018B4), C32(0x5048ECF5), C32(0x51353261),
C32(0x6E6E36DC), C32(0x63130DAD), C32(0xA9C69BD6), C32(0x1E90EA0C),
C32(0x7C35073B), C32(0x28D95E6D), C32(0xAA340E0D), C32(0xCB3DEE70)
};
__constant static const sph_u32 A_init_512[] = {
C32(0x20728DFD), C32(0x46C0BD53), C32(0xE782B699), C32(0x55304632),
C32(0x71B4EF90), C32(0x0EA9E82C), C32(0xDBB930F1), C32(0xFAD06B8B),
C32(0xBE0CAE40), C32(0x8BD14410), C32(0x76D2ADAC), C32(0x28ACAB7F)
};
__constant static const sph_u32 B_init_512[] = {
C32(0xC1099CB7), C32(0x07B385F3), C32(0xE7442C26), C32(0xCC8AD640),
C32(0xEB6F56C7), C32(0x1EA81AA9), C32(0x73B9D314), C32(0x1DE85D08),
C32(0x48910A5A), C32(0x893B22DB), C32(0xC5A0DF44), C32(0xBBC4324E),
C32(0x72D2F240), C32(0x75941D99), C32(0x6D8BDE82), C32(0xA1A7502B)
};
__constant static const sph_u32 C_init_512[] = {
C32(0xD9BF68D1), C32(0x58BAD750), C32(0x56028CB2), C32(0x8134F359),
C32(0xB5D469D8), C32(0x941A8CC2), C32(0x418B2A6E), C32(0x04052780),
C32(0x7F07D787), C32(0x5194358F), C32(0x3C60D665), C32(0xBE97D79A),
C32(0x950C3434), C32(0xAED9A06D), C32(0x2537DC8D), C32(0x7CDB5969)
};
/* END -- automatically generated code. */

1167
kernel/whirlpool.cl

File diff suppressed because it is too large Load Diff

1341
kernel/x14.cl

File diff suppressed because it is too large Load Diff

1078
kernel/x14old.cl

File diff suppressed because it is too large Load Diff

1
miner.h

@ -1033,6 +1033,7 @@ extern int swork_id; @@ -1033,6 +1033,7 @@ extern int swork_id;
extern int opt_tcp_keepalive;
extern bool opt_incognito;
extern int opt_hamsi_expand_big;
extern bool opt_hamsi_short;
#if LOCK_TRACKING
extern pthread_mutex_t lockstat_lock;

6
sgminer.c

@ -192,6 +192,7 @@ int nDevs; @@ -192,6 +192,7 @@ int nDevs;
int opt_dynamic_interval = 7;
int opt_g_threads = -1;
int opt_hamsi_expand_big = 4;
bool opt_hamsi_short = false;
bool opt_restart = true;
struct list_head scan_devices;
@ -1459,7 +1460,10 @@ struct opt_table opt_config_table[] = { @@ -1459,7 +1460,10 @@ struct opt_table opt_config_table[] = {
"Set GPU lookup gap for scrypt mining, comma separated"),
OPT_WITH_ARG("--hamsi-expand-big",
set_int_1_to_10, opt_show_intval, &opt_hamsi_expand_big,
"Set SPH_HAMSI_EXPAND_BIG for X13 algorithms (1 or 4 are common)"),
"Set SPH_HAMSI_EXPAND_BIG for X13 derived algorithms (1 or 4 are common)"),
OPT_WITHOUT_ARG("--hamsi-short",
opt_set_bool, &opt_hamsi_short,
"Set SPH_HAMSI_SHORT for X13 derived algorithms (Can give better hashrate for some GPUs)"),
#ifdef HAVE_CURSES
OPT_WITHOUT_ARG("--incognito",
opt_set_bool, &opt_incognito,

2
sph/Makefile.am

@ -1,3 +1,3 @@ @@ -1,3 +1,3 @@
noinst_LIBRARIES = libsph.a
libsph_a_SOURCES = bmw.c echo.c jh.c luffa.c simd.c blake.c cubehash.c groestl.c keccak.c shavite.c skein.c sha2.c sha2big.c fugue.c hamsi.c panama.c
libsph_a_SOURCES = bmw.c echo.c jh.c luffa.c simd.c blake.c cubehash.c groestl.c keccak.c shavite.c skein.c sha2.c sha2big.c fugue.c hamsi.c panama.c shabal.c whirlpool.c

806
sph/shabal.c

@ -0,0 +1,806 @@ @@ -0,0 +1,806 @@
/* $Id: shabal.c 175 2010-05-07 16:03:20Z tp $ */
/*
* Shabal implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#include <stddef.h>
#include <string.h>
#include "sph_shabal.h"
#ifdef __cplusplus
extern "C"{
#endif
#ifdef _MSC_VER
#pragma warning (disable: 4146)
#endif
/*
* Part of this code was automatically generated (the part between
* the "BEGIN" and "END" markers).
*/
#define sM 16
#define C32 SPH_C32
#define T32 SPH_T32
#define O1 13
#define O2 9
#define O3 6
/*
* We copy the state into local variables, so that the compiler knows
* that it can optimize them at will.
*/
/* BEGIN -- automatically generated code. */
#define DECL_STATE \
sph_u32 A00, A01, A02, A03, A04, A05, A06, A07, \
A08, A09, A0A, A0B; \
sph_u32 B0, B1, B2, B3, B4, B5, B6, B7, \
B8, B9, BA, BB, BC, BD, BE, BF; \
sph_u32 C0, C1, C2, C3, C4, C5, C6, C7, \
C8, C9, CA, CB, CC, CD, CE, CF; \
sph_u32 M0, M1, M2, M3, M4, M5, M6, M7, \
M8, M9, MA, MB, MC, MD, ME, MF; \
sph_u32 Wlow, Whigh;
#define READ_STATE(state) do { \
A00 = (state)->A[0]; \
A01 = (state)->A[1]; \
A02 = (state)->A[2]; \
A03 = (state)->A[3]; \
A04 = (state)->A[4]; \
A05 = (state)->A[5]; \
A06 = (state)->A[6]; \
A07 = (state)->A[7]; \
A08 = (state)->A[8]; \
A09 = (state)->A[9]; \
A0A = (state)->A[10]; \
A0B = (state)->A[11]; \
B0 = (state)->B[0]; \
B1 = (state)->B[1]; \
B2 = (state)->B[2]; \
B3 = (state)->B[3]; \
B4 = (state)->B[4]; \
B5 = (state)->B[5]; \
B6 = (state)->B[6]; \
B7 = (state)->B[7]; \
B8 = (state)->B[8]; \
B9 = (state)->B[9]; \
BA = (state)->B[10]; \
BB = (state)->B[11]; \
BC = (state)->B[12]; \
BD = (state)->B[13]; \
BE = (state)->B[14]; \
BF = (state)->B[15]; \
C0 = (state)->C[0]; \
C1 = (state)->C[1]; \
C2 = (state)->C[2]; \
C3 = (state)->C[3]; \
C4 = (state)->C[4]; \
C5 = (state)->C[5]; \
C6 = (state)->C[6]; \
C7 = (state)->C[7]; \
C8 = (state)->C[8]; \
C9 = (state)->C[9]; \
CA = (state)->C[10]; \
CB = (state)->C[11]; \
CC = (state)->C[12]; \
CD = (state)->C[13]; \
CE = (state)->C[14]; \
CF = (state)->C[15]; \
Wlow = (state)->Wlow; \
Whigh = (state)->Whigh; \
} while (0)
#define WRITE_STATE(state) do { \
(state)->A[0] = A00; \
(state)->A[1] = A01; \
(state)->A[2] = A02; \
(state)->A[3] = A03; \
(state)->A[4] = A04; \
(state)->A[5] = A05; \
(state)->A[6] = A06; \
(state)->A[7] = A07; \
(state)->A[8] = A08; \
(state)->A[9] = A09; \
(state)->A[10] = A0A; \
(state)->A[11] = A0B; \
(state)->B[0] = B0; \
(state)->B[1] = B1; \
(state)->B[2] = B2; \
(state)->B[3] = B3; \
(state)->B[4] = B4; \
(state)->B[5] = B5; \
(state)->B[6] = B6; \
(state)->B[7] = B7; \
(state)->B[8] = B8; \
(state)->B[9] = B9; \
(state)->B[10] = BA; \
(state)->B[11] = BB; \
(state)->B[12] = BC; \
(state)->B[13] = BD; \
(state)->B[14] = BE; \
(state)->B[15] = BF; \
(state)->C[0] = C0; \
(state)->C[1] = C1; \
(state)->C[2] = C2; \
(state)->C[3] = C3; \
(state)->C[4] = C4; \
(state)->C[5] = C5; \
(state)->C[6] = C6; \
(state)->C[7] = C7; \
(state)->C[8] = C8; \
(state)->C[9] = C9; \
(state)->C[10] = CA; \
(state)->C[11] = CB; \
(state)->C[12] = CC; \
(state)->C[13] = CD; \
(state)->C[14] = CE; \
(state)->C[15] = CF; \
(state)->Wlow = Wlow; \
(state)->Whigh = Whigh; \
} while (0)
#define DECODE_BLOCK do { \
M0 = sph_dec32le_aligned(buf + 0); \
M1 = sph_dec32le_aligned(buf + 4); \
M2 = sph_dec32le_aligned(buf + 8); \
M3 = sph_dec32le_aligned(buf + 12); \
M4 = sph_dec32le_aligned(buf + 16); \
M5 = sph_dec32le_aligned(buf + 20); \
M6 = sph_dec32le_aligned(buf + 24); \
M7 = sph_dec32le_aligned(buf + 28); \
M8 = sph_dec32le_aligned(buf + 32); \
M9 = sph_dec32le_aligned(buf + 36); \
MA = sph_dec32le_aligned(buf + 40); \
MB = sph_dec32le_aligned(buf + 44); \
MC = sph_dec32le_aligned(buf + 48); \
MD = sph_dec32le_aligned(buf + 52); \
ME = sph_dec32le_aligned(buf + 56); \
MF = sph_dec32le_aligned(buf + 60); \
} while (0)
#define INPUT_BLOCK_ADD do { \
B0 = T32(B0 + M0); \
B1 = T32(B1 + M1); \
B2 = T32(B2 + M2); \
B3 = T32(B3 + M3); \
B4 = T32(B4 + M4); \
B5 = T32(B5 + M5); \
B6 = T32(B6 + M6); \
B7 = T32(B7 + M7); \
B8 = T32(B8 + M8); \
B9 = T32(B9 + M9); \
BA = T32(BA + MA); \
BB = T32(BB + MB); \
BC = T32(BC + MC); \
BD = T32(BD + MD); \
BE = T32(BE + ME); \
BF = T32(BF + MF); \
} while (0)
#define INPUT_BLOCK_SUB do { \
C0 = T32(C0 - M0); \
C1 = T32(C1 - M1); \
C2 = T32(C2 - M2); \
C3 = T32(C3 - M3); \
C4 = T32(C4 - M4); \
C5 = T32(C5 - M5); \
C6 = T32(C6 - M6); \
C7 = T32(C7 - M7); \
C8 = T32(C8 - M8); \
C9 = T32(C9 - M9); \
CA = T32(CA - MA); \
CB = T32(CB - MB); \
CC = T32(CC - MC); \
CD = T32(CD - MD); \
CE = T32(CE - ME); \
CF = T32(CF - MF); \
} while (0)
#define XOR_W do { \
A00 ^= Wlow; \
A01 ^= Whigh; \
} while (0)
#define SWAP(v1, v2) do { \
sph_u32 tmp = (v1); \
(v1) = (v2); \
(v2) = tmp; \
} while (0)
#define SWAP_BC do { \
SWAP(B0, C0); \
SWAP(B1, C1); \
SWAP(B2, C2); \
SWAP(B3, C3); \
SWAP(B4, C4); \
SWAP(B5, C5); \
SWAP(B6, C6); \
SWAP(B7, C7); \
SWAP(B8, C8); \
SWAP(B9, C9); \
SWAP(BA, CA); \
SWAP(BB, CB); \
SWAP(BC, CC); \
SWAP(BD, CD); \
SWAP(BE, CE); \
SWAP(BF, CF); \
} while (0)
#define PERM_ELT(xa0, xa1, xb0, xb1, xb2, xb3, xc, xm) do { \
xa0 = T32((xa0 \
^ (((xa1 << 15) | (xa1 >> 17)) * 5U) \
^ xc) * 3U) \
^ xb1 ^ (xb2 & ~xb3) ^ xm; \
xb0 = T32(~(((xb0 << 1) | (xb0 >> 31)) ^ xa0)); \
} while (0)
#define PERM_STEP_0 do { \
PERM_ELT(A00, A0B, B0, BD, B9, B6, C8, M0); \
PERM_ELT(A01, A00, B1, BE, BA, B7, C7, M1); \
PERM_ELT(A02, A01, B2, BF, BB, B8, C6, M2); \
PERM_ELT(A03, A02, B3, B0, BC, B9, C5, M3); \
PERM_ELT(A04, A03, B4, B1, BD, BA, C4, M4); \
PERM_ELT(A05, A04, B5, B2, BE, BB, C3, M5); \
PERM_ELT(A06, A05, B6, B3, BF, BC, C2, M6); \
PERM_ELT(A07, A06, B7, B4, B0, BD, C1, M7); \
PERM_ELT(A08, A07, B8, B5, B1, BE, C0, M8); \
PERM_ELT(A09, A08, B9, B6, B2, BF, CF, M9); \
PERM_ELT(A0A, A09, BA, B7, B3, B0, CE, MA); \
PERM_ELT(A0B, A0A, BB, B8, B4, B1, CD, MB); \
PERM_ELT(A00, A0B, BC, B9, B5, B2, CC, MC); \
PERM_ELT(A01, A00, BD, BA, B6, B3, CB, MD); \
PERM_ELT(A02, A01, BE, BB, B7, B4, CA, ME); \
PERM_ELT(A03, A02, BF, BC, B8, B5, C9, MF); \
} while (0)
#define PERM_STEP_1 do { \
PERM_ELT(A04, A03, B0, BD, B9, B6, C8, M0); \
PERM_ELT(A05, A04, B1, BE, BA, B7, C7, M1); \
PERM_ELT(A06, A05, B2, BF, BB, B8, C6, M2); \
PERM_ELT(A07, A06, B3, B0, BC, B9, C5, M3); \
PERM_ELT(A08, A07, B4, B1, BD, BA, C4, M4); \
PERM_ELT(A09, A08, B5, B2, BE, BB, C3, M5); \
PERM_ELT(A0A, A09, B6, B3, BF, BC, C2, M6); \
PERM_ELT(A0B, A0A, B7, B4, B0, BD, C1, M7); \
PERM_ELT(A00, A0B, B8, B5, B1, BE, C0, M8); \
PERM_ELT(A01, A00, B9, B6, B2, BF, CF, M9); \
PERM_ELT(A02, A01, BA, B7, B3, B0, CE, MA); \
PERM_ELT(A03, A02, BB, B8, B4, B1, CD, MB); \
PERM_ELT(A04, A03, BC, B9, B5, B2, CC, MC); \
PERM_ELT(A05, A04, BD, BA, B6, B3, CB, MD); \
PERM_ELT(A06, A05, BE, BB, B7, B4, CA, ME); \
PERM_ELT(A07, A06, BF, BC, B8, B5, C9, MF); \
} while (0)
#define PERM_STEP_2 do { \
PERM_ELT(A08, A07, B0, BD, B9, B6, C8, M0); \
PERM_ELT(A09, A08, B1, BE, BA, B7, C7, M1); \
PERM_ELT(A0A, A09, B2, BF, BB, B8, C6, M2); \
PERM_ELT(A0B, A0A, B3, B0, BC, B9, C5, M3); \
PERM_ELT(A00, A0B, B4, B1, BD, BA, C4, M4); \
PERM_ELT(A01, A00, B5, B2, BE, BB, C3, M5); \
PERM_ELT(A02, A01, B6, B3, BF, BC, C2, M6); \
PERM_ELT(A03, A02, B7, B4, B0, BD, C1, M7); \
PERM_ELT(A04, A03, B8, B5, B1, BE, C0, M8); \
PERM_ELT(A05, A04, B9, B6, B2, BF, CF, M9); \
PERM_ELT(A06, A05, BA, B7, B3, B0, CE, MA); \
PERM_ELT(A07, A06, BB, B8, B4, B1, CD, MB); \
PERM_ELT(A08, A07, BC, B9, B5, B2, CC, MC); \
PERM_ELT(A09, A08, BD, BA, B6, B3, CB, MD); \
PERM_ELT(A0A, A09, BE, BB, B7, B4, CA, ME); \
PERM_ELT(A0B, A0A, BF, BC, B8, B5, C9, MF); \
} while (0)
#define APPLY_P do { \
B0 = T32(B0 << 17) | (B0 >> 15); \
B1 = T32(B1 << 17) | (B1 >> 15); \
B2 = T32(B2 << 17) | (B2 >> 15); \
B3 = T32(B3 << 17) | (B3 >> 15); \
B4 = T32(B4 << 17) | (B4 >> 15); \
B5 = T32(B5 << 17) | (B5 >> 15); \
B6 = T32(B6 << 17) | (B6 >> 15); \
B7 = T32(B7 << 17) | (B7 >> 15); \
B8 = T32(B8 << 17) | (B8 >> 15); \
B9 = T32(B9 << 17) | (B9 >> 15); \
BA = T32(BA << 17) | (BA >> 15); \
BB = T32(BB << 17) | (BB >> 15); \
BC = T32(BC << 17) | (BC >> 15); \
BD = T32(BD << 17) | (BD >> 15); \
BE = T32(BE << 17) | (BE >> 15); \
BF = T32(BF << 17) | (BF >> 15); \
PERM_STEP_0; \
PERM_STEP_1; \
PERM_STEP_2; \
A0B = T32(A0B + C6); \
A0A = T32(A0A + C5); \
A09 = T32(A09 + C4); \
A08 = T32(A08 + C3); \
A07 = T32(A07 + C2); \
A06 = T32(A06 + C1); \
A05 = T32(A05 + C0); \
A04 = T32(A04 + CF); \
A03 = T32(A03 + CE); \
A02 = T32(A02 + CD); \
A01 = T32(A01 + CC); \
A00 = T32(A00 + CB); \
A0B = T32(A0B + CA); \
A0A = T32(A0A + C9); \
A09 = T32(A09 + C8); \
A08 = T32(A08 + C7); \
A07 = T32(A07 + C6); \
A06 = T32(A06 + C5); \
A05 = T32(A05 + C4); \
A04 = T32(A04 + C3); \
A03 = T32(A03 + C2); \
A02 = T32(A02 + C1); \
A01 = T32(A01 + C0); \
A00 = T32(A00 + CF); \
A0B = T32(A0B + CE); \
A0A = T32(A0A + CD); \
A09 = T32(A09 + CC); \
A08 = T32(A08 + CB); \
A07 = T32(A07 + CA); \
A06 = T32(A06 + C9); \
A05 = T32(A05 + C8); \
A04 = T32(A04 + C7); \
A03 = T32(A03 + C6); \
A02 = T32(A02 + C5); \
A01 = T32(A01 + C4); \
A00 = T32(A00 + C3); \
} while (0)
#define INCR_W do { \
if ((Wlow = T32(Wlow + 1)) == 0) \
Whigh = T32(Whigh + 1); \
} while (0)
static const sph_u32 A_init_192[] = {
C32(0xFD749ED4), C32(0xB798E530), C32(0x33904B6F), C32(0x46BDA85E),
C32(0x076934B4), C32(0x454B4058), C32(0x77F74527), C32(0xFB4CF465),
C32(0x62931DA9), C32(0xE778C8DB), C32(0x22B3998E), C32(0xAC15CFB9)
};
static const sph_u32 B_init_192[] = {
C32(0x58BCBAC4), C32(0xEC47A08E), C32(0xAEE933B2), C32(0xDFCBC824),
C32(0xA7944804), C32(0xBF65BDB0), C32(0x5A9D4502), C32(0x59979AF7),
C32(0xC5CEA54E), C32(0x4B6B8150), C32(0x16E71909), C32(0x7D632319),
C32(0x930573A0), C32(0xF34C63D1), C32(0xCAF914B4), C32(0xFDD6612C)
};
static const sph_u32 C_init_192[] = {
C32(0x61550878), C32(0x89EF2B75), C32(0xA1660C46), C32(0x7EF3855B),
C32(0x7297B58C), C32(0x1BC67793), C32(0x7FB1C723), C32(0xB66FC640),
C32(0x1A48B71C), C32(0xF0976D17), C32(0x088CE80A), C32(0xA454EDF3),
C32(0x1C096BF4), C32(0xAC76224B), C32(0x5215781C), C32(0xCD5D2669)
};
static const sph_u32 A_init_224[] = {
C32(0xA5201467), C32(0xA9B8D94A), C32(0xD4CED997), C32(0x68379D7B),
C32(0xA7FC73BA), C32(0xF1A2546B), C32(0x606782BF), C32(0xE0BCFD0F),
C32(0x2F25374E), C32(0x069A149F), C32(0x5E2DFF25), C32(0xFAECF061)
};
static const sph_u32 B_init_224[] = {
C32(0xEC9905D8), C32(0xF21850CF), C32(0xC0A746C8), C32(0x21DAD498),
C32(0x35156EEB), C32(0x088C97F2), C32(0x26303E40), C32(0x8A2D4FB5),
C32(0xFEEE44B6), C32(0x8A1E9573), C32(0x7B81111A), C32(0xCBC139F0),
C32(0xA3513861), C32(0x1D2C362E), C32(0x918C580E), C32(0xB58E1B9C)
};
static const sph_u32 C_init_224[] = {
C32(0xE4B573A1), C32(0x4C1A0880), C32(0x1E907C51), C32(0x04807EFD),
C32(0x3AD8CDE5), C32(0x16B21302), C32(0x02512C53), C32(0x2204CB18),
C32(0x99405F2D), C32(0xE5B648A1), C32(0x70AB1D43), C32(0xA10C25C2),
C32(0x16F1AC05), C32(0x38BBEB56), C32(0x9B01DC60), C32(0xB1096D83)
};
static const sph_u32 A_init_256[] = {
C32(0x52F84552), C32(0xE54B7999), C32(0x2D8EE3EC), C32(0xB9645191),
C32(0xE0078B86), C32(0xBB7C44C9), C32(0xD2B5C1CA), C32(0xB0D2EB8C),
C32(0x14CE5A45), C32(0x22AF50DC), C32(0xEFFDBC6B), C32(0xEB21B74A)
};
static const sph_u32 B_init_256[] = {
C32(0xB555C6EE), C32(0x3E710596), C32(0xA72A652F), C32(0x9301515F),
C32(0xDA28C1FA), C32(0x696FD868), C32(0x9CB6BF72), C32(0x0AFE4002),
C32(0xA6E03615), C32(0x5138C1D4), C32(0xBE216306), C32(0xB38B8890),
C32(0x3EA8B96B), C32(0x3299ACE4), C32(0x30924DD4), C32(0x55CB34A5)
};
static const sph_u32 C_init_256[] = {
C32(0xB405F031), C32(0xC4233EBA), C32(0xB3733979), C32(0xC0DD9D55),
C32(0xC51C28AE), C32(0xA327B8E1), C32(0x56C56167), C32(0xED614433),
C32(0x88B59D60), C32(0x60E2CEBA), C32(0x758B4B8B), C32(0x83E82A7F),
C32(0xBC968828), C32(0xE6E00BF7), C32(0xBA839E55), C32(0x9B491C60)
};
static const sph_u32 A_init_384[] = {
C32(0xC8FCA331), C32(0xE55C504E), C32(0x003EBF26), C32(0xBB6B8D83),
C32(0x7B0448C1), C32(0x41B82789), C32(0x0A7C9601), C32(0x8D659CFF),
C32(0xB6E2673E), C32(0xCA54C77B), C32(0x1460FD7E), C32(0x3FCB8F2D)
};
static const sph_u32 B_init_384[] = {
C32(0x527291FC), C32(0x2A16455F), C32(0x78E627E5), C32(0x944F169F),
C32(0x1CA6F016), C32(0xA854EA25), C32(0x8DB98ABE), C32(0xF2C62641),
C32(0x30117DCB), C32(0xCF5C4309), C32(0x93711A25), C32(0xF9F671B8),
C32(0xB01D2116), C32(0x333F4B89), C32(0xB285D165), C32(0x86829B36)
};
static const sph_u32 C_init_384[] = {
C32(0xF764B11A), C32(0x76172146), C32(0xCEF6934D), C32(0xC6D28399),
C32(0xFE095F61), C32(0x5E6018B4), C32(0x5048ECF5), C32(0x51353261),
C32(0x6E6E36DC), C32(0x63130DAD), C32(0xA9C69BD6), C32(0x1E90EA0C),
C32(0x7C35073B), C32(0x28D95E6D), C32(0xAA340E0D), C32(0xCB3DEE70)
};
static const sph_u32 A_init_512[] = {
C32(0x20728DFD), C32(0x46C0BD53), C32(0xE782B699), C32(0x55304632),
C32(0x71B4EF90), C32(0x0EA9E82C), C32(0xDBB930F1), C32(0xFAD06B8B),
C32(0xBE0CAE40), C32(0x8BD14410), C32(0x76D2ADAC), C32(0x28ACAB7F)
};
static const sph_u32 B_init_512[] = {
C32(0xC1099CB7), C32(0x07B385F3), C32(0xE7442C26), C32(0xCC8AD640),
C32(0xEB6F56C7), C32(0x1EA81AA9), C32(0x73B9D314), C32(0x1DE85D08),
C32(0x48910A5A), C32(0x893B22DB), C32(0xC5A0DF44), C32(0xBBC4324E),
C32(0x72D2F240), C32(0x75941D99), C32(0x6D8BDE82), C32(0xA1A7502B)
};
static const sph_u32 C_init_512[] = {
C32(0xD9BF68D1), C32(0x58BAD750), C32(0x56028CB2), C32(0x8134F359),
C32(0xB5D469D8), C32(0x941A8CC2), C32(0x418B2A6E), C32(0x04052780),
C32(0x7F07D787), C32(0x5194358F), C32(0x3C60D665), C32(0xBE97D79A),
C32(0x950C3434), C32(0xAED9A06D), C32(0x2537DC8D), C32(0x7CDB5969)
};
/* END -- automatically generated code. */
static void
shabal_init(void *cc, unsigned size)
{
/*
* We have precomputed initial states for all the supported
* output bit lengths.
*/
const sph_u32 *A_init, *B_init, *C_init;
sph_shabal_context *sc;
switch (size) {
case 192:
A_init = A_init_192;
B_init = B_init_192;
C_init = C_init_192;
break;
case 224:
A_init = A_init_224;
B_init = B_init_224;
C_init = C_init_224;
break;
case 256:
A_init = A_init_256;
B_init = B_init_256;
C_init = C_init_256;
break;
case 384:
A_init = A_init_384;
B_init = B_init_384;
C_init = C_init_384;
break;
case 512:
A_init = A_init_512;
B_init = B_init_512;
C_init = C_init_512;
break;
default:
return;
}
sc = (sph_shabal_context *)cc;
memcpy(sc->A, A_init, sizeof sc->A);
memcpy(sc->B, B_init, sizeof sc->B);
memcpy(sc->C, C_init, sizeof sc->C);
sc->Wlow = 1;
sc->Whigh = 0;
sc->ptr = 0;
}
static void
shabal_core(void *cc, const unsigned char *data, size_t len)
{
sph_shabal_context *sc;
unsigned char *buf;
size_t ptr;
DECL_STATE
sc = (sph_shabal_context *)cc;
buf = sc->buf;
ptr = sc->ptr;
/*
* We do not want to copy the state to local variables if the
* amount of data is less than what is needed to complete the
* current block. Note that it is anyway suboptimal to call
* this method many times for small chunks of data.
*/
if (len < (sizeof sc->buf) - ptr) {
memcpy(buf + ptr, data, len);
ptr += len;
sc->ptr = ptr;
return;
}
READ_STATE(sc);
while (len > 0) {
size_t clen;
clen = (sizeof sc->buf) - ptr;
if (clen > len)
clen = len;
memcpy(buf + ptr, data, clen);
ptr += clen;
data += clen;
len -= clen;
if (ptr == sizeof sc->buf) {
DECODE_BLOCK;
INPUT_BLOCK_ADD;
XOR_W;
APPLY_P;
INPUT_BLOCK_SUB;
SWAP_BC;
INCR_W;
ptr = 0;
}
}
WRITE_STATE(sc);
sc->ptr = ptr;
}
static void
shabal_close(void *cc, unsigned ub, unsigned n, void *dst, unsigned size_words)
{
sph_shabal_context *sc;
unsigned char *buf;
size_t ptr;
int i;
unsigned z;
union {
unsigned char tmp_out[64];
sph_u32 dummy;
} u;
size_t out_len;
DECL_STATE
sc = (sph_shabal_context *)cc;
buf = sc->buf;
ptr = sc->ptr;
z = 0x80 >> n;
buf[ptr] = ((ub & -z) | z) & 0xFF;
memset(buf + ptr + 1, 0, (sizeof sc->buf) - (ptr + 1));
READ_STATE(sc);
DECODE_BLOCK;
INPUT_BLOCK_ADD;
XOR_W;
APPLY_P;
for (i = 0; i < 3; i ++) {
SWAP_BC;
XOR_W;
APPLY_P;
}
/*
* We just use our local variables; no need to go through
* the state structure. In order to share some code, we
* emit the relevant words into a temporary buffer, which
* we finally copy into the destination array.
*/
switch (size_words) {
case 16:
sph_enc32le_aligned(u.tmp_out + 0, B0);
sph_enc32le_aligned(u.tmp_out + 4, B1);
sph_enc32le_aligned(u.tmp_out + 8, B2);
sph_enc32le_aligned(u.tmp_out + 12, B3);
/* fall through */
case 12:
sph_enc32le_aligned(u.tmp_out + 16, B4);
sph_enc32le_aligned(u.tmp_out + 20, B5);
sph_enc32le_aligned(u.tmp_out + 24, B6);
sph_enc32le_aligned(u.tmp_out + 28, B7);
/* fall through */
case 8:
sph_enc32le_aligned(u.tmp_out + 32, B8);
/* fall through */
case 7:
sph_enc32le_aligned(u.tmp_out + 36, B9);
/* fall through */
case 6:
sph_enc32le_aligned(u.tmp_out + 40, BA);
sph_enc32le_aligned(u.tmp_out + 44, BB);
sph_enc32le_aligned(u.tmp_out + 48, BC);
sph_enc32le_aligned(u.tmp_out + 52, BD);
sph_enc32le_aligned(u.tmp_out + 56, BE);
sph_enc32le_aligned(u.tmp_out + 60, BF);
break;
default:
return;
}
out_len = size_words << 2;
memcpy(dst, u.tmp_out + (sizeof u.tmp_out) - out_len, out_len);
shabal_init(sc, size_words << 5);
}
/* see sph_shabal.h */
void
sph_shabal192_init(void *cc)
{
shabal_init(cc, 192);
}
/* see sph_shabal.h */
void
sph_shabal192(void *cc, const void *data, size_t len)
{
shabal_core(cc, (const unsigned char*)data, len);
}
/* see sph_shabal.h */
void
sph_shabal192_close(void *cc, void *dst)
{
shabal_close(cc, 0, 0, dst, 6);
}
/* see sph_shabal.h */
void
sph_shabal192_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
shabal_close(cc, ub, n, dst, 6);
}
/* see sph_shabal.h */
void
sph_shabal224_init(void *cc)
{
shabal_init(cc, 224);
}
/* see sph_shabal.h */
void
sph_shabal224(void *cc, const void *data, size_t len)
{
shabal_core(cc, (const unsigned char*)data, len);
}
/* see sph_shabal.h */
void
sph_shabal224_close(void *cc, void *dst)
{
shabal_close(cc, 0, 0, dst, 7);
}
/* see sph_shabal.h */
void
sph_shabal224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
shabal_close(cc, ub, n, dst, 7);
}
/* see sph_shabal.h */
void
sph_shabal256_init(void *cc)
{
shabal_init(cc, 256);
}
/* see sph_shabal.h */
void
sph_shabal256(void *cc, const void *data, size_t len)
{
shabal_core(cc, (const unsigned char*)data, len);
}
/* see sph_shabal.h */
void
sph_shabal256_close(void *cc, void *dst)
{
shabal_close(cc, 0, 0, dst, 8);
}
/* see sph_shabal.h */
void
sph_shabal256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
shabal_close(cc, ub, n, dst, 8);
}
/* see sph_shabal.h */
void
sph_shabal384_init(void *cc)
{
shabal_init(cc, 384);
}
/* see sph_shabal.h */
void
sph_shabal384(void *cc, const void *data, size_t len)
{
shabal_core(cc, (const unsigned char*)data, len);
}
/* see sph_shabal.h */
void
sph_shabal384_close(void *cc, void *dst)
{
shabal_close(cc, 0, 0, dst, 12);
}
/* see sph_shabal.h */
void
sph_shabal384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
shabal_close(cc, ub, n, dst, 12);
}
/* see sph_shabal.h */
void
sph_shabal512_init(void *cc)
{
shabal_init(cc, 512);
}
/* see sph_shabal.h */
void
sph_shabal512(void *cc, const void *data, size_t len)
{
shabal_core(cc, (const unsigned char*)data, len);
}
/* see sph_shabal.h */
void
sph_shabal512_close(void *cc, void *dst)
{
shabal_close(cc, 0, 0, dst, 16);
}
/* see sph_shabal.h */
void
sph_shabal512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
shabal_close(cc, ub, n, dst, 16);
}
#ifdef __cplusplus
}
#endif

344
sph/sph_shabal.h

@ -0,0 +1,344 @@ @@ -0,0 +1,344 @@
/* $Id: sph_shabal.h 175 2010-05-07 16:03:20Z tp $ */
/**
* Shabal interface. Shabal is a family of functions which differ by
* their output size; this implementation defines Shabal for output
* sizes 192, 224, 256, 384 and 512 bits.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file sph_shabal.h
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifndef SPH_SHABAL_H__
#define SPH_SHABAL_H__
#include <stddef.h>
#include "sph_types.h"
#ifdef __cplusplus
extern "C"{
#endif
/**
* Output size (in bits) for Shabal-192.
*/
#define SPH_SIZE_shabal192 192
/**
* Output size (in bits) for Shabal-224.
*/
#define SPH_SIZE_shabal224 224
/**
* Output size (in bits) for Shabal-256.
*/
#define SPH_SIZE_shabal256 256
/**
* Output size (in bits) for Shabal-384.
*/
#define SPH_SIZE_shabal384 384
/**
* Output size (in bits) for Shabal-512.
*/
#define SPH_SIZE_shabal512 512
/**
* This structure is a context for Shabal computations: it contains the
* intermediate values and some data from the last entered block. Once
* a Shabal computation has been performed, the context can be reused for
* another computation.
*
* The contents of this structure are private. A running Shabal computation
* can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char buf[64]; /* first field, for alignment */
size_t ptr;
sph_u32 A[12], B[16], C[16];
sph_u32 Whigh, Wlow;
#endif
} sph_shabal_context;
/**
* Type for a Shabal-192 context (identical to the common context).
*/
typedef sph_shabal_context sph_shabal192_context;
/**
* Type for a Shabal-224 context (identical to the common context).
*/
typedef sph_shabal_context sph_shabal224_context;
/**
* Type for a Shabal-256 context (identical to the common context).
*/
typedef sph_shabal_context sph_shabal256_context;
/**
* Type for a Shabal-384 context (identical to the common context).
*/
typedef sph_shabal_context sph_shabal384_context;
/**
* Type for a Shabal-512 context (identical to the common context).
*/
typedef sph_shabal_context sph_shabal512_context;
/**
* Initialize a Shabal-192 context. This process performs no memory allocation.
*
* @param cc the Shabal-192 context (pointer to a
* <code>sph_shabal192_context</code>)
*/
void sph_shabal192_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the Shabal-192 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_shabal192(void *cc, const void *data, size_t len);
/**
* Terminate the current Shabal-192 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (24 bytes). The context is automatically
* reinitialized.
*
* @param cc the Shabal-192 context
* @param dst the destination buffer
*/
void sph_shabal192_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (24 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the Shabal-192 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_shabal192_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a Shabal-224 context. This process performs no memory allocation.
*
* @param cc the Shabal-224 context (pointer to a
* <code>sph_shabal224_context</code>)
*/
void sph_shabal224_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the Shabal-224 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_shabal224(void *cc, const void *data, size_t len);
/**
* Terminate the current Shabal-224 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (28 bytes). The context is automatically
* reinitialized.
*
* @param cc the Shabal-224 context
* @param dst the destination buffer
*/
void sph_shabal224_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (28 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the Shabal-224 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_shabal224_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a Shabal-256 context. This process performs no memory allocation.
*
* @param cc the Shabal-256 context (pointer to a
* <code>sph_shabal256_context</code>)
*/
void sph_shabal256_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the Shabal-256 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_shabal256(void *cc, const void *data, size_t len);
/**
* Terminate the current Shabal-256 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (32 bytes). The context is automatically
* reinitialized.
*
* @param cc the Shabal-256 context
* @param dst the destination buffer
*/
void sph_shabal256_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (32 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the Shabal-256 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_shabal256_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a Shabal-384 context. This process performs no memory allocation.
*
* @param cc the Shabal-384 context (pointer to a
* <code>sph_shabal384_context</code>)
*/
void sph_shabal384_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the Shabal-384 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_shabal384(void *cc, const void *data, size_t len);
/**
* Terminate the current Shabal-384 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (48 bytes). The context is automatically
* reinitialized.
*
* @param cc the Shabal-384 context
* @param dst the destination buffer
*/
void sph_shabal384_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (48 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the Shabal-384 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_shabal384_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a Shabal-512 context. This process performs no memory allocation.
*
* @param cc the Shabal-512 context (pointer to a
* <code>sph_shabal512_context</code>)
*/
void sph_shabal512_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the Shabal-512 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_shabal512(void *cc, const void *data, size_t len);
/**
* Terminate the current Shabal-512 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (64 bytes). The context is automatically
* reinitialized.
*
* @param cc the Shabal-512 context
* @param dst the destination buffer
*/
void sph_shabal512_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (64 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the Shabal-512 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_shabal512_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
#ifdef __cplusplus
}
#endif
#endif

218
sph/sph_whirlpool.h

@ -0,0 +1,218 @@ @@ -0,0 +1,218 @@
/* $Id: sph_whirlpool.h 216 2010-06-08 09:46:57Z tp $ */
/**
* WHIRLPOOL interface.
*
* WHIRLPOOL knows three variants, dubbed "WHIRLPOOL-0" (original
* version, published in 2000, studied by NESSIE), "WHIRLPOOL-1"
* (first revision, 2001, with a new S-box) and "WHIRLPOOL" (current
* version, 2003, with a new diffusion matrix, also described as "plain
* WHIRLPOOL"). All three variants are implemented here.
*
* The original WHIRLPOOL (i.e. WHIRLPOOL-0) was published in: P. S. L.
* M. Barreto, V. Rijmen, "The Whirlpool Hashing Function", First open
* NESSIE Workshop, Leuven, Belgium, November 13--14, 2000.
*
* The current WHIRLPOOL specification and a reference implementation
* can be found on the WHIRLPOOL web page:
* http://paginas.terra.com.br/informatica/paulobarreto/WhirlpoolPage.html
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file sph_whirlpool.h
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifndef SPH_WHIRLPOOL_H__
#define SPH_WHIRLPOOL_H__
#include <stddef.h>
#include "sph_types.h"
#ifdef __cplusplus
extern "C"{
#endif
#if SPH_64
/**
* Output size (in bits) for WHIRLPOOL.
*/
#define SPH_SIZE_whirlpool 512
/**
* Output size (in bits) for WHIRLPOOL-0.
*/
#define SPH_SIZE_whirlpool0 512
/**
* Output size (in bits) for WHIRLPOOL-1.
*/
#define SPH_SIZE_whirlpool1 512
/**
* This structure is a context for WHIRLPOOL computations: it contains the
* intermediate values and some data from the last entered block. Once
* a WHIRLPOOL computation has been performed, the context can be reused for
* another computation.
*
* The contents of this structure are private. A running WHIRLPOOL computation
* can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char buf[64]; /* first field, for alignment */
sph_u64 state[8];
#if SPH_64
sph_u64 count;
#else
sph_u32 count_high, count_low;
#endif
#endif
} sph_whirlpool_context;
/**
* Initialize a WHIRLPOOL context. This process performs no memory allocation.
*
* @param cc the WHIRLPOOL context (pointer to a
* <code>sph_whirlpool_context</code>)
*/
void sph_whirlpool_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing). This function applies the
* plain WHIRLPOOL algorithm.
*
* @param cc the WHIRLPOOL context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_whirlpool(void *cc, const void *data, size_t len);
/**
* Terminate the current WHIRLPOOL computation and output the result into the
* provided buffer. The destination buffer must be wide enough to
* accomodate the result (64 bytes). The context is automatically
* reinitialized.
*
* @param cc the WHIRLPOOL context
* @param dst the destination buffer
*/
void sph_whirlpool_close(void *cc, void *dst);
/**
* WHIRLPOOL-0 uses the same structure than plain WHIRLPOOL.
*/
typedef sph_whirlpool_context sph_whirlpool0_context;
#ifdef DOXYGEN_IGNORE
/**
* Initialize a WHIRLPOOL-0 context. This function is identical to
* <code>sph_whirlpool_init()</code>.
*
* @param cc the WHIRLPOOL context (pointer to a
* <code>sph_whirlpool0_context</code>)
*/
void sph_whirlpool0_init(void *cc);
#endif
#ifndef DOXYGEN_IGNORE
#define sph_whirlpool0_init sph_whirlpool_init
#endif
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing). This function applies the
* WHIRLPOOL-0 algorithm.
*
* @param cc the WHIRLPOOL context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_whirlpool0(void *cc, const void *data, size_t len);
/**
* Terminate the current WHIRLPOOL-0 computation and output the result into the
* provided buffer. The destination buffer must be wide enough to
* accomodate the result (64 bytes). The context is automatically
* reinitialized.
*
* @param cc the WHIRLPOOL-0 context
* @param dst the destination buffer
*/
void sph_whirlpool0_close(void *cc, void *dst);
/**
* WHIRLPOOL-1 uses the same structure than plain WHIRLPOOL.
*/
typedef sph_whirlpool_context sph_whirlpool1_context;
#ifdef DOXYGEN_IGNORE
/**
* Initialize a WHIRLPOOL-1 context. This function is identical to
* <code>sph_whirlpool_init()</code>.
*
* @param cc the WHIRLPOOL context (pointer to a
* <code>sph_whirlpool1_context</code>)
*/
void sph_whirlpool1_init(void *cc);
#endif
#ifndef DOXYGEN_IGNORE
#define sph_whirlpool1_init sph_whirlpool_init
#endif
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing). This function applies the
* WHIRLPOOL-1 algorithm.
*
* @param cc the WHIRLPOOL context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_whirlpool1(void *cc, const void *data, size_t len);
/**
* Terminate the current WHIRLPOOL-1 computation and output the result into the
* provided buffer. The destination buffer must be wide enough to
* accomodate the result (64 bytes). The context is automatically
* reinitialized.
*
* @param cc the WHIRLPOOL-1 context
* @param dst the destination buffer
*/
void sph_whirlpool1_close(void *cc, void *dst);
#endif
#ifdef __cplusplus
}
#endif
#endif

3480
sph/whirlpool.c

File diff suppressed because it is too large Load Diff

16
winbuild/sgminer.vcxproj

@ -1,4 +1,4 @@ @@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
@ -28,23 +28,27 @@ @@ -28,23 +28,27 @@
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v110</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v110</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v110</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v110</PlatformToolset>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
@ -257,7 +261,9 @@ @@ -257,7 +261,9 @@
<ClCompile Include="..\adl.c" />
<ClCompile Include="..\algorithm.c" />
<ClCompile Include="..\algorithm\animecoin.c" />
<ClCompile Include="..\algorithm\bitblock.c" />
<ClCompile Include="..\algorithm\talkcoin.c" />
<ClCompile Include="..\algorithm\x14.c" />
<ClCompile Include="..\api.c" />
<ClCompile Include="..\ccan\opt\helpers.c" />
<ClCompile Include="..\ccan\opt\opt.c" />
@ -310,17 +316,21 @@ @@ -310,17 +316,21 @@
<ClCompile Include="..\sph\panama.c" />
<ClCompile Include="..\sph\sha2.c" />
<ClCompile Include="..\sph\sha2big.c" />
<ClCompile Include="..\sph\shabal.c" />
<ClCompile Include="..\sph\shavite.c" />
<ClCompile Include="..\sph\simd.c" />
<ClCompile Include="..\sph\skein.c" />
<ClCompile Include="..\algorithm\twecoin.c" />
<ClCompile Include="..\sph\whirlpool.c" />
<ClCompile Include="..\util.c" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\adl.h" />
<ClInclude Include="..\algorithm.h" />
<ClInclude Include="..\algorithm\animecoin.h" />
<ClInclude Include="..\algorithm\bitblock.h" />
<ClInclude Include="..\algorithm\talkcoin.h" />
<ClInclude Include="..\algorithm\x14.h" />
<ClInclude Include="..\api.h" />
<ClInclude Include="..\arg-nonnull.h" />
<ClInclude Include="..\bench_block.h" />
@ -365,11 +375,13 @@ @@ -365,11 +375,13 @@
<ClInclude Include="..\sph\sph_luffa.h" />
<ClInclude Include="..\sph\sph_panama.h" />
<ClInclude Include="..\sph\sph_sha2.h" />
<ClInclude Include="..\sph\sph_shabal.h" />
<ClInclude Include="..\sph\sph_shavite.h" />
<ClInclude Include="..\sph\sph_simd.h" />
<ClInclude Include="..\sph\sph_skein.h" />
<ClInclude Include="..\sph\sph_types.h" />
<ClInclude Include="..\algorithm\twecoin.h" />
<ClInclude Include="..\sph\sph_whirlpool.h" />
<ClInclude Include="..\uthash.h" />
<ClInclude Include="..\util.h" />
<ClInclude Include="..\warn-on-use.h" />
@ -382,4 +394,4 @@ @@ -382,4 +394,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

28
winbuild/sgminer.vcxproj.filters

@ -1,4 +1,4 @@ @@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
@ -218,6 +218,18 @@ @@ -218,6 +218,18 @@
<ClCompile Include="..\algorithm\talkcoin.c">
<Filter>Source Files\algorithm</Filter>
</ClCompile>
<ClCompile Include="..\sph\shabal.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\sph\whirlpool.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\algorithm\bitblock.c">
<Filter>Source Files\algorithm</Filter>
</ClCompile>
<ClCompile Include="..\algorithm\x14.c">
<Filter>Source Files\algorithm</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\adl.h">
@ -394,8 +406,20 @@ @@ -394,8 +406,20 @@
<ClInclude Include="..\algorithm\talkcoin.h">
<Filter>Header Files\algorithm</Filter>
</ClInclude>
<ClInclude Include="..\sph\sph_shabal.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\sph\sph_whirlpool.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\algorithm\bitblock.h">
<Filter>Header Files\algorithm</Filter>
</ClInclude>
<ClInclude Include="..\algorithm\x14.h">
<Filter>Header Files\algorithm</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="README.txt" />
</ItemGroup>
</Project>
</Project>

Loading…
Cancel
Save