From e7228b20f437d9555991879000fc3a9c674e836f Mon Sep 17 00:00:00 2001 From: lasybear Date: Sun, 15 Jun 2014 19:21:10 +0400 Subject: [PATCH 1/7] talkcoin-mod --- Makefile.am | 1 + algorithm.c | 41 +++++ algorithm/talkcoin.c | 177 +++++++++++++++++++ algorithm/talkcoin.h | 10 ++ kernel/talkcoin-mod.cl | 378 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 607 insertions(+) create mode 100644 algorithm/talkcoin.c create mode 100644 algorithm/talkcoin.h create mode 100644 kernel/talkcoin-mod.cl diff --git a/Makefile.am b/Makefile.am index 43ebec65..f2e00e25 100644 --- a/Makefile.am +++ b/Makefile.am @@ -60,6 +60,7 @@ sgminer_SOURCES += algorithm/sifcoin.c algorithm/sifcoin.h sgminer_SOURCES += algorithm/twecoin.c algorithm/twecoin.h sgminer_SOURCES += algorithm/marucoin.c algorithm/marucoin.h sgminer_SOURCES += algorithm/maxcoin.c algorithm/maxcoin.h +sgminer_SOURCES += algorithm/talkcoin.c algorithm/talkcoin.h bin_SCRIPTS = $(top_srcdir)/kernel/*.cl diff --git a/algorithm.c b/algorithm.c index e58aea7f..66d88f2b 100644 --- a/algorithm.c +++ b/algorithm.c @@ -25,6 +25,7 @@ #include "algorithm/twecoin.h" #include "algorithm/marucoin.h" #include "algorithm/maxcoin.h" +#include "algorithm/talkcoin.h" #include "compat.h" @@ -326,6 +327,44 @@ static cl_int queue_marucoin_mod_old_kernel(struct __clState *clState, struct _d return status; } +static cl_int queue_talkcoin_mod_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) +{ + cl_kernel *kernel; + unsigned int num; + cl_ulong le_target; + cl_int status = 0; + + le_target = *(cl_ulong *)(blk->work->device_target + 24); + flip80(clState->cldata, blk->work->data); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL); + + // blake - search + kernel = &clState->kernel; + num = 0; + CL_SET_ARG(clState->CLbuffer0); + CL_SET_ARG(clState->padbuffer8); + // groestl - search1 + kernel = clState->extra_kernels; + num = 0; + CL_SET_ARG(clState->padbuffer8); + // jh - search2 + kernel++; + num = 0; + CL_SET_ARG(clState->padbuffer8); + // keccak - search3 + kernel++; + num = 0; + CL_SET_ARG(clState->padbuffer8); + // skein - search4 + kernel++; + num = 0; + CL_SET_ARG(clState->padbuffer8); + CL_SET_ARG(clState->outputBuffer); + CL_SET_ARG(le_target); + + return status; +} + typedef struct _algorithm_settings_t { const char *name; /* Human-readable identifier */ double diff_multiplier1; @@ -383,6 +422,8 @@ static algorithm_settings_t algos[] = { { "marucoin-mod", 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 12, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_kernel, gen_hash, append_hamsi_compiler_options}, { "marucoin-modold", 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_old_kernel, gen_hash, append_hamsi_compiler_options}, + { "talkcoin-mod", 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 4, 8 * 16 * 4194304, 0, talkcoin_regenhash, queue_talkcoin_mod_kernel, gen_hash, NULL}, + // kernels starting from this will have difficulty calculated by using fuguecoin algorithm #define A_FUGUE(a, b) \ { a, 1, 256, 256, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, sha256, NULL} diff --git a/algorithm/talkcoin.c b/algorithm/talkcoin.c new file mode 100644 index 00000000..ba0077af --- /dev/null +++ b/algorithm/talkcoin.c @@ -0,0 +1,177 @@ +/*- + * Copyright 2014 phm + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "config.h" +#include "miner.h" + +#include +#include +#include + +#include "sph/sph_blake.h" +#include "sph/sph_groestl.h" +#include "sph/sph_jh.h" +#include "sph/sph_keccak.h" +#include "sph/sph_skein.h" + +/* Move init out of loop, so init once externally, and then use one single memcpy with that bigger memory block */ +typedef struct { + sph_blake512_context blake1; + sph_groestl512_context groestl1; + sph_jh512_context jh1; + sph_keccak512_context keccak1; + sph_skein512_context skein1; +} Xhash_context_holder; + +Xhash_context_holder base_contexts; + + +void init_Nhash_contexts() +{ + sph_blake512_init(&base_contexts.blake1); + sph_groestl512_init(&base_contexts.groestl1); + sph_jh512_init(&base_contexts.jh1); + sph_keccak512_init(&base_contexts.keccak1); + sph_skein512_init(&base_contexts.skein1); +} + +/* + * Encode a length len/4 vector of (uint32_t) into a length len vector of + * (unsigned char) in big-endian form. Assumes len is a multiple of 4. + */ +static inline void +be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) +{ + uint32_t i; + + for (i = 0; i < len; i++) + dst[i] = htobe32(src[i]); +} + + +inline void talkhash(void *state, const void *input) +{ + init_Nhash_contexts(); + + Xhash_context_holder ctx; + + uint32_t hashA[16], hashB[16]; + //blake-bmw-groestl-sken-jh-meccak-luffa-cubehash-shivite-simd-echo + memcpy(&ctx, &base_contexts, sizeof(base_contexts)); + + sph_blake512 (&ctx.blake1, input, 80); + sph_blake512_close (&ctx.blake1, hashA); + + sph_groestl512 (&ctx.groestl1, hashA, 64); + sph_groestl512_close(&ctx.groestl1, hashB); + + sph_jh512 (&ctx.jh1, hashB, 64); + sph_jh512_close(&ctx.jh1, hashA); + + sph_keccak512 (&ctx.keccak1, hashA, 64); + sph_keccak512_close(&ctx.keccak1, hashB); + + sph_skein512 (&ctx.skein1, hashB, 64); + sph_skein512_close(&ctx.skein1, hashA); + + memcpy(state, hashA, 32); +} + +static const uint32_t diff1targ = 0x0000ffff; + +/* Used externally as confirmation of correct OCL code */ +int talkcoin_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce) +{ + uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]); + uint32_t data[20], ohash[8]; + + be32enc_vect(data, (const uint32_t *)pdata, 19); + data[19] = htobe32(nonce); + talkhash(ohash, data); + + tmp_hash7 = be32toh(ohash[7]); + + applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx", + (long unsigned int)Htarg, + (long unsigned int)diff1targ, + (long unsigned int)tmp_hash7); + if (tmp_hash7 > diff1targ) + return -1; + if (tmp_hash7 > Htarg) + return 0; + return 1; +} + +void talkcoin_regenhash(struct work *work) +{ + uint32_t data[20]; + uint32_t *nonce = (uint32_t *)(work->data + 76); + uint32_t *ohash = (uint32_t *)(work->hash); + + be32enc_vect(data, (const uint32_t *)work->data, 19); + data[19] = htobe32(*nonce); + talkhash(ohash, data); +} + +bool scanhash_talkcoin(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate, + unsigned char *pdata, unsigned char __maybe_unused *phash1, + unsigned char __maybe_unused *phash, const unsigned char *ptarget, + uint32_t max_nonce, uint32_t *last_nonce, uint32_t n) +{ + uint32_t *nonce = (uint32_t *)(pdata + 76); + uint32_t data[20]; + uint32_t tmp_hash7; + uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]); + bool ret = false; + + be32enc_vect(data, (const uint32_t *)pdata, 19); + + while(1) { + uint32_t ostate[8]; + + *nonce = ++n; + data[19] = (n); + talkhash(ostate, data); + tmp_hash7 = (ostate[7]); + + applog(LOG_INFO, "data7 %08lx", + (long unsigned int)data[7]); + + if (unlikely(tmp_hash7 <= Htarg)) { + ((uint32_t *)pdata)[19] = htobe32(n); + *last_nonce = n; + ret = true; + break; + } + + if (unlikely((n >= max_nonce) || thr->work_restart)) { + *last_nonce = n; + break; + } + } + + return ret; +} diff --git a/algorithm/talkcoin.h b/algorithm/talkcoin.h new file mode 100644 index 00000000..551a20a6 --- /dev/null +++ b/algorithm/talkcoin.h @@ -0,0 +1,10 @@ +#ifndef TALKCOIN_H +#define TALKCOIN_H + +#include "miner.h" + +extern int talkcoin_test(unsigned char *pdata, const unsigned char *ptarget, + uint32_t nonce); +extern void talkcoin_regenhash(struct work *work); + +#endif /* TALKCOIN_H */ diff --git a/kernel/talkcoin-mod.cl b/kernel/talkcoin-mod.cl new file mode 100644 index 00000000..225247d2 --- /dev/null +++ b/kernel/talkcoin-mod.cl @@ -0,0 +1,378 @@ +/* + * TalkCoin kernel implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2014 phm + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author phm + */ + +#ifndef TALKCOIN_MOD_CL +#define TALKCOIN_MOD_CL + +#if __ENDIAN_LITTLE__ +#define SPH_LITTLE_ENDIAN 1 +#else +#define SPH_BIG_ENDIAN 1 +#endif + +#define SPH_UPTR sph_u64 + +typedef unsigned int sph_u32; +typedef int sph_s32; +#ifndef __OPENCL_VERSION__ +typedef unsigned long long sph_u64; +typedef long long sph_s64; +#else +typedef unsigned long sph_u64; +typedef long sph_s64; +#endif + +#define SPH_64 1 +#define SPH_64_TRUE 1 + +#define SPH_C32(x) ((sph_u32)(x ## U)) +#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) +#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n)))) +#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) + +#define SPH_C64(x) ((sph_u64)(x ## UL)) +#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF)) +#define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n)))) +#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) + +#define SPH_COMPACT_BLAKE_64 0 +#define SPH_GROESTL_BIG_ENDIAN 0 +#define SPH_SMALL_FOOTPRINT_GROESTL 0 +#define SPH_JH_64 1 +#define SPH_KECCAK_64 1 +#define SPH_KECCAK_NOCOPY 0 +#define SPH_KECCAK_UNROLL 0 + +#include "blake.cl" +#include "groestl.cl" +#include "jh.cl" +#include "keccak.cl" +#include "skein.cl" + +#define SWAP4(x) as_uint(as_uchar4(x).wzyx) +#define SWAP8(x) as_ulong(as_uchar8(x).s76543210) + +#if SPH_BIG_ENDIAN + #define DEC64E(x) (x) + #define DEC64BE(x) (*(const __global sph_u64 *) (x)); +#else + #define DEC64E(x) SWAP8(x) + #define DEC64BE(x) SWAP8(*(const __global sph_u64 *) (x)); +#endif + + +typedef union { + unsigned char h1[64]; + uint h4[16]; + ulong h8[8]; +} hash_t; + +// blake +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search(__global unsigned char* block, __global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); + sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); + sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); + sph_u64 H6 = SPH_C64(0x1F83D9ABFB41BD6B), H7 = SPH_C64(0x5BE0CD19137E2179); + sph_u64 S0 = 0, S1 = 0, S2 = 0, S3 = 0; + sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (80 << 3), T1 = 0xFFFFFFFFFFFFFFFF;; + + if ((T0 = SPH_T64(T0 + 1024)) < 1024) + { + T1 = SPH_T64(T1 + 1); + } + sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; + sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; + sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; + sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; + M0 = DEC64BE(block + 0); + M1 = DEC64BE(block + 8); + M2 = DEC64BE(block + 16); + M3 = DEC64BE(block + 24); + M4 = DEC64BE(block + 32); + M5 = DEC64BE(block + 40); + M6 = DEC64BE(block + 48); + M7 = DEC64BE(block + 56); + M8 = DEC64BE(block + 64); + M9 = DEC64BE(block + 72); + M9 &= 0xFFFFFFFF00000000; + M9 ^= SWAP4(gid); + MA = 0x8000000000000000; + MB = 0; + MC = 0; + MD = 1; + ME = 0; + MF = 0x280; + + COMPRESS64; + + hash->h8[0] = H0; + hash->h8[1] = H1; + hash->h8[2] = H2; + hash->h8[3] = H3; + hash->h8[4] = H4; + hash->h8[5] = H5; + hash->h8[6] = H6; + hash->h8[7] = H7; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +// groestl +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search1(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + __local sph_u64 T0_L[256], T1_L[256], T2_L[256], T3_L[256], T4_L[256], T5_L[256], T6_L[256], T7_L[256]; + + int init = get_local_id(0); + int step = get_local_size(0); + + for (int i = init; i < 256; i += step) + { + T0_L[i] = T0[i]; + T1_L[i] = T1[i]; + T2_L[i] = T2[i]; + T3_L[i] = T3[i]; + T4_L[i] = T4[i]; + T5_L[i] = T5[i]; + T6_L[i] = T6[i]; + T7_L[i] = T7[i]; + } + barrier(CLK_LOCAL_MEM_FENCE); + +#define T0 T0_L +#define T1 T1_L +#define T2 T2_L +#define T3 T3_L +#define T4 T4_L +#define T5 T5_L +#define T6 T6_L +#define T7 T7_L + + sph_u64 H[16]; + for (unsigned int u = 0; u < 15; u ++) + H[u] = 0; +#if USE_LE + H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); +#else + H[15] = (sph_u64)512; +#endif + + sph_u64 g[16], m[16]; + m[0] = DEC64E(hash->h8[0]); + m[1] = DEC64E(hash->h8[1]); + m[2] = DEC64E(hash->h8[2]); + m[3] = DEC64E(hash->h8[3]); + m[4] = DEC64E(hash->h8[4]); + m[5] = DEC64E(hash->h8[5]); + m[6] = DEC64E(hash->h8[6]); + m[7] = DEC64E(hash->h8[7]); + for (unsigned int u = 0; u < 16; u ++) + g[u] = m[u] ^ H[u]; + m[8] = 0x80; g[8] = m[8] ^ H[8]; + m[9] = 0; g[9] = m[9] ^ H[9]; + m[10] = 0; g[10] = m[10] ^ H[10]; + m[11] = 0; g[11] = m[11] ^ H[11]; + m[12] = 0; g[12] = m[12] ^ H[12]; + m[13] = 0; g[13] = m[13] ^ H[13]; + m[14] = 0; g[14] = m[14] ^ H[14]; + m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; + PERM_BIG_P(g); + PERM_BIG_Q(m); + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= g[u] ^ m[u]; + sph_u64 xH[16]; + for (unsigned int u = 0; u < 16; u ++) + xH[u] = H[u]; + PERM_BIG_P(xH); + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= xH[u]; + for (unsigned int u = 0; u < 8; u ++) + hash->h8[u] = DEC64E(H[u + 8]); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +// jh +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search2(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7); + sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b); + sph_u64 tmp; + + + for(int i = 0; i < 2; i++) + { + if (i == 0) { + h0h ^= DEC64E(hash->h8[0]); + h0l ^= DEC64E(hash->h8[1]); + h1h ^= DEC64E(hash->h8[2]); + h1l ^= DEC64E(hash->h8[3]); + h2h ^= DEC64E(hash->h8[4]); + h2l ^= DEC64E(hash->h8[5]); + h3h ^= DEC64E(hash->h8[6]); + h3l ^= DEC64E(hash->h8[7]); + } else if(i == 1) { + h4h ^= DEC64E(hash->h8[0]); + h4l ^= DEC64E(hash->h8[1]); + h5h ^= DEC64E(hash->h8[2]); + h5l ^= DEC64E(hash->h8[3]); + h6h ^= DEC64E(hash->h8[4]); + h6l ^= DEC64E(hash->h8[5]); + h7h ^= DEC64E(hash->h8[6]); + h7l ^= DEC64E(hash->h8[7]); + + h0h ^= 0x80; + h3l ^= 0x2000000000000; + } + E8; + } + h4h ^= 0x80; + h7l ^= 0x2000000000000; + + hash->h8[0] = DEC64E(h4h); + hash->h8[1] = DEC64E(h4l); + hash->h8[2] = DEC64E(h5h); + hash->h8[3] = DEC64E(h5l); + hash->h8[4] = DEC64E(h6h); + hash->h8[5] = DEC64E(h6l); + hash->h8[6] = DEC64E(h7h); + hash->h8[7] = DEC64E(h7l); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + + +// keccak +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search3(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // keccak + + sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0; + sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0; + sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0; + sph_u64 a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0; + sph_u64 a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0; + + a10 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a20 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a31 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a22 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a23 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a04 = SPH_C64(0xFFFFFFFFFFFFFFFF); + + a00 ^= SWAP8(hash->h8[0]); + a10 ^= SWAP8(hash->h8[1]); + a20 ^= SWAP8(hash->h8[2]); + a30 ^= SWAP8(hash->h8[3]); + a40 ^= SWAP8(hash->h8[4]); + a01 ^= SWAP8(hash->h8[5]); + a11 ^= SWAP8(hash->h8[6]); + a21 ^= SWAP8(hash->h8[7]); + a31 ^= 0x8000000000000001; + KECCAK_F_1600; + // Finalize the "lane complement" + a10 = ~a10; + a20 = ~a20; + + hash->h8[0] = SWAP8(a00); + hash->h8[1] = SWAP8(a10); + hash->h8[2] = SWAP8(a20); + hash->h8[3] = SWAP8(a30); + hash->h8[4] = SWAP8(a40); + hash->h8[5] = SWAP8(a01); + hash->h8[6] = SWAP8(a11); + hash->h8[7] = SWAP8(a21); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + + +// skein +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search4(__global hash_t* hashes, __global uint* output, const ulong target) +{ + uint gid = get_global_id(0); + uint offset = get_global_offset(0); + hash_t hash; + __global hash_t *hashp = &(hashes[gid-offset]); + + for (int i = 0; i < 8; i++) { + hash.h8[i] = hashes[gid-offset].h8[i]; + } + + sph_u64 h0 = SPH_C64(0x4903ADFF749C51CE), h1 = SPH_C64(0x0D95DE399746DF03), h2 = SPH_C64(0x8FD1934127C79BCE), h3 = SPH_C64(0x9A255629FF352CB1), h4 = SPH_C64(0x5DB62599DF6CA7B0), h5 = SPH_C64(0xEABE394CA9D5C3F4), h6 = SPH_C64(0x991112C71A75B523), h7 = SPH_C64(0xAE18A40B660FCC33); + sph_u64 m0, m1, m2, m3, m4, m5, m6, m7; + sph_u64 bcount = 0; + + m0 = SWAP8(hash.h8[0]); + m1 = SWAP8(hash.h8[1]); + m2 = SWAP8(hash.h8[2]); + m3 = SWAP8(hash.h8[3]); + m4 = SWAP8(hash.h8[4]); + m5 = SWAP8(hash.h8[5]); + m6 = SWAP8(hash.h8[6]); + m7 = SWAP8(hash.h8[7]); + UBI_BIG(480, 64); + bcount = 0; + m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; + UBI_BIG(510, 8); + hash.h8[0] = SWAP8(h0); + hash.h8[1] = SWAP8(h1); + hash.h8[2] = SWAP8(h2); + hash.h8[3] = SWAP8(h3); + hash.h8[4] = SWAP8(h4); + hash.h8[5] = SWAP8(h5); + hash.h8[6] = SWAP8(h6); + hash.h8[7] = SWAP8(h7); + + bool result = (SWAP8(hash.h8[3]) <= target); + if (result) + output[atomic_inc(output+0xFF)] = SWAP4(gid); + +} + +#endif // TALKCOIN_MOD_CL From a85e669d0215946be90b5bcc355b83a4f9e70570 Mon Sep 17 00:00:00 2001 From: lasybear Date: Mon, 16 Jun 2014 15:47:50 +0400 Subject: [PATCH 2/7] Fix compatibility with AMD drivers 14.6 --- kernel/darkcoin-mod.cl | 8 ++++---- kernel/marucoin-mod.cl | 8 ++++---- kernel/talkcoin-mod.cl | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/kernel/darkcoin-mod.cl b/kernel/darkcoin-mod.cl index e72396d3..4eac05d2 100644 --- a/kernel/darkcoin-mod.cl +++ b/kernel/darkcoin-mod.cl @@ -58,13 +58,13 @@ typedef long sph_s64; #define SPH_64_TRUE 1 #define SPH_C32(x) ((sph_u32)(x ## U)) -#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) -#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n)))) +#define SPH_T32(x) (as_uint(x)) +#define SPH_ROTL32(x, n) rotate(as_uint(x), as_uint(n)) #define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) #define SPH_C64(x) ((sph_u64)(x ## UL)) -#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF)) -#define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n)))) +#define SPH_T64(x) (as_ulong(x)) +#define SPH_ROTL64(x, n) rotate(as_ulong(x), (n) & 0xFFFFFFFFFFFFFFFFUL) #define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) #define SPH_ECHO_64 1 diff --git a/kernel/marucoin-mod.cl b/kernel/marucoin-mod.cl index 1618efce..d07a6872 100644 --- a/kernel/marucoin-mod.cl +++ b/kernel/marucoin-mod.cl @@ -55,13 +55,13 @@ typedef long sph_s64; #define SPH_64_TRUE 1 #define SPH_C32(x) ((sph_u32)(x ## U)) -#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) -#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n)))) +#define SPH_T32(x) (as_uint(x)) +#define SPH_ROTL32(x, n) rotate(as_uint(x), as_uint(n)) #define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) #define SPH_C64(x) ((sph_u64)(x ## UL)) -#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF)) -#define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n)))) +#define SPH_T64(x) (as_ulong(x)) +#define SPH_ROTL64(x, n) rotate(as_ulong(x), (n) & 0xFFFFFFFFFFFFFFFFUL) #define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) #define SPH_ECHO_64 1 diff --git a/kernel/talkcoin-mod.cl b/kernel/talkcoin-mod.cl index 225247d2..a8c94a3c 100644 --- a/kernel/talkcoin-mod.cl +++ b/kernel/talkcoin-mod.cl @@ -54,13 +54,13 @@ typedef long sph_s64; #define SPH_64_TRUE 1 #define SPH_C32(x) ((sph_u32)(x ## U)) -#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) -#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n)))) +#define SPH_T32(x) (as_uint(x)) +#define SPH_ROTL32(x, n) rotate(as_uint(x), as_uint(n)) #define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) #define SPH_C64(x) ((sph_u64)(x ## UL)) -#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF)) -#define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n)))) +#define SPH_T64(x) (as_ulong(x)) +#define SPH_ROTL64(x, n) rotate(as_ulong(x), (n) & 0xFFFFFFFFFFFFFFFFUL) #define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) #define SPH_COMPACT_BLAKE_64 0 From 83b2ac9ec751ee0a72a18add6e8bd9ba000098c5 Mon Sep 17 00:00:00 2001 From: "Yann St. Arnaud" Date: Fri, 20 Jun 2014 17:23:26 -0400 Subject: [PATCH 3/7] Compatibility update for MSVS Added an implementation of strsep() that should work with MSVS. Hopefully resolves issue #278 --- compat.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/compat.h b/compat.h index b0867206..2a7003b5 100644 --- a/compat.h +++ b/compat.h @@ -70,6 +70,25 @@ static inline int setpriority(__maybe_unused int which, __maybe_unused int who, return 0; } +//implement strsep() for windows +static char* strsep(char** stringp, const char* delim) +{ + char* start = *stringp; + char* p; + + p = ((start != NULL)?strpbrk(start, delim):NULL); + + if(p == NULL) + *stringp = NULL; + else + { + *p = '\0'; + *stringp = p + 1; + } + + return start; +} + typedef unsigned long int ulong; typedef unsigned short int ushort; typedef unsigned int uint; From 19651317844d7f7760b70bd0224c256709a9a1f1 Mon Sep 17 00:00:00 2001 From: troky Date: Sat, 21 Jun 2014 10:33:06 +0200 Subject: [PATCH 4/7] Moved strsep() implementation from winbuild.h to compat.h --- compat.h | 29 +++++++++++++++++------------ winbuild/dist/include/winbuild.h | 32 ++++---------------------------- 2 files changed, 21 insertions(+), 40 deletions(-) diff --git a/compat.h b/compat.h index 2a7003b5..582348a4 100644 --- a/compat.h +++ b/compat.h @@ -70,24 +70,29 @@ static inline int setpriority(__maybe_unused int which, __maybe_unused int who, return 0; } -//implement strsep() for windows -static char* strsep(char** stringp, const char* delim) +#ifndef HAVE_STRSEP +inline char *strsep(char **stringp, const char *delim) { - char* start = *stringp; - char* p; + char *res; - p = ((start != NULL)?strpbrk(start, delim):NULL); + if (!stringp || !*stringp || !**stringp) { + return NULL; + } + + res = *stringp; + while(**stringp && !strchr(delim, **stringp)) { + ++(*stringp); + } - if(p == NULL) - *stringp = NULL; - else - { - *p = '\0'; - *stringp = p + 1; + if (**stringp) { + **stringp = '\0'; + ++(*stringp); } - return start; + return res; } +#endif + typedef unsigned long int ulong; typedef unsigned short int ushort; diff --git a/winbuild/dist/include/winbuild.h b/winbuild/dist/include/winbuild.h index 014f8461..a5b5da07 100644 --- a/winbuild/dist/include/winbuild.h +++ b/winbuild/dist/include/winbuild.h @@ -1,6 +1,5 @@ -#ifndef __WINBUILD_H__ -#define __WINBUILD_H__ -#endif +#ifndef WINBUILD_H +#define WINBUILD_H #if defined(_MSC_VER) @@ -131,29 +130,6 @@ inline void* memmem (void* buf, size_t buflen, void* pat, size_t patlen) return 0; } -#ifndef HAVE_STRSEP -inline char *strsep(char **stringp, const char *delim) -{ - char *res; - - if (!stringp || !*stringp || !**stringp) { - return NULL; - } - - res = *stringp; - while(**stringp && !strchr(delim, **stringp)) { - ++(*stringp); - } - - if (**stringp) { - **stringp = '\0'; - ++(*stringp); - } - - return res; -} -#endif - #define va_copy(a, b) memcpy(&(a), &(b), sizeof(va_list)) #define usleep(x) Sleep((x)/1000) @@ -162,5 +138,5 @@ inline char *strsep(char **stringp, const char *delim) #define __func__ __FUNCTION__ #define __attribute__(x) - -#endif \ No newline at end of file +#endif /* _MSC_VER */ +#endif /* WINBUILD_H */ From 5e8bba2af9b2e8426ec8f03690ae680624f0884c Mon Sep 17 00:00:00 2001 From: ystarnaud Date: Sat, 21 Jun 2014 04:46:05 -0400 Subject: [PATCH 5/7] Update configuration.md Added more details about the default profile, globals and profiles along with example configurations. --- doc/configuration.md | 264 +++++++++++++++++++++++++++++++++---------- 1 file changed, 204 insertions(+), 60 deletions(-) diff --git a/doc/configuration.md b/doc/configuration.md index c2c1fec6..6839f2c2 100644 --- a/doc/configuration.md +++ b/doc/configuration.md @@ -4,18 +4,214 @@ ### Table of contents -* [CLI Only options](#cli-only-options) -* [Config-file and CLI options](#config-file-and-cli-options) +* [Configuration Settings Order](#configuration-settings-order) +* [Globals and the Default Profile](#globals-and-the-default-profile) * [Working with Profiles and Pool Specific Settings](#working-with-profiles-and-pool-specific-settings) * [Includes](#includes) +* [CLI Only options](#cli-only-options) +* [Config-file and CLI options](#config-file-and-cli-options) + +--- -### Configuration Settings Order +## Configuration Settings Order The configuration settings in sgminer are applied in this order: ``` -Command Line > Config File Globals/Default Profile > Pool Profile > Pool Specific +Command Line > Config File Globals > Default Profile > Pool's Profile > Pool-Specific Settings +``` + +[Top](#configuration-and-command-line-options) + +## Globals and the Default Profile + +The default profile contains the settings that are to be used as defaults throughout sgminer. Typically, unless you specify `default-profile`, those settings will be read from the global level of the config file or use sgminer's core defaults if nothing is at the global level. The pool or profile level settings will override the default profile's settings. + +The example below has `algorithm` set at the global level. Anytime a pool or profile doesn't specify `algorithm`, "darkcoin-mod" will be used. +``` +{ + "pools": [...], + "algorithm":"darkcoin-mod", + "intensity":"19", + ... +``` + +In the example below, `algorithm` is not specified at the global level and no profile is used as `default-profile`. This means that the default profile's `algorithm` will be set to sgminer's core default: "scrypt". +``` +{ + "pools": [ + { + "url":"poolA:8334", + ... + "profile":"A" + }, + { + "url":"poolB:8334", + ... + } + ], + "profiles":[ + { + "name":"A", + "algorithm":"darkcoin-mod" + } + ], + "intensity":"19" +} +``` +When using the first pool, Profile A will be applied, so `algorithm` will be set to "darkcoin-mod". When using the second pool, the default profile is applied, and `algorithm` will be set to "scrypt". `intensity`, being set at the global level, will be the default profile's `intensity` value. `intensity` will be set to "19" for both pools, because it is never specified in the pool or profile settings. + +When `default-profile` is specified, any settings contained in that profile will override globals. For example: +``` + "pools": [ + { + "url":"poolA:8334", + ... + "profile":"A" + }, + { + "url":"poolB:8334", + ... + } + ], + "profiles":[ + { + "name":"A", + "algorithm":"darkcoin-mod" + }, + { + "name":"B", + "algorithm":"ckolivas" + } + ], + "default-profile":"B", + "algorithm":"marucoin-mod", + "intensity":"19" +} ``` +Profile B will be used to set the default profile's settings, which means `algorithm` will be set to "ckolivas" and the global value of "marucoin-mod" will be discarded. The first pool will use Profile A's "darkcoin-mod" and the second pool will use the default profile's "ckolivas". + +See the [configuration settings order](#configuration-settings-order) for more information about the order in which settings are applied. + +[Top](#configuration-and-command-line-options) + +## Working with Profiles and Pool Specific Settings + +Profiles have been added assist in specifying different GPU and/or algorithm settings that could be (re-)used by one or more pools. Pool-specific settings will override profile settings, and profile settings will override the default profile/globals. + +See the [configuration settings order](#configuration-settings-order) for more information about the order in which settings are applied. + +``` + "pools": [ + { + "url":"poolA:8334", + ... + "profile":"A" + }, + { + "url":"poolB:8334", + ... + "profile":"A", + "gpu-engine":"1000" + }, + { + "url":"poolC:8334", + ... + "intensity":"13" + } + ], + "profiles":[ + { + "name":"A", + "algorithm":"darkcoin-mod", + "gpu-engine":"1050" + }, + { + "name":"B", + "algorithm":"ckolivas" + } + ], + "default-profile":"B", + "intensity":"19", + "gpu-engine":"1100" +} +``` +In the example above, when using the second pool, Profile A is applied, which sets the `algorithm` to "darkcoin-mod", but since a `gpu-engine` of "1000" is specified in the pool, the value of "1050" is discarded. + +A similar situation occurs in the third pool. No profile is specified so the default `algorithm` "ckolivas" is set along with the default `gpu-engine` of "1100". Because `intensity` is set to "13" in the pool, the default profile's value of "19" is discarded. + +The end result of the above would look like this: +``` + "pools": [ + { + "url":"poolA:8334", + ... + "algorithm":"darkcoin-mod", + "intensity":"19", + "gpu-engine":"1050" + }, + { + "url":"poolB:8334", + ... + "algorithm":"darkcoin-mod", + "intensity":"19", + "gpu-engine":"1000" + }, + { + "url":"poolC:8334", + ... + "algorithm":"ckolivas" + "intensity":"13" + "gpu-engine":"1100" + } + ] +} +``` + + +[Top](#configuration-and-command-line-options) + +## Includes + +`Include` is a special keyword only available in the configuration file. You can include json-formatted files at any level of the configuration parsing. The values read in the included +files are applied to the current object being parsed. + +``` +/etc/pool.ip.credentials: +{ + "user":"user", + "pass":"x" +} + +sgminer.conf: +... +"pools":[ + { + "url":"stratum+tcp://pool.ip:8334", + "include":"/etc/pool.ip.credentials" + } +], +... +``` + +In the example above, the parser will include the contents of the file `/etc/pool.ip.credentials` directly where it was called from. This will produce the following result: + +``` +sgminer.conf: +... +"pools":[ + { + "url":"stratum+tcp://pool.ip:8334", + "user":"user", + "pass":"x" + } +], +... +``` + +There is no limit as to how includes can be used as long as they follow proper json syntax. + +[Top](#configuration-and-command-line-options) --- @@ -35,9 +231,9 @@ Load a JSON-formatted configuration file. See `example.conf` for an example conf Note that the configuration file's settings will override any settings passed via command line. For more information, see [Configuration Settings Order](#configuration-settings-order). -*Syntax:* `--config ` or `-c ` +*Syntax:* `--config ` or `-c ` -*Argument:* string +*Argument:* `string` Filename *Example:* @@ -51,9 +247,9 @@ Note that the configuration file's settings will override any settings passed vi Specifies the name of the default configuration file to be loaded at start up and also used to save any settings changes during operation. -*Syntax:* `--default-config ` +*Syntax:* `--default-config ` -*Argument:* string +*Argument:* `string` Filename *Example:* @@ -2121,55 +2317,3 @@ Displays extra work time debug information. *Default:* `false` [Top](#configuration-and-command-line-options) :: [Config-file and CLI options](#config-file-and-cli-options) :: [Miscellaneous Options](#miscellaneous-options) - ---- - -## Working with Profiles and Pool Specific Settings - -Profiles are there to assist you in specifying different GPU or algorithm settings that could be (re-)used by one or more pools. - -[Top](#configuration-and-command-line-options) - ---- - -## Includes - -`Include` is a special keyword only available in the configuration file. You can include json-formatted files at any level of the configuration parsing. The values read in the included -files are applied to the current object being parsed. - -``` -/etc/pool.ip.credentials: -{ - "user":"user", - "pass":"x" -} - -sgminer.conf: -... -"pools":[ - { - "url":"stratum+tcp://pool.ip:8334", - "include":"/etc/pool.ip.credentials" - } -], -... -``` - -In the example above, the parser will include the contents of the file `/etc/pool.ip.credentials` directly where it was called from. This will produce the following result: - -``` -sgminer.conf: -... -"pools":[ - { - "url":"stratum+tcp://pool.ip:8334", - "user":"user", - "pass":"x" - } -], -... -``` - -There is no limit as to how includes can be used as long as they follow proper json syntax. - -[Top](#configuration-and-command-line-options) From 1fb86b33778333690203d4cad29192f3f48727d8 Mon Sep 17 00:00:00 2001 From: ystarnaud Date: Sat, 21 Jun 2014 04:47:31 -0400 Subject: [PATCH 6/7] Update configuration.md --- doc/configuration.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/configuration.md b/doc/configuration.md index 6839f2c2..caabd13f 100644 --- a/doc/configuration.md +++ b/doc/configuration.md @@ -63,6 +63,7 @@ When using the first pool, Profile A will be applied, so `algorithm` will be set When `default-profile` is specified, any settings contained in that profile will override globals. For example: ``` +{ "pools": [ { "url":"poolA:8334", @@ -102,6 +103,7 @@ Profiles have been added assist in specifying different GPU and/or algorithm set See the [configuration settings order](#configuration-settings-order) for more information about the order in which settings are applied. ``` +{ "pools": [ { "url":"poolA:8334", @@ -142,6 +144,7 @@ A similar situation occurs in the third pool. No profile is specified so the def The end result of the above would look like this: ``` +{ "pools": [ { "url":"poolA:8334", @@ -168,7 +171,6 @@ The end result of the above would look like this: } ``` - [Top](#configuration-and-command-line-options) ## Includes From 56055754eb682cdf9b063fb3f4a366be28d06850 Mon Sep 17 00:00:00 2001 From: ystarnaud Date: Sat, 21 Jun 2014 12:31:48 -0400 Subject: [PATCH 7/7] Whitespace and indentation fix Fixed whitespace and indentation of lasybear's pull request #272. --- algorithm/talkcoin.c | 131 +++++----- algorithm/talkcoin.h | 3 +- kernel/talkcoin-mod.cl | 528 +++++++++++++++++++++-------------------- 3 files changed, 342 insertions(+), 320 deletions(-) diff --git a/algorithm/talkcoin.c b/algorithm/talkcoin.c index ba0077af..aa5f737c 100644 --- a/algorithm/talkcoin.c +++ b/algorithm/talkcoin.c @@ -38,24 +38,24 @@ #include "sph/sph_skein.h" /* Move init out of loop, so init once externally, and then use one single memcpy with that bigger memory block */ -typedef struct { - sph_blake512_context blake1; - sph_groestl512_context groestl1; - sph_jh512_context jh1; - sph_keccak512_context keccak1; - sph_skein512_context skein1; +typedef struct +{ + sph_blake512_context blake1; + sph_groestl512_context groestl1; + sph_jh512_context jh1; + sph_keccak512_context keccak1; + sph_skein512_context skein1; } Xhash_context_holder; Xhash_context_holder base_contexts; - void init_Nhash_contexts() { - sph_blake512_init(&base_contexts.blake1); - sph_groestl512_init(&base_contexts.groestl1); - sph_jh512_init(&base_contexts.jh1); - sph_keccak512_init(&base_contexts.keccak1); - sph_skein512_init(&base_contexts.skein1); + sph_blake512_init(&base_contexts.blake1); + sph_groestl512_init(&base_contexts.groestl1); + sph_jh512_init(&base_contexts.jh1); + sph_keccak512_init(&base_contexts.keccak1); + sph_skein512_init(&base_contexts.skein1); } /* @@ -65,39 +65,39 @@ void init_Nhash_contexts() static inline void be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) { - uint32_t i; + uint32_t i; - for (i = 0; i < len; i++) - dst[i] = htobe32(src[i]); + for (i = 0; i < len; i++) + dst[i] = htobe32(src[i]); } inline void talkhash(void *state, const void *input) { - init_Nhash_contexts(); + init_Nhash_contexts(); - Xhash_context_holder ctx; + Xhash_context_holder ctx; - uint32_t hashA[16], hashB[16]; - //blake-bmw-groestl-sken-jh-meccak-luffa-cubehash-shivite-simd-echo - memcpy(&ctx, &base_contexts, sizeof(base_contexts)); + uint32_t hashA[16], hashB[16]; + //blake-bmw-groestl-sken-jh-meccak-luffa-cubehash-shivite-simd-echo + memcpy(&ctx, &base_contexts, sizeof(base_contexts)); - sph_blake512 (&ctx.blake1, input, 80); - sph_blake512_close (&ctx.blake1, hashA); + sph_blake512 (&ctx.blake1, input, 80); + sph_blake512_close (&ctx.blake1, hashA); - sph_groestl512 (&ctx.groestl1, hashA, 64); - sph_groestl512_close(&ctx.groestl1, hashB); + sph_groestl512 (&ctx.groestl1, hashA, 64); + sph_groestl512_close(&ctx.groestl1, hashB); - sph_jh512 (&ctx.jh1, hashB, 64); - sph_jh512_close(&ctx.jh1, hashA); + sph_jh512 (&ctx.jh1, hashB, 64); + sph_jh512_close(&ctx.jh1, hashA); - sph_keccak512 (&ctx.keccak1, hashA, 64); - sph_keccak512_close(&ctx.keccak1, hashB); + sph_keccak512 (&ctx.keccak1, hashA, 64); + sph_keccak512_close(&ctx.keccak1, hashB); - sph_skein512 (&ctx.skein1, hashB, 64); - sph_skein512_close(&ctx.skein1, hashA); + sph_skein512 (&ctx.skein1, hashB, 64); + sph_skein512_close(&ctx.skein1, hashA); - memcpy(state, hashA, 32); + memcpy(state, hashA, 32); } static const uint32_t diff1targ = 0x0000ffff; @@ -114,26 +114,29 @@ int talkcoin_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t n tmp_hash7 = be32toh(ohash[7]); - applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx", - (long unsigned int)Htarg, - (long unsigned int)diff1targ, - (long unsigned int)tmp_hash7); + applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx", + (long unsigned int)Htarg, + (long unsigned int)diff1targ, + (long unsigned int)tmp_hash7); + if (tmp_hash7 > diff1targ) return -1; + if (tmp_hash7 > Htarg) return 0; + return 1; } void talkcoin_regenhash(struct work *work) { - uint32_t data[20]; - uint32_t *nonce = (uint32_t *)(work->data + 76); - uint32_t *ohash = (uint32_t *)(work->hash); + uint32_t data[20]; + uint32_t *nonce = (uint32_t *)(work->data + 76); + uint32_t *ohash = (uint32_t *)(work->hash); - be32enc_vect(data, (const uint32_t *)work->data, 19); - data[19] = htobe32(*nonce); - talkhash(ohash, data); + be32enc_vect(data, (const uint32_t *)work->data, 19); + data[19] = htobe32(*nonce); + talkhash(ohash, data); } bool scanhash_talkcoin(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate, @@ -149,28 +152,30 @@ bool scanhash_talkcoin(struct thr_info *thr, const unsigned char __maybe_unused be32enc_vect(data, (const uint32_t *)pdata, 19); - while(1) { - uint32_t ostate[8]; - - *nonce = ++n; - data[19] = (n); - talkhash(ostate, data); - tmp_hash7 = (ostate[7]); - - applog(LOG_INFO, "data7 %08lx", - (long unsigned int)data[7]); - - if (unlikely(tmp_hash7 <= Htarg)) { - ((uint32_t *)pdata)[19] = htobe32(n); - *last_nonce = n; - ret = true; - break; - } - - if (unlikely((n >= max_nonce) || thr->work_restart)) { - *last_nonce = n; - break; - } + while(1) + { + uint32_t ostate[8]; + + *nonce = ++n; + data[19] = (n); + talkhash(ostate, data); + tmp_hash7 = (ostate[7]); + + applog(LOG_INFO, "data7 %08lx", (long unsigned int)data[7]); + + if (unlikely(tmp_hash7 <= Htarg)) + { + ((uint32_t *)pdata)[19] = htobe32(n); + *last_nonce = n; + ret = true; + break; + } + + if (unlikely((n >= max_nonce) || thr->work_restart)) + { + *last_nonce = n; + break; + } } return ret; diff --git a/algorithm/talkcoin.h b/algorithm/talkcoin.h index 551a20a6..35a3920f 100644 --- a/algorithm/talkcoin.h +++ b/algorithm/talkcoin.h @@ -3,8 +3,7 @@ #include "miner.h" -extern int talkcoin_test(unsigned char *pdata, const unsigned char *ptarget, - uint32_t nonce); +extern int talkcoin_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce); extern void talkcoin_regenhash(struct work *work); #endif /* TALKCOIN_H */ diff --git a/kernel/talkcoin-mod.cl b/kernel/talkcoin-mod.cl index a8c94a3c..5813cf50 100644 --- a/kernel/talkcoin-mod.cl +++ b/kernel/talkcoin-mod.cl @@ -33,9 +33,9 @@ #define TALKCOIN_MOD_CL #if __ENDIAN_LITTLE__ -#define SPH_LITTLE_ENDIAN 1 + #define SPH_LITTLE_ENDIAN 1 #else -#define SPH_BIG_ENDIAN 1 + #define SPH_BIG_ENDIAN 1 #endif #define SPH_UPTR sph_u64 @@ -43,11 +43,11 @@ typedef unsigned int sph_u32; typedef int sph_s32; #ifndef __OPENCL_VERSION__ -typedef unsigned long long sph_u64; -typedef long long sph_s64; + typedef unsigned long long sph_u64; + typedef long long sph_s64; #else -typedef unsigned long sph_u64; -typedef long sph_s64; + typedef unsigned long sph_u64; + typedef long sph_s64; #endif #define SPH_64 1 @@ -81,204 +81,219 @@ typedef long sph_s64; #define SWAP8(x) as_ulong(as_uchar8(x).s76543210) #if SPH_BIG_ENDIAN - #define DEC64E(x) (x) - #define DEC64BE(x) (*(const __global sph_u64 *) (x)); + #define DEC64E(x) (x) + #define DEC64BE(x) (*(const __global sph_u64 *) (x)); #else - #define DEC64E(x) SWAP8(x) - #define DEC64BE(x) SWAP8(*(const __global sph_u64 *) (x)); + #define DEC64E(x) SWAP8(x) + #define DEC64BE(x) SWAP8(*(const __global sph_u64 *) (x)); #endif typedef union { - unsigned char h1[64]; - uint h4[16]; - ulong h8[8]; + unsigned char h1[64]; + uint h4[16]; + ulong h8[8]; } hash_t; // blake __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) __kernel void search(__global unsigned char* block, __global hash_t* hashes) { - uint gid = get_global_id(0); - __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); - - sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); - sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); - sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); - sph_u64 H6 = SPH_C64(0x1F83D9ABFB41BD6B), H7 = SPH_C64(0x5BE0CD19137E2179); - sph_u64 S0 = 0, S1 = 0, S2 = 0, S3 = 0; - sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (80 << 3), T1 = 0xFFFFFFFFFFFFFFFF;; - - if ((T0 = SPH_T64(T0 + 1024)) < 1024) - { - T1 = SPH_T64(T1 + 1); - } - sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; - sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; - sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; - sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; - M0 = DEC64BE(block + 0); - M1 = DEC64BE(block + 8); - M2 = DEC64BE(block + 16); - M3 = DEC64BE(block + 24); - M4 = DEC64BE(block + 32); - M5 = DEC64BE(block + 40); - M6 = DEC64BE(block + 48); - M7 = DEC64BE(block + 56); - M8 = DEC64BE(block + 64); - M9 = DEC64BE(block + 72); - M9 &= 0xFFFFFFFF00000000; - M9 ^= SWAP4(gid); - MA = 0x8000000000000000; - MB = 0; - MC = 0; - MD = 1; - ME = 0; - MF = 0x280; - - COMPRESS64; - - hash->h8[0] = H0; - hash->h8[1] = H1; - hash->h8[2] = H2; - hash->h8[3] = H3; - hash->h8[4] = H4; - hash->h8[5] = H5; - hash->h8[6] = H6; - hash->h8[7] = H7; - - barrier(CLK_GLOBAL_MEM_FENCE); + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); + sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); + sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); + sph_u64 H6 = SPH_C64(0x1F83D9ABFB41BD6B), H7 = SPH_C64(0x5BE0CD19137E2179); + sph_u64 S0 = 0, S1 = 0, S2 = 0, S3 = 0; + sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (80 << 3), T1 = 0xFFFFFFFFFFFFFFFF;; + + if ((T0 = SPH_T64(T0 + 1024)) < 1024) + { + T1 = SPH_T64(T1 + 1); + } + sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; + sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; + sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; + sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; + M0 = DEC64BE(block + 0); + M1 = DEC64BE(block + 8); + M2 = DEC64BE(block + 16); + M3 = DEC64BE(block + 24); + M4 = DEC64BE(block + 32); + M5 = DEC64BE(block + 40); + M6 = DEC64BE(block + 48); + M7 = DEC64BE(block + 56); + M8 = DEC64BE(block + 64); + M9 = DEC64BE(block + 72); + M9 &= 0xFFFFFFFF00000000; + M9 ^= SWAP4(gid); + MA = 0x8000000000000000; + MB = 0; + MC = 0; + MD = 1; + ME = 0; + MF = 0x280; + + COMPRESS64; + + hash->h8[0] = H0; + hash->h8[1] = H1; + hash->h8[2] = H2; + hash->h8[3] = H3; + hash->h8[4] = H4; + hash->h8[5] = H5; + hash->h8[6] = H6; + hash->h8[7] = H7; + + barrier(CLK_GLOBAL_MEM_FENCE); } // groestl __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) __kernel void search1(__global hash_t* hashes) { - uint gid = get_global_id(0); - __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); - - __local sph_u64 T0_L[256], T1_L[256], T2_L[256], T3_L[256], T4_L[256], T5_L[256], T6_L[256], T7_L[256]; - - int init = get_local_id(0); - int step = get_local_size(0); - - for (int i = init; i < 256; i += step) - { - T0_L[i] = T0[i]; - T1_L[i] = T1[i]; - T2_L[i] = T2[i]; - T3_L[i] = T3[i]; - T4_L[i] = T4[i]; - T5_L[i] = T5[i]; - T6_L[i] = T6[i]; - T7_L[i] = T7[i]; - } - barrier(CLK_LOCAL_MEM_FENCE); - -#define T0 T0_L -#define T1 T1_L -#define T2 T2_L -#define T3 T3_L -#define T4 T4_L -#define T5 T5_L -#define T6 T6_L -#define T7 T7_L - - sph_u64 H[16]; - for (unsigned int u = 0; u < 15; u ++) - H[u] = 0; -#if USE_LE + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + __local sph_u64 T0_L[256], T1_L[256], T2_L[256], T3_L[256], T4_L[256], T5_L[256], T6_L[256], T7_L[256]; + + int init = get_local_id(0); + int step = get_local_size(0); + + for (int i = init; i < 256; i += step) + { + T0_L[i] = T0[i]; + T1_L[i] = T1[i]; + T2_L[i] = T2[i]; + T3_L[i] = T3[i]; + T4_L[i] = T4[i]; + T5_L[i] = T5[i]; + T6_L[i] = T6[i]; + T7_L[i] = T7[i]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + #define T0 T0_L + #define T1 T1_L + #define T2 T2_L + #define T3 T3_L + #define T4 T4_L + #define T5 T5_L + #define T6 T6_L + #define T7 T7_L + + sph_u64 H[16]; + + for (unsigned int u = 0; u < 15; u ++) + H[u] = 0; + + #if USE_LE H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); -#else + #else H[15] = (sph_u64)512; -#endif - - sph_u64 g[16], m[16]; - m[0] = DEC64E(hash->h8[0]); - m[1] = DEC64E(hash->h8[1]); - m[2] = DEC64E(hash->h8[2]); - m[3] = DEC64E(hash->h8[3]); - m[4] = DEC64E(hash->h8[4]); - m[5] = DEC64E(hash->h8[5]); - m[6] = DEC64E(hash->h8[6]); - m[7] = DEC64E(hash->h8[7]); - for (unsigned int u = 0; u < 16; u ++) - g[u] = m[u] ^ H[u]; - m[8] = 0x80; g[8] = m[8] ^ H[8]; - m[9] = 0; g[9] = m[9] ^ H[9]; - m[10] = 0; g[10] = m[10] ^ H[10]; - m[11] = 0; g[11] = m[11] ^ H[11]; - m[12] = 0; g[12] = m[12] ^ H[12]; - m[13] = 0; g[13] = m[13] ^ H[13]; - m[14] = 0; g[14] = m[14] ^ H[14]; - m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; - PERM_BIG_P(g); - PERM_BIG_Q(m); - for (unsigned int u = 0; u < 16; u ++) - H[u] ^= g[u] ^ m[u]; - sph_u64 xH[16]; - for (unsigned int u = 0; u < 16; u ++) - xH[u] = H[u]; - PERM_BIG_P(xH); - for (unsigned int u = 0; u < 16; u ++) - H[u] ^= xH[u]; - for (unsigned int u = 0; u < 8; u ++) - hash->h8[u] = DEC64E(H[u + 8]); - - barrier(CLK_GLOBAL_MEM_FENCE); + #endif + + sph_u64 g[16], m[16]; + m[0] = DEC64E(hash->h8[0]); + m[1] = DEC64E(hash->h8[1]); + m[2] = DEC64E(hash->h8[2]); + m[3] = DEC64E(hash->h8[3]); + m[4] = DEC64E(hash->h8[4]); + m[5] = DEC64E(hash->h8[5]); + m[6] = DEC64E(hash->h8[6]); + m[7] = DEC64E(hash->h8[7]); + + for (unsigned int u = 0; u < 16; u ++) + g[u] = m[u] ^ H[u]; + + m[8] = 0x80; g[8] = m[8] ^ H[8]; + m[9] = 0; g[9] = m[9] ^ H[9]; + m[10] = 0; g[10] = m[10] ^ H[10]; + m[11] = 0; g[11] = m[11] ^ H[11]; + m[12] = 0; g[12] = m[12] ^ H[12]; + m[13] = 0; g[13] = m[13] ^ H[13]; + m[14] = 0; g[14] = m[14] ^ H[14]; + m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; + + PERM_BIG_P(g); + PERM_BIG_Q(m); + + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= g[u] ^ m[u]; + + sph_u64 xH[16]; + + for (unsigned int u = 0; u < 16; u ++) + xH[u] = H[u]; + + PERM_BIG_P(xH); + + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= xH[u]; + + for (unsigned int u = 0; u < 8; u ++) + hash->h8[u] = DEC64E(H[u + 8]); + + barrier(CLK_GLOBAL_MEM_FENCE); } // jh __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) __kernel void search2(__global hash_t* hashes) { - uint gid = get_global_id(0); - __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); - sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7); - sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b); - sph_u64 tmp; + sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7); + sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b); + sph_u64 tmp; - - for(int i = 0; i < 2; i++) + for(int i = 0; i < 2; i++) + { + if (i == 0) + { + h0h ^= DEC64E(hash->h8[0]); + h0l ^= DEC64E(hash->h8[1]); + h1h ^= DEC64E(hash->h8[2]); + h1l ^= DEC64E(hash->h8[3]); + h2h ^= DEC64E(hash->h8[4]); + h2l ^= DEC64E(hash->h8[5]); + h3h ^= DEC64E(hash->h8[6]); + h3l ^= DEC64E(hash->h8[7]); + } + else if(i == 1) { - if (i == 0) { - h0h ^= DEC64E(hash->h8[0]); - h0l ^= DEC64E(hash->h8[1]); - h1h ^= DEC64E(hash->h8[2]); - h1l ^= DEC64E(hash->h8[3]); - h2h ^= DEC64E(hash->h8[4]); - h2l ^= DEC64E(hash->h8[5]); - h3h ^= DEC64E(hash->h8[6]); - h3l ^= DEC64E(hash->h8[7]); - } else if(i == 1) { - h4h ^= DEC64E(hash->h8[0]); - h4l ^= DEC64E(hash->h8[1]); - h5h ^= DEC64E(hash->h8[2]); - h5l ^= DEC64E(hash->h8[3]); - h6h ^= DEC64E(hash->h8[4]); - h6l ^= DEC64E(hash->h8[5]); - h7h ^= DEC64E(hash->h8[6]); - h7l ^= DEC64E(hash->h8[7]); - - h0h ^= 0x80; - h3l ^= 0x2000000000000; - } - E8; + h4h ^= DEC64E(hash->h8[0]); + h4l ^= DEC64E(hash->h8[1]); + h5h ^= DEC64E(hash->h8[2]); + h5l ^= DEC64E(hash->h8[3]); + h6h ^= DEC64E(hash->h8[4]); + h6l ^= DEC64E(hash->h8[5]); + h7h ^= DEC64E(hash->h8[6]); + h7l ^= DEC64E(hash->h8[7]); + + h0h ^= 0x80; + h3l ^= 0x2000000000000; } - h4h ^= 0x80; - h7l ^= 0x2000000000000; - - hash->h8[0] = DEC64E(h4h); - hash->h8[1] = DEC64E(h4l); - hash->h8[2] = DEC64E(h5h); - hash->h8[3] = DEC64E(h5l); - hash->h8[4] = DEC64E(h6h); - hash->h8[5] = DEC64E(h6l); - hash->h8[6] = DEC64E(h7h); - hash->h8[7] = DEC64E(h7l); - - barrier(CLK_GLOBAL_MEM_FENCE); + E8; + } + + h4h ^= 0x80; + h7l ^= 0x2000000000000; + + hash->h8[0] = DEC64E(h4h); + hash->h8[1] = DEC64E(h4l); + hash->h8[2] = DEC64E(h5h); + hash->h8[3] = DEC64E(h5l); + hash->h8[4] = DEC64E(h6h); + hash->h8[5] = DEC64E(h6l); + hash->h8[6] = DEC64E(h7h); + hash->h8[7] = DEC64E(h7l); + + barrier(CLK_GLOBAL_MEM_FENCE); } @@ -286,48 +301,49 @@ __kernel void search2(__global hash_t* hashes) __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) __kernel void search3(__global hash_t* hashes) { - uint gid = get_global_id(0); - __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); - - // keccak - - sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0; - sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0; - sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0; - sph_u64 a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0; - sph_u64 a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0; - - a10 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a20 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a31 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a22 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a23 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a04 = SPH_C64(0xFFFFFFFFFFFFFFFF); - - a00 ^= SWAP8(hash->h8[0]); - a10 ^= SWAP8(hash->h8[1]); - a20 ^= SWAP8(hash->h8[2]); - a30 ^= SWAP8(hash->h8[3]); - a40 ^= SWAP8(hash->h8[4]); - a01 ^= SWAP8(hash->h8[5]); - a11 ^= SWAP8(hash->h8[6]); - a21 ^= SWAP8(hash->h8[7]); - a31 ^= 0x8000000000000001; - KECCAK_F_1600; - // Finalize the "lane complement" - a10 = ~a10; - a20 = ~a20; - - hash->h8[0] = SWAP8(a00); - hash->h8[1] = SWAP8(a10); - hash->h8[2] = SWAP8(a20); - hash->h8[3] = SWAP8(a30); - hash->h8[4] = SWAP8(a40); - hash->h8[5] = SWAP8(a01); - hash->h8[6] = SWAP8(a11); - hash->h8[7] = SWAP8(a21); - - barrier(CLK_GLOBAL_MEM_FENCE); + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // keccak + + sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0; + sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0; + sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0; + sph_u64 a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0; + sph_u64 a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0; + + a10 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a20 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a31 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a22 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a23 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a04 = SPH_C64(0xFFFFFFFFFFFFFFFF); + + a00 ^= SWAP8(hash->h8[0]); + a10 ^= SWAP8(hash->h8[1]); + a20 ^= SWAP8(hash->h8[2]); + a30 ^= SWAP8(hash->h8[3]); + a40 ^= SWAP8(hash->h8[4]); + a01 ^= SWAP8(hash->h8[5]); + a11 ^= SWAP8(hash->h8[6]); + a21 ^= SWAP8(hash->h8[7]); + a31 ^= 0x8000000000000001; + KECCAK_F_1600; + + // Finalize the "lane complement" + a10 = ~a10; + a20 = ~a20; + + hash->h8[0] = SWAP8(a00); + hash->h8[1] = SWAP8(a10); + hash->h8[2] = SWAP8(a20); + hash->h8[3] = SWAP8(a30); + hash->h8[4] = SWAP8(a40); + hash->h8[5] = SWAP8(a01); + hash->h8[6] = SWAP8(a11); + hash->h8[7] = SWAP8(a21); + + barrier(CLK_GLOBAL_MEM_FENCE); } @@ -335,44 +351,46 @@ __kernel void search3(__global hash_t* hashes) __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) __kernel void search4(__global hash_t* hashes, __global uint* output, const ulong target) { - uint gid = get_global_id(0); - uint offset = get_global_offset(0); - hash_t hash; - __global hash_t *hashp = &(hashes[gid-offset]); - - for (int i = 0; i < 8; i++) { - hash.h8[i] = hashes[gid-offset].h8[i]; - } - - sph_u64 h0 = SPH_C64(0x4903ADFF749C51CE), h1 = SPH_C64(0x0D95DE399746DF03), h2 = SPH_C64(0x8FD1934127C79BCE), h3 = SPH_C64(0x9A255629FF352CB1), h4 = SPH_C64(0x5DB62599DF6CA7B0), h5 = SPH_C64(0xEABE394CA9D5C3F4), h6 = SPH_C64(0x991112C71A75B523), h7 = SPH_C64(0xAE18A40B660FCC33); - sph_u64 m0, m1, m2, m3, m4, m5, m6, m7; - sph_u64 bcount = 0; - - m0 = SWAP8(hash.h8[0]); - m1 = SWAP8(hash.h8[1]); - m2 = SWAP8(hash.h8[2]); - m3 = SWAP8(hash.h8[3]); - m4 = SWAP8(hash.h8[4]); - m5 = SWAP8(hash.h8[5]); - m6 = SWAP8(hash.h8[6]); - m7 = SWAP8(hash.h8[7]); - UBI_BIG(480, 64); - bcount = 0; - m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; - UBI_BIG(510, 8); - hash.h8[0] = SWAP8(h0); - hash.h8[1] = SWAP8(h1); - hash.h8[2] = SWAP8(h2); - hash.h8[3] = SWAP8(h3); - hash.h8[4] = SWAP8(h4); - hash.h8[5] = SWAP8(h5); - hash.h8[6] = SWAP8(h6); - hash.h8[7] = SWAP8(h7); - - bool result = (SWAP8(hash.h8[3]) <= target); - if (result) - output[atomic_inc(output+0xFF)] = SWAP4(gid); - + uint gid = get_global_id(0); + uint offset = get_global_offset(0); + hash_t hash; + __global hash_t *hashp = &(hashes[gid-offset]); + + for (int i = 0; i < 8; i++) + hash.h8[i] = hashes[gid-offset].h8[i]; + + sph_u64 h0 = SPH_C64(0x4903ADFF749C51CE), h1 = SPH_C64(0x0D95DE399746DF03), h2 = SPH_C64(0x8FD1934127C79BCE), h3 = SPH_C64(0x9A255629FF352CB1), h4 = SPH_C64(0x5DB62599DF6CA7B0), h5 = SPH_C64(0xEABE394CA9D5C3F4), h6 = SPH_C64(0x991112C71A75B523), h7 = SPH_C64(0xAE18A40B660FCC33); + sph_u64 m0, m1, m2, m3, m4, m5, m6, m7; + sph_u64 bcount = 0; + + m0 = SWAP8(hash.h8[0]); + m1 = SWAP8(hash.h8[1]); + m2 = SWAP8(hash.h8[2]); + m3 = SWAP8(hash.h8[3]); + m4 = SWAP8(hash.h8[4]); + m5 = SWAP8(hash.h8[5]); + m6 = SWAP8(hash.h8[6]); + m7 = SWAP8(hash.h8[7]); + + UBI_BIG(480, 64); + + bcount = 0; + m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; + + UBI_BIG(510, 8); + + hash.h8[0] = SWAP8(h0); + hash.h8[1] = SWAP8(h1); + hash.h8[2] = SWAP8(h2); + hash.h8[3] = SWAP8(h3); + hash.h8[4] = SWAP8(h4); + hash.h8[5] = SWAP8(h5); + hash.h8[6] = SWAP8(h6); + hash.h8[7] = SWAP8(h7); + + bool result = (SWAP8(hash.h8[3]) <= target); + if (result) + output[atomic_inc(output+0xFF)] = SWAP4(gid); } #endif // TALKCOIN_MOD_CL