mirror of
https://github.com/GOSTSec/sgminer
synced 2025-03-13 06:01:03 +00:00
X14 Implementation
Added X14 and cleaned up the X13/X15 kernels so all 3 offshoots are in sync. New option "--hamsi-short" or "hamsi-short":true to add a small boost. May not work on all GPUs.
This commit is contained in:
parent
e0647546de
commit
5c9126fd61
@ -63,6 +63,7 @@ sgminer_SOURCES += algorithm/marucoin.c algorithm/marucoin.h
|
||||
sgminer_SOURCES += algorithm/maxcoin.c algorithm/maxcoin.h
|
||||
sgminer_SOURCES += algorithm/talkcoin.c algorithm/talkcoin.h
|
||||
sgminer_SOURCES += algorithm/bitblock.c algorithm/bitblock.h
|
||||
sgminer_SOURCES += algorithm/x14.c algorithm/x14.h
|
||||
|
||||
bin_SCRIPTS = $(top_srcdir)/kernel/*.cl
|
||||
|
||||
|
110
algorithm.c
110
algorithm.c
@ -27,6 +27,7 @@
|
||||
#include "algorithm/maxcoin.h"
|
||||
#include "algorithm/talkcoin.h"
|
||||
#include "algorithm/bitblock.h"
|
||||
#include "algorithm/x14.h"
|
||||
|
||||
#include "compat.h"
|
||||
|
||||
@ -40,6 +41,8 @@ const char *algorithm_type_str[] = {
|
||||
"NScrypt",
|
||||
"X11",
|
||||
"X13",
|
||||
"X14",
|
||||
"X15",
|
||||
"Keccak",
|
||||
"Quarkcoin",
|
||||
"Twecoin",
|
||||
@ -91,11 +94,11 @@ static void append_scrypt_compiler_options(struct _build_kernel_data *data, stru
|
||||
static void append_hamsi_compiler_options(struct _build_kernel_data *data, struct cgpu_info *cgpu, struct _algorithm_t *algorithm)
|
||||
{
|
||||
char buf[255];
|
||||
sprintf(buf, " -D SPH_HAMSI_EXPAND_BIG=%d",
|
||||
opt_hamsi_expand_big);
|
||||
sprintf(buf, " -D SPH_HAMSI_EXPAND_BIG=%d%s ",
|
||||
opt_hamsi_expand_big, ((opt_hamsi_short)?" -D SPH_HAMSI_SHORT=1 ":""));
|
||||
strcat(data->compiler_options, buf);
|
||||
|
||||
sprintf(buf, "big%u", (unsigned int)opt_hamsi_expand_big);
|
||||
sprintf(buf, "big%u%s", (unsigned int)opt_hamsi_expand_big, ((opt_hamsi_short)?"hs":""));
|
||||
strcat(data->binary_filename, buf);
|
||||
}
|
||||
|
||||
@ -419,6 +422,100 @@ static cl_int queue_talkcoin_mod_kernel(struct __clState *clState, struct _dev_b
|
||||
return status;
|
||||
}
|
||||
|
||||
/*
 * Upload the block header and bind the arguments of the X14 kernel chain
 * (search followed by search1 .. search13, one kernel per hash stage).
 *
 * clState: supplies the command queue, buffers and kernels used below.
 * blk:     supplies the work item (header data and device target).
 * threads: unused.
 *
 * Returns status, which is assigned from clEnqueueWriteBuffer().
 * NOTE(review): the CL_*SET_ARG macros are defined elsewhere; they presumably
 * operate on the locals `kernel`, `num` and `status` - confirm before
 * renaming or reordering anything here.
 */
static cl_int queue_x14_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
  cl_kernel *kernel = &clState->kernel;
  unsigned int num = 0;
  cl_ulong le_target;
  cl_int status = 0;

  /* Target word handed to the last kernel: 8 bytes at offset 24. */
  le_target = *(cl_ulong *)(blk->work->device_target + 24);

  /* Blocking write of the 80-byte header staged in cldata by flip80(). */
  flip80(clState->cldata, blk->work->data);
  status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL);

  // search - blake
  CL_SET_ARG(clState->CLbuffer0);
  CL_SET_ARG(clState->padbuffer8);

  // search1 - bmw (first of the extra kernels)
  kernel = clState->extra_kernels;
  CL_SET_ARG_0(clState->padbuffer8);

  // search2 - groestl
  CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);

  // search3 - skein
  CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);

  // search4 - jh
  CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);

  // search5 - keccak
  CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);

  // search6 - luffa
  CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);

  // search7 - cubehash
  CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);

  // search8 - shavite
  CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);

  // search9 - simd
  CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);

  // search10 - echo
  CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);

  // search11 - hamsi
  CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);

  // search12 - fugue
  CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);

  // search13 - shabal; argument numbering restarts for this kernel
  num = 0;
  CL_NEXTKERNEL_SET_ARG(clState->padbuffer8);
  CL_SET_ARG(clState->outputBuffer);
  CL_SET_ARG(le_target);

  return status;
}
|
||||
|
||||
/*
 * Upload the block header and bind the arguments of the older X14 kernel
 * chain (search followed by search1 .. search10), in which the final kernel
 * combines the echo, hamsi, fugue and shabal stages.
 *
 * clState: supplies the command queue, buffers and kernels used below.
 * blk:     supplies the work item (header data and device target).
 * threads: unused.
 *
 * Returns status, which is assigned from clEnqueueWriteBuffer().
 * NOTE(review): the CL_*SET_ARG macros are defined elsewhere; they presumably
 * operate on the locals `kernel`, `num` and `status` - confirm before
 * renaming or reordering anything here.
 */
static cl_int queue_x14_old_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
  cl_kernel *kernel = &clState->kernel;
  unsigned int num = 0;
  cl_ulong le_target;
  cl_int status = 0;

  /* Target word handed to the last kernel: 8 bytes at offset 24. */
  le_target = *(cl_ulong *)(blk->work->device_target + 24);

  /* Blocking write of the 80-byte header staged in cldata by flip80(). */
  flip80(clState->cldata, blk->work->data);
  status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL);

  // search - blake
  CL_SET_ARG(clState->CLbuffer0);
  CL_SET_ARG(clState->padbuffer8);

  // search1 - bmw (first of the extra kernels)
  kernel = clState->extra_kernels;
  CL_SET_ARG_0(clState->padbuffer8);

  // search2 - groestl
  CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);

  // search3 - skein
  CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);

  // search4 - jh
  CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);

  // search5 - keccak
  CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);

  // search6 - luffa
  CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);

  // search7 - cubehash
  CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);

  // search8 - shavite
  CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);

  // search9 - simd
  CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);

  // search10 - combined echo, hamsi, fugue, shabal; argument numbering restarts
  num = 0;
  CL_NEXTKERNEL_SET_ARG(clState->padbuffer8);
  CL_SET_ARG(clState->outputBuffer);
  CL_SET_ARG(le_target);

  return status;
}
|
||||
|
||||
typedef struct _algorithm_settings_t {
|
||||
const char *name; /* Human-readable identifier */
|
||||
algorithm_type_t type; //common algorithm type
|
||||
@ -477,8 +574,13 @@ static algorithm_settings_t algos[] = {
|
||||
{ "marucoin-mod", ALGO_X13, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 12, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_kernel, gen_hash, append_hamsi_compiler_options},
|
||||
{ "marucoin-modold", ALGO_X13, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_old_kernel, gen_hash, append_hamsi_compiler_options},
|
||||
|
||||
{ "talkcoin-mod", ALGO_NIST, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 4, 8 * 16 * 4194304, 0, talkcoin_regenhash, queue_talkcoin_mod_kernel, gen_hash, NULL},
|
||||
{ "x14", ALGO_X14, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 13, 8 * 16 * 4194304, 0, x14_regenhash, queue_x14_kernel, gen_hash, append_hamsi_compiler_options},
|
||||
{ "x14old", ALGO_X14, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, x14_regenhash, queue_x14_old_kernel, gen_hash, append_hamsi_compiler_options},
|
||||
|
||||
{ "bitblock", ALGO_X15, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 14, 4 * 16 * 4194304, 0, bitblock_regenhash, queue_bitblock_kernel, gen_hash, append_hamsi_compiler_options},
|
||||
{ "bitblockold", ALGO_X15, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 10, 4 * 16 * 4194304, 0, bitblock_regenhash, queue_bitblockold_kernel, gen_hash, append_hamsi_compiler_options},
|
||||
|
||||
{ "talkcoin-mod", ALGO_NIST, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 4, 8 * 16 * 4194304, 0, talkcoin_regenhash, queue_talkcoin_mod_kernel, gen_hash, NULL},
|
||||
// kernels starting from this will have difficulty calculated by using fuguecoin algorithm
|
||||
#define A_FUGUE(a, b) \
|
||||
{ a, ALGO_FUGUE, 1, 256, 256, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, sha256, NULL}
|
||||
|
@ -16,6 +16,8 @@ typedef enum {
|
||||
ALGO_NSCRYPT,
|
||||
ALGO_X11,
|
||||
ALGO_X13,
|
||||
ALGO_X14,
|
||||
ALGO_X15,
|
||||
ALGO_KECCAK,
|
||||
ALGO_QUARK,
|
||||
ALGO_TWE,
|
||||
|
247
algorithm/x14.c
Normal file
247
algorithm/x14.c
Normal file
@ -0,0 +1,247 @@
|
||||
/*-
|
||||
* Copyright 2009 Colin Percival, 2011 ArtForz
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* This file was originally written by Colin Percival as part of the Tarsnap
|
||||
* online backup system.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "miner.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
|
||||
#include "sph/sph_blake.h"
|
||||
#include "sph/sph_bmw.h"
|
||||
#include "sph/sph_groestl.h"
|
||||
#include "sph/sph_jh.h"
|
||||
#include "sph/sph_keccak.h"
|
||||
#include "sph/sph_skein.h"
|
||||
#include "sph/sph_luffa.h"
|
||||
#include "sph/sph_cubehash.h"
|
||||
#include "sph/sph_shavite.h"
|
||||
#include "sph/sph_simd.h"
|
||||
#include "sph/sph_echo.h"
|
||||
#include "sph/sph_hamsi.h"
|
||||
#include "sph/sph_fugue.h"
|
||||
#include "sph/sph_shabal.h"
|
||||
|
||||
/* Move init out of loop, so init once externally, and then use one single memcpy with that bigger memory block */
|
||||
typedef struct {
|
||||
sph_blake512_context blake1;
|
||||
sph_bmw512_context bmw1;
|
||||
sph_groestl512_context groestl1;
|
||||
sph_skein512_context skein1;
|
||||
sph_jh512_context jh1;
|
||||
sph_keccak512_context keccak1;
|
||||
sph_luffa512_context luffa1;
|
||||
sph_cubehash512_context cubehash1;
|
||||
sph_shavite512_context shavite1;
|
||||
sph_simd512_context simd1;
|
||||
sph_echo512_context echo1;
|
||||
sph_hamsi512_context hamsi1;
|
||||
sph_fugue512_context fugue1;
|
||||
sph_shabal512_context shabal1;
|
||||
} Xhash_context_holder;
|
||||
|
||||
static Xhash_context_holder base_contexts;
|
||||
|
||||
void init_X14hash_contexts()
|
||||
{
|
||||
sph_blake512_init(&base_contexts.blake1);
|
||||
sph_bmw512_init(&base_contexts.bmw1);
|
||||
sph_groestl512_init(&base_contexts.groestl1);
|
||||
sph_skein512_init(&base_contexts.skein1);
|
||||
sph_jh512_init(&base_contexts.jh1);
|
||||
sph_keccak512_init(&base_contexts.keccak1);
|
||||
sph_luffa512_init(&base_contexts.luffa1);
|
||||
sph_cubehash512_init(&base_contexts.cubehash1);
|
||||
sph_shavite512_init(&base_contexts.shavite1);
|
||||
sph_simd512_init(&base_contexts.simd1);
|
||||
sph_echo512_init(&base_contexts.echo1);
|
||||
sph_hamsi512_init(&base_contexts.hamsi1);
|
||||
sph_fugue512_init(&base_contexts.fugue1);
|
||||
sph_shabal512_init(&base_contexts.shabal1);
|
||||
}
|
||||
|
||||
/*
 * Convert a vector of 32-bit words from host byte order to big-endian.
 *
 * dst: receives len converted words.
 * src: len words in host byte order.
 * len: number of 32-bit words to convert (a word count, not a byte count);
 *      the callers in this file pass 19 to convert the first 19 words of a
 *      20-word block header.
 */
static inline void be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
{
    uint32_t i;

    for (i = 0; i < len; i++) {
        dst[i] = htobe32(src[i]);
    }
}
|
||||
|
||||
|
||||
inline void x14hash(void *state, const void *input)
|
||||
{
|
||||
init_X14hash_contexts();
|
||||
|
||||
Xhash_context_holder ctx;
|
||||
|
||||
uint32_t hashA[16], hashB[16];
|
||||
//blake-bmw-groestl-sken-jh-meccak-luffa-cubehash-shivite-simd-echo
|
||||
memcpy(&ctx, &base_contexts, sizeof(base_contexts));
|
||||
|
||||
sph_blake512 (&ctx.blake1, input, 80);
|
||||
sph_blake512_close (&ctx.blake1, hashA);
|
||||
|
||||
sph_bmw512 (&ctx.bmw1, hashA, 64);
|
||||
sph_bmw512_close(&ctx.bmw1, hashB);
|
||||
|
||||
sph_groestl512 (&ctx.groestl1, hashB, 64);
|
||||
sph_groestl512_close(&ctx.groestl1, hashA);
|
||||
|
||||
sph_skein512 (&ctx.skein1, hashA, 64);
|
||||
sph_skein512_close(&ctx.skein1, hashB);
|
||||
|
||||
sph_jh512 (&ctx.jh1, hashB, 64);
|
||||
sph_jh512_close(&ctx.jh1, hashA);
|
||||
|
||||
sph_keccak512 (&ctx.keccak1, hashA, 64);
|
||||
sph_keccak512_close(&ctx.keccak1, hashB);
|
||||
|
||||
sph_luffa512 (&ctx.luffa1, hashB, 64);
|
||||
sph_luffa512_close (&ctx.luffa1, hashA);
|
||||
|
||||
sph_cubehash512 (&ctx.cubehash1, hashA, 64);
|
||||
sph_cubehash512_close(&ctx.cubehash1, hashB);
|
||||
|
||||
sph_shavite512 (&ctx.shavite1, hashB, 64);
|
||||
sph_shavite512_close(&ctx.shavite1, hashA);
|
||||
|
||||
sph_simd512 (&ctx.simd1, hashA, 64);
|
||||
sph_simd512_close(&ctx.simd1, hashB);
|
||||
|
||||
sph_echo512 (&ctx.echo1, hashB, 64);
|
||||
sph_echo512_close(&ctx.echo1, hashA);
|
||||
|
||||
sph_hamsi512 (&ctx.hamsi1, hashA, 64);
|
||||
sph_hamsi512_close(&ctx.hamsi1, hashB);
|
||||
|
||||
sph_fugue512 (&ctx.fugue1, hashB, 64);
|
||||
sph_fugue512_close(&ctx.fugue1, hashA);
|
||||
|
||||
sph_shabal512 (&ctx.shabal1, (const unsigned char*)hashA, 64);
|
||||
sph_shabal512_close(&ctx.shabal1, hashB);
|
||||
|
||||
memcpy(state, hashB, 32);
|
||||
}
|
||||
|
||||
static const uint32_t diff1targ = 0x0000ffff;
|
||||
|
||||
/* Used externally as confirmation of correct OCL code */
|
||||
int x14_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
|
||||
{
|
||||
uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]);
|
||||
uint32_t data[20], ohash[8];
|
||||
|
||||
be32enc_vect(data, (const uint32_t *)pdata, 19);
|
||||
data[19] = htobe32(nonce);
|
||||
x14hash(ohash, data);
|
||||
tmp_hash7 = be32toh(ohash[7]);
|
||||
|
||||
applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx",
|
||||
(long unsigned int)Htarg,
|
||||
(long unsigned int)diff1targ,
|
||||
(long unsigned int)tmp_hash7);
|
||||
|
||||
if (tmp_hash7 > diff1targ)
|
||||
return -1;
|
||||
|
||||
if (tmp_hash7 > Htarg)
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
void x14_regenhash(struct work *work)
|
||||
{
|
||||
uint32_t data[20];
|
||||
uint32_t *nonce = (uint32_t *)(work->data + 76);
|
||||
uint32_t *ohash = (uint32_t *)(work->hash);
|
||||
|
||||
be32enc_vect(data, (const uint32_t *)work->data, 19);
|
||||
data[19] = htobe32(*nonce);
|
||||
x14hash(ohash, data);
|
||||
}
|
||||
|
||||
/*
 * Store x at pp as four bytes, most significant byte first.
 *
 * pp: destination; exactly four bytes are written, one at a time, so no
 *     particular alignment is needed.
 * x:  value to store.
 */
static inline void be32enc(void *pp, uint32_t x)
{
    uint8_t *out = (uint8_t *)pp;
    int shift = 24;

    for (int i = 0; i < 4; i++, shift -= 8) {
        out[i] = (uint8_t)((x >> shift) & 0xff);
    }
}
|
||||
|
||||
/*
 * CPU nonce scan: hash successive nonces until one meets the target, the
 * nonce limit is reached, or the thread is asked to restart its work.
 *
 * thr:        thread info; thr->work_restart is polled after every hash.
 * pmidstate, phash1, phash: unused.
 * pdata:      block header; the word at byte offset 76 is overwritten with
 *             each nonce tried, and word 19 is rewritten with htobe32(n)
 *             when a solution is found.
 * ptarget:    target; only its word 7 is examined.
 * max_nonce:  scanning stops once n reaches this value.
 * last_nonce: receives the last nonce tried.
 * n:          starting point; the first nonce tried is n + 1.
 *
 * Returns true when hash word 7 is at or below the target word.
 *
 * NOTE(review): unlike x14_test() and x14_regenhash(), the nonce is placed
 * in data[19] without htobe32() and hash word 7 is compared without
 * be32toh() - confirm this difference is intended.
 */
bool scanhash_x14(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
                  unsigned char *pdata, unsigned char __maybe_unused *phash1,
                  unsigned char __maybe_unused *phash, const unsigned char *ptarget,
                  uint32_t max_nonce, uint32_t *last_nonce, uint32_t n)
{
    uint32_t *nonce = (uint32_t *)(pdata + 76);
    uint32_t data[20];
    uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]);
    bool ret = false;

    be32enc_vect(data, (const uint32_t *)pdata, 19);

    /*
     * data[7] never changes inside the loop, so report it once per scan
     * instead of once per nonce, at the same level x14_test() logs at.
     */
    applog(LOG_DEBUG, "data7 %08lx", (long unsigned int)data[7]);

    for (;;) {
        uint32_t ostate[8];

        *nonce = ++n;
        data[19] = n;
        x14hash(ostate, data);

        if (unlikely(ostate[7] <= Htarg)) {
            ((uint32_t *)pdata)[19] = htobe32(n);
            ret = true;
            break;
        }

        if (unlikely((n >= max_nonce) || thr->work_restart)) {
            break;
        }
    }

    /* Both exits report the nonce that was tried last. */
    *last_nonce = n;
    return ret;
}
|
||||
|
||||
|
10
algorithm/x14.h
Normal file
10
algorithm/x14.h
Normal file
@ -0,0 +1,10 @@
|
||||
#ifndef X14_H
#define X14_H

#include "miner.h"

/*
 * Hash pdata with the given nonce on the CPU and compare the result with
 * ptarget. Returns -1, 0 or 1; see the definition in x14.c for the exact
 * thresholds.
 */
int x14_test(unsigned char *pdata, const unsigned char *ptarget,
             uint32_t nonce);

/* Recompute work->hash from work->data on the CPU. */
void x14_regenhash(struct work *work);

#endif /* X14_H */
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -95,8 +95,6 @@
|
||||
#include "shavite.cl"
|
||||
#include "simd.cl"
|
||||
#include "echo.cl"
|
||||
#include "hamsi.cl"
|
||||
#include "fugue.cl"
|
||||
|
||||
#define SWAP4(x) as_uint(as_uchar4(x).wzyx)
|
||||
#define SWAP8(x) as_ulong(as_uchar8(x).s76543210)
|
||||
@ -181,7 +179,7 @@ __kernel void search(__global unsigned char* block, __global hash_t* hashes)
|
||||
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
|
||||
__kernel void search1(__global hash_t* hashes)
|
||||
{
|
||||
uint gid = get_global_id(0);
|
||||
uint gid = get_global_id(0);
|
||||
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
|
||||
|
||||
// bmw
|
||||
@ -456,93 +454,69 @@ __kernel void search2(__global hash_t* hashes)
|
||||
uint gid = get_global_id(0);
|
||||
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
|
||||
|
||||
#if !SPH_SMALL_FOOTPRINT_GROESTL
|
||||
__local sph_u64 T0_C[256], T1_C[256], T2_C[256], T3_C[256];
|
||||
__local sph_u64 T4_C[256], T5_C[256], T6_C[256], T7_C[256];
|
||||
#else
|
||||
__local sph_u64 T0_C[256], T4_C[256];
|
||||
#endif
|
||||
|
||||
__local sph_u64 T0_L[256], T1_L[256], T2_L[256], T3_L[256], T4_L[256], T5_L[256], T6_L[256], T7_L[256];
|
||||
|
||||
int init = get_local_id(0);
|
||||
int step = get_local_size(0);
|
||||
|
||||
|
||||
for (int i = init; i < 256; i += step)
|
||||
{
|
||||
T0_C[i] = T0[i];
|
||||
T4_C[i] = T4[i];
|
||||
#if !SPH_SMALL_FOOTPRINT_GROESTL
|
||||
T1_C[i] = T1[i];
|
||||
T2_C[i] = T2[i];
|
||||
T3_C[i] = T3[i];
|
||||
T5_C[i] = T5[i];
|
||||
T6_C[i] = T6[i];
|
||||
T7_C[i] = T7[i];
|
||||
#endif
|
||||
T0_L[i] = T0[i];
|
||||
T4_L[i] = T4[i];
|
||||
T1_L[i] = T1[i];
|
||||
T2_L[i] = T2[i];
|
||||
T3_L[i] = T3[i];
|
||||
T5_L[i] = T5[i];
|
||||
T6_L[i] = T6[i];
|
||||
T7_L[i] = T7[i];
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE); // groestl
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
#define T0 T0_C
|
||||
#define T1 T1_C
|
||||
#define T2 T2_C
|
||||
#define T3 T3_C
|
||||
#define T4 T4_C
|
||||
#define T5 T5_C
|
||||
#define T6 T6_C
|
||||
#define T7 T7_C
|
||||
|
||||
sph_u64 H[16];
|
||||
|
||||
for (unsigned int u = 0; u < 15; u ++)
|
||||
H[u] = 0;
|
||||
|
||||
#if USE_LE
|
||||
H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40);
|
||||
#else
|
||||
H[15] = (sph_u64)512;
|
||||
#endif
|
||||
#define T0 T0_L
|
||||
#define T1 T1_L
|
||||
#define T2 T2_L
|
||||
#define T3 T3_L
|
||||
#define T4 T4_L
|
||||
#define T5 T5_L
|
||||
#define T6 T6_L
|
||||
#define T7 T7_L
|
||||
|
||||
// groestl
|
||||
sph_u64 H[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x0002000000000000};
|
||||
|
||||
sph_u64 g[16], m[16];
|
||||
m[0] = DEC64E(hash->h8[0]);
|
||||
m[1] = DEC64E(hash->h8[1]);
|
||||
m[2] = DEC64E(hash->h8[2]);
|
||||
m[3] = DEC64E(hash->h8[3]);
|
||||
m[4] = DEC64E(hash->h8[4]);
|
||||
m[5] = DEC64E(hash->h8[5]);
|
||||
m[6] = DEC64E(hash->h8[6]);
|
||||
m[7] = DEC64E(hash->h8[7]);
|
||||
|
||||
for (unsigned int u = 0; u < 16; u ++)
|
||||
g[u] = m[u] ^ H[u];
|
||||
|
||||
m[8] = 0x80; g[8] = m[8] ^ H[8];
|
||||
m[9] = 0; g[9] = m[9] ^ H[9];
|
||||
m[10] = 0; g[10] = m[10] ^ H[10];
|
||||
m[11] = 0; g[11] = m[11] ^ H[11];
|
||||
m[12] = 0; g[12] = m[12] ^ H[12];
|
||||
m[13] = 0; g[13] = m[13] ^ H[13];
|
||||
m[14] = 0; g[14] = m[14] ^ H[14];
|
||||
m[15] = 0x100000000000000; g[15] = m[15] ^ H[15];
|
||||
g[0] = m[0] = DEC64E(hash->h8[0]);
|
||||
g[1] = m[1] = DEC64E(hash->h8[1]);
|
||||
g[2] = m[2] = DEC64E(hash->h8[2]);
|
||||
g[3] = m[3] = DEC64E(hash->h8[3]);
|
||||
g[4] = m[4] = DEC64E(hash->h8[4]);
|
||||
g[5] = m[5] = DEC64E(hash->h8[5]);
|
||||
g[6] = m[6] = DEC64E(hash->h8[6]);
|
||||
g[7] = m[7] = DEC64E(hash->h8[7]);
|
||||
g[8] = m[8] = 0x80;
|
||||
g[9] = m[9] = 0;
|
||||
g[10] = m[10] = 0;
|
||||
g[11] = m[11] = 0;
|
||||
g[12] = m[12] = 0;
|
||||
g[13] = m[13] = 0;
|
||||
g[14] = m[14] = 0;
|
||||
g[15] = 0x102000000000000;
|
||||
m[15] = 0x100000000000000;
|
||||
|
||||
PERM_BIG_P(g);
|
||||
PERM_BIG_Q(m);
|
||||
|
||||
for (unsigned int u = 0; u < 16; u ++)
|
||||
H[u] ^= g[u] ^ m[u];
|
||||
|
||||
sph_u64 xH[16];
|
||||
|
||||
for (unsigned int u = 0; u < 16; u ++)
|
||||
xH[u] = H[u];
|
||||
xH[u] = H[u] ^= g[u] ^ m[u];
|
||||
|
||||
PERM_BIG_P(xH);
|
||||
|
||||
for (unsigned int u = 0; u < 16; u ++)
|
||||
H[u] ^= xH[u];
|
||||
|
||||
for (unsigned int u = 0; u < 8; u ++)
|
||||
hash->h8[u] = DEC64E(H[u + 8]);
|
||||
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
for (unsigned int u = 8; u < 16; u ++)
|
||||
hash->h8[u-8] = DEC64E(H[u] ^ xH[u]);
|
||||
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
|
||||
@ -863,7 +837,7 @@ __kernel void search8(__global hash_t* hashes)
|
||||
sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17;
|
||||
sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F;
|
||||
|
||||
sph_u32 sc_count0 = (64 << 3), sc_count1 = 0, sc_count2 = 0, sc_count3 = 0;
|
||||
sph_u32 sc_count0 = 0x200, sc_count1 = 0, sc_count2 = 0, sc_count3 = 0;
|
||||
|
||||
rk00 = hash->h4[0];
|
||||
rk01 = hash->h4[1];
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
1338
kernel/x14.cl
Normal file
1338
kernel/x14.cl
Normal file
File diff suppressed because it is too large
Load Diff
1294
kernel/x14old.cl
Normal file
1294
kernel/x14old.cl
Normal file
File diff suppressed because it is too large
Load Diff
1
miner.h
1
miner.h
@ -1033,6 +1033,7 @@ extern int swork_id;
|
||||
extern int opt_tcp_keepalive;
|
||||
extern bool opt_incognito;
|
||||
extern int opt_hamsi_expand_big;
|
||||
extern bool opt_hamsi_short;
|
||||
|
||||
#if LOCK_TRACKING
|
||||
extern pthread_mutex_t lockstat_lock;
|
||||
|
@ -192,6 +192,7 @@ int nDevs;
|
||||
int opt_dynamic_interval = 7;
|
||||
int opt_g_threads = -1;
|
||||
int opt_hamsi_expand_big = 4;
|
||||
bool opt_hamsi_short = false;
|
||||
bool opt_restart = true;
|
||||
|
||||
struct list_head scan_devices;
|
||||
@ -1459,7 +1460,10 @@ struct opt_table opt_config_table[] = {
|
||||
"Set GPU lookup gap for scrypt mining, comma separated"),
|
||||
OPT_WITH_ARG("--hamsi-expand-big",
|
||||
set_int_1_to_10, opt_show_intval, &opt_hamsi_expand_big,
|
||||
"Set SPH_HAMSI_EXPAND_BIG for X13 algorithms (1 or 4 are common)"),
|
||||
"Set SPH_HAMSI_EXPAND_BIG for X13 derived algorithms (1 or 4 are common)"),
|
||||
OPT_WITHOUT_ARG("--hamsi-short",
|
||||
opt_set_bool, &opt_hamsi_short,
|
||||
"Set SPH_HAMSI_SHORT for X13 derived algorithms (Can give better hashrate for some GPUs)"),
|
||||
#ifdef HAVE_CURSES
|
||||
OPT_WITHOUT_ARG("--incognito",
|
||||
opt_set_bool, &opt_incognito,
|
||||
|
@ -263,6 +263,7 @@
|
||||
<ClCompile Include="..\algorithm\animecoin.c" />
|
||||
<ClCompile Include="..\algorithm\bitblock.c" />
|
||||
<ClCompile Include="..\algorithm\talkcoin.c" />
|
||||
<ClCompile Include="..\algorithm\x14.c" />
|
||||
<ClCompile Include="..\api.c" />
|
||||
<ClCompile Include="..\ccan\opt\helpers.c" />
|
||||
<ClCompile Include="..\ccan\opt\opt.c" />
|
||||
@ -329,6 +330,7 @@
|
||||
<ClInclude Include="..\algorithm\animecoin.h" />
|
||||
<ClInclude Include="..\algorithm\bitblock.h" />
|
||||
<ClInclude Include="..\algorithm\talkcoin.h" />
|
||||
<ClInclude Include="..\algorithm\x14.h" />
|
||||
<ClInclude Include="..\api.h" />
|
||||
<ClInclude Include="..\arg-nonnull.h" />
|
||||
<ClInclude Include="..\bench_block.h" />
|
||||
|
@ -227,6 +227,9 @@
|
||||
<ClCompile Include="..\algorithm\bitblock.c">
|
||||
<Filter>Source Files\algorithm</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\algorithm\x14.c">
|
||||
<Filter>Source Files\algorithm</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\adl.h">
|
||||
@ -412,6 +415,9 @@
|
||||
<ClInclude Include="..\algorithm\bitblock.h">
|
||||
<Filter>Header Files\algorithm</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\algorithm\x14.h">
|
||||
<Filter>Header Files\algorithm</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="README.txt" />
|
||||
|
Loading…
x
Reference in New Issue
Block a user