Browse Source

X14 Implementation

Added X14 and cleaned up the X13/X15 kernels so all 3 offshoots are in
sync. New option "--hamsi-short" or "hamsi-short":true to add a small
boost. May not work on all GPUs.
djm34
ystarnaud 11 years ago
parent
commit
5c9126fd61
  1. 1
      Makefile.am
  2. 110
      algorithm.c
  3. 2
      algorithm.h
  4. 247
      algorithm/x14.c
  5. 10
      algorithm/x14.h
  6. 697
      kernel/bitblock.cl
  7. 2175
      kernel/bitblockold.cl
  8. 126
      kernel/darkcoin-mod.cl
  9. 1642
      kernel/marucoin-mod.cl
  10. 1808
      kernel/marucoin-modold.cl
  11. 1338
      kernel/x14.cl
  12. 1294
      kernel/x14old.cl
  13. 1
      miner.h
  14. 6
      sgminer.c
  15. 2
      winbuild/sgminer.vcxproj
  16. 6
      winbuild/sgminer.vcxproj.filters

1
Makefile.am

@ -63,6 +63,7 @@ sgminer_SOURCES += algorithm/marucoin.c algorithm/marucoin.h
sgminer_SOURCES += algorithm/maxcoin.c algorithm/maxcoin.h sgminer_SOURCES += algorithm/maxcoin.c algorithm/maxcoin.h
sgminer_SOURCES += algorithm/talkcoin.c algorithm/talkcoin.h sgminer_SOURCES += algorithm/talkcoin.c algorithm/talkcoin.h
sgminer_SOURCES += algorithm/bitblock.c algorithm/bitblock.h sgminer_SOURCES += algorithm/bitblock.c algorithm/bitblock.h
sgminer_SOURCES += algorithm/x14.c algorithm/x14.h
bin_SCRIPTS = $(top_srcdir)/kernel/*.cl bin_SCRIPTS = $(top_srcdir)/kernel/*.cl

110
algorithm.c

@ -27,6 +27,7 @@
#include "algorithm/maxcoin.h" #include "algorithm/maxcoin.h"
#include "algorithm/talkcoin.h" #include "algorithm/talkcoin.h"
#include "algorithm/bitblock.h" #include "algorithm/bitblock.h"
#include "algorithm/x14.h"
#include "compat.h" #include "compat.h"
@ -40,6 +41,8 @@ const char *algorithm_type_str[] = {
"NScrypt", "NScrypt",
"X11", "X11",
"X13", "X13",
"X14",
"X15",
"Keccak", "Keccak",
"Quarkcoin", "Quarkcoin",
"Twecoin", "Twecoin",
@ -91,11 +94,11 @@ static void append_scrypt_compiler_options(struct _build_kernel_data *data, stru
static void append_hamsi_compiler_options(struct _build_kernel_data *data, struct cgpu_info *cgpu, struct _algorithm_t *algorithm) static void append_hamsi_compiler_options(struct _build_kernel_data *data, struct cgpu_info *cgpu, struct _algorithm_t *algorithm)
{ {
char buf[255]; char buf[255];
sprintf(buf, " -D SPH_HAMSI_EXPAND_BIG=%d", sprintf(buf, " -D SPH_HAMSI_EXPAND_BIG=%d%s ",
opt_hamsi_expand_big); opt_hamsi_expand_big, ((opt_hamsi_short)?" -D SPH_HAMSI_SHORT=1 ":""));
strcat(data->compiler_options, buf); strcat(data->compiler_options, buf);
sprintf(buf, "big%u", (unsigned int)opt_hamsi_expand_big); sprintf(buf, "big%u%s", (unsigned int)opt_hamsi_expand_big, ((opt_hamsi_short)?"hs":""));
strcat(data->binary_filename, buf); strcat(data->binary_filename, buf);
} }
@ -419,6 +422,100 @@ static cl_int queue_talkcoin_mod_kernel(struct __clState *clState, struct _dev_b
return status; return status;
} }
static cl_int queue_x14_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
cl_kernel *kernel;
unsigned int num;
cl_ulong le_target;
cl_int status = 0;
le_target = *(cl_ulong *)(blk->work->device_target + 24);
flip80(clState->cldata, blk->work->data);
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL);
// blake - search
kernel = &clState->kernel;
num = 0;
CL_SET_ARG(clState->CLbuffer0);
CL_SET_ARG(clState->padbuffer8);
// bmw - search1
kernel = clState->extra_kernels;
CL_SET_ARG_0(clState->padbuffer8);
// groestl - search2
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// skein - search3
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// jh - search4
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// keccak - search5
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// luffa - search6
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// cubehash - search7
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// shavite - search8
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// simd - search9
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// echo - search10
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// hamsi - search11
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// fugue - search12
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// shabal - search13
num = 0;
CL_NEXTKERNEL_SET_ARG(clState->padbuffer8);
CL_SET_ARG(clState->outputBuffer);
CL_SET_ARG(le_target);
return status;
}
static cl_int queue_x14_old_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
cl_kernel *kernel;
unsigned int num;
cl_ulong le_target;
cl_int status = 0;
le_target = *(cl_ulong *)(blk->work->device_target + 24);
flip80(clState->cldata, blk->work->data);
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL);
// blake - search
kernel = &clState->kernel;
num = 0;
CL_SET_ARG(clState->CLbuffer0);
CL_SET_ARG(clState->padbuffer8);
// bmw - search1
kernel = clState->extra_kernels;
CL_SET_ARG_0(clState->padbuffer8);
// groestl - search2
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// skein - search3
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// jh - search4
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// keccak - search5
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// luffa - search6
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// cubehash - search7
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// shavite - search8
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// simd - search9
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
// combined echo, hamsi, fugue - shabal - search10
num = 0;
CL_NEXTKERNEL_SET_ARG(clState->padbuffer8);
CL_SET_ARG(clState->outputBuffer);
CL_SET_ARG(le_target);
return status;
}
typedef struct _algorithm_settings_t { typedef struct _algorithm_settings_t {
const char *name; /* Human-readable identifier */ const char *name; /* Human-readable identifier */
algorithm_type_t type; //common algorithm type algorithm_type_t type; //common algorithm type
@ -477,8 +574,13 @@ static algorithm_settings_t algos[] = {
{ "marucoin-mod", ALGO_X13, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 12, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_kernel, gen_hash, append_hamsi_compiler_options}, { "marucoin-mod", ALGO_X13, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 12, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_kernel, gen_hash, append_hamsi_compiler_options},
{ "marucoin-modold", ALGO_X13, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_old_kernel, gen_hash, append_hamsi_compiler_options}, { "marucoin-modold", ALGO_X13, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_old_kernel, gen_hash, append_hamsi_compiler_options},
{ "talkcoin-mod", ALGO_NIST, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 4, 8 * 16 * 4194304, 0, talkcoin_regenhash, queue_talkcoin_mod_kernel, gen_hash, NULL}, { "x14", ALGO_X14, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 13, 8 * 16 * 4194304, 0, x14_regenhash, queue_x14_kernel, gen_hash, append_hamsi_compiler_options},
{ "x14old", ALGO_X14, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, x14_regenhash, queue_x14_old_kernel, gen_hash, append_hamsi_compiler_options},
{ "bitblock", ALGO_X15, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 14, 4 * 16 * 4194304, 0, bitblock_regenhash, queue_bitblock_kernel, gen_hash, append_hamsi_compiler_options},
{ "bitblockold", ALGO_X15, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 10, 4 * 16 * 4194304, 0, bitblock_regenhash, queue_bitblockold_kernel, gen_hash, append_hamsi_compiler_options},
{ "talkcoin-mod", ALGO_NIST, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 4, 8 * 16 * 4194304, 0, talkcoin_regenhash, queue_talkcoin_mod_kernel, gen_hash, NULL},
// kernels starting from this will have difficulty calculated by using fuguecoin algorithm // kernels starting from this will have difficulty calculated by using fuguecoin algorithm
#define A_FUGUE(a, b) \ #define A_FUGUE(a, b) \
{ a, ALGO_FUGUE, 1, 256, 256, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, sha256, NULL} { a, ALGO_FUGUE, 1, 256, 256, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, sha256, NULL}

2
algorithm.h

@ -16,6 +16,8 @@ typedef enum {
ALGO_NSCRYPT, ALGO_NSCRYPT,
ALGO_X11, ALGO_X11,
ALGO_X13, ALGO_X13,
ALGO_X14,
ALGO_X15,
ALGO_KECCAK, ALGO_KECCAK,
ALGO_QUARK, ALGO_QUARK,
ALGO_TWE, ALGO_TWE,

247
algorithm/x14.c

@ -0,0 +1,247 @@
/*-
* Copyright 2009 Colin Percival, 2011 ArtForz
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* This file was originally written by Colin Percival as part of the Tarsnap
* online backup system.
*/
#include "config.h"
#include "miner.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include "sph/sph_blake.h"
#include "sph/sph_bmw.h"
#include "sph/sph_groestl.h"
#include "sph/sph_jh.h"
#include "sph/sph_keccak.h"
#include "sph/sph_skein.h"
#include "sph/sph_luffa.h"
#include "sph/sph_cubehash.h"
#include "sph/sph_shavite.h"
#include "sph/sph_simd.h"
#include "sph/sph_echo.h"
#include "sph/sph_hamsi.h"
#include "sph/sph_fugue.h"
#include "sph/sph_shabal.h"
/* Move init out of loop, so init once externally, and then use one single memcpy with that bigger memory block */
typedef struct {
sph_blake512_context blake1;
sph_bmw512_context bmw1;
sph_groestl512_context groestl1;
sph_skein512_context skein1;
sph_jh512_context jh1;
sph_keccak512_context keccak1;
sph_luffa512_context luffa1;
sph_cubehash512_context cubehash1;
sph_shavite512_context shavite1;
sph_simd512_context simd1;
sph_echo512_context echo1;
sph_hamsi512_context hamsi1;
sph_fugue512_context fugue1;
sph_shabal512_context shabal1;
} Xhash_context_holder;
static Xhash_context_holder base_contexts;
void init_X14hash_contexts()
{
sph_blake512_init(&base_contexts.blake1);
sph_bmw512_init(&base_contexts.bmw1);
sph_groestl512_init(&base_contexts.groestl1);
sph_skein512_init(&base_contexts.skein1);
sph_jh512_init(&base_contexts.jh1);
sph_keccak512_init(&base_contexts.keccak1);
sph_luffa512_init(&base_contexts.luffa1);
sph_cubehash512_init(&base_contexts.cubehash1);
sph_shavite512_init(&base_contexts.shavite1);
sph_simd512_init(&base_contexts.simd1);
sph_echo512_init(&base_contexts.echo1);
sph_hamsi512_init(&base_contexts.hamsi1);
sph_fugue512_init(&base_contexts.fugue1);
sph_shabal512_init(&base_contexts.shabal1);
}
/*
* Encode a length len/4 vector of (uint32_t) into a length len vector of
* (unsigned char) in big-endian form. Assumes len is a multiple of 4.
*/
static inline void be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
{
uint32_t i;
for (i = 0; i < len; i++)
dst[i] = htobe32(src[i]);
}
inline void x14hash(void *state, const void *input)
{
init_X14hash_contexts();
Xhash_context_holder ctx;
uint32_t hashA[16], hashB[16];
//blake-bmw-groestl-sken-jh-meccak-luffa-cubehash-shivite-simd-echo
memcpy(&ctx, &base_contexts, sizeof(base_contexts));
sph_blake512 (&ctx.blake1, input, 80);
sph_blake512_close (&ctx.blake1, hashA);
sph_bmw512 (&ctx.bmw1, hashA, 64);
sph_bmw512_close(&ctx.bmw1, hashB);
sph_groestl512 (&ctx.groestl1, hashB, 64);
sph_groestl512_close(&ctx.groestl1, hashA);
sph_skein512 (&ctx.skein1, hashA, 64);
sph_skein512_close(&ctx.skein1, hashB);
sph_jh512 (&ctx.jh1, hashB, 64);
sph_jh512_close(&ctx.jh1, hashA);
sph_keccak512 (&ctx.keccak1, hashA, 64);
sph_keccak512_close(&ctx.keccak1, hashB);
sph_luffa512 (&ctx.luffa1, hashB, 64);
sph_luffa512_close (&ctx.luffa1, hashA);
sph_cubehash512 (&ctx.cubehash1, hashA, 64);
sph_cubehash512_close(&ctx.cubehash1, hashB);
sph_shavite512 (&ctx.shavite1, hashB, 64);
sph_shavite512_close(&ctx.shavite1, hashA);
sph_simd512 (&ctx.simd1, hashA, 64);
sph_simd512_close(&ctx.simd1, hashB);
sph_echo512 (&ctx.echo1, hashB, 64);
sph_echo512_close(&ctx.echo1, hashA);
sph_hamsi512 (&ctx.hamsi1, hashA, 64);
sph_hamsi512_close(&ctx.hamsi1, hashB);
sph_fugue512 (&ctx.fugue1, hashB, 64);
sph_fugue512_close(&ctx.fugue1, hashA);
sph_shabal512 (&ctx.shabal1, (const unsigned char*)hashA, 64);
sph_shabal512_close(&ctx.shabal1, hashB);
memcpy(state, hashB, 32);
}
static const uint32_t diff1targ = 0x0000ffff;
/* Used externally as confirmation of correct OCL code */
int x14_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
{
uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]);
uint32_t data[20], ohash[8];
be32enc_vect(data, (const uint32_t *)pdata, 19);
data[19] = htobe32(nonce);
x14hash(ohash, data);
tmp_hash7 = be32toh(ohash[7]);
applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx",
(long unsigned int)Htarg,
(long unsigned int)diff1targ,
(long unsigned int)tmp_hash7);
if (tmp_hash7 > diff1targ)
return -1;
if (tmp_hash7 > Htarg)
return 0;
return 1;
}
void x14_regenhash(struct work *work)
{
uint32_t data[20];
uint32_t *nonce = (uint32_t *)(work->data + 76);
uint32_t *ohash = (uint32_t *)(work->hash);
be32enc_vect(data, (const uint32_t *)work->data, 19);
data[19] = htobe32(*nonce);
x14hash(ohash, data);
}
static inline void be32enc(void *pp, uint32_t x)
{
uint8_t *p = (uint8_t *)pp;
p[3] = x & 0xff;
p[2] = (x >> 8) & 0xff;
p[1] = (x >> 16) & 0xff;
p[0] = (x >> 24) & 0xff;
}
bool scanhash_x14(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
unsigned char *pdata, unsigned char __maybe_unused *phash1,
unsigned char __maybe_unused *phash, const unsigned char *ptarget,
uint32_t max_nonce, uint32_t *last_nonce, uint32_t n)
{
uint32_t *nonce = (uint32_t *)(pdata + 76);
uint32_t data[20];
uint32_t tmp_hash7;
uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]);
bool ret = false;
be32enc_vect(data, (const uint32_t *)pdata, 19);
while(1)
{
uint32_t ostate[8];
*nonce = ++n;
data[19] = (n);
x14hash(ostate, data);
tmp_hash7 = (ostate[7]);
applog(LOG_INFO, "data7 %08lx", (long unsigned int)data[7]);
if(unlikely(tmp_hash7 <= Htarg))
{
((uint32_t *)pdata)[19] = htobe32(n);
*last_nonce = n;
ret = true;
break;
}
if (unlikely((n >= max_nonce) || thr->work_restart))
{
*last_nonce = n;
break;
}
}
return ret;
}

10
algorithm/x14.h

@ -0,0 +1,10 @@
#ifndef X14_H
#define X14_H
#include "miner.h"
extern int x14_test(unsigned char *pdata, const unsigned char *ptarget,
uint32_t nonce);
extern void x14_regenhash(struct work *work);
#endif /* X14_H */

697
kernel/bitblock.cl

File diff suppressed because it is too large Load Diff

2175
kernel/bitblockold.cl

File diff suppressed because it is too large Load Diff

126
kernel/darkcoin-mod.cl

@ -95,8 +95,6 @@
#include "shavite.cl" #include "shavite.cl"
#include "simd.cl" #include "simd.cl"
#include "echo.cl" #include "echo.cl"
#include "hamsi.cl"
#include "fugue.cl"
#define SWAP4(x) as_uint(as_uchar4(x).wzyx) #define SWAP4(x) as_uint(as_uchar4(x).wzyx)
#define SWAP8(x) as_ulong(as_uchar8(x).s76543210) #define SWAP8(x) as_ulong(as_uchar8(x).s76543210)
@ -181,7 +179,7 @@ __kernel void search(__global unsigned char* block, __global hash_t* hashes)
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search1(__global hash_t* hashes) __kernel void search1(__global hash_t* hashes)
{ {
uint gid = get_global_id(0); uint gid = get_global_id(0);
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]); __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
// bmw // bmw
@ -456,93 +454,69 @@ __kernel void search2(__global hash_t* hashes)
uint gid = get_global_id(0); uint gid = get_global_id(0);
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]); __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
#if !SPH_SMALL_FOOTPRINT_GROESTL __local sph_u64 T0_L[256], T1_L[256], T2_L[256], T3_L[256], T4_L[256], T5_L[256], T6_L[256], T7_L[256];
__local sph_u64 T0_C[256], T1_C[256], T2_C[256], T3_C[256];
__local sph_u64 T4_C[256], T5_C[256], T6_C[256], T7_C[256];
#else
__local sph_u64 T0_C[256], T4_C[256];
#endif
int init = get_local_id(0); int init = get_local_id(0);
int step = get_local_size(0); int step = get_local_size(0);
for (int i = init; i < 256; i += step) for (int i = init; i < 256; i += step)
{ {
T0_C[i] = T0[i]; T0_L[i] = T0[i];
T4_C[i] = T4[i]; T4_L[i] = T4[i];
#if !SPH_SMALL_FOOTPRINT_GROESTL T1_L[i] = T1[i];
T1_C[i] = T1[i]; T2_L[i] = T2[i];
T2_C[i] = T2[i]; T3_L[i] = T3[i];
T3_C[i] = T3[i]; T5_L[i] = T5[i];
T5_C[i] = T5[i]; T6_L[i] = T6[i];
T6_C[i] = T6[i]; T7_L[i] = T7[i];
T7_C[i] = T7[i];
#endif
} }
barrier(CLK_LOCAL_MEM_FENCE); // groestl barrier(CLK_LOCAL_MEM_FENCE);
#define T0 T0_C #define T0 T0_L
#define T1 T1_C #define T1 T1_L
#define T2 T2_C #define T2 T2_L
#define T3 T3_C #define T3 T3_L
#define T4 T4_C #define T4 T4_L
#define T5 T5_C #define T5 T5_L
#define T6 T6_C #define T6 T6_L
#define T7 T7_C #define T7 T7_L
sph_u64 H[16]; // groestl
sph_u64 H[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x0002000000000000};
for (unsigned int u = 0; u < 15; u ++)
H[u] = 0;
#if USE_LE
H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40);
#else
H[15] = (sph_u64)512;
#endif
sph_u64 g[16], m[16]; sph_u64 g[16], m[16];
m[0] = DEC64E(hash->h8[0]); g[0] = m[0] = DEC64E(hash->h8[0]);
m[1] = DEC64E(hash->h8[1]); g[1] = m[1] = DEC64E(hash->h8[1]);
m[2] = DEC64E(hash->h8[2]); g[2] = m[2] = DEC64E(hash->h8[2]);
m[3] = DEC64E(hash->h8[3]); g[3] = m[3] = DEC64E(hash->h8[3]);
m[4] = DEC64E(hash->h8[4]); g[4] = m[4] = DEC64E(hash->h8[4]);
m[5] = DEC64E(hash->h8[5]); g[5] = m[5] = DEC64E(hash->h8[5]);
m[6] = DEC64E(hash->h8[6]); g[6] = m[6] = DEC64E(hash->h8[6]);
m[7] = DEC64E(hash->h8[7]); g[7] = m[7] = DEC64E(hash->h8[7]);
g[8] = m[8] = 0x80;
for (unsigned int u = 0; u < 16; u ++) g[9] = m[9] = 0;
g[u] = m[u] ^ H[u]; g[10] = m[10] = 0;
g[11] = m[11] = 0;
m[8] = 0x80; g[8] = m[8] ^ H[8]; g[12] = m[12] = 0;
m[9] = 0; g[9] = m[9] ^ H[9]; g[13] = m[13] = 0;
m[10] = 0; g[10] = m[10] ^ H[10]; g[14] = m[14] = 0;
m[11] = 0; g[11] = m[11] ^ H[11]; g[15] = 0x102000000000000;
m[12] = 0; g[12] = m[12] ^ H[12]; m[15] = 0x100000000000000;
m[13] = 0; g[13] = m[13] ^ H[13];
m[14] = 0; g[14] = m[14] ^ H[14];
m[15] = 0x100000000000000; g[15] = m[15] ^ H[15];
PERM_BIG_P(g); PERM_BIG_P(g);
PERM_BIG_Q(m); PERM_BIG_Q(m);
for (unsigned int u = 0; u < 16; u ++)
H[u] ^= g[u] ^ m[u];
sph_u64 xH[16]; sph_u64 xH[16];
for (unsigned int u = 0; u < 16; u ++) for (unsigned int u = 0; u < 16; u ++)
xH[u] = H[u]; xH[u] = H[u] ^= g[u] ^ m[u];
PERM_BIG_P(xH); PERM_BIG_P(xH);
for (unsigned int u = 0; u < 16; u ++) for (unsigned int u = 8; u < 16; u ++)
H[u] ^= xH[u]; hash->h8[u-8] = DEC64E(H[u] ^ xH[u]);
for (unsigned int u = 0; u < 8; u ++) barrier(CLK_GLOBAL_MEM_FENCE);
hash->h8[u] = DEC64E(H[u + 8]);
barrier(CLK_GLOBAL_MEM_FENCE);
} }
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
@ -863,7 +837,7 @@ __kernel void search8(__global hash_t* hashes)
sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17; sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17;
sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F; sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F;
sph_u32 sc_count0 = (64 << 3), sc_count1 = 0, sc_count2 = 0, sc_count3 = 0; sph_u32 sc_count0 = 0x200, sc_count1 = 0, sc_count2 = 0, sc_count3 = 0;
rk00 = hash->h4[0]; rk00 = hash->h4[0];
rk01 = hash->h4[1]; rk01 = hash->h4[1];

1642
kernel/marucoin-mod.cl

File diff suppressed because it is too large Load Diff

1808
kernel/marucoin-modold.cl

File diff suppressed because it is too large Load Diff

1338
kernel/x14.cl

File diff suppressed because it is too large Load Diff

1294
kernel/x14old.cl

File diff suppressed because it is too large Load Diff

1
miner.h

@ -1033,6 +1033,7 @@ extern int swork_id;
extern int opt_tcp_keepalive; extern int opt_tcp_keepalive;
extern bool opt_incognito; extern bool opt_incognito;
extern int opt_hamsi_expand_big; extern int opt_hamsi_expand_big;
extern bool opt_hamsi_short;
#if LOCK_TRACKING #if LOCK_TRACKING
extern pthread_mutex_t lockstat_lock; extern pthread_mutex_t lockstat_lock;

6
sgminer.c

@ -192,6 +192,7 @@ int nDevs;
int opt_dynamic_interval = 7; int opt_dynamic_interval = 7;
int opt_g_threads = -1; int opt_g_threads = -1;
int opt_hamsi_expand_big = 4; int opt_hamsi_expand_big = 4;
bool opt_hamsi_short = false;
bool opt_restart = true; bool opt_restart = true;
struct list_head scan_devices; struct list_head scan_devices;
@ -1459,7 +1460,10 @@ struct opt_table opt_config_table[] = {
"Set GPU lookup gap for scrypt mining, comma separated"), "Set GPU lookup gap for scrypt mining, comma separated"),
OPT_WITH_ARG("--hamsi-expand-big", OPT_WITH_ARG("--hamsi-expand-big",
set_int_1_to_10, opt_show_intval, &opt_hamsi_expand_big, set_int_1_to_10, opt_show_intval, &opt_hamsi_expand_big,
"Set SPH_HAMSI_EXPAND_BIG for X13 algorithms (1 or 4 are common)"), "Set SPH_HAMSI_EXPAND_BIG for X13 derived algorithms (1 or 4 are common)"),
OPT_WITHOUT_ARG("--hamsi-short",
opt_set_bool, &opt_hamsi_short,
"Set SPH_HAMSI_SHORT for X13 derived algorithms (Can give better hashrate for some GPUs)"),
#ifdef HAVE_CURSES #ifdef HAVE_CURSES
OPT_WITHOUT_ARG("--incognito", OPT_WITHOUT_ARG("--incognito",
opt_set_bool, &opt_incognito, opt_set_bool, &opt_incognito,

2
winbuild/sgminer.vcxproj

@ -263,6 +263,7 @@
<ClCompile Include="..\algorithm\animecoin.c" /> <ClCompile Include="..\algorithm\animecoin.c" />
<ClCompile Include="..\algorithm\bitblock.c" /> <ClCompile Include="..\algorithm\bitblock.c" />
<ClCompile Include="..\algorithm\talkcoin.c" /> <ClCompile Include="..\algorithm\talkcoin.c" />
<ClCompile Include="..\algorithm\x14.c" />
<ClCompile Include="..\api.c" /> <ClCompile Include="..\api.c" />
<ClCompile Include="..\ccan\opt\helpers.c" /> <ClCompile Include="..\ccan\opt\helpers.c" />
<ClCompile Include="..\ccan\opt\opt.c" /> <ClCompile Include="..\ccan\opt\opt.c" />
@ -329,6 +330,7 @@
<ClInclude Include="..\algorithm\animecoin.h" /> <ClInclude Include="..\algorithm\animecoin.h" />
<ClInclude Include="..\algorithm\bitblock.h" /> <ClInclude Include="..\algorithm\bitblock.h" />
<ClInclude Include="..\algorithm\talkcoin.h" /> <ClInclude Include="..\algorithm\talkcoin.h" />
<ClInclude Include="..\algorithm\x14.h" />
<ClInclude Include="..\api.h" /> <ClInclude Include="..\api.h" />
<ClInclude Include="..\arg-nonnull.h" /> <ClInclude Include="..\arg-nonnull.h" />
<ClInclude Include="..\bench_block.h" /> <ClInclude Include="..\bench_block.h" />

6
winbuild/sgminer.vcxproj.filters

@ -227,6 +227,9 @@
<ClCompile Include="..\algorithm\bitblock.c"> <ClCompile Include="..\algorithm\bitblock.c">
<Filter>Source Files\algorithm</Filter> <Filter>Source Files\algorithm</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="..\algorithm\x14.c">
<Filter>Source Files\algorithm</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="..\adl.h"> <ClInclude Include="..\adl.h">
@ -412,6 +415,9 @@
<ClInclude Include="..\algorithm\bitblock.h"> <ClInclude Include="..\algorithm\bitblock.h">
<Filter>Header Files\algorithm</Filter> <Filter>Header Files\algorithm</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="..\algorithm\x14.h">
<Filter>Header Files\algorithm</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<None Include="README.txt" /> <None Include="README.txt" />

Loading…
Cancel
Save