mirror of
https://github.com/GOSTSec/sgminer
synced 2025-09-02 17:22:33 +00:00
Update poclbm kernel for better performance on GCN and newer SDKs, using bitalign support when not BFI INT patching.
Update phatk kernel to work properly when the kernel is not BFI INT patched, allowing phatk to run on GCN and non-ATI cards.
This commit is contained in:
parent
c0e8819d86
commit
ebaa2be1df
22
Makefile.am
22
Makefile.am
@ -17,27 +17,14 @@ INCLUDES = $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES)
|
||||
|
||||
bin_PROGRAMS = cgminer
|
||||
|
||||
bin_SCRIPTS = phatk110817.cl poclbm110817.cl
|
||||
bin_SCRIPTS = phatk120203.cl poclbm120203.cl
|
||||
|
||||
if HAS_CPUMINE
|
||||
cgminer_SOURCES = elist.h miner.h compat.h bench_block.h \
|
||||
main.c util.c uthash.h \
|
||||
ocl.c ocl.h findnonce.c findnonce.h \
|
||||
sha256_generic.c sha256_4way.c sha256_via.c \
|
||||
sha256_cryptopp.c sha256_sse2_amd64.c \
|
||||
sha256_sse4_amd64.c sha256_sse2_i386.c \
|
||||
sha256_altivec_4way.c \
|
||||
adl.c adl.h adl_functions.h \
|
||||
phatk110817.cl poclbm110817.cl \
|
||||
sha2.c sha2.h api.c
|
||||
else
|
||||
cgminer_SOURCES = elist.h miner.h compat.h bench_block.h \
|
||||
main.c util.c uthash.h \
|
||||
ocl.c ocl.h findnonce.c findnonce.h \
|
||||
adl.c adl.h adl_functions.h \
|
||||
phatk110817.cl poclbm110817.cl \
|
||||
phatk120203.cl poclbm120203.cl \
|
||||
sha2.c sha2.h api.c
|
||||
endif
|
||||
|
||||
cgminer_LDFLAGS = $(PTHREAD_FLAGS)
|
||||
cgminer_LDADD = $(DLOPEN_FLAGS) @LIBCURL_LIBS@ @JANSSON_LIBS@ @PTHREAD_LIBS@ \
|
||||
@ -46,6 +33,11 @@ cgminer_LDADD = $(DLOPEN_FLAGS) @LIBCURL_LIBS@ @JANSSON_LIBS@ @PTHREAD_LIBS@ \
|
||||
cgminer_CPPFLAGS = -I$(top_builddir)/lib -I$(top_srcdir)/lib @OPENCL_FLAGS@
|
||||
|
||||
if HAS_CPUMINE
|
||||
cgminer_SOURCES += sha256_generic.c sha256_4way.c sha256_via.c \
|
||||
sha256_cryptopp.c sha256_sse2_amd64.c \
|
||||
sha256_sse4_amd64.c sha256_sse2_i386.c \
|
||||
sha256_altivec_4way.c
|
||||
|
||||
if HAVE_x86_64
|
||||
if HAS_YASM
|
||||
SUBDIRS += x86_64
|
||||
|
8
ocl.c
8
ocl.c
@ -366,13 +366,13 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
|
||||
|
||||
switch (chosen_kernel) {
|
||||
case KL_POCLBM:
|
||||
strcpy(filename, "poclbm110817.cl");
|
||||
strcpy(binaryfilename, "poclbm110817");
|
||||
strcpy(filename, "poclbm120203.cl");
|
||||
strcpy(binaryfilename, "poclbm120203");
|
||||
break;
|
||||
case KL_NONE: /* Shouldn't happen */
|
||||
case KL_PHATK:
|
||||
strcpy(filename, "phatk110817.cl");
|
||||
strcpy(binaryfilename, "phatk110817");
|
||||
strcpy(filename, "phatk120203.cl");
|
||||
strcpy(binaryfilename, "phatk120203");
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
// This file is taken and modified from the public-domain poclbm project, and
|
||||
// I have therefore decided to keep it public-domain.
|
||||
|
||||
// Modified version copyright 2011-2012 Con Kolivas
|
||||
|
||||
#ifdef VECTORS4
|
||||
typedef uint4 u;
|
||||
@ -51,9 +51,6 @@ __constant uint H[8] = {
|
||||
#ifdef BITALIGN
|
||||
#pragma OPENCL EXTENSION cl_amd_media_ops : enable
|
||||
#define rot(x, y) amd_bitalign(x, x, (uint)(32 - y))
|
||||
#else
|
||||
#define rot(x, y) rotate(x, (uint)y)
|
||||
#endif
|
||||
|
||||
// This part is not from the stock poclbm kernel. It's part of an optimization
|
||||
// added in the Phoenix Miner.
|
||||
@ -75,11 +72,20 @@ __constant uint H[8] = {
|
||||
#define Ch(x, y, z) amd_bytealign(x,y,z)
|
||||
// Ma can also be implemented in terms of BFI_INT...
|
||||
#define Ma(z, x, y) amd_bytealign(z^x,y,x)
|
||||
#else
|
||||
#define Ch(x, y, z) bitselect(x,y,z)
|
||||
// Ma can also be implemented in terms of bitselect
|
||||
#define Ma(z, x, y) bitselect(z^x,y,x)
|
||||
#else // BFI_INT
|
||||
// Later SDKs optimise this to BFI INT without patching and GCN
|
||||
// actually fails if manually patched with BFI_INT
|
||||
|
||||
#define Ch(x, y, z) bitselect((u)z, (u)y, (u)x)
|
||||
#define Ma(x, y, z) bitselect((u)x, (u)y, (u)z ^ (u)x)
|
||||
#define rotr(x, y) amd_bitalign((u)x, (u)x, (u)y)
|
||||
#endif
|
||||
#else // BITALIGN
|
||||
#define Ch(x, y, z) (z ^ (x & (y ^ z)))
|
||||
#define Ma(x, y, z) ((x & z) | (y & (x | z)))
|
||||
#define rotr(x, y) rotate((u)x, (u)(32-y))
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
//Various intermediate calculations for each SHA round
|
@ -1,6 +1,6 @@
|
||||
// -ck modified kernel taken from Phoenix taken from poclbm, with aspects of
|
||||
// phatk and others.
|
||||
// Modified version copyright 2011 Con Kolivas
|
||||
// Modified version copyright 2011-2012 Con Kolivas
|
||||
|
||||
// This file is taken and modified from the public-domain poclbm project, and
|
||||
// we have therefore decided to keep it public-domain in Phoenix.
|
||||
@ -33,6 +33,9 @@ __constant uint K[64] = {
|
||||
// detected, use it for ch. Otherwise, construct ch out of simpler logical
|
||||
// primitives.
|
||||
|
||||
#ifdef BITALIGN
|
||||
#pragma OPENCL EXTENSION cl_amd_media_ops : enable
|
||||
#define rotr(x, y) amd_bitalign((u)x, (u)x, (u)y)
|
||||
#ifdef BFI_INT
|
||||
// Well, slight problem... It turns out BFI_INT isn't actually exposed to
|
||||
// OpenCL (or CAL IL for that matter) in any way. However, there is
|
||||
@ -46,15 +49,16 @@ __constant uint K[64] = {
|
||||
|
||||
// Ma can also be implemented in terms of BFI_INT...
|
||||
#define Ma(x, y, z) amd_bytealign( (z^x), (y), (x) )
|
||||
#else
|
||||
#else // BFI_INT
|
||||
// Later SDKs optimise this to BFI INT without patching and GCN
|
||||
// actually fails if manually patched with BFI_INT
|
||||
|
||||
#define ch(x, y, z) bitselect((u)z, (u)y, (u)x)
|
||||
#define Ma(x, y, z) bitselect((u)x, (u)y, (u)z ^ (u)x)
|
||||
#endif
|
||||
#else // BITALIGN
|
||||
#define ch(x, y, z) (z ^ (x & (y ^ z)))
|
||||
#define Ma(x, y, z) ((x & z) | (y & (x | z)))
|
||||
#endif
|
||||
|
||||
#ifdef BITALIGN
|
||||
#pragma OPENCL EXTENSION cl_amd_media_ops : enable
|
||||
#define rotr(x, y) amd_bitalign((u)x, (u)x, (u)y)
|
||||
#else
|
||||
#define rotr(x, y) rotate((u)x, (u)(32 - y))
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user