mirror of
https://github.com/GOSTSec/sgminer
synced 2025-01-10 23:08:07 +00:00
Clean up use of macros in poclbm and use bitselect everywhere possible.
This commit is contained in:
parent
5fd7d462ab
commit
2a3b7359eb
@ -36,7 +36,10 @@ __constant uint K[64] = {
|
||||
#ifdef BITALIGN
|
||||
#pragma OPENCL EXTENSION cl_amd_media_ops : enable
|
||||
#define rotr(x, y) amd_bitalign((u)x, (u)x, (u)y)
|
||||
#ifdef BFI_INT
|
||||
#else
|
||||
#define rotr(x, y) rotate((u)x, (u)(32 - y))
|
||||
#endif
|
||||
#ifdef BFI_INT
|
||||
// Well, slight problem... It turns out BFI_INT isn't actually exposed to
|
||||
// OpenCL (or CAL IL for that matter) in any way. However, there is
|
||||
// a similar instruction, BYTE_ALIGN_INT, which is exposed to OpenCL via
|
||||
@ -49,23 +52,19 @@ __constant uint K[64] = {
|
||||
|
||||
// Ma can also be implemented in terms of BFI_INT...
|
||||
#define Ma(x, y, z) amd_bytealign( (z^x), (y), (x) )
|
||||
#else // BFI_INT
|
||||
// Later SDKs optimise this to BFI INT without patching and GCN
|
||||
// actually fails if manually patched with BFI_INT
|
||||
|
||||
// AMD's KernelAnalyzer throws errors compiling the kernel if we use
|
||||
// amd_bytealign on constants with vectors enabled, so we use this to avoid
|
||||
// problems. (this is used 4 times, and likely optimized out by the compiler.)
|
||||
#define Ma2(x, y, z) bitselect((u)x, (u)y, (u)z ^ (u)x)
|
||||
#else // BFI_INT
|
||||
//GCN actually fails if manually patched with BFI_INT
|
||||
|
||||
#define ch(x, y, z) bitselect((u)z, (u)y, (u)x)
|
||||
#define Ma(x, y, z) bitselect((u)x, (u)y, (u)z ^ (u)x)
|
||||
#endif
|
||||
#else // BITALIGN
|
||||
#define ch(x, y, z) (z ^ (x & (y ^ z)))
|
||||
#define Ma(x, y, z) ((x & z) | (y & (x | z)))
|
||||
#define rotr(x, y) rotate((u)x, (u)(32 - y))
|
||||
#define Ma2(x, y, z) Ma(x, y, z)
|
||||
#endif
|
||||
|
||||
// AMD's KernelAnalyzer throws errors compiling the kernel if we use
|
||||
// amd_bytealign on constants with vectors enabled, so we use this to avoid
|
||||
// problems. (this is used 4 times, and likely optimized out by the compiler.)
|
||||
#define Ma2(x, y, z) ((y & z) | (x & (y | z)))
|
||||
|
||||
__kernel
|
||||
__attribute__((vec_type_hint(u)))
|
||||
|
Loading…
Reference in New Issue
Block a user