Browse Source

Whitespace and indentation fix

Fixed whitespace and indentation of lasybear's pull request #272.
djm34
ystarnaud 10 years ago
parent
commit
56055754eb
  1. 131
      algorithm/talkcoin.c
  2. 3
      algorithm/talkcoin.h
  3. 528
      kernel/talkcoin-mod.cl

131
algorithm/talkcoin.c

@ -38,24 +38,24 @@
#include "sph/sph_skein.h" #include "sph/sph_skein.h"
/* Move init out of loop, so init once externally, and then use one single memcpy with that bigger memory block */ /* Move init out of loop, so init once externally, and then use one single memcpy with that bigger memory block */
typedef struct { typedef struct
sph_blake512_context blake1; {
sph_groestl512_context groestl1; sph_blake512_context blake1;
sph_jh512_context jh1; sph_groestl512_context groestl1;
sph_keccak512_context keccak1; sph_jh512_context jh1;
sph_skein512_context skein1; sph_keccak512_context keccak1;
sph_skein512_context skein1;
} Xhash_context_holder; } Xhash_context_holder;
Xhash_context_holder base_contexts; Xhash_context_holder base_contexts;
void init_Nhash_contexts() void init_Nhash_contexts()
{ {
sph_blake512_init(&base_contexts.blake1); sph_blake512_init(&base_contexts.blake1);
sph_groestl512_init(&base_contexts.groestl1); sph_groestl512_init(&base_contexts.groestl1);
sph_jh512_init(&base_contexts.jh1); sph_jh512_init(&base_contexts.jh1);
sph_keccak512_init(&base_contexts.keccak1); sph_keccak512_init(&base_contexts.keccak1);
sph_skein512_init(&base_contexts.skein1); sph_skein512_init(&base_contexts.skein1);
} }
/* /*
@ -65,39 +65,39 @@ void init_Nhash_contexts()
static inline void static inline void
be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
{ {
uint32_t i; uint32_t i;
for (i = 0; i < len; i++) for (i = 0; i < len; i++)
dst[i] = htobe32(src[i]); dst[i] = htobe32(src[i]);
} }
inline void talkhash(void *state, const void *input) inline void talkhash(void *state, const void *input)
{ {
init_Nhash_contexts(); init_Nhash_contexts();
Xhash_context_holder ctx; Xhash_context_holder ctx;
uint32_t hashA[16], hashB[16]; uint32_t hashA[16], hashB[16];
//blake-bmw-groestl-sken-jh-meccak-luffa-cubehash-shivite-simd-echo //blake-bmw-groestl-sken-jh-meccak-luffa-cubehash-shivite-simd-echo
memcpy(&ctx, &base_contexts, sizeof(base_contexts)); memcpy(&ctx, &base_contexts, sizeof(base_contexts));
sph_blake512 (&ctx.blake1, input, 80); sph_blake512 (&ctx.blake1, input, 80);
sph_blake512_close (&ctx.blake1, hashA); sph_blake512_close (&ctx.blake1, hashA);
sph_groestl512 (&ctx.groestl1, hashA, 64); sph_groestl512 (&ctx.groestl1, hashA, 64);
sph_groestl512_close(&ctx.groestl1, hashB); sph_groestl512_close(&ctx.groestl1, hashB);
sph_jh512 (&ctx.jh1, hashB, 64); sph_jh512 (&ctx.jh1, hashB, 64);
sph_jh512_close(&ctx.jh1, hashA); sph_jh512_close(&ctx.jh1, hashA);
sph_keccak512 (&ctx.keccak1, hashA, 64); sph_keccak512 (&ctx.keccak1, hashA, 64);
sph_keccak512_close(&ctx.keccak1, hashB); sph_keccak512_close(&ctx.keccak1, hashB);
sph_skein512 (&ctx.skein1, hashB, 64); sph_skein512 (&ctx.skein1, hashB, 64);
sph_skein512_close(&ctx.skein1, hashA); sph_skein512_close(&ctx.skein1, hashA);
memcpy(state, hashA, 32); memcpy(state, hashA, 32);
} }
static const uint32_t diff1targ = 0x0000ffff; static const uint32_t diff1targ = 0x0000ffff;
@ -114,26 +114,29 @@ int talkcoin_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t n
tmp_hash7 = be32toh(ohash[7]); tmp_hash7 = be32toh(ohash[7]);
applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx", applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx",
(long unsigned int)Htarg, (long unsigned int)Htarg,
(long unsigned int)diff1targ, (long unsigned int)diff1targ,
(long unsigned int)tmp_hash7); (long unsigned int)tmp_hash7);
if (tmp_hash7 > diff1targ) if (tmp_hash7 > diff1targ)
return -1; return -1;
if (tmp_hash7 > Htarg) if (tmp_hash7 > Htarg)
return 0; return 0;
return 1; return 1;
} }
void talkcoin_regenhash(struct work *work) void talkcoin_regenhash(struct work *work)
{ {
uint32_t data[20]; uint32_t data[20];
uint32_t *nonce = (uint32_t *)(work->data + 76); uint32_t *nonce = (uint32_t *)(work->data + 76);
uint32_t *ohash = (uint32_t *)(work->hash); uint32_t *ohash = (uint32_t *)(work->hash);
be32enc_vect(data, (const uint32_t *)work->data, 19); be32enc_vect(data, (const uint32_t *)work->data, 19);
data[19] = htobe32(*nonce); data[19] = htobe32(*nonce);
talkhash(ohash, data); talkhash(ohash, data);
} }
bool scanhash_talkcoin(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate, bool scanhash_talkcoin(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
@ -149,28 +152,30 @@ bool scanhash_talkcoin(struct thr_info *thr, const unsigned char __maybe_unused
be32enc_vect(data, (const uint32_t *)pdata, 19); be32enc_vect(data, (const uint32_t *)pdata, 19);
while(1) { while(1)
uint32_t ostate[8]; {
uint32_t ostate[8];
*nonce = ++n;
data[19] = (n); *nonce = ++n;
talkhash(ostate, data); data[19] = (n);
tmp_hash7 = (ostate[7]); talkhash(ostate, data);
tmp_hash7 = (ostate[7]);
applog(LOG_INFO, "data7 %08lx",
(long unsigned int)data[7]); applog(LOG_INFO, "data7 %08lx", (long unsigned int)data[7]);
if (unlikely(tmp_hash7 <= Htarg)) { if (unlikely(tmp_hash7 <= Htarg))
((uint32_t *)pdata)[19] = htobe32(n); {
*last_nonce = n; ((uint32_t *)pdata)[19] = htobe32(n);
ret = true; *last_nonce = n;
break; ret = true;
} break;
}
if (unlikely((n >= max_nonce) || thr->work_restart)) {
*last_nonce = n; if (unlikely((n >= max_nonce) || thr->work_restart))
break; {
} *last_nonce = n;
break;
}
} }
return ret; return ret;

3
algorithm/talkcoin.h

@ -3,8 +3,7 @@
#include "miner.h" #include "miner.h"
extern int talkcoin_test(unsigned char *pdata, const unsigned char *ptarget, extern int talkcoin_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce);
uint32_t nonce);
extern void talkcoin_regenhash(struct work *work); extern void talkcoin_regenhash(struct work *work);
#endif /* TALKCOIN_H */ #endif /* TALKCOIN_H */

528
kernel/talkcoin-mod.cl

@ -33,9 +33,9 @@
#define TALKCOIN_MOD_CL #define TALKCOIN_MOD_CL
#if __ENDIAN_LITTLE__ #if __ENDIAN_LITTLE__
#define SPH_LITTLE_ENDIAN 1 #define SPH_LITTLE_ENDIAN 1
#else #else
#define SPH_BIG_ENDIAN 1 #define SPH_BIG_ENDIAN 1
#endif #endif
#define SPH_UPTR sph_u64 #define SPH_UPTR sph_u64
@ -43,11 +43,11 @@
typedef unsigned int sph_u32; typedef unsigned int sph_u32;
typedef int sph_s32; typedef int sph_s32;
#ifndef __OPENCL_VERSION__ #ifndef __OPENCL_VERSION__
typedef unsigned long long sph_u64; typedef unsigned long long sph_u64;
typedef long long sph_s64; typedef long long sph_s64;
#else #else
typedef unsigned long sph_u64; typedef unsigned long sph_u64;
typedef long sph_s64; typedef long sph_s64;
#endif #endif
#define SPH_64 1 #define SPH_64 1
@ -81,204 +81,219 @@ typedef long sph_s64;
#define SWAP8(x) as_ulong(as_uchar8(x).s76543210) #define SWAP8(x) as_ulong(as_uchar8(x).s76543210)
#if SPH_BIG_ENDIAN #if SPH_BIG_ENDIAN
#define DEC64E(x) (x) #define DEC64E(x) (x)
#define DEC64BE(x) (*(const __global sph_u64 *) (x)); #define DEC64BE(x) (*(const __global sph_u64 *) (x));
#else #else
#define DEC64E(x) SWAP8(x) #define DEC64E(x) SWAP8(x)
#define DEC64BE(x) SWAP8(*(const __global sph_u64 *) (x)); #define DEC64BE(x) SWAP8(*(const __global sph_u64 *) (x));
#endif #endif
typedef union { typedef union {
unsigned char h1[64]; unsigned char h1[64];
uint h4[16]; uint h4[16];
ulong h8[8]; ulong h8[8];
} hash_t; } hash_t;
// blake // blake
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search(__global unsigned char* block, __global hash_t* hashes) __kernel void search(__global unsigned char* block, __global hash_t* hashes)
{ {
uint gid = get_global_id(0); uint gid = get_global_id(0);
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]); __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B);
sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1);
sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F);
sph_u64 H6 = SPH_C64(0x1F83D9ABFB41BD6B), H7 = SPH_C64(0x5BE0CD19137E2179); sph_u64 H6 = SPH_C64(0x1F83D9ABFB41BD6B), H7 = SPH_C64(0x5BE0CD19137E2179);
sph_u64 S0 = 0, S1 = 0, S2 = 0, S3 = 0; sph_u64 S0 = 0, S1 = 0, S2 = 0, S3 = 0;
sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (80 << 3), T1 = 0xFFFFFFFFFFFFFFFF;; sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (80 << 3), T1 = 0xFFFFFFFFFFFFFFFF;;
if ((T0 = SPH_T64(T0 + 1024)) < 1024) if ((T0 = SPH_T64(T0 + 1024)) < 1024)
{ {
T1 = SPH_T64(T1 + 1); T1 = SPH_T64(T1 + 1);
} }
sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; sph_u64 M0, M1, M2, M3, M4, M5, M6, M7;
sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; sph_u64 M8, M9, MA, MB, MC, MD, ME, MF;
sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; sph_u64 V0, V1, V2, V3, V4, V5, V6, V7;
sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; sph_u64 V8, V9, VA, VB, VC, VD, VE, VF;
M0 = DEC64BE(block + 0); M0 = DEC64BE(block + 0);
M1 = DEC64BE(block + 8); M1 = DEC64BE(block + 8);
M2 = DEC64BE(block + 16); M2 = DEC64BE(block + 16);
M3 = DEC64BE(block + 24); M3 = DEC64BE(block + 24);
M4 = DEC64BE(block + 32); M4 = DEC64BE(block + 32);
M5 = DEC64BE(block + 40); M5 = DEC64BE(block + 40);
M6 = DEC64BE(block + 48); M6 = DEC64BE(block + 48);
M7 = DEC64BE(block + 56); M7 = DEC64BE(block + 56);
M8 = DEC64BE(block + 64); M8 = DEC64BE(block + 64);
M9 = DEC64BE(block + 72); M9 = DEC64BE(block + 72);
M9 &= 0xFFFFFFFF00000000; M9 &= 0xFFFFFFFF00000000;
M9 ^= SWAP4(gid); M9 ^= SWAP4(gid);
MA = 0x8000000000000000; MA = 0x8000000000000000;
MB = 0; MB = 0;
MC = 0; MC = 0;
MD = 1; MD = 1;
ME = 0; ME = 0;
MF = 0x280; MF = 0x280;
COMPRESS64; COMPRESS64;
hash->h8[0] = H0; hash->h8[0] = H0;
hash->h8[1] = H1; hash->h8[1] = H1;
hash->h8[2] = H2; hash->h8[2] = H2;
hash->h8[3] = H3; hash->h8[3] = H3;
hash->h8[4] = H4; hash->h8[4] = H4;
hash->h8[5] = H5; hash->h8[5] = H5;
hash->h8[6] = H6; hash->h8[6] = H6;
hash->h8[7] = H7; hash->h8[7] = H7;
barrier(CLK_GLOBAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE);
} }
// groestl // groestl
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search1(__global hash_t* hashes) __kernel void search1(__global hash_t* hashes)
{ {
uint gid = get_global_id(0); uint gid = get_global_id(0);
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]); __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
__local sph_u64 T0_L[256], T1_L[256], T2_L[256], T3_L[256], T4_L[256], T5_L[256], T6_L[256], T7_L[256]; __local sph_u64 T0_L[256], T1_L[256], T2_L[256], T3_L[256], T4_L[256], T5_L[256], T6_L[256], T7_L[256];
int init = get_local_id(0); int init = get_local_id(0);
int step = get_local_size(0); int step = get_local_size(0);
for (int i = init; i < 256; i += step) for (int i = init; i < 256; i += step)
{ {
T0_L[i] = T0[i]; T0_L[i] = T0[i];
T1_L[i] = T1[i]; T1_L[i] = T1[i];
T2_L[i] = T2[i]; T2_L[i] = T2[i];
T3_L[i] = T3[i]; T3_L[i] = T3[i];
T4_L[i] = T4[i]; T4_L[i] = T4[i];
T5_L[i] = T5[i]; T5_L[i] = T5[i];
T6_L[i] = T6[i]; T6_L[i] = T6[i];
T7_L[i] = T7[i]; T7_L[i] = T7[i];
} }
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
#define T0 T0_L
#define T1 T1_L #define T0 T0_L
#define T2 T2_L #define T1 T1_L
#define T3 T3_L #define T2 T2_L
#define T4 T4_L #define T3 T3_L
#define T5 T5_L #define T4 T4_L
#define T6 T6_L #define T5 T5_L
#define T7 T7_L #define T6 T6_L
#define T7 T7_L
sph_u64 H[16];
for (unsigned int u = 0; u < 15; u ++) sph_u64 H[16];
H[u] = 0;
#if USE_LE for (unsigned int u = 0; u < 15; u ++)
H[u] = 0;
#if USE_LE
H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40);
#else #else
H[15] = (sph_u64)512; H[15] = (sph_u64)512;
#endif #endif
sph_u64 g[16], m[16]; sph_u64 g[16], m[16];
m[0] = DEC64E(hash->h8[0]); m[0] = DEC64E(hash->h8[0]);
m[1] = DEC64E(hash->h8[1]); m[1] = DEC64E(hash->h8[1]);
m[2] = DEC64E(hash->h8[2]); m[2] = DEC64E(hash->h8[2]);
m[3] = DEC64E(hash->h8[3]); m[3] = DEC64E(hash->h8[3]);
m[4] = DEC64E(hash->h8[4]); m[4] = DEC64E(hash->h8[4]);
m[5] = DEC64E(hash->h8[5]); m[5] = DEC64E(hash->h8[5]);
m[6] = DEC64E(hash->h8[6]); m[6] = DEC64E(hash->h8[6]);
m[7] = DEC64E(hash->h8[7]); m[7] = DEC64E(hash->h8[7]);
for (unsigned int u = 0; u < 16; u ++)
g[u] = m[u] ^ H[u]; for (unsigned int u = 0; u < 16; u ++)
m[8] = 0x80; g[8] = m[8] ^ H[8]; g[u] = m[u] ^ H[u];
m[9] = 0; g[9] = m[9] ^ H[9];
m[10] = 0; g[10] = m[10] ^ H[10]; m[8] = 0x80; g[8] = m[8] ^ H[8];
m[11] = 0; g[11] = m[11] ^ H[11]; m[9] = 0; g[9] = m[9] ^ H[9];
m[12] = 0; g[12] = m[12] ^ H[12]; m[10] = 0; g[10] = m[10] ^ H[10];
m[13] = 0; g[13] = m[13] ^ H[13]; m[11] = 0; g[11] = m[11] ^ H[11];
m[14] = 0; g[14] = m[14] ^ H[14]; m[12] = 0; g[12] = m[12] ^ H[12];
m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; m[13] = 0; g[13] = m[13] ^ H[13];
PERM_BIG_P(g); m[14] = 0; g[14] = m[14] ^ H[14];
PERM_BIG_Q(m); m[15] = 0x100000000000000; g[15] = m[15] ^ H[15];
for (unsigned int u = 0; u < 16; u ++)
H[u] ^= g[u] ^ m[u]; PERM_BIG_P(g);
sph_u64 xH[16]; PERM_BIG_Q(m);
for (unsigned int u = 0; u < 16; u ++)
xH[u] = H[u]; for (unsigned int u = 0; u < 16; u ++)
PERM_BIG_P(xH); H[u] ^= g[u] ^ m[u];
for (unsigned int u = 0; u < 16; u ++)
H[u] ^= xH[u]; sph_u64 xH[16];
for (unsigned int u = 0; u < 8; u ++)
hash->h8[u] = DEC64E(H[u + 8]); for (unsigned int u = 0; u < 16; u ++)
xH[u] = H[u];
barrier(CLK_GLOBAL_MEM_FENCE);
PERM_BIG_P(xH);
for (unsigned int u = 0; u < 16; u ++)
H[u] ^= xH[u];
for (unsigned int u = 0; u < 8; u ++)
hash->h8[u] = DEC64E(H[u + 8]);
barrier(CLK_GLOBAL_MEM_FENCE);
} }
// jh // jh
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search2(__global hash_t* hashes) __kernel void search2(__global hash_t* hashes)
{ {
uint gid = get_global_id(0); uint gid = get_global_id(0);
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]); __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7); sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7);
sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b); sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b);
sph_u64 tmp; sph_u64 tmp;
for(int i = 0; i < 2; i++)
for(int i = 0; i < 2; i++) {
if (i == 0)
{
h0h ^= DEC64E(hash->h8[0]);
h0l ^= DEC64E(hash->h8[1]);
h1h ^= DEC64E(hash->h8[2]);
h1l ^= DEC64E(hash->h8[3]);
h2h ^= DEC64E(hash->h8[4]);
h2l ^= DEC64E(hash->h8[5]);
h3h ^= DEC64E(hash->h8[6]);
h3l ^= DEC64E(hash->h8[7]);
}
else if(i == 1)
{ {
if (i == 0) { h4h ^= DEC64E(hash->h8[0]);
h0h ^= DEC64E(hash->h8[0]); h4l ^= DEC64E(hash->h8[1]);
h0l ^= DEC64E(hash->h8[1]); h5h ^= DEC64E(hash->h8[2]);
h1h ^= DEC64E(hash->h8[2]); h5l ^= DEC64E(hash->h8[3]);
h1l ^= DEC64E(hash->h8[3]); h6h ^= DEC64E(hash->h8[4]);
h2h ^= DEC64E(hash->h8[4]); h6l ^= DEC64E(hash->h8[5]);
h2l ^= DEC64E(hash->h8[5]); h7h ^= DEC64E(hash->h8[6]);
h3h ^= DEC64E(hash->h8[6]); h7l ^= DEC64E(hash->h8[7]);
h3l ^= DEC64E(hash->h8[7]);
} else if(i == 1) { h0h ^= 0x80;
h4h ^= DEC64E(hash->h8[0]); h3l ^= 0x2000000000000;
h4l ^= DEC64E(hash->h8[1]);
h5h ^= DEC64E(hash->h8[2]);
h5l ^= DEC64E(hash->h8[3]);
h6h ^= DEC64E(hash->h8[4]);
h6l ^= DEC64E(hash->h8[5]);
h7h ^= DEC64E(hash->h8[6]);
h7l ^= DEC64E(hash->h8[7]);
h0h ^= 0x80;
h3l ^= 0x2000000000000;
}
E8;
} }
h4h ^= 0x80; E8;
h7l ^= 0x2000000000000; }
hash->h8[0] = DEC64E(h4h); h4h ^= 0x80;
hash->h8[1] = DEC64E(h4l); h7l ^= 0x2000000000000;
hash->h8[2] = DEC64E(h5h);
hash->h8[3] = DEC64E(h5l); hash->h8[0] = DEC64E(h4h);
hash->h8[4] = DEC64E(h6h); hash->h8[1] = DEC64E(h4l);
hash->h8[5] = DEC64E(h6l); hash->h8[2] = DEC64E(h5h);
hash->h8[6] = DEC64E(h7h); hash->h8[3] = DEC64E(h5l);
hash->h8[7] = DEC64E(h7l); hash->h8[4] = DEC64E(h6h);
hash->h8[5] = DEC64E(h6l);
barrier(CLK_GLOBAL_MEM_FENCE); hash->h8[6] = DEC64E(h7h);
hash->h8[7] = DEC64E(h7l);
barrier(CLK_GLOBAL_MEM_FENCE);
} }
@ -286,48 +301,49 @@ __kernel void search2(__global hash_t* hashes)
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search3(__global hash_t* hashes) __kernel void search3(__global hash_t* hashes)
{ {
uint gid = get_global_id(0); uint gid = get_global_id(0);
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]); __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
// keccak // keccak
sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0; sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0;
sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0; sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0;
sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0; sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0;
sph_u64 a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0; sph_u64 a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0;
sph_u64 a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0; sph_u64 a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0;
a10 = SPH_C64(0xFFFFFFFFFFFFFFFF); a10 = SPH_C64(0xFFFFFFFFFFFFFFFF);
a20 = SPH_C64(0xFFFFFFFFFFFFFFFF); a20 = SPH_C64(0xFFFFFFFFFFFFFFFF);
a31 = SPH_C64(0xFFFFFFFFFFFFFFFF); a31 = SPH_C64(0xFFFFFFFFFFFFFFFF);
a22 = SPH_C64(0xFFFFFFFFFFFFFFFF); a22 = SPH_C64(0xFFFFFFFFFFFFFFFF);
a23 = SPH_C64(0xFFFFFFFFFFFFFFFF); a23 = SPH_C64(0xFFFFFFFFFFFFFFFF);
a04 = SPH_C64(0xFFFFFFFFFFFFFFFF); a04 = SPH_C64(0xFFFFFFFFFFFFFFFF);
a00 ^= SWAP8(hash->h8[0]); a00 ^= SWAP8(hash->h8[0]);
a10 ^= SWAP8(hash->h8[1]); a10 ^= SWAP8(hash->h8[1]);
a20 ^= SWAP8(hash->h8[2]); a20 ^= SWAP8(hash->h8[2]);
a30 ^= SWAP8(hash->h8[3]); a30 ^= SWAP8(hash->h8[3]);
a40 ^= SWAP8(hash->h8[4]); a40 ^= SWAP8(hash->h8[4]);
a01 ^= SWAP8(hash->h8[5]); a01 ^= SWAP8(hash->h8[5]);
a11 ^= SWAP8(hash->h8[6]); a11 ^= SWAP8(hash->h8[6]);
a21 ^= SWAP8(hash->h8[7]); a21 ^= SWAP8(hash->h8[7]);
a31 ^= 0x8000000000000001; a31 ^= 0x8000000000000001;
KECCAK_F_1600; KECCAK_F_1600;
// Finalize the "lane complement"
a10 = ~a10; // Finalize the "lane complement"
a20 = ~a20; a10 = ~a10;
a20 = ~a20;
hash->h8[0] = SWAP8(a00);
hash->h8[1] = SWAP8(a10); hash->h8[0] = SWAP8(a00);
hash->h8[2] = SWAP8(a20); hash->h8[1] = SWAP8(a10);
hash->h8[3] = SWAP8(a30); hash->h8[2] = SWAP8(a20);
hash->h8[4] = SWAP8(a40); hash->h8[3] = SWAP8(a30);
hash->h8[5] = SWAP8(a01); hash->h8[4] = SWAP8(a40);
hash->h8[6] = SWAP8(a11); hash->h8[5] = SWAP8(a01);
hash->h8[7] = SWAP8(a21); hash->h8[6] = SWAP8(a11);
hash->h8[7] = SWAP8(a21);
barrier(CLK_GLOBAL_MEM_FENCE);
barrier(CLK_GLOBAL_MEM_FENCE);
} }
@ -335,44 +351,46 @@ __kernel void search3(__global hash_t* hashes)
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search4(__global hash_t* hashes, __global uint* output, const ulong target) __kernel void search4(__global hash_t* hashes, __global uint* output, const ulong target)
{ {
uint gid = get_global_id(0); uint gid = get_global_id(0);
uint offset = get_global_offset(0); uint offset = get_global_offset(0);
hash_t hash; hash_t hash;
__global hash_t *hashp = &(hashes[gid-offset]); __global hash_t *hashp = &(hashes[gid-offset]);
for (int i = 0; i < 8; i++) { for (int i = 0; i < 8; i++)
hash.h8[i] = hashes[gid-offset].h8[i]; hash.h8[i] = hashes[gid-offset].h8[i];
}
sph_u64 h0 = SPH_C64(0x4903ADFF749C51CE), h1 = SPH_C64(0x0D95DE399746DF03), h2 = SPH_C64(0x8FD1934127C79BCE), h3 = SPH_C64(0x9A255629FF352CB1), h4 = SPH_C64(0x5DB62599DF6CA7B0), h5 = SPH_C64(0xEABE394CA9D5C3F4), h6 = SPH_C64(0x991112C71A75B523), h7 = SPH_C64(0xAE18A40B660FCC33);
sph_u64 h0 = SPH_C64(0x4903ADFF749C51CE), h1 = SPH_C64(0x0D95DE399746DF03), h2 = SPH_C64(0x8FD1934127C79BCE), h3 = SPH_C64(0x9A255629FF352CB1), h4 = SPH_C64(0x5DB62599DF6CA7B0), h5 = SPH_C64(0xEABE394CA9D5C3F4), h6 = SPH_C64(0x991112C71A75B523), h7 = SPH_C64(0xAE18A40B660FCC33); sph_u64 m0, m1, m2, m3, m4, m5, m6, m7;
sph_u64 m0, m1, m2, m3, m4, m5, m6, m7; sph_u64 bcount = 0;
sph_u64 bcount = 0;
m0 = SWAP8(hash.h8[0]);
m0 = SWAP8(hash.h8[0]); m1 = SWAP8(hash.h8[1]);
m1 = SWAP8(hash.h8[1]); m2 = SWAP8(hash.h8[2]);
m2 = SWAP8(hash.h8[2]); m3 = SWAP8(hash.h8[3]);
m3 = SWAP8(hash.h8[3]); m4 = SWAP8(hash.h8[4]);
m4 = SWAP8(hash.h8[4]); m5 = SWAP8(hash.h8[5]);
m5 = SWAP8(hash.h8[5]); m6 = SWAP8(hash.h8[6]);
m6 = SWAP8(hash.h8[6]); m7 = SWAP8(hash.h8[7]);
m7 = SWAP8(hash.h8[7]);
UBI_BIG(480, 64); UBI_BIG(480, 64);
bcount = 0;
m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; bcount = 0;
UBI_BIG(510, 8); m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0;
hash.h8[0] = SWAP8(h0);
hash.h8[1] = SWAP8(h1); UBI_BIG(510, 8);
hash.h8[2] = SWAP8(h2);
hash.h8[3] = SWAP8(h3); hash.h8[0] = SWAP8(h0);
hash.h8[4] = SWAP8(h4); hash.h8[1] = SWAP8(h1);
hash.h8[5] = SWAP8(h5); hash.h8[2] = SWAP8(h2);
hash.h8[6] = SWAP8(h6); hash.h8[3] = SWAP8(h3);
hash.h8[7] = SWAP8(h7); hash.h8[4] = SWAP8(h4);
hash.h8[5] = SWAP8(h5);
bool result = (SWAP8(hash.h8[3]) <= target); hash.h8[6] = SWAP8(h6);
if (result) hash.h8[7] = SWAP8(h7);
output[atomic_inc(output+0xFF)] = SWAP4(gid);
bool result = (SWAP8(hash.h8[3]) <= target);
if (result)
output[atomic_inc(output+0xFF)] = SWAP4(gid);
} }
#endif // TALKCOIN_MOD_CL #endif // TALKCOIN_MOD_CL

Loading…
Cancel
Save