diff --git a/Makefile.am b/Makefile.am index 6a24a21b..cb7bf850 100644 --- a/Makefile.am +++ b/Makefile.am @@ -24,6 +24,7 @@ cgminer_SOURCES = elist.h miner.h compat.h bench_block.h \ sha256_generic.c sha256_4way.c sha256_via.c \ sha256_cryptopp.c sha256_sse2_amd64.c \ sha256_sse4_amd64.c sha256_sse2_i386.c \ + sha256_altivec_4way.c \ adl.c adl.h adl_functions.h \ phatk110817.cl poclbm110817.cl \ sha2.c sha2.h diff --git a/configure.ac b/configure.ac index bf9acbb4..333c490d 100644 --- a/configure.ac +++ b/configure.ac @@ -83,6 +83,11 @@ case $target in DLOPEN_FLAGS="" WS2_LIBS="-lws2_32" ;; + powerpc-*-darwin*) + CFLAGS="$CFLAGS -faltivec" + OPENCL_LIBS="" + PTHREAD_FLAGS="" + ;; *-*-darwin*) OPENCL_LIBS="-framework OpenCL" ;; diff --git a/main.c b/main.c index 91197d41..28436799 100644 --- a/main.c +++ b/main.c @@ -106,6 +106,7 @@ enum sha256_algos { ALGO_SSE2_32, /* SSE2 for x86_32 */ ALGO_SSE2_64, /* SSE2 for x86_64 */ ALGO_SSE4_64, /* SSE4 for x86_64 */ + ALGO_ALTIVEC_4WAY, /* parallel Altivec */ }; enum pool_strategy { @@ -149,6 +150,9 @@ static const char *algo_names[] = { #ifdef WANT_X8664_SSE4 [ALGO_SSE4_64] = "sse4_64", #endif +#ifdef WANT_ALTIVEC_4WAY + [ALGO_ALTIVEC_4WAY] = "altivec_4way", +#endif }; typedef void (*sha256_func)(); @@ -157,6 +161,9 @@ static const sha256_func sha256_funcs[] = { #ifdef WANT_SSE2_4WAY [ALGO_4WAY] = (sha256_func)ScanHash_4WaySSE2, #endif +#ifdef WANT_ALTIVEC_4WAY + [ALGO_ALTIVEC_4WAY] = (sha256_func) ScanHash_altivec_4way, +#endif #ifdef WANT_VIA_PADLOCK [ALGO_VIA] = (sha256_func)scanhash_via, #endif @@ -896,6 +903,10 @@ static enum sha256_algos pick_fastest_algo() bench_algo(&best_rate, &best_algo, ALGO_SSE4_64); #endif + #if defined(WANT_ALTIVEC_4WAY) + bench_algo(&best_rate, &best_algo, ALGO_ALTIVEC_4WAY); + #endif + size_t n = max_name_len - strlen(algo_names[best_algo]); memset(name_spaces_pad, ' ', n); name_spaces_pad[n] = 0; @@ -1451,6 +1462,9 @@ static struct opt_table opt_config_table[] = { #endif #ifdef 
WANT_X8664_SSE4 "\n\tsse4_64\t\tSSE4.1 64 bit implementation for x86_64 machines" +#endif +#ifdef WANT_ALTIVEC_4WAY + "\n\taltivec_4way\tAltivec implementation for PowerPC G4 and G5 machines" #endif ), #ifdef HAVE_ADL @@ -1787,6 +1801,19 @@ static bool work_decode(const json_t *val, struct work *work) } memset(work->hash, 0, sizeof(work->hash)); + +#ifdef __BIG_ENDIAN__ + int swapcounter = 0; + for (swapcounter = 0; swapcounter < 32; swapcounter++) + (((uint32_t*) (work->data))[swapcounter]) = swab32(((uint32_t*) (work->data))[swapcounter]); + for (swapcounter = 0; swapcounter < 16; swapcounter++) + (((uint32_t*) (work->hash1))[swapcounter]) = swab32(((uint32_t*) (work->hash1))[swapcounter]); + for (swapcounter = 0; swapcounter < 8; swapcounter++) + (((uint32_t*) (work->midstate))[swapcounter]) = swab32(((uint32_t*) (work->midstate))[swapcounter]); + for (swapcounter = 0; swapcounter < 8; swapcounter++) + (((uint32_t*) (work->target))[swapcounter]) = swab32(((uint32_t*) (work->target))[swapcounter]); +#endif + gettimeofday(&work->tv_staged, NULL); return true; @@ -2141,6 +2168,12 @@ static bool submit_upstream_work(const struct work *work) return rc; } +#ifdef __BIG_ENDIAN__ + int swapcounter = 0; + for (swapcounter = 0; swapcounter < 32; swapcounter++) + (((uint32_t*) (work->data))[swapcounter]) = swab32(((uint32_t*) (work->data))[swapcounter]); +#endif + /* build hex string */ hexstr = bin2hex(work->data, sizeof(work->data)); if (unlikely(!hexstr)) { @@ -4081,6 +4114,19 @@ static void *miner_thread(void *userdata) break; #endif +#ifdef WANT_ALTIVEC_4WAY + case ALGO_ALTIVEC_4WAY: + { + unsigned int rc4 = ScanHash_altivec_4way(thr_id, work->midstate, work->data + 64, + work->hash1, work->hash, + work->target, + max_nonce, &hashes_done, + work->blk.nonce); + rc = (rc4 == -1) ? 
false : true; + } + break; +#endif + #ifdef WANT_VIA_PADLOCK case ALGO_VIA: rc = scanhash_via(thr_id, work->data, work->target, diff --git a/miner.h b/miner.h index c21f1893..f187268d 100644 --- a/miner.h +++ b/miner.h @@ -64,6 +64,10 @@ void *alloca (size_t); #define WANT_SSE2_4WAY 1 #endif +#ifdef __ALTIVEC__ +#define WANT_ALTIVEC_4WAY 1 +#endif + #if defined(__i386__) && defined(HAS_YASM) && defined(__SSE2__) #define WANT_X8632_SSE2 1 #endif @@ -329,6 +333,12 @@ extern unsigned int ScanHash_4WaySSE2(int, const unsigned char *pmidstate, const unsigned char *ptarget, uint32_t max_nonce, unsigned long *nHashesDone, uint32_t nonce); +extern unsigned int ScanHash_altivec_4way(int thr_id, const unsigned char *pmidstate, + unsigned char *pdata, + unsigned char *phash1, unsigned char *phash, + const unsigned char *ptarget, + uint32_t max_nonce, unsigned long *nHashesDone, uint32_t nonce); + extern unsigned int scanhash_sse2_amd64(int, const unsigned char *pmidstate, unsigned char *pdata, unsigned char *phash1, unsigned char *phash, const unsigned char *ptarget, @@ -342,14 +352,17 @@ extern bool scanhash_c(int, const unsigned char *midstate, unsigned char *data, unsigned char *hash1, unsigned char *hash, const unsigned char *target, uint32_t max_nonce, unsigned long *hashes_done, uint32_t n); + extern bool scanhash_cryptopp(int, const unsigned char *midstate,unsigned char *data, unsigned char *hash1, unsigned char *hash, const unsigned char *target, uint32_t max_nonce, unsigned long *hashes_done, uint32_t n); + extern bool scanhash_asm32(int, const unsigned char *midstate,unsigned char *data, unsigned char *hash1, unsigned char *hash, const unsigned char *target, uint32_t max_nonce, unsigned long *hashes_done, uint32_t nonce); + extern int scanhash_sse2_64(int, const unsigned char *pmidstate, unsigned char *pdata, unsigned char *phash1, unsigned char *phash, const unsigned char *ptarget, diff --git a/sha256_altivec_4way.c b/sha256_altivec_4way.c new file mode 100644 
index 00000000..41dc5a09
--- /dev/null
+++ b/sha256_altivec_4way.c
@@ -0,0 +1,469 @@
+// Copyright (c) 2010 Satoshi Nakamoto
+// Copyright (c) 2011 Gilles Risch
+// Distributed under the MIT/X11 software license, see the accompanying
+// file license.txt or http://www.opensource.org/licenses/mit-license.php.
+
+
+// 4-way 128-bit Altivec SHA-256,
+// based on tcatm's 4-way 128-bit SSE2 SHA-256
+//
+
+
+//#include "config.h"
+#include "miner.h"
+
+#ifdef WANT_ALTIVEC_4WAY
+/* NOTE(review): the header names of the #include lines below are missing in this copy of the patch; restore them from the original commit. */
+#include
+#include
+
+//#include
+#include
+#include
+/* Number of nonces handled per DoubleBlockSHA256() call; also the second dimension of the hash arrays. */
+#define NPAR 32
+/* Forward declaration, kept identical to the definition further down (same parameter names and array bound). */
+static void DoubleBlockSHA256(const void* pin, void* pad, const void *pre, unsigned int thash[9][NPAR], const void *init);
+/* SHA-256 round constants K[0..63]; SHA256ROUND() splats entry i across all four lanes. */
+static const unsigned int sha256_consts[] = {
+    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, /* 0 */
+    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, /* 8 */
+    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, /* 16 */
+    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, /* 24 */
+    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, /* 32 */
+    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, /* 40 */
+    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, /* 48 */
+    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, /* 56 */
+    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+};
+
+/* Bitwise choose: each result bit is taken from c where the matching bit of b is set, otherwise from d, i.e. (b & c) | (~b & d). */
+static inline vector unsigned int Ch(const vector unsigned int b, const vector unsigned int c, const vector unsigned int d) {
+    return vec_sel(d,c,b);
+}
+/* Bitwise majority of b, c and d: where b and d disagree the result bit is c, otherwise it is b. */
+static inline vector unsigned int Maj(const vector unsigned int b, const vector unsigned int c, const vector unsigned int d) {
+    return vec_sel(b,c, vec_xor(b,d));
+}
+
+/* RotateRight(x, n) := RotateLeft(x, 32-n) */
+/* SHA256
Functions (BIGSIGMA0/1: rotr 2 ^ rotr 13 ^ rotr 22 and rotr 6 ^ rotr 11 ^ rotr 25, per 32-bit lane) */
+#define BIGSIGMA0_256(x) (vec_xor(vec_xor(vec_rl((x), (vector unsigned int)(32-2)),vec_rl((x), (vector unsigned int)(32-13))),vec_rl((x), (vector unsigned int)(32-22))))
+#define BIGSIGMA1_256(x) (vec_xor(vec_xor(vec_rl((x), (vector unsigned int)(32-6)),vec_rl((x), (vector unsigned int)(32-11))),vec_rl((x), (vector unsigned int)(32-25))))
+/* Message-schedule functions: rotr 7 ^ rotr 18 ^ shr 3 and rotr 17 ^ rotr 19 ^ shr 10, per 32-bit lane. */
+#define SIGMA0_256(x) (vec_xor(vec_xor(vec_rl((x), (vector unsigned int)(32- 7)),vec_rl((x), (vector unsigned int)(32-18))), vec_sr((x), (vector unsigned int)(3 ))))
+#define SIGMA1_256(x) (vec_xor(vec_xor(vec_rl((x), (vector unsigned int)(32-17)),vec_rl((x), (vector unsigned int)(32-19))), vec_sr((x), (vector unsigned int)(10))))
+/* Lane-wise sum of four, respectively five, vectors. */
+#define add4(x0, x1, x2, x3) vec_add(vec_add(x0, x1),vec_add( x2,x3))
+#define add5(x0, x1, x2, x3, x4) vec_add(add4(x0, x1, x2, x3), x4)
+/* One SHA-256 round: updates d and h from a..h, round constant sha256_consts[i] and schedule word w; needs a T1 vector in the caller's scope. */
+#define SHA256ROUND(a, b, c, d, e, f, g, h, i, w) \
+    T1 = add5(h, BIGSIGMA1_256(e), Ch(e, f, g), (vector unsigned int)(sha256_consts[i],sha256_consts[i],sha256_consts[i],sha256_consts[i]), w); \
+    d = vec_add(d, T1); \
+    h = vec_add(T1, vec_add(BIGSIGMA0_256(a), Maj(a, b, c)));
+
+/* SHA-256 initial hash values H0..H7, handed to DoubleBlockSHA256() as its last argument. */
+static const unsigned int pSHA256InitState[8] =
+{0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
+/* Scan nonces upward from `nonce` in batches of NPAR. Returns the nonce whose hash passes fulltest() (it is also stored at pdata + 12),
+ * or (unsigned int)-1 once max_nonce is reached, the next batch would wrap past 0xffffffff, or work_restart[thr_id].restart is set. */
+unsigned int ScanHash_altivec_4way(int thr_id, const unsigned char *pmidstate, /* thr_id: index into work_restart[]; pmidstate: forwarded to DoubleBlockSHA256() */
+    unsigned char *pdata, /* block data; the current nonce is written at byte offset 12 */
+    unsigned char *phash1, unsigned char *phash, /* phash1: forwarded to DoubleBlockSHA256(); phash: receives the 8-word candidate hash */
+    const unsigned char *ptarget, /* target handed to fulltest() */
+    uint32_t max_nonce, unsigned long *nHashesDone, /* scan limit; *nHashesDone is set to the base nonce of the current batch on return */
+    uint32_t nonce) /* first nonce to try */
+{
+    unsigned int *nNonce_p = (unsigned int*)(pdata + 12);
+
+    work_restart[thr_id].restart = 0;
+
+    for (;;)
+    {
+        unsigned int thash[9][NPAR] __attribute__((aligned(128)));
+        int j;
+
+        *nNonce_p = nonce;
+        /* DoubleBlockSHA256() is expected to leave hash word w for nonce + j in thash[w][j]. */
+        DoubleBlockSHA256(pdata, phash1, pmidstate, thash, pSHA256InitState);
+
+        for (j = 0; j < NPAR; j++)
+        {
+            if (unlikely(thash[7][j] == 0))
+            {
+                int i;
+                /* Cheap pre-filter passed (last hash word is zero): copy the whole hash out for the exact target test. */
+                for (i = 0; i < 32/4; i++)
+                    ((unsigned int*)phash)[i] = thash[i][j];
+
+                if (fulltest(phash, ptarget)) {
+                    *nHashesDone = nonce;
+                    *nNonce_p = nonce + j;
+                    return nonce + j;
+                }
+            }
+        }
+        /* Stop at the limit, on a restart request, or when adding NPAR would wrap the 32-bit nonce back to the start of the range. */
+        if ((nonce >= max_nonce) || (nonce > 0xffffffffU - NPAR) || work_restart[thr_id].restart)
+        {
+            *nHashesDone = nonce;
+            return -1;
+        }
+
+        nonce += NPAR;
+    }
+}
+/* Hashing worker called by ScanHash_altivec_4way(). NOTE(review): most of the body is missing from this copy of the patch, so its
+ * contract cannot be documented from here; the visible part only builds the per-lane nonces In[3] + {0, 1, 2, 3}. */
+static void DoubleBlockSHA256(const void* pin, void* pad, const void *pre, unsigned int thash[9][NPAR], const void *init)
+{
+    unsigned int* In = (unsigned int*)pin;
+    unsigned int* Pad = (unsigned int*)pad;
+    unsigned int* hPre = (unsigned int*)pre;
+    unsigned int* hInit = (unsigned int*)init;
+    unsigned int /* i, j, */ k;
+
+    /* vectors used in calculation */
+    vector unsigned int w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15;
+    vector unsigned int T1;
+    vector unsigned int a, b, c, d, e, f, g, h;
+    vector unsigned int nonce, preNonce;
+
+    /* nonce offset for vector */
+    vector unsigned int offset = (vector unsigned int)(0, 1, 2, 3);
+
+    preNonce = vec_add((vector unsigned int)(In[3],In[3],In[3],In[3]), offset);
+    /* NOTE(review): the loop condition, the loop body and the opening of the trailing comment are truncated on the next line; restore them from the original commit. */
+    for(k = 0; k try if it´s faster to compare the results with the target inside this function */
+    }
+
+}
+
+#endif /* WANT_ALTIVEC_4WAY */
+