1
0
mirror of https://github.com/GOSTSec/ccminer synced 2025-01-18 18:50:11 +00:00

quark/blake512 optimization pointed out by sp, without the asm part

indeed, the pragma unroll doesn't always make things faster

asm part... to check later
This commit is contained in:
Tanguy Pruvot 2014-11-17 00:00:15 +01:00
parent 438308b3a2
commit c859041993
6 changed files with 16 additions and 15 deletions

View File

@ -59,18 +59,19 @@ const uint64_t c_u512[16] =
v[b] = ROTR( v[b] ^ v[c], 11); \ v[b] = ROTR( v[b] ^ v[c], 11); \
} }
__device__ static __device__ __forceinline__
void quark_blake512_compress( uint64_t *h, const uint64_t *block, const uint8_t ((*sigma)[16]), const uint64_t *u512, const int T0) void quark_blake512_compress( uint64_t *h, const uint64_t *block, const uint8_t ((*sigma)[16]), const uint64_t *u512, const int T0)
{ {
uint64_t v[16], m[16], i; uint64_t v[16];
uint64_t m[16];
#pragma unroll 16 #pragma unroll
for( i = 0; i < 16; i++) { for(int i=0; i < 16; i++) {
m[i] = cuda_swab64(block[i]); m[i] = cuda_swab64(block[i]);
} }
#pragma unroll 8 //#pragma unroll 8
for (i = 0; i < 8; i++) for(int i=0; i < 8; i++)
v[i] = h[i]; v[i] = h[i];
v[ 8] = u512[0]; v[ 8] = u512[0];
@ -83,7 +84,7 @@ void quark_blake512_compress( uint64_t *h, const uint64_t *block, const uint8_t
v[15] = u512[7]; v[15] = u512[7];
//#pragma unroll 16 //#pragma unroll 16
for( i = 0; i < 16; ++i ) for(int i=0; i < 16; i++)
{ {
/* column step */ /* column step */
G( 0, 4, 8, 12, 0 ); G( 0, 4, 8, 12, 0 );
@ -97,9 +98,14 @@ void quark_blake512_compress( uint64_t *h, const uint64_t *block, const uint8_t
G( 3, 4, 9, 14, 14 ); G( 3, 4, 9, 14, 14 );
} }
#pragma unroll 16 h[0] ^= v[0] ^ v[8];
for( i = 0; i < 16; ++i ) h[1] ^= v[1] ^ v[9];
h[i % 8] ^= v[i]; h[2] ^= v[2] ^ v[10];
h[3] ^= v[3] ^ v[11];
h[4] ^= v[4] ^ v[12];
h[5] ^= v[5] ^ v[13];
h[6] ^= v[6] ^ v[14];
h[7] ^= v[7] ^ v[15];
} }
// Hash-Padding // Hash-Padding

View File

@ -25,7 +25,6 @@ static uint32_t *d_hash[8];
extern void quark_blake512_cpu_init(int thr_id, int threads); extern void quark_blake512_cpu_init(int thr_id, int threads);
extern void quark_blake512_cpu_setBlock_80(void *pdata); extern void quark_blake512_cpu_setBlock_80(void *pdata);
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_bmw512_cpu_init(int thr_id, int threads); extern void quark_bmw512_cpu_init(int thr_id, int threads);
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

View File

@ -29,7 +29,6 @@ static uint32_t *d_hash[8];
extern void quark_blake512_cpu_init(int thr_id, int threads); extern void quark_blake512_cpu_init(int thr_id, int threads);
extern void quark_blake512_cpu_setBlock_80(void *pdata); extern void quark_blake512_cpu_setBlock_80(void *pdata);
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_bmw512_cpu_init(int thr_id, int threads); extern void quark_bmw512_cpu_init(int thr_id, int threads);
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

View File

@ -32,7 +32,6 @@ static uint32_t *d_hash[8];
extern void quark_blake512_cpu_init(int thr_id, int threads); extern void quark_blake512_cpu_init(int thr_id, int threads);
extern void quark_blake512_cpu_setBlock_80(void *pdata); extern void quark_blake512_cpu_setBlock_80(void *pdata);
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_bmw512_cpu_init(int thr_id, int threads); extern void quark_bmw512_cpu_init(int thr_id, int threads);
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

View File

@ -33,7 +33,6 @@ static uint32_t *d_hash[8];
extern void quark_blake512_cpu_init(int thr_id, int threads); extern void quark_blake512_cpu_init(int thr_id, int threads);
extern void quark_blake512_cpu_setBlock_80(void *pdata); extern void quark_blake512_cpu_setBlock_80(void *pdata);
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_bmw512_cpu_init(int thr_id, int threads); extern void quark_bmw512_cpu_init(int thr_id, int threads);
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

View File

@ -36,7 +36,6 @@ static uint32_t *d_hash[8];
extern void quark_blake512_cpu_init(int thr_id, int threads); extern void quark_blake512_cpu_init(int thr_id, int threads);
extern void quark_blake512_cpu_setBlock_80(void *pdata); extern void quark_blake512_cpu_setBlock_80(void *pdata);
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_bmw512_cpu_init(int thr_id, int threads); extern void quark_bmw512_cpu_init(int thr_id, int threads);
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);