1
0
mirror of https://github.com/GOSTSec/ccminer synced 2025-01-18 10:40:14 +00:00

quark/blake512 opt. pointed by sp without asm

indeed, the pragma unroll doesnt always make things faster

asm part... to check later
This commit is contained in:
Tanguy Pruvot 2014-11-17 00:00:15 +01:00
parent 438308b3a2
commit c859041993
6 changed files with 16 additions and 15 deletions

View File

@ -59,18 +59,19 @@ const uint64_t c_u512[16] =
v[b] = ROTR( v[b] ^ v[c], 11); \
}
__device__ static
__device__ __forceinline__
void quark_blake512_compress( uint64_t *h, const uint64_t *block, const uint8_t ((*sigma)[16]), const uint64_t *u512, const int T0)
{
uint64_t v[16], m[16], i;
uint64_t v[16];
uint64_t m[16];
#pragma unroll 16
for( i = 0; i < 16; i++) {
#pragma unroll
for(int i=0; i < 16; i++) {
m[i] = cuda_swab64(block[i]);
}
#pragma unroll 8
for (i = 0; i < 8; i++)
//#pragma unroll 8
for(int i=0; i < 8; i++)
v[i] = h[i];
v[ 8] = u512[0];
@ -83,7 +84,7 @@ void quark_blake512_compress( uint64_t *h, const uint64_t *block, const uint8_t
v[15] = u512[7];
//#pragma unroll 16
for( i = 0; i < 16; ++i )
for(int i=0; i < 16; i++)
{
/* column step */
G( 0, 4, 8, 12, 0 );
@ -97,9 +98,14 @@ void quark_blake512_compress( uint64_t *h, const uint64_t *block, const uint8_t
G( 3, 4, 9, 14, 14 );
}
#pragma unroll 16
for( i = 0; i < 16; ++i )
h[i % 8] ^= v[i];
h[0] ^= v[0] ^ v[8];
h[1] ^= v[1] ^ v[9];
h[2] ^= v[2] ^ v[10];
h[3] ^= v[3] ^ v[11];
h[4] ^= v[4] ^ v[12];
h[5] ^= v[5] ^ v[13];
h[6] ^= v[6] ^ v[14];
h[7] ^= v[7] ^ v[15];
}
// Hash-Padding

View File

@ -25,7 +25,6 @@ static uint32_t *d_hash[8];
extern void quark_blake512_cpu_init(int thr_id, int threads);
extern void quark_blake512_cpu_setBlock_80(void *pdata);
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_bmw512_cpu_init(int thr_id, int threads);
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

View File

@ -29,7 +29,6 @@ static uint32_t *d_hash[8];
extern void quark_blake512_cpu_init(int thr_id, int threads);
extern void quark_blake512_cpu_setBlock_80(void *pdata);
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_bmw512_cpu_init(int thr_id, int threads);
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

View File

@ -32,7 +32,6 @@ static uint32_t *d_hash[8];
extern void quark_blake512_cpu_init(int thr_id, int threads);
extern void quark_blake512_cpu_setBlock_80(void *pdata);
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_bmw512_cpu_init(int thr_id, int threads);
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

View File

@ -33,7 +33,6 @@ static uint32_t *d_hash[8];
extern void quark_blake512_cpu_init(int thr_id, int threads);
extern void quark_blake512_cpu_setBlock_80(void *pdata);
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_bmw512_cpu_init(int thr_id, int threads);
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

View File

@ -36,7 +36,6 @@ static uint32_t *d_hash[8];
extern void quark_blake512_cpu_init(int thr_id, int threads);
extern void quark_blake512_cpu_setBlock_80(void *pdata);
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_bmw512_cpu_init(int thr_id, int threads);
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);