mirror of
https://github.com/GOSTSec/ccminer
synced 2025-01-18 18:50:11 +00:00
quark/blake512: optimization pointed out by sp, without asm
Indeed, the pragma unroll doesn't always make things faster. The asm part is still to be checked later.
This commit is contained in:
parent
438308b3a2
commit
c859041993
@ -59,18 +59,19 @@ const uint64_t c_u512[16] =
|
|||||||
v[b] = ROTR( v[b] ^ v[c], 11); \
|
v[b] = ROTR( v[b] ^ v[c], 11); \
|
||||||
}
|
}
|
||||||
|
|
||||||
__device__ static
|
__device__ __forceinline__
|
||||||
void quark_blake512_compress( uint64_t *h, const uint64_t *block, const uint8_t ((*sigma)[16]), const uint64_t *u512, const int T0)
|
void quark_blake512_compress( uint64_t *h, const uint64_t *block, const uint8_t ((*sigma)[16]), const uint64_t *u512, const int T0)
|
||||||
{
|
{
|
||||||
uint64_t v[16], m[16], i;
|
uint64_t v[16];
|
||||||
|
uint64_t m[16];
|
||||||
|
|
||||||
#pragma unroll 16
|
#pragma unroll
|
||||||
for( i = 0; i < 16; i++) {
|
for(int i=0; i < 16; i++) {
|
||||||
m[i] = cuda_swab64(block[i]);
|
m[i] = cuda_swab64(block[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
#pragma unroll 8
|
//#pragma unroll 8
|
||||||
for (i = 0; i < 8; i++)
|
for(int i=0; i < 8; i++)
|
||||||
v[i] = h[i];
|
v[i] = h[i];
|
||||||
|
|
||||||
v[ 8] = u512[0];
|
v[ 8] = u512[0];
|
||||||
@ -83,7 +84,7 @@ void quark_blake512_compress( uint64_t *h, const uint64_t *block, const uint8_t
|
|||||||
v[15] = u512[7];
|
v[15] = u512[7];
|
||||||
|
|
||||||
//#pragma unroll 16
|
//#pragma unroll 16
|
||||||
for( i = 0; i < 16; ++i )
|
for(int i=0; i < 16; i++)
|
||||||
{
|
{
|
||||||
/* column step */
|
/* column step */
|
||||||
G( 0, 4, 8, 12, 0 );
|
G( 0, 4, 8, 12, 0 );
|
||||||
@ -97,9 +98,14 @@ void quark_blake512_compress( uint64_t *h, const uint64_t *block, const uint8_t
|
|||||||
G( 3, 4, 9, 14, 14 );
|
G( 3, 4, 9, 14, 14 );
|
||||||
}
|
}
|
||||||
|
|
||||||
#pragma unroll 16
|
h[0] ^= v[0] ^ v[8];
|
||||||
for( i = 0; i < 16; ++i )
|
h[1] ^= v[1] ^ v[9];
|
||||||
h[i % 8] ^= v[i];
|
h[2] ^= v[2] ^ v[10];
|
||||||
|
h[3] ^= v[3] ^ v[11];
|
||||||
|
h[4] ^= v[4] ^ v[12];
|
||||||
|
h[5] ^= v[5] ^ v[13];
|
||||||
|
h[6] ^= v[6] ^ v[14];
|
||||||
|
h[7] ^= v[7] ^ v[15];
|
||||||
}
|
}
|
||||||
|
|
||||||
// Hash-Padding
|
// Hash-Padding
|
||||||
|
@ -25,7 +25,6 @@ static uint32_t *d_hash[8];
|
|||||||
extern void quark_blake512_cpu_init(int thr_id, int threads);
|
extern void quark_blake512_cpu_init(int thr_id, int threads);
|
||||||
extern void quark_blake512_cpu_setBlock_80(void *pdata);
|
extern void quark_blake512_cpu_setBlock_80(void *pdata);
|
||||||
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
|
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
|
||||||
extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
|
||||||
|
|
||||||
extern void quark_bmw512_cpu_init(int thr_id, int threads);
|
extern void quark_bmw512_cpu_init(int thr_id, int threads);
|
||||||
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
@ -29,7 +29,6 @@ static uint32_t *d_hash[8];
|
|||||||
extern void quark_blake512_cpu_init(int thr_id, int threads);
|
extern void quark_blake512_cpu_init(int thr_id, int threads);
|
||||||
extern void quark_blake512_cpu_setBlock_80(void *pdata);
|
extern void quark_blake512_cpu_setBlock_80(void *pdata);
|
||||||
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
|
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
|
||||||
extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
|
||||||
|
|
||||||
extern void quark_bmw512_cpu_init(int thr_id, int threads);
|
extern void quark_bmw512_cpu_init(int thr_id, int threads);
|
||||||
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
@ -32,7 +32,6 @@ static uint32_t *d_hash[8];
|
|||||||
extern void quark_blake512_cpu_init(int thr_id, int threads);
|
extern void quark_blake512_cpu_init(int thr_id, int threads);
|
||||||
extern void quark_blake512_cpu_setBlock_80(void *pdata);
|
extern void quark_blake512_cpu_setBlock_80(void *pdata);
|
||||||
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
|
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
|
||||||
extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
|
||||||
|
|
||||||
extern void quark_bmw512_cpu_init(int thr_id, int threads);
|
extern void quark_bmw512_cpu_init(int thr_id, int threads);
|
||||||
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
@ -33,7 +33,6 @@ static uint32_t *d_hash[8];
|
|||||||
extern void quark_blake512_cpu_init(int thr_id, int threads);
|
extern void quark_blake512_cpu_init(int thr_id, int threads);
|
||||||
extern void quark_blake512_cpu_setBlock_80(void *pdata);
|
extern void quark_blake512_cpu_setBlock_80(void *pdata);
|
||||||
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
|
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
|
||||||
extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
|
||||||
|
|
||||||
extern void quark_bmw512_cpu_init(int thr_id, int threads);
|
extern void quark_bmw512_cpu_init(int thr_id, int threads);
|
||||||
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
@ -36,7 +36,6 @@ static uint32_t *d_hash[8];
|
|||||||
extern void quark_blake512_cpu_init(int thr_id, int threads);
|
extern void quark_blake512_cpu_init(int thr_id, int threads);
|
||||||
extern void quark_blake512_cpu_setBlock_80(void *pdata);
|
extern void quark_blake512_cpu_setBlock_80(void *pdata);
|
||||||
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
|
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
|
||||||
extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
|
||||||
|
|
||||||
extern void quark_bmw512_cpu_init(int thr_id, int threads);
|
extern void quark_bmw512_cpu_init(int thr_id, int threads);
|
||||||
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user