Browse Source

decred: magic improvement in one line

+ ifdef the 4WAY commented code...
2upstream
Tanguy Pruvot 8 years ago
parent
commit
c0fca5c932
  1. 12
      Algo256/decred.cu

12
Algo256/decred.cu

@ -144,7 +144,7 @@ uint32_t blake256_compress_14(uint32_t *m, uint32_t *v_init, uint32_t d_data6, u
// round 7 // round 7
GSPREC4(0, 4, 0x8, 0xC, 12, 5, 1, 5, 0x9, 0xD, 1, 15, 2, 6, 0xA, 0xE, 14,13, 3, 7, 0xB, 0xF, 4, 10); GSPREC4(0, 4, 0x8, 0xC, 12, 5, 1, 5, 0x9, 0xD, 1, 15, 2, 6, 0xA, 0xE, 14,13, 3, 7, 0xB, 0xF, 4, 10);
GSPREC4(0, 5, 0xA, 0xF, 0, 7, 1, 6, 0xB, 0xC, 6, 3, 2, 7, 0x8, 0xD, 9, 2, 3, 4, 0x9, 0xE, 8, 11); GSPREC4(0, 5, 0xA, 0xF, 0, 7, 1, 6, 0xB, 0xC, 6, 3, 2, 7, 0x8, 0xD, 9, 2, 3, 4, 0x9, 0xE, 8, 11);
/* #ifdef FULL_4WAY
// round 8 // round 8
GSPREC4(0, 4, 0x8, 0xC, 13,11, 1, 5, 0x9, 0xD, 7, 14, 2, 6, 0xA, 0xE, 12, 1, 3, 7, 0xB, 0xF, 3, 9); GSPREC4(0, 4, 0x8, 0xC, 13,11, 1, 5, 0x9, 0xD, 7, 14, 2, 6, 0xA, 0xE, 12, 1, 3, 7, 0xB, 0xF, 3, 9);
GSPREC4(0, 5, 0xA, 0xF, 5, 0, 1, 6, 0xB, 0xC, 15, 4, 2, 7, 0x8, 0xD, 8, 6, 3, 4, 0x9, 0xE, 2, 10); GSPREC4(0, 5, 0xA, 0xF, 5, 0, 1, 6, 0xB, 0xC, 15, 4, 2, 7, 0x8, 0xD, 8, 6, 3, 4, 0x9, 0xE, 2, 10);
@ -163,7 +163,7 @@ uint32_t blake256_compress_14(uint32_t *m, uint32_t *v_init, uint32_t d_data6, u
// round 13 // round 13
GSPREC4(0, 4, 0x8, 0xC, 11, 8, 1, 5, 0x9, 0xD, 12, 0, 2, 6, 0xA, 0xE, 5, 2, 3, 7, 0xB, 0xF, 15,13); GSPREC4(0, 4, 0x8, 0xC, 11, 8, 1, 5, 0x9, 0xD, 12, 0, 2, 6, 0xA, 0xE, 5, 2, 3, 7, 0xB, 0xF, 15,13);
GSPREC4(0, 5, 0xA, 0xF, 10,14, 1, 6, 0xB, 0xC, 3, 6, 2, 7, 0x8, 0xD, 7, 1, 3, 4, 0x9, 0xE, 9, 4); GSPREC4(0, 5, 0xA, 0xF, 10,14, 1, 6, 0xB, 0xC, 3, 6, 2, 7, 0x8, 0xD, 7, 1, 3, 4, 0x9, 0xE, 9, 4);
*/ #else
// round 8 // round 8
GSPREC(0, 4, 0x8, 0xC, 13,11); GSPREC(0, 4, 0x8, 0xC, 13,11);
GSPREC(1, 5, 0x9, 0xD, 7, 14); GSPREC(1, 5, 0x9, 0xD, 7, 14);
@ -218,6 +218,7 @@ uint32_t blake256_compress_14(uint32_t *m, uint32_t *v_init, uint32_t d_data6, u
GSPREC(1, 6, 0xB, 0xC, 3, 6); GSPREC(1, 6, 0xB, 0xC, 3, 6);
GSPREC(2, 7, 0x8, 0xD, 7, 1); GSPREC(2, 7, 0x8, 0xD, 7, 1);
GSPREC(3, 4, 0x9, 0xE, 9, 4); GSPREC(3, 4, 0x9, 0xE, 9, 4);
#endif
// round 14 // round 14
GSPREC(0, 4, 0x8, 0xC, 7, 9); GSPREC(0, 4, 0x8, 0xC, 7, 9);
GSPREC(1, 5, 0x9, 0xD, 3, 1); GSPREC(1, 5, 0x9, 0xD, 3, 1);
@ -279,12 +280,17 @@ void blake256_gpu_hash_nonce(const uint32_t threads, const uint32_t startNonce,
#else #else
resNonce[0] = m[3]; resNonce[0] = m[3];
#endif #endif
// from alexis78:
// this return statement allows CUDA 7.5 to:
// 1. Store the values fetched from constant memory in registers.
// 2. Perform more precomputations outside of the for loop.
// 3. Stop the continuous fetches from constant memory while iterating.
return;
} }
} }
} }
} }
__host__ __host__
static uint32_t decred_cpu_hash_nonce(const int thr_id, const uint32_t threads, const uint32_t startNonce, const uint64_t highTarget) static uint32_t decred_cpu_hash_nonce(const int thr_id, const uint32_t threads, const uint32_t startNonce, const uint64_t highTarget)
{ {

Loading…
Cancel
Save