Browse Source

Minor performance tweaks.

master
samr7 14 years ago
parent
commit
30f3787471
  1. 38
      calc_addrs.cl

38
calc_addrs.cl

@@ -125,22 +125,21 @@ bn_lshift1(bignum *bn)
void void
bn_rshift(bignum *bn, int shift) bn_rshift(bignum *bn, int shift)
{ {
int i, wd, iws; int i, wd, iws, iwr;
bn_word *op, *ip, ihw, ilw; bn_word ihw, ilw;
iws = (shift & (BN_WBITS-1)); iws = (shift & (BN_WBITS-1));
iwr = BN_WBITS - iws;
wd = (shift >> BN_WSHIFT); wd = (shift >> BN_WSHIFT);
ip = ((bn_word*)bn); ihw = (wd < BN_WBITS) ? bn->d[wd] : 0;
op = ip + wd; #ifdef UNROLL_MAX
wd = BN_NWORDS - wd; #pragma unroll UNROLL_MAX
ihw = ip[0]; #endif
for (i = 1; i < wd; i++) { for (i = 0, wd++; i < (BN_NWORDS-1); i++, wd++) {
ilw = ihw; ilw = ihw;
ihw = ip[i]; ihw = (wd < BN_WBITS) ? bn->d[wd] : 0;
op[i-1] = ((ilw >> iws) | (ihw << (BN_WBITS - iws))); bn->d[i] = (ilw >> iws) | (ihw << iwr);
} }
op[i-1] = (ihw >> iws); bn->d[i] = (ihw >> iws);
while (i < BN_NWORDS)
op[i++] = 0;
} }
void void
@@ -253,10 +252,11 @@ bn_uadd_c(bignum *r, bignum *a, __constant bn_word *b)
r = t; \ r = t; \
} while (0) } while (0)
#define bn_subb_word(r, a, b, t, c) do { \ #define bn_subb_word(r, a, b, t, c) do { \
t = a - (b + c); \ t = a - (b + c); \
c = (a < b) ? 1 : (((!a) & c) ? 1 : 0); \ c = (!(a) && c) ? 1 : 0; \
r = t; \ c |= (a < b) ? 1 : 0; \
r = t; \
} while (0) } while (0)
bn_word bn_word
@@ -449,6 +449,9 @@ bn_from_mont(bignum *rb, bignum *b)
for (j = 0; j < BN_NWORDS; j++) for (j = 0; j < BN_NWORDS; j++)
bn_subb_word(rb->d[j], r[BN_NWORDS + j], modulus[j], p, c); bn_subb_word(rb->d[j], r[BN_NWORDS + j], modulus[j], p, c);
if (c) { if (c) {
#ifdef UNROLL_MAX
#pragma unroll UNROLL_MAX
#endif
for (j = 0; j < BN_NWORDS; j++) for (j = 0; j < BN_NWORDS; j++)
rb->d[j] = r[BN_NWORDS + j]; rb->d[j] = r[BN_NWORDS + j];
} }
@@ -1029,6 +1032,9 @@ heap_invert(__global bignum *z_heap, int ncols)
/* Invert the root, fix up 1/ZR -> R/Z */ /* Invert the root, fix up 1/ZR -> R/Z */
bn_mod_inverse(&z, &z); bn_mod_inverse(&z, &z);
#ifdef UNROLL_MAX
#pragma unroll UNROLL_MAX
#endif
for (i = 0; i < BN_NWORDS; i++) for (i = 0; i < BN_NWORDS; i++)
a.d[i] = mont_rr[i]; a.d[i] = mont_rr[i];
bn_mul_mont(&z, &z, &a); bn_mul_mont(&z, &z, &a);

Loading…
Cancel
Save