You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1141 lines
32 KiB
1141 lines
32 KiB
|
|
/*-------------------------------------------------------------*/ |
|
/*--- Block sorting machinery ---*/ |
|
/*--- blocksort.c ---*/ |
|
/*-------------------------------------------------------------*/ |
|
|
|
/*-- |
|
This file is a part of bzip2 and/or libbzip2, a program and |
|
library for lossless, block-sorting data compression. |
|
|
|
Copyright (C) 1996-2002 Julian R Seward. All rights reserved. |
|
|
|
Redistribution and use in source and binary forms, with or without |
|
modification, are permitted provided that the following conditions |
|
are met: |
|
|
|
1. Redistributions of source code must retain the above copyright |
|
notice, this list of conditions and the following disclaimer. |
|
|
|
2. The origin of this software must not be misrepresented; you must |
|
not claim that you wrote the original software. If you use this |
|
software in a product, an acknowledgment in the product |
|
documentation would be appreciated but is not required. |
|
|
|
3. Altered source versions must be plainly marked as such, and must |
|
not be misrepresented as being the original software. |
|
|
|
4. The name of the author may not be used to endorse or promote |
|
products derived from this software without specific prior written |
|
permission. |
|
|
|
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS |
|
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE |
|
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
|
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
|
|
Julian Seward, Cambridge, UK. |
|
jseward@acm.org |
|
bzip2/libbzip2 version 1.0 of 21 March 2000 |
|
|
|
This program is based on (at least) the work of: |
|
Mike Burrows |
|
David Wheeler |
|
Peter Fenwick |
|
Alistair Moffat |
|
Radford Neal |
|
Ian H. Witten |
|
Robert Sedgewick |
|
Jon L. Bentley |
|
|
|
For more information on these sources, see the manual. |
|
|
|
To get some idea how the block sorting algorithms in this file |
|
work, read my paper |
|
On the Performance of BWT Sorting Algorithms |
|
in Proceedings of the IEEE Data Compression Conference 2000, |
|
Snowbird, Utah, USA, 27-30 March 2000. The main sort in this |
|
file implements the algorithm called cache in the paper. |
|
--*/ |
|
|
|
|
|
#include "bzlib_private.h" |
|
|
|
/*---------------------------------------------*/ |
|
/*--- Fallback O(N log(N)^2) sorting ---*/ |
|
/*--- algorithm, for repetitive blocks ---*/ |
|
/*---------------------------------------------*/ |
|
|
|
/*---------------------------------------------*/ |
|
static |
|
__inline__ |
|
void fallbackSimpleSort ( UInt32* fmap, |
|
UInt32* eclass, |
|
Int32 lo, |
|
Int32 hi ) |
|
{ |
|
Int32 i, j, tmp; |
|
UInt32 ec_tmp; |
|
|
|
if (lo == hi) return; |
|
|
|
if (hi - lo > 3) { |
|
for ( i = hi-4; i >= lo; i-- ) { |
|
tmp = fmap[i]; |
|
ec_tmp = eclass[tmp]; |
|
for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 ) |
|
fmap[j-4] = fmap[j]; |
|
fmap[j-4] = tmp; |
|
} |
|
} |
|
|
|
for ( i = hi-1; i >= lo; i-- ) { |
|
tmp = fmap[i]; |
|
ec_tmp = eclass[tmp]; |
|
for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ ) |
|
fmap[j-1] = fmap[j]; |
|
fmap[j-1] = tmp; |
|
} |
|
} |
|
|
|
|
|
/*---------------------------------------------*/ |
|
#define fswap(zz1, zz2) \ |
|
{ Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } |
|
|
|
#define fvswap(zzp1, zzp2, zzn) \ |
|
{ \ |
|
Int32 yyp1 = (zzp1); \ |
|
Int32 yyp2 = (zzp2); \ |
|
Int32 yyn = (zzn); \ |
|
while (yyn > 0) { \ |
|
fswap(fmap[yyp1], fmap[yyp2]); \ |
|
yyp1++; yyp2++; yyn--; \ |
|
} \ |
|
} |
|
|
|
|
|
#define fmin(a,b) ((a) < (b)) ? (a) : (b) |
|
|
|
#define fpush(lz,hz) { stackLo[sp] = lz; \ |
|
stackHi[sp] = hz; \ |
|
sp++; } |
|
|
|
#define fpop(lz,hz) { sp--; \ |
|
lz = stackLo[sp]; \ |
|
hz = stackHi[sp]; } |
|
|
|
#define FALLBACK_QSORT_SMALL_THRESH 10 |
|
#define FALLBACK_QSORT_STACK_SIZE 100 |
|
|
|
|
|
static |
|
void fallbackQSort3 ( UInt32* fmap, |
|
UInt32* eclass, |
|
Int32 loSt, |
|
Int32 hiSt ) |
|
{ |
|
Int32 unLo, unHi, ltLo, gtHi, n, m; |
|
Int32 sp, lo, hi; |
|
UInt32 med, r, r3; |
|
Int32 stackLo[FALLBACK_QSORT_STACK_SIZE]; |
|
Int32 stackHi[FALLBACK_QSORT_STACK_SIZE]; |
|
|
|
r = 0; |
|
|
|
sp = 0; |
|
fpush ( loSt, hiSt ); |
|
|
|
while (sp > 0) { |
|
|
|
AssertH ( sp < FALLBACK_QSORT_STACK_SIZE, 1004 ); |
|
|
|
fpop ( lo, hi ); |
|
if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { |
|
fallbackSimpleSort ( fmap, eclass, lo, hi ); |
|
continue; |
|
} |
|
|
|
/* Random partitioning. Median of 3 sometimes fails to |
|
avoid bad cases. Median of 9 seems to help but |
|
looks rather expensive. This too seems to work but |
|
is cheaper. Guidance for the magic constants |
|
7621 and 32768 is taken from Sedgewick's algorithms |
|
book, chapter 35. |
|
*/ |
|
r = ((r * 7621) + 1) % 32768; |
|
r3 = r % 3; |
|
if (r3 == 0) med = eclass[fmap[lo]]; else |
|
if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else |
|
med = eclass[fmap[hi]]; |
|
|
|
unLo = ltLo = lo; |
|
unHi = gtHi = hi; |
|
|
|
while (1) { |
|
while (1) { |
|
if (unLo > unHi) break; |
|
n = (Int32)eclass[fmap[unLo]] - (Int32)med; |
|
if (n == 0) { |
|
fswap(fmap[unLo], fmap[ltLo]); |
|
ltLo++; unLo++; |
|
continue; |
|
}; |
|
if (n > 0) break; |
|
unLo++; |
|
} |
|
while (1) { |
|
if (unLo > unHi) break; |
|
n = (Int32)eclass[fmap[unHi]] - (Int32)med; |
|
if (n == 0) { |
|
fswap(fmap[unHi], fmap[gtHi]); |
|
gtHi--; unHi--; |
|
continue; |
|
}; |
|
if (n < 0) break; |
|
unHi--; |
|
} |
|
if (unLo > unHi) break; |
|
fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--; |
|
} |
|
|
|
AssertD ( unHi == unLo-1, "fallbackQSort3(2)" ); |
|
|
|
if (gtHi < ltLo) continue; |
|
|
|
n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n); |
|
m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m); |
|
|
|
n = lo + unLo - ltLo - 1; |
|
m = hi - (gtHi - unHi) + 1; |
|
|
|
if (n - lo > hi - m) { |
|
fpush ( lo, n ); |
|
fpush ( m, hi ); |
|
} else { |
|
fpush ( m, hi ); |
|
fpush ( lo, n ); |
|
} |
|
} |
|
} |
|
|
|
#undef fmin |
|
#undef fpush |
|
#undef fpop |
|
#undef fswap |
|
#undef fvswap |
|
#undef FALLBACK_QSORT_SMALL_THRESH |
|
#undef FALLBACK_QSORT_STACK_SIZE |
|
|
|
|
|
/*---------------------------------------------*/ |
|
/* Pre: |
|
nblock > 0 |
|
eclass exists for [0 .. nblock-1] |
|
((UChar*)eclass) [0 .. nblock-1] holds block |
|
ptr exists for [0 .. nblock-1] |
|
|
|
Post: |
|
((UChar*)eclass) [0 .. nblock-1] holds block |
|
All other areas of eclass destroyed |
|
fmap [0 .. nblock-1] holds sorted order |
|
bhtab [ 0 .. 2+(nblock/32) ] destroyed |
|
*/ |
|
|
|
#define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31)) |
|
#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31)) |
|
#define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31))) |
|
#define WORD_BH(zz) bhtab[(zz) >> 5] |
|
#define UNALIGNED_BH(zz) ((zz) & 0x01f) |
|
|
|
static |
|
void fallbackSort ( UInt32* fmap, |
|
UInt32* eclass, |
|
UInt32* bhtab, |
|
Int32 nblock, |
|
Int32 verb ) |
|
{ |
|
Int32 ftab[257]; |
|
Int32 ftabCopy[256]; |
|
Int32 H, i, j, k, l, r, cc, cc1; |
|
Int32 nNotDone; |
|
Int32 nBhtab; |
|
UChar* eclass8 = (UChar*)eclass; |
|
|
|
/*-- |
|
Initial 1-char radix sort to generate |
|
initial fmap and initial BH bits. |
|
--*/ |
|
if (verb >= 4) |
|
VPrintf0 ( " bucket sorting ...\n" ); |
|
for (i = 0; i < 257; i++) ftab[i] = 0; |
|
for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; |
|
for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; |
|
for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; |
|
|
|
for (i = 0; i < nblock; i++) { |
|
j = eclass8[i]; |
|
k = ftab[j] - 1; |
|
ftab[j] = k; |
|
fmap[k] = i; |
|
} |
|
|
|
nBhtab = 2 + (nblock / 32); |
|
for (i = 0; i < nBhtab; i++) bhtab[i] = 0; |
|
for (i = 0; i < 256; i++) SET_BH(ftab[i]); |
|
|
|
/*-- |
|
Inductively refine the buckets. Kind-of an |
|
"exponential radix sort" (!), inspired by the |
|
Manber-Myers suffix array construction algorithm. |
|
--*/ |
|
|
|
/*-- set sentinel bits for block-end detection --*/ |
|
for (i = 0; i < 32; i++) { |
|
SET_BH(nblock + 2*i); |
|
CLEAR_BH(nblock + 2*i + 1); |
|
} |
|
|
|
/*-- the log(N) loop --*/ |
|
H = 1; |
|
while (1) { |
|
|
|
if (verb >= 4) |
|
VPrintf1 ( " depth %6d has ", H ); |
|
|
|
j = 0; |
|
for (i = 0; i < nblock; i++) { |
|
if (ISSET_BH(i)) j = i; |
|
k = fmap[i] - H; if (k < 0) k += nblock; |
|
eclass[k] = j; |
|
} |
|
|
|
nNotDone = 0; |
|
r = -1; |
|
while (1) { |
|
|
|
/*-- find the next non-singleton bucket --*/ |
|
k = r + 1; |
|
while (ISSET_BH(k) && UNALIGNED_BH(k)) k++; |
|
if (ISSET_BH(k)) { |
|
while (WORD_BH(k) == 0xffffffff) k += 32; |
|
while (ISSET_BH(k)) k++; |
|
} |
|
l = k - 1; |
|
if (l >= nblock) break; |
|
while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++; |
|
if (!ISSET_BH(k)) { |
|
while (WORD_BH(k) == 0x00000000) k += 32; |
|
while (!ISSET_BH(k)) k++; |
|
} |
|
r = k - 1; |
|
if (r >= nblock) break; |
|
|
|
/*-- now [l, r] bracket current bucket --*/ |
|
if (r > l) { |
|
nNotDone += (r - l + 1); |
|
fallbackQSort3 ( fmap, eclass, l, r ); |
|
|
|
/*-- scan bucket and generate header bits-- */ |
|
cc = -1; |
|
for (i = l; i <= r; i++) { |
|
cc1 = eclass[fmap[i]]; |
|
if (cc != cc1) { SET_BH(i); cc = cc1; }; |
|
} |
|
} |
|
} |
|
|
|
if (verb >= 4) |
|
VPrintf1 ( "%6d unresolved strings\n", nNotDone ); |
|
|
|
H *= 2; |
|
if (H > nblock || nNotDone == 0) break; |
|
} |
|
|
|
/*-- |
|
Reconstruct the original block in |
|
eclass8 [0 .. nblock-1], since the |
|
previous phase destroyed it. |
|
--*/ |
|
if (verb >= 4) |
|
VPrintf0 ( " reconstructing block ...\n" ); |
|
j = 0; |
|
for (i = 0; i < nblock; i++) { |
|
while (ftabCopy[j] == 0) j++; |
|
ftabCopy[j]--; |
|
eclass8[fmap[i]] = (UChar)j; |
|
} |
|
AssertH ( j < 256, 1005 ); |
|
} |
|
|
|
#undef SET_BH |
|
#undef CLEAR_BH |
|
#undef ISSET_BH |
|
#undef WORD_BH |
|
#undef UNALIGNED_BH |
|
|
|
|
|
/*---------------------------------------------*/ |
|
/*--- The main, O(N^2 log(N)) sorting ---*/ |
|
/*--- algorithm. Faster for "normal" ---*/ |
|
/*--- non-repetitive blocks. ---*/ |
|
/*---------------------------------------------*/ |
|
|
|
/*---------------------------------------------*/ |
|
static |
|
__inline__ |
|
Bool mainGtU ( UInt32 i1, |
|
UInt32 i2, |
|
UChar* block, |
|
UInt16* quadrant, |
|
UInt32 nblock, |
|
Int32* budget ) |
|
{ |
|
Int32 k; |
|
UChar c1, c2; |
|
UInt16 s1, s2; |
|
|
|
AssertD ( i1 != i2, "mainGtU" ); |
|
/* 1 */ |
|
c1 = block[i1]; c2 = block[i2]; |
|
if (c1 != c2) return (c1 > c2); |
|
i1++; i2++; |
|
/* 2 */ |
|
c1 = block[i1]; c2 = block[i2]; |
|
if (c1 != c2) return (c1 > c2); |
|
i1++; i2++; |
|
/* 3 */ |
|
c1 = block[i1]; c2 = block[i2]; |
|
if (c1 != c2) return (c1 > c2); |
|
i1++; i2++; |
|
/* 4 */ |
|
c1 = block[i1]; c2 = block[i2]; |
|
if (c1 != c2) return (c1 > c2); |
|
i1++; i2++; |
|
/* 5 */ |
|
c1 = block[i1]; c2 = block[i2]; |
|
if (c1 != c2) return (c1 > c2); |
|
i1++; i2++; |
|
/* 6 */ |
|
c1 = block[i1]; c2 = block[i2]; |
|
if (c1 != c2) return (c1 > c2); |
|
i1++; i2++; |
|
/* 7 */ |
|
c1 = block[i1]; c2 = block[i2]; |
|
if (c1 != c2) return (c1 > c2); |
|
i1++; i2++; |
|
/* 8 */ |
|
c1 = block[i1]; c2 = block[i2]; |
|
if (c1 != c2) return (c1 > c2); |
|
i1++; i2++; |
|
/* 9 */ |
|
c1 = block[i1]; c2 = block[i2]; |
|
if (c1 != c2) return (c1 > c2); |
|
i1++; i2++; |
|
/* 10 */ |
|
c1 = block[i1]; c2 = block[i2]; |
|
if (c1 != c2) return (c1 > c2); |
|
i1++; i2++; |
|
/* 11 */ |
|
c1 = block[i1]; c2 = block[i2]; |
|
if (c1 != c2) return (c1 > c2); |
|
i1++; i2++; |
|
/* 12 */ |
|
c1 = block[i1]; c2 = block[i2]; |
|
if (c1 != c2) return (c1 > c2); |
|
i1++; i2++; |
|
|
|
k = nblock + 8; |
|
|
|
do { |
|
/* 1 */ |
|
c1 = block[i1]; c2 = block[i2]; |
|
if (c1 != c2) return (c1 > c2); |
|
s1 = quadrant[i1]; s2 = quadrant[i2]; |
|
if (s1 != s2) return (s1 > s2); |
|
i1++; i2++; |
|
/* 2 */ |
|
c1 = block[i1]; c2 = block[i2]; |
|
if (c1 != c2) return (c1 > c2); |
|
s1 = quadrant[i1]; s2 = quadrant[i2]; |
|
if (s1 != s2) return (s1 > s2); |
|
i1++; i2++; |
|
/* 3 */ |
|
c1 = block[i1]; c2 = block[i2]; |
|
if (c1 != c2) return (c1 > c2); |
|
s1 = quadrant[i1]; s2 = quadrant[i2]; |
|
if (s1 != s2) return (s1 > s2); |
|
i1++; i2++; |
|
/* 4 */ |
|
c1 = block[i1]; c2 = block[i2]; |
|
if (c1 != c2) return (c1 > c2); |
|
s1 = quadrant[i1]; s2 = quadrant[i2]; |
|
if (s1 != s2) return (s1 > s2); |
|
i1++; i2++; |
|
/* 5 */ |
|
c1 = block[i1]; c2 = block[i2]; |
|
if (c1 != c2) return (c1 > c2); |
|
s1 = quadrant[i1]; s2 = quadrant[i2]; |
|
if (s1 != s2) return (s1 > s2); |
|
i1++; i2++; |
|
/* 6 */ |
|
c1 = block[i1]; c2 = block[i2]; |
|
if (c1 != c2) return (c1 > c2); |
|
s1 = quadrant[i1]; s2 = quadrant[i2]; |
|
if (s1 != s2) return (s1 > s2); |
|
i1++; i2++; |
|
/* 7 */ |
|
c1 = block[i1]; c2 = block[i2]; |
|
if (c1 != c2) return (c1 > c2); |
|
s1 = quadrant[i1]; s2 = quadrant[i2]; |
|
if (s1 != s2) return (s1 > s2); |
|
i1++; i2++; |
|
/* 8 */ |
|
c1 = block[i1]; c2 = block[i2]; |
|
if (c1 != c2) return (c1 > c2); |
|
s1 = quadrant[i1]; s2 = quadrant[i2]; |
|
if (s1 != s2) return (s1 > s2); |
|
i1++; i2++; |
|
|
|
if (i1 >= nblock) i1 -= nblock; |
|
if (i2 >= nblock) i2 -= nblock; |
|
|
|
k -= 8; |
|
(*budget)--; |
|
} |
|
while (k >= 0); |
|
|
|
return False; |
|
} |
|
|
|
|
|
/*---------------------------------------------*/ |
|
/*-- |
|
Knuth's increments seem to work better |
|
than Incerpi-Sedgewick here. Possibly |
|
because the number of elems to sort is |
|
usually small, typically <= 20. |
|
--*/ |
|
static |
|
Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, |
|
9841, 29524, 88573, 265720, |
|
797161, 2391484 }; |
|
|
|
static |
|
void mainSimpleSort ( UInt32* ptr, |
|
UChar* block, |
|
UInt16* quadrant, |
|
Int32 nblock, |
|
Int32 lo, |
|
Int32 hi, |
|
Int32 d, |
|
Int32* budget ) |
|
{ |
|
Int32 i, j, h, bigN, hp; |
|
UInt32 v; |
|
|
|
bigN = hi - lo + 1; |
|
if (bigN < 2) return; |
|
|
|
hp = 0; |
|
while (incs[hp] < bigN) hp++; |
|
hp--; |
|
|
|
for (; hp >= 0; hp--) { |
|
h = incs[hp]; |
|
|
|
i = lo + h; |
|
while (True) { |
|
|
|
/*-- copy 1 --*/ |
|
if (i > hi) break; |
|
v = ptr[i]; |
|
j = i; |
|
while ( mainGtU ( |
|
ptr[j-h]+d, v+d, block, quadrant, nblock, budget |
|
) ) { |
|
ptr[j] = ptr[j-h]; |
|
j = j - h; |
|
if (j <= (lo + h - 1)) break; |
|
} |
|
ptr[j] = v; |
|
i++; |
|
|
|
/*-- copy 2 --*/ |
|
if (i > hi) break; |
|
v = ptr[i]; |
|
j = i; |
|
while ( mainGtU ( |
|
ptr[j-h]+d, v+d, block, quadrant, nblock, budget |
|
) ) { |
|
ptr[j] = ptr[j-h]; |
|
j = j - h; |
|
if (j <= (lo + h - 1)) break; |
|
} |
|
ptr[j] = v; |
|
i++; |
|
|
|
/*-- copy 3 --*/ |
|
if (i > hi) break; |
|
v = ptr[i]; |
|
j = i; |
|
while ( mainGtU ( |
|
ptr[j-h]+d, v+d, block, quadrant, nblock, budget |
|
) ) { |
|
ptr[j] = ptr[j-h]; |
|
j = j - h; |
|
if (j <= (lo + h - 1)) break; |
|
} |
|
ptr[j] = v; |
|
i++; |
|
|
|
if (*budget < 0) return; |
|
} |
|
} |
|
} |
|
|
|
|
|
/*---------------------------------------------*/ |
|
/*-- |
|
The following is an implementation of |
|
an elegant 3-way quicksort for strings, |
|
described in a paper "Fast Algorithms for |
|
Sorting and Searching Strings", by Robert |
|
Sedgewick and Jon L. Bentley. |
|
--*/ |
|
|
|
#define mswap(zz1, zz2) \ |
|
{ Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } |
|
|
|
#define mvswap(zzp1, zzp2, zzn) \ |
|
{ \ |
|
Int32 yyp1 = (zzp1); \ |
|
Int32 yyp2 = (zzp2); \ |
|
Int32 yyn = (zzn); \ |
|
while (yyn > 0) { \ |
|
mswap(ptr[yyp1], ptr[yyp2]); \ |
|
yyp1++; yyp2++; yyn--; \ |
|
} \ |
|
} |
|
|
|
static |
|
__inline__ |
|
UChar mmed3 ( UChar a, UChar b, UChar c ) |
|
{ |
|
UChar t; |
|
if (a > b) { t = a; a = b; b = t; }; |
|
if (b > c) { |
|
b = c; |
|
if (a > b) b = a; |
|
} |
|
return b; |
|
} |
|
|
|
#define mmin(a,b) ((a) < (b)) ? (a) : (b) |
|
|
|
#define mpush(lz,hz,dz) { stackLo[sp] = lz; \ |
|
stackHi[sp] = hz; \ |
|
stackD [sp] = dz; \ |
|
sp++; } |
|
|
|
#define mpop(lz,hz,dz) { sp--; \ |
|
lz = stackLo[sp]; \ |
|
hz = stackHi[sp]; \ |
|
dz = stackD [sp]; } |
|
|
|
|
|
#define mnextsize(az) (nextHi[az]-nextLo[az]) |
|
|
|
#define mnextswap(az,bz) \ |
|
{ Int32 tz; \ |
|
tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \ |
|
tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \ |
|
tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; } |
|
|
|
|
|
#define MAIN_QSORT_SMALL_THRESH 20 |
|
#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT) |
|
#define MAIN_QSORT_STACK_SIZE 100 |
|
|
|
static |
|
void mainQSort3 ( UInt32* ptr, |
|
UChar* block, |
|
UInt16* quadrant, |
|
Int32 nblock, |
|
Int32 loSt, |
|
Int32 hiSt, |
|
Int32 dSt, |
|
Int32* budget ) |
|
{ |
|
Int32 unLo, unHi, ltLo, gtHi, n, m, med; |
|
Int32 sp, lo, hi, d; |
|
|
|
Int32 stackLo[MAIN_QSORT_STACK_SIZE]; |
|
Int32 stackHi[MAIN_QSORT_STACK_SIZE]; |
|
Int32 stackD [MAIN_QSORT_STACK_SIZE]; |
|
|
|
Int32 nextLo[3]; |
|
Int32 nextHi[3]; |
|
Int32 nextD [3]; |
|
|
|
sp = 0; |
|
mpush ( loSt, hiSt, dSt ); |
|
|
|
while (sp > 0) { |
|
|
|
AssertH ( sp < MAIN_QSORT_STACK_SIZE, 1001 ); |
|
|
|
mpop ( lo, hi, d ); |
|
if (hi - lo < MAIN_QSORT_SMALL_THRESH || |
|
d > MAIN_QSORT_DEPTH_THRESH) { |
|
mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget ); |
|
if (*budget < 0) return; |
|
continue; |
|
} |
|
|
|
med = (Int32) |
|
mmed3 ( block[ptr[ lo ]+d], |
|
block[ptr[ hi ]+d], |
|
block[ptr[ (lo+hi)>>1 ]+d] ); |
|
|
|
unLo = ltLo = lo; |
|
unHi = gtHi = hi; |
|
|
|
while (True) { |
|
while (True) { |
|
if (unLo > unHi) break; |
|
n = ((Int32)block[ptr[unLo]+d]) - med; |
|
if (n == 0) { |
|
mswap(ptr[unLo], ptr[ltLo]); |
|
ltLo++; unLo++; continue; |
|
}; |
|
if (n > 0) break; |
|
unLo++; |
|
} |
|
while (True) { |
|
if (unLo > unHi) break; |
|
n = ((Int32)block[ptr[unHi]+d]) - med; |
|
if (n == 0) { |
|
mswap(ptr[unHi], ptr[gtHi]); |
|
gtHi--; unHi--; continue; |
|
}; |
|
if (n < 0) break; |
|
unHi--; |
|
} |
|
if (unLo > unHi) break; |
|
mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--; |
|
} |
|
|
|
AssertD ( unHi == unLo-1, "mainQSort3(2)" ); |
|
|
|
if (gtHi < ltLo) { |
|
mpush(lo, hi, d+1 ); |
|
continue; |
|
} |
|
|
|
n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n); |
|
m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m); |
|
|
|
n = lo + unLo - ltLo - 1; |
|
m = hi - (gtHi - unHi) + 1; |
|
|
|
nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; |
|
nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; |
|
nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; |
|
|
|
if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); |
|
if (mnextsize(1) < mnextsize(2)) mnextswap(1,2); |
|
if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); |
|
|
|
AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" ); |
|
AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" ); |
|
|
|
mpush (nextLo[0], nextHi[0], nextD[0]); |
|
mpush (nextLo[1], nextHi[1], nextD[1]); |
|
mpush (nextLo[2], nextHi[2], nextD[2]); |
|
} |
|
} |
|
|
|
#undef mswap |
|
#undef mvswap |
|
#undef mpush |
|
#undef mpop |
|
#undef mmin |
|
#undef mnextsize |
|
#undef mnextswap |
|
#undef MAIN_QSORT_SMALL_THRESH |
|
#undef MAIN_QSORT_DEPTH_THRESH |
|
#undef MAIN_QSORT_STACK_SIZE |
|
|
|
|
|
/*---------------------------------------------*/ |
|
/* Pre: |
|
nblock > N_OVERSHOOT |
|
block32 exists for [0 .. nblock-1 +N_OVERSHOOT] |
|
((UChar*)block32) [0 .. nblock-1] holds block |
|
ptr exists for [0 .. nblock-1] |
|
|
|
Post: |
|
((UChar*)block32) [0 .. nblock-1] holds block |
|
All other areas of block32 destroyed |
|
ftab [0 .. 65536 ] destroyed |
|
ptr [0 .. nblock-1] holds sorted order |
|
if (*budget < 0), sorting was abandoned |
|
*/ |
|
|
|
#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) |
|
#define SETMASK (1 << 21) |
|
#define CLEARMASK (~(SETMASK)) |
|
|
|
static |
|
void mainSort ( UInt32* ptr, |
|
UChar* block, |
|
UInt16* quadrant, |
|
UInt32* ftab, |
|
Int32 nblock, |
|
Int32 verb, |
|
Int32* budget ) |
|
{ |
|
Int32 i, j, k, ss, sb; |
|
Int32 runningOrder[256]; |
|
Bool bigDone[256]; |
|
Int32 copyStart[256]; |
|
Int32 copyEnd [256]; |
|
UChar c1; |
|
Int32 numQSorted; |
|
UInt16 s; |
|
if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" ); |
|
|
|
/*-- set up the 2-byte frequency table --*/ |
|
for (i = 65536; i >= 0; i--) ftab[i] = 0; |
|
|
|
j = block[0] << 8; |
|
i = nblock-1; |
|
for (; i >= 3; i -= 4) { |
|
quadrant[i] = 0; |
|
j = (j >> 8) | ( ((UInt16)block[i]) << 8); |
|
ftab[j]++; |
|
quadrant[i-1] = 0; |
|
j = (j >> 8) | ( ((UInt16)block[i-1]) << 8); |
|
ftab[j]++; |
|
quadrant[i-2] = 0; |
|
j = (j >> 8) | ( ((UInt16)block[i-2]) << 8); |
|
ftab[j]++; |
|
quadrant[i-3] = 0; |
|
j = (j >> 8) | ( ((UInt16)block[i-3]) << 8); |
|
ftab[j]++; |
|
} |
|
for (; i >= 0; i--) { |
|
quadrant[i] = 0; |
|
j = (j >> 8) | ( ((UInt16)block[i]) << 8); |
|
ftab[j]++; |
|
} |
|
|
|
/*-- (emphasises close relationship of block & quadrant) --*/ |
|
for (i = 0; i < BZ_N_OVERSHOOT; i++) { |
|
block [nblock+i] = block[i]; |
|
quadrant[nblock+i] = 0; |
|
} |
|
|
|
if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" ); |
|
|
|
/*-- Complete the initial radix sort --*/ |
|
for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; |
|
|
|
s = block[0] << 8; |
|
i = nblock-1; |
|
for (; i >= 3; i -= 4) { |
|
s = (s >> 8) | (block[i] << 8); |
|
j = ftab[s] -1; |
|
ftab[s] = j; |
|
ptr[j] = i; |
|
s = (s >> 8) | (block[i-1] << 8); |
|
j = ftab[s] -1; |
|
ftab[s] = j; |
|
ptr[j] = i-1; |
|
s = (s >> 8) | (block[i-2] << 8); |
|
j = ftab[s] -1; |
|
ftab[s] = j; |
|
ptr[j] = i-2; |
|
s = (s >> 8) | (block[i-3] << 8); |
|
j = ftab[s] -1; |
|
ftab[s] = j; |
|
ptr[j] = i-3; |
|
} |
|
for (; i >= 0; i--) { |
|
s = (s >> 8) | (block[i] << 8); |
|
j = ftab[s] -1; |
|
ftab[s] = j; |
|
ptr[j] = i; |
|
} |
|
|
|
/*-- |
|
Now ftab contains the first loc of every small bucket. |
|
Calculate the running order, from smallest to largest |
|
big bucket. |
|
--*/ |
|
for (i = 0; i <= 255; i++) { |
|
bigDone [i] = False; |
|
runningOrder[i] = i; |
|
} |
|
|
|
{ |
|
Int32 vv; |
|
Int32 h = 1; |
|
do h = 3 * h + 1; while (h <= 256); |
|
do { |
|
h = h / 3; |
|
for (i = h; i <= 255; i++) { |
|
vv = runningOrder[i]; |
|
j = i; |
|
while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) { |
|
runningOrder[j] = runningOrder[j-h]; |
|
j = j - h; |
|
if (j <= (h - 1)) goto zero; |
|
} |
|
zero: |
|
runningOrder[j] = vv; |
|
} |
|
} while (h != 1); |
|
} |
|
|
|
/*-- |
|
The main sorting loop. |
|
--*/ |
|
|
|
numQSorted = 0; |
|
|
|
for (i = 0; i <= 255; i++) { |
|
|
|
/*-- |
|
Process big buckets, starting with the least full. |
|
Basically this is a 3-step process in which we call |
|
mainQSort3 to sort the small buckets [ss, j], but |
|
also make a big effort to avoid the calls if we can. |
|
--*/ |
|
ss = runningOrder[i]; |
|
|
|
/*-- |
|
Step 1: |
|
Complete the big bucket [ss] by quicksorting |
|
any unsorted small buckets [ss, j], for j != ss. |
|
Hopefully previous pointer-scanning phases have already |
|
completed many of the small buckets [ss, j], so |
|
we don't have to sort them at all. |
|
--*/ |
|
for (j = 0; j <= 255; j++) { |
|
if (j != ss) { |
|
sb = (ss << 8) + j; |
|
if ( ! (ftab[sb] & SETMASK) ) { |
|
Int32 lo = ftab[sb] & CLEARMASK; |
|
Int32 hi = (ftab[sb+1] & CLEARMASK) - 1; |
|
if (hi > lo) { |
|
if (verb >= 4) |
|
VPrintf4 ( " qsort [0x%x, 0x%x] " |
|
"done %d this %d\n", |
|
ss, j, numQSorted, hi - lo + 1 ); |
|
mainQSort3 ( |
|
ptr, block, quadrant, nblock, |
|
lo, hi, BZ_N_RADIX, budget |
|
); |
|
numQSorted += (hi - lo + 1); |
|
if (*budget < 0) return; |
|
} |
|
} |
|
ftab[sb] |= SETMASK; |
|
} |
|
} |
|
|
|
AssertH ( !bigDone[ss], 1006 ); |
|
|
|
/*-- |
|
Step 2: |
|
Now scan this big bucket [ss] so as to synthesise the |
|
sorted order for small buckets [t, ss] for all t, |
|
including, magically, the bucket [ss,ss] too. |
|
This will avoid doing Real Work in subsequent Step 1's. |
|
--*/ |
|
{ |
|
for (j = 0; j <= 255; j++) { |
|
copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK; |
|
copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; |
|
} |
|
for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { |
|
k = ptr[j]-1; if (k < 0) k += nblock; |
|
c1 = block[k]; |
|
if (!bigDone[c1]) |
|
ptr[ copyStart[c1]++ ] = k; |
|
} |
|
for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { |
|
k = ptr[j]-1; if (k < 0) k += nblock; |
|
c1 = block[k]; |
|
if (!bigDone[c1]) |
|
ptr[ copyEnd[c1]-- ] = k; |
|
} |
|
} |
|
|
|
AssertH ( (copyStart[ss]-1 == copyEnd[ss]) |
|
|| |
|
/* Extremely rare case missing in bzip2-1.0.0 and 1.0.1. |
|
Necessity for this case is demonstrated by compressing |
|
a sequence of approximately 48.5 million of character |
|
251; 1.0.0/1.0.1 will then die here. */ |
|
(copyStart[ss] == 0 && copyEnd[ss] == nblock-1), |
|
1007 ) |
|
|
|
for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; |
|
|
|
/*-- |
|
Step 3: |
|
The [ss] big bucket is now done. Record this fact, |
|
and update the quadrant descriptors. Remember to |
|
update quadrants in the overshoot area too, if |
|
necessary. The "if (i < 255)" test merely skips |
|
this updating for the last bucket processed, since |
|
updating for the last bucket is pointless. |
|
|
|
The quadrant array provides a way to incrementally |
|
cache sort orderings, as they appear, so as to |
|
make subsequent comparisons in fullGtU() complete |
|
faster. For repetitive blocks this makes a big |
|
difference (but not big enough to be able to avoid |
|
the fallback sorting mechanism, exponential radix sort). |
|
|
|
The precise meaning is: at all times: |
|
|
|
for 0 <= i < nblock and 0 <= j <= nblock |
|
|
|
if block[i] != block[j], |
|
|
|
then the relative values of quadrant[i] and |
|
quadrant[j] are meaningless. |
|
|
|
else { |
|
if quadrant[i] < quadrant[j] |
|
then the string starting at i lexicographically |
|
precedes the string starting at j |
|
|
|
else if quadrant[i] > quadrant[j] |
|
then the string starting at j lexicographically |
|
precedes the string starting at i |
|
|
|
else |
|
the relative ordering of the strings starting |
|
at i and j has not yet been determined. |
|
} |
|
--*/ |
|
bigDone[ss] = True; |
|
|
|
if (i < 255) { |
|
Int32 bbStart = ftab[ss << 8] & CLEARMASK; |
|
Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; |
|
Int32 shifts = 0; |
|
|
|
while ((bbSize >> shifts) > 65534) shifts++; |
|
|
|
for (j = bbSize-1; j >= 0; j--) { |
|
Int32 a2update = ptr[bbStart + j]; |
|
UInt16 qVal = (UInt16)(j >> shifts); |
|
quadrant[a2update] = qVal; |
|
if (a2update < BZ_N_OVERSHOOT) |
|
quadrant[a2update + nblock] = qVal; |
|
} |
|
AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 ); |
|
} |
|
|
|
} |
|
|
|
if (verb >= 4) |
|
VPrintf3 ( " %d pointers, %d sorted, %d scanned\n", |
|
nblock, numQSorted, nblock - numQSorted ); |
|
} |
|
|
|
#undef BIGFREQ |
|
#undef SETMASK |
|
#undef CLEARMASK |
|
|
|
|
|
/*---------------------------------------------*/ |
|
/* Pre: |
|
nblock > 0 |
|
arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] |
|
((UChar*)arr2) [0 .. nblock-1] holds block |
|
arr1 exists for [0 .. nblock-1] |
|
|
|
Post: |
|
((UChar*)arr2) [0 .. nblock-1] holds block |
|
All other areas of block destroyed |
|
ftab [ 0 .. 65536 ] destroyed |
|
arr1 [0 .. nblock-1] holds sorted order |
|
*/ |
|
void BZ2_blockSort ( EState* s ) |
|
{ |
|
UInt32* ptr = s->ptr; |
|
UChar* block = s->block; |
|
UInt32* ftab = s->ftab; |
|
Int32 nblock = s->nblock; |
|
Int32 verb = s->verbosity; |
|
Int32 wfact = s->workFactor; |
|
UInt16* quadrant; |
|
Int32 budget; |
|
Int32 budgetInit; |
|
Int32 i; |
|
|
|
if (nblock < 10000) { |
|
fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); |
|
} else { |
|
/* Calculate the location for quadrant, remembering to get |
|
the alignment right. Assumes that &(block[0]) is at least |
|
2-byte aligned -- this should be ok since block is really |
|
the first section of arr2. |
|
*/ |
|
i = nblock+BZ_N_OVERSHOOT; |
|
if (i & 1) i++; |
|
quadrant = (UInt16*)(&(block[i])); |
|
|
|
/* (wfact-1) / 3 puts the default-factor-30 |
|
transition point at very roughly the same place as |
|
with v0.1 and v0.9.0. |
|
Not that it particularly matters any more, since the |
|
resulting compressed stream is now the same regardless |
|
of whether or not we use the main sort or fallback sort. |
|
*/ |
|
if (wfact < 1 ) wfact = 1; |
|
if (wfact > 100) wfact = 100; |
|
budgetInit = nblock * ((wfact-1) / 3); |
|
budget = budgetInit; |
|
|
|
mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget ); |
|
if (verb >= 3) |
|
VPrintf3 ( " %d work, %d block, ratio %5.2f\n", |
|
budgetInit - budget, |
|
nblock, |
|
(float)(budgetInit - budget) / |
|
(float)(nblock==0 ? 1 : nblock) ); |
|
if (budget < 0) { |
|
if (verb >= 2) |
|
VPrintf0 ( " too repetitive; using fallback" |
|
" sorting algorithm\n" ); |
|
fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); |
|
} |
|
} |
|
|
|
s->origPtr = -1; |
|
for (i = 0; i < s->nblock; i++) |
|
if (ptr[i] == 0) |
|
{ s->origPtr = i; break; }; |
|
|
|
AssertH( s->origPtr != -1, 1003 ); |
|
} |
|
|
|
|
|
/*-------------------------------------------------------------*/ |
|
/*--- end blocksort.c ---*/ |
|
/*-------------------------------------------------------------*/
|
|
|