1 /*
2 * bzip2 is written by Julian Seward <jseward@bzip.org>.
3 * Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>.
4 * See README and LICENSE files in this directory for more information.
5 */
6
7 /*-------------------------------------------------------------*/
8 /*--- Block sorting machinery ---*/
9 /*--- blocksort.c ---*/
10 /*-------------------------------------------------------------*/
11
12 /* ------------------------------------------------------------------
13 This file is part of bzip2/libbzip2, a program and library for
14 lossless, block-sorting data compression.
15
16 bzip2/libbzip2 version 1.0.4 of 20 December 2006
17 Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
18
19 Please read the WARNING, DISCLAIMER and PATENTS sections in the
20 README file.
21
22 This program is released under the terms of the license contained
23 in the file LICENSE.
24 ------------------------------------------------------------------ */
25
26 /* #include "bzlib_private.h" */
27
28 #define mswap(zz1, zz2) \
29 { \
30 int32_t zztmp = zz1; \
31 zz1 = zz2; \
32 zz2 = zztmp; \
33 }
34
35 static
36 /* No measurable speed gain with inlining */
37 /* ALWAYS_INLINE */
mvswap(uint32_t * ptr,int32_t zzp1,int32_t zzp2,int32_t zzn)38 void mvswap(uint32_t* ptr, int32_t zzp1, int32_t zzp2, int32_t zzn)
39 {
40 while (zzn > 0) {
41 mswap(ptr[zzp1], ptr[zzp2]);
42 zzp1++;
43 zzp2++;
44 zzn--;
45 }
46 }
47
48 static
49 ALWAYS_INLINE
mmin(int32_t a,int32_t b)50 int32_t mmin(int32_t a, int32_t b)
51 {
52 return (a < b) ? a : b;
53 }
54
55
56 /*---------------------------------------------*/
57 /*--- Fallback O(N log(N)^2) sorting ---*/
58 /*--- algorithm, for repetitive blocks ---*/
59 /*---------------------------------------------*/
60
61 /*---------------------------------------------*/
62 static
63 inline
fallbackSimpleSort(uint32_t * fmap,uint32_t * eclass,int32_t lo,int32_t hi)64 void fallbackSimpleSort(uint32_t* fmap,
65 uint32_t* eclass,
66 int32_t lo,
67 int32_t hi)
68 {
69 int32_t i, j, tmp;
70 uint32_t ec_tmp;
71
72 if (lo == hi) return;
73
74 if (hi - lo > 3) {
75 for (i = hi-4; i >= lo; i--) {
76 tmp = fmap[i];
77 ec_tmp = eclass[tmp];
78 for (j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4)
79 fmap[j-4] = fmap[j];
80 fmap[j-4] = tmp;
81 }
82 }
83
84 for (i = hi-1; i >= lo; i--) {
85 tmp = fmap[i];
86 ec_tmp = eclass[tmp];
87 for (j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++)
88 fmap[j-1] = fmap[j];
89 fmap[j-1] = tmp;
90 }
91 }
92
93
94 /*---------------------------------------------*/
95 #define fpush(lz,hz) { \
96 stackLo[sp] = lz; \
97 stackHi[sp] = hz; \
98 sp++; \
99 }
100
101 #define fpop(lz,hz) { \
102 sp--; \
103 lz = stackLo[sp]; \
104 hz = stackHi[sp]; \
105 }
106
107 #define FALLBACK_QSORT_SMALL_THRESH 10
108 #define FALLBACK_QSORT_STACK_SIZE 100
109
110 static NOINLINE
fallbackQSort3(uint32_t * fmap,uint32_t * eclass,int32_t loSt,int32_t hiSt)111 void fallbackQSort3(uint32_t* fmap,
112 uint32_t* eclass,
113 int32_t loSt,
114 int32_t hiSt)
115 {
116 int32_t sp;
117 uint32_t r;
118 int32_t stackLo[FALLBACK_QSORT_STACK_SIZE];
119 int32_t stackHi[FALLBACK_QSORT_STACK_SIZE];
120
121 r = 0;
122
123 sp = 0;
124 fpush(loSt, hiSt);
125
126 while (sp > 0) {
127 int32_t unLo, unHi, ltLo, gtHi, n, m;
128 int32_t lo, hi;
129 uint32_t med;
130 uint32_t r3;
131
132 AssertH(sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004);
133
134 fpop(lo, hi);
135 if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
136 fallbackSimpleSort(fmap, eclass, lo, hi);
137 continue;
138 }
139
140 /* Random partitioning. Median of 3 sometimes fails to
141 * avoid bad cases. Median of 9 seems to help but
142 * looks rather expensive. This too seems to work but
143 * is cheaper. Guidance for the magic constants
144 * 7621 and 32768 is taken from Sedgewick's algorithms
145 * book, chapter 35.
146 */
147 r = ((r * 7621) + 1) % 32768;
148 r3 = r % 3;
149 if (r3 == 0)
150 med = eclass[fmap[lo]];
151 else if (r3 == 1)
152 med = eclass[fmap[(lo+hi)>>1]];
153 else
154 med = eclass[fmap[hi]];
155
156 unLo = ltLo = lo;
157 unHi = gtHi = hi;
158
159 while (1) {
160 while (1) {
161 if (unLo > unHi) break;
162 n = (int32_t)eclass[fmap[unLo]] - (int32_t)med;
163 if (n == 0) {
164 mswap(fmap[unLo], fmap[ltLo]);
165 ltLo++;
166 unLo++;
167 continue;
168 }
169 if (n > 0) break;
170 unLo++;
171 }
172 while (1) {
173 if (unLo > unHi) break;
174 n = (int32_t)eclass[fmap[unHi]] - (int32_t)med;
175 if (n == 0) {
176 mswap(fmap[unHi], fmap[gtHi]);
177 gtHi--; unHi--;
178 continue;
179 }
180 if (n < 0) break;
181 unHi--;
182 }
183 if (unLo > unHi) break;
184 mswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
185 }
186
187 AssertD(unHi == unLo-1, "fallbackQSort3(2)");
188
189 if (gtHi < ltLo) continue;
190
191 n = mmin(ltLo-lo, unLo-ltLo); mvswap(fmap, lo, unLo-n, n);
192 m = mmin(hi-gtHi, gtHi-unHi); mvswap(fmap, unLo, hi-m+1, m);
193
194 n = lo + unLo - ltLo - 1;
195 m = hi - (gtHi - unHi) + 1;
196
197 if (n - lo > hi - m) {
198 fpush(lo, n);
199 fpush(m, hi);
200 } else {
201 fpush(m, hi);
202 fpush(lo, n);
203 }
204 }
205 }
206
207 #undef fpush
208 #undef fpop
209 #undef FALLBACK_QSORT_SMALL_THRESH
210 #undef FALLBACK_QSORT_STACK_SIZE
211
212
213 /*---------------------------------------------*/
214 /* Pre:
215 * nblock > 0
216 * eclass exists for [0 .. nblock-1]
217 * ((uint8_t*)eclass) [0 .. nblock-1] holds block
218 * ptr exists for [0 .. nblock-1]
219 *
220 * Post:
221 * ((uint8_t*)eclass) [0 .. nblock-1] holds block
222 * All other areas of eclass destroyed
223 * fmap [0 .. nblock-1] holds sorted order
224 * bhtab[0 .. 2+(nblock/32)] destroyed
225 */
226
227 #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
228 #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
229 #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
230 #define WORD_BH(zz) bhtab[(zz) >> 5]
231 #define UNALIGNED_BH(zz) ((zz) & 0x01f)
232
233 static
fallbackSort(EState * state)234 void fallbackSort(EState* state)
235 {
236 int32_t ftab[257];
237 int32_t ftabCopy[256];
238 int32_t H, i, j, k, l, r, cc, cc1;
239 int32_t nNotDone;
240 int32_t nBhtab;
241 /* params */
242 uint32_t *const fmap = state->arr1;
243 uint32_t *const eclass = state->arr2;
244 #define eclass8 ((uint8_t*)eclass)
245 uint32_t *const bhtab = state->ftab;
246 const int32_t nblock = state->nblock;
247
248 /*
249 * Initial 1-char radix sort to generate
250 * initial fmap and initial BH bits.
251 */
252 for (i = 0; i < 257; i++) ftab[i] = 0;
253 for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
254 for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
255
256 j = ftab[0]; /* bbox: optimized */
257 for (i = 1; i < 257; i++) {
258 j += ftab[i];
259 ftab[i] = j;
260 }
261
262 for (i = 0; i < nblock; i++) {
263 j = eclass8[i];
264 k = ftab[j] - 1;
265 ftab[j] = k;
266 fmap[k] = i;
267 }
268
269 nBhtab = 2 + ((uint32_t)nblock / 32); /* bbox: unsigned div is easier */
270 for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
271 for (i = 0; i < 256; i++) SET_BH(ftab[i]);
272
273 /*
274 * Inductively refine the buckets. Kind-of an
275 * "exponential radix sort" (!), inspired by the
276 * Manber-Myers suffix array construction algorithm.
277 */
278
279 /*-- set sentinel bits for block-end detection --*/
280 for (i = 0; i < 32; i++) {
281 SET_BH(nblock + 2*i);
282 CLEAR_BH(nblock + 2*i + 1);
283 }
284
285 /*-- the log(N) loop --*/
286 H = 1;
287 while (1) {
288 j = 0;
289 for (i = 0; i < nblock; i++) {
290 if (ISSET_BH(i))
291 j = i;
292 k = fmap[i] - H;
293 if (k < 0)
294 k += nblock;
295 eclass[k] = j;
296 }
297
298 nNotDone = 0;
299 r = -1;
300 while (1) {
301
302 /*-- find the next non-singleton bucket --*/
303 k = r + 1;
304 while (ISSET_BH(k) && UNALIGNED_BH(k))
305 k++;
306 if (ISSET_BH(k)) {
307 while (WORD_BH(k) == 0xffffffff) k += 32;
308 while (ISSET_BH(k)) k++;
309 }
310 l = k - 1;
311 if (l >= nblock)
312 break;
313 while (!ISSET_BH(k) && UNALIGNED_BH(k))
314 k++;
315 if (!ISSET_BH(k)) {
316 while (WORD_BH(k) == 0x00000000) k += 32;
317 while (!ISSET_BH(k)) k++;
318 }
319 r = k - 1;
320 if (r >= nblock)
321 break;
322
323 /*-- now [l, r] bracket current bucket --*/
324 if (r > l) {
325 nNotDone += (r - l + 1);
326 fallbackQSort3(fmap, eclass, l, r);
327
328 /*-- scan bucket and generate header bits-- */
329 cc = -1;
330 for (i = l; i <= r; i++) {
331 cc1 = eclass[fmap[i]];
332 if (cc != cc1) {
333 SET_BH(i);
334 cc = cc1;
335 }
336 }
337 }
338 }
339
340 H *= 2;
341 if (H > nblock || nNotDone == 0)
342 break;
343 }
344
345 /*
346 * Reconstruct the original block in
347 * eclass8 [0 .. nblock-1], since the
348 * previous phase destroyed it.
349 */
350 j = 0;
351 for (i = 0; i < nblock; i++) {
352 while (ftabCopy[j] == 0)
353 j++;
354 ftabCopy[j]--;
355 eclass8[fmap[i]] = (uint8_t)j;
356 }
357 AssertH(j < 256, 1005);
358 #undef eclass8
359 }
360
361 #undef SET_BH
362 #undef CLEAR_BH
363 #undef ISSET_BH
364 #undef WORD_BH
365 #undef UNALIGNED_BH
366
367
368 /*---------------------------------------------*/
369 /*--- The main, O(N^2 log(N)) sorting ---*/
370 /*--- algorithm. Faster for "normal" ---*/
371 /*--- non-repetitive blocks. ---*/
372 /*---------------------------------------------*/
373
374 /*---------------------------------------------*/
375 static
376 NOINLINE
mainGtU(EState * state,uint32_t i1,uint32_t i2)377 int mainGtU(EState* state,
378 uint32_t i1,
379 uint32_t i2)
380 {
381 int32_t k;
382 uint8_t c1, c2;
383 uint16_t s1, s2;
384
385 uint8_t *const block = state->block;
386 uint16_t *const quadrant = state->quadrant;
387 const int32_t nblock = state->nblock;
388
389 /* Loop unrolling here is actually very useful
390 * (generated code is much simpler),
391 * code size increase is only 270 bytes (i386)
392 * but speeds up compression 10% overall
393 */
394
395 #if BZIP2_SPEED >= 1
396
397 #define TIMES_8(code) \
398 code; code; code; code; \
399 code; code; code; code;
400 #define TIMES_12(code) \
401 code; code; code; code; \
402 code; code; code; code; \
403 code; code; code; code;
404
405 #else
406
407 #define TIMES_8(code) \
408 { \
409 int nn = 8; \
410 do { \
411 code; \
412 } while (--nn); \
413 }
414 #define TIMES_12(code) \
415 { \
416 int nn = 12; \
417 do { \
418 code; \
419 } while (--nn); \
420 }
421
422 #endif
423
424 AssertD(i1 != i2, "mainGtU");
425 TIMES_12(
426 c1 = block[i1]; c2 = block[i2];
427 if (c1 != c2) return (c1 > c2);
428 i1++; i2++;
429 )
430
431 k = nblock + 8;
432
433 do {
434 TIMES_8(
435 c1 = block[i1]; c2 = block[i2];
436 if (c1 != c2) return (c1 > c2);
437 s1 = quadrant[i1]; s2 = quadrant[i2];
438 if (s1 != s2) return (s1 > s2);
439 i1++; i2++;
440 )
441
442 if (i1 >= nblock) i1 -= nblock;
443 if (i2 >= nblock) i2 -= nblock;
444
445 state->budget--;
446 k -= 8;
447 } while (k >= 0);
448
449 return False;
450 }
451 #undef TIMES_8
452 #undef TIMES_12
453
454 /*---------------------------------------------*/
455 /*
456 * Knuth's increments seem to work better
457 * than Incerpi-Sedgewick here. Possibly
458 * because the number of elems to sort is
459 * usually small, typically <= 20.
460 */
461 static
462 const uint32_t incs[14] ALIGN4 = {
463 1, 4, 13, 40, 121, 364, 1093, 3280,
464 9841, 29524, 88573, 265720,
465 797161, 2391484
466 };
467
468 static
mainSimpleSort(EState * state,int32_t lo,int32_t hi,int32_t d)469 void mainSimpleSort(EState* state,
470 int32_t lo,
471 int32_t hi,
472 int32_t d)
473 {
474 uint32_t *const ptr = state->ptr;
475
476 /* At which increment to start? */
477 int hp = 0;
478 {
479 int bigN = hi - lo;
480 if (bigN <= 0)
481 return;
482 while (incs[hp] <= bigN)
483 hp++;
484 hp--;
485 }
486
487 for (; hp >= 0; hp--) {
488 int32_t i;
489 unsigned h;
490
491 h = incs[hp];
492 i = lo + h;
493 while (1) {
494 unsigned j;
495 unsigned v;
496
497 if (i > hi) break;
498 v = ptr[i];
499 j = i;
500 while (mainGtU(state, ptr[j-h]+d, v+d)) {
501 ptr[j] = ptr[j-h];
502 j = j - h;
503 if (j <= (lo + h - 1)) break;
504 }
505 ptr[j] = v;
506 i++;
507
508 /* 1.5% overall speedup, +290 bytes */
509 #if BZIP2_SPEED >= 3
510 /*-- copy 2 --*/
511 if (i > hi) break;
512 v = ptr[i];
513 j = i;
514 while (mainGtU(state, ptr[j-h]+d, v+d)) {
515 ptr[j] = ptr[j-h];
516 j = j - h;
517 if (j <= (lo + h - 1)) break;
518 }
519 ptr[j] = v;
520 i++;
521 /*-- copy 3 --*/
522 if (i > hi) break;
523 v = ptr[i];
524 j = i;
525 while (mainGtU(state, ptr[j-h]+d, v+d)) {
526 ptr[j] = ptr[j-h];
527 j = j - h;
528 if (j <= (lo + h - 1)) break;
529 }
530 ptr[j] = v;
531 i++;
532 #endif
533 if (state->budget < 0) return;
534 }
535 }
536 }
537
538
539 /*---------------------------------------------*/
540 /*
541 * The following is an implementation of
542 * an elegant 3-way quicksort for strings,
543 * described in a paper "Fast Algorithms for
544 * Sorting and Searching Strings", by Robert
545 * Sedgewick and Jon L. Bentley.
546 */
547
548 static
549 ALWAYS_INLINE
mmed3(uint8_t a,uint8_t b,uint8_t c)550 uint8_t mmed3(uint8_t a, uint8_t b, uint8_t c)
551 {
552 uint8_t t;
553 if (a > b) {
554 t = a;
555 a = b;
556 b = t;
557 }
558 /* here b >= a */
559 if (b > c) {
560 b = c;
561 if (a > b)
562 b = a;
563 }
564 return b;
565 }
566
567 #define mpush(lz,hz,dz) \
568 { \
569 stackLo[sp] = lz; \
570 stackHi[sp] = hz; \
571 stackD [sp] = dz; \
572 sp++; \
573 }
574
575 #define mpop(lz,hz,dz) \
576 { \
577 sp--; \
578 lz = stackLo[sp]; \
579 hz = stackHi[sp]; \
580 dz = stackD [sp]; \
581 }
582
583 #define mnextsize(az) (nextHi[az] - nextLo[az])
584
585 #define mnextswap(az,bz) \
586 { \
587 int32_t tz; \
588 tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
589 tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
590 tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; \
591 }
592
593 #define MAIN_QSORT_SMALL_THRESH 20
594 #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
595 #define MAIN_QSORT_STACK_SIZE 100
596
597 static NOINLINE
mainQSort3(EState * state,int32_t loSt,int32_t hiSt)598 void mainQSort3(EState* state,
599 int32_t loSt,
600 int32_t hiSt
601 /*int32_t dSt*/)
602 {
603 enum { dSt = BZ_N_RADIX };
604 int32_t unLo, unHi, ltLo, gtHi, n, m, med;
605 int32_t sp, lo, hi, d;
606
607 int32_t stackLo[MAIN_QSORT_STACK_SIZE];
608 int32_t stackHi[MAIN_QSORT_STACK_SIZE];
609 int32_t stackD [MAIN_QSORT_STACK_SIZE];
610
611 int32_t nextLo[3];
612 int32_t nextHi[3];
613 int32_t nextD [3];
614
615 uint32_t *const ptr = state->ptr;
616 uint8_t *const block = state->block;
617
618 sp = 0;
619 mpush(loSt, hiSt, dSt);
620
621 while (sp > 0) {
622 AssertH(sp < MAIN_QSORT_STACK_SIZE - 2, 1001);
623
624 mpop(lo, hi, d);
625 if (hi - lo < MAIN_QSORT_SMALL_THRESH
626 || d > MAIN_QSORT_DEPTH_THRESH
627 ) {
628 mainSimpleSort(state, lo, hi, d);
629 if (state->budget < 0)
630 return;
631 continue;
632 }
633 med = (int32_t) mmed3(block[ptr[lo ] + d],
634 block[ptr[hi ] + d],
635 block[ptr[(lo+hi) >> 1] + d]);
636
637 unLo = ltLo = lo;
638 unHi = gtHi = hi;
639
640 while (1) {
641 while (1) {
642 if (unLo > unHi)
643 break;
644 n = ((int32_t)block[ptr[unLo]+d]) - med;
645 if (n == 0) {
646 mswap(ptr[unLo], ptr[ltLo]);
647 ltLo++;
648 unLo++;
649 continue;
650 }
651 if (n > 0) break;
652 unLo++;
653 }
654 while (1) {
655 if (unLo > unHi)
656 break;
657 n = ((int32_t)block[ptr[unHi]+d]) - med;
658 if (n == 0) {
659 mswap(ptr[unHi], ptr[gtHi]);
660 gtHi--;
661 unHi--;
662 continue;
663 }
664 if (n < 0) break;
665 unHi--;
666 }
667 if (unLo > unHi)
668 break;
669 mswap(ptr[unLo], ptr[unHi]);
670 unLo++;
671 unHi--;
672 }
673
674 AssertD(unHi == unLo-1, "mainQSort3(2)");
675
676 if (gtHi < ltLo) {
677 mpush(lo, hi, d + 1);
678 continue;
679 }
680
681 n = mmin(ltLo-lo, unLo-ltLo); mvswap(ptr, lo, unLo-n, n);
682 m = mmin(hi-gtHi, gtHi-unHi); mvswap(ptr, unLo, hi-m+1, m);
683
684 n = lo + unLo - ltLo - 1;
685 m = hi - (gtHi - unHi) + 1;
686
687 nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
688 nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
689 nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
690
691 if (mnextsize(0) < mnextsize(1)) mnextswap(0, 1);
692 if (mnextsize(1) < mnextsize(2)) mnextswap(1, 2);
693 if (mnextsize(0) < mnextsize(1)) mnextswap(0, 1);
694
695 AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)");
696 AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)");
697
698 mpush(nextLo[0], nextHi[0], nextD[0]);
699 mpush(nextLo[1], nextHi[1], nextD[1]);
700 mpush(nextLo[2], nextHi[2], nextD[2]);
701 }
702 }
703
704 #undef mpush
705 #undef mpop
706 #undef mnextsize
707 #undef mnextswap
708 #undef MAIN_QSORT_SMALL_THRESH
709 #undef MAIN_QSORT_DEPTH_THRESH
710 #undef MAIN_QSORT_STACK_SIZE
711
712
713 /*---------------------------------------------*/
714 /* Pre:
715 * nblock > N_OVERSHOOT
716 * block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
717 * ((uint8_t*)block32) [0 .. nblock-1] holds block
718 * ptr exists for [0 .. nblock-1]
719 *
720 * Post:
721 * ((uint8_t*)block32) [0 .. nblock-1] holds block
722 * All other areas of block32 destroyed
723 * ftab[0 .. 65536] destroyed
724 * ptr [0 .. nblock-1] holds sorted order
725 * if (*budget < 0), sorting was abandoned
726 */
727
728 #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
729 #define SETMASK (1 << 21)
730 #define CLEARMASK (~(SETMASK))
731
732 static NOINLINE
mainSort(EState * state)733 void mainSort(EState* state)
734 {
735 int32_t i, j;
736 Bool bigDone[256];
737 uint8_t runningOrder[256];
738 /* bbox: moved to EState to save stack
739 int32_t copyStart[256];
740 int32_t copyEnd [256];
741 */
742 #define copyStart (state->mainSort__copyStart)
743 #define copyEnd (state->mainSort__copyEnd)
744
745 uint32_t *const ptr = state->ptr;
746 uint8_t *const block = state->block;
747 uint32_t *const ftab = state->ftab;
748 const int32_t nblock = state->nblock;
749 uint16_t *const quadrant = state->quadrant;
750
751 /*-- set up the 2-byte frequency table --*/
752 /* was: for (i = 65536; i >= 0; i--) ftab[i] = 0; */
753 memset(ftab, 0, 65537 * sizeof(ftab[0]));
754
755 j = block[0] << 8;
756 i = nblock - 1;
757 /* 3%, +300 bytes */
758 #if BZIP2_SPEED >= 2
759 for (; i >= 3; i -= 4) {
760 quadrant[i] = 0;
761 j = (j >> 8) | (((unsigned)block[i]) << 8);
762 ftab[j]++;
763 quadrant[i-1] = 0;
764 j = (j >> 8) | (((unsigned)block[i-1]) << 8);
765 ftab[j]++;
766 quadrant[i-2] = 0;
767 j = (j >> 8) | (((unsigned)block[i-2]) << 8);
768 ftab[j]++;
769 quadrant[i-3] = 0;
770 j = (j >> 8) | (((unsigned)block[i-3]) << 8);
771 ftab[j]++;
772 }
773 #endif
774 for (; i >= 0; i--) {
775 quadrant[i] = 0;
776 j = (j >> 8) | (((unsigned)block[i]) << 8);
777 ftab[j]++;
778 }
779
780 /*-- (emphasises close relationship of block & quadrant) --*/
781 for (i = 0; i < BZ_N_OVERSHOOT; i++) {
782 block [nblock+i] = block[i];
783 quadrant[nblock+i] = 0;
784 }
785
786 /*-- Complete the initial radix sort --*/
787 j = ftab[0]; /* bbox: optimized */
788 for (i = 1; i <= 65536; i++) {
789 j += ftab[i];
790 ftab[i] = j;
791 }
792
793 {
794 unsigned s;
795 s = block[0] << 8;
796 i = nblock - 1;
797 #if BZIP2_SPEED >= 2
798 for (; i >= 3; i -= 4) {
799 s = (s >> 8) | (block[i] << 8);
800 j = ftab[s] - 1;
801 ftab[s] = j;
802 ptr[j] = i;
803 s = (s >> 8) | (block[i-1] << 8);
804 j = ftab[s] - 1;
805 ftab[s] = j;
806 ptr[j] = i-1;
807 s = (s >> 8) | (block[i-2] << 8);
808 j = ftab[s] - 1;
809 ftab[s] = j;
810 ptr[j] = i-2;
811 s = (s >> 8) | (block[i-3] << 8);
812 j = ftab[s] - 1;
813 ftab[s] = j;
814 ptr[j] = i-3;
815 }
816 #endif
817 for (; i >= 0; i--) {
818 s = (s >> 8) | (block[i] << 8);
819 j = ftab[s] - 1;
820 ftab[s] = j;
821 ptr[j] = i;
822 }
823 }
824
825 /*
826 * Now ftab contains the first loc of every small bucket.
827 * Calculate the running order, from smallest to largest
828 * big bucket.
829 */
830 for (i = 0; i <= 255; i++) {
831 bigDone [i] = False;
832 runningOrder[i] = i;
833 }
834
835 {
836 /* bbox: was: int32_t h = 1; */
837 /* do h = 3 * h + 1; while (h <= 256); */
838 unsigned h = 364;
839
840 do {
841 /*h = h / 3;*/
842 h = (h * 171) >> 9; /* bbox: fast h/3 */
843 for (i = h; i <= 255; i++) {
844 unsigned vv, jh;
845 vv = runningOrder[i]; /* uint8[] */
846 j = i;
847 while (jh = j - h, BIGFREQ(runningOrder[jh]) > BIGFREQ(vv)) {
848 runningOrder[j] = runningOrder[jh];
849 j = jh;
850 if (j < h)
851 break;
852 }
853 runningOrder[j] = vv;
854 }
855 } while (h != 1);
856 }
857
858 /*
859 * The main sorting loop.
860 */
861
862 for (i = 0; /*i <= 255*/; i++) {
863 unsigned ss;
864
865 /*
866 * Process big buckets, starting with the least full.
867 * Basically this is a 3-step process in which we call
868 * mainQSort3 to sort the small buckets [ss, j], but
869 * also make a big effort to avoid the calls if we can.
870 */
871 ss = runningOrder[i];
872
873 /*
874 * Step 1:
875 * Complete the big bucket [ss] by quicksorting
876 * any unsorted small buckets [ss, j], for j != ss.
877 * Hopefully previous pointer-scanning phases have already
878 * completed many of the small buckets [ss, j], so
879 * we don't have to sort them at all.
880 */
881 for (j = 0; j <= 255; j++) {
882 if (j != ss) {
883 unsigned sb;
884 sb = (ss << 8) + j;
885 if (!(ftab[sb] & SETMASK)) {
886 int32_t lo = ftab[sb] /*& CLEARMASK (redundant)*/;
887 int32_t hi = (ftab[sb+1] & CLEARMASK) - 1;
888 if (hi > lo) {
889 mainQSort3(state, lo, hi /*,BZ_N_RADIX*/);
890 if (state->budget < 0) return;
891 }
892 }
893 ftab[sb] |= SETMASK;
894 }
895 }
896
897 AssertH(!bigDone[ss], 1006);
898
899 /*
900 * Step 2:
901 * Now scan this big bucket [ss] so as to synthesise the
902 * sorted order for small buckets [t, ss] for all t,
903 * including, magically, the bucket [ss,ss] too.
904 * This will avoid doing Real Work in subsequent Step 1's.
905 */
906 {
907 for (j = 0; j <= 255; j++) {
908 copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
909 copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
910 }
911 for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
912 unsigned c1;
913 int32_t k;
914 k = ptr[j] - 1;
915 if (k < 0)
916 k += nblock;
917 c1 = block[k];
918 if (!bigDone[c1])
919 ptr[copyStart[c1]++] = k;
920 }
921 for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
922 unsigned c1;
923 int32_t k;
924 k = ptr[j]-1;
925 if (k < 0)
926 k += nblock;
927 c1 = block[k];
928 if (!bigDone[c1])
929 ptr[copyEnd[c1]--] = k;
930 }
931 }
932
933 /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
934 * Necessity for this case is demonstrated by compressing
935 * a sequence of approximately 48.5 million of character
936 * 251; 1.0.0/1.0.1 will then die here. */
937 AssertH((copyStart[ss]-1 == copyEnd[ss]) \
938 || (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), 1007);
939
940 for (j = 0; j <= 255; j++)
941 ftab[(j << 8) + ss] |= SETMASK;
942
943 if (i == 255)
944 break;
945
946 /*
947 * Step 3:
948 * The [ss] big bucket is now done. Record this fact,
949 * and update the quadrant descriptors. Remember to
950 * update quadrants in the overshoot area too, if
951 * necessary. The "if (i < 255)" test merely skips
952 * this updating for the last bucket processed, since
953 * updating for the last bucket is pointless.
954 *
955 * The quadrant array provides a way to incrementally
956 * cache sort orderings, as they appear, so as to
957 * make subsequent comparisons in fullGtU() complete
958 * faster. For repetitive blocks this makes a big
959 * difference (but not big enough to be able to avoid
960 * the fallback sorting mechanism, exponential radix sort).
961 *
962 * The precise meaning is: at all times:
963 *
964 * for 0 <= i < nblock and 0 <= j <= nblock
965 *
966 * if block[i] != block[j],
967 *
968 * then the relative values of quadrant[i] and
969 * quadrant[j] are meaningless.
970 *
971 * else {
972 * if quadrant[i] < quadrant[j]
973 * then the string starting at i lexicographically
974 * precedes the string starting at j
975 *
976 * else if quadrant[i] > quadrant[j]
977 * then the string starting at j lexicographically
978 * precedes the string starting at i
979 *
980 * else
981 * the relative ordering of the strings starting
982 * at i and j has not yet been determined.
983 * }
984 */
985 bigDone[ss] = True;
986
987 {
988 unsigned bbStart = ftab[ss << 8] & CLEARMASK;
989 unsigned bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
990 unsigned shifts = 0;
991
992 while ((bbSize >> shifts) > 65534) shifts++;
993
994 for (j = bbSize-1; j >= 0; j--) {
995 unsigned a2update = ptr[bbStart + j]; /* uint32[] */
996 uint16_t qVal = (uint16_t)(j >> shifts);
997 quadrant[a2update] = qVal;
998 if (a2update < BZ_N_OVERSHOOT)
999 quadrant[a2update + nblock] = qVal;
1000 }
1001 AssertH(((bbSize-1) >> shifts) <= 65535, 1002);
1002 }
1003 }
1004 #undef runningOrder
1005 #undef copyStart
1006 #undef copyEnd
1007 }
1008
1009 #undef BIGFREQ
1010 #undef SETMASK
1011 #undef CLEARMASK
1012
1013
1014 /*---------------------------------------------*/
1015 /* Pre:
1016 * nblock > 0
1017 * arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1018 * ((uint8_t*)arr2)[0 .. nblock-1] holds block
1019 * arr1 exists for [0 .. nblock-1]
1020 *
1021 * Post:
1022 * ((uint8_t*)arr2) [0 .. nblock-1] holds block
1023 * All other areas of block destroyed
1024 * ftab[0 .. 65536] destroyed
1025 * arr1[0 .. nblock-1] holds sorted order
1026 */
1027 static NOINLINE
BZ2_blockSort(EState * state)1028 int32_t BZ2_blockSort(EState* state)
1029 {
1030 /* In original bzip2 1.0.4, it's a parameter, but 30
1031 * (which was the default) should work ok. */
1032 enum { wfact = 30 };
1033 unsigned i;
1034 int32_t origPtr = origPtr;
1035
1036 if (state->nblock >= 10000) {
1037 /* Calculate the location for quadrant, remembering to get
1038 * the alignment right. Assumes that &(block[0]) is at least
1039 * 2-byte aligned -- this should be ok since block is really
1040 * the first section of arr2.
1041 */
1042 i = state->nblock + BZ_N_OVERSHOOT;
1043 if (i & 1)
1044 i++;
1045 state->quadrant = (uint16_t*) &(state->block[i]);
1046
1047 /* (wfact-1) / 3 puts the default-factor-30
1048 * transition point at very roughly the same place as
1049 * with v0.1 and v0.9.0.
1050 * Not that it particularly matters any more, since the
1051 * resulting compressed stream is now the same regardless
1052 * of whether or not we use the main sort or fallback sort.
1053 */
1054 state->budget = state->nblock * ((wfact-1) / 3);
1055 mainSort(state);
1056 if (state->budget >= 0)
1057 goto good;
1058 }
1059 fallbackSort(state);
1060 good:
1061
1062 #if BZ_LIGHT_DEBUG
1063 origPtr = -1;
1064 #endif
1065 for (i = 0; i < state->nblock; i++) {
1066 if (state->ptr[i] == 0) {
1067 origPtr = i;
1068 break;
1069 }
1070 }
1071
1072 AssertH(origPtr != -1, 1003);
1073 return origPtr;
1074 }
1075
1076
1077 /*-------------------------------------------------------------*/
1078 /*--- end blocksort.c ---*/
1079 /*-------------------------------------------------------------*/
1080