/*
 * bzip2 is written by Julian Seward <jseward@bzip.org>.
 * Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>.
 * See README and LICENSE files in this directory for more information.
 */

/*-------------------------------------------------------------*/
/*--- Compression machinery (not incl block sorting)        ---*/
/*---                                            compress.c ---*/
/*-------------------------------------------------------------*/

/* ------------------------------------------------------------------
This file is part of bzip2/libbzip2, a program and library for
lossless, block-sorting data compression.

bzip2/libbzip2 version 1.0.4 of 20 December 2006
Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>

Please read the WARNING, DISCLAIMER and PATENTS sections in the
README file.

This program is released under the terms of the license contained
in the file LICENSE.
------------------------------------------------------------------ */

/* CHANGES
 * 0.9.0    -- original version.
 * 0.9.0a/b -- no changes in this file.
 * 0.9.0c   -- changed setting of nGroups in sendMTFValues()
 *             so as to do a bit better on small files
 */

/* #include "bzlib_private.h" */

#if BZIP2_SPEED >= 5
# define ALWAYS_INLINE_5 ALWAYS_INLINE
#else
# define ALWAYS_INLINE_5 /*nothing*/
#endif

/*---------------------------------------------------*/
/*--- Bit stream I/O                              ---*/
/*---------------------------------------------------*/
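/*
 * The bit buffer: s->bsBuff accumulates bits at its most-significant
 * end, s->bsLive counts how many of the top bits are valid. Writers
 * flush whole bytes from the top (bsBuff >> 24) until fewer than
 * 8 valid bits remain, then splice new bits in below the live ones.
 */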

/*---------------------------------------------------*/
static
void BZ2_bsInitWrite(EState* s)
{
	s->bsLive = 0;
	s->bsBuff = 0;
}


/*---------------------------------------------------*/
static NOINLINE
void bsFinishWrite(EState* s)
{
	while (s->bsLive > 0) {
		*s->posZ++ = (uint8_t)(s->bsBuff >> 24);
		s->bsBuff <<= 8;
		s->bsLive -= 8;
	}
}


/*---------------------------------------------------*/
static
/* Helps only on level 5, on other levels hurts. ? */
ALWAYS_INLINE_5
void bsW(EState* s, int32_t n, uint32_t v)
{
	while (s->bsLive >= 8) {
		*s->posZ++ = (uint8_t)(s->bsBuff >> 24);
		s->bsBuff <<= 8;
		s->bsLive -= 8;
	}
	s->bsBuff |= (v << (32 - s->bsLive - n));
	s->bsLive += n;
}
/* Same with n == 16: */
static
ALWAYS_INLINE_5
void bsW16(EState* s, uint32_t v)
{
	while (s->bsLive >= 8) {
		*s->posZ++ = (uint8_t)(s->bsBuff >> 24);
		s->bsBuff <<= 8;
		s->bsLive -= 8;
	}
	s->bsBuff |= (v << (16 - s->bsLive));
	s->bsLive += 16;
}
/* Same with n == 1: */
static
ALWAYS_INLINE /* one callsite */
void bsW1_1(EState* s)
{
	/* need space for only 1 bit, no need for loop freeing > 8 bits */
	if (s->bsLive >= 8) {
		*s->posZ++ = (uint8_t)(s->bsBuff >> 24);
		s->bsBuff <<= 8;
		s->bsLive -= 8;
	}
	s->bsBuff |= (1 << (31 - s->bsLive));
	s->bsLive += 1;
}
static
ALWAYS_INLINE_5
void bsW1_0(EState* s)
{
	/* need space for only 1 bit, no need for loop freeing > 8 bits */
	if (s->bsLive >= 8) {
		*s->posZ++ = (uint8_t)(s->bsBuff >> 24);
		s->bsBuff <<= 8;
		s->bsLive -= 8;
	}
	//s->bsBuff |= (0 << (31 - s->bsLive));
	s->bsLive += 1;
}


/*---------------------------------------------------*/
static ALWAYS_INLINE
void bsPutU16(EState* s, unsigned u)
{
	bsW16(s, u);
}


/*---------------------------------------------------*/
static
void bsPutU32(EState* s, unsigned u)
{
	//bsW(s, 32, u); // can't use: may try "uint32 << -n"
	bsW16(s, (u >> 16) & 0xffff);
	bsW16(s, u         & 0xffff);
}


/*---------------------------------------------------*/
/*--- The back end proper                         ---*/
/*---------------------------------------------------*/

/*---------------------------------------------------*/
static
void makeMaps_e(EState* s)
{
	int i;
	unsigned cnt = 0;
	for (i = 0; i < 256; i++) {
		if (s->inUse[i]) {
			s->unseqToSeq[i] = cnt;
			cnt++;
		}
	}
	s->nInUse = cnt;
}


/*---------------------------------------------------*/
/*
 * This bit of code is performance-critical.
 * On 32bit x86, gcc-6.3.0 was observed to spill ryy_j to stack,
 * resulting in abysmal performance (x3 slowdown).
 * Forcing it into a separate function alleviates register pressure,
 * and spillage no longer happens.
 * Other versions of gcc do not exhibit this problem, but out-of-line code
 * seems to be helping them too (code is both smaller and faster).
 * Therefore NOINLINE is enabled for the entire 32bit x86 arch for now,
 * without a check for gcc version.
 */
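/*
 * One move-to-front step: scan the current symbol order yy[] for ll_i,
 * shifting the preceding symbols up by one, put ll_i at the front, and
 * return the position where it was found. The caller emits this +1,
 * since MTF value 0 (a hit at yy[0]) is handled by the zero-run coder.
 */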
static
#if defined __i386__
NOINLINE
#endif
int inner_loop(uint8_t *yy, uint8_t ll_i)
{
	register uint8_t  rtmp;
	register uint8_t* ryy_j;
	rtmp  = yy[1];
	yy[1] = yy[0];
	ryy_j = &(yy[1]);
	while (ll_i != rtmp) {
		register uint8_t rtmp2;
		ryy_j++;
		rtmp2  = rtmp;
		rtmp   = *ryy_j;
		*ryy_j = rtmp2;
	}
	yy[0] = rtmp;
	return ryy_j - &(yy[0]);
}
static NOINLINE
void generateMTFValues(EState* s)
{
	uint8_t yy[256];
	int i;
	int zPend;
	int32_t wr;

	/*
	 * After sorting (eg, here),
	 * s->arr1[0 .. s->nblock-1] holds sorted order,
	 * and
	 * ((uint8_t*)s->arr2)[0 .. s->nblock-1]
	 * holds the original block data.
	 *
	 * The first thing to do is generate the MTF values,
	 * and put them in ((uint16_t*)s->arr1)[0 .. s->nblock-1].
	 *
	 * Because there are strictly fewer or equal MTF values
	 * than block values, ptr values in this area are overwritten
	 * with MTF values only when they are no longer needed.
	 *
	 * The final compressed bitstream is generated into the
	 * area starting at &((uint8_t*)s->arr2)[s->nblock]
	 *
	 * These storage aliases are set up in bzCompressInit(),
	 * except for the last one, which is arranged in
	 * compressBlock().
	 */
	uint32_t* ptr   = s->ptr;

	makeMaps_e(s);

	wr = 0;
	zPend = 0;
	for (i = 0; i <= s->nInUse+1; i++)
		s->mtfFreq[i] = 0;

	for (i = 0; i < s->nInUse; i++)
		yy[i] = (uint8_t) i;

	for (i = 0; i < s->nblock; i++) {
		uint8_t ll_i = ll_i; /* gcc 4.3.1 thinks it may be used w/o init */
		int32_t j;

		AssertD(wr <= i, "generateMTFValues(1)");
		j = ptr[i] - 1;
		if (j < 0)
			j += s->nblock;
		ll_i = s->unseqToSeq[s->block[j]];
		AssertD(ll_i < s->nInUse, "generateMTFValues(2a)");

		if (yy[0] == ll_i) {
			zPend++;
			continue;
		}

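		/*
		 * Emit the pending run of zPend zeroes using the RUNA/RUNB
		 * scheme: the run length is written in bijective base 2,
		 * least significant digit first, with RUNA standing for
		 * digit 1 and RUNB for digit 2.
		 */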
		if (zPend > 0) {
 process_zPend:
			zPend--;
			while (1) {
#if 0
				if (zPend & 1) {
					s->mtfv[wr] = BZ_RUNB; wr++;
					s->mtfFreq[BZ_RUNB]++;
				} else {
					s->mtfv[wr] = BZ_RUNA; wr++;
					s->mtfFreq[BZ_RUNA]++;
				}
#else /* same as above, since BZ_RUNA is 0 and BZ_RUNB is 1 */
				unsigned run = zPend & 1;
				s->mtfv[wr] = run;
				wr++;
				s->mtfFreq[run]++;
#endif
				zPend -= 2;
				if (zPend < 0)
					break;
				zPend = (unsigned)zPend / 2;
				/* bbox: unsigned div is easier */
			}
			if (i < 0) /* came via "goto process_zPend"? exit */
				goto end;
			zPend = 0;
		}
		j = inner_loop(yy, ll_i);
		s->mtfv[wr] = j+1;
		wr++;
		s->mtfFreq[j+1]++;
	}

	i = -1;
	if (zPend > 0)
		goto process_zPend; /* "process it and come back here" */
 end:
	s->mtfv[wr] = s->nInUse+1;
	wr++;
	s->mtfFreq[s->nInUse+1]++;

	s->nMTF = wr;
}


/*---------------------------------------------------*/
#define BZ_LESSER_ICOST  0
#define BZ_GREATER_ICOST 15
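/* Pseudo bit-costs used to seed the coding tables: symbols inside a
 * table's initial partition are "cheap" (0 bits), all others are
 * "expensive" (15 bits). */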

static NOINLINE
void sendMTFValues(EState* s)
{
	int32_t t, i;
	unsigned iter;
	unsigned gs;
	int32_t alphaSize;
	unsigned nSelectors, selCtr;
	int32_t nGroups;

	/*
	 * uint8_t len[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
	 * is a global since the decoder also needs it.
	 *
	 * int32_t  code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
	 * int32_t  rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
	 * are also globals only used in this proc.
	 * Made global to keep stack frame size small.
	 */
#define code     sendMTFValues__code
#define rfreq    sendMTFValues__rfreq
#define len_pack sendMTFValues__len_pack

	unsigned /*uint16_t*/ cost[BZ_N_GROUPS];

	uint16_t* mtfv = s->mtfv;

	alphaSize = s->nInUse + 2;
	for (t = 0; t < BZ_N_GROUPS; t++) {
		unsigned v;
		for (v = 0; v < alphaSize; v++)
			s->len[t][v] = BZ_GREATER_ICOST;
	}

	/*--- Decide how many coding tables to use ---*/
	AssertH(s->nMTF > 0, 3001);
	// 1..199 = 2
	// 200..599 = 3
	// 600..1199 = 4
	// 1200..2399 = 5
	// 2400..99999 = 6
	nGroups = 2;
	nGroups += (s->nMTF >= 200);
	nGroups += (s->nMTF >= 600);
	nGroups += (s->nMTF >= 1200);
	nGroups += (s->nMTF >= 2400);

	/*--- Generate an initial set of coding tables ---*/
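	/*
	 * Split the alphabet into nGroups consecutive ranges of roughly
	 * equal total symbol frequency; table t starts out coding its own
	 * range cheaply and everything else expensively.
	 */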
	{
		unsigned nPart, remF;

		nPart = nGroups;
		remF  = s->nMTF;
		gs = 0;
		while (nPart > 0) {
			unsigned v;
			unsigned ge;
			unsigned tFreq, aFreq;

			tFreq = remF / nPart;
			ge = gs;
			aFreq = 0;
			while (aFreq < tFreq && ge < alphaSize) {
				aFreq += s->mtfFreq[ge++];
			}
			ge--;

			if (ge > gs
			 && nPart != nGroups && nPart != 1
			 && ((nGroups - nPart) % 2 == 1) /* bbox: can this be replaced by x & 1? */
			) {
				aFreq -= s->mtfFreq[ge];
				ge--;
			}

			for (v = 0; v < alphaSize; v++)
				if (v >= gs && v <= ge)
					s->len[nPart-1][v] = BZ_LESSER_ICOST;
				else
					s->len[nPart-1][v] = BZ_GREATER_ICOST;

			nPart--;
			gs = ge + 1;
			remF -= aFreq;
		}
	}

	/*
	 * Iterate up to BZ_N_ITERS times to improve the tables.
	 */
	for (iter = 0; iter < BZ_N_ITERS; iter++) {
		for (t = 0; t < nGroups; t++) {
			unsigned v;
			for (v = 0; v < alphaSize; v++)
				s->rfreq[t][v] = 0;
		}

#if BZIP2_SPEED >= 5
		/*
		 * Set up an auxiliary length table which is used to fast-track
		 * the common case (nGroups == 6).
		 */
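		/* Each len_pack word carries two tables' code lengths (in its
		 * low and high 16 bits), so one 32-bit add in the cost loop
		 * below accumulates the cost for a pair of tables at once. */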
		if (nGroups == 6) {
			unsigned v;
			for (v = 0; v < alphaSize; v++) {
				s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v];
				s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v];
				s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v];
			}
		}
#endif
		nSelectors = 0;
		gs = 0;
		while (1) {
			unsigned ge;
			unsigned bt, bc;

			/*--- Set group start & end marks. --*/
			if (gs >= s->nMTF)
				break;
			ge = gs + BZ_G_SIZE - 1;
			if (ge >= s->nMTF)
				ge = s->nMTF-1;

			/*
			 * Calculate the cost of this group as coded
			 * by each of the coding tables.
			 */
			for (t = 0; t < nGroups; t++)
				cost[t] = 0;
#if BZIP2_SPEED >= 5
			if (nGroups == 6 && 50 == ge-gs+1) {
				/*--- fast track the common case ---*/
				register uint32_t cost01, cost23, cost45;
				register uint16_t icv;
				cost01 = cost23 = cost45 = 0;
#define BZ_ITER(nn) \
	icv = mtfv[gs+(nn)]; \
	cost01 += s->len_pack[icv][0]; \
	cost23 += s->len_pack[icv][1]; \
	cost45 += s->len_pack[icv][2];
				BZ_ITER(0);  BZ_ITER(1);  BZ_ITER(2);  BZ_ITER(3);  BZ_ITER(4);
				BZ_ITER(5);  BZ_ITER(6);  BZ_ITER(7);  BZ_ITER(8);  BZ_ITER(9);
				BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14);
				BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19);
				BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24);
				BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29);
				BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34);
				BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39);
				BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44);
				BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49);
#undef BZ_ITER
				cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16;
				cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16;
				cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16;
			} else
#endif
			{
				/*--- slow version which correctly handles all situations ---*/
				for (i = gs; i <= ge; i++) {
					unsigned /*uint16_t*/ icv = mtfv[i];
					for (t = 0; t < nGroups; t++)
						cost[t] += s->len[t][icv];
				}
			}
			/*
			 * Find the coding table which is best for this group,
			 * and record its identity in the selector table.
			 */
			/*bc = 999999999;*/
			/*bt = -1;*/
			bc = cost[0];
			bt = 0;
			for (t = 1 /*0*/; t < nGroups; t++) {
				if (cost[t] < bc) {
					bc = cost[t];
					bt = t;
				}
			}
			s->selector[nSelectors] = bt;
			nSelectors++;

			/*
			 * Increment the symbol frequencies for the selected table.
			 */
/* 1% faster compress. +800 bytes */
#if BZIP2_SPEED >= 4
			if (nGroups == 6 && 50 == ge-gs+1) {
				/*--- fast track the common case ---*/
#define BZ_ITUR(nn) s->rfreq[bt][mtfv[gs + (nn)]]++
				BZ_ITUR(0);  BZ_ITUR(1);  BZ_ITUR(2);  BZ_ITUR(3);  BZ_ITUR(4);
				BZ_ITUR(5);  BZ_ITUR(6);  BZ_ITUR(7);  BZ_ITUR(8);  BZ_ITUR(9);
				BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14);
				BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19);
				BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24);
				BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29);
				BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34);
				BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39);
				BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
				BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);
#undef BZ_ITUR
				gs = ge + 1;
			} else
#endif
			{
				/*--- slow version which correctly handles all situations ---*/
				while (gs <= ge) {
					s->rfreq[bt][mtfv[gs]]++;
					gs++;
				}
				/* already is: gs = ge + 1; */
			}
		}

		/*
		 * Recompute the tables based on the accumulated frequencies.
		 */
		/* maxLen was changed from 20 to 17 in bzip2-1.0.3.  See
		 * comment in huffman.c for details. */
		for (t = 0; t < nGroups; t++)
			BZ2_hbMakeCodeLengths(s, &(s->len[t][0]), &(s->rfreq[t][0]), alphaSize, 17 /*20*/);
	}

	AssertH(nGroups < 8, 3002);
	AssertH(nSelectors < 32768 && nSelectors <= (2 + (900000 / BZ_G_SIZE)), 3003);

	/*--- Compute MTF values for the selectors. ---*/
	{
		uint8_t pos[BZ_N_GROUPS], ll_i, tmp2, tmp;

		for (i = 0; i < nGroups; i++)
			pos[i] = i;
		for (i = 0; i < nSelectors; i++) {
			unsigned j;
			ll_i = s->selector[i];
			j = 0;
			tmp = pos[j];
			while (ll_i != tmp) {
				j++;
				tmp2 = tmp;
				tmp = pos[j];
				pos[j] = tmp2;
			}
			pos[0] = tmp;
			s->selectorMtf[i] = j;
		}
	}

	/*--- Assign actual codes for the tables. --*/
	for (t = 0; t < nGroups; t++) {
		unsigned minLen = 32; //todo: s->len[t][0];
		unsigned maxLen = 0;  //todo: s->len[t][0];
		for (i = 0; i < alphaSize; i++) {
			if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
			if (s->len[t][i] < minLen) minLen = s->len[t][i];
		}
		AssertH(!(maxLen > 17 /*20*/), 3004);
		AssertH(!(minLen < 1), 3005);
		BZ2_hbAssignCodes(&(s->code[t][0]), &(s->len[t][0]), minLen, maxLen, alphaSize);
	}

	/*--- Transmit the mapping table. ---*/
	{
		/* bbox: optimized a bit more than in bzip2 */
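		/*
		 * The 256-entry inUse[] map is sent as a two-level bitmap:
		 * first a 16-bit mask saying which 16-symbol groups contain
		 * any used symbol, then one 16-bit mask per non-empty group.
		 */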
		int inUse16 = 0;
		for (i = 0; i < 16; i++) {
			if (sizeof(long) <= 4) {
				inUse16 = inUse16*2 +
					((*(bb__aliased_uint32_t*)&(s->inUse[i * 16 + 0])
					| *(bb__aliased_uint32_t*)&(s->inUse[i * 16 + 4])
					| *(bb__aliased_uint32_t*)&(s->inUse[i * 16 + 8])
					| *(bb__aliased_uint32_t*)&(s->inUse[i * 16 + 12])) != 0);
			} else { /* Our CPU can do better */
				inUse16 = inUse16*2 +
					((*(bb__aliased_uint64_t*)&(s->inUse[i * 16 + 0])
					| *(bb__aliased_uint64_t*)&(s->inUse[i * 16 + 8])) != 0);
			}
		}

		bsW16(s, inUse16);

		inUse16 <<= (sizeof(int)*8 - 16); /* move 15th bit into sign bit */
		for (i = 0; i < 16; i++) {
			if (inUse16 < 0) {
				unsigned v16 = 0;
				unsigned j;
				for (j = 0; j < 16; j++)
					v16 = v16*2 + s->inUse[i * 16 + j];
				bsW16(s, v16);
			}
			inUse16 <<= 1;
		}
	}

	/*--- Now the selectors. ---*/
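	/* Each MTF-coded selector value j is sent in unary: j 1-bits
	 * followed by a terminating 0-bit. */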
	bsW(s, 3, nGroups);
	bsW(s, 15, nSelectors);
	for (i = 0; i < nSelectors; i++) {
		unsigned j;
		for (j = 0; j < s->selectorMtf[i]; j++)
			bsW1_1(s);
		bsW1_0(s);
	}

	/*--- Now the coding tables. ---*/
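	/* Code lengths are delta-coded per table: a 5-bit starting length,
	 * then for each symbol "10" means increment, "11" means decrement,
	 * and a lone 0-bit means "this is the length, move on". */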
	for (t = 0; t < nGroups; t++) {
		unsigned curr = s->len[t][0];
		bsW(s, 5, curr);
		for (i = 0; i < alphaSize; i++) {
			while (curr < s->len[t][i]) { bsW(s, 2, 2); curr++; /* 10 */ }
			while (curr > s->len[t][i]) { bsW(s, 2, 3); curr--; /* 11 */ }
			bsW1_0(s);
		}
	}

	/*--- And finally, the block data proper ---*/
	selCtr = 0;
	gs = 0;
	while (1) {
		unsigned ge;

		if (gs >= s->nMTF)
			break;
		ge = gs + BZ_G_SIZE - 1;
		if (ge >= s->nMTF)
			ge = s->nMTF-1;
		AssertH(s->selector[selCtr] < nGroups, 3006);

/* Costs 1300 bytes and is _slower_ (on Intel Core 2) */
#if 0
		if (nGroups == 6 && 50 == ge-gs+1) {
			/*--- fast track the common case ---*/
			uint16_t mtfv_i;
			uint8_t* s_len_sel_selCtr  = &(s->len[s->selector[selCtr]][0]);
			int32_t* s_code_sel_selCtr = &(s->code[s->selector[selCtr]][0]);
#define BZ_ITAH(nn) \
	mtfv_i = mtfv[gs+(nn)]; \
	bsW(s, s_len_sel_selCtr[mtfv_i], s_code_sel_selCtr[mtfv_i])
			BZ_ITAH(0);  BZ_ITAH(1);  BZ_ITAH(2);  BZ_ITAH(3);  BZ_ITAH(4);
			BZ_ITAH(5);  BZ_ITAH(6);  BZ_ITAH(7);  BZ_ITAH(8);  BZ_ITAH(9);
			BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14);
			BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19);
			BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24);
			BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29);
			BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34);
			BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39);
			BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44);
			BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49);
#undef BZ_ITAH
			gs = ge+1;
		} else
#endif
		{
			/*--- slow version which correctly handles all situations ---*/
			/* code is a bit bigger, but moves the multiply out of the loop */
			uint8_t* s_len_sel_selCtr  = &(s->len [s->selector[selCtr]][0]);
			int32_t* s_code_sel_selCtr = &(s->code[s->selector[selCtr]][0]);
			while (gs <= ge) {
				bsW(s,
					s_len_sel_selCtr[mtfv[gs]],
					s_code_sel_selCtr[mtfv[gs]]
				);
				gs++;
			}
			/* already is: gs = ge+1; */
		}
		selCtr++;
	}
	AssertH(selCtr == nSelectors, 3007);
#undef code
#undef rfreq
#undef len_pack
}


/*---------------------------------------------------*/
static
void BZ2_compressBlock(EState* s, int is_last_block)
{
	int32_t origPtr = origPtr;

	if (s->nblock > 0) {
		BZ_FINALISE_CRC(s->blockCRC);
		s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31);
		s->combinedCRC ^= s->blockCRC;
		if (s->blockNo > 1)
			s->posZ = s->zbits; // was: s->numZ = 0;

		origPtr = BZ2_blockSort(s);
	}

	s->zbits = &((uint8_t*)s->arr2)[s->nblock];
	s->posZ = s->zbits;
	s->state_out_pos = s->zbits;

	/*-- If this is the first block, create the stream header. --*/
	if (s->blockNo == 1) {
		BZ2_bsInitWrite(s);
		/*bsPutU8(s, BZ_HDR_B);*/
		/*bsPutU8(s, BZ_HDR_Z);*/
		/*bsPutU8(s, BZ_HDR_h);*/
		/*bsPutU8(s, BZ_HDR_0 + s->blockSize100k);*/
		bsPutU32(s, BZ_HDR_BZh0 + s->blockSize100k);
	}

	if (s->nblock > 0) {
		/*bsPutU8(s, 0x31);*/
		/*bsPutU8(s, 0x41);*/
		/*bsPutU8(s, 0x59);*/
		/*bsPutU8(s, 0x26);*/
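		/* 48-bit block header magic 0x314159265359: BCD digits of pi */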
		bsPutU32(s, 0x31415926);
		/*bsPutU8(s, 0x53);*/
		/*bsPutU8(s, 0x59);*/
		bsPutU16(s, 0x5359);

		/*-- Now the block's CRC, so it is in a known place. --*/
		bsPutU32(s, s->blockCRC);

		/*
		 * Now a single bit indicating (non-)randomisation.
		 * As of version 0.9.5, we use a better sorting algorithm
		 * which makes randomisation unnecessary.  So always set
		 * the randomised bit to 'no'.  Of course, the decoder
		 * still needs to be able to handle randomised blocks
		 * so as to maintain backwards compatibility with
		 * older versions of bzip2.
		 */
		bsW1_0(s);

		bsW(s, 24, origPtr);
		generateMTFValues(s);
		sendMTFValues(s);
	}

	/*-- If this is the last block, add the stream trailer. --*/
	if (is_last_block) {
		/*bsPutU8(s, 0x17);*/
		/*bsPutU8(s, 0x72);*/
		/*bsPutU8(s, 0x45);*/
		/*bsPutU8(s, 0x38);*/
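		/* 48-bit end-of-stream magic 0x177245385090: BCD digits of sqrt(2) */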
		bsPutU32(s, 0x17724538);
		/*bsPutU8(s, 0x50);*/
		/*bsPutU8(s, 0x90);*/
		bsPutU16(s, 0x5090);
		bsPutU32(s, s->combinedCRC);
		bsFinishWrite(s);
	}
}


/*-------------------------------------------------------------*/
/*--- end                                        compress.c ---*/
/*-------------------------------------------------------------*/