1 /*
2  * bzip2 is written by Julian Seward <jseward@bzip.org>.
3  * Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>.
4  * See README and LICENSE files in this directory for more information.
5  */
6 
7 /*-------------------------------------------------------------*/
8 /*--- Huffman coding low-level stuff                        ---*/
9 /*---                                             huffman.c ---*/
10 /*-------------------------------------------------------------*/
11 
12 /* ------------------------------------------------------------------
13 This file is part of bzip2/libbzip2, a program and library for
14 lossless, block-sorting data compression.
15 
16 bzip2/libbzip2 version 1.0.4 of 20 December 2006
17 Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
18 
19 Please read the WARNING, DISCLAIMER and PATENTS sections in the
20 README file.
21 
22 This program is released under the terms of the license contained
23 in the file LICENSE.
24 ------------------------------------------------------------------ */
25 
26 /* #include "bzlib_private.h" */
27 
28 /*---------------------------------------------------*/
29 #define WEIGHTOF(zz0)  ((zz0) & 0xffffff00)
30 #define DEPTHOF(zz1)   ((zz1) & 0x000000ff)
31 #define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
32 
33 #define ADDWEIGHTS(zw1,zw2) \
34 	(WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \
35 	(1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
36 
37 #define UPHEAP(z) \
38 { \
39 	int32_t zz, tmp; \
40 	zz = z; \
41 	tmp = heap[zz]; \
42 	while (weight[tmp] < weight[heap[zz >> 1]]) { \
43 		heap[zz] = heap[zz >> 1]; \
44 		zz >>= 1; \
45 	} \
46 	heap[zz] = tmp; \
47 }
48 
49 
50 /* 90 bytes, 0.3% of overall compress speed */
51 #if BZIP2_SPEED >= 1
52 
53 /* macro works better than inline (gcc 4.2.1) */
54 #define DOWNHEAP1(heap, weight, Heap) \
55 { \
56 	int32_t zz, yy, tmp; \
57 	zz = 1; \
58 	tmp = heap[zz]; \
59 	while (1) { \
60 		yy = zz << 1; \
61 		if (yy > nHeap) \
62 			break; \
63 		if (yy < nHeap \
64 		 && weight[heap[yy+1]] < weight[heap[yy]]) \
65 			yy++; \
66 		if (weight[tmp] < weight[heap[yy]]) \
67 			break; \
68 		heap[zz] = heap[yy]; \
69 		zz = yy; \
70 	} \
71 	heap[zz] = tmp; \
72 }
73 
74 #else
75 
76 static
DOWNHEAP1(int32_t * heap,int32_t * weight,int32_t nHeap)77 void DOWNHEAP1(int32_t *heap, int32_t *weight, int32_t nHeap)
78 {
79 	int32_t zz, yy, tmp;
80 	zz = 1;
81 	tmp = heap[zz];
82 	while (1) {
83 		yy = zz << 1;
84 		if (yy > nHeap)
85 			break;
86 		if (yy < nHeap
87 		 && weight[heap[yy + 1]] < weight[heap[yy]])
88 			yy++;
89 		if (weight[tmp] < weight[heap[yy]])
90 			break;
91 		heap[zz] = heap[yy];
92 		zz = yy;
93 	}
94 	heap[zz] = tmp;
95 }
96 
97 #endif
98 
99 /*---------------------------------------------------*/
100 static
BZ2_hbMakeCodeLengths(EState * s,uint8_t * len,int32_t * freq,int32_t alphaSize,int32_t maxLen)101 void BZ2_hbMakeCodeLengths(EState *s,
102 		uint8_t *len,
103 		int32_t *freq,
104 		int32_t alphaSize,
105 		int32_t maxLen)
106 {
107 	/*
108 	 * Nodes and heap entries run from 1.  Entry 0
109 	 * for both the heap and nodes is a sentinel.
110 	 */
111 	int32_t nNodes, nHeap, n1, n2, i, j, k;
112 	Bool  tooLong;
113 
114 	/* bbox: moved to EState to save stack
115 	int32_t heap  [BZ_MAX_ALPHA_SIZE + 2];
116 	int32_t weight[BZ_MAX_ALPHA_SIZE * 2];
117 	int32_t parent[BZ_MAX_ALPHA_SIZE * 2];
118 	*/
119 #define heap   (s->BZ2_hbMakeCodeLengths__heap)
120 #define weight (s->BZ2_hbMakeCodeLengths__weight)
121 #define parent (s->BZ2_hbMakeCodeLengths__parent)
122 
123 	for (i = 0; i < alphaSize; i++)
124 		weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
125 
126 	while (1) {
127 		nNodes = alphaSize;
128 		nHeap = 0;
129 
130 		heap[0] = 0;
131 		weight[0] = 0;
132 		parent[0] = -2;
133 
134 		for (i = 1; i <= alphaSize; i++) {
135 			parent[i] = -1;
136 			nHeap++;
137 			heap[nHeap] = i;
138 			UPHEAP(nHeap);
139 		}
140 
141 		AssertH(nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001);
142 
143 		while (nHeap > 1) {
144 			n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP1(heap, weight, nHeap);
145 			n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP1(heap, weight, nHeap);
146 			nNodes++;
147 			parent[n1] = parent[n2] = nNodes;
148 			weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
149 			parent[nNodes] = -1;
150 			nHeap++;
151 			heap[nHeap] = nNodes;
152 			UPHEAP(nHeap);
153 		}
154 
155 		AssertH(nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002);
156 
157 		tooLong = False;
158 		for (i = 1; i <= alphaSize; i++) {
159 			j = 0;
160 			k = i;
161 			while (parent[k] >= 0) {
162 				k = parent[k];
163 				j++;
164 			}
165 			len[i-1] = j;
166 			if (j > maxLen)
167 				tooLong = True;
168 		}
169 
170 		if (!tooLong)
171 			break;
172 
173 		/* 17 Oct 04: keep-going condition for the following loop used
174 		to be 'i < alphaSize', which missed the last element,
175 		theoretically leading to the possibility of the compressor
176 		looping.  However, this count-scaling step is only needed if
177 		one of the generated Huffman code words is longer than
178 		maxLen, which up to and including version 1.0.2 was 20 bits,
179 		which is extremely unlikely.  In version 1.0.3 maxLen was
180 		changed to 17 bits, which has minimal effect on compression
181 		ratio, but does mean this scaling step is used from time to
182 		time, enough to verify that it works.
183 
184 		This means that bzip2-1.0.3 and later will only produce
185 		Huffman codes with a maximum length of 17 bits.  However, in
186 		order to preserve backwards compatibility with bitstreams
187 		produced by versions pre-1.0.3, the decompressor must still
188 		handle lengths of up to 20. */
189 
190 		for (i = 1; i <= alphaSize; i++) {
191 			j = weight[i] >> 8;
192 			/* bbox: yes, it is a signed division.
193 			 * don't replace with shift! */
194 			j = 1 + (j / 2);
195 			weight[i] = j << 8;
196 		}
197 	}
198 #undef heap
199 #undef weight
200 #undef parent
201 }
202 
203 
204 /*---------------------------------------------------*/
205 static
BZ2_hbAssignCodes(int32_t * code,uint8_t * length,int32_t minLen,int32_t maxLen,int32_t alphaSize)206 void BZ2_hbAssignCodes(int32_t *code,
207 		uint8_t *length,
208 		int32_t minLen,
209 		int32_t maxLen,
210 		int32_t alphaSize)
211 {
212 	int32_t n, vec, i;
213 
214 	vec = 0;
215 	for (n = minLen; n <= maxLen; n++) {
216 		for (i = 0; i < alphaSize; i++) {
217 			if (length[i] == n) {
218 				code[i] = vec;
219 				vec++;
220 			}
221 		}
222 		vec <<= 1;
223 	}
224 }
225 
226 
227 /*-------------------------------------------------------------*/
228 /*--- end                                         huffman.c ---*/
229 /*-------------------------------------------------------------*/
230