1 /*
2 * bzip2 is written by Julian Seward <jseward@bzip.org>.
3 * Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>.
4 * See README and LICENSE files in this directory for more information.
5 */
6
7 /*-------------------------------------------------------------*/
8 /*--- Huffman coding low-level stuff ---*/
9 /*--- huffman.c ---*/
10 /*-------------------------------------------------------------*/
11
12 /* ------------------------------------------------------------------
13 This file is part of bzip2/libbzip2, a program and library for
14 lossless, block-sorting data compression.
15
16 bzip2/libbzip2 version 1.0.4 of 20 December 2006
17 Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
18
19 Please read the WARNING, DISCLAIMER and PATENTS sections in the
20 README file.
21
22 This program is released under the terms of the license contained
23 in the file LICENSE.
24 ------------------------------------------------------------------ */
25
26 /* #include "bzlib_private.h" */
27
28 /*---------------------------------------------------*/
29 #define WEIGHTOF(zz0) ((zz0) & 0xffffff00)
30 #define DEPTHOF(zz1) ((zz1) & 0x000000ff)
31 #define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
32
33 #define ADDWEIGHTS(zw1,zw2) \
34 (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \
35 (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
36
37 #define UPHEAP(z) \
38 { \
39 int32_t zz, tmp; \
40 zz = z; \
41 tmp = heap[zz]; \
42 while (weight[tmp] < weight[heap[zz >> 1]]) { \
43 heap[zz] = heap[zz >> 1]; \
44 zz >>= 1; \
45 } \
46 heap[zz] = tmp; \
47 }
48
49
50 /* 90 bytes, 0.3% of overall compress speed */
51 #if BZIP2_SPEED >= 1
52
53 /* macro works better than inline (gcc 4.2.1) */
54 #define DOWNHEAP1(heap, weight, Heap) \
55 { \
56 int32_t zz, yy, tmp; \
57 zz = 1; \
58 tmp = heap[zz]; \
59 while (1) { \
60 yy = zz << 1; \
61 if (yy > nHeap) \
62 break; \
63 if (yy < nHeap \
64 && weight[heap[yy+1]] < weight[heap[yy]]) \
65 yy++; \
66 if (weight[tmp] < weight[heap[yy]]) \
67 break; \
68 heap[zz] = heap[yy]; \
69 zz = yy; \
70 } \
71 heap[zz] = tmp; \
72 }
73
74 #else
75
76 static
DOWNHEAP1(int32_t * heap,int32_t * weight,int32_t nHeap)77 void DOWNHEAP1(int32_t *heap, int32_t *weight, int32_t nHeap)
78 {
79 int32_t zz, yy, tmp;
80 zz = 1;
81 tmp = heap[zz];
82 while (1) {
83 yy = zz << 1;
84 if (yy > nHeap)
85 break;
86 if (yy < nHeap
87 && weight[heap[yy + 1]] < weight[heap[yy]])
88 yy++;
89 if (weight[tmp] < weight[heap[yy]])
90 break;
91 heap[zz] = heap[yy];
92 zz = yy;
93 }
94 heap[zz] = tmp;
95 }
96
97 #endif
98
99 /*---------------------------------------------------*/
100 static
BZ2_hbMakeCodeLengths(EState * s,uint8_t * len,int32_t * freq,int32_t alphaSize,int32_t maxLen)101 void BZ2_hbMakeCodeLengths(EState *s,
102 uint8_t *len,
103 int32_t *freq,
104 int32_t alphaSize,
105 int32_t maxLen)
106 {
107 /*
108 * Nodes and heap entries run from 1. Entry 0
109 * for both the heap and nodes is a sentinel.
110 */
111 int32_t nNodes, nHeap, n1, n2, i, j, k;
112 Bool tooLong;
113
114 /* bbox: moved to EState to save stack
115 int32_t heap [BZ_MAX_ALPHA_SIZE + 2];
116 int32_t weight[BZ_MAX_ALPHA_SIZE * 2];
117 int32_t parent[BZ_MAX_ALPHA_SIZE * 2];
118 */
119 #define heap (s->BZ2_hbMakeCodeLengths__heap)
120 #define weight (s->BZ2_hbMakeCodeLengths__weight)
121 #define parent (s->BZ2_hbMakeCodeLengths__parent)
122
123 for (i = 0; i < alphaSize; i++)
124 weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
125
126 while (1) {
127 nNodes = alphaSize;
128 nHeap = 0;
129
130 heap[0] = 0;
131 weight[0] = 0;
132 parent[0] = -2;
133
134 for (i = 1; i <= alphaSize; i++) {
135 parent[i] = -1;
136 nHeap++;
137 heap[nHeap] = i;
138 UPHEAP(nHeap);
139 }
140
141 AssertH(nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001);
142
143 while (nHeap > 1) {
144 n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP1(heap, weight, nHeap);
145 n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP1(heap, weight, nHeap);
146 nNodes++;
147 parent[n1] = parent[n2] = nNodes;
148 weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
149 parent[nNodes] = -1;
150 nHeap++;
151 heap[nHeap] = nNodes;
152 UPHEAP(nHeap);
153 }
154
155 AssertH(nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002);
156
157 tooLong = False;
158 for (i = 1; i <= alphaSize; i++) {
159 j = 0;
160 k = i;
161 while (parent[k] >= 0) {
162 k = parent[k];
163 j++;
164 }
165 len[i-1] = j;
166 if (j > maxLen)
167 tooLong = True;
168 }
169
170 if (!tooLong)
171 break;
172
173 /* 17 Oct 04: keep-going condition for the following loop used
174 to be 'i < alphaSize', which missed the last element,
175 theoretically leading to the possibility of the compressor
176 looping. However, this count-scaling step is only needed if
177 one of the generated Huffman code words is longer than
178 maxLen, which up to and including version 1.0.2 was 20 bits,
179 which is extremely unlikely. In version 1.0.3 maxLen was
180 changed to 17 bits, which has minimal effect on compression
181 ratio, but does mean this scaling step is used from time to
182 time, enough to verify that it works.
183
184 This means that bzip2-1.0.3 and later will only produce
185 Huffman codes with a maximum length of 17 bits. However, in
186 order to preserve backwards compatibility with bitstreams
187 produced by versions pre-1.0.3, the decompressor must still
188 handle lengths of up to 20. */
189
190 for (i = 1; i <= alphaSize; i++) {
191 j = weight[i] >> 8;
192 /* bbox: yes, it is a signed division.
193 * don't replace with shift! */
194 j = 1 + (j / 2);
195 weight[i] = j << 8;
196 }
197 }
198 #undef heap
199 #undef weight
200 #undef parent
201 }
202
203
204 /*---------------------------------------------------*/
205 static
BZ2_hbAssignCodes(int32_t * code,uint8_t * length,int32_t minLen,int32_t maxLen,int32_t alphaSize)206 void BZ2_hbAssignCodes(int32_t *code,
207 uint8_t *length,
208 int32_t minLen,
209 int32_t maxLen,
210 int32_t alphaSize)
211 {
212 int32_t n, vec, i;
213
214 vec = 0;
215 for (n = minLen; n <= maxLen; n++) {
216 for (i = 0; i < alphaSize; i++) {
217 if (length[i] == n) {
218 code[i] = vec;
219 vec++;
220 }
221 }
222 vec <<= 1;
223 }
224 }
225
226
227 /*-------------------------------------------------------------*/
228 /*--- end huffman.c ---*/
229 /*-------------------------------------------------------------*/
230