1 /*
2 * $Source: /homes/cvs/ftape-stacked/ftape/compressor/lzrw3.c,v $
3 * $Revision: 1.1 $
4 * $Date: 1997/10/05 19:12:29 $
5 *
6 * Implementation of Ross Williams lzrw3 algorithm. Adaption for zftape.
7 *
8 */
9
10 #include "../compressor/lzrw3.h" /* Defines single exported function "compress". */
11
12 /******************************************************************************/
13 /* */
14 /* LZRW3.C */
15 /* */
16 /******************************************************************************/
17 /* */
18 /* Author : Ross Williams. */
19 /* Date : 30-Jun-1991. */
20 /* Release : 1. */
21 /* */
22 /******************************************************************************/
23 /* */
24 /* This file contains an implementation of the LZRW3 data compression */
25 /* algorithm in C. */
26 /* */
27 /* The algorithm is a general purpose compression algorithm that runs fast */
28 /* and gives reasonable compression. The algorithm is a member of the Lempel */
29 /* Ziv family of algorithms and bases its compression on the presence in the */
30 /* data of repeated substrings. */
31 /* */
32 /* This algorithm is unpatented and the code is public domain. As the */
33 /* algorithm is based on the LZ77 class of algorithms, it is unlikely to be */
34 /* the subject of a patent challenge. */
35 /* */
36 /* Unlike the LZRW1 and LZRW1-A algorithms, the LZRW3 algorithm is */
37 /* deterministic and is guaranteed to yield the same compressed */
38 /* representation for a given file each time it is run. */
39 /* */
40 /* The LZRW3 algorithm was originally designed and implemented */
41 /* by Ross Williams on 31-Dec-1990. */
42 /* */
43 /* Here are the results of applying this code, compiled under THINK C 4.0 */
44 /* and running on a Mac-SE (8MHz 68000), to the standard calgary corpus. */
45 /* */
46 /* +----------------------------------------------------------------+ */
47 /* | DATA COMPRESSION TEST | */
48 /* | ===================== | */
49 /* | Time of run : Sun 30-Jun-1991 09:31PM | */
50 /* | Timing accuracy : One part in 100 | */
51 /* | Context length : 262144 bytes (= 256.0000K) | */
52 /* | Test suite : Calgary Corpus Suite | */
53 /* | Files in suite : 14 | */
54 /* | Algorithm : LZRW3 | */
55 /* | Note: All averages are calculated from the un-rounded values. | */
56 /* +----------------------------------------------------------------+ */
57 /* | File Name Length CxB ComLen %Remn Bits Com K/s Dec K/s | */
58 /* | ---------- ------ --- ------ ----- ---- ------- ------- | */
59 /* | rpus:Bib.D 111261 1 55033 49.5 3.96 19.46 32.27 | */
60 /* | us:Book1.D 768771 3 467962 60.9 4.87 17.03 31.07 | */
61 /* | us:Book2.D 610856 3 317102 51.9 4.15 19.39 34.15 | */
62 /* | rpus:Geo.D 102400 1 82424 80.5 6.44 11.65 18.18 | */
63 /* | pus:News.D 377109 2 205670 54.5 4.36 17.14 27.47 | */
64 /* | pus:Obj1.D 21504 1 13027 60.6 4.85 13.40 18.95 | */
65 /* | pus:Obj2.D 246814 1 116286 47.1 3.77 19.31 30.10 | */
66 /* | s:Paper1.D 53161 1 27522 51.8 4.14 18.60 31.15 | */
67 /* | s:Paper2.D 82199 1 45160 54.9 4.40 18.45 32.84 | */
68 /* | rpus:Pic.D 513216 2 122388 23.8 1.91 35.29 51.05 | */
69 /* | us:Progc.D 39611 1 19669 49.7 3.97 18.87 30.64 | */
70 /* | us:Progl.D 71646 1 28247 39.4 3.15 24.34 40.66 | */
71 /* | us:Progp.D 49379 1 19377 39.2 3.14 23.91 39.23 | */
72 /* | us:Trans.D 93695 1 33481 35.7 2.86 25.48 40.37 | */
73 /* +----------------------------------------------------------------+ */
74 /* | Average 224401 1 110953 50.0 4.00 20.17 32.72 | */
75 /* +----------------------------------------------------------------+ */
76 /* */
77 /******************************************************************************/
78
79 /******************************************************************************/
80
81 /* The following structure is returned by the "compress" function below when */
82 /* the user asks the function to return identifying information. */
83 /* The most important field in the record is the working memory field which */
84 /* tells the calling program how much working memory should be passed to */
85 /* "compress" when it is called to perform a compression or decompression. */
86 /* LZRW3 uses the same amount of memory during compression and decompression. */
87 /* For more information on this structure see "compress.h". */
88
89 #define U(X) ((ULONG) X)
90 #define SIZE_P_BYTE (U(sizeof(UBYTE *)))
91 #define SIZE_WORD (U(sizeof(UWORD )))
92 #define ALIGNMENT_FUDGE (U(16))
93 #define MEM_REQ ( U(4096)*(SIZE_P_BYTE) + ALIGNMENT_FUDGE )
94
95 static struct compress_identity identity =
96 {
97 U(0x032DDEA8), /* Algorithm identification number. */
98 MEM_REQ, /* Working memory (bytes) required. */
99 "LZRW3", /* Name of algorithm. */
100 "1.0", /* Version number of algorithm. */
101 "31-Dec-1990", /* Date of algorithm. */
102 "Public Domain", /* Copyright notice. */
103 "Ross N. Williams", /* Author of algorithm. */
104 "Renaissance Software", /* Affiliation of author. */
105 "Public Domain" /* Vendor of algorithm. */
106 };
107
108 LOCAL void compress_compress (UBYTE *,UBYTE *,ULONG,UBYTE *, LONG *);
109 LOCAL void compress_decompress(UBYTE *,UBYTE *,LONG, UBYTE *, ULONG *);
110
111 /******************************************************************************/
112
113 /* This function is the only function exported by this module. */
114 /* Depending on its first parameter, the function can be requested to */
115 /* compress a block of memory, decompress a block of memory, or to identify */
116 /* itself. For more information, see the specification file "compress.h". */
117
lzrw3_compress(action,wrk_mem,src_adr,src_len,dst_adr,p_dst_len)118 EXPORT void lzrw3_compress(action,wrk_mem,src_adr,src_len,dst_adr,p_dst_len)
119 UWORD action; /* Action to be performed. */
120 UBYTE *wrk_mem; /* Address of working memory we can use. */
121 UBYTE *src_adr; /* Address of input data. */
122 LONG src_len; /* Length of input data. */
123 UBYTE *dst_adr; /* Address to put output data. */
124 void *p_dst_len; /* Address of longword for length of output data. */
125 {
126 switch (action)
127 {
128 case COMPRESS_ACTION_IDENTITY:
129 *((struct compress_identity **)p_dst_len)= &identity;
130 break;
131 case COMPRESS_ACTION_COMPRESS:
132 compress_compress(wrk_mem,src_adr,src_len,dst_adr,(LONG *)p_dst_len);
133 break;
134 case COMPRESS_ACTION_DECOMPRESS:
135 compress_decompress(wrk_mem,src_adr,src_len,dst_adr,(LONG *)p_dst_len);
136 break;
137 }
138 }
139
140 /******************************************************************************/
141 /* */
142 /* BRIEF DESCRIPTION OF THE LZRW3 ALGORITHM */
143 /* ======================================== */
144 /* The LZRW3 algorithm is identical to the LZRW1-A algorithm except that */
145 /* instead of transmitting history offsets, it transmits hash table indexes. */
146 /* In order to decode the indexes, the decompressor must maintain an */
147 /* identical hash table. Copy items are straightforward:when the decompressor */
148 /* receives a copy item, it simply looks up the hash table to translate the */
149 /* index into a pointer into the data already decompressed. To update the */
150 /* hash table, it replaces the same table entry with a pointer to the start */
151 /* of the newly decoded phrase. The tricky part is with literal items, for at */
152 /* the time that the decompressor receives a literal item the decompressor */
153 /* does not have the three bytes in the Ziv (that the compressor has) to */
154 /* perform the three-byte hash. To solve this problem, in LZRW3, both the */
155 /* compressor and decompressor are wired up so that they "buffer" these */
156 /* literals and update their hash tables only when three bytes are available. */
157 /* This makes the maximum buffering 2 bytes. */
158 /* */
159 /* Replacement of offsets by hash table indexes yields a few percent extra */
160 /* compression at the cost of some speed. LZRW3 is slower than LZRW1, LZRW1-A */
161 /* and LZRW2, but yields better compression. */
162 /* */
163 /* Extra compression could be obtained by using a hash table of depth two. */
164 /* However, increasing the depth above one incurs a significant decrease in */
165 /* compression speed which was not considered worthwhile. Another reason for */
166 /* keeping the depth down to one was to allow easy comparison with the */
167 /* LZRW1-A and LZRW2 algorithms so as to demonstrate the exact effect of the */
168 /* use of direct hash indexes. */
169 /* */
170 /* +---+ */
171 /* |___|4095 */
172 /* |___| */
173 /* +---------------------*_|<---+ /----+---\ */
174 /* | |___| +---|Hash | */
175 /* | |___| |Function| */
176 /* | |___| \--------/ */
177 /* | |___|0 ^ */
178 /* | +---+ | */
179 /* | Hash +-----+ */
180 /* | Table | */
181 /* | --- */
182 /* v ^^^ */
183 /* +-------------------------------------|----------------+ */
184 /* |||||||||||||||||||||||||||||||||||||||||||||||||||||||| */
185 /* +-------------------------------------|----------------+ */
186 /* | |1......18| | */
187 /* |<------- Lempel=History ------------>|<--Ziv-->| | */
188 /* | (=bytes already processed) |<-Still to go-->| */
189 /* |<-------------------- INPUT BLOCK ------------------->| */
190 /* */
191 /* The diagram above for LZRW3 looks almost identical to the diagram for */
192 /* LZRW1. The difference is that in LZRW3, the compressor transmits hash */
193 /* table indices instead of Lempel offsets. For this to work, the */
194 /* decompressor must maintain a hash table as well as the compressor and both */
195 /* compressor and decompressor must "buffer" literals, as the decompressor */
196 /* cannot hash phrases commencing with a literal until another two bytes have */
197 /* arrived. */
198 /* */
199 /* LZRW3 Algorithm Execution Summary */
200 /* --------------------------------- */
201 /* 1. Hash the first three bytes of the Ziv to yield a hash table index h. */
202 /* 2. Look up the hash table yielding history pointer p. */
203 /* 3. Match where p points with the Ziv. If there is a match of three or */
204 /* more bytes, code those bytes (in the Ziv) as a copy item, otherwise */
205 /* code the next byte in the Ziv as a literal item. */
206 /* 4. Update the hash table as possible subject to the constraint that only */
207 /* phrases commencing three bytes back from the Ziv can be hashed and */
208 /* entered into the hash table. (This enables the decompressor to keep */
209 /* pace). See the description and code for more details. */
210 /* */
211 /******************************************************************************/
212 /* */
213 /* DEFINITION OF COMPRESSED FILE FORMAT */
214 /* ==================================== */
215 /* * A compressed file consists of a COPY FLAG followed by a REMAINDER. */
216 /* * The copy flag CF uses up four bytes with the first byte being the */
217 /* least significant. */
218 /* * If CF=1, then the compressed file represents the remainder of the file */
219 /* exactly. Otherwise CF=0 and the remainder of the file consists of zero */
220 /* or more GROUPS, each of which represents one or more bytes. */
221 /* * Each group consists of two bytes of CONTROL information followed by */
222 /* sixteen ITEMs except for the last group which can contain from one */
223 /* to sixteen items. */
224 /* * An item can be either a LITERAL item or a COPY item. */
225 /* * Each item corresponds to a bit in the control bytes. */
226 /* * The first control byte corresponds to the first 8 items in the group */
227 /* with bit 0 corresponding to the first item in the group and bit 7 to */
228 /* the eighth item in the group. */
229 /* * The second control byte corresponds to the second 8 items in the group */
230 /* with bit 0 corresponding to the ninth item in the group and bit 7 to */
231 /* the sixteenth item in the group. */
232 /* * A zero bit in a control word means that the corresponding item is a */
233 /* literal item. A one bit corresponds to a copy item. */
234 /* * A literal item consists of a single byte which represents itself. */
235 /* * A copy item consists of two bytes that represent from 3 to 18 bytes. */
236 /* * The first byte in a copy item will be denoted C1. */
237 /* * The second byte in a copy item will be denoted C2. */
238 /* * Bits will be selected using square brackets. */
239 /* For example: C1[0..3] is the low nibble of the first control byte. */
240 /* of copy item C1. */
241 /* * The LENGTH of a copy item is defined to be C1[0..3]+3 which is a number */
242 /* in the range [3,18]. */
243 /* * The INDEX of a copy item is defined to be C1[4..7]*256+C2[0..8] which */
244 /* is a number in the range [0,4095]. */
245 /* * A copy item represents the sequence of bytes */
246 /* text[POS-OFFSET..POS-OFFSET+LENGTH-1] where */
247 /* text is the entire text of the uncompressed string. */
248 /* POS is the index in the text of the character following the */
249 /* string represented by all the items preceeding the item */
250 /* being defined. */
251 /* OFFSET is obtained from INDEX by looking up the hash table. */
252 /* */
253 /******************************************************************************/
254
255 /* The following #define defines the length of the copy flag that appears at */
256 /* the start of the compressed file. The value of four bytes was chosen */
257 /* because the fast_copy routine on my Macintosh runs faster if the source */
258 /* and destination blocks are relatively longword aligned. */
259 /* The actual flag data appears in the first byte. The rest are zeroed so as */
260 /* to normalize the compressed representation (i.e. not non-deterministic). */
261 #define FLAG_BYTES 4
262
263 /* The following #defines define the meaning of the values of the copy */
264 /* flag at the start of the compressed file. */
265 #define FLAG_COMPRESS 0 /* Signals that output was result of compression. */
266 #define FLAG_COPY 1 /* Signals that output was simply copied over. */
267
268 /* The 68000 microprocessor (on which this algorithm was originally developed */
269 /* is fussy about non-aligned arrays of words. To avoid these problems the */
270 /* following macro can be used to "waste" from 0 to 3 bytes so as to align */
271 /* the argument pointer. */
272 #define ULONG_ALIGN_UP(X) ((((ULONG)X)+sizeof(ULONG)-1)&~(sizeof(ULONG)-1))
273
274
275 /* The following constant defines the maximum length of an uncompressed item. */
276 /* This definition must not be changed; its value is hardwired into the code. */
277 /* The longest number of bytes that can be spanned by a single item is 18 */
278 /* for the longest copy item. */
279 #define MAX_RAW_ITEM (18)
280
281 /* The following constant defines the maximum length of an uncompressed group.*/
282 /* This definition must not be changed; its value is hardwired into the code. */
283 /* A group contains at most 16 items which explains this definition. */
284 #define MAX_RAW_GROUP (16*MAX_RAW_ITEM)
285
286 /* The following constant defines the maximum length of a compressed group. */
287 /* This definition must not be changed; its value is hardwired into the code. */
288 /* A compressed group consists of two control bytes followed by up to 16 */
289 /* compressed items each of which can have a maximum length of two bytes. */
290 #define MAX_CMP_GROUP (2+16*2)
291
292 /* The following constant defines the number of entries in the hash table. */
293 /* This definition must not be changed; its value is hardwired into the code. */
294 #define HASH_TABLE_LENGTH (4096)
295
296 /* LZRW3, unlike LZRW1(-A), must initialize its hash table so as to enable */
297 /* the compressor and decompressor to stay in step maintaining identical hash */
298 /* tables. In an early version of the algorithm, the tables were simply */
299 /* initialized to zero and a check for zero was included just before the */
300 /* matching code. However, this test costs time. A better solution is to */
301 /* initialize all the entries in the hash table to point to a constant */
302 /* string. The decompressor does the same. This solution requires no extra */
303 /* test. The contents of the string do not matter so long as the string is */
304 /* the same for the compressor and decompressor and contains at least */
305 /* MAX_RAW_ITEM bytes. I chose consecutive decimal digits because they do not */
306 /* have white space problems (e.g. there is no chance that the compiler will */
307 /* replace more than one space by a TAB) and because they make the length of */
308 /* the string obvious by inspection. */
309 #define START_STRING_18 ((UBYTE *) "123456789012345678")
310
311 /* In this algorithm, hash values have to be calculated at more than one */
312 /* point. The following macro neatens the code up for this. */
313 #define HASH(PTR) \
314 (((40543*(((*(PTR))<<8)^((*((PTR)+1))<<4)^(*((PTR)+2))))>>4) & 0xFFF)
315
316 /******************************************************************************/
317
compress_compress(p_wrk_mem,p_src_first,src_len,p_dst_first,p_dst_len)318 LOCAL void compress_compress
319 (p_wrk_mem,p_src_first,src_len,p_dst_first,p_dst_len)
320 /* Input : Hand over the required amount of working memory in p_wrk_mem. */
321 /* Input : Specify input block using p_src_first and src_len. */
322 /* Input : Point p_dst_first to the start of the output zone (OZ). */
323 /* Input : Point p_dst_len to a ULONG to receive the output length. */
324 /* Input : Input block and output zone must not overlap. */
325 /* Output : Length of output block written to *p_dst_len. */
326 /* Output : Output block in Mem[p_dst_first..p_dst_first+*p_dst_len-1]. May */
327 /* Output : write in OZ=Mem[p_dst_first..p_dst_first+src_len+MAX_CMP_GROUP-1].*/
328 /* Output : Upon completion guaranteed *p_dst_len<=src_len+FLAG_BYTES. */
329 UBYTE *p_wrk_mem;
330 UBYTE *p_src_first;
331 ULONG src_len;
332 UBYTE *p_dst_first;
333 LONG *p_dst_len;
334 {
335 /* p_src and p_dst step through the source and destination blocks. */
336 register UBYTE *p_src = p_src_first;
337 register UBYTE *p_dst = p_dst_first;
338
339 /* The following variables are never modified and are used in the */
340 /* calculations that determine when the main loop terminates. */
341 UBYTE *p_src_post = p_src_first+src_len;
342 UBYTE *p_dst_post = p_dst_first+src_len;
343 UBYTE *p_src_max1 = p_src_first+src_len-MAX_RAW_ITEM;
344 UBYTE *p_src_max16 = p_src_first+src_len-MAX_RAW_ITEM*16;
345
346 /* The variables 'p_control' and 'control' are used to buffer control bits. */
347 /* Before each group is processed, the next two bytes of the output block */
348 /* are set aside for the control word for the group about to be processed. */
349 /* 'p_control' is set to point to the first byte of that word. Meanwhile, */
350 /* 'control' buffers the control bits being generated during the processing */
351 /* of the group. Instead of having a counter to keep track of how many items */
352 /* have been processed (=the number of bits in the control word), at the */
353 /* start of each group, the top word of 'control' is filled with 1 bits. */
354 /* As 'control' is shifted for each item, the 1 bits in the top word are */
355 /* absorbed or destroyed. When they all run out (i.e. when the top word is */
356 /* all zero bits, we know that we are at the end of a group. */
357 # define TOPWORD 0xFFFF0000
358 UBYTE *p_control;
359 register ULONG control=TOPWORD;
360
361 /* THe variable 'hash' always points to the first element of the hash table. */
362 UBYTE **hash= (UBYTE **) ULONG_ALIGN_UP(p_wrk_mem);
363
364 /* The following two variables represent the literal buffer. p_h1 points to */
365 /* the hash table entry corresponding to the youngest literal. p_h2 points */
366 /* to the hash table entry corresponding to the second youngest literal. */
367 /* Note: p_h1=0=>p_h2=0 because zero values denote absence of a pending */
368 /* literal. The variables are initialized to zero meaning an empty "buffer". */
369 UBYTE **p_h1=0;
370 UBYTE **p_h2=0;
371
372 /* To start, we write the flag bytes. Being optimistic, we set the flag to */
373 /* FLAG_COMPRESS. The remaining flag bytes are zeroed so as to keep the */
374 /* algorithm deterministic. */
375 *p_dst++=FLAG_COMPRESS;
376 {UWORD i; for (i=2;i<=FLAG_BYTES;i++) *p_dst++=0;}
377
378 /* Reserve the first word of output as the control word for the first group. */
379 /* Note: This is undone at the end if the input block is empty. */
380 p_control=p_dst; p_dst+=2;
381
382 /* Initialize all elements of the hash table to point to a constant string. */
383 /* Use of an unrolled loop speeds this up considerably. */
384 {UWORD i; UBYTE **p_h=hash;
385 # define ZH *p_h++=START_STRING_18
386 for (i=0;i<256;i++) /* 256=HASH_TABLE_LENGTH/16. */
387 {ZH;ZH;ZH;ZH;
388 ZH;ZH;ZH;ZH;
389 ZH;ZH;ZH;ZH;
390 ZH;ZH;ZH;ZH;}
391 }
392
393 /* The main loop processes either 1 or 16 items per iteration. As its */
394 /* termination logic is complicated, I have opted for an infinite loop */
395 /* structure containing 'break' and 'goto' statements. */
396 while (TRUE)
397 {/* Begin main processing loop. */
398
399 /* Note: All the variables here except unroll should be defined within */
400 /* the inner loop. Unfortunately the loop hasn't got a block. */
401 register UBYTE *p; /* Scans through targ phrase during matching. */
402 register UBYTE *p_ziv= NULL ; /* Points to first byte of current Ziv. */
403 register UWORD unroll; /* Loop counter for unrolled inner loop. */
404 register UWORD index; /* Index of current hash table entry. */
405 register UBYTE **p_h0 = NULL ; /* Pointer to current hash table entry. */
406
407 /* Test for overrun and jump to overrun code if necessary. */
408 if (p_dst>p_dst_post)
409 goto overrun;
410
411 /* The following cascade of if statements efficiently catches and deals */
412 /* with varying degrees of closeness to the end of the input block. */
413 /* When we get very close to the end, we stop updating the table and */
414 /* code the remaining bytes as literals. This makes the code simpler. */
415 unroll=16;
416 if (p_src>p_src_max16)
417 {
418 unroll=1;
419 if (p_src>p_src_max1)
420 {
421 if (p_src==p_src_post)
422 break;
423 else
424 goto literal;
425 }
426 }
427
428 /* This inner unrolled loop processes 'unroll' (whose value is either 1 */
429 /* or 16) items. I have chosen to implement this loop with labels and */
430 /* gotos to heighten the ease with which the loop may be implemented with */
431 /* a single decrement and branch instruction in assembly language and */
432 /* also because the labels act as highly readable place markers. */
433 /* (Also because we jump into the loop for endgame literals (see above)). */
434
435 begin_unrolled_loop:
436
437 /* To process the next phrase, we hash the next three bytes and use */
438 /* the resultant hash table index to look up the hash table. A pointer */
439 /* to the entry is stored in p_h0 so as to avoid an array lookup. The */
440 /* hash table entry *p_h0 is looked up yielding a pointer p to a */
441 /* potential match of the Ziv in the history. */
442 index=HASH(p_src);
443 p_h0=&hash[index];
444 p=*p_h0;
445
446 /* Having looked up the candidate position, we are in a position to */
447 /* attempt a match. The match loop has been unrolled using the PS */
448 /* macro so that failure within the first three bytes automatically */
449 /* results in the literal branch being taken. The coding is simple. */
450 /* p_ziv saves p_src so we can let p_src wander. */
451 # define PS *p++!=*p_src++
452 p_ziv=p_src;
453 if (PS || PS || PS)
454 {
455 /* Literal. */
456
457 /* Code the literal byte as itself and a zero control bit. */
458 p_src=p_ziv; literal: *p_dst++=*p_src++; control&=0xFFFEFFFF;
459
460 /* We have just coded a literal. If we had two pending ones, that */
461 /* makes three and we can update the hash table. */
462 if (p_h2!=0)
463 {*p_h2=p_ziv-2;}
464
465 /* In any case, rotate the hash table pointers for next time. */
466 p_h2=p_h1; p_h1=p_h0;
467
468 }
469 else
470 {
471 /* Copy */
472
473 /* Match up to 15 remaining bytes using an unrolled loop and code. */
474 #if 0
475 PS || PS || PS || PS || PS || PS || PS || PS ||
476 PS || PS || PS || PS || PS || PS || PS || p_src++;
477 #else
478 if (
479 !( PS || PS || PS || PS || PS || PS || PS || PS ||
480 PS || PS || PS || PS || PS || PS || PS )
481 ) p_src++;
482 #endif
483 *p_dst++=((index&0xF00)>>4)|(--p_src-p_ziv-3);
484 *p_dst++=index&0xFF;
485
486 /* As we have just coded three bytes, we are now in a position to */
487 /* update the hash table with the literal bytes that were pending */
488 /* upon the arrival of extra context bytes. */
489 if (p_h1!=0)
490 {
491 if (p_h2!=0)
492 {*p_h2=p_ziv-2; p_h2=0;}
493 *p_h1=p_ziv-1; p_h1=0;
494 }
495
496 /* In any case, we can update the hash table based on the current */
497 /* position as we just coded at least three bytes in a copy items. */
498 *p_h0=p_ziv;
499
500 }
501 control>>=1;
502
503 /* This loop is all set up for a decrement and jump instruction! */
504 #ifndef linux
505 ` end_unrolled_loop: if (--unroll) goto begin_unrolled_loop;
506 #else
507 /* end_unrolled_loop: */ if (--unroll) goto begin_unrolled_loop;
508 #endif
509
510 /* At this point it will nearly always be the end of a group in which */
511 /* case, we have to do some control-word processing. However, near the */
512 /* end of the input block, the inner unrolled loop is only executed once. */
513 /* This necessitates the 'if' test. */
514 if ((control&TOPWORD)==0)
515 {
516 /* Write the control word to the place we saved for it in the output. */
517 *p_control++= control &0xFF;
518 *p_control = (control>>8) &0xFF;
519
520 /* Reserve the next word in the output block for the control word */
521 /* for the group about to be processed. */
522 p_control=p_dst; p_dst+=2;
523
524 /* Reset the control bits buffer. */
525 control=TOPWORD;
526 }
527
528 } /* End main processing loop. */
529
530 /* After the main processing loop has executed, all the input bytes have */
531 /* been processed. However, the control word has still to be written to the */
532 /* word reserved for it in the output at the start of the most recent group. */
533 /* Before writing, the control word has to be shifted so that all the bits */
534 /* are in the right place. The "empty" bit positions are filled with 1s */
535 /* which partially fill the top word. */
536 while(control&TOPWORD) control>>=1;
537 *p_control++= control &0xFF;
538 *p_control++=(control>>8) &0xFF;
539
540 /* If the last group contained no items, delete the control word too. */
541 if (p_control==p_dst) p_dst-=2;
542
543 /* Write the length of the output block to the dst_len parameter and return. */
544 *p_dst_len=p_dst-p_dst_first;
545 return;
546
547 /* Jump here as soon as an overrun is detected. An overrun is defined to */
548 /* have occurred if p_dst>p_dst_first+src_len. That is, the moment the */
549 /* length of the output written so far exceeds the length of the input block.*/
550 /* The algorithm checks for overruns at least at the end of each group */
551 /* which means that the maximum overrun is MAX_CMP_GROUP bytes. */
552 /* Once an overrun occurs, the only thing to do is to set the copy flag and */
553 /* copy the input over. */
554 overrun:
555 #if 0
556 *p_dst_first=FLAG_COPY;
557 fast_copy(p_src_first,p_dst_first+FLAG_BYTES,src_len);
558 *p_dst_len=src_len+FLAG_BYTES;
559 #else
560 fast_copy(p_src_first,p_dst_first,src_len);
561 *p_dst_len= -src_len; /* return a negative number to indicate uncompressed data */
562 #endif
563 }
564
565 /******************************************************************************/
566
compress_decompress(p_wrk_mem,p_src_first,src_len,p_dst_first,p_dst_len)567 LOCAL void compress_decompress
568 (p_wrk_mem,p_src_first,src_len,p_dst_first,p_dst_len)
569 /* Input : Hand over the required amount of working memory in p_wrk_mem. */
570 /* Input : Specify input block using p_src_first and src_len. */
571 /* Input : Point p_dst_first to the start of the output zone. */
572 /* Input : Point p_dst_len to a ULONG to receive the output length. */
573 /* Input : Input block and output zone must not overlap. User knows */
574 /* Input : upperbound on output block length from earlier compression. */
575 /* Input : In any case, maximum expansion possible is nine times. */
576 /* Output : Length of output block written to *p_dst_len. */
577 /* Output : Output block in Mem[p_dst_first..p_dst_first+*p_dst_len-1]. */
578 /* Output : Writes only in Mem[p_dst_first..p_dst_first+*p_dst_len-1]. */
579 UBYTE *p_wrk_mem;
580 UBYTE *p_src_first;
581 LONG src_len;
582 UBYTE *p_dst_first;
583 ULONG *p_dst_len;
584 {
585 /* Byte pointers p_src and p_dst scan through the input and output blocks. */
586 register UBYTE *p_src = p_src_first+FLAG_BYTES;
587 register UBYTE *p_dst = p_dst_first;
588 /* we need to avoid a SEGV when trying to uncompress corrupt data */
589 register UBYTE *p_dst_post = p_dst_first + *p_dst_len;
590
591 /* The following two variables are never modified and are used to control */
592 /* the main loop. */
593 UBYTE *p_src_post = p_src_first+src_len;
594 UBYTE *p_src_max16 = p_src_first+src_len-(MAX_CMP_GROUP-2);
595
596 /* The hash table is the only resident of the working memory. The hash table */
597 /* contains HASH_TABLE_LENGTH=4096 pointers to positions in the history. To */
598 /* keep Macintoshes happy, it is longword aligned. */
599 UBYTE **hash = (UBYTE **) ULONG_ALIGN_UP(p_wrk_mem);
600
601 /* The variable 'control' is used to buffer the control bits which appear in */
602 /* groups of 16 bits (control words) at the start of each compressed group. */
603 /* When each group is read, bit 16 of the register is set to one. Whenever */
604 /* a new bit is needed, the register is shifted right. When the value of the */
605 /* register becomes 1, we know that we have reached the end of a group. */
606 /* Initializing the register to 1 thus instructs the code to follow that it */
607 /* should read a new control word immediately. */
608 register ULONG control=1;
609
610 /* The value of 'literals' is always in the range 0..3. It is the number of */
611 /* consecutive literal items just seen. We have to record this number so as */
612 /* to know when to update the hash table. When literals gets to 3, there */
613 /* have been three consecutive literals and we can update at the position of */
614 /* the oldest of the three. */
615 register UWORD literals=0;
616
617 /* Check the leading copy flag to see if the compressor chose to use a copy */
618 /* operation instead of a compression operation. If a copy operation was */
619 /* used, then all we need to do is copy the data over, set the output length */
620 /* and return. */
621 #if 0
622 if (*p_src_first==FLAG_COPY)
623 {
624 fast_copy(p_src_first+FLAG_BYTES,p_dst_first,src_len-FLAG_BYTES);
625 *p_dst_len=src_len-FLAG_BYTES;
626 return;
627 }
628 #else
629 if ( src_len < 0 )
630 {
631 fast_copy(p_src_first,p_dst_first,-src_len );
632 *p_dst_len = (ULONG)-src_len;
633 return;
634 }
635 #endif
636
637 /* Initialize all elements of the hash table to point to a constant string. */
638 /* Use of an unrolled loop speeds this up considerably. */
639 {UWORD i; UBYTE **p_h=hash;
640 # define ZJ *p_h++=START_STRING_18
641 for (i=0;i<256;i++) /* 256=HASH_TABLE_LENGTH/16. */
642 {ZJ;ZJ;ZJ;ZJ;
643 ZJ;ZJ;ZJ;ZJ;
644 ZJ;ZJ;ZJ;ZJ;
645 ZJ;ZJ;ZJ;ZJ;}
646 }
647
648 /* The outer loop processes either 1 or 16 items per iteration depending on */
649 /* how close p_src is to the end of the input block. */
650 while (p_src!=p_src_post)
651 {/* Start of outer loop */
652
653 register UWORD unroll; /* Counts unrolled loop executions. */
654
655 /* When 'control' has the value 1, it means that the 16 buffered control */
656 /* bits that were read in at the start of the current group have all been */
657 /* shifted out and that all that is left is the 1 bit that was injected */
658 /* into bit 16 at the start of the current group. When we reach the end */
659 /* of a group, we have to load a new control word and inject a new 1 bit. */
660 if (control==1)
661 {
662 control=0x10000|*p_src++;
663 control|=(*p_src++)<<8;
664 }
665
666 /* If it is possible that we are within 16 groups from the end of the */
667 /* input, execute the unrolled loop only once, else process a whole group */
668 /* of 16 items by looping 16 times. */
669 unroll= p_src<=p_src_max16 ? 16 : 1;
670
671 /* This inner loop processes one phrase (item) per iteration. */
672 while (unroll--)
673 { /* Begin unrolled inner loop. */
674
675 /* Process a literal or copy item depending on the next control bit. */
676 if (control&1)
677 {
678 /* Copy item. */
679
680 register UBYTE *p; /* Points to place from which to copy. */
681 register UWORD lenmt; /* Length of copy item minus three. */
682 register UBYTE **p_hte; /* Pointer to current hash table entry.*/
683 register UBYTE *p_ziv=p_dst; /* Pointer to start of current Ziv. */
684
685 /* Read and dismantle the copy word. Work out from where to copy. */
686 lenmt=*p_src++;
687 p_hte=&hash[((lenmt&0xF0)<<4)|*p_src++];
688 p=*p_hte;
689 lenmt&=0xF;
690
691 /* Now perform the copy using a half unrolled loop. */
692 *p_dst++=*p++;
693 *p_dst++=*p++;
694 *p_dst++=*p++;
695 while (lenmt--)
696 *p_dst++=*p++;
697
698 /* Because we have just received 3 or more bytes in a copy item */
699 /* (whose bytes we have just installed in the output), we are now */
700 /* in a position to flush all the pending literal hashings that had */
701 /* been postponed for lack of bytes. */
702 if (literals>0)
703 {
704 register UBYTE *r=p_ziv-literals;;
705 hash[HASH(r)]=r;
706 if (literals==2)
707 {r++; hash[HASH(r)]=r;}
708 literals=0;
709 }
710
711 /* In any case, we can immediately update the hash table with the */
712 /* current position. We don't need to do a HASH(...) to work out */
713 /* where to put the pointer, as the compressor just told us!!! */
714 *p_hte=p_ziv;
715
716 }
717 else
718 {
719 /* Literal item. */
720
721 /* Copy over the literal byte. */
722 *p_dst++=*p_src++;
723
724 /* If we now have three literals waiting to be hashed into the hash */
725 /* table, we can do one of them now (because there are three). */
726 if (++literals == 3)
727 {register UBYTE *p=p_dst-3; hash[HASH(p)]=p; literals=2;}
728 }
729
730 /* Shift the control buffer so the next control bit is in bit 0. */
731 control>>=1;
732 #if 1
733 if (p_dst > p_dst_post)
734 {
735 /* Shit: we tried to decompress corrupt data */
736 *p_dst_len = 0;
737 return;
738 }
739 #endif
740 } /* End unrolled inner loop. */
741
742 } /* End of outer loop */
743
744 /* Write the length of the decompressed data before returning. */
745 *p_dst_len=p_dst-p_dst_first;
746 }
747
748 /******************************************************************************/
749 /* End of LZRW3.C */
750 /******************************************************************************/
751