author | pohly <pohly> | 2004-08-24 20:52:45 (UTC) |
---|---|---|
committer | pohly <pohly> | 2004-08-24 20:52:45 (UTC) |
commit | 73253e93327cf4ef0932de1b4afb56af22a0f37e (patch) (unidiff) | |
tree | 1c9a7a6dd3341e036a894d348a3372525d29acec /noncore/apps/opie-reader/lzx.c | |
parent | e90847c784c48bd21bf8768cb38edb853b832697 (diff) | |
download | opie-73253e93327cf4ef0932de1b4afb56af22a0f37e.zip opie-73253e93327cf4ef0932de1b4afb56af22a0f37e.tar.gz opie-73253e93327cf4ef0932de1b4afb56af22a0f37e.tar.bz2 |
updated source to opie-reader 0.7g
Diffstat (limited to 'noncore/apps/opie-reader/lzx.c') (more/less context) (show whitespace changes)
-rw-r--r-- | noncore/apps/opie-reader/lzx.c | 811 |
1 files changed, 811 insertions, 0 deletions
diff --git a/noncore/apps/opie-reader/lzx.c b/noncore/apps/opie-reader/lzx.c new file mode 100644 index 0000000..860077d --- a/dev/null +++ b/noncore/apps/opie-reader/lzx.c | |||
@@ -0,0 +1,811 @@ | |||
1 | /* $Id$ */ | ||
2 | /*************************************************************************** | ||
3 | * lzx.c - LZX decompression routines * | ||
4 | * ------------------- * | ||
5 | * * | ||
6 | * maintainer: Jed Wing <jedwin@ugcs.caltech.edu> * | ||
7 | * source: modified lzx.c from cabextract v0.5 * | ||
8 | * notes: This file was taken from cabextract v0.5, which was, * | ||
9 | * itself, a modified version of the lzx decompression code * | ||
10 | * from unlzx. * | ||
11 | * * | ||
12 | * platforms: In its current incarnation, this file has been tested on * | ||
13 | * two different Linux platforms (one, redhat-based, with a * | ||
14 | * 2.1.2 glibc and gcc 2.95.x, and the other, Debian, with * | ||
15 | * 2.2.4 glibc and both gcc 2.95.4 and gcc 3.0.2). Both were * | ||
16 | * Intel x86 compatible machines. * | ||
17 | ***************************************************************************/ | ||
18 | |||
19 | /*************************************************************************** | ||
20 | * * | ||
21 | * This program is free software; you can redistribute it and/or modify * | ||
22 | * it under the terms of the GNU General Public License as published by * | ||
23 | * the Free Software Foundation; either version 2 of the License, or * | ||
24 | * (at your option) any later version. Note that an exemption to this * | ||
25 | * license has been granted by Stuart Caie for the purposes of * | ||
26 | * distribution with chmlib. This does not, to the best of my * | ||
27 | * knowledge, constitute a change in the license of this (the LZX) code * | ||
28 | * in general. * | ||
29 | * * | ||
30 | ***************************************************************************/ | ||
31 | |||
32 | #include "lzx.h" | ||
33 | #include <stdio.h> | ||
34 | #include <stdlib.h> | ||
35 | #include <string.h> | ||
36 | #include <malloc.h> | ||
37 | |||
38 | #ifdef __GNUC__ | ||
39 | #define memcpy __builtin_memcpy | ||
40 | #endif | ||
41 | |||
42 | /* sized types */ | ||
43 | typedef unsigned char UBYTE; /* 8 bits exactly */ | ||
44 | typedef unsigned short UWORD; /* 16 bits (or more) */ | ||
45 | typedef unsigned int ULONG; /* 32 bits (or more) */ | ||
46 | typedef signed int LONG; /* 32 bits (or more) */ | ||
47 | |||
48 | /* some constants defined by the LZX specification */ | ||
49 | #define LZX_MIN_MATCH (2) | ||
50 | #define LZX_MAX_MATCH (257) | ||
51 | #define LZX_NUM_CHARS (256) | ||
52 | #define LZX_BLOCKTYPE_INVALID (0) /* also blocktypes 4-7 invalid */ | ||
53 | #define LZX_BLOCKTYPE_VERBATIM (1) | ||
54 | #define LZX_BLOCKTYPE_ALIGNED (2) | ||
55 | #define LZX_BLOCKTYPE_UNCOMPRESSED (3) | ||
56 | #define LZX_PRETREE_NUM_ELEMENTS (20) | ||
57 | #define LZX_ALIGNED_NUM_ELEMENTS (8) /* aligned offset tree #elements */ | ||
58 | #define LZX_NUM_PRIMARY_LENGTHS (7) /* this one missing from spec! */ | ||
59 | #define LZX_NUM_SECONDARY_LENGTHS (249) /* length tree #elements */ | ||
60 | |||
61 | /* LZX huffman defines: tweak tablebits as desired */ | ||
62 | #define LZX_PRETREE_MAXSYMBOLS (LZX_PRETREE_NUM_ELEMENTS) | ||
63 | #define LZX_PRETREE_TABLEBITS (6) | ||
64 | #define LZX_MAINTREE_MAXSYMBOLS (LZX_NUM_CHARS + 50*8) | ||
65 | #define LZX_MAINTREE_TABLEBITS (12) | ||
66 | #define LZX_LENGTH_MAXSYMBOLS (LZX_NUM_SECONDARY_LENGTHS+1) | ||
67 | #define LZX_LENGTH_TABLEBITS (12) | ||
68 | #define LZX_ALIGNED_MAXSYMBOLS (LZX_ALIGNED_NUM_ELEMENTS) | ||
69 | #define LZX_ALIGNED_TABLEBITS (7) | ||
70 | |||
71 | #define LZX_LENTABLE_SAFETY (64) /* we allow length table decoding overruns */ | ||
72 | |||
73 | #define LZX_DECLARE_TABLE(tbl) \ | ||
74 | UWORD tbl##_table[(1<<LZX_##tbl##_TABLEBITS) + (LZX_##tbl##_MAXSYMBOLS<<1)];\ | ||
75 | UBYTE tbl##_len [LZX_##tbl##_MAXSYMBOLS + LZX_LENTABLE_SAFETY] | ||
76 | |||
77 | struct LZXstate | ||
78 | { | ||
79 | UBYTE *window; /* the actual decoding window */ | ||
80 | ULONG window_size; /* window size (32Kb through 2Mb) */ | ||
81 | ULONG actual_size; /* window size when it was first allocated */ | ||
82 | ULONG window_posn; /* current offset within the window */ | ||
83 | ULONG R0, R1, R2; /* for the LRU offset system */ | ||
84 | UWORD main_elements; /* number of main tree elements */ | ||
85 | int header_read; /* have we started decoding at all yet? */ | ||
86 | UWORD block_type; /* type of this block */ | ||
87 | ULONG block_length; /* uncompressed length of this block */ | ||
88 | ULONG block_remaining; /* uncompressed bytes still left to decode */ | ||
89 | ULONG frames_read; /* the number of CFDATA blocks processed */ | ||
90 | LONG intel_filesize; /* magic header value used for transform */ | ||
91 | LONG intel_curpos; /* current offset in transform space */ | ||
92 | int intel_started; /* have we seen any translatable data yet? */ | ||
93 | |||
94 | LZX_DECLARE_TABLE(PRETREE); | ||
95 | LZX_DECLARE_TABLE(MAINTREE); | ||
96 | LZX_DECLARE_TABLE(LENGTH); | ||
97 | LZX_DECLARE_TABLE(ALIGNED); | ||
98 | }; | ||
99 | |||
100 | /* LZX decruncher */ | ||
101 | |||
102 | /* Microsoft's LZX document and their implementation of the | ||
103 | * com.ms.util.cab Java package do not concur. | ||
104 | * | ||
105 | * In the LZX document, there is a table showing the correlation between | ||
106 | * window size and the number of position slots. It states that the 1MB | ||
107 | * window = 40 slots and the 2MB window = 42 slots. In the implementation, | ||
108 | * 1MB = 42 slots, 2MB = 50 slots. The actual calculation is 'find the | ||
109 | * first slot whose position base is equal to or more than the required | ||
110 | * window size'. This would explain why other tables in the document refer | ||
111 | * to 50 slots rather than 42. | ||
112 | * | ||
113 | * The constant NUM_PRIMARY_LENGTHS used in the decompression pseudocode | ||
114 | * is not defined in the specification. | ||
115 | * | ||
116 | * The LZX document does not state the uncompressed block has an | ||
117 | * uncompressed length field. Where does this length field come from, so | ||
118 | * we can know how large the block is? The implementation has it as the 24 | ||
119 | * bits following after the 3 blocktype bits, before the alignment | ||
120 | * padding. | ||
121 | * | ||
122 | * The LZX document states that aligned offset blocks have their aligned | ||
123 | * offset huffman tree AFTER the main and length trees. The implementation | ||
124 | * suggests that the aligned offset tree is BEFORE the main and length | ||
125 | * trees. | ||
126 | * | ||
127 | * The LZX document decoding algorithm states that, in an aligned offset | ||
128 | * block, if an extra_bits value is 1, 2 or 3, then that number of bits | ||
129 | * should be read and the result added to the match offset. This is | ||
130 | * correct for 1 and 2, but not 3, where just a huffman symbol (using the | ||
131 | * aligned tree) should be read. | ||
132 | * | ||
133 | * Regarding the E8 preprocessing, the LZX document states 'No translation | ||
134 | * may be performed on the last 6 bytes of the input block'. This is | ||
135 | * correct. However, the pseudocode provided checks for the *E8 leader* | ||
136 | * up to the last 6 bytes. If the leader appears between -10 and -7 bytes | ||
137 | * from the end, this would cause the next four bytes to be modified, at | ||
138 | * least one of which would be in the last 6 bytes, which is not allowed | ||
139 | * according to the spec. | ||
140 | * | ||
141 | * The specification states that the huffman trees must always contain at | ||
142 | * least one element. However, many CAB files contain blocks where the | ||
143 | * length tree is completely empty (because there are no matches), and | ||
144 | * this is expected to succeed. | ||
145 | */ | ||
146 | |||
147 | |||
148 | /* LZX uses what it calls 'position slots' to represent match offsets. | ||
149 | * What this means is that a small 'position slot' number and a small | ||
150 | * offset from that slot are encoded instead of one large offset for | ||
151 | * every match. | ||
152 | * - position_base is an index to the position slot bases | ||
153 | * - extra_bits states how many bits of offset-from-base data is needed. | ||
154 | */ | ||
155 | static const UBYTE extra_bits[51] = { | ||
156 | 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, | ||
157 | 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, | ||
158 | 15, 15, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, | ||
159 | 17, 17, 17 | ||
160 | }; | ||
161 | |||
162 | static const ULONG position_base[51] = { | ||
163 | 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, | ||
164 | 256, 384, 512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152, | ||
165 | 65536, 98304, 131072, 196608, 262144, 393216, 524288, 655360, 786432, 917504, 1048576, 1179648, 1310720, 1441792, 1572864, 1703936, | ||
166 | 1835008, 1966080, 2097152 | ||
167 | }; | ||
168 | |||
169 | struct LZXstate *LZXinit(int window) | ||
170 | { | ||
171 | struct LZXstate *pState=NULL; | ||
172 | ULONG wndsize = 1 << window; | ||
173 | int i, posn_slots; | ||
174 | |||
175 | /* LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) */ | ||
176 | /* if a previously allocated window is big enough, keep it */ | ||
177 | if (window < 15 || window > 21) return NULL; | ||
178 | |||
179 | /* allocate state and associated window */ | ||
180 | pState = (struct LZXstate *)malloc(sizeof(struct LZXstate)); | ||
181 | if (!(pState->window = (UBYTE *)malloc(wndsize))) | ||
182 | { | ||
183 | free(pState); | ||
184 | return NULL; | ||
185 | } | ||
186 | pState->actual_size = wndsize; | ||
187 | pState->window_size = wndsize; | ||
188 | |||
189 | /* calculate required position slots */ | ||
190 | if (window == 20) posn_slots = 42; | ||
191 | else if (window == 21) posn_slots = 50; | ||
192 | else posn_slots = window << 1; | ||
193 | |||
194 | /** alternatively **/ | ||
195 | /* posn_slots=i=0; while (i < wndsize) i += 1 << extra_bits[posn_slots++]; */ | ||
196 | |||
197 | /* initialize other state */ | ||
198 | pState->R0 = pState->R1 = pState->R2 = 1; | ||
199 | pState->main_elements = LZX_NUM_CHARS + (posn_slots << 3); | ||
200 | pState->header_read = 0; | ||
201 | pState->frames_read = 0; | ||
202 | pState->block_remaining = 0; | ||
203 | pState->block_type = LZX_BLOCKTYPE_INVALID; | ||
204 | pState->intel_curpos = 0; | ||
205 | pState->intel_started = 0; | ||
206 | pState->window_posn = 0; | ||
207 | |||
208 | /* initialise tables to 0 (because deltas will be applied to them) */ | ||
209 | for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS; i++) pState->MAINTREE_len[i] = 0; | ||
210 | for (i = 0; i < LZX_LENGTH_MAXSYMBOLS; i++) pState->LENGTH_len[i] = 0; | ||
211 | |||
212 | return pState; | ||
213 | } | ||
214 | |||
215 | void LZXteardown(struct LZXstate *pState) | ||
216 | { | ||
217 | if (pState) | ||
218 | { | ||
219 | if (pState->window) | ||
220 | free(pState->window); | ||
221 | free(pState); | ||
222 | } | ||
223 | } | ||
224 | |||
225 | int LZXreset(struct LZXstate *pState) | ||
226 | { | ||
227 | int i; | ||
228 | |||
229 | pState->R0 = pState->R1 = pState->R2 = 1; | ||
230 | pState->header_read = 0; | ||
231 | pState->frames_read = 0; | ||
232 | pState->block_remaining = 0; | ||
233 | pState->block_type = LZX_BLOCKTYPE_INVALID; | ||
234 | pState->intel_curpos = 0; | ||
235 | pState->intel_started = 0; | ||
236 | pState->window_posn = 0; | ||
237 | |||
238 | for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS + LZX_LENTABLE_SAFETY; i++) pState->MAINTREE_len[i] = 0; | ||
239 | for (i = 0; i < LZX_LENGTH_MAXSYMBOLS + LZX_LENTABLE_SAFETY; i++) pState->LENGTH_len[i] = 0; | ||
240 | |||
241 | return DECR_OK; | ||
242 | }; | ||
243 | |||
244 | |||
245 | /* Bitstream reading macros: | ||
246 | * | ||
247 | * INIT_BITSTREAM should be used first to set up the system | ||
248 | * READ_BITS(var,n) takes N bits from the buffer and puts them in var | ||
249 | * | ||
250 | * ENSURE_BITS(n) ensures there are at least N bits in the bit buffer | ||
251 | * PEEK_BITS(n) extracts (without removing) N bits from the bit buffer | ||
252 | * REMOVE_BITS(n) removes N bits from the bit buffer | ||
253 | * | ||
254 | * These bit access routines work by using the area beyond the MSB and the | ||
255 | * LSB as a free source of zeroes. This avoids having to mask any bits. | ||
256 | * So we have to know the bit width of the bitbuffer variable. This is | ||
257 | * sizeof(ULONG) * 8, also defined as ULONG_BITS | ||
258 | */ | ||
259 | |||
260 | /* number of bits in ULONG. Note: This must be at multiple of 16, and at | ||
261 | * least 32 for the bitbuffer code to work (ie, it must be able to ensure | ||
262 | * up to 17 bits - that's adding 16 bits when there's one bit left, or | ||
263 | * adding 32 bits when there are no bits left. The code should work fine | ||
264 | * for machines where ULONG >= 32 bits. | ||
265 | */ | ||
266 | #define ULONG_BITS (sizeof(ULONG)<<3) | ||
267 | |||
268 | #define INIT_BITSTREAM do { bitsleft = 0; bitbuf = 0; } while (0) | ||
269 | |||
270 | #define ENSURE_BITS(n) \ | ||
271 | while (bitsleft < (n)) { \ | ||
272 | bitbuf |= ((inpos[1]<<8)|inpos[0]) << (ULONG_BITS-16 - bitsleft);\ | ||
273 | bitsleft += 16; inpos+=2; \ | ||
274 | } | ||
275 | |||
276 | #define PEEK_BITS(n) (bitbuf >> (ULONG_BITS - (n))) | ||
277 | #define REMOVE_BITS(n) ((bitbuf <<= (n)), (bitsleft -= (n))) | ||
278 | |||
279 | #define READ_BITS(v,n) do { \ | ||
280 | ENSURE_BITS(n); \ | ||
281 | (v) = PEEK_BITS(n); \ | ||
282 | REMOVE_BITS(n); \ | ||
283 | } while (0) | ||
284 | |||
285 | |||
286 | /* Huffman macros */ | ||
287 | |||
288 | #define TABLEBITS(tbl) (LZX_##tbl##_TABLEBITS) | ||
289 | #define MAXSYMBOLS(tbl) (LZX_##tbl##_MAXSYMBOLS) | ||
290 | #define SYMTABLE(tbl) (pState->tbl##_table) | ||
291 | #define LENTABLE(tbl) (pState->tbl##_len) | ||
292 | |||
293 | /* BUILD_TABLE(tablename) builds a huffman lookup table from code lengths. | ||
294 | * In reality, it just calls make_decode_table() with the appropriate | ||
295 | * values - they're all fixed by some #defines anyway, so there's no point | ||
296 | * writing each call out in full by hand. | ||
297 | */ | ||
298 | #define BUILD_TABLE(tbl) \ | ||
299 | if (make_decode_table( \ | ||
300 | MAXSYMBOLS(tbl), TABLEBITS(tbl), LENTABLE(tbl), SYMTABLE(tbl)\ | ||
301 | )) { return DECR_ILLEGALDATA; } | ||
302 | |||
303 | |||
304 | /* READ_HUFFSYM(tablename, var) decodes one huffman symbol from the | ||
305 | * bitstream using the stated table and puts it in var. | ||
306 | */ | ||
307 | #define READ_HUFFSYM(tbl,var) do { \ | ||
308 | ENSURE_BITS(16); \ | ||
309 | hufftbl = SYMTABLE(tbl); \ | ||
310 | if ((i = hufftbl[PEEK_BITS(TABLEBITS(tbl))]) >= MAXSYMBOLS(tbl)) {\ | ||
311 | j = 1 << (ULONG_BITS - TABLEBITS(tbl)); \ | ||
312 | do { \ | ||
313 | j >>= 1; i <<= 1; i |= (bitbuf & j) ? 1 : 0; \ | ||
314 | if (!j) { return DECR_ILLEGALDATA; } \ | ||
315 | } while ((i = hufftbl[i]) >= MAXSYMBOLS(tbl)); \ | ||
316 | } \ | ||
317 | j = LENTABLE(tbl)[(var) = i]; \ | ||
318 | REMOVE_BITS(j); \ | ||
319 | } while (0) | ||
320 | |||
321 | |||
322 | /* READ_LENGTHS(tablename, first, last) reads in code lengths for symbols | ||
323 | * first to last in the given table. The code lengths are stored in their | ||
324 | * own special LZX way. | ||
325 | */ | ||
326 | #define READ_LENGTHS(tbl,first,last) do { \ | ||
327 | lb.bb = bitbuf; lb.bl = bitsleft; lb.ip = inpos; \ | ||
328 | if (lzx_read_lens(pState, LENTABLE(tbl),(first),(last),&lb)) { \ | ||
329 | return DECR_ILLEGALDATA; \ | ||
330 | } \ | ||
331 | bitbuf = lb.bb; bitsleft = lb.bl; inpos = lb.ip; \ | ||
332 | } while (0) | ||
333 | |||
334 | |||
335 | /* make_decode_table(nsyms, nbits, length[], table[]) | ||
336 | * | ||
337 | * This function was coded by David Tritscher. It builds a fast huffman | ||
338 | * decoding table out of just a canonical huffman code lengths table. | ||
339 | * | ||
340 | * nsyms = total number of symbols in this huffman tree. | ||
341 | * nbits = any symbols with a code length of nbits or less can be decoded | ||
342 | * in one lookup of the table. | ||
343 | * length = A table to get code lengths from [0 to syms-1] | ||
344 | * table = The table to fill up with decoded symbols and pointers. | ||
345 | * | ||
346 | * Returns 0 for OK or 1 for error | ||
347 | */ | ||
348 | |||
349 | static int make_decode_table(ULONG nsyms, ULONG nbits, UBYTE *length, UWORD *table) { | ||
350 | register UWORD sym; | ||
351 | register ULONG leaf; | ||
352 | register UBYTE bit_num = 1; | ||
353 | ULONG fill; | ||
354 | ULONG pos = 0; /* the current position in the decode table */ | ||
355 | ULONG table_mask = 1 << nbits; | ||
356 | ULONG bit_mask = table_mask >> 1; /* don't do 0 length codes */ | ||
357 | ULONG next_symbol = bit_mask; /* base of allocation for long codes */ | ||
358 | |||
359 | /* fill entries for codes short enough for a direct mapping */ | ||
360 | while (bit_num <= nbits) { | ||
361 | for (sym = 0; sym < nsyms; sym++) { | ||
362 | if (length[sym] == bit_num) { | ||
363 | leaf = pos; | ||
364 | |||
365 | if((pos += bit_mask) > table_mask) return 1; /* table overrun */ | ||
366 | |||
367 | /* fill all possible lookups of this symbol with the symbol itself */ | ||
368 | fill = bit_mask; | ||
369 | while (fill-- > 0) table[leaf++] = sym; | ||
370 | } | ||
371 | } | ||
372 | bit_mask >>= 1; | ||
373 | bit_num++; | ||
374 | } | ||
375 | |||
376 | /* if there are any codes longer than nbits */ | ||
377 | if (pos != table_mask) { | ||
378 | /* clear the remainder of the table */ | ||
379 | for (sym = pos; sym < table_mask; sym++) table[sym] = 0; | ||
380 | |||
381 | /* give ourselves room for codes to grow by up to 16 more bits */ | ||
382 | pos <<= 16; | ||
383 | table_mask <<= 16; | ||
384 | bit_mask = 1 << 15; | ||
385 | |||
386 | while (bit_num <= 16) { | ||
387 | for (sym = 0; sym < nsyms; sym++) { | ||
388 | if (length[sym] == bit_num) { | ||
389 | leaf = pos >> 16; | ||
390 | for (fill = 0; fill < bit_num - nbits; fill++) { | ||
391 | /* if this path hasn't been taken yet, 'allocate' two entries */ | ||
392 | if (table[leaf] == 0) { | ||
393 | table[(next_symbol << 1)] = 0; | ||
394 | table[(next_symbol << 1) + 1] = 0; | ||
395 | table[leaf] = next_symbol++; | ||
396 | } | ||
397 | /* follow the path and select either left or right for next bit */ | ||
398 | leaf = table[leaf] << 1; | ||
399 | if ((pos >> (15-fill)) & 1) leaf++; | ||
400 | } | ||
401 | table[leaf] = sym; | ||
402 | |||
403 | if ((pos += bit_mask) > table_mask) return 1; /* table overflow */ | ||
404 | } | ||
405 | } | ||
406 | bit_mask >>= 1; | ||
407 | bit_num++; | ||
408 | } | ||
409 | } | ||
410 | |||
411 | /* full table? */ | ||
412 | if (pos == table_mask) return 0; | ||
413 | |||
414 | /* either erroneous table, or all elements are 0 - let's find out. */ | ||
415 | for (sym = 0; sym < nsyms; sym++) if (length[sym]) return 1; | ||
416 | return 0; | ||
417 | } | ||
418 | |||
419 | struct lzx_bits { | ||
420 | ULONG bb; | ||
421 | int bl; | ||
422 | UBYTE *ip; | ||
423 | }; | ||
424 | |||
425 | static int lzx_read_lens(struct LZXstate *pState, UBYTE *lens, ULONG first, ULONG last, struct lzx_bits *lb) { | ||
426 | ULONG i,j, x,y; | ||
427 | int z; | ||
428 | |||
429 | register ULONG bitbuf = lb->bb; | ||
430 | register int bitsleft = lb->bl; | ||
431 | UBYTE *inpos = lb->ip; | ||
432 | UWORD *hufftbl; | ||
433 | |||
434 | for (x = 0; x < 20; x++) { | ||
435 | READ_BITS(y, 4); | ||
436 | LENTABLE(PRETREE)[x] = y; | ||
437 | } | ||
438 | BUILD_TABLE(PRETREE); | ||
439 | |||
440 | for (x = first; x < last; ) { | ||
441 | READ_HUFFSYM(PRETREE, z); | ||
442 | if (z == 17) { | ||
443 | READ_BITS(y, 4); y += 4; | ||
444 | while (y--) lens[x++] = 0; | ||
445 | } | ||
446 | else if (z == 18) { | ||
447 | READ_BITS(y, 5); y += 20; | ||
448 | while (y--) lens[x++] = 0; | ||
449 | } | ||
450 | else if (z == 19) { | ||
451 | READ_BITS(y, 1); y += 4; | ||
452 | READ_HUFFSYM(PRETREE, z); | ||
453 | z = lens[x] - z; if (z < 0) z += 17; | ||
454 | while (y--) lens[x++] = z; | ||
455 | } | ||
456 | else { | ||
457 | z = lens[x] - z; if (z < 0) z += 17; | ||
458 | lens[x++] = z; | ||
459 | } | ||
460 | } | ||
461 | |||
462 | lb->bb = bitbuf; | ||
463 | lb->bl = bitsleft; | ||
464 | lb->ip = inpos; | ||
465 | return 0; | ||
466 | } | ||
467 | |||
468 | int LZXdecompress(struct LZXstate *pState, unsigned char *inpos, unsigned char *outpos, int inlen, int outlen) { | ||
469 | UBYTE *endinp = inpos + inlen; | ||
470 | UBYTE *window = pState->window; | ||
471 | UBYTE *runsrc, *rundest; | ||
472 | UWORD *hufftbl; /* used in READ_HUFFSYM macro as chosen decoding table */ | ||
473 | |||
474 | ULONG window_posn = pState->window_posn; | ||
475 | ULONG window_size = pState->window_size; | ||
476 | ULONG R0 = pState->R0; | ||
477 | ULONG R1 = pState->R1; | ||
478 | ULONG R2 = pState->R2; | ||
479 | |||
480 | register ULONG bitbuf; | ||
481 | register int bitsleft; | ||
482 | ULONG match_offset, i,j,k; /* ijk used in READ_HUFFSYM macro */ | ||
483 | struct lzx_bits lb; /* used in READ_LENGTHS macro */ | ||
484 | |||
485 | int togo = outlen, this_run, main_element, aligned_bits; | ||
486 | int match_length, length_footer, extra, verbatim_bits; | ||
487 | |||
488 | INIT_BITSTREAM; | ||
489 | |||
490 | /* read header if necessary */ | ||
491 | if (!pState->header_read) { | ||
492 | i = j = 0; | ||
493 | READ_BITS(k, 1); if (k) { READ_BITS(i,16); READ_BITS(j,16); } | ||
494 | pState->intel_filesize = (i << 16) | j; /* or 0 if not encoded */ | ||
495 | pState->header_read = 1; | ||
496 | } | ||
497 | |||
498 | /* main decoding loop */ | ||
499 | while (togo > 0) { | ||
500 | /* last block finished, new block expected */ | ||
501 | if (pState->block_remaining == 0) { | ||
502 | if (pState->block_type == LZX_BLOCKTYPE_UNCOMPRESSED) { | ||
503 | if (pState->block_length & 1) inpos++; /* realign bitstream to word */ | ||
504 | INIT_BITSTREAM; | ||
505 | } | ||
506 | |||
507 | READ_BITS(pState->block_type, 3); | ||
508 | READ_BITS(i, 16); | ||
509 | READ_BITS(j, 8); | ||
510 | pState->block_remaining = pState->block_length = (i << 8) | j; | ||
511 | |||
512 | switch (pState->block_type) { | ||
513 | case LZX_BLOCKTYPE_ALIGNED: | ||
514 | for (i = 0; i < 8; i++) { READ_BITS(j, 3); LENTABLE(ALIGNED)[i] = j; } | ||
515 | BUILD_TABLE(ALIGNED); | ||
516 | /* rest of aligned header is same as verbatim */ | ||
517 | |||
518 | case LZX_BLOCKTYPE_VERBATIM: | ||
519 | READ_LENGTHS(MAINTREE, 0, 256); | ||
520 | READ_LENGTHS(MAINTREE, 256, pState->main_elements); | ||
521 | BUILD_TABLE(MAINTREE); | ||
522 | if (LENTABLE(MAINTREE)[0xE8] != 0) pState->intel_started = 1; | ||
523 | |||
524 | READ_LENGTHS(LENGTH, 0, LZX_NUM_SECONDARY_LENGTHS); | ||
525 | BUILD_TABLE(LENGTH); | ||
526 | break; | ||
527 | |||
528 | case LZX_BLOCKTYPE_UNCOMPRESSED: | ||
529 | pState->intel_started = 1; /* because we can't assume otherwise */ | ||
530 | ENSURE_BITS(16); /* get up to 16 pad bits into the buffer */ | ||
531 | if (bitsleft > 16) inpos -= 2; /* and align the bitstream! */ | ||
532 | R0 = inpos[0]|(inpos[1]<<8)|(inpos[2]<<16)|(inpos[3]<<24);inpos+=4; | ||
533 | R1 = inpos[0]|(inpos[1]<<8)|(inpos[2]<<16)|(inpos[3]<<24);inpos+=4; | ||
534 | R2 = inpos[0]|(inpos[1]<<8)|(inpos[2]<<16)|(inpos[3]<<24);inpos+=4; | ||
535 | break; | ||
536 | |||
537 | default: | ||
538 | return DECR_ILLEGALDATA; | ||
539 | } | ||
540 | } | ||
541 | |||
542 | /* buffer exhaustion check */ | ||
543 | if (inpos > endinp) { | ||
544 | /* it's possible to have a file where the next run is less than | ||
545 | * 16 bits in size. In this case, the READ_HUFFSYM() macro used | ||
546 | * in building the tables will exhaust the buffer, so we should | ||
547 | * allow for this, but not allow those accidentally read bits to | ||
548 | * be used (so we check that there are at least 16 bits | ||
549 | * remaining - in this boundary case they aren't really part of | ||
550 | * the compressed data) | ||
551 | */ | ||
552 | if (inpos > (endinp+2) || bitsleft < 16) return DECR_ILLEGALDATA; | ||
553 | } | ||
554 | |||
555 | while ((this_run = pState->block_remaining) > 0 && togo > 0) { | ||
556 | if (this_run > togo) this_run = togo; | ||
557 | togo -= this_run; | ||
558 | pState->block_remaining -= this_run; | ||
559 | |||
560 | /* apply 2^x-1 mask */ | ||
561 | window_posn &= window_size - 1; | ||
562 | /* runs can't straddle the window wraparound */ | ||
563 | if ((window_posn + this_run) > window_size) | ||
564 | return DECR_DATAFORMAT; | ||
565 | |||
566 | switch (pState->block_type) { | ||
567 | |||
568 | case LZX_BLOCKTYPE_VERBATIM: | ||
569 | while (this_run > 0) { | ||
570 | READ_HUFFSYM(MAINTREE, main_element); | ||
571 | |||
572 | if (main_element < LZX_NUM_CHARS) { | ||
573 | /* literal: 0 to LZX_NUM_CHARS-1 */ | ||
574 | window[window_posn++] = main_element; | ||
575 | this_run--; | ||
576 | } | ||
577 | else { | ||
578 | /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */ | ||
579 | main_element -= LZX_NUM_CHARS; | ||
580 | |||
581 | match_length = main_element & LZX_NUM_PRIMARY_LENGTHS; | ||
582 | if (match_length == LZX_NUM_PRIMARY_LENGTHS) { | ||
583 | READ_HUFFSYM(LENGTH, length_footer); | ||
584 | match_length += length_footer; | ||
585 | } | ||
586 | match_length += LZX_MIN_MATCH; | ||
587 | |||
588 | match_offset = main_element >> 3; | ||
589 | |||
590 | if (match_offset > 2) { | ||
591 | /* not repeated offset */ | ||
592 | if (match_offset != 3) { | ||
593 | extra = extra_bits[match_offset]; | ||
594 | READ_BITS(verbatim_bits, extra); | ||
595 | match_offset = position_base[match_offset] - 2 + verbatim_bits; | ||
596 | } | ||
597 | else { | ||
598 | match_offset = 1; | ||
599 | } | ||
600 | |||
601 | /* update repeated offset LRU queue */ | ||
602 | R2 = R1; R1 = R0; R0 = match_offset; | ||
603 | } | ||
604 | else if (match_offset == 0) { | ||
605 | match_offset = R0; | ||
606 | } | ||
607 | else if (match_offset == 1) { | ||
608 | match_offset = R1; | ||
609 | R1 = R0; R0 = match_offset; | ||
610 | } | ||
611 | else /* match_offset == 2 */ { | ||
612 | match_offset = R2; | ||
613 | R2 = R0; R0 = match_offset; | ||
614 | } | ||
615 | |||
616 | rundest = window + window_posn; | ||
617 | runsrc = rundest - match_offset; | ||
618 | window_posn += match_length; | ||
619 | this_run -= match_length; | ||
620 | |||
621 | /* copy any wrapped around source data */ | ||
622 | while ((runsrc < window) && (match_length-- > 0)) { | ||
623 | *rundest++ = *(runsrc + window_size); runsrc++; | ||
624 | } | ||
625 | /* copy match data - no worries about destination wraps */ | ||
626 | while (match_length-- > 0) *rundest++ = *runsrc++; | ||
627 | |||
628 | } | ||
629 | } | ||
630 | break; | ||
631 | |||
632 | case LZX_BLOCKTYPE_ALIGNED: | ||
633 | while (this_run > 0) { | ||
634 | READ_HUFFSYM(MAINTREE, main_element); | ||
635 | |||
636 | if (main_element < LZX_NUM_CHARS) { | ||
637 | /* literal: 0 to LZX_NUM_CHARS-1 */ | ||
638 | window[window_posn++] = main_element; | ||
639 | this_run--; | ||
640 | } | ||
641 | else { | ||
642 | /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */ | ||
643 | main_element -= LZX_NUM_CHARS; | ||
644 | |||
645 | match_length = main_element & LZX_NUM_PRIMARY_LENGTHS; | ||
646 | if (match_length == LZX_NUM_PRIMARY_LENGTHS) { | ||
647 | READ_HUFFSYM(LENGTH, length_footer); | ||
648 | match_length += length_footer; | ||
649 | } | ||
650 | match_length += LZX_MIN_MATCH; | ||
651 | |||
652 | match_offset = main_element >> 3; | ||
653 | |||
654 | if (match_offset > 2) { | ||
655 | /* not repeated offset */ | ||
656 | extra = extra_bits[match_offset]; | ||
657 | match_offset = position_base[match_offset] - 2; | ||
658 | if (extra > 3) { | ||
659 | /* verbatim and aligned bits */ | ||
660 | extra -= 3; | ||
661 | READ_BITS(verbatim_bits, extra); | ||
662 | match_offset += (verbatim_bits << 3); | ||
663 | READ_HUFFSYM(ALIGNED, aligned_bits); | ||
664 | match_offset += aligned_bits; | ||
665 | } | ||
666 | else if (extra == 3) { | ||
667 | /* aligned bits only */ | ||
668 | READ_HUFFSYM(ALIGNED, aligned_bits); | ||
669 | match_offset += aligned_bits; | ||
670 | } | ||
671 | else if (extra > 0) { /* extra==1, extra==2 */ | ||
672 | /* verbatim bits only */ | ||
673 | READ_BITS(verbatim_bits, extra); | ||
674 | match_offset += verbatim_bits; | ||
675 | } | ||
676 | else /* extra == 0 */ { | ||
677 | /* ??? */ | ||
678 | match_offset = 1; | ||
679 | } | ||
680 | |||
681 | /* update repeated offset LRU queue */ | ||
682 | R2 = R1; R1 = R0; R0 = match_offset; | ||
683 | } | ||
684 | else if (match_offset == 0) { | ||
685 | match_offset = R0; | ||
686 | } | ||
687 | else if (match_offset == 1) { | ||
688 | match_offset = R1; | ||
689 | R1 = R0; R0 = match_offset; | ||
690 | } | ||
691 | else /* match_offset == 2 */ { | ||
692 | match_offset = R2; | ||
693 | R2 = R0; R0 = match_offset; | ||
694 | } | ||
695 | |||
696 | rundest = window + window_posn; | ||
697 | runsrc = rundest - match_offset; | ||
698 | window_posn += match_length; | ||
699 | this_run -= match_length; | ||
700 | |||
701 | /* copy any wrapped around source data */ | ||
702 | while ((runsrc < window) && (match_length-- > 0)) { | ||
703 | *rundest++ = *(runsrc + window_size); runsrc++; | ||
704 | } | ||
705 | /* copy match data - no worries about destination wraps */ | ||
706 | while (match_length-- > 0) *rundest++ = *runsrc++; | ||
707 | |||
708 | } | ||
709 | } | ||
710 | break; | ||
711 | |||
712 | case LZX_BLOCKTYPE_UNCOMPRESSED: | ||
713 | if ((inpos + this_run) > endinp) return DECR_ILLEGALDATA; | ||
714 | memcpy(window + window_posn, inpos, (size_t) this_run); | ||
715 | inpos += this_run; window_posn += this_run; | ||
716 | break; | ||
717 | |||
718 | default: | ||
719 | return DECR_ILLEGALDATA; /* might as well */ | ||
720 | } | ||
721 | |||
722 | } | ||
723 | } | ||
724 | |||
725 | if (togo != 0) return DECR_ILLEGALDATA; | ||
726 | memcpy(outpos, window + ((!window_posn) ? window_size : window_posn) - outlen, (size_t) outlen); | ||
727 | |||
728 | pState->window_posn = window_posn; | ||
729 | pState->R0 = R0; | ||
730 | pState->R1 = R1; | ||
731 | pState->R2 = R2; | ||
732 | |||
733 | /* intel E8 decoding */ | ||
734 | if ((pState->frames_read++ < 32768) && pState->intel_filesize != 0) { | ||
735 | if (outlen <= 6 || !pState->intel_started) { | ||
736 | pState->intel_curpos += outlen; | ||
737 | } | ||
738 | else { | ||
739 | UBYTE *data = outpos; | ||
740 | UBYTE *dataend = data + outlen - 10; | ||
741 | LONG curpos = pState->intel_curpos; | ||
742 | LONG filesize = pState->intel_filesize; | ||
743 | LONG abs_off, rel_off; | ||
744 | |||
745 | pState->intel_curpos = curpos + outlen; | ||
746 | |||
747 | while (data < dataend) { | ||
748 | if (*data++ != 0xE8) { curpos++; continue; } | ||
749 | abs_off = data[0] | (data[1]<<8) | (data[2]<<16) | (data[3]<<24); | ||
750 | if ((abs_off >= -curpos) && (abs_off < filesize)) { | ||
751 | rel_off = (abs_off >= 0) ? abs_off - curpos : abs_off + filesize; | ||
752 | data[0] = (UBYTE) rel_off; | ||
753 | data[1] = (UBYTE) (rel_off >> 8); | ||
754 | data[2] = (UBYTE) (rel_off >> 16); | ||
755 | data[3] = (UBYTE) (rel_off >> 24); | ||
756 | } | ||
757 | data += 4; | ||
758 | curpos += 5; | ||
759 | } | ||
760 | } | ||
761 | } | ||
762 | return DECR_OK; | ||
763 | } | ||
764 | |||
765 | #ifdef LZX_CHM_TESTDRIVER | ||
766 | int main(int c, char **v) | ||
767 | { | ||
768 | FILE *fin, *fout; | ||
769 | struct LZXstate state; | ||
770 | UBYTE ibuf[16384]; | ||
771 | UBYTE obuf[32768]; | ||
772 | int ilen, olen; | ||
773 | int status; | ||
774 | int i; | ||
775 | int count=0; | ||
776 | int w = atoi(v[1]); | ||
777 | LZXinit(&state, w); | ||
778 | fout = fopen(v[2], "wb"); | ||
779 | for (i=3; i<c; i++) | ||
780 | { | ||
781 | fin = fopen(v[i], "rb"); | ||
782 | ilen = fread(ibuf, 1, 16384, fin); | ||
783 | status = LZXdecompress(&state, ibuf, obuf, ilen, 32768); | ||
784 | switch (status) | ||
785 | { | ||
786 | case DECR_OK: | ||
787 | printf("ok\n"); | ||
788 | fwrite(obuf, 1, 32768, fout); | ||
789 | break; | ||
790 | case DECR_DATAFORMAT: | ||
791 | printf("bad format\n"); | ||
792 | break; | ||
793 | case DECR_ILLEGALDATA: | ||
794 | printf("illegal data\n"); | ||
795 | break; | ||
796 | case DECR_NOMEMORY: | ||
797 | printf("no memory\n"); | ||
798 | break; | ||
799 | default: | ||
800 | break; | ||
801 | } | ||
802 | fclose(fin); | ||
803 | if (++count == 2) | ||
804 | { | ||
805 | count = 0; | ||
806 | LZXreset(&state); | ||
807 | } | ||
808 | } | ||
809 | fclose(fout); | ||
810 | } | ||
811 | #endif | ||