Diffstat (limited to 'noncore/apps/opie-reader/chm_lib.c') (more/less context) (show whitespace changes)
-rw-r--r-- | noncore/apps/opie-reader/chm_lib.c | 1876 |
1 files changed, 1876 insertions, 0 deletions
diff --git a/noncore/apps/opie-reader/chm_lib.c b/noncore/apps/opie-reader/chm_lib.c new file mode 100644 index 0000000..ecf8278 --- a/dev/null +++ b/noncore/apps/opie-reader/chm_lib.c | |||
@@ -0,0 +1,1876 @@ | |||
1 | /* $Id$ */ | ||
2 | /*************************************************************************** | ||
3 | * chm_lib.c - CHM archive manipulation routines * | ||
4 | * ------------------- * | ||
5 | * * | ||
6 | * author: Jed Wing <jedwin@ugcs.caltech.edu> * | ||
7 | * version: 0.3 * | ||
8 | * notes: These routines are meant for the manipulation of microsoft * | ||
9 | * .chm (compiled html help) files, but may likely be used * | ||
10 | * for the manipulation of any ITSS archive, if ever ITSS * | ||
11 | * archives are used for any other purpose. * | ||
12 | * * | ||
13 | * Note also that the section names are statically handled. * | ||
14 | * To be entirely correct, the section names should be read * | ||
15 | * from the section names meta-file, and then the various * | ||
16 | * content sections and the "transforms" to apply to the data * | ||
17 | * they contain should be inferred from the section name and * | ||
18 | * the meta-files referenced using that name; however, all of * | ||
19 | * the files I've been able to get my hands on appear to have * | ||
20 | * only two sections: Uncompressed and MSCompressed. * | ||
21 | * Additionally, the ITSS.DLL file included with Windows does * | ||
22 | * not appear to handle any different transforms than the * | ||
23 | * simple LZX-transform. Furthermore, the list of transforms * | ||
24 | * to apply is broken, in that only half the required space * | ||
25 | * is allocated for the list. (It appears as though the * | ||
26 | * space is allocated for ASCII strings, but the strings are * | ||
27 | * written as unicode. As a result, only the first half of * | ||
28 | * the string appears.) So this is probably not too big of * | ||
29 | * a deal, at least until CHM v4 (MS .lit files), which also * | ||
30 | * incorporate encryption, of some description. * | ||
31 | * * | ||
32 | * switches: CHM_MT: compile library with thread-safety * | ||
33 | * * | ||
34 | * switches (Linux only): * | ||
35 | * CHM_USE_PREAD: compile library to use pread instead of * | ||
36 | * lseek/read * | ||
37 | * CHM_USE_IO64: compile library to support full 64-bit I/O * | ||
38 | * as is needed to properly deal with the * | ||
39 | * 64-bit file offsets. * | ||
40 | ***************************************************************************/ | ||
41 | |||
42 | /*************************************************************************** | ||
43 | * * | ||
44 | * This program is free software; you can redistribute it and/or modify * | ||
45 | * it under the terms of the GNU Lesser General Public License as * | ||
46 | * published by the Free Software Foundation; either version 2.1 of the * | ||
47 | * License, or (at your option) any later version. * | ||
48 | * * | ||
49 | ***************************************************************************/ | ||
50 | |||
51 | #include "chm_lib.h" | ||
52 | |||
53 | #ifdef CHM_MT | ||
54 | #define _REENTRANT | ||
55 | #endif | ||
56 | |||
57 | #include "lzx.h" | ||
58 | |||
59 | #include <stdlib.h> | ||
60 | #include <string.h> | ||
61 | |||
62 | #if __sun || __sgi | ||
63 | #include <strings.h> | ||
64 | #endif | ||
65 | |||
66 | #ifdef WIN32 | ||
67 | #include <windows.h> | ||
68 | #include <malloc.h> | ||
69 | #else | ||
70 | /* basic Linux system includes */ | ||
71 | #define _XOPEN_SOURCE 500 | ||
72 | #include <unistd.h> | ||
73 | #include <sys/types.h> | ||
74 | #include <sys/stat.h> | ||
75 | #include <fcntl.h> | ||
76 | #include <malloc.h> | ||
77 | #endif | ||
78 | |||
79 | /* includes/defines for threading, if using them */ | ||
/*
 * Locking primitives.
 *
 * With CHM_MT defined, CHM_ACQUIRE_LOCK/CHM_RELEASE_LOCK take and drop the
 * given lock object (a CRITICAL_SECTION on Win32, a pthread_mutex_t
 * elsewhere).  Without CHM_MT both macros expand to nothing.
 */
#ifdef CHM_MT
#ifdef WIN32
#define CHM_ACQUIRE_LOCK(a) do {                        \
        EnterCriticalSection(&(a));                     \
    } while(0)
/* releasing must *leave* the critical section; entering it again here
 * would only bump the recursion count and never let other threads in */
#define CHM_RELEASE_LOCK(a) do {                        \
        LeaveCriticalSection(&(a));                     \
    } while(0)

#else
#include <pthread.h>

#define CHM_ACQUIRE_LOCK(a) do {                        \
        pthread_mutex_lock(&(a));                       \
    } while(0)
#define CHM_RELEASE_LOCK(a) do {                        \
        pthread_mutex_unlock(&(a));                     \
    } while(0)

#endif
#else
#define CHM_ACQUIRE_LOCK(a) /* do nothing */
#define CHM_RELEASE_LOCK(a) /* do nothing */
#endif
104 | |||
105 | #ifdef WIN32 | ||
106 | #define CHM_NULL_FD (INVALID_HANDLE_VALUE) | ||
107 | #define CHM_USE_WIN32IO 1 | ||
108 | #define CHM_CLOSE_FILE(fd) CloseHandle((fd)) | ||
109 | #else | ||
110 | #define CHM_NULL_FD (-1) | ||
111 | #define CHM_CLOSE_FILE(fd) close((fd)) | ||
112 | #endif | ||
113 | |||
114 | /* | ||
115 | * defines related to tuning | ||
116 | */ | ||
117 | #ifndef CHM_MAX_BLOCKS_CACHED | ||
118 | #define CHM_MAX_BLOCKS_CACHED 5 | ||
119 | #endif | ||
120 | |||
121 | /* | ||
122 | * architecture specific defines | ||
123 | * | ||
124 | * Note: as soon as C99 is more widespread, the below defines should | ||
125 | * probably just use the C99 sized-int types. | ||
126 | * | ||
127 | * The following settings will probably work for many platforms. The sizes | ||
128 | * don't have to be exactly correct, but the types must accommodate at least as | ||
129 | * many bits as they specify. | ||
130 | */ | ||
131 | |||
132 | /* i386, 32-bit, Windows */ | ||
133 | #ifdef WIN32 | ||
134 | typedef unsigned char UChar; | ||
135 | typedef __int16 Int16; | ||
136 | typedef unsigned __int16 UInt16; | ||
137 | typedef __int32 Int32; | ||
138 | typedef unsigned __int32 UInt32; | ||
139 | typedef __int64 Int64; | ||
140 | typedef unsigned __int64 UInt64; | ||
141 | |||
142 | /* I386, 32-bit, non-Windows */ | ||
143 | /* Sparc */ | ||
144 | /* MIPS */ | ||
145 | #else | ||
146 | typedef unsigned char UChar; | ||
147 | typedef short Int16; | ||
148 | typedef unsigned short UInt16; | ||
149 | typedef long Int32; | ||
150 | typedef unsigned long UInt32; | ||
151 | typedef long long Int64; | ||
152 | typedef unsigned long long UInt64; | ||
153 | #endif | ||
154 | |||
155 | /* GCC */ | ||
156 | #ifdef __GNUC__ | ||
157 | #define memcmp __builtin_memcmp | ||
158 | #define memcpy __builtin_memcpy | ||
159 | #define strlen __builtin_strlen | ||
160 | |||
161 | #elif defined(WIN32) | ||
/*
 * Fallback "find first set" for this non-GCC Win32 branch.
 *
 * Returns the 1-based position of the least significant set bit of val,
 * or 0 when val has no bits set.  The probe bit is unsigned so that
 * shifting it past the top bit wraps to 0 instead of overflowing a
 * signed int (which is undefined behaviour).
 */
static int ffs(unsigned int val)
{
    unsigned int bit = 1u;
    int idx = 1;

    while (bit != 0 && (val & bit) == 0)
    {
        bit <<= 1;
        ++idx;
    }

    /* bit wrapped to 0: no bit of val was set */
    return (bit == 0) ? 0 : idx;
}
175 | |||
176 | #endif | ||
177 | |||
178 | /* utilities for unmarshalling data */ | ||
/*
 * Copy `count` raw bytes from the input cursor into dest.
 *
 *   pData      - in/out: read cursor, advanced by count on success
 *   pLenRemain - in/out: bytes left in the input, reduced by count on success
 *   dest       - receives the bytes
 *   count      - number of bytes wanted
 *
 * Returns 1 on success.  Returns 0, leaving cursor and length untouched,
 * when count is not positive or exceeds the remaining length.
 */
static int _unmarshal_char_array(unsigned char **pData,
                                 unsigned long *pLenRemain,
                                 char *dest,
                                 int count)
{
    unsigned char *src = *pData;

    if (count <= 0)
        return 0;
    if ((unsigned int)count > *pLenRemain)
        return 0;

    memcpy(dest, src, count);
    *pData      = src + count;
    *pLenRemain = *pLenRemain - count;
    return 1;
}
191 | |||
/*
 * Unsigned-char flavour of the raw byte copy: move `count` bytes from the
 * input cursor into dest, then advance the cursor and shrink the remaining
 * length by the same amount.
 *
 * Returns 1 on success, 0 (nothing modified) when count is not positive or
 * is larger than the number of bytes remaining.
 */
static int _unmarshal_uchar_array(unsigned char **pData,
                                  unsigned long *pLenRemain,
                                  unsigned char *dest,
                                  int count)
{
    int ok = (count > 0) && !((unsigned int)count > *pLenRemain);

    if (ok)
    {
        memcpy(dest, *pData, count);
        *pData += count;
        *pLenRemain -= count;
    }
    return ok ? 1 : 0;
}
204 | |||
205 | static int _unmarshal_int16(unsigned char **pData, | ||
206 | unsigned long *pLenRemain, | ||
207 | Int16 *dest) | ||
208 | { | ||
209 | if (2 > *pLenRemain) | ||
210 | return 0; | ||
211 | *dest = (*pData)[0] | (*pData)[1]<<8; | ||
212 | *pData += 2; | ||
213 | *pLenRemain -= 2; | ||
214 | return 1; | ||
215 | } | ||
216 | |||
217 | static int _unmarshal_uint16(unsigned char **pData, | ||
218 | unsigned long *pLenRemain, | ||
219 | UInt16 *dest) | ||
220 | { | ||
221 | if (2 > *pLenRemain) | ||
222 | return 0; | ||
223 | *dest = (*pData)[0] | (*pData)[1]<<8; | ||
224 | *pData += 2; | ||
225 | *pLenRemain -= 2; | ||
226 | return 1; | ||
227 | } | ||
228 | |||
229 | static int _unmarshal_int32(unsigned char **pData, | ||
230 | unsigned long *pLenRemain, | ||
231 | Int32 *dest) | ||
232 | { | ||
233 | if (4 > *pLenRemain) | ||
234 | return 0; | ||
235 | *dest = (*pData)[0] | (*pData)[1]<<8 | (*pData)[2]<<16 | (*pData)[3]<<24; | ||
236 | *pData += 4; | ||
237 | *pLenRemain -= 4; | ||
238 | return 1; | ||
239 | } | ||
240 | |||
241 | static int _unmarshal_uint32(unsigned char **pData, | ||
242 | unsigned long *pLenRemain, | ||
243 | UInt32 *dest) | ||
244 | { | ||
245 | if (4 > *pLenRemain) | ||
246 | return 0; | ||
247 | *dest = (*pData)[0] | (*pData)[1]<<8 | (*pData)[2]<<16 | (*pData)[3]<<24; | ||
248 | *pData += 4; | ||
249 | *pLenRemain -= 4; | ||
250 | return 1; | ||
251 | } | ||
252 | |||
253 | static int _unmarshal_int64(unsigned char **pData, | ||
254 | unsigned long *pLenRemain, | ||
255 | Int64 *dest) | ||
256 | { | ||
257 | Int64 temp; | ||
258 | int i; | ||
259 | if (8 > *pLenRemain) | ||
260 | return 0; | ||
261 | temp=0; | ||
262 | for(i=8; i>0; i--) | ||
263 | { | ||
264 | temp <<= 8; | ||
265 | temp |= (*pData)[i-1]; | ||
266 | } | ||
267 | *dest = temp; | ||
268 | *pData += 8; | ||
269 | *pLenRemain -= 8; | ||
270 | return 1; | ||
271 | } | ||
272 | |||
273 | static int _unmarshal_uint64(unsigned char **pData, | ||
274 | unsigned long *pLenRemain, | ||
275 | UInt64 *dest) | ||
276 | { | ||
277 | UInt64 temp; | ||
278 | int i; | ||
279 | if (8 > *pLenRemain) | ||
280 | return 0; | ||
281 | temp=0; | ||
282 | for(i=8; i>0; i--) | ||
283 | { | ||
284 | temp <<= 8; | ||
285 | temp |= (*pData)[i-1]; | ||
286 | } | ||
287 | *dest = temp; | ||
288 | *pData += 8; | ||
289 | *pLenRemain -= 8; | ||
290 | return 1; | ||
291 | } | ||
292 | |||
/*
 * Read a 16-byte UUID from the input cursor into dest (raw byte copy).
 *
 * Returns whatever _unmarshal_uchar_array returns for a 16-byte request:
 * 1 on success, 0 if fewer than 16 bytes remain.
 */
static int _unmarshal_uuid(unsigned char **pData,
                           unsigned long *pDataLen,
                           unsigned char *dest)
{
    enum { UUID_BYTES = 16 };

    return _unmarshal_uchar_array(pData, pDataLen, dest, UUID_BYTES);
}
299 | |||
300 | /* names of sections essential to decompression */ | ||
301 | static const char _CHMU_RESET_TABLE[] = | ||
302 | "::DataSpace/Storage/MSCompressed/Transform/" | ||
303 | "{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/" | ||
304 | "InstanceData/ResetTable"; | ||
305 | static const char _CHMU_LZXC_CONTROLDATA[] = | ||
306 | "::DataSpace/Storage/MSCompressed/ControlData"; | ||
307 | static const char _CHMU_CONTENT[] = | ||
308 | "::DataSpace/Storage/MSCompressed/Content"; | ||
309 | static const char _CHMU_SPANINFO[] = | ||
310 | "::DataSpace/Storage/MSCompressed/SpanInfo"; | ||
311 | |||
312 | /* | ||
313 | * structures local to this module | ||
314 | */ | ||
315 | |||
316 | /* structure of ITSF headers */ | ||
317 | #define _CHM_ITSF_V2_LEN (0x58) | ||
318 | #define _CHM_ITSF_V3_LEN (0x60) | ||
319 | struct chmItsfHeader | ||
320 | { | ||
321 | char signature[4]; /* 0 (ITSF) */ | ||
322 | Int32 version; /* 4 */ | ||
323 | Int32 header_len; /* 8 */ | ||
324 | Int32 unknown_000c; /* c */ | ||
325 | UInt32 last_modified; /* 10 */ | ||
326 | UInt32 lang_id; /* 14 */ | ||
327 | UChar dir_uuid[16]; /* 18 */ | ||
328 | UChar stream_uuid[16]; /* 28 */ | ||
329 | UInt64 unknown_offset; /* 38 */ | ||
330 | UInt64 unknown_len; /* 40 */ | ||
331 | UInt64 dir_offset; /* 48 */ | ||
332 | UInt64 dir_len; /* 50 */ | ||
333 | UInt64 data_offset; /* 58 (Not present before V3) */ | ||
334 | }; /* __attribute__ ((aligned (1))); */ | ||
335 | |||
336 | static int _unmarshal_itsf_header(unsigned char **pData, | ||
337 | unsigned long *pDataLen, | ||
338 | struct chmItsfHeader *dest) | ||
339 | { | ||
340 | /* we only know how to deal with the 0x58 and 0x60 byte structures */ | ||
341 | if (*pDataLen != _CHM_ITSF_V2_LEN && *pDataLen != _CHM_ITSF_V3_LEN) | ||
342 | return 0; | ||
343 | |||
344 | /* unmarshal common fields */ | ||
345 | _unmarshal_char_array(pData, pDataLen, dest->signature, 4); | ||
346 | _unmarshal_int32 (pData, pDataLen, &dest->version); | ||
347 | _unmarshal_int32 (pData, pDataLen, &dest->header_len); | ||
348 | _unmarshal_int32 (pData, pDataLen, &dest->unknown_000c); | ||
349 | _unmarshal_uint32 (pData, pDataLen, &dest->last_modified); | ||
350 | _unmarshal_uint32 (pData, pDataLen, &dest->lang_id); | ||
351 | _unmarshal_uuid (pData, pDataLen, dest->dir_uuid); | ||
352 | _unmarshal_uuid (pData, pDataLen, dest->stream_uuid); | ||
353 | _unmarshal_uint64 (pData, pDataLen, &dest->unknown_offset); | ||
354 | _unmarshal_uint64 (pData, pDataLen, &dest->unknown_len); | ||
355 | _unmarshal_uint64 (pData, pDataLen, &dest->dir_offset); | ||
356 | _unmarshal_uint64 (pData, pDataLen, &dest->dir_len); | ||
357 | |||
358 | /* error check the data */ | ||
359 | /* XXX: should also check UUIDs, probably, though with a version 3 file, | ||
360 | * current MS tools do not seem to use them. | ||
361 | */ | ||
362 | if (memcmp(dest->signature, "ITSF", 4) != 0) | ||
363 | return 0; | ||
364 | if (dest->version == 2) | ||
365 | { | ||
366 | if (dest->header_len < _CHM_ITSF_V2_LEN) | ||
367 | return 0; | ||
368 | } | ||
369 | else if (dest->version == 3) | ||
370 | { | ||
371 | if (dest->header_len < _CHM_ITSF_V3_LEN) | ||
372 | return 0; | ||
373 | } | ||
374 | else | ||
375 | return 0; | ||
376 | |||
377 | /* now, if we have a V3 structure, unmarshal the rest. | ||
378 | * otherwise, compute it | ||
379 | */ | ||
380 | if (dest->version == 3) | ||
381 | { | ||
382 | if (*pDataLen != 0) | ||
383 | _unmarshal_uint64(pData, pDataLen, &dest->data_offset); | ||
384 | else | ||
385 | return 0; | ||
386 | } | ||
387 | else | ||
388 | dest->data_offset = dest->dir_offset + dest->dir_len; | ||
389 | |||
390 | return 1; | ||
391 | } | ||
392 | |||
393 | /* structure of ITSP headers */ | ||
394 | #define _CHM_ITSP_V1_LEN (0x54) | ||
395 | struct chmItspHeader | ||
396 | { | ||
397 | char signature[4]; /* 0 (ITSP) */ | ||
398 | Int32 version; /* 4 */ | ||
399 | Int32 header_len; /* 8 */ | ||
400 | Int32 unknown_000c; /* c */ | ||
401 | UInt32 block_len; /* 10 */ | ||
402 | Int32 blockidx_intvl; /* 14 */ | ||
403 | Int32 index_depth; /* 18 */ | ||
404 | Int32 index_root; /* 1c */ | ||
405 | Int32 index_head; /* 20 */ | ||
406 | Int32 unknown_0024; /* 24 */ | ||
407 | UInt32 num_blocks; /* 28 */ | ||
408 | Int32 unknown_002c; /* 2c */ | ||
409 | UInt32 lang_id; /* 30 */ | ||
410 | UChar system_uuid[16]; /* 34 */ | ||
411 | UChar unknown_0044[16]; /* 44 */ | ||
412 | }; /* __attribute__ ((aligned (1))); */ | ||
413 | |||
414 | static int _unmarshal_itsp_header(unsigned char **pData, | ||
415 | unsigned long *pDataLen, | ||
416 | struct chmItspHeader *dest) | ||
417 | { | ||
418 | /* we only know how to deal with a 0x54 byte structures */ | ||
419 | if (*pDataLen != _CHM_ITSP_V1_LEN) | ||
420 | return 0; | ||
421 | |||
422 | /* unmarshal fields */ | ||
423 | _unmarshal_char_array(pData, pDataLen, dest->signature, 4); | ||
424 | _unmarshal_int32 (pData, pDataLen, &dest->version); | ||
425 | _unmarshal_int32 (pData, pDataLen, &dest->header_len); | ||
426 | _unmarshal_int32 (pData, pDataLen, &dest->unknown_000c); | ||
427 | _unmarshal_uint32 (pData, pDataLen, &dest->block_len); | ||
428 | _unmarshal_int32 (pData, pDataLen, &dest->blockidx_intvl); | ||
429 | _unmarshal_int32 (pData, pDataLen, &dest->index_depth); | ||
430 | _unmarshal_int32 (pData, pDataLen, &dest->index_root); | ||
431 | _unmarshal_int32 (pData, pDataLen, &dest->index_head); | ||
432 | _unmarshal_int32 (pData, pDataLen, &dest->unknown_0024); | ||
433 | _unmarshal_uint32 (pData, pDataLen, &dest->num_blocks); | ||
434 | _unmarshal_int32 (pData, pDataLen, &dest->unknown_002c); | ||
435 | _unmarshal_uint32 (pData, pDataLen, &dest->lang_id); | ||
436 | _unmarshal_uuid (pData, pDataLen, dest->system_uuid); | ||
437 | _unmarshal_uchar_array(pData, pDataLen, dest->unknown_0044, 16); | ||
438 | |||
439 | /* error check the data */ | ||
440 | if (memcmp(dest->signature, "ITSP", 4) != 0) | ||
441 | return 0; | ||
442 | if (dest->version != 1) | ||
443 | return 0; | ||
444 | if (dest->header_len != _CHM_ITSP_V1_LEN) | ||
445 | return 0; | ||
446 | |||
447 | return 1; | ||
448 | } | ||
449 | |||
450 | /* structure of PMGL headers */ | ||
451 | static const char _chm_pmgl_marker[4] = "PMGL"; | ||
452 | #define _CHM_PMGL_LEN (0x14) | ||
453 | struct chmPmglHeader | ||
454 | { | ||
455 | char signature[4]; /* 0 (PMGL) */ | ||
456 | UInt32 free_space; /* 4 */ | ||
457 | UInt32 unknown_0008; /* 8 */ | ||
458 | Int32 block_prev; /* c */ | ||
459 | Int32 block_next; /* 10 */ | ||
460 | }; /* __attribute__ ((aligned (1))); */ | ||
461 | |||
462 | static int _unmarshal_pmgl_header(unsigned char **pData, | ||
463 | unsigned long *pDataLen, | ||
464 | struct chmPmglHeader *dest) | ||
465 | { | ||
466 | /* we only know how to deal with a 0x14 byte structures */ | ||
467 | if (*pDataLen != _CHM_PMGL_LEN) | ||
468 | return 0; | ||
469 | |||
470 | /* unmarshal fields */ | ||
471 | _unmarshal_char_array(pData, pDataLen, dest->signature, 4); | ||
472 | _unmarshal_uint32 (pData, pDataLen, &dest->free_space); | ||
473 | _unmarshal_uint32 (pData, pDataLen, &dest->unknown_0008); | ||
474 | _unmarshal_int32 (pData, pDataLen, &dest->block_prev); | ||
475 | _unmarshal_int32 (pData, pDataLen, &dest->block_next); | ||
476 | |||
477 | /* check structure */ | ||
478 | if (memcmp(dest->signature, _chm_pmgl_marker, 4) != 0) | ||
479 | return 0; | ||
480 | |||
481 | return 1; | ||
482 | } | ||
483 | |||
484 | /* structure of PMGI headers */ | ||
485 | static const char _chm_pmgi_marker[4] = "PMGI"; | ||
486 | #define _CHM_PMGI_LEN (0x08) | ||
487 | struct chmPmgiHeader | ||
488 | { | ||
489 | char signature[4]; /* 0 (PMGI) */ | ||
490 | UInt32 free_space; /* 4 */ | ||
491 | }; /* __attribute__ ((aligned (1))); */ | ||
492 | |||
493 | static int _unmarshal_pmgi_header(unsigned char **pData, | ||
494 | unsigned long *pDataLen, | ||
495 | struct chmPmgiHeader *dest) | ||
496 | { | ||
497 | /* we only know how to deal with a 0x8 byte structures */ | ||
498 | if (*pDataLen != _CHM_PMGI_LEN) | ||
499 | return 0; | ||
500 | |||
501 | /* unmarshal fields */ | ||
502 | _unmarshal_char_array(pData, pDataLen, dest->signature, 4); | ||
503 | _unmarshal_uint32 (pData, pDataLen, &dest->free_space); | ||
504 | |||
505 | /* check structure */ | ||
506 | if (memcmp(dest->signature, _chm_pmgi_marker, 4) != 0) | ||
507 | return 0; | ||
508 | |||
509 | return 1; | ||
510 | } | ||
511 | |||
512 | /* structure of LZXC reset table */ | ||
513 | #define _CHM_LZXC_RESETTABLE_V1_LEN (0x28) | ||
514 | struct chmLzxcResetTable | ||
515 | { | ||
516 | UInt32 version; | ||
517 | UInt32 block_count; | ||
518 | UInt32 unknown; | ||
519 | UInt32 table_offset; | ||
520 | UInt64 uncompressed_len; | ||
521 | UInt64 compressed_len; | ||
522 | UInt64 block_len; | ||
523 | }; /* __attribute__ ((aligned (1))); */ | ||
524 | |||
525 | static int _unmarshal_lzxc_reset_table(unsigned char **pData, | ||
526 | unsigned long *pDataLen, | ||
527 | struct chmLzxcResetTable *dest) | ||
528 | { | ||
529 | /* we only know how to deal with a 0x28 byte structures */ | ||
530 | if (*pDataLen != _CHM_LZXC_RESETTABLE_V1_LEN) | ||
531 | return 0; | ||
532 | |||
533 | /* unmarshal fields */ | ||
534 | _unmarshal_uint32 (pData, pDataLen, &dest->version); | ||
535 | _unmarshal_uint32 (pData, pDataLen, &dest->block_count); | ||
536 | _unmarshal_uint32 (pData, pDataLen, &dest->unknown); | ||
537 | _unmarshal_uint32 (pData, pDataLen, &dest->table_offset); | ||
538 | _unmarshal_uint64 (pData, pDataLen, &dest->uncompressed_len); | ||
539 | _unmarshal_uint64 (pData, pDataLen, &dest->compressed_len); | ||
540 | _unmarshal_uint64 (pData, pDataLen, &dest->block_len); | ||
541 | |||
542 | /* check structure */ | ||
543 | if (dest->version != 2) | ||
544 | return 0; | ||
545 | |||
546 | return 1; | ||
547 | } | ||
548 | |||
549 | /* structure of LZXC control data block */ | ||
550 | #define _CHM_LZXC_MIN_LEN (0x18) | ||
551 | #define _CHM_LZXC_V2_LEN (0x1c) | ||
552 | struct chmLzxcControlData | ||
553 | { | ||
554 | UInt32 size; /* 0 */ | ||
555 | char signature[4]; /* 4 (LZXC) */ | ||
556 | UInt32 version; /* 8 */ | ||
557 | UInt32 resetInterval; /* c */ | ||
558 | UInt32 windowSize; /* 10 */ | ||
559 | UInt32 unknown_14; /* 14 */ | ||
560 | UInt32 unknown_18; /* 18 */ | ||
561 | }; | ||
562 | |||
563 | static int _unmarshal_lzxc_control_data(unsigned char **pData, | ||
564 | unsigned long *pDataLen, | ||
565 | struct chmLzxcControlData *dest) | ||
566 | { | ||
567 | /* we want at least 0x18 bytes */ | ||
568 | if (*pDataLen < _CHM_LZXC_MIN_LEN) | ||
569 | return 0; | ||
570 | |||
571 | /* unmarshal fields */ | ||
572 | _unmarshal_uint32 (pData, pDataLen, &dest->size); | ||
573 | _unmarshal_char_array(pData, pDataLen, dest->signature, 4); | ||
574 | _unmarshal_uint32 (pData, pDataLen, &dest->version); | ||
575 | _unmarshal_uint32 (pData, pDataLen, &dest->resetInterval); | ||
576 | _unmarshal_uint32 (pData, pDataLen, &dest->windowSize); | ||
577 | _unmarshal_uint32 (pData, pDataLen, &dest->unknown_14); | ||
578 | |||
579 | if (*pDataLen >= _CHM_LZXC_V2_LEN) | ||
580 | _unmarshal_uint32 (pData, pDataLen, &dest->unknown_18); | ||
581 | else | ||
582 | dest->unknown_18 = 0; | ||
583 | |||
584 | if (dest->version == 2) | ||
585 | { | ||
586 | dest->resetInterval *= 0x8000; | ||
587 | dest->windowSize *= 0x8000; | ||
588 | dest->unknown_14 *= 0x8000; | ||
589 | } | ||
590 | if (dest->windowSize == 0 || dest->resetInterval == 0) | ||
591 | return 0; | ||
592 | |||
593 | /* for now, only support resetInterval a multiple of windowSize/2 */ | ||
594 | if (dest->windowSize == 1) | ||
595 | return 0; | ||
596 | if ((dest->resetInterval % (dest->windowSize/2)) != 0) | ||
597 | return 0; | ||
598 | |||
599 | /* check structure */ | ||
600 | if (memcmp(dest->signature, "LZXC", 4) != 0) | ||
601 | return 0; | ||
602 | |||
603 | return 1; | ||
604 | } | ||
605 | |||
606 | /* the structure used for chm file handles */ | ||
607 | struct chmFile | ||
608 | { | ||
609 | #ifdef WIN32 | ||
610 | HANDLE fd; | ||
611 | #else | ||
612 | int fd; | ||
613 | #endif | ||
614 | |||
615 | #ifdef CHM_MT | ||
616 | #ifdef WIN32 | ||
617 | CRITICAL_SECTION mutex; | ||
618 | CRITICAL_SECTION lzx_mutex; | ||
619 | CRITICAL_SECTION cache_mutex; | ||
620 | #else | ||
621 | pthread_mutex_t mutex; | ||
622 | pthread_mutex_t lzx_mutex; | ||
623 | pthread_mutex_t cache_mutex; | ||
624 | #endif | ||
625 | #endif | ||
626 | |||
627 | UInt64 dir_offset; | ||
628 | UInt64 dir_len; | ||
629 | UInt64 data_offset; | ||
630 | Int32 index_root; | ||
631 | Int32 index_head; | ||
632 | UInt32 block_len; | ||
633 | |||
634 | UInt64 span; | ||
635 | struct chmUnitInfo rt_unit; | ||
636 | struct chmUnitInfo cn_unit; | ||
637 | struct chmLzxcResetTable reset_table; | ||
638 | |||
639 | /* LZX control data */ | ||
640 | UInt32 window_size; | ||
641 | UInt32 reset_interval; | ||
642 | UInt32 reset_blkcount; | ||
643 | |||
644 | /* decompressor state */ | ||
645 | struct LZXstate *lzx_state; | ||
646 | int lzx_last_block; | ||
647 | |||
648 | /* cache for decompressed blocks */ | ||
649 | UChar **cache_blocks; | ||
650 | Int64 *cache_block_indices; | ||
651 | Int32 cache_num_blocks; | ||
652 | }; | ||
653 | |||
654 | /* | ||
655 | * utility functions local to this module | ||
656 | */ | ||
657 | |||
658 | /* utility function to handle differences between {pread,read}(64)? */ | ||
659 | static Int64 _chm_fetch_bytes(struct chmFile *h, | ||
660 | UChar *buf, | ||
661 | UInt64 os, | ||
662 | Int64 len) | ||
663 | { | ||
664 | Int64 readLen=0, oldOs=0; | ||
665 | if (h->fd == CHM_NULL_FD) | ||
666 | return readLen; | ||
667 | |||
668 | CHM_ACQUIRE_LOCK(h->mutex); | ||
669 | #ifdef CHM_USE_WIN32IO | ||
670 | /* NOTE: this might be better done with CreateFileMapping, et cetera... */ | ||
671 | { | ||
672 | DWORD origOffsetLo=0, origOffsetHi=0; | ||
673 | DWORD offsetLo, offsetHi; | ||
674 | DWORD actualLen=0; | ||
675 | |||
676 | /* awkward Win32 Seek/Tell */ | ||
677 | offsetLo = (unsigned long)(os & 0xffffffffL); | ||
678 | offsetHi = (unsigned long)((os >> 32) & 0xffffffffL); | ||
679 | origOffsetLo = SetFilePointer(h->fd, 0, &origOffsetHi, FILE_CURRENT); | ||
680 | offsetLo = SetFilePointer(h->fd, offsetLo, &offsetHi, FILE_BEGIN); | ||
681 | |||
682 | /* read the data */ | ||
683 | if (ReadFile(h->fd, | ||
684 | buf, | ||
685 | (DWORD)len, | ||
686 | &actualLen, | ||
687 | NULL) == TRUE) | ||
688 | readLen = actualLen; | ||
689 | else | ||
690 | readLen = 0; | ||
691 | |||
692 | /* restore original position */ | ||
693 | SetFilePointer(h->fd, origOffsetLo, &origOffsetHi, FILE_BEGIN); | ||
694 | } | ||
695 | #else | ||
696 | #ifdef CHM_USE_PREAD | ||
697 | #ifdef CHM_USE_IO64 | ||
698 | readLen = pread64(h->fd, buf, (long)len, os); | ||
699 | #else | ||
700 | readLen = pread(h->fd, buf, (long)len, (unsigned long)os); | ||
701 | #endif | ||
702 | #else | ||
703 | #ifdef CHM_USE_IO64 | ||
704 | oldOs = lseek64(h->fd, 0, SEEK_CUR); | ||
705 | lseek64(h->fd, os, SEEK_SET); | ||
706 | readLen = read(h->fd, buf, len); | ||
707 | lseek64(h->fd, oldOs, SEEK_SET); | ||
708 | #else | ||
709 | oldOs = lseek(h->fd, 0, SEEK_CUR); | ||
710 | lseek(h->fd, (long)os, SEEK_SET); | ||
711 | readLen = read(h->fd, buf, len); | ||
712 | lseek(h->fd, (long)oldOs, SEEK_SET); | ||
713 | #endif | ||
714 | #endif | ||
715 | #endif | ||
716 | CHM_RELEASE_LOCK(h->mutex); | ||
717 | return readLen; | ||
718 | } | ||
719 | |||
720 | /* open an ITS archive */ | ||
721 | struct chmFile *chm_open(const char *filename) | ||
722 | { | ||
723 | unsigned char sbuffer[256]; | ||
724 | unsigned long sremain; | ||
725 | unsigned char *sbufpos; | ||
726 | struct chmFile *newHandle=NULL; | ||
727 | struct chmItsfHeader itsfHeader; | ||
728 | struct chmItspHeader itspHeader; | ||
729 | struct chmUnitInfo uiSpan; | ||
730 | struct chmUnitInfo uiLzxc; | ||
731 | struct chmLzxcControlData ctlData; | ||
732 | |||
733 | /* allocate handle */ | ||
734 | newHandle = (struct chmFile *)malloc(sizeof(struct chmFile)); | ||
735 | newHandle->fd = CHM_NULL_FD; | ||
736 | newHandle->lzx_state = NULL; | ||
737 | newHandle->cache_blocks = NULL; | ||
738 | newHandle->cache_block_indices = NULL; | ||
739 | newHandle->cache_num_blocks = 0; | ||
740 | |||
741 | /* open file */ | ||
742 | #ifdef WIN32 | ||
743 | if ((newHandle->fd=CreateFileA(filename, | ||
744 | GENERIC_READ, | ||
745 | 0, | ||
746 | NULL, | ||
747 | OPEN_EXISTING, | ||
748 | FILE_ATTRIBUTE_NORMAL, | ||
749 | NULL)) == CHM_NULL_FD) | ||
750 | { | ||
751 | free(newHandle); | ||
752 | return NULL; | ||
753 | } | ||
754 | #else | ||
755 | if ((newHandle->fd=open(filename, O_RDONLY)) == CHM_NULL_FD) | ||
756 | { | ||
757 | free(newHandle); | ||
758 | return NULL; | ||
759 | } | ||
760 | #endif | ||
761 | |||
762 | /* initialize mutexes, if needed */ | ||
763 | #ifdef CHM_MT | ||
764 | #ifdef WIN32 | ||
765 | InitializeCriticalSection(&newHandle->mutex); | ||
766 | InitializeCriticalSection(&newHandle->lzx_mutex); | ||
767 | InitializeCriticalSection(&newHandle->cache_mutex); | ||
768 | #else | ||
769 | pthread_mutex_init(&newHandle->mutex, NULL); | ||
770 | pthread_mutex_init(&newHandle->lzx_mutex, NULL); | ||
771 | pthread_mutex_init(&newHandle->cache_mutex, NULL); | ||
772 | #endif | ||
773 | #endif | ||
774 | |||
775 | /* read and verify header */ | ||
776 | sremain = _CHM_ITSF_V3_LEN; | ||
777 | sbufpos = sbuffer; | ||
778 | if (_chm_fetch_bytes(newHandle, sbuffer, (UInt64)0, sremain) != sremain || | ||
779 | !_unmarshal_itsf_header(&sbufpos, &sremain, &itsfHeader)) | ||
780 | { | ||
781 | chm_close(newHandle); | ||
782 | return NULL; | ||
783 | } | ||
784 | |||
785 | /* stash important values from header */ | ||
786 | newHandle->dir_offset = itsfHeader.dir_offset; | ||
787 | newHandle->dir_len = itsfHeader.dir_len; | ||
788 | newHandle->data_offset = itsfHeader.data_offset; | ||
789 | |||
790 | /* now, read and verify the directory header chunk */ | ||
791 | sremain = _CHM_ITSP_V1_LEN; | ||
792 | sbufpos = sbuffer; | ||
793 | if (_chm_fetch_bytes(newHandle, sbuffer, | ||
794 | (UInt64)itsfHeader.dir_offset, sremain) != sremain || | ||
795 | !_unmarshal_itsp_header(&sbufpos, &sremain, &itspHeader)) | ||
796 | { | ||
797 | chm_close(newHandle); | ||
798 | return NULL; | ||
799 | } | ||
800 | |||
801 | /* grab essential information from ITSP header */ | ||
802 | newHandle->dir_offset += itspHeader.header_len; | ||
803 | newHandle->dir_len -= itspHeader.header_len; | ||
804 | newHandle->index_root = itspHeader.index_root; | ||
805 | newHandle->index_head = itspHeader.index_head; | ||
806 | newHandle->block_len = itspHeader.block_len; | ||
807 | |||
808 | /* if the index root is -1, this means we don't have any PMGI blocks. | ||
809 | * as a result, we must use the sole PMGL block as the index root | ||
810 | */ | ||
811 | if (newHandle->index_root == -1) | ||
812 | newHandle->index_root = newHandle->index_head; | ||
813 | |||
814 | /* prefetch most commonly needed unit infos */ | ||
815 | if (CHM_RESOLVE_SUCCESS != chm_resolve_object(newHandle, | ||
816 | _CHMU_SPANINFO, | ||
817 | &uiSpan) || | ||
818 | uiSpan.space == CHM_COMPRESSED || | ||
819 | CHM_RESOLVE_SUCCESS != chm_resolve_object(newHandle, | ||
820 | _CHMU_RESET_TABLE, | ||
821 | &newHandle->rt_unit) || | ||
822 | newHandle->rt_unit.space == CHM_COMPRESSED || | ||
823 | CHM_RESOLVE_SUCCESS != chm_resolve_object(newHandle, | ||
824 | _CHMU_CONTENT, | ||
825 | &newHandle->cn_unit) || | ||
826 | newHandle->cn_unit.space == CHM_COMPRESSED || | ||
827 | CHM_RESOLVE_SUCCESS != chm_resolve_object(newHandle, | ||
828 | _CHMU_LZXC_CONTROLDATA, | ||
829 | &uiLzxc) || | ||
830 | uiLzxc.space == CHM_COMPRESSED) | ||
831 | { | ||
832 | chm_close(newHandle); | ||
833 | return NULL; | ||
834 | } | ||
835 | |||
836 | /* try to read span */ | ||
837 | /* N.B.: we've already checked that uiSpan is in the uncompressed section, | ||
838 | * so this should not require attempting to decompress, which may | ||
839 | * rely on having a valid "span" | ||
840 | */ | ||
841 | sremain = 8; | ||
842 | sbufpos = sbuffer; | ||
843 | if (chm_retrieve_object(newHandle, &uiSpan, sbuffer, | ||
844 | 0, sremain) != sremain || | ||
845 | !_unmarshal_uint64(&sbufpos, &sremain, &newHandle->span)) | ||
846 | { | ||
847 | chm_close(newHandle); | ||
848 | return NULL; | ||
849 | } | ||
850 | |||
851 | /* read reset table info */ | ||
852 | sremain = _CHM_LZXC_RESETTABLE_V1_LEN; | ||
853 | sbufpos = sbuffer; | ||
854 | if (chm_retrieve_object(newHandle, &newHandle->rt_unit, sbuffer, | ||
855 | 0, sremain) != sremain || | ||
856 | !_unmarshal_lzxc_reset_table(&sbufpos, &sremain, | ||
857 | &newHandle->reset_table)) | ||
858 | { | ||
859 | chm_close(newHandle); | ||
860 | return NULL; | ||
861 | } | ||
862 | |||
863 | /* read control data */ | ||
864 | sremain = (unsigned long)uiLzxc.length; | ||
865 | sbufpos = sbuffer; | ||
866 | if (chm_retrieve_object(newHandle, &uiLzxc, sbuffer, | ||
867 | 0, sremain) != sremain || | ||
868 | !_unmarshal_lzxc_control_data(&sbufpos, &sremain, | ||
869 | &ctlData)) | ||
870 | { | ||
871 | chm_close(newHandle); | ||
872 | return NULL; | ||
873 | } | ||
874 | newHandle->window_size = ctlData.windowSize; | ||
875 | newHandle->reset_interval = ctlData.resetInterval; | ||
876 | newHandle->reset_blkcount = newHandle->reset_interval / | ||
877 | (newHandle->window_size / 2); | ||
878 | |||
879 | /* initialize cache */ | ||
880 | chm_set_param(newHandle, CHM_PARAM_MAX_BLOCKS_CACHED, | ||
881 | CHM_MAX_BLOCKS_CACHED); | ||
882 | |||
883 | return newHandle; | ||
884 | } | ||
885 | |||
886 | /* close an ITS archive */ | ||
887 | void chm_close(struct chmFile *h) | ||
888 | { | ||
889 | if (h != NULL) | ||
890 | { | ||
891 | if (h->fd != CHM_NULL_FD) | ||
892 | CHM_CLOSE_FILE(h->fd); | ||
893 | h->fd = CHM_NULL_FD; | ||
894 | |||
895 | #ifdef CHM_MT | ||
896 | #ifdef WIN32 | ||
897 | DeleteCriticalSection(&h->mutex); | ||
898 | DeleteCriticalSection(&h->lzx_mutex); | ||
899 | DeleteCriticalSection(&h->cache_mutex); | ||
900 | #else | ||
901 | pthread_mutex_destroy(&h->mutex); | ||
902 | pthread_mutex_destroy(&h->lzx_mutex); | ||
903 | pthread_mutex_destroy(&h->cache_mutex); | ||
904 | #endif | ||
905 | #endif | ||
906 | |||
907 | if (h->lzx_state) | ||
908 | LZXteardown(h->lzx_state); | ||
909 | h->lzx_state = NULL; | ||
910 | |||
911 | if (h->cache_blocks) | ||
912 | { | ||
913 | int i; | ||
914 | for (i=0; i<h->cache_num_blocks; i++) | ||
915 | { | ||
916 | if (h->cache_blocks[i]) | ||
917 | free(h->cache_blocks[i]); | ||
918 | } | ||
919 | free(h->cache_blocks); | ||
920 | h->cache_blocks = NULL; | ||
921 | } | ||
922 | |||
923 | if (h->cache_block_indices) | ||
924 | free(h->cache_block_indices); | ||
925 | h->cache_block_indices = NULL; | ||
926 | |||
927 | free(h); | ||
928 | } | ||
929 | } | ||
930 | |||
931 | /* | ||
932 | * set a parameter on the file handle. | ||
933 | * valid parameter types: | ||
934 | * CHM_PARAM_MAX_BLOCKS_CACHED: | ||
935 | * how many decompressed blocks should be cached? A simple | ||
936 | * caching scheme is used, wherein the index of the block is | ||
937 | * used as a hash value, and hash collision results in the | ||
938 | * invalidation of the previously cached block. | ||
939 | */ | ||
940 | void chm_set_param(struct chmFile *h, | ||
941 | int paramType, | ||
942 | int paramVal) | ||
943 | { | ||
944 | switch (paramType) | ||
945 | { | ||
946 | case CHM_PARAM_MAX_BLOCKS_CACHED: | ||
947 | CHM_ACQUIRE_LOCK(h->cache_mutex); | ||
948 | if (paramVal != h->cache_num_blocks) | ||
949 | { | ||
950 | UChar **newBlocks; | ||
951 | UInt64 *newIndices; | ||
952 | int i; | ||
953 | |||
954 | /* allocate new cached blocks */ | ||
955 | newBlocks = (UChar **)malloc(paramVal * sizeof (UChar *)); | ||
956 | newIndices = (UInt64 *)malloc(paramVal * sizeof (UInt64)); | ||
957 | for (i=0; i<paramVal; i++) | ||
958 | { | ||
959 | newBlocks[i] = NULL; | ||
960 | newIndices[i] = 0; | ||
961 | } | ||
962 | |||
963 | /* re-distribute old cached blocks */ | ||
964 | if (h->cache_blocks) | ||
965 | { | ||
966 | for (i=0; i<h->cache_num_blocks; i++) | ||
967 | { | ||
968 | int newSlot = (int)(h->cache_block_indices[i] % paramVal); | ||
969 | |||
970 | if (h->cache_blocks[i]) | ||
971 | { | ||
972 | /* in case of collision, destroy newcomer */ | ||
973 | if (newBlocks[newSlot]) | ||
974 | { | ||
975 | free(h->cache_blocks[i]); | ||
976 | h->cache_blocks[i] = NULL; | ||
977 | } | ||
978 | else | ||
979 | { | ||
980 | newBlocks[newSlot] = h->cache_blocks[i]; | ||
981 | newIndices[newSlot] = | ||
982 | h->cache_block_indices[i]; | ||
983 | } | ||
984 | } | ||
985 | } | ||
986 | |||
987 | free(h->cache_blocks); | ||
988 | free(h->cache_block_indices); | ||
989 | } | ||
990 | |||
991 | /* now, set new values */ | ||
992 | h->cache_blocks = newBlocks; | ||
993 | h->cache_block_indices = newIndices; | ||
994 | h->cache_num_blocks = paramVal; | ||
995 | } | ||
996 | CHM_RELEASE_LOCK(h->cache_mutex); | ||
997 | break; | ||
998 | |||
999 | default: | ||
1000 | break; | ||
1001 | } | ||
1002 | } | ||
1003 | |||
1004 | /* | ||
1005 | * helper methods for chm_resolve_object | ||
1006 | */ | ||
1007 | |||
1008 | /* skip a compressed dword */ | ||
1009 | static void _chm_skip_cword(UChar **pEntry) | ||
1010 | { | ||
1011 | while (*(*pEntry)++ >= 0x80) | ||
1012 | ; | ||
1013 | } | ||
1014 | |||
1015 | /* skip the data from a PMGL entry */ | ||
1016 | static void _chm_skip_PMGL_entry_data(UChar **pEntry) | ||
1017 | { | ||
1018 | _chm_skip_cword(pEntry); | ||
1019 | _chm_skip_cword(pEntry); | ||
1020 | _chm_skip_cword(pEntry); | ||
1021 | } | ||
1022 | |||
1023 | /* parse a compressed dword */ | ||
1024 | static UInt64 _chm_parse_cword(UChar **pEntry) | ||
1025 | { | ||
1026 | UInt64 accum = 0; | ||
1027 | UChar temp; | ||
1028 | while ((temp=*(*pEntry)++) >= 0x80) | ||
1029 | { | ||
1030 | accum <<= 7; | ||
1031 | accum += temp & 0x7f; | ||
1032 | } | ||
1033 | |||
1034 | return (accum << 7) + temp; | ||
1035 | } | ||
1036 | |||
1037 | /* parse a utf-8 string into an ASCII char buffer */ | ||
1038 | static int _chm_parse_UTF8(UChar **pEntry, UInt64 count, char *path) | ||
1039 | { | ||
1040 | /* XXX: implement UTF-8 support, including a real mapping onto | ||
1041 | * ISO-8859-1? probably there is a library to do this? As is | ||
1042 | * immediately apparent from the below code, I'm only handling files | ||
1043 | * in which none of the strings contain UTF-8 multi-byte characters. | ||
1044 | */ | ||
1045 | while (count != 0) | ||
1046 | { | ||
1047 | if (*(*pEntry) > 0x7f) | ||
1048 | return 0; | ||
1049 | |||
1050 | *path++ = (char)(*(*pEntry)++); | ||
1051 | --count; | ||
1052 | } | ||
1053 | |||
1054 | *path = '\0'; | ||
1055 | return 1; | ||
1056 | } | ||
1057 | |||
1058 | /* parse a PMGL entry into a chmUnitInfo struct; return 1 on success. */ | ||
1059 | static int _chm_parse_PMGL_entry(UChar **pEntry, struct chmUnitInfo *ui) | ||
1060 | { | ||
1061 | UInt64 strLen; | ||
1062 | |||
1063 | /* parse str len */ | ||
1064 | strLen = _chm_parse_cword(pEntry); | ||
1065 | if (strLen > CHM_MAX_PATHLEN) | ||
1066 | return 0; | ||
1067 | |||
1068 | /* parse path */ | ||
1069 | if (! _chm_parse_UTF8(pEntry, strLen, ui->path)) | ||
1070 | return 0; | ||
1071 | |||
1072 | /* parse info */ | ||
1073 | ui->space = (int)_chm_parse_cword(pEntry); | ||
1074 | ui->start = _chm_parse_cword(pEntry); | ||
1075 | ui->length = _chm_parse_cword(pEntry); | ||
1076 | return 1; | ||
1077 | } | ||
1078 | |||
1079 | /* find an exact entry in PMGL; return NULL if we fail */ | ||
1080 | static UChar *_chm_find_in_PMGL(UChar *page_buf, | ||
1081 | UInt32 block_len, | ||
1082 | const char *objPath) | ||
1083 | { | ||
1084 | /* XXX: modify this to do a binary search using the nice index structure | ||
1085 | * that is provided for us. | ||
1086 | */ | ||
1087 | struct chmPmglHeader header; | ||
1088 | UInt32 hremain; | ||
1089 | UChar *end; | ||
1090 | UChar *cur; | ||
1091 | UChar *temp; | ||
1092 | UInt64 strLen; | ||
1093 | char buffer[CHM_MAX_PATHLEN+1]; | ||
1094 | |||
1095 | /* figure out where to start and end */ | ||
1096 | cur = page_buf; | ||
1097 | hremain = _CHM_PMGL_LEN; | ||
1098 | if (! _unmarshal_pmgl_header(&cur, &hremain, &header)) | ||
1099 | return NULL; | ||
1100 | end = page_buf + block_len - (header.free_space); | ||
1101 | |||
1102 | /* now, scan progressively */ | ||
1103 | while (cur < end) | ||
1104 | { | ||
1105 | /* grab the name */ | ||
1106 | temp = cur; | ||
1107 | strLen = _chm_parse_cword(&cur); | ||
1108 | if (! _chm_parse_UTF8(&cur, strLen, buffer)) | ||
1109 | return NULL; | ||
1110 | |||
1111 | /* check if it is the right name */ | ||
1112 | #ifdef WIN32 | ||
1113 | if (! stricmp(buffer, objPath)) | ||
1114 | return temp; | ||
1115 | #else | ||
1116 | if (! strcasecmp(buffer, objPath)) | ||
1117 | return temp; | ||
1118 | #endif | ||
1119 | |||
1120 | _chm_skip_PMGL_entry_data(&cur); | ||
1121 | } | ||
1122 | |||
1123 | return NULL; | ||
1124 | } | ||
1125 | |||
1126 | /* find which block should be searched next for the entry; -1 if no block */ | ||
1127 | static Int32 _chm_find_in_PMGI(UChar *page_buf, | ||
1128 | UInt32 block_len, | ||
1129 | const char *objPath) | ||
1130 | { | ||
1131 | /* XXX: modify this to do a binary search using the nice index structure | ||
1132 | * that is provided for us | ||
1133 | */ | ||
1134 | struct chmPmgiHeader header; | ||
1135 | UInt32 hremain; | ||
1136 | int page=-1; | ||
1137 | UChar *end; | ||
1138 | UChar *cur; | ||
1139 | UInt64 strLen; | ||
1140 | char buffer[CHM_MAX_PATHLEN+1]; | ||
1141 | |||
1142 | /* figure out where to start and end */ | ||
1143 | cur = page_buf; | ||
1144 | hremain = _CHM_PMGI_LEN; | ||
1145 | if (! _unmarshal_pmgi_header(&cur, &hremain, &header)) | ||
1146 | return -1; | ||
1147 | end = page_buf + block_len - (header.free_space); | ||
1148 | |||
1149 | /* now, scan progressively */ | ||
1150 | while (cur < end) | ||
1151 | { | ||
1152 | /* grab the name */ | ||
1153 | strLen = _chm_parse_cword(&cur); | ||
1154 | if (! _chm_parse_UTF8(&cur, strLen, buffer)) | ||
1155 | return -1; | ||
1156 | |||
1157 | /* check if it is the right name */ | ||
1158 | #ifdef WIN32 | ||
1159 | if (stricmp(buffer, objPath) > 0) | ||
1160 | return page; | ||
1161 | #else | ||
1162 | if (strcasecmp(buffer, objPath) > 0) | ||
1163 | return page; | ||
1164 | #endif | ||
1165 | |||
1166 | /* load next value for path */ | ||
1167 | page = (int)_chm_parse_cword(&cur); | ||
1168 | } | ||
1169 | |||
1170 | return page; | ||
1171 | } | ||
1172 | |||
1173 | /* resolve a particular object from the archive */ | ||
1174 | int chm_resolve_object(struct chmFile *h, | ||
1175 | const char *objPath, | ||
1176 | struct chmUnitInfo *ui) | ||
1177 | { | ||
1178 | /* | ||
1179 | * XXX: implement caching scheme for dir pages | ||
1180 | */ | ||
1181 | |||
1182 | Int32 curPage; | ||
1183 | |||
1184 | /* buffer to hold whatever page we're looking at */ | ||
1185 | #ifdef WIN32 | ||
1186 | UChar *page_buf = alloca(h->block_len); | ||
1187 | #else | ||
1188 | UChar page_buf[h->block_len]; | ||
1189 | #endif | ||
1190 | |||
1191 | /* starting page */ | ||
1192 | curPage = h->index_root; | ||
1193 | |||
1194 | /* until we have either returned or given up */ | ||
1195 | while (curPage != -1) | ||
1196 | { | ||
1197 | |||
1198 | /* try to fetch the index page */ | ||
1199 | if (_chm_fetch_bytes(h, page_buf, | ||
1200 | (UInt64)h->dir_offset + (UInt64)curPage*h->block_len, | ||
1201 | h->block_len) != h->block_len) | ||
1202 | return CHM_RESOLVE_FAILURE; | ||
1203 | |||
1204 | /* now, if it is a leaf node: */ | ||
1205 | if (memcmp(page_buf, _chm_pmgl_marker, 4) == 0) | ||
1206 | { | ||
1207 | /* scan block */ | ||
1208 | UChar *pEntry = _chm_find_in_PMGL(page_buf, | ||
1209 | h->block_len, | ||
1210 | objPath); | ||
1211 | if (pEntry == NULL) | ||
1212 | return CHM_RESOLVE_FAILURE; | ||
1213 | |||
1214 | /* parse entry and return */ | ||
1215 | _chm_parse_PMGL_entry(&pEntry, ui); | ||
1216 | return CHM_RESOLVE_SUCCESS; | ||
1217 | } | ||
1218 | |||
1219 | /* else, if it is a branch node: */ | ||
1220 | else if (memcmp(page_buf, _chm_pmgi_marker, 4) == 0) | ||
1221 | curPage = _chm_find_in_PMGI(page_buf, h->block_len, objPath); | ||
1222 | |||
1223 | /* else, we are confused. give up. */ | ||
1224 | else | ||
1225 | return CHM_RESOLVE_FAILURE; | ||
1226 | } | ||
1227 | |||
1228 | /* didn't find anything. fail. */ | ||
1229 | return CHM_RESOLVE_FAILURE; | ||
1230 | } | ||
1231 | |||
1232 | /* | ||
1233 | * utility methods for dealing with compressed data | ||
1234 | */ | ||
1235 | |||
1236 | /* get the bounds of a compressed block. return 0 on failure */ | ||
1237 | static int _chm_get_cmpblock_bounds(struct chmFile *h, | ||
1238 | UInt64 block, | ||
1239 | UInt64 *start, | ||
1240 | Int64 *len) | ||
1241 | { | ||
1242 | UChar buffer[8], *dummy; | ||
1243 | UInt32 remain; | ||
1244 | |||
1245 | /* for all but the last block, use the reset table */ | ||
1246 | if (block < h->reset_table.block_count-1) | ||
1247 | { | ||
1248 | /* unpack the start address */ | ||
1249 | dummy = buffer; | ||
1250 | remain = 8; | ||
1251 | if (_chm_fetch_bytes(h, buffer, | ||
1252 | (UInt64)h->data_offset | ||
1253 | + (UInt64)h->rt_unit.start | ||
1254 | + (UInt64)h->reset_table.table_offset | ||
1255 | + (UInt64)block*8, | ||
1256 | remain) != remain || | ||
1257 | !_unmarshal_uint64(&dummy, &remain, start)) | ||
1258 | return 0; | ||
1259 | |||
1260 | /* unpack the end address */ | ||
1261 | dummy = buffer; | ||
1262 | remain = 8; | ||
1263 | if (_chm_fetch_bytes(h, buffer, | ||
1264 | (UInt64)h->data_offset | ||
1265 | + (UInt64)h->rt_unit.start | ||
1266 | + (UInt64)h->reset_table.table_offset | ||
1267 | + (UInt64)block*8 + 8, | ||
1268 | remain) != remain || | ||
1269 | !_unmarshal_int64(&dummy, &remain, len)) | ||
1270 | return 0; | ||
1271 | } | ||
1272 | |||
1273 | /* for the last block, use the span in addition to the reset table */ | ||
1274 | else | ||
1275 | { | ||
1276 | /* unpack the start address */ | ||
1277 | dummy = buffer; | ||
1278 | remain = 8; | ||
1279 | if (_chm_fetch_bytes(h, buffer, | ||
1280 | (UInt64)h->data_offset | ||
1281 | + (UInt64)h->rt_unit.start | ||
1282 | + (UInt64)h->reset_table.table_offset | ||
1283 | + (UInt64)block*8, | ||
1284 | remain) != remain || | ||
1285 | !_unmarshal_uint64(&dummy, &remain, start)) | ||
1286 | return 0; | ||
1287 | |||
1288 | *len = h->reset_table.compressed_len; | ||
1289 | } | ||
1290 | |||
1291 | /* compute the length and absolute start address */ | ||
1292 | *len -= *start; | ||
1293 | *start += h->data_offset + h->cn_unit.start; | ||
1294 | |||
1295 | return 1; | ||
1296 | } | ||
1297 | |||
1298 | /* decompress the block. must have lzx_mutex. */ | ||
1299 | static Int64 _chm_decompress_block(struct chmFile *h, | ||
1300 | UInt64 block, | ||
1301 | UChar **ubuffer) | ||
1302 | { | ||
1303 | #ifdef WIN32 | ||
1304 | UChar *cbuffer = alloca(((unsigned int)h->reset_table.block_len + 6144)); | ||
1305 | #else | ||
1306 | UChar cbuffer[h->reset_table.block_len + 6144]; /* compressed buffer */ | ||
1307 | #endif | ||
1308 | UInt64 cmpStart; /* compressed start */ | ||
1309 | Int64 cmpLen; /* compressed len */ | ||
1310 | int indexSlot; /* cache index slot */ | ||
1311 | UChar *lbuffer; /* local buffer ptr */ | ||
1312 | UInt32 blockAlign = (UInt32)(block % h->reset_blkcount); /* reset intvl. aln. */ | ||
1313 | UInt32 i; /* local loop index */ | ||
1314 | |||
1315 | /* check if we need previous blocks */ | ||
1316 | if (blockAlign != 0) | ||
1317 | { | ||
1318 | /* fetch all required previous blocks since last reset */ | ||
1319 | for (i = h->reset_blkcount - blockAlign; i > 0; i--) | ||
1320 | { | ||
1321 | |||
1322 | /* check if we most recently decompressed the previous block */ | ||
1323 | if (h->lzx_last_block != block-i) | ||
1324 | { | ||
1325 | indexSlot = (int)((block-i) % h->cache_num_blocks); | ||
1326 | h->cache_block_indices[indexSlot] = block-i; | ||
1327 | if (! h->cache_blocks[indexSlot]) | ||
1328 | h->cache_blocks[indexSlot] = (UChar *)malloc( | ||
1329 | (unsigned int)(h->reset_table.block_len)); | ||
1330 | lbuffer = h->cache_blocks[indexSlot]; | ||
1331 | |||
1332 | /* decompress the previous block */ | ||
1333 | LZXreset(h->lzx_state); | ||
1334 | if (!_chm_get_cmpblock_bounds(h, block-i, &cmpStart, &cmpLen) || | ||
1335 | _chm_fetch_bytes(h, cbuffer, cmpStart, cmpLen) != cmpLen || | ||
1336 | LZXdecompress(h->lzx_state, cbuffer, lbuffer, (int)cmpLen, | ||
1337 | (int)h->reset_table.block_len) != DECR_OK) | ||
1338 | return (Int64)0; | ||
1339 | } | ||
1340 | |||
1341 | h->lzx_last_block = (int)(block - i); | ||
1342 | } | ||
1343 | } | ||
1344 | else | ||
1345 | LZXreset(h->lzx_state); | ||
1346 | |||
1347 | /* allocate slot in cache */ | ||
1348 | indexSlot = (int)(block % h->cache_num_blocks); | ||
1349 | h->cache_block_indices[indexSlot] = block; | ||
1350 | if (! h->cache_blocks[indexSlot]) | ||
1351 | h->cache_blocks[indexSlot] = (UChar *)malloc( | ||
1352 | ((unsigned int)h->reset_table.block_len)); | ||
1353 | lbuffer = h->cache_blocks[indexSlot]; | ||
1354 | *ubuffer = lbuffer; | ||
1355 | |||
1356 | /* decompress the block we actually want */ | ||
1357 | if (! _chm_get_cmpblock_bounds(h, block, &cmpStart, &cmpLen) || | ||
1358 | _chm_fetch_bytes(h, cbuffer, cmpStart, cmpLen) != cmpLen || | ||
1359 | LZXdecompress(h->lzx_state, cbuffer, lbuffer, (int)cmpLen, | ||
1360 | (int)h->reset_table.block_len) != DECR_OK) | ||
1361 | return (Int64)0; | ||
1362 | h->lzx_last_block = (int)block; | ||
1363 | |||
1364 | /* XXX: modify LZX routines to return the length of the data they | ||
1365 | * decompressed and return that instead, for an extra sanity check. | ||
1366 | */ | ||
1367 | return h->reset_table.block_len; | ||
1368 | } | ||
1369 | |||
1370 | /* grab a region from a compressed block */ | ||
1371 | static Int64 _chm_decompress_region(struct chmFile *h, | ||
1372 | UChar *buf, | ||
1373 | UInt64 start, | ||
1374 | Int64 len) | ||
1375 | { | ||
1376 | UInt64 nBlock, nOffset; | ||
1377 | UInt64 nLen; | ||
1378 | UInt64 gotLen; | ||
1379 | UChar *ubuffer; | ||
1380 | |||
1381 | if (len <= 0) | ||
1382 | return (Int64)0; | ||
1383 | |||
1384 | /* figure out what we need to read */ | ||
1385 | nBlock = start / h->reset_table.block_len; | ||
1386 | nOffset = start % h->reset_table.block_len; | ||
1387 | nLen = len; | ||
1388 | if (nLen > (h->reset_table.block_len - nOffset)) | ||
1389 | nLen = h->reset_table.block_len - nOffset; | ||
1390 | |||
1391 | /* if block is cached, return data from it. */ | ||
1392 | CHM_ACQUIRE_LOCK(h->lzx_mutex); | ||
1393 | CHM_ACQUIRE_LOCK(h->cache_mutex); | ||
1394 | if (h->cache_block_indices[nBlock % h->cache_num_blocks] == nBlock && | ||
1395 | h->cache_blocks[nBlock % h->cache_num_blocks] != NULL) | ||
1396 | { | ||
1397 | memcpy(buf, | ||
1398 | h->cache_blocks[nBlock % h->cache_num_blocks] + nOffset, | ||
1399 | (unsigned int)nLen); | ||
1400 | CHM_RELEASE_LOCK(h->cache_mutex); | ||
1401 | CHM_RELEASE_LOCK(h->lzx_mutex); | ||
1402 | return nLen; | ||
1403 | } | ||
1404 | CHM_RELEASE_LOCK(h->cache_mutex); | ||
1405 | |||
1406 | /* data request not satisfied, so... start up the decompressor machine */ | ||
1407 | if (! h->lzx_state) | ||
1408 | { | ||
1409 | int window_size = ffs(h->window_size) - 1; | ||
1410 | h->lzx_last_block = -1; | ||
1411 | h->lzx_state = LZXinit(window_size); | ||
1412 | } | ||
1413 | |||
1414 | /* decompress some data */ | ||
1415 | gotLen = _chm_decompress_block(h, nBlock, &ubuffer); | ||
1416 | if (gotLen < nLen) | ||
1417 | nLen = gotLen; | ||
1418 | memcpy(buf, ubuffer+nOffset, (unsigned int)nLen); | ||
1419 | CHM_RELEASE_LOCK(h->lzx_mutex); | ||
1420 | return nLen; | ||
1421 | } | ||
1422 | |||
1423 | /* retrieve (part of) an object */ | ||
1424 | LONGINT64 chm_retrieve_object(struct chmFile *h, | ||
1425 | struct chmUnitInfo *ui, | ||
1426 | unsigned char *buf, | ||
1427 | LONGUINT64 addr, | ||
1428 | LONGINT64 len) | ||
1429 | { | ||
1430 | /* must be valid file handle */ | ||
1431 | if (h == NULL) | ||
1432 | return (Int64)0; | ||
1433 | |||
1434 | /* starting address must be in correct range */ | ||
1435 | if (addr < 0 || addr >= ui->length) | ||
1436 | return (Int64)0; | ||
1437 | |||
1438 | /* clip length */ | ||
1439 | if (addr + len > ui->length) | ||
1440 | len = ui->length - addr; | ||
1441 | |||
1442 | /* if the file is uncompressed, it's simple */ | ||
1443 | if (ui->space == CHM_UNCOMPRESSED) | ||
1444 | { | ||
1445 | /* read data */ | ||
1446 | return _chm_fetch_bytes(h, | ||
1447 | buf, | ||
1448 | (UInt64)h->data_offset + (UInt64)ui->start + (UInt64)addr, | ||
1449 | len); | ||
1450 | } | ||
1451 | |||
1452 | /* else if the file is compressed, it's a little trickier */ | ||
1453 | else /* ui->space == CHM_COMPRESSED */ | ||
1454 | { | ||
1455 | Int64 swath=0, total=0; | ||
1456 | do { | ||
1457 | |||
1458 | /* swill another mouthful */ | ||
1459 | swath = _chm_decompress_region(h, buf, ui->start + addr, len); | ||
1460 | |||
1461 | /* if we didn't get any... */ | ||
1462 | if (swath == 0) | ||
1463 | return total; | ||
1464 | |||
1465 | /* update stats */ | ||
1466 | total += swath; | ||
1467 | len -= swath; | ||
1468 | addr += swath; | ||
1469 | buf += swath; | ||
1470 | |||
1471 | } while (len != 0); | ||
1472 | |||
1473 | return total; | ||
1474 | } | ||
1475 | } | ||
1476 | |||
1477 | /* enumerate the objects in the .chm archive */ | ||
1478 | int chm_enumerate(struct chmFile *h, | ||
1479 | int what, | ||
1480 | CHM_ENUMERATOR e, | ||
1481 | void *context) | ||
1482 | { | ||
1483 | Int32 curPage; | ||
1484 | |||
1485 | /* buffer to hold whatever page we're looking at */ | ||
1486 | #ifdef WIN32 | ||
1487 | UChar *page_buf = alloca((unsigned int)h->block_len); | ||
1488 | #else | ||
1489 | UChar page_buf[h->block_len]; | ||
1490 | #endif | ||
1491 | struct chmPmglHeader header; | ||
1492 | UChar *end; | ||
1493 | UChar *cur; | ||
1494 | unsigned long lenRemain; | ||
1495 | |||
1496 | /* the current ui */ | ||
1497 | struct chmUnitInfo ui; | ||
1498 | int flag; | ||
1499 | |||
1500 | /* starting page */ | ||
1501 | curPage = h->index_head; | ||
1502 | |||
1503 | /* until we have either returned or given up */ | ||
1504 | while (curPage != -1) | ||
1505 | { | ||
1506 | |||
1507 | /* try to fetch the index page */ | ||
1508 | if (_chm_fetch_bytes(h, | ||
1509 | page_buf, | ||
1510 | (UInt64)h->dir_offset + (UInt64)curPage*h->block_len, | ||
1511 | h->block_len) != h->block_len) | ||
1512 | return 0; | ||
1513 | |||
1514 | /* figure out start and end for this page */ | ||
1515 | cur = page_buf; | ||
1516 | lenRemain = _CHM_PMGL_LEN; | ||
1517 | if (! _unmarshal_pmgl_header(&cur, &lenRemain, &header)) | ||
1518 | return 0; | ||
1519 | end = page_buf + h->block_len - (header.free_space); | ||
1520 | |||
1521 | /* loop over this page */ | ||
1522 | while (cur < end) | ||
1523 | { | ||
1524 | if (! _chm_parse_PMGL_entry(&cur, &ui)) | ||
1525 | return 0; | ||
1526 | |||
1527 | /* check for DIRS */ | ||
1528 | if (ui.length == 0 && !(what & CHM_ENUMERATE_DIRS)) | ||
1529 | continue; | ||
1530 | |||
1531 | /* check for FILES */ | ||
1532 | if (ui.length != 0 && !(what & CHM_ENUMERATE_FILES)) | ||
1533 | continue; | ||
1534 | |||
1535 | /* check for NORMAL vs. META */ | ||
1536 | if (ui.path[0] == '/') | ||
1537 | { | ||
1538 | |||
1539 | /* check for NORMAL vs. SPECIAL */ | ||
1540 | if (ui.path[1] == '#' || ui.path[1] == '$') | ||
1541 | flag = CHM_ENUMERATE_SPECIAL; | ||
1542 | else | ||
1543 | flag = CHM_ENUMERATE_NORMAL; | ||
1544 | } | ||
1545 | else | ||
1546 | flag = CHM_ENUMERATE_META; | ||
1547 | if (! (what & flag)) | ||
1548 | continue; | ||
1549 | |||
1550 | /* call the enumerator */ | ||
1551 | { | ||
1552 | int status = (*e)(h, &ui, context); | ||
1553 | switch (status) | ||
1554 | { | ||
1555 | case CHM_ENUMERATOR_FAILURE: return 0; | ||
1556 | case CHM_ENUMERATOR_CONTINUE: break; | ||
1557 | case CHM_ENUMERATOR_SUCCESS: return 1; | ||
1558 | default: break; | ||
1559 | } | ||
1560 | } | ||
1561 | } | ||
1562 | |||
1563 | /* advance to next page */ | ||
1564 | curPage = header.block_next; | ||
1565 | } | ||
1566 | |||
1567 | return 1; | ||
1568 | } | ||
1569 | |||
1570 | int chm_enumerate_dir(struct chmFile *h, | ||
1571 | const char *prefix, | ||
1572 | int what, | ||
1573 | CHM_ENUMERATOR e, | ||
1574 | void *context) | ||
1575 | { | ||
1576 | /* | ||
1577 | * XXX: do this efficiently (i.e. using the tree index) | ||
1578 | */ | ||
1579 | |||
1580 | Int32 curPage; | ||
1581 | |||
1582 | /* buffer to hold whatever page we're looking at */ | ||
1583 | #ifdef WIN32 | ||
1584 | UChar *page_buf = alloca((unsigned int)h->block_len); | ||
1585 | #else | ||
1586 | UChar page_buf[h->block_len]; | ||
1587 | #endif | ||
1588 | struct chmPmglHeader header; | ||
1589 | UChar *end; | ||
1590 | UChar *cur; | ||
1591 | unsigned long lenRemain; | ||
1592 | |||
1593 | /* set to 1 once we've started */ | ||
1594 | int it_has_begun=0; | ||
1595 | |||
1596 | /* the current ui */ | ||
1597 | struct chmUnitInfo ui; | ||
1598 | int flag; | ||
1599 | |||
1600 | /* the length of the prefix */ | ||
1601 | char prefixRectified[CHM_MAX_PATHLEN+1]; | ||
1602 | int prefixLen; | ||
1603 | char lastPath[CHM_MAX_PATHLEN]; | ||
1604 | int lastPathLen; | ||
1605 | |||
1606 | /* starting page */ | ||
1607 | curPage = h->index_head; | ||
1608 | |||
1609 | /* initialize pathname state */ | ||
1610 | strncpy(prefixRectified, prefix, CHM_MAX_PATHLEN); | ||
1611 | prefixLen = strlen(prefixRectified); | ||
1612 | if (prefixLen != 0) | ||
1613 | { | ||
1614 | if (prefixRectified[prefixLen-1] != '/') | ||
1615 | { | ||
1616 | prefixRectified[prefixLen] = '/'; | ||
1617 | prefixRectified[prefixLen+1] = '\0'; | ||
1618 | ++prefixLen; | ||
1619 | } | ||
1620 | } | ||
1621 | lastPath[0] = '\0'; | ||
1622 | lastPathLen = -1; | ||
1623 | |||
1624 | /* until we have either returned or given up */ | ||
1625 | while (curPage != -1) | ||
1626 | { | ||
1627 | |||
1628 | /* try to fetch the index page */ | ||
1629 | if (_chm_fetch_bytes(h, | ||
1630 | page_buf, | ||
1631 | (UInt64)h->dir_offset + (UInt64)curPage*h->block_len, | ||
1632 | h->block_len) != h->block_len) | ||
1633 | return 0; | ||
1634 | |||
1635 | /* figure out start and end for this page */ | ||
1636 | cur = page_buf; | ||
1637 | lenRemain = _CHM_PMGL_LEN; | ||
1638 | if (! _unmarshal_pmgl_header(&cur, &lenRemain, &header)) | ||
1639 | return 0; | ||
1640 | end = page_buf + h->block_len - (header.free_space); | ||
1641 | |||
1642 | /* loop over this page */ | ||
1643 | while (cur < end) | ||
1644 | { | ||
1645 | if (! _chm_parse_PMGL_entry(&cur, &ui)) | ||
1646 | return 0; | ||
1647 | |||
1648 | /* check if we should start */ | ||
1649 | if (! it_has_begun) | ||
1650 | { | ||
1651 | if (ui.length == 0 && strncmp(ui.path, prefixRectified, prefixLen) == 0) | ||
1652 | it_has_begun = 1; | ||
1653 | else | ||
1654 | continue; | ||
1655 | |||
1656 | if (ui.path[prefixLen] == '\0') | ||
1657 | continue; | ||
1658 | } | ||
1659 | |||
1660 | /* check if we should stop */ | ||
1661 | else | ||
1662 | { | ||
1663 | if (strncmp(ui.path, prefixRectified, prefixLen) != 0) | ||
1664 | return 1; | ||
1665 | } | ||
1666 | |||
1667 | /* check if we should include this path */ | ||
1668 | if (lastPathLen != -1) | ||
1669 | { | ||
1670 | if (strncmp(ui.path, lastPath, lastPathLen) == 0) | ||
1671 | continue; | ||
1672 | } | ||
1673 | strcpy(lastPath, ui.path); | ||
1674 | lastPathLen = strlen(lastPath); | ||
1675 | |||
1676 | /* check for DIRS */ | ||
1677 | if (ui.length == 0 && !(what & CHM_ENUMERATE_DIRS)) | ||
1678 | continue; | ||
1679 | |||
1680 | /* check for FILES */ | ||
1681 | if (ui.length != 0 && !(what & CHM_ENUMERATE_FILES)) | ||
1682 | continue; | ||
1683 | |||
1684 | /* check for NORMAL vs. META */ | ||
1685 | if (ui.path[0] == '/') | ||
1686 | { | ||
1687 | |||
1688 | /* check for NORMAL vs. SPECIAL */ | ||
1689 | if (ui.path[1] == '#' || ui.path[1] == '$') | ||
1690 | flag = CHM_ENUMERATE_SPECIAL; | ||
1691 | else | ||
1692 | flag = CHM_ENUMERATE_NORMAL; | ||
1693 | } | ||
1694 | else | ||
1695 | flag = CHM_ENUMERATE_META; | ||
1696 | if (! (what & flag)) | ||
1697 | continue; | ||
1698 | |||
1699 | /* call the enumerator */ | ||
1700 | { | ||
1701 | int status = (*e)(h, &ui, context); | ||
1702 | switch (status) | ||
1703 | { | ||
1704 | case CHM_ENUMERATOR_FAILURE: return 0; | ||
1705 | case CHM_ENUMERATOR_CONTINUE: break; | ||
1706 | case CHM_ENUMERATOR_SUCCESS: return 1; | ||
1707 | default: break; | ||
1708 | } | ||
1709 | } | ||
1710 | } | ||
1711 | |||
1712 | /* advance to next page */ | ||
1713 | curPage = header.block_next; | ||
1714 | } | ||
1715 | |||
1716 | return 1; | ||
1717 | } | ||
1718 | |||
1719 | /* resolve a particular object from the archive */ | ||
1720 | int chm_resolve_location(struct chmFile *h, | ||
1721 | unsigned long pos, | ||
1722 | struct chmUnitInfo *ui) | ||
1723 | { | ||
1724 | /* | ||
1725 | * XXX: implement caching scheme for dir pages | ||
1726 | */ | ||
1727 | |||
1728 | Int32 curPage; | ||
1729 | |||
1730 | /* buffer to hold whatever page we're looking at */ | ||
1731 | #ifdef WIN32 | ||
1732 | UChar *page_buf = alloca(h->block_len); | ||
1733 | #else | ||
1734 | UChar page_buf[h->block_len]; | ||
1735 | #endif | ||
1736 | |||
1737 | /* starting page */ | ||
1738 | curPage = h->index_root; | ||
1739 | |||
1740 | /* until we have either returned or given up */ | ||
1741 | while (curPage != -1) | ||
1742 | { | ||
1743 | |||
1744 | /* try to fetch the index page */ | ||
1745 | if (_chm_fetch_bytes(h, page_buf, | ||
1746 | (UInt64)h->dir_offset + (UInt64)curPage*h->block_len, | ||
1747 | h->block_len) != h->block_len) | ||
1748 | return CHM_RESOLVE_FAILURE; | ||
1749 | |||
1750 | /* now, if it is a leaf node: */ | ||
1751 | if (memcmp(page_buf, _chm_pmgl_marker, 4) == 0) | ||
1752 | { | ||
1753 | /* scan block */ | ||
1754 | /* UChar *pEntry = _chm_find_in_PMGL(page_buf, h->block_len, objPath);*/ | ||
1755 | { | ||
1756 | /* XXX: modify this to do a binary search using the nice index structure | ||
1757 | * that is provided for us. | ||
1758 | */ | ||
1759 | struct chmPmglHeader header; | ||
1760 | UInt32 hremain; | ||
1761 | UChar *end; | ||
1762 | UChar *cur; | ||
1763 | UChar *temp; | ||
1764 | /* | ||
1765 | UInt64 strLen; | ||
1766 | char buffer[CHM_MAX_PATHLEN+1]; | ||
1767 | */ | ||
1768 | /* figure out where to start and end */ | ||
1769 | cur = page_buf; | ||
1770 | hremain = _CHM_PMGL_LEN; | ||
1771 | if (! _unmarshal_pmgl_header(&cur, &hremain, &header)) | ||
1772 | return CHM_RESOLVE_FAILURE; | ||
1773 | end = page_buf + h->block_len - (header.free_space); | ||
1774 | |||
1775 | /* now, scan progressively */ | ||
1776 | while (cur < end) | ||
1777 | { | ||
1778 | UInt32 st = 0; | ||
1779 | UInt32 nd = 0; | ||
1780 | /* grab the name */ | ||
1781 | temp = cur; | ||
1782 | |||
1783 | if (_chm_parse_PMGL_entry(&cur, ui) == 0) | ||
1784 | { | ||
1785 | return CHM_RESOLVE_FAILURE; | ||
1786 | } | ||
1787 | st = ui->start; | ||
1788 | nd = ui->start+ui->length; | ||
1789 | if ((st <= pos) && (pos < nd)) | ||
1790 | { | ||
1791 | printf("Resolve:[%u,%u,%u]\n", st, pos, nd); | ||
1792 | return CHM_RESOLVE_SUCCESS; | ||
1793 | } | ||
1794 | } | ||
1795 | |||
1796 | return CHM_RESOLVE_FAILURE; | ||
1797 | } | ||
1798 | |||
1799 | } | ||
1800 | |||
1801 | /* else, if it is a branch node: */ | ||
1802 | else if (memcmp(page_buf, _chm_pmgi_marker, 4) == 0) | ||
1803 | { | ||
1804 | /* curPage = _chm_find_in_PMGI(page_buf, h->block_len, objPath);*/ | ||
1805 | return CHM_RESOLVE_FAILURE; | ||
1806 | if (0) | ||
1807 | { | ||
1808 | /* XXX: modify this to do a binary search using the nice index structure | ||
1809 | * that is provided for us | ||
1810 | */ | ||
1811 | struct chmPmgiHeader header; | ||
1812 | UInt32 hremain; | ||
1813 | int page=-1; | ||
1814 | UChar *end; | ||
1815 | UChar *cur; | ||
1816 | UInt64 strLen; | ||
1817 | char buffer[CHM_MAX_PATHLEN+1]; | ||
1818 | |||
1819 | /* figure out where to start and end */ | ||
1820 | cur = page_buf; | ||
1821 | hremain = _CHM_PMGI_LEN; | ||
1822 | if (! _unmarshal_pmgi_header(&cur, &hremain, &header)) | ||
1823 | return -1; | ||
1824 | end = page_buf + h->block_len - (header.free_space); | ||
1825 | |||
1826 | /* now, scan progressively */ | ||
1827 | while (cur < end) | ||
1828 | { | ||
1829 | |||
1830 | |||
1831 | |||
1832 | if (_chm_parse_PMGL_entry(&cur, ui) == 0) | ||
1833 | { | ||
1834 | return CHM_RESOLVE_FAILURE; | ||
1835 | } | ||
1836 | |||
1837 | if (ui->start <= pos && pos < ui->start + ui->length) | ||
1838 | { | ||
1839 | return CHM_RESOLVE_SUCCESS; | ||
1840 | } | ||
1841 | |||
1842 | |||
1843 | |||
1844 | |||
1845 | |||
1846 | /* grab the name */ | ||
1847 | strLen = _chm_parse_cword(&cur); | ||
1848 | if (! _chm_parse_UTF8(&cur, strLen, buffer)) | ||
1849 | return -1; | ||
1850 | |||
1851 | /* check if it is the right name */ | ||
1852 | /* | ||
1853 | #ifdef WIN32 | ||
1854 | if (stricmp(buffer, objPath) > 0) | ||
1855 | return page; | ||
1856 | #else | ||
1857 | if (strcasecmp(buffer, objPath) > 0) | ||
1858 | return page; | ||
1859 | #endif | ||
1860 | */ | ||
1861 | /* load next value for path */ | ||
1862 | page = (int)_chm_parse_cword(&cur); | ||
1863 | } | ||
1864 | |||
1865 | curPage = page; | ||
1866 | } | ||
1867 | } | ||
1868 | /* else, we are confused. give up. */ | ||
1869 | else | ||
1870 | return CHM_RESOLVE_FAILURE; | ||
1871 | } | ||
1872 | |||
1873 | /* didn't find anything. fail. */ | ||
1874 | return CHM_RESOLVE_FAILURE; | ||
1875 | |||
1876 | } | ||