summaryrefslogtreecommitdiff
path: root/noncore/apps/opie-reader/chm_lib.c
Unidiff
Diffstat (limited to 'noncore/apps/opie-reader/chm_lib.c') (more/less context) (ignore whitespace changes)
-rw-r--r--noncore/apps/opie-reader/chm_lib.c1876
1 files changed, 1876 insertions, 0 deletions
diff --git a/noncore/apps/opie-reader/chm_lib.c b/noncore/apps/opie-reader/chm_lib.c
new file mode 100644
index 0000000..ecf8278
--- a/dev/null
+++ b/noncore/apps/opie-reader/chm_lib.c
@@ -0,0 +1,1876 @@
1/* $Id$ */
2/***************************************************************************
3 * chm_lib.c - CHM archive manipulation routines *
4 * ------------------- *
5 * *
6 * author: Jed Wing <jedwin@ugcs.caltech.edu> *
7 * version: 0.3 *
8 * notes: These routines are meant for the manipulation of microsoft *
9 * .chm (compiled html help) files, but may likely be used *
10 * for the manipulation of any ITSS archive, if ever ITSS *
11 * archives are used for any other purpose. *
12 * *
13 * Note also that the section names are statically handled. *
14 * To be entirely correct, the section names should be read *
15 * from the section names meta-file, and then the various *
16 * content sections and the "transforms" to apply to the data *
17 * they contain should be inferred from the section name and *
18 * the meta-files referenced using that name; however, all of *
19 * the files I've been able to get my hands on appear to have *
20 * only two sections: Uncompressed and MSCompressed. *
21 * Additionally, the ITSS.DLL file included with Windows does *
22 * not appear to handle any different transforms than the *
23 * simple LZX-transform. Furthermore, the list of transforms *
24 * to apply is broken, in that only half the required space *
25 * is allocated for the list. (It appears as though the *
26 * space is allocated for ASCII strings, but the strings are *
27 * written as unicode. As a result, only the first half of *
28 * the string appears.) So this is probably not too big of *
29 * a deal, at least until CHM v4 (MS .lit files), which also *
30 * incorporate encryption, of some description. *
31 * *
32 * switches: CHM_MT: compile library with thread-safety *
33 * *
34 * switches (Linux only): *
35 * CHM_USE_PREAD: compile library to use pread instead of *
36 * lseek/read *
37 * CHM_USE_IO64: compile library to support full 64-bit I/O *
38 * as is needed to properly deal with the *
39 * 64-bit file offsets. *
40 ***************************************************************************/
41
42/***************************************************************************
43 * *
44 * This program is free software; you can redistribute it and/or modify *
45 * it under the terms of the GNU Lesser General Public License as *
46 * published by the Free Software Foundation; either version 2.1 of the *
47 * License, or (at your option) any later version. *
48 * *
49 ***************************************************************************/
50
51#include "chm_lib.h"
52
53#ifdef CHM_MT
54#define _REENTRANT
55#endif
56
57#include "lzx.h"
58
59#include <stdlib.h>
60#include <string.h>
61
62#if __sun || __sgi
63#include <strings.h>
64#endif
65
66#ifdef WIN32
67#include <windows.h>
68#include <malloc.h>
69#else
70/* basic Linux system includes */
71#define _XOPEN_SOURCE 500
72#include <unistd.h>
73#include <sys/types.h>
74#include <sys/stat.h>
75#include <fcntl.h>
76#include <malloc.h>
77#endif
78
79/* includes/defines for threading, if using them */
#ifdef CHM_MT
#ifdef WIN32
/* Win32: the lock objects are CRITICAL_SECTIONs */
#define CHM_ACQUIRE_LOCK(a) do {                        \
        EnterCriticalSection(&(a));                     \
    } while(0)
/* releasing must LEAVE the critical section; entering it a second time
 * would only bump the recursion count and never let other threads in.
 */
#define CHM_RELEASE_LOCK(a) do {                        \
        LeaveCriticalSection(&(a));                     \
    } while(0)

#else
#include <pthread.h>

/* POSIX: the lock objects are pthread mutexes */
#define CHM_ACQUIRE_LOCK(a) do {                        \
        pthread_mutex_lock(&(a));                       \
    } while(0)
#define CHM_RELEASE_LOCK(a) do {                        \
        pthread_mutex_unlock(&(a));                     \
    } while(0)

#endif
#else
/* library built without CHM_MT: locking compiles away entirely */
#define CHM_ACQUIRE_LOCK(a) /* do nothing */
#define CHM_RELEASE_LOCK(a) /* do nothing */
#endif
104
/* platform abstraction for the underlying file handle:
 *   CHM_NULL_FD        - value of a handle that is not open
 *   CHM_CLOSE_FILE(fd) - close an open handle
 *   CHM_USE_WIN32IO    - defined when the Win32 file API is in use
 */
#if defined(WIN32)
#define CHM_USE_WIN32IO         1
#define CHM_NULL_FD             (INVALID_HANDLE_VALUE)
#define CHM_CLOSE_FILE(fd)      CloseHandle((fd))
#else
#define CHM_NULL_FD             (-1)
#define CHM_CLOSE_FILE(fd)      close((fd))
#endif

/*
 * tuning: default number of cache slots for decompressed blocks; may be
 * overridden on the compiler command line.
 */
#if !defined(CHM_MAX_BLOCKS_CACHED)
#define CHM_MAX_BLOCKS_CACHED   5
#endif
120
121/*
122 * architecture specific defines
123 *
124 * Note: as soon as C99 is more widespread, the below defines should
125 * probably just use the C99 sized-int types.
126 *
127 * The following settings will probably work for many platforms. The sizes
128 * don't have to be exactly correct, but the types must accommodate at least as
129 * many bits as they specify.
130 */
131
/*
 * sized integer aliases used throughout this module.  Each alias must hold
 * at least the number of bits its name states.
 *
 * NOTE(review): on targets where 'long' is 64 bits wide, Int32/UInt32 are
 * wider than 32 bits; code must not rely on them wrapping at 32 bits.
 */
#if !defined(WIN32)
/* I386, 32-bit, non-Windows */
/* Sparc */
/* MIPS */
typedef unsigned char           UChar;
typedef short                   Int16;
typedef unsigned short          UInt16;
typedef long                    Int32;
typedef unsigned long           UInt32;
typedef long long               Int64;
typedef unsigned long long      UInt64;

#else
/* i386, 32-bit, Windows */
typedef unsigned char           UChar;
typedef __int16                 Int16;
typedef unsigned __int16        UInt16;
typedef __int32                 Int32;
typedef unsigned __int32        UInt32;
typedef __int64                 Int64;
typedef unsigned __int64        UInt64;
#endif
154
155/* GCC */
156#ifdef __GNUC__
157#define memcmp __builtin_memcmp
158#define memcpy __builtin_memcpy
159#define strlen __builtin_strlen
160
161#elif defined(WIN32)
/*
 * find-first-set replacement: return the 1-based position of the least
 * significant set bit of 'val', or 0 when 'val' has no bits set.
 */
static int ffs(unsigned int val)
{
    /* the probe mask is unsigned so that shifting it out of the top bit is
     * well defined (it simply becomes 0, which ends the loop).
     */
    unsigned int bit = 1;
    int idx = 1;

    while (bit != 0 && (val & bit) == 0)
    {
        bit <<= 1;
        ++idx;
    }

    return (bit == 0) ? 0 : idx;
}
175
176#endif
177
178/* utilities for unmarshalling data */
/*
 * copy 'count' raw bytes from the input cursor into 'dest'.
 *
 *   pData      - in/out: read cursor, advanced by 'count' on success
 *   pLenRemain - in/out: bytes left at the cursor, reduced on success
 *
 * returns 1 on success; 0 (nothing consumed) if count is not positive or
 * exceeds the bytes remaining.
 */
static int _unmarshal_char_array(unsigned char **pData,
                                 unsigned long *pLenRemain,
                                 char *dest,
                                 int count)
{
    const unsigned int wanted = (unsigned int)count;

    if (count > 0 && wanted <= *pLenRemain)
    {
        memcpy(dest, *pData, count);
        *pData      = *pData + count;
        *pLenRemain = *pLenRemain - count;
        return 1;
    }

    return 0;
}
191
/*
 * unsigned-char flavour of the raw byte copy: move 'count' bytes from the
 * input cursor to 'dest', advancing *pData and shrinking *pLenRemain.
 *
 * returns 1 on success; 0 (cursor untouched) when count is not positive or
 * more bytes are requested than remain.
 */
static int _unmarshal_uchar_array(unsigned char **pData,
                                  unsigned long *pLenRemain,
                                  unsigned char *dest,
                                  int count)
{
    unsigned long nbytes;

    if (count <= 0)
        return 0;
    if ((unsigned int)count > *pLenRemain)
        return 0;

    nbytes = (unsigned long)count;
    memcpy(dest, *pData, nbytes);
    *pData += nbytes;
    *pLenRemain -= nbytes;
    return 1;
}
204
205static int _unmarshal_int16(unsigned char **pData,
206 unsigned long *pLenRemain,
207 Int16 *dest)
208{
209 if (2 > *pLenRemain)
210 return 0;
211 *dest = (*pData)[0] | (*pData)[1]<<8;
212 *pData += 2;
213 *pLenRemain -= 2;
214 return 1;
215}
216
217static int _unmarshal_uint16(unsigned char **pData,
218 unsigned long *pLenRemain,
219 UInt16 *dest)
220{
221 if (2 > *pLenRemain)
222 return 0;
223 *dest = (*pData)[0] | (*pData)[1]<<8;
224 *pData += 2;
225 *pLenRemain -= 2;
226 return 1;
227}
228
229static int _unmarshal_int32(unsigned char **pData,
230 unsigned long *pLenRemain,
231 Int32 *dest)
232{
233 if (4 > *pLenRemain)
234 return 0;
235 *dest = (*pData)[0] | (*pData)[1]<<8 | (*pData)[2]<<16 | (*pData)[3]<<24;
236 *pData += 4;
237 *pLenRemain -= 4;
238 return 1;
239}
240
241static int _unmarshal_uint32(unsigned char **pData,
242 unsigned long *pLenRemain,
243 UInt32 *dest)
244{
245 if (4 > *pLenRemain)
246 return 0;
247 *dest = (*pData)[0] | (*pData)[1]<<8 | (*pData)[2]<<16 | (*pData)[3]<<24;
248 *pData += 4;
249 *pLenRemain -= 4;
250 return 1;
251}
252
253static int _unmarshal_int64(unsigned char **pData,
254 unsigned long *pLenRemain,
255 Int64 *dest)
256{
257 Int64 temp;
258 int i;
259 if (8 > *pLenRemain)
260 return 0;
261 temp=0;
262 for(i=8; i>0; i--)
263 {
264 temp <<= 8;
265 temp |= (*pData)[i-1];
266 }
267 *dest = temp;
268 *pData += 8;
269 *pLenRemain -= 8;
270 return 1;
271}
272
273static int _unmarshal_uint64(unsigned char **pData,
274 unsigned long *pLenRemain,
275 UInt64 *dest)
276{
277 UInt64 temp;
278 int i;
279 if (8 > *pLenRemain)
280 return 0;
281 temp=0;
282 for(i=8; i>0; i--)
283 {
284 temp <<= 8;
285 temp |= (*pData)[i-1];
286 }
287 *dest = temp;
288 *pData += 8;
289 *pLenRemain -= 8;
290 return 1;
291}
292
/*
 * read a 16-byte UUID verbatim (no byte reordering) from the input cursor.
 * returns whatever _unmarshal_uchar_array returns for a 16-byte copy.
 */
static int _unmarshal_uuid(unsigned char **pData,
                           unsigned long *pDataLen,
                           unsigned char *dest)
{
    enum { UUID_BYTES = 16 };

    return _unmarshal_uchar_array(pData, pDataLen, dest, UUID_BYTES);
}
299
/* archive-internal paths of the objects needed for decompression */

/* LZX reset table */
static const char _CHMU_RESET_TABLE[]      =
        "::DataSpace/Storage/MSCompressed/Transform"
        "/{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}"
        "/InstanceData/ResetTable";

/* LZX control data block */
static const char _CHMU_LZXC_CONTROLDATA[] =
        "::DataSpace/Storage/MSCompressed/ControlData";

/* the compressed content stream itself */
static const char _CHMU_CONTENT[]          =
        "::DataSpace/Storage/MSCompressed/Content";

/* span information */
static const char _CHMU_SPANINFO[]         =
        "::DataSpace/Storage/MSCompressed/SpanInfo";
311
312/*
313 * structures local to this module
314 */
315
316/* structure of ITSF headers */
317#define _CHM_ITSF_V2_LEN (0x58)
318#define _CHM_ITSF_V3_LEN (0x60)
319struct chmItsfHeader
320{
321 char signature[4]; /* 0 (ITSF) */
322 Int32 version; /* 4 */
323 Int32 header_len; /* 8 */
324 Int32 unknown_000c; /* c */
325 UInt32 last_modified; /* 10 */
326 UInt32 lang_id; /* 14 */
327 UChar dir_uuid[16]; /* 18 */
328 UChar stream_uuid[16]; /* 28 */
329 UInt64 unknown_offset; /* 38 */
330 UInt64 unknown_len; /* 40 */
331 UInt64 dir_offset; /* 48 */
332 UInt64 dir_len; /* 50 */
333 UInt64 data_offset; /* 58 (Not present before V3) */
334}; /* __attribute__ ((aligned (1))); */
335
336static int _unmarshal_itsf_header(unsigned char **pData,
337 unsigned long *pDataLen,
338 struct chmItsfHeader *dest)
339{
340 /* we only know how to deal with the 0x58 and 0x60 byte structures */
341 if (*pDataLen != _CHM_ITSF_V2_LEN && *pDataLen != _CHM_ITSF_V3_LEN)
342 return 0;
343
344 /* unmarshal common fields */
345 _unmarshal_char_array(pData, pDataLen, dest->signature, 4);
346 _unmarshal_int32 (pData, pDataLen, &dest->version);
347 _unmarshal_int32 (pData, pDataLen, &dest->header_len);
348 _unmarshal_int32 (pData, pDataLen, &dest->unknown_000c);
349 _unmarshal_uint32 (pData, pDataLen, &dest->last_modified);
350 _unmarshal_uint32 (pData, pDataLen, &dest->lang_id);
351 _unmarshal_uuid (pData, pDataLen, dest->dir_uuid);
352 _unmarshal_uuid (pData, pDataLen, dest->stream_uuid);
353 _unmarshal_uint64 (pData, pDataLen, &dest->unknown_offset);
354 _unmarshal_uint64 (pData, pDataLen, &dest->unknown_len);
355 _unmarshal_uint64 (pData, pDataLen, &dest->dir_offset);
356 _unmarshal_uint64 (pData, pDataLen, &dest->dir_len);
357
358 /* error check the data */
359 /* XXX: should also check UUIDs, probably, though with a version 3 file,
360 * current MS tools do not seem to use them.
361 */
362 if (memcmp(dest->signature, "ITSF", 4) != 0)
363 return 0;
364 if (dest->version == 2)
365 {
366 if (dest->header_len < _CHM_ITSF_V2_LEN)
367 return 0;
368 }
369 else if (dest->version == 3)
370 {
371 if (dest->header_len < _CHM_ITSF_V3_LEN)
372 return 0;
373 }
374 else
375 return 0;
376
377 /* now, if we have a V3 structure, unmarshal the rest.
378 * otherwise, compute it
379 */
380 if (dest->version == 3)
381 {
382 if (*pDataLen != 0)
383 _unmarshal_uint64(pData, pDataLen, &dest->data_offset);
384 else
385 return 0;
386 }
387 else
388 dest->data_offset = dest->dir_offset + dest->dir_len;
389
390 return 1;
391}
392
393/* structure of ITSP headers */
394#define _CHM_ITSP_V1_LEN (0x54)
395struct chmItspHeader
396{
397 char signature[4]; /* 0 (ITSP) */
398 Int32 version; /* 4 */
399 Int32 header_len; /* 8 */
400 Int32 unknown_000c; /* c */
401 UInt32 block_len; /* 10 */
402 Int32 blockidx_intvl; /* 14 */
403 Int32 index_depth; /* 18 */
404 Int32 index_root; /* 1c */
405 Int32 index_head; /* 20 */
406 Int32 unknown_0024; /* 24 */
407 UInt32 num_blocks; /* 28 */
408 Int32 unknown_002c; /* 2c */
409 UInt32 lang_id; /* 30 */
410 UChar system_uuid[16]; /* 34 */
411 UChar unknown_0044[16]; /* 44 */
412}; /* __attribute__ ((aligned (1))); */
413
414static int _unmarshal_itsp_header(unsigned char **pData,
415 unsigned long *pDataLen,
416 struct chmItspHeader *dest)
417{
418 /* we only know how to deal with a 0x54 byte structures */
419 if (*pDataLen != _CHM_ITSP_V1_LEN)
420 return 0;
421
422 /* unmarshal fields */
423 _unmarshal_char_array(pData, pDataLen, dest->signature, 4);
424 _unmarshal_int32 (pData, pDataLen, &dest->version);
425 _unmarshal_int32 (pData, pDataLen, &dest->header_len);
426 _unmarshal_int32 (pData, pDataLen, &dest->unknown_000c);
427 _unmarshal_uint32 (pData, pDataLen, &dest->block_len);
428 _unmarshal_int32 (pData, pDataLen, &dest->blockidx_intvl);
429 _unmarshal_int32 (pData, pDataLen, &dest->index_depth);
430 _unmarshal_int32 (pData, pDataLen, &dest->index_root);
431 _unmarshal_int32 (pData, pDataLen, &dest->index_head);
432 _unmarshal_int32 (pData, pDataLen, &dest->unknown_0024);
433 _unmarshal_uint32 (pData, pDataLen, &dest->num_blocks);
434 _unmarshal_int32 (pData, pDataLen, &dest->unknown_002c);
435 _unmarshal_uint32 (pData, pDataLen, &dest->lang_id);
436 _unmarshal_uuid (pData, pDataLen, dest->system_uuid);
437 _unmarshal_uchar_array(pData, pDataLen, dest->unknown_0044, 16);
438
439 /* error check the data */
440 if (memcmp(dest->signature, "ITSP", 4) != 0)
441 return 0;
442 if (dest->version != 1)
443 return 0;
444 if (dest->header_len != _CHM_ITSP_V1_LEN)
445 return 0;
446
447 return 1;
448}
449
450/* structure of PMGL headers */
451static const char _chm_pmgl_marker[4] = "PMGL";
452#define _CHM_PMGL_LEN (0x14)
453struct chmPmglHeader
454{
455 char signature[4]; /* 0 (PMGL) */
456 UInt32 free_space; /* 4 */
457 UInt32 unknown_0008; /* 8 */
458 Int32 block_prev; /* c */
459 Int32 block_next; /* 10 */
460}; /* __attribute__ ((aligned (1))); */
461
462static int _unmarshal_pmgl_header(unsigned char **pData,
463 unsigned long *pDataLen,
464 struct chmPmglHeader *dest)
465{
466 /* we only know how to deal with a 0x14 byte structures */
467 if (*pDataLen != _CHM_PMGL_LEN)
468 return 0;
469
470 /* unmarshal fields */
471 _unmarshal_char_array(pData, pDataLen, dest->signature, 4);
472 _unmarshal_uint32 (pData, pDataLen, &dest->free_space);
473 _unmarshal_uint32 (pData, pDataLen, &dest->unknown_0008);
474 _unmarshal_int32 (pData, pDataLen, &dest->block_prev);
475 _unmarshal_int32 (pData, pDataLen, &dest->block_next);
476
477 /* check structure */
478 if (memcmp(dest->signature, _chm_pmgl_marker, 4) != 0)
479 return 0;
480
481 return 1;
482}
483
484/* structure of PMGI headers */
485static const char _chm_pmgi_marker[4] = "PMGI";
486#define _CHM_PMGI_LEN (0x08)
487struct chmPmgiHeader
488{
489 char signature[4]; /* 0 (PMGI) */
490 UInt32 free_space; /* 4 */
491}; /* __attribute__ ((aligned (1))); */
492
493static int _unmarshal_pmgi_header(unsigned char **pData,
494 unsigned long *pDataLen,
495 struct chmPmgiHeader *dest)
496{
497 /* we only know how to deal with a 0x8 byte structures */
498 if (*pDataLen != _CHM_PMGI_LEN)
499 return 0;
500
501 /* unmarshal fields */
502 _unmarshal_char_array(pData, pDataLen, dest->signature, 4);
503 _unmarshal_uint32 (pData, pDataLen, &dest->free_space);
504
505 /* check structure */
506 if (memcmp(dest->signature, _chm_pmgi_marker, 4) != 0)
507 return 0;
508
509 return 1;
510}
511
512/* structure of LZXC reset table */
513#define _CHM_LZXC_RESETTABLE_V1_LEN (0x28)
514struct chmLzxcResetTable
515{
516 UInt32 version;
517 UInt32 block_count;
518 UInt32 unknown;
519 UInt32 table_offset;
520 UInt64 uncompressed_len;
521 UInt64 compressed_len;
522 UInt64 block_len;
523}; /* __attribute__ ((aligned (1))); */
524
525static int _unmarshal_lzxc_reset_table(unsigned char **pData,
526 unsigned long *pDataLen,
527 struct chmLzxcResetTable *dest)
528{
529 /* we only know how to deal with a 0x28 byte structures */
530 if (*pDataLen != _CHM_LZXC_RESETTABLE_V1_LEN)
531 return 0;
532
533 /* unmarshal fields */
534 _unmarshal_uint32 (pData, pDataLen, &dest->version);
535 _unmarshal_uint32 (pData, pDataLen, &dest->block_count);
536 _unmarshal_uint32 (pData, pDataLen, &dest->unknown);
537 _unmarshal_uint32 (pData, pDataLen, &dest->table_offset);
538 _unmarshal_uint64 (pData, pDataLen, &dest->uncompressed_len);
539 _unmarshal_uint64 (pData, pDataLen, &dest->compressed_len);
540 _unmarshal_uint64 (pData, pDataLen, &dest->block_len);
541
542 /* check structure */
543 if (dest->version != 2)
544 return 0;
545
546 return 1;
547}
548
549/* structure of LZXC control data block */
550#define _CHM_LZXC_MIN_LEN (0x18)
551#define _CHM_LZXC_V2_LEN (0x1c)
552struct chmLzxcControlData
553{
554 UInt32 size; /* 0 */
555 char signature[4]; /* 4 (LZXC) */
556 UInt32 version; /* 8 */
557 UInt32 resetInterval; /* c */
558 UInt32 windowSize; /* 10 */
559 UInt32 unknown_14; /* 14 */
560 UInt32 unknown_18; /* 18 */
561};
562
563static int _unmarshal_lzxc_control_data(unsigned char **pData,
564 unsigned long *pDataLen,
565 struct chmLzxcControlData *dest)
566{
567 /* we want at least 0x18 bytes */
568 if (*pDataLen < _CHM_LZXC_MIN_LEN)
569 return 0;
570
571 /* unmarshal fields */
572 _unmarshal_uint32 (pData, pDataLen, &dest->size);
573 _unmarshal_char_array(pData, pDataLen, dest->signature, 4);
574 _unmarshal_uint32 (pData, pDataLen, &dest->version);
575 _unmarshal_uint32 (pData, pDataLen, &dest->resetInterval);
576 _unmarshal_uint32 (pData, pDataLen, &dest->windowSize);
577 _unmarshal_uint32 (pData, pDataLen, &dest->unknown_14);
578
579 if (*pDataLen >= _CHM_LZXC_V2_LEN)
580 _unmarshal_uint32 (pData, pDataLen, &dest->unknown_18);
581 else
582 dest->unknown_18 = 0;
583
584 if (dest->version == 2)
585 {
586 dest->resetInterval *= 0x8000;
587 dest->windowSize *= 0x8000;
588 dest->unknown_14 *= 0x8000;
589 }
590 if (dest->windowSize == 0 || dest->resetInterval == 0)
591 return 0;
592
593 /* for now, only support resetInterval a multiple of windowSize/2 */
594 if (dest->windowSize == 1)
595 return 0;
596 if ((dest->resetInterval % (dest->windowSize/2)) != 0)
597 return 0;
598
599 /* check structure */
600 if (memcmp(dest->signature, "LZXC", 4) != 0)
601 return 0;
602
603 return 1;
604}
605
606/* the structure used for chm file handles */
607struct chmFile
608{
609#ifdef WIN32
610 HANDLE fd;
611#else
612 int fd;
613#endif
614
615#ifdef CHM_MT
616#ifdef WIN32
617 CRITICAL_SECTION mutex;
618 CRITICAL_SECTION lzx_mutex;
619 CRITICAL_SECTION cache_mutex;
620#else
621 pthread_mutex_t mutex;
622 pthread_mutex_t lzx_mutex;
623 pthread_mutex_t cache_mutex;
624#endif
625#endif
626
627 UInt64 dir_offset;
628 UInt64 dir_len;
629 UInt64 data_offset;
630 Int32 index_root;
631 Int32 index_head;
632 UInt32 block_len;
633
634 UInt64 span;
635 struct chmUnitInfo rt_unit;
636 struct chmUnitInfo cn_unit;
637 struct chmLzxcResetTable reset_table;
638
639 /* LZX control data */
640 UInt32 window_size;
641 UInt32 reset_interval;
642 UInt32 reset_blkcount;
643
644 /* decompressor state */
645 struct LZXstate *lzx_state;
646 int lzx_last_block;
647
648 /* cache for decompressed blocks */
649 UChar **cache_blocks;
650 Int64 *cache_block_indices;
651 Int32 cache_num_blocks;
652};
653
654/*
655 * utility functions local to this module
656 */
657
658/* utility function to handle differences between {pread,read}(64)? */
659static Int64 _chm_fetch_bytes(struct chmFile *h,
660 UChar *buf,
661 UInt64 os,
662 Int64 len)
663{
664 Int64 readLen=0, oldOs=0;
665 if (h->fd == CHM_NULL_FD)
666 return readLen;
667
668 CHM_ACQUIRE_LOCK(h->mutex);
669#ifdef CHM_USE_WIN32IO
670 /* NOTE: this might be better done with CreateFileMapping, et cetera... */
671 {
672 DWORD origOffsetLo=0, origOffsetHi=0;
673 DWORD offsetLo, offsetHi;
674 DWORD actualLen=0;
675
676 /* awkward Win32 Seek/Tell */
677 offsetLo = (unsigned long)(os & 0xffffffffL);
678 offsetHi = (unsigned long)((os >> 32) & 0xffffffffL);
679 origOffsetLo = SetFilePointer(h->fd, 0, &origOffsetHi, FILE_CURRENT);
680 offsetLo = SetFilePointer(h->fd, offsetLo, &offsetHi, FILE_BEGIN);
681
682 /* read the data */
683 if (ReadFile(h->fd,
684 buf,
685 (DWORD)len,
686 &actualLen,
687 NULL) == TRUE)
688 readLen = actualLen;
689 else
690 readLen = 0;
691
692 /* restore original position */
693 SetFilePointer(h->fd, origOffsetLo, &origOffsetHi, FILE_BEGIN);
694 }
695#else
696#ifdef CHM_USE_PREAD
697#ifdef CHM_USE_IO64
698 readLen = pread64(h->fd, buf, (long)len, os);
699#else
700 readLen = pread(h->fd, buf, (long)len, (unsigned long)os);
701#endif
702#else
703#ifdef CHM_USE_IO64
704 oldOs = lseek64(h->fd, 0, SEEK_CUR);
705 lseek64(h->fd, os, SEEK_SET);
706 readLen = read(h->fd, buf, len);
707 lseek64(h->fd, oldOs, SEEK_SET);
708#else
709 oldOs = lseek(h->fd, 0, SEEK_CUR);
710 lseek(h->fd, (long)os, SEEK_SET);
711 readLen = read(h->fd, buf, len);
712 lseek(h->fd, (long)oldOs, SEEK_SET);
713#endif
714#endif
715#endif
716 CHM_RELEASE_LOCK(h->mutex);
717 return readLen;
718}
719
720/* open an ITS archive */
721struct chmFile *chm_open(const char *filename)
722{
723 unsigned char sbuffer[256];
724 unsigned long sremain;
725 unsigned char *sbufpos;
726 struct chmFile *newHandle=NULL;
727 struct chmItsfHeader itsfHeader;
728 struct chmItspHeader itspHeader;
729 struct chmUnitInfo uiSpan;
730 struct chmUnitInfo uiLzxc;
731 struct chmLzxcControlData ctlData;
732
733 /* allocate handle */
734 newHandle = (struct chmFile *)malloc(sizeof(struct chmFile));
735 newHandle->fd = CHM_NULL_FD;
736 newHandle->lzx_state = NULL;
737 newHandle->cache_blocks = NULL;
738 newHandle->cache_block_indices = NULL;
739 newHandle->cache_num_blocks = 0;
740
741 /* open file */
742#ifdef WIN32
743 if ((newHandle->fd=CreateFileA(filename,
744 GENERIC_READ,
745 0,
746 NULL,
747 OPEN_EXISTING,
748 FILE_ATTRIBUTE_NORMAL,
749 NULL)) == CHM_NULL_FD)
750 {
751 free(newHandle);
752 return NULL;
753 }
754#else
755 if ((newHandle->fd=open(filename, O_RDONLY)) == CHM_NULL_FD)
756 {
757 free(newHandle);
758 return NULL;
759 }
760#endif
761
762 /* initialize mutexes, if needed */
763#ifdef CHM_MT
764#ifdef WIN32
765 InitializeCriticalSection(&newHandle->mutex);
766 InitializeCriticalSection(&newHandle->lzx_mutex);
767 InitializeCriticalSection(&newHandle->cache_mutex);
768#else
769 pthread_mutex_init(&newHandle->mutex, NULL);
770 pthread_mutex_init(&newHandle->lzx_mutex, NULL);
771 pthread_mutex_init(&newHandle->cache_mutex, NULL);
772#endif
773#endif
774
775 /* read and verify header */
776 sremain = _CHM_ITSF_V3_LEN;
777 sbufpos = sbuffer;
778 if (_chm_fetch_bytes(newHandle, sbuffer, (UInt64)0, sremain) != sremain ||
779 !_unmarshal_itsf_header(&sbufpos, &sremain, &itsfHeader))
780 {
781 chm_close(newHandle);
782 return NULL;
783 }
784
785 /* stash important values from header */
786 newHandle->dir_offset = itsfHeader.dir_offset;
787 newHandle->dir_len = itsfHeader.dir_len;
788 newHandle->data_offset = itsfHeader.data_offset;
789
790 /* now, read and verify the directory header chunk */
791 sremain = _CHM_ITSP_V1_LEN;
792 sbufpos = sbuffer;
793 if (_chm_fetch_bytes(newHandle, sbuffer,
794 (UInt64)itsfHeader.dir_offset, sremain) != sremain ||
795 !_unmarshal_itsp_header(&sbufpos, &sremain, &itspHeader))
796 {
797 chm_close(newHandle);
798 return NULL;
799 }
800
801 /* grab essential information from ITSP header */
802 newHandle->dir_offset += itspHeader.header_len;
803 newHandle->dir_len -= itspHeader.header_len;
804 newHandle->index_root = itspHeader.index_root;
805 newHandle->index_head = itspHeader.index_head;
806 newHandle->block_len = itspHeader.block_len;
807
808 /* if the index root is -1, this means we don't have any PMGI blocks.
809 * as a result, we must use the sole PMGL block as the index root
810 */
811 if (newHandle->index_root == -1)
812 newHandle->index_root = newHandle->index_head;
813
814 /* prefetch most commonly needed unit infos */
815 if (CHM_RESOLVE_SUCCESS != chm_resolve_object(newHandle,
816 _CHMU_SPANINFO,
817 &uiSpan) ||
818 uiSpan.space == CHM_COMPRESSED ||
819 CHM_RESOLVE_SUCCESS != chm_resolve_object(newHandle,
820 _CHMU_RESET_TABLE,
821 &newHandle->rt_unit) ||
822 newHandle->rt_unit.space == CHM_COMPRESSED ||
823 CHM_RESOLVE_SUCCESS != chm_resolve_object(newHandle,
824 _CHMU_CONTENT,
825 &newHandle->cn_unit) ||
826 newHandle->cn_unit.space == CHM_COMPRESSED ||
827 CHM_RESOLVE_SUCCESS != chm_resolve_object(newHandle,
828 _CHMU_LZXC_CONTROLDATA,
829 &uiLzxc) ||
830 uiLzxc.space == CHM_COMPRESSED)
831 {
832 chm_close(newHandle);
833 return NULL;
834 }
835
836 /* try to read span */
837 /* N.B.: we've already checked that uiSpan is in the uncompressed section,
838 * so this should not require attempting to decompress, which may
839 * rely on having a valid "span"
840 */
841 sremain = 8;
842 sbufpos = sbuffer;
843 if (chm_retrieve_object(newHandle, &uiSpan, sbuffer,
844 0, sremain) != sremain ||
845 !_unmarshal_uint64(&sbufpos, &sremain, &newHandle->span))
846 {
847 chm_close(newHandle);
848 return NULL;
849 }
850
851 /* read reset table info */
852 sremain = _CHM_LZXC_RESETTABLE_V1_LEN;
853 sbufpos = sbuffer;
854 if (chm_retrieve_object(newHandle, &newHandle->rt_unit, sbuffer,
855 0, sremain) != sremain ||
856 !_unmarshal_lzxc_reset_table(&sbufpos, &sremain,
857 &newHandle->reset_table))
858 {
859 chm_close(newHandle);
860 return NULL;
861 }
862
863 /* read control data */
864 sremain = (unsigned long)uiLzxc.length;
865 sbufpos = sbuffer;
866 if (chm_retrieve_object(newHandle, &uiLzxc, sbuffer,
867 0, sremain) != sremain ||
868 !_unmarshal_lzxc_control_data(&sbufpos, &sremain,
869 &ctlData))
870 {
871 chm_close(newHandle);
872 return NULL;
873 }
874 newHandle->window_size = ctlData.windowSize;
875 newHandle->reset_interval = ctlData.resetInterval;
876 newHandle->reset_blkcount = newHandle->reset_interval /
877 (newHandle->window_size / 2);
878
879 /* initialize cache */
880 chm_set_param(newHandle, CHM_PARAM_MAX_BLOCKS_CACHED,
881 CHM_MAX_BLOCKS_CACHED);
882
883 return newHandle;
884}
885
886/* close an ITS archive */
887void chm_close(struct chmFile *h)
888{
889 if (h != NULL)
890 {
891 if (h->fd != CHM_NULL_FD)
892 CHM_CLOSE_FILE(h->fd);
893 h->fd = CHM_NULL_FD;
894
895#ifdef CHM_MT
896#ifdef WIN32
897 DeleteCriticalSection(&h->mutex);
898 DeleteCriticalSection(&h->lzx_mutex);
899 DeleteCriticalSection(&h->cache_mutex);
900#else
901 pthread_mutex_destroy(&h->mutex);
902 pthread_mutex_destroy(&h->lzx_mutex);
903 pthread_mutex_destroy(&h->cache_mutex);
904#endif
905#endif
906
907 if (h->lzx_state)
908 LZXteardown(h->lzx_state);
909 h->lzx_state = NULL;
910
911 if (h->cache_blocks)
912 {
913 int i;
914 for (i=0; i<h->cache_num_blocks; i++)
915 {
916 if (h->cache_blocks[i])
917 free(h->cache_blocks[i]);
918 }
919 free(h->cache_blocks);
920 h->cache_blocks = NULL;
921 }
922
923 if (h->cache_block_indices)
924 free(h->cache_block_indices);
925 h->cache_block_indices = NULL;
926
927 free(h);
928 }
929}
930
931/*
932 * set a parameter on the file handle.
933 * valid parameter types:
934 * CHM_PARAM_MAX_BLOCKS_CACHED:
935 * how many decompressed blocks should be cached? A simple
936 * caching scheme is used, wherein the index of the block is
937 * used as a hash value, and hash collision results in the
938 * invalidation of the previously cached block.
939 */
940void chm_set_param(struct chmFile *h,
941 int paramType,
942 int paramVal)
943{
944 switch (paramType)
945 {
946 case CHM_PARAM_MAX_BLOCKS_CACHED:
947 CHM_ACQUIRE_LOCK(h->cache_mutex);
948 if (paramVal != h->cache_num_blocks)
949 {
950 UChar **newBlocks;
951 UInt64 *newIndices;
952 int i;
953
954 /* allocate new cached blocks */
955 newBlocks = (UChar **)malloc(paramVal * sizeof (UChar *));
956 newIndices = (UInt64 *)malloc(paramVal * sizeof (UInt64));
957 for (i=0; i<paramVal; i++)
958 {
959 newBlocks[i] = NULL;
960 newIndices[i] = 0;
961 }
962
963 /* re-distribute old cached blocks */
964 if (h->cache_blocks)
965 {
966 for (i=0; i<h->cache_num_blocks; i++)
967 {
968 int newSlot = (int)(h->cache_block_indices[i] % paramVal);
969
970 if (h->cache_blocks[i])
971 {
972 /* in case of collision, destroy newcomer */
973 if (newBlocks[newSlot])
974 {
975 free(h->cache_blocks[i]);
976 h->cache_blocks[i] = NULL;
977 }
978 else
979 {
980 newBlocks[newSlot] = h->cache_blocks[i];
981 newIndices[newSlot] =
982 h->cache_block_indices[i];
983 }
984 }
985 }
986
987 free(h->cache_blocks);
988 free(h->cache_block_indices);
989 }
990
991 /* now, set new values */
992 h->cache_blocks = newBlocks;
993 h->cache_block_indices = newIndices;
994 h->cache_num_blocks = paramVal;
995 }
996 CHM_RELEASE_LOCK(h->cache_mutex);
997 break;
998
999 default:
1000 break;
1001 }
1002}
1003
1004/*
1005 * helper methods for chm_resolve_object
1006 */
1007
1008/* skip a compressed dword */
1009static void _chm_skip_cword(UChar **pEntry)
1010{
1011 while (*(*pEntry)++ >= 0x80)
1012 ;
1013}
1014
1015/* skip the data from a PMGL entry */
1016static void _chm_skip_PMGL_entry_data(UChar **pEntry)
1017{
1018 _chm_skip_cword(pEntry);
1019 _chm_skip_cword(pEntry);
1020 _chm_skip_cword(pEntry);
1021}
1022
1023/* parse a compressed dword */
1024static UInt64 _chm_parse_cword(UChar **pEntry)
1025{
1026 UInt64 accum = 0;
1027 UChar temp;
1028 while ((temp=*(*pEntry)++) >= 0x80)
1029 {
1030 accum <<= 7;
1031 accum += temp & 0x7f;
1032 }
1033
1034 return (accum << 7) + temp;
1035}
1036
1037/* parse a utf-8 string into an ASCII char buffer */
1038static int _chm_parse_UTF8(UChar **pEntry, UInt64 count, char *path)
1039{
1040 /* XXX: implement UTF-8 support, including a real mapping onto
1041 * ISO-8859-1? probably there is a library to do this? As is
1042 * immediately apparent from the below code, I'm only handling files
1043 * in which none of the strings contain UTF-8 multi-byte characters.
1044 */
1045 while (count != 0)
1046 {
1047 if (*(*pEntry) > 0x7f)
1048 return 0;
1049
1050 *path++ = (char)(*(*pEntry)++);
1051 --count;
1052 }
1053
1054 *path = '\0';
1055 return 1;
1056}
1057
1058/* parse a PMGL entry into a chmUnitInfo struct; return 1 on success. */
1059static int _chm_parse_PMGL_entry(UChar **pEntry, struct chmUnitInfo *ui)
1060{
1061 UInt64 strLen;
1062
1063 /* parse str len */
1064 strLen = _chm_parse_cword(pEntry);
1065 if (strLen > CHM_MAX_PATHLEN)
1066 return 0;
1067
1068 /* parse path */
1069 if (! _chm_parse_UTF8(pEntry, strLen, ui->path))
1070 return 0;
1071
1072 /* parse info */
1073 ui->space = (int)_chm_parse_cword(pEntry);
1074 ui->start = _chm_parse_cword(pEntry);
1075 ui->length = _chm_parse_cword(pEntry);
1076 return 1;
1077}
1078
1079/* find an exact entry in PMGL; return NULL if we fail */
1080static UChar *_chm_find_in_PMGL(UChar *page_buf,
1081 UInt32 block_len,
1082 const char *objPath)
1083{
1084 /* XXX: modify this to do a binary search using the nice index structure
1085 * that is provided for us.
1086 */
1087 struct chmPmglHeader header;
1088 UInt32 hremain;
1089 UChar *end;
1090 UChar *cur;
1091 UChar *temp;
1092 UInt64 strLen;
1093 char buffer[CHM_MAX_PATHLEN+1];
1094
1095 /* figure out where to start and end */
1096 cur = page_buf;
1097 hremain = _CHM_PMGL_LEN;
1098 if (! _unmarshal_pmgl_header(&cur, &hremain, &header))
1099 return NULL;
1100 end = page_buf + block_len - (header.free_space);
1101
1102 /* now, scan progressively */
1103 while (cur < end)
1104 {
1105 /* grab the name */
1106 temp = cur;
1107 strLen = _chm_parse_cword(&cur);
1108 if (! _chm_parse_UTF8(&cur, strLen, buffer))
1109 return NULL;
1110
1111 /* check if it is the right name */
1112#ifdef WIN32
1113 if (! stricmp(buffer, objPath))
1114 return temp;
1115#else
1116 if (! strcasecmp(buffer, objPath))
1117 return temp;
1118#endif
1119
1120 _chm_skip_PMGL_entry_data(&cur);
1121 }
1122
1123 return NULL;
1124}
1125
1126/* find which block should be searched next for the entry; -1 if no block */
1127static Int32 _chm_find_in_PMGI(UChar *page_buf,
1128 UInt32 block_len,
1129 const char *objPath)
1130{
1131 /* XXX: modify this to do a binary search using the nice index structure
1132 * that is provided for us
1133 */
1134 struct chmPmgiHeader header;
1135 UInt32 hremain;
1136 int page=-1;
1137 UChar *end;
1138 UChar *cur;
1139 UInt64 strLen;
1140 char buffer[CHM_MAX_PATHLEN+1];
1141
1142 /* figure out where to start and end */
1143 cur = page_buf;
1144 hremain = _CHM_PMGI_LEN;
1145 if (! _unmarshal_pmgi_header(&cur, &hremain, &header))
1146 return -1;
1147 end = page_buf + block_len - (header.free_space);
1148
1149 /* now, scan progressively */
1150 while (cur < end)
1151 {
1152 /* grab the name */
1153 strLen = _chm_parse_cword(&cur);
1154 if (! _chm_parse_UTF8(&cur, strLen, buffer))
1155 return -1;
1156
1157 /* check if it is the right name */
1158#ifdef WIN32
1159 if (stricmp(buffer, objPath) > 0)
1160 return page;
1161#else
1162 if (strcasecmp(buffer, objPath) > 0)
1163 return page;
1164#endif
1165
1166 /* load next value for path */
1167 page = (int)_chm_parse_cword(&cur);
1168 }
1169
1170 return page;
1171}
1172
1173/* resolve a particular object from the archive */
1174int chm_resolve_object(struct chmFile *h,
1175 const char *objPath,
1176 struct chmUnitInfo *ui)
1177{
1178 /*
1179 * XXX: implement caching scheme for dir pages
1180 */
1181
1182 Int32 curPage;
1183
1184 /* buffer to hold whatever page we're looking at */
1185#ifdef WIN32
1186 UChar *page_buf = alloca(h->block_len);
1187#else
1188 UChar page_buf[h->block_len];
1189#endif
1190
1191 /* starting page */
1192 curPage = h->index_root;
1193
1194 /* until we have either returned or given up */
1195 while (curPage != -1)
1196 {
1197
1198 /* try to fetch the index page */
1199 if (_chm_fetch_bytes(h, page_buf,
1200 (UInt64)h->dir_offset + (UInt64)curPage*h->block_len,
1201 h->block_len) != h->block_len)
1202 return CHM_RESOLVE_FAILURE;
1203
1204 /* now, if it is a leaf node: */
1205 if (memcmp(page_buf, _chm_pmgl_marker, 4) == 0)
1206 {
1207 /* scan block */
1208 UChar *pEntry = _chm_find_in_PMGL(page_buf,
1209 h->block_len,
1210 objPath);
1211 if (pEntry == NULL)
1212 return CHM_RESOLVE_FAILURE;
1213
1214 /* parse entry and return */
1215 _chm_parse_PMGL_entry(&pEntry, ui);
1216 return CHM_RESOLVE_SUCCESS;
1217 }
1218
1219 /* else, if it is a branch node: */
1220 else if (memcmp(page_buf, _chm_pmgi_marker, 4) == 0)
1221 curPage = _chm_find_in_PMGI(page_buf, h->block_len, objPath);
1222
1223 /* else, we are confused. give up. */
1224 else
1225 return CHM_RESOLVE_FAILURE;
1226 }
1227
1228 /* didn't find anything. fail. */
1229 return CHM_RESOLVE_FAILURE;
1230}
1231
1232/*
1233 * utility methods for dealing with compressed data
1234 */
1235
1236/* get the bounds of a compressed block. return 0 on failure */
1237static int _chm_get_cmpblock_bounds(struct chmFile *h,
1238 UInt64 block,
1239 UInt64 *start,
1240 Int64 *len)
1241{
1242 UChar buffer[8], *dummy;
1243 UInt32 remain;
1244
1245 /* for all but the last block, use the reset table */
1246 if (block < h->reset_table.block_count-1)
1247 {
1248 /* unpack the start address */
1249 dummy = buffer;
1250 remain = 8;
1251 if (_chm_fetch_bytes(h, buffer,
1252 (UInt64)h->data_offset
1253 + (UInt64)h->rt_unit.start
1254 + (UInt64)h->reset_table.table_offset
1255 + (UInt64)block*8,
1256 remain) != remain ||
1257 !_unmarshal_uint64(&dummy, &remain, start))
1258 return 0;
1259
1260 /* unpack the end address */
1261 dummy = buffer;
1262 remain = 8;
1263 if (_chm_fetch_bytes(h, buffer,
1264 (UInt64)h->data_offset
1265 + (UInt64)h->rt_unit.start
1266 + (UInt64)h->reset_table.table_offset
1267 + (UInt64)block*8 + 8,
1268 remain) != remain ||
1269 !_unmarshal_int64(&dummy, &remain, len))
1270 return 0;
1271 }
1272
1273 /* for the last block, use the span in addition to the reset table */
1274 else
1275 {
1276 /* unpack the start address */
1277 dummy = buffer;
1278 remain = 8;
1279 if (_chm_fetch_bytes(h, buffer,
1280 (UInt64)h->data_offset
1281 + (UInt64)h->rt_unit.start
1282 + (UInt64)h->reset_table.table_offset
1283 + (UInt64)block*8,
1284 remain) != remain ||
1285 !_unmarshal_uint64(&dummy, &remain, start))
1286 return 0;
1287
1288 *len = h->reset_table.compressed_len;
1289 }
1290
1291 /* compute the length and absolute start address */
1292 *len -= *start;
1293 *start += h->data_offset + h->cn_unit.start;
1294
1295 return 1;
1296}
1297
1298/* decompress the block. must have lzx_mutex. */
1299static Int64 _chm_decompress_block(struct chmFile *h,
1300 UInt64 block,
1301 UChar **ubuffer)
1302{
1303#ifdef WIN32
1304 UChar *cbuffer = alloca(((unsigned int)h->reset_table.block_len + 6144));
1305#else
1306 UChar cbuffer[h->reset_table.block_len + 6144]; /* compressed buffer */
1307#endif
1308 UInt64 cmpStart; /* compressed start */
1309 Int64 cmpLen; /* compressed len */
1310 int indexSlot; /* cache index slot */
1311 UChar *lbuffer; /* local buffer ptr */
1312 UInt32 blockAlign = (UInt32)(block % h->reset_blkcount); /* reset intvl. aln. */
1313 UInt32 i; /* local loop index */
1314
1315 /* check if we need previous blocks */
1316 if (blockAlign != 0)
1317 {
1318 /* fetch all required previous blocks since last reset */
1319 for (i = h->reset_blkcount - blockAlign; i > 0; i--)
1320 {
1321
1322 /* check if we most recently decompressed the previous block */
1323 if (h->lzx_last_block != block-i)
1324 {
1325 indexSlot = (int)((block-i) % h->cache_num_blocks);
1326 h->cache_block_indices[indexSlot] = block-i;
1327 if (! h->cache_blocks[indexSlot])
1328 h->cache_blocks[indexSlot] = (UChar *)malloc(
1329 (unsigned int)(h->reset_table.block_len));
1330 lbuffer = h->cache_blocks[indexSlot];
1331
1332 /* decompress the previous block */
1333 LZXreset(h->lzx_state);
1334 if (!_chm_get_cmpblock_bounds(h, block-i, &cmpStart, &cmpLen) ||
1335 _chm_fetch_bytes(h, cbuffer, cmpStart, cmpLen) != cmpLen ||
1336 LZXdecompress(h->lzx_state, cbuffer, lbuffer, (int)cmpLen,
1337 (int)h->reset_table.block_len) != DECR_OK)
1338 return (Int64)0;
1339 }
1340
1341 h->lzx_last_block = (int)(block - i);
1342 }
1343 }
1344 else
1345 LZXreset(h->lzx_state);
1346
1347 /* allocate slot in cache */
1348 indexSlot = (int)(block % h->cache_num_blocks);
1349 h->cache_block_indices[indexSlot] = block;
1350 if (! h->cache_blocks[indexSlot])
1351 h->cache_blocks[indexSlot] = (UChar *)malloc(
1352 ((unsigned int)h->reset_table.block_len));
1353 lbuffer = h->cache_blocks[indexSlot];
1354 *ubuffer = lbuffer;
1355
1356 /* decompress the block we actually want */
1357 if (! _chm_get_cmpblock_bounds(h, block, &cmpStart, &cmpLen) ||
1358 _chm_fetch_bytes(h, cbuffer, cmpStart, cmpLen) != cmpLen ||
1359 LZXdecompress(h->lzx_state, cbuffer, lbuffer, (int)cmpLen,
1360 (int)h->reset_table.block_len) != DECR_OK)
1361 return (Int64)0;
1362 h->lzx_last_block = (int)block;
1363
1364 /* XXX: modify LZX routines to return the length of the data they
1365 * decompressed and return that instead, for an extra sanity check.
1366 */
1367 return h->reset_table.block_len;
1368}
1369
1370/* grab a region from a compressed block */
1371static Int64 _chm_decompress_region(struct chmFile *h,
1372 UChar *buf,
1373 UInt64 start,
1374 Int64 len)
1375{
1376 UInt64 nBlock, nOffset;
1377 UInt64 nLen;
1378 UInt64 gotLen;
1379 UChar *ubuffer;
1380
1381 if (len <= 0)
1382 return (Int64)0;
1383
1384 /* figure out what we need to read */
1385 nBlock = start / h->reset_table.block_len;
1386 nOffset = start % h->reset_table.block_len;
1387 nLen = len;
1388 if (nLen > (h->reset_table.block_len - nOffset))
1389 nLen = h->reset_table.block_len - nOffset;
1390
1391 /* if block is cached, return data from it. */
1392 CHM_ACQUIRE_LOCK(h->lzx_mutex);
1393 CHM_ACQUIRE_LOCK(h->cache_mutex);
1394 if (h->cache_block_indices[nBlock % h->cache_num_blocks] == nBlock &&
1395 h->cache_blocks[nBlock % h->cache_num_blocks] != NULL)
1396 {
1397 memcpy(buf,
1398 h->cache_blocks[nBlock % h->cache_num_blocks] + nOffset,
1399 (unsigned int)nLen);
1400 CHM_RELEASE_LOCK(h->cache_mutex);
1401 CHM_RELEASE_LOCK(h->lzx_mutex);
1402 return nLen;
1403 }
1404 CHM_RELEASE_LOCK(h->cache_mutex);
1405
1406 /* data request not satisfied, so... start up the decompressor machine */
1407 if (! h->lzx_state)
1408 {
1409 int window_size = ffs(h->window_size) - 1;
1410 h->lzx_last_block = -1;
1411 h->lzx_state = LZXinit(window_size);
1412 }
1413
1414 /* decompress some data */
1415 gotLen = _chm_decompress_block(h, nBlock, &ubuffer);
1416 if (gotLen < nLen)
1417 nLen = gotLen;
1418 memcpy(buf, ubuffer+nOffset, (unsigned int)nLen);
1419 CHM_RELEASE_LOCK(h->lzx_mutex);
1420 return nLen;
1421}
1422
1423/* retrieve (part of) an object */
1424LONGINT64 chm_retrieve_object(struct chmFile *h,
1425 struct chmUnitInfo *ui,
1426 unsigned char *buf,
1427 LONGUINT64 addr,
1428 LONGINT64 len)
1429{
1430 /* must be valid file handle */
1431 if (h == NULL)
1432 return (Int64)0;
1433
1434 /* starting address must be in correct range */
1435 if (addr < 0 || addr >= ui->length)
1436 return (Int64)0;
1437
1438 /* clip length */
1439 if (addr + len > ui->length)
1440 len = ui->length - addr;
1441
1442 /* if the file is uncompressed, it's simple */
1443 if (ui->space == CHM_UNCOMPRESSED)
1444 {
1445 /* read data */
1446 return _chm_fetch_bytes(h,
1447 buf,
1448 (UInt64)h->data_offset + (UInt64)ui->start + (UInt64)addr,
1449 len);
1450 }
1451
1452 /* else if the file is compressed, it's a little trickier */
1453 else /* ui->space == CHM_COMPRESSED */
1454 {
1455 Int64 swath=0, total=0;
1456 do {
1457
1458 /* swill another mouthful */
1459 swath = _chm_decompress_region(h, buf, ui->start + addr, len);
1460
1461 /* if we didn't get any... */
1462 if (swath == 0)
1463 return total;
1464
1465 /* update stats */
1466 total += swath;
1467 len -= swath;
1468 addr += swath;
1469 buf += swath;
1470
1471 } while (len != 0);
1472
1473 return total;
1474 }
1475}
1476
1477/* enumerate the objects in the .chm archive */
1478int chm_enumerate(struct chmFile *h,
1479 int what,
1480 CHM_ENUMERATOR e,
1481 void *context)
1482{
1483 Int32 curPage;
1484
1485 /* buffer to hold whatever page we're looking at */
1486#ifdef WIN32
1487 UChar *page_buf = alloca((unsigned int)h->block_len);
1488#else
1489 UChar page_buf[h->block_len];
1490#endif
1491 struct chmPmglHeader header;
1492 UChar *end;
1493 UChar *cur;
1494 unsigned long lenRemain;
1495
1496 /* the current ui */
1497 struct chmUnitInfo ui;
1498 int flag;
1499
1500 /* starting page */
1501 curPage = h->index_head;
1502
1503 /* until we have either returned or given up */
1504 while (curPage != -1)
1505 {
1506
1507 /* try to fetch the index page */
1508 if (_chm_fetch_bytes(h,
1509 page_buf,
1510 (UInt64)h->dir_offset + (UInt64)curPage*h->block_len,
1511 h->block_len) != h->block_len)
1512 return 0;
1513
1514 /* figure out start and end for this page */
1515 cur = page_buf;
1516 lenRemain = _CHM_PMGL_LEN;
1517 if (! _unmarshal_pmgl_header(&cur, &lenRemain, &header))
1518 return 0;
1519 end = page_buf + h->block_len - (header.free_space);
1520
1521 /* loop over this page */
1522 while (cur < end)
1523 {
1524 if (! _chm_parse_PMGL_entry(&cur, &ui))
1525 return 0;
1526
1527 /* check for DIRS */
1528 if (ui.length == 0 && !(what & CHM_ENUMERATE_DIRS))
1529 continue;
1530
1531 /* check for FILES */
1532 if (ui.length != 0 && !(what & CHM_ENUMERATE_FILES))
1533 continue;
1534
1535 /* check for NORMAL vs. META */
1536 if (ui.path[0] == '/')
1537 {
1538
1539 /* check for NORMAL vs. SPECIAL */
1540 if (ui.path[1] == '#' || ui.path[1] == '$')
1541 flag = CHM_ENUMERATE_SPECIAL;
1542 else
1543 flag = CHM_ENUMERATE_NORMAL;
1544 }
1545 else
1546 flag = CHM_ENUMERATE_META;
1547 if (! (what & flag))
1548 continue;
1549
1550 /* call the enumerator */
1551 {
1552 int status = (*e)(h, &ui, context);
1553 switch (status)
1554 {
1555 case CHM_ENUMERATOR_FAILURE: return 0;
1556 case CHM_ENUMERATOR_CONTINUE: break;
1557 case CHM_ENUMERATOR_SUCCESS: return 1;
1558 default: break;
1559 }
1560 }
1561 }
1562
1563 /* advance to next page */
1564 curPage = header.block_next;
1565 }
1566
1567 return 1;
1568}
1569
1570int chm_enumerate_dir(struct chmFile *h,
1571 const char *prefix,
1572 int what,
1573 CHM_ENUMERATOR e,
1574 void *context)
1575{
1576 /*
1577 * XXX: do this efficiently (i.e. using the tree index)
1578 */
1579
1580 Int32 curPage;
1581
1582 /* buffer to hold whatever page we're looking at */
1583#ifdef WIN32
1584 UChar *page_buf = alloca((unsigned int)h->block_len);
1585#else
1586 UChar page_buf[h->block_len];
1587#endif
1588 struct chmPmglHeader header;
1589 UChar *end;
1590 UChar *cur;
1591 unsigned long lenRemain;
1592
1593 /* set to 1 once we've started */
1594 int it_has_begun=0;
1595
1596 /* the current ui */
1597 struct chmUnitInfo ui;
1598 int flag;
1599
1600 /* the length of the prefix */
1601 char prefixRectified[CHM_MAX_PATHLEN+1];
1602 int prefixLen;
1603 char lastPath[CHM_MAX_PATHLEN];
1604 int lastPathLen;
1605
1606 /* starting page */
1607 curPage = h->index_head;
1608
1609 /* initialize pathname state */
1610 strncpy(prefixRectified, prefix, CHM_MAX_PATHLEN);
1611 prefixLen = strlen(prefixRectified);
1612 if (prefixLen != 0)
1613 {
1614 if (prefixRectified[prefixLen-1] != '/')
1615 {
1616 prefixRectified[prefixLen] = '/';
1617 prefixRectified[prefixLen+1] = '\0';
1618 ++prefixLen;
1619 }
1620 }
1621 lastPath[0] = '\0';
1622 lastPathLen = -1;
1623
1624 /* until we have either returned or given up */
1625 while (curPage != -1)
1626 {
1627
1628 /* try to fetch the index page */
1629 if (_chm_fetch_bytes(h,
1630 page_buf,
1631 (UInt64)h->dir_offset + (UInt64)curPage*h->block_len,
1632 h->block_len) != h->block_len)
1633 return 0;
1634
1635 /* figure out start and end for this page */
1636 cur = page_buf;
1637 lenRemain = _CHM_PMGL_LEN;
1638 if (! _unmarshal_pmgl_header(&cur, &lenRemain, &header))
1639 return 0;
1640 end = page_buf + h->block_len - (header.free_space);
1641
1642 /* loop over this page */
1643 while (cur < end)
1644 {
1645 if (! _chm_parse_PMGL_entry(&cur, &ui))
1646 return 0;
1647
1648 /* check if we should start */
1649 if (! it_has_begun)
1650 {
1651 if (ui.length == 0 && strncmp(ui.path, prefixRectified, prefixLen) == 0)
1652 it_has_begun = 1;
1653 else
1654 continue;
1655
1656 if (ui.path[prefixLen] == '\0')
1657 continue;
1658 }
1659
1660 /* check if we should stop */
1661 else
1662 {
1663 if (strncmp(ui.path, prefixRectified, prefixLen) != 0)
1664 return 1;
1665 }
1666
1667 /* check if we should include this path */
1668 if (lastPathLen != -1)
1669 {
1670 if (strncmp(ui.path, lastPath, lastPathLen) == 0)
1671 continue;
1672 }
1673 strcpy(lastPath, ui.path);
1674 lastPathLen = strlen(lastPath);
1675
1676 /* check for DIRS */
1677 if (ui.length == 0 && !(what & CHM_ENUMERATE_DIRS))
1678 continue;
1679
1680 /* check for FILES */
1681 if (ui.length != 0 && !(what & CHM_ENUMERATE_FILES))
1682 continue;
1683
1684 /* check for NORMAL vs. META */
1685 if (ui.path[0] == '/')
1686 {
1687
1688 /* check for NORMAL vs. SPECIAL */
1689 if (ui.path[1] == '#' || ui.path[1] == '$')
1690 flag = CHM_ENUMERATE_SPECIAL;
1691 else
1692 flag = CHM_ENUMERATE_NORMAL;
1693 }
1694 else
1695 flag = CHM_ENUMERATE_META;
1696 if (! (what & flag))
1697 continue;
1698
1699 /* call the enumerator */
1700 {
1701 int status = (*e)(h, &ui, context);
1702 switch (status)
1703 {
1704 case CHM_ENUMERATOR_FAILURE: return 0;
1705 case CHM_ENUMERATOR_CONTINUE: break;
1706 case CHM_ENUMERATOR_SUCCESS: return 1;
1707 default: break;
1708 }
1709 }
1710 }
1711
1712 /* advance to next page */
1713 curPage = header.block_next;
1714 }
1715
1716 return 1;
1717}
1718
1719/* resolve a particular object from the archive */
1720int chm_resolve_location(struct chmFile *h,
1721 unsigned long pos,
1722 struct chmUnitInfo *ui)
1723{
1724 /*
1725 * XXX: implement caching scheme for dir pages
1726 */
1727
1728 Int32 curPage;
1729
1730 /* buffer to hold whatever page we're looking at */
1731#ifdef WIN32
1732 UChar *page_buf = alloca(h->block_len);
1733#else
1734 UChar page_buf[h->block_len];
1735#endif
1736
1737 /* starting page */
1738 curPage = h->index_root;
1739
1740 /* until we have either returned or given up */
1741 while (curPage != -1)
1742 {
1743
1744 /* try to fetch the index page */
1745 if (_chm_fetch_bytes(h, page_buf,
1746 (UInt64)h->dir_offset + (UInt64)curPage*h->block_len,
1747 h->block_len) != h->block_len)
1748 return CHM_RESOLVE_FAILURE;
1749
1750 /* now, if it is a leaf node: */
1751 if (memcmp(page_buf, _chm_pmgl_marker, 4) == 0)
1752 {
1753 /* scan block */
1754 /* UChar *pEntry = _chm_find_in_PMGL(page_buf, h->block_len, objPath);*/
1755 {
1756 /* XXX: modify this to do a binary search using the nice index structure
1757 * that is provided for us.
1758 */
1759 struct chmPmglHeader header;
1760 UInt32 hremain;
1761 UChar *end;
1762 UChar *cur;
1763 UChar *temp;
1764/*
1765 UInt64 strLen;
1766 char buffer[CHM_MAX_PATHLEN+1];
1767*/
1768 /* figure out where to start and end */
1769 cur = page_buf;
1770 hremain = _CHM_PMGL_LEN;
1771 if (! _unmarshal_pmgl_header(&cur, &hremain, &header))
1772 return CHM_RESOLVE_FAILURE;
1773 end = page_buf + h->block_len - (header.free_space);
1774
1775 /* now, scan progressively */
1776 while (cur < end)
1777 {
1778 UInt32 st = 0;
1779 UInt32 nd = 0;
1780 /* grab the name */
1781 temp = cur;
1782
1783 if (_chm_parse_PMGL_entry(&cur, ui) == 0)
1784 {
1785 return CHM_RESOLVE_FAILURE;
1786 }
1787 st = ui->start;
1788 nd = ui->start+ui->length;
1789 if ((st <= pos) && (pos < nd))
1790 {
1791 printf("Resolve:[%u,%u,%u]\n", st, pos, nd);
1792 return CHM_RESOLVE_SUCCESS;
1793 }
1794 }
1795
1796 return CHM_RESOLVE_FAILURE;
1797 }
1798
1799 }
1800
1801 /* else, if it is a branch node: */
1802 else if (memcmp(page_buf, _chm_pmgi_marker, 4) == 0)
1803 {
1804 /* curPage = _chm_find_in_PMGI(page_buf, h->block_len, objPath);*/
1805 return CHM_RESOLVE_FAILURE;
1806 if (0)
1807 {
1808 /* XXX: modify this to do a binary search using the nice index structure
1809 * that is provided for us
1810 */
1811 struct chmPmgiHeader header;
1812 UInt32 hremain;
1813 int page=-1;
1814 UChar *end;
1815 UChar *cur;
1816 UInt64 strLen;
1817 char buffer[CHM_MAX_PATHLEN+1];
1818
1819 /* figure out where to start and end */
1820 cur = page_buf;
1821 hremain = _CHM_PMGI_LEN;
1822 if (! _unmarshal_pmgi_header(&cur, &hremain, &header))
1823 return -1;
1824 end = page_buf + h->block_len - (header.free_space);
1825
1826 /* now, scan progressively */
1827 while (cur < end)
1828 {
1829
1830
1831
1832 if (_chm_parse_PMGL_entry(&cur, ui) == 0)
1833 {
1834 return CHM_RESOLVE_FAILURE;
1835 }
1836
1837 if (ui->start <= pos && pos < ui->start + ui->length)
1838 {
1839 return CHM_RESOLVE_SUCCESS;
1840 }
1841
1842
1843
1844
1845
1846 /* grab the name */
1847 strLen = _chm_parse_cword(&cur);
1848 if (! _chm_parse_UTF8(&cur, strLen, buffer))
1849 return -1;
1850
1851 /* check if it is the right name */
1852 /*
1853#ifdef WIN32
1854 if (stricmp(buffer, objPath) > 0)
1855 return page;
1856#else
1857 if (strcasecmp(buffer, objPath) > 0)
1858 return page;
1859#endif
1860 */
1861 /* load next value for path */
1862 page = (int)_chm_parse_cword(&cur);
1863 }
1864
1865 curPage = page;
1866 }
1867 }
1868 /* else, we are confused. give up. */
1869 else
1870 return CHM_RESOLVE_FAILURE;
1871 }
1872
1873 /* didn't find anything. fail. */
1874 return CHM_RESOLVE_FAILURE;
1875
1876}