author | sandman <sandman> | 2002-04-13 00:47:20 (UTC) |
---|---|---|
committer | sandman <sandman> | 2002-04-13 00:47:20 (UTC) |
commit | 98a1e3f36567639344f12932b629e526a8783aa8 (patch) (unidiff) | |
tree | 0433d296857faceeafc54f7deabddb621f45a933 /noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc | |
parent | 7e31b1fba119f69929d6744d7295555ff1727f4f (diff) | |
download | opie-98a1e3f36567639344f12932b629e526a8783aa8.zip opie-98a1e3f36567639344f12932b629e526a8783aa8.tar.gz opie-98a1e3f36567639344f12932b629e526a8783aa8.tar.bz2 |
CVS import of QPdf
Diffstat (limited to 'noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc') (more/less context) (show whitespace changes)
-rw-r--r-- | noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc | 394 |
1 files changed, 394 insertions, 0 deletions
diff --git a/noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc b/noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc new file mode 100644 index 0000000..6793398 --- a/dev/null +++ b/noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc | |||
@@ -0,0 +1,394 @@ | |||
1 | //======================================================================== | ||
2 | // | ||
3 | // CharCodeToUnicode.cc | ||
4 | // | ||
5 | // Copyright 2001 Derek B. Noonburg | ||
6 | // | ||
7 | //======================================================================== | ||
8 | |||
9 | #ifdef __GNUC__ | ||
10 | #pragma implementation | ||
11 | #endif | ||
12 | |||
13 | #include <aconf.h> | ||
14 | #include <stdio.h> | ||
15 | #include <string.h> | ||
16 | #include "gmem.h" | ||
17 | #include "gfile.h" | ||
18 | #include "GString.h" | ||
19 | #include "Error.h" | ||
20 | #include "GlobalParams.h" | ||
21 | #include "CharCodeToUnicode.h" | ||
22 | |||
23 | //------------------------------------------------------------------------ | ||
24 | |||
25 | #define maxUnicodeString 8 | ||
26 | |||
27 | struct CharCodeToUnicodeString { | ||
28 | CharCode c; | ||
29 | Unicode u[maxUnicodeString]; | ||
30 | int len; | ||
31 | }; | ||
32 | |||
33 | //------------------------------------------------------------------------ | ||
34 | |||
35 | CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *collectionA) { | ||
36 | FILE *f; | ||
37 | Unicode *mapA; | ||
38 | CharCode size, mapLenA; | ||
39 | char buf[64]; | ||
40 | Unicode u; | ||
41 | CharCodeToUnicode *ctu; | ||
42 | |||
43 | if (!(f = globalParams->getCIDToUnicodeFile(collectionA))) { | ||
44 | error(-1, "Couldn't find cidToUnicode file for the '%s' collection", | ||
45 | collectionA->getCString()); | ||
46 | return NULL; | ||
47 | } | ||
48 | |||
49 | size = 32768; | ||
50 | mapA = (Unicode *)gmalloc(size * sizeof(Unicode)); | ||
51 | mapLenA = 0; | ||
52 | |||
53 | while (getLine(buf, sizeof(buf), f)) { | ||
54 | if (mapLenA == size) { | ||
55 | size *= 2; | ||
56 | mapA = (Unicode *)grealloc(mapA, size * sizeof(Unicode)); | ||
57 | } | ||
58 | if (sscanf(buf, "%x", &u) == 1) { | ||
59 | mapA[mapLenA] = u; | ||
60 | } else { | ||
61 | error(-1, "Bad line (%d) in cidToUnicode file for the '%s' collection", | ||
62 | (int)(mapLenA + 1), collectionA->getCString()); | ||
63 | mapA[mapLenA] = 0; | ||
64 | } | ||
65 | ++mapLenA; | ||
66 | } | ||
67 | |||
68 | ctu = new CharCodeToUnicode(collectionA->copy(), mapA, mapLenA, gTrue, | ||
69 | NULL, 0); | ||
70 | gfree(mapA); | ||
71 | return ctu; | ||
72 | } | ||
73 | |||
74 | CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) { | ||
75 | return new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0); | ||
76 | } | ||
77 | |||
// Extracts the next line from the NUL-terminated string that *s points
// into and advances *s past the characters consumed.
//
// At most size - 1 characters are copied into <buf>, which is always
// NUL-terminated.  A line ends after a line feed, after a carriage
// return, or after a carriage return / line feed pair; the terminator
// is kept in <buf>.  Returns <buf>, or NULL when nothing was copied
// (the string is exhausted or there is no room for any character).
static char *getLineFromString(char *buf, int size, char **s) {
  const char lineFeed = '\x0a';
  const char carriageReturn = '\x0d';
  const int room = size - 1;
  int used = 0;

  while (used < room && **s != '\0') {
    const char ch = **s;
    ++*s;
    buf[used++] = ch;
    if (ch == lineFeed) {
      break;
    }
    if (ch == carriageReturn) {
      // Swallow the line feed of a CR/LF pair when it still fits.
      if (**s == lineFeed && used < room) {
        buf[used++] = lineFeed;
        ++*s;
      }
      break;
    }
  }
  buf[used] = '\0';
  return used == 0 ? NULL : buf;
}
102 | |||
103 | CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) { | ||
104 | CharCodeToUnicode *ctu; | ||
105 | char *p; | ||
106 | |||
107 | ctu = new CharCodeToUnicode(NULL); | ||
108 | p = buf->getCString(); | ||
109 | ctu->parseCMap1((char *(*)(char *, int, void *))&getLineFromString, | ||
110 | &p, nBits); | ||
111 | return ctu; | ||
112 | } | ||
113 | |||
114 | void CharCodeToUnicode::parseCMap1(char *(*getLineFunc)(char *, int, void *), | ||
115 | void *data, int nBits) { | ||
116 | char buf[256]; | ||
117 | GBool inBFChar, inBFRange; | ||
118 | char *tok1, *tok2, *tok3; | ||
119 | int nDigits, n1, n2, n3; | ||
120 | CharCode oldLen, i; | ||
121 | CharCode code1, code2; | ||
122 | Unicode u; | ||
123 | char uHex[5]; | ||
124 | int j; | ||
125 | GString *name; | ||
126 | FILE *f; | ||
127 | |||
128 | nDigits = nBits / 4; | ||
129 | inBFChar = inBFRange = gFalse; | ||
130 | while ((*getLineFunc)(buf, sizeof(buf), data)) { | ||
131 | tok1 = strtok(buf, " \t\r\n"); | ||
132 | if (!tok1 || tok1[0] == '%') { | ||
133 | continue; | ||
134 | } | ||
135 | tok2 = strtok(NULL, " \t\r\n"); | ||
136 | tok3 = strtok(NULL, " \t\r\n"); | ||
137 | if (inBFChar) { | ||
138 | if (!strcmp(tok1, "endbfchar")) { | ||
139 | inBFChar = gFalse; | ||
140 | } else if (tok2) { | ||
141 | n1 = strlen(tok1); | ||
142 | n2 = strlen(tok2); | ||
143 | if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' && | ||
144 | tok2[0] == '<' && tok2[n2 - 1] == '>')) { | ||
145 | error(-1, "Illegal line in bfchar block in ToUnicode CMap"); | ||
146 | continue; | ||
147 | } | ||
148 | tok1[n1 - 1] = tok2[n2 - 1] = '\0'; | ||
149 | if (sscanf(tok1 + 1, "%x", &code1) != 1) { | ||
150 | error(-1, "Illegal line in bfchar block in ToUnicode CMap"); | ||
151 | continue; | ||
152 | } | ||
153 | if (code1 >= mapLen) { | ||
154 | oldLen = mapLen; | ||
155 | mapLen = (code1 + 256) & ~255; | ||
156 | map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode)); | ||
157 | for (i = oldLen; i < mapLen; ++i) { | ||
158 | map[i] = 0; | ||
159 | } | ||
160 | } | ||
161 | if (n2 == 6) { | ||
162 | if (sscanf(tok2 + 1, "%x", &u) != 1) { | ||
163 | error(-1, "Illegal line in bfchar block in ToUnicode CMap"); | ||
164 | continue; | ||
165 | } | ||
166 | map[code1] = u; | ||
167 | } else { | ||
168 | map[code1] = 0; | ||
169 | if (sMapLen == sMapSize) { | ||
170 | sMapSize += 8; | ||
171 | sMap = (CharCodeToUnicodeString *) | ||
172 | grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString)); | ||
173 | } | ||
174 | sMap[sMapLen].c = code1; | ||
175 | sMap[sMapLen].len = (n2 - 2) / 4; | ||
176 | for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) { | ||
177 | strncpy(uHex, tok2 + 1 + j*4, 4); | ||
178 | uHex[4] = '\0'; | ||
179 | if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) { | ||
180 | error(-1, "Illegal line in bfchar block in ToUnicode CMap"); | ||
181 | } | ||
182 | } | ||
183 | ++sMapLen; | ||
184 | } | ||
185 | } else { | ||
186 | error(-1, "Illegal bfchar block in ToUnicode CMap"); | ||
187 | } | ||
188 | } else if (inBFRange) { | ||
189 | if (!strcmp(tok1, "endbfrange")) { | ||
190 | inBFRange = gFalse; | ||
191 | } else if (tok2 && tok3) { | ||
192 | n1 = strlen(tok1); | ||
193 | n2 = strlen(tok2); | ||
194 | n3 = strlen(tok3); | ||
195 | if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' && | ||
196 | n2 == 2 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>' && | ||
197 | tok3[0] == '<' && tok3[n3 - 1] == '>')) { | ||
198 | error(-1, "Illegal line in bfrange block in ToUnicode CMap"); | ||
199 | continue; | ||
200 | } | ||
201 | tok1[n1 - 1] = tok2[n2 - 1] = tok3[n3 - 1] = '\0'; | ||
202 | if (sscanf(tok1 + 1, "%x", &code1) != 1 || | ||
203 | sscanf(tok2 + 1, "%x", &code2) != 1) { | ||
204 | error(-1, "Illegal line in bfrange block in ToUnicode CMap"); | ||
205 | continue; | ||
206 | } | ||
207 | if (code2 >= mapLen) { | ||
208 | oldLen = mapLen; | ||
209 | mapLen = (code2 + 256) & ~255; | ||
210 | map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode)); | ||
211 | for (i = oldLen; i < mapLen; ++i) { | ||
212 | map[i] = 0; | ||
213 | } | ||
214 | } | ||
215 | if (n3 == 6) { | ||
216 | if (sscanf(tok3 + 1, "%x", &u) != 1) { | ||
217 | error(-1, "Illegal line in bfrange block in ToUnicode CMap"); | ||
218 | continue; | ||
219 | } | ||
220 | for (; code1 <= code2; ++code1) { | ||
221 | map[code1] = u++; | ||
222 | } | ||
223 | } else { | ||
224 | if (sMapLen + (int)(code2 - code1 + 1) > sMapSize) { | ||
225 | sMapSize = (sMapSize + (code2 - code1 + 1) + 7) & ~7; | ||
226 | sMap = (CharCodeToUnicodeString *) | ||
227 | grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString)); | ||
228 | } | ||
229 | for (i = 0; code1 <= code2; ++code1, ++i) { | ||
230 | map[code1] = 0; | ||
231 | sMap[sMapLen].c = code1; | ||
232 | sMap[sMapLen].len = (n3 - 2) / 4; | ||
233 | for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) { | ||
234 | strncpy(uHex, tok3 + 1 + j*4, 4); | ||
235 | uHex[4] = '\0'; | ||
236 | if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) { | ||
237 | error(-1, "Illegal line in bfrange block in ToUnicode CMap"); | ||
238 | } | ||
239 | } | ||
240 | sMap[sMapLen].u[sMap[sMapLen].len - 1] += i; | ||
241 | ++sMapLen; | ||
242 | } | ||
243 | } | ||
244 | } else { | ||
245 | error(-1, "Illegal bfrange block in ToUnicode CMap"); | ||
246 | } | ||
247 | } else if (tok2 && !strcmp(tok2, "usecmap")) { | ||
248 | if (tok1[0] == '/') { | ||
249 | name = new GString(tok1 + 1); | ||
250 | if ((f = globalParams->findToUnicodeFile(name))) { | ||
251 | parseCMap1((char *(*)(char *, int, void *))&getLine, f, nBits); | ||
252 | fclose(f); | ||
253 | } else { | ||
254 | error(-1, "Couldn't find ToUnicode CMap file for '%s'", | ||
255 | name->getCString()); | ||
256 | } | ||
257 | delete name; | ||
258 | } | ||
259 | } else if (tok2 && !strcmp(tok2, "beginbfchar")) { | ||
260 | inBFChar = gTrue; | ||
261 | } else if (tok2 && !strcmp(tok2, "beginbfrange")) { | ||
262 | inBFRange = gTrue; | ||
263 | } | ||
264 | } | ||
265 | } | ||
266 | |||
267 | CharCodeToUnicode::CharCodeToUnicode(GString *collectionA) { | ||
268 | CharCode i; | ||
269 | |||
270 | collection = collectionA; | ||
271 | mapLen = 256; | ||
272 | map = (Unicode *)gmalloc(mapLen * sizeof(Unicode)); | ||
273 | for (i = 0; i < mapLen; ++i) { | ||
274 | map[i] = 0; | ||
275 | } | ||
276 | sMap = NULL; | ||
277 | sMapLen = sMapSize = 0; | ||
278 | refCnt = 1; | ||
279 | } | ||
280 | |||
281 | CharCodeToUnicode::CharCodeToUnicode(GString *collectionA, Unicode *mapA, | ||
282 | CharCode mapLenA, GBool copyMap, | ||
283 | CharCodeToUnicodeString *sMapA, | ||
284 | int sMapLenA) { | ||
285 | collection = collectionA; | ||
286 | mapLen = mapLenA; | ||
287 | if (copyMap) { | ||
288 | map = (Unicode *)gmalloc(mapLen * sizeof(Unicode)); | ||
289 | memcpy(map, mapA, mapLen * sizeof(Unicode)); | ||
290 | } else { | ||
291 | map = mapA; | ||
292 | } | ||
293 | sMap = sMapA; | ||
294 | sMapLen = sMapSize = sMapLenA; | ||
295 | refCnt = 1; | ||
296 | } | ||
297 | |||
298 | CharCodeToUnicode::~CharCodeToUnicode() { | ||
299 | if (collection) { | ||
300 | delete collection; | ||
301 | } | ||
302 | gfree(map); | ||
303 | if (sMap) { | ||
304 | gfree(sMap); | ||
305 | } | ||
306 | } | ||
307 | |||
308 | void CharCodeToUnicode::incRefCnt() { | ||
309 | ++refCnt; | ||
310 | } | ||
311 | |||
312 | void CharCodeToUnicode::decRefCnt() { | ||
313 | if (--refCnt == 0) { | ||
314 | delete this; | ||
315 | } | ||
316 | } | ||
317 | |||
318 | GBool CharCodeToUnicode::match(GString *collectionA) { | ||
319 | return collection && !collection->cmp(collectionA); | ||
320 | } | ||
321 | |||
322 | int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode *u, int size) { | ||
323 | int i, j; | ||
324 | |||
325 | if (c >= mapLen) { | ||
326 | return 0; | ||
327 | } | ||
328 | if (map[c]) { | ||
329 | u[0] = map[c]; | ||
330 | return 1; | ||
331 | } | ||
332 | for (i = 0; i < sMapLen; ++i) { | ||
333 | if (sMap[i].c == c) { | ||
334 | for (j = 0; j < sMap[i].len && j < size; ++j) { | ||
335 | u[j] = sMap[i].u[j]; | ||
336 | } | ||
337 | return j; | ||
338 | } | ||
339 | } | ||
340 | return 0; | ||
341 | } | ||
342 | |||
343 | //------------------------------------------------------------------------ | ||
344 | |||
345 | CIDToUnicodeCache::CIDToUnicodeCache() { | ||
346 | int i; | ||
347 | |||
348 | for (i = 0; i < cidToUnicodeCacheSize; ++i) { | ||
349 | cache[i] = NULL; | ||
350 | } | ||
351 | } | ||
352 | |||
353 | CIDToUnicodeCache::~CIDToUnicodeCache() { | ||
354 | int i; | ||
355 | |||
356 | for (i = 0; i < cidToUnicodeCacheSize; ++i) { | ||
357 | if (cache[i]) { | ||
358 | cache[i]->decRefCnt(); | ||
359 | } | ||
360 | } | ||
361 | } | ||
362 | |||
363 | CharCodeToUnicode *CIDToUnicodeCache::getCIDToUnicode(GString *collection) { | ||
364 | CharCodeToUnicode *ctu; | ||
365 | int i, j; | ||
366 | |||
367 | if (cache[0] && cache[0]->match(collection)) { | ||
368 | cache[0]->incRefCnt(); | ||
369 | return cache[0]; | ||
370 | } | ||
371 | for (i = 1; i < cidToUnicodeCacheSize; ++i) { | ||
372 | if (cache[i] && cache[i]->match(collection)) { | ||
373 | ctu = cache[i]; | ||
374 | for (j = i; j >= 1; --j) { | ||
375 | cache[j] = cache[j - 1]; | ||
376 | } | ||
377 | cache[0] = ctu; | ||
378 | ctu->incRefCnt(); | ||
379 | return ctu; | ||
380 | } | ||
381 | } | ||
382 | if ((ctu = CharCodeToUnicode::parseCIDToUnicode(collection))) { | ||
383 | if (cache[cidToUnicodeCacheSize - 1]) { | ||
384 | cache[cidToUnicodeCacheSize - 1]->decRefCnt(); | ||
385 | } | ||
386 | for (j = cidToUnicodeCacheSize - 1; j >= 1; --j) { | ||
387 | cache[j] = cache[j - 1]; | ||
388 | } | ||
389 | cache[0] = ctu; | ||
390 | ctu->incRefCnt(); | ||
391 | return ctu; | ||
392 | } | ||
393 | return NULL; | ||
394 | } | ||