summary refs log tree commit diff
path: root/noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc
Unidiff
Diffstat (limited to 'noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc') (more/less context) (show whitespace changes)
-rw-r--r-- noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc 394
1 files changed, 394 insertions, 0 deletions
diff --git a/noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc b/noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc
new file mode 100644
index 0000000..6793398
--- a/dev/null
+++ b/noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc
@@ -0,0 +1,394 @@
1//========================================================================
2//
3// CharCodeToUnicode.cc
4//
5// Copyright 2001 Derek B. Noonburg
6//
7//========================================================================
8
9#ifdef __GNUC__
10#pragma implementation
11#endif
12
13#include <aconf.h>
14#include <stdio.h>
15#include <string.h>
16#include "gmem.h"
17#include "gfile.h"
18#include "GString.h"
19#include "Error.h"
20#include "GlobalParams.h"
21#include "CharCodeToUnicode.h"
22
23//------------------------------------------------------------------------
24
25#define maxUnicodeString 8
26
27struct CharCodeToUnicodeString {
28 CharCode c;
29 Unicode u[maxUnicodeString];
30 int len;
31};
32
33//------------------------------------------------------------------------
34
35CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *collectionA) {
36 FILE *f;
37 Unicode *mapA;
38 CharCode size, mapLenA;
39 char buf[64];
40 Unicode u;
41 CharCodeToUnicode *ctu;
42
43 if (!(f = globalParams->getCIDToUnicodeFile(collectionA))) {
44 error(-1, "Couldn't find cidToUnicode file for the '%s' collection",
45 collectionA->getCString());
46 return NULL;
47 }
48
49 size = 32768;
50 mapA = (Unicode *)gmalloc(size * sizeof(Unicode));
51 mapLenA = 0;
52
53 while (getLine(buf, sizeof(buf), f)) {
54 if (mapLenA == size) {
55 size *= 2;
56 mapA = (Unicode *)grealloc(mapA, size * sizeof(Unicode));
57 }
58 if (sscanf(buf, "%x", &u) == 1) {
59 mapA[mapLenA] = u;
60 } else {
61 error(-1, "Bad line (%d) in cidToUnicode file for the '%s' collection",
62 (int)(mapLenA + 1), collectionA->getCString());
63 mapA[mapLenA] = 0;
64 }
65 ++mapLenA;
66 }
67
68 ctu = new CharCodeToUnicode(collectionA->copy(), mapA, mapLenA, gTrue,
69 NULL, 0);
70 gfree(mapA);
71 return ctu;
72}
73
74CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) {
75 return new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0);
76}
77
// Copy the next line of the NUL-terminated text at *s into <buf>, which
// has room for <size> bytes, and advance *s past what was consumed.
//
// A line ends at LF, at CR, or at CR LF; the terminator is copied along
// with the line.  At most size-1 characters are copied, so an over-long
// line is handed back in pieces over successive calls.  <buf> is always
// NUL-terminated.
//
// Returns <buf>, or NULL when nothing could be copied (the text is
// exhausted).
static char *getLineFromString(char *buf, int size, char **s) {
  const int limit = size - 1;
  int n = 0;

  while (n < limit && **s != '\0') {
    const char ch = **s;

    ++*s;
    buf[n++] = ch;
    if (ch == '\x0a') {
      break;
    }
    if (ch == '\x0d') {
      // take the LF of a CR LF pair too, provided it still fits
      if (**s == '\x0a' && n < limit) {
        buf[n++] = '\x0a';
        ++*s;
      }
      break;
    }
  }
  buf[n] = '\0';

  if (n > 0) {
    return buf;
  }
  return NULL;
}
102
103CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) {
104 CharCodeToUnicode *ctu;
105 char *p;
106
107 ctu = new CharCodeToUnicode(NULL);
108 p = buf->getCString();
109 ctu->parseCMap1((char *(*)(char *, int, void *))&getLineFromString,
110 &p, nBits);
111 return ctu;
112}
113
114void CharCodeToUnicode::parseCMap1(char *(*getLineFunc)(char *, int, void *),
115 void *data, int nBits) {
116 char buf[256];
117 GBool inBFChar, inBFRange;
118 char *tok1, *tok2, *tok3;
119 int nDigits, n1, n2, n3;
120 CharCode oldLen, i;
121 CharCode code1, code2;
122 Unicode u;
123 char uHex[5];
124 int j;
125 GString *name;
126 FILE *f;
127
128 nDigits = nBits / 4;
129 inBFChar = inBFRange = gFalse;
130 while ((*getLineFunc)(buf, sizeof(buf), data)) {
131 tok1 = strtok(buf, " \t\r\n");
132 if (!tok1 || tok1[0] == '%') {
133 continue;
134 }
135 tok2 = strtok(NULL, " \t\r\n");
136 tok3 = strtok(NULL, " \t\r\n");
137 if (inBFChar) {
138 if (!strcmp(tok1, "endbfchar")) {
139 inBFChar = gFalse;
140 } else if (tok2) {
141 n1 = strlen(tok1);
142 n2 = strlen(tok2);
143 if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
144 tok2[0] == '<' && tok2[n2 - 1] == '>')) {
145 error(-1, "Illegal line in bfchar block in ToUnicode CMap");
146 continue;
147 }
148 tok1[n1 - 1] = tok2[n2 - 1] = '\0';
149 if (sscanf(tok1 + 1, "%x", &code1) != 1) {
150 error(-1, "Illegal line in bfchar block in ToUnicode CMap");
151 continue;
152 }
153 if (code1 >= mapLen) {
154 oldLen = mapLen;
155 mapLen = (code1 + 256) & ~255;
156 map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode));
157 for (i = oldLen; i < mapLen; ++i) {
158 map[i] = 0;
159 }
160 }
161 if (n2 == 6) {
162 if (sscanf(tok2 + 1, "%x", &u) != 1) {
163 error(-1, "Illegal line in bfchar block in ToUnicode CMap");
164 continue;
165 }
166 map[code1] = u;
167 } else {
168 map[code1] = 0;
169 if (sMapLen == sMapSize) {
170 sMapSize += 8;
171 sMap = (CharCodeToUnicodeString *)
172 grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString));
173 }
174 sMap[sMapLen].c = code1;
175 sMap[sMapLen].len = (n2 - 2) / 4;
176 for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) {
177 strncpy(uHex, tok2 + 1 + j*4, 4);
178 uHex[4] = '\0';
179 if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) {
180 error(-1, "Illegal line in bfchar block in ToUnicode CMap");
181 }
182 }
183 ++sMapLen;
184 }
185 } else {
186 error(-1, "Illegal bfchar block in ToUnicode CMap");
187 }
188 } else if (inBFRange) {
189 if (!strcmp(tok1, "endbfrange")) {
190 inBFRange = gFalse;
191 } else if (tok2 && tok3) {
192 n1 = strlen(tok1);
193 n2 = strlen(tok2);
194 n3 = strlen(tok3);
195 if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
196 n2 == 2 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>' &&
197 tok3[0] == '<' && tok3[n3 - 1] == '>')) {
198 error(-1, "Illegal line in bfrange block in ToUnicode CMap");
199 continue;
200 }
201 tok1[n1 - 1] = tok2[n2 - 1] = tok3[n3 - 1] = '\0';
202 if (sscanf(tok1 + 1, "%x", &code1) != 1 ||
203 sscanf(tok2 + 1, "%x", &code2) != 1) {
204 error(-1, "Illegal line in bfrange block in ToUnicode CMap");
205 continue;
206 }
207 if (code2 >= mapLen) {
208 oldLen = mapLen;
209 mapLen = (code2 + 256) & ~255;
210 map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode));
211 for (i = oldLen; i < mapLen; ++i) {
212 map[i] = 0;
213 }
214 }
215 if (n3 == 6) {
216 if (sscanf(tok3 + 1, "%x", &u) != 1) {
217 error(-1, "Illegal line in bfrange block in ToUnicode CMap");
218 continue;
219 }
220 for (; code1 <= code2; ++code1) {
221 map[code1] = u++;
222 }
223 } else {
224 if (sMapLen + (int)(code2 - code1 + 1) > sMapSize) {
225 sMapSize = (sMapSize + (code2 - code1 + 1) + 7) & ~7;
226 sMap = (CharCodeToUnicodeString *)
227 grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString));
228 }
229 for (i = 0; code1 <= code2; ++code1, ++i) {
230 map[code1] = 0;
231 sMap[sMapLen].c = code1;
232 sMap[sMapLen].len = (n3 - 2) / 4;
233 for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) {
234 strncpy(uHex, tok3 + 1 + j*4, 4);
235 uHex[4] = '\0';
236 if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) {
237 error(-1, "Illegal line in bfrange block in ToUnicode CMap");
238 }
239 }
240 sMap[sMapLen].u[sMap[sMapLen].len - 1] += i;
241 ++sMapLen;
242 }
243 }
244 } else {
245 error(-1, "Illegal bfrange block in ToUnicode CMap");
246 }
247 } else if (tok2 && !strcmp(tok2, "usecmap")) {
248 if (tok1[0] == '/') {
249 name = new GString(tok1 + 1);
250 if ((f = globalParams->findToUnicodeFile(name))) {
251 parseCMap1((char *(*)(char *, int, void *))&getLine, f, nBits);
252 fclose(f);
253 } else {
254 error(-1, "Couldn't find ToUnicode CMap file for '%s'",
255 name->getCString());
256 }
257 delete name;
258 }
259 } else if (tok2 && !strcmp(tok2, "beginbfchar")) {
260 inBFChar = gTrue;
261 } else if (tok2 && !strcmp(tok2, "beginbfrange")) {
262 inBFRange = gTrue;
263 }
264 }
265}
266
267CharCodeToUnicode::CharCodeToUnicode(GString *collectionA) {
268 CharCode i;
269
270 collection = collectionA;
271 mapLen = 256;
272 map = (Unicode *)gmalloc(mapLen * sizeof(Unicode));
273 for (i = 0; i < mapLen; ++i) {
274 map[i] = 0;
275 }
276 sMap = NULL;
277 sMapLen = sMapSize = 0;
278 refCnt = 1;
279}
280
281CharCodeToUnicode::CharCodeToUnicode(GString *collectionA, Unicode *mapA,
282 CharCode mapLenA, GBool copyMap,
283 CharCodeToUnicodeString *sMapA,
284 int sMapLenA) {
285 collection = collectionA;
286 mapLen = mapLenA;
287 if (copyMap) {
288 map = (Unicode *)gmalloc(mapLen * sizeof(Unicode));
289 memcpy(map, mapA, mapLen * sizeof(Unicode));
290 } else {
291 map = mapA;
292 }
293 sMap = sMapA;
294 sMapLen = sMapSize = sMapLenA;
295 refCnt = 1;
296}
297
298CharCodeToUnicode::~CharCodeToUnicode() {
299 if (collection) {
300 delete collection;
301 }
302 gfree(map);
303 if (sMap) {
304 gfree(sMap);
305 }
306}
307
308void CharCodeToUnicode::incRefCnt() {
309 ++refCnt;
310}
311
312void CharCodeToUnicode::decRefCnt() {
313 if (--refCnt == 0) {
314 delete this;
315 }
316}
317
318GBool CharCodeToUnicode::match(GString *collectionA) {
319 return collection && !collection->cmp(collectionA);
320}
321
322int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode *u, int size) {
323 int i, j;
324
325 if (c >= mapLen) {
326 return 0;
327 }
328 if (map[c]) {
329 u[0] = map[c];
330 return 1;
331 }
332 for (i = 0; i < sMapLen; ++i) {
333 if (sMap[i].c == c) {
334 for (j = 0; j < sMap[i].len && j < size; ++j) {
335 u[j] = sMap[i].u[j];
336 }
337 return j;
338 }
339 }
340 return 0;
341}
342
343//------------------------------------------------------------------------
344
345CIDToUnicodeCache::CIDToUnicodeCache() {
346 int i;
347
348 for (i = 0; i < cidToUnicodeCacheSize; ++i) {
349 cache[i] = NULL;
350 }
351}
352
353CIDToUnicodeCache::~CIDToUnicodeCache() {
354 int i;
355
356 for (i = 0; i < cidToUnicodeCacheSize; ++i) {
357 if (cache[i]) {
358 cache[i]->decRefCnt();
359 }
360 }
361}
362
363CharCodeToUnicode *CIDToUnicodeCache::getCIDToUnicode(GString *collection) {
364 CharCodeToUnicode *ctu;
365 int i, j;
366
367 if (cache[0] && cache[0]->match(collection)) {
368 cache[0]->incRefCnt();
369 return cache[0];
370 }
371 for (i = 1; i < cidToUnicodeCacheSize; ++i) {
372 if (cache[i] && cache[i]->match(collection)) {
373 ctu = cache[i];
374 for (j = i; j >= 1; --j) {
375 cache[j] = cache[j - 1];
376 }
377 cache[0] = ctu;
378 ctu->incRefCnt();
379 return ctu;
380 }
381 }
382 if ((ctu = CharCodeToUnicode::parseCIDToUnicode(collection))) {
383 if (cache[cidToUnicodeCacheSize - 1]) {
384 cache[cidToUnicodeCacheSize - 1]->decRefCnt();
385 }
386 for (j = cidToUnicodeCacheSize - 1; j >= 1; --j) {
387 cache[j] = cache[j - 1];
388 }
389 cache[0] = ctu;
390 ctu->incRefCnt();
391 return ctu;
392 }
393 return NULL;
394}