author | sandman <sandman> | 2002-05-23 23:51:52 (UTC) |
---|---|---|
committer | sandman <sandman> | 2002-05-23 23:51:52 (UTC) |
commit | 2f3bb7b07f833273d966d41813e68bfe8b9d8d76 (patch) (unidiff) | |
tree | 00beb1bd9e7f4ba79e22334a0d258269b28f4564 /noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc | |
parent | 6e82b45dd416ceeba78765717b700e853c96a137 (diff) | |
download | opie-2f3bb7b07f833273d966d41813e68bfe8b9d8d76.zip opie-2f3bb7b07f833273d966d41813e68bfe8b9d8d76.tar.gz opie-2f3bb7b07f833273d966d41813e68bfe8b9d8d76.tar.bz2 |
Port to xpdf 1.01
Diffstat (limited to 'noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc') (more/less context) (ignore whitespace changes)
-rw-r--r-- | noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc | 160 |
1 files changed, 78 insertions, 82 deletions
diff --git a/noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc b/noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc index 6793398..912981e 100644 --- a/noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc +++ b/noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc | |||
@@ -2,7 +2,7 @@ | |||
2 | // | 2 | // |
3 | // CharCodeToUnicode.cc | 3 | // CharCodeToUnicode.cc |
4 | // | 4 | // |
5 | // Copyright 2001 Derek B. Noonburg | 5 | // Copyright 2001-2002 Glyph & Cog, LLC |
6 | // | 6 | // |
7 | //======================================================================== | 7 | //======================================================================== |
8 | 8 | ||
@@ -18,6 +18,7 @@ | |||
18 | #include "GString.h" | 18 | #include "GString.h" |
19 | #include "Error.h" | 19 | #include "Error.h" |
20 | #include "GlobalParams.h" | 20 | #include "GlobalParams.h" |
21 | #include "PSTokenizer.h" | ||
21 | #include "CharCodeToUnicode.h" | 22 | #include "CharCodeToUnicode.h" |
22 | 23 | ||
23 | //------------------------------------------------------------------------ | 24 | //------------------------------------------------------------------------ |
@@ -32,6 +33,26 @@ struct CharCodeToUnicodeString { | |||
32 | 33 | ||
33 | //------------------------------------------------------------------------ | 34 | //------------------------------------------------------------------------ |
34 | 35 | ||
36 | static int getCharFromString(void *data) { | ||
37 | char *p; | ||
38 | int c; | ||
39 | |||
40 | p = *(char **)data; | ||
41 | if (*p) { | ||
42 | c = *p++; | ||
43 | *(char **)data = p; | ||
44 | } else { | ||
45 | c = EOF; | ||
46 | } | ||
47 | return c; | ||
48 | } | ||
49 | |||
50 | static int getCharFromFile(void *data) { | ||
51 | return fgetc((FILE *)data); | ||
52 | } | ||
53 | |||
54 | //------------------------------------------------------------------------ | ||
55 | |||
35 | CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *collectionA) { | 56 | CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *collectionA) { |
36 | FILE *f; | 57 | FILE *f; |
37 | Unicode *mapA; | 58 | Unicode *mapA; |
@@ -75,47 +96,20 @@ CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) { | |||
75 | return new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0); | 96 | return new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0); |
76 | } | 97 | } |
77 | 98 | ||
78 | static char *getLineFromString(char *buf, int size, char **s) { | ||
79 | char c; | ||
80 | int i; | ||
81 | |||
82 | i = 0; | ||
83 | while (i < size - 1 && **s) { | ||
84 | buf[i++] = c = *(*s)++; | ||
85 | if (c == '\x0a') { | ||
86 | break; | ||
87 | } | ||
88 | if (c == '\x0d') { | ||
89 | if (**s == '\x0a' && i < size - 1) { | ||
90 | buf[i++] = '\x0a'; | ||
91 | ++*s; | ||
92 | } | ||
93 | break; | ||
94 | } | ||
95 | } | ||
96 | buf[i] = '\0'; | ||
97 | if (i == 0) { | ||
98 | return NULL; | ||
99 | } | ||
100 | return buf; | ||
101 | } | ||
102 | |||
103 | CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) { | 99 | CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) { |
104 | CharCodeToUnicode *ctu; | 100 | CharCodeToUnicode *ctu; |
105 | char *p; | 101 | char *p; |
106 | 102 | ||
107 | ctu = new CharCodeToUnicode(NULL); | 103 | ctu = new CharCodeToUnicode(NULL); |
108 | p = buf->getCString(); | 104 | p = buf->getCString(); |
109 | ctu->parseCMap1((char *(*)(char *, int, void *))&getLineFromString, | 105 | ctu->parseCMap1(&getCharFromString, &p, nBits); |
110 | &p, nBits); | ||
111 | return ctu; | 106 | return ctu; |
112 | } | 107 | } |
113 | 108 | ||
114 | void CharCodeToUnicode::parseCMap1(char *(*getLineFunc)(char *, int, void *), | 109 | void CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data, |
115 | void *data, int nBits) { | 110 | int nBits) { |
116 | char buf[256]; | 111 | PSTokenizer *pst; |
117 | GBool inBFChar, inBFRange; | 112 | char tok1[256], tok2[256], tok3[256]; |
118 | char *tok1, *tok2, *tok3; | ||
119 | int nDigits, n1, n2, n3; | 113 | int nDigits, n1, n2, n3; |
120 | CharCode oldLen, i; | 114 | CharCode oldLen, i; |
121 | CharCode code1, code2; | 115 | CharCode code1, code2; |
@@ -126,28 +120,40 @@ void CharCodeToUnicode::parseCMap1(char *(*getLineFunc)(char *, int, void *), | |||
126 | FILE *f; | 120 | FILE *f; |
127 | 121 | ||
128 | nDigits = nBits / 4; | 122 | nDigits = nBits / 4; |
129 | inBFChar = inBFRange = gFalse; | 123 | pst = new PSTokenizer(getCharFunc, data); |
130 | while ((*getLineFunc)(buf, sizeof(buf), data)) { | 124 | pst->getToken(tok1, sizeof(tok1), &n1); |
131 | tok1 = strtok(buf, " \t\r\n"); | 125 | while (pst->getToken(tok2, sizeof(tok2), &n2)) { |
132 | if (!tok1 || tok1[0] == '%') { | 126 | if (!strcmp(tok2, "usecmap")) { |
133 | continue; | 127 | if (tok1[0] == '/') { |
134 | } | 128 | name = new GString(tok1 + 1); |
135 | tok2 = strtok(NULL, " \t\r\n"); | 129 | if ((f = globalParams->findToUnicodeFile(name))) { |
136 | tok3 = strtok(NULL, " \t\r\n"); | 130 | parseCMap1(&getCharFromFile, f, nBits); |
137 | if (inBFChar) { | 131 | fclose(f); |
138 | if (!strcmp(tok1, "endbfchar")) { | 132 | } else { |
139 | inBFChar = gFalse; | 133 | error(-1, "Couldn't find ToUnicode CMap file for '%s'", |
140 | } else if (tok2) { | 134 | name->getCString()); |
141 | n1 = strlen(tok1); | 135 | } |
142 | n2 = strlen(tok2); | 136 | delete name; |
137 | } | ||
138 | pst->getToken(tok1, sizeof(tok1), &n1); | ||
139 | } else if (!strcmp(tok2, "beginbfchar")) { | ||
140 | while (pst->getToken(tok1, sizeof(tok1), &n1)) { | ||
141 | if (!strcmp(tok1, "endbfchar")) { | ||
142 | break; | ||
143 | } | ||
144 | if (!pst->getToken(tok2, sizeof(tok2), &n2) || | ||
145 | !strcmp(tok2, "endbfchar")) { | ||
146 | error(-1, "Illegal entry in bfchar block in ToUnicode CMap"); | ||
147 | break; | ||
148 | } | ||
143 | if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' && | 149 | if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' && |
144 | tok2[0] == '<' && tok2[n2 - 1] == '>')) { | 150 | tok2[0] == '<' && tok2[n2 - 1] == '>')) { |
145 | error(-1, "Illegal line in bfchar block in ToUnicode CMap"); | 151 | error(-1, "Illegal entry in bfchar block in ToUnicode CMap"); |
146 | continue; | 152 | continue; |
147 | } | 153 | } |
148 | tok1[n1 - 1] = tok2[n2 - 1] = '\0'; | 154 | tok1[n1 - 1] = tok2[n2 - 1] = '\0'; |
149 | if (sscanf(tok1 + 1, "%x", &code1) != 1) { | 155 | if (sscanf(tok1 + 1, "%x", &code1) != 1) { |
150 | error(-1, "Illegal line in bfchar block in ToUnicode CMap"); | 156 | error(-1, "Illegal entry in bfchar block in ToUnicode CMap"); |
151 | continue; | 157 | continue; |
152 | } | 158 | } |
153 | if (code1 >= mapLen) { | 159 | if (code1 >= mapLen) { |
@@ -160,7 +166,7 @@ void CharCodeToUnicode::parseCMap1(char *(*getLineFunc)(char *, int, void *), | |||
160 | } | 166 | } |
161 | if (n2 == 6) { | 167 | if (n2 == 6) { |
162 | if (sscanf(tok2 + 1, "%x", &u) != 1) { | 168 | if (sscanf(tok2 + 1, "%x", &u) != 1) { |
163 | error(-1, "Illegal line in bfchar block in ToUnicode CMap"); | 169 | error(-1, "Illegal entry in bfchar block in ToUnicode CMap"); |
164 | continue; | 170 | continue; |
165 | } | 171 | } |
166 | map[code1] = u; | 172 | map[code1] = u; |
@@ -177,31 +183,35 @@ void CharCodeToUnicode::parseCMap1(char *(*getLineFunc)(char *, int, void *), | |||
177 | strncpy(uHex, tok2 + 1 + j*4, 4); | 183 | strncpy(uHex, tok2 + 1 + j*4, 4); |
178 | uHex[4] = '\0'; | 184 | uHex[4] = '\0'; |
179 | if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) { | 185 | if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) { |
180 | error(-1, "Illegal line in bfchar block in ToUnicode CMap"); | 186 | error(-1, "Illegal entry in bfchar block in ToUnicode CMap"); |
181 | } | 187 | } |
182 | } | 188 | } |
183 | ++sMapLen; | 189 | ++sMapLen; |
184 | } | 190 | } |
185 | } else { | ||
186 | error(-1, "Illegal bfchar block in ToUnicode CMap"); | ||
187 | } | 191 | } |
188 | } else if (inBFRange) { | 192 | pst->getToken(tok1, sizeof(tok1), &n1); |
189 | if (!strcmp(tok1, "endbfrange")) { | 193 | } else if (!strcmp(tok2, "beginbfrange")) { |
190 | inBFRange = gFalse; | 194 | while (pst->getToken(tok1, sizeof(tok1), &n1)) { |
191 | } else if (tok2 && tok3) { | 195 | if (!strcmp(tok1, "endbfrange")) { |
192 | n1 = strlen(tok1); | 196 | break; |
193 | n2 = strlen(tok2); | 197 | } |
194 | n3 = strlen(tok3); | 198 | if (!pst->getToken(tok2, sizeof(tok2), &n2) || |
199 | !strcmp(tok2, "endbfrange") || | ||
200 | !pst->getToken(tok3, sizeof(tok3), &n3) || | ||
201 | !strcmp(tok3, "endbfrange")) { | ||
202 | error(-1, "Illegal entry in bfrange block in ToUnicode CMap"); | ||
203 | break; | ||
204 | } | ||
195 | if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' && | 205 | if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' && |
196 | n2 == 2 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>' && | 206 | n2 == 2 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>' && |
197 | tok3[0] == '<' && tok3[n3 - 1] == '>')) { | 207 | tok3[0] == '<' && tok3[n3 - 1] == '>')) { |
198 | error(-1, "Illegal line in bfrange block in ToUnicode CMap"); | 208 | error(-1, "Illegal entry in bfrange block in ToUnicode CMap"); |
199 | continue; | 209 | continue; |
200 | } | 210 | } |
201 | tok1[n1 - 1] = tok2[n2 - 1] = tok3[n3 - 1] = '\0'; | 211 | tok1[n1 - 1] = tok2[n2 - 1] = tok3[n3 - 1] = '\0'; |
202 | if (sscanf(tok1 + 1, "%x", &code1) != 1 || | 212 | if (sscanf(tok1 + 1, "%x", &code1) != 1 || |
203 | sscanf(tok2 + 1, "%x", &code2) != 1) { | 213 | sscanf(tok2 + 1, "%x", &code2) != 1) { |
204 | error(-1, "Illegal line in bfrange block in ToUnicode CMap"); | 214 | error(-1, "Illegal entry in bfrange block in ToUnicode CMap"); |
205 | continue; | 215 | continue; |
206 | } | 216 | } |
207 | if (code2 >= mapLen) { | 217 | if (code2 >= mapLen) { |
@@ -214,7 +224,7 @@ void CharCodeToUnicode::parseCMap1(char *(*getLineFunc)(char *, int, void *), | |||
214 | } | 224 | } |
215 | if (n3 == 6) { | 225 | if (n3 == 6) { |
216 | if (sscanf(tok3 + 1, "%x", &u) != 1) { | 226 | if (sscanf(tok3 + 1, "%x", &u) != 1) { |
217 | error(-1, "Illegal line in bfrange block in ToUnicode CMap"); | 227 | error(-1, "Illegal entry in bfrange block in ToUnicode CMap"); |
218 | continue; | 228 | continue; |
219 | } | 229 | } |
220 | for (; code1 <= code2; ++code1) { | 230 | for (; code1 <= code2; ++code1) { |
@@ -234,34 +244,20 @@ void CharCodeToUnicode::parseCMap1(char *(*getLineFunc)(char *, int, void *), | |||
234 | strncpy(uHex, tok3 + 1 + j*4, 4); | 244 | strncpy(uHex, tok3 + 1 + j*4, 4); |
235 | uHex[4] = '\0'; | 245 | uHex[4] = '\0'; |
236 | if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) { | 246 | if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) { |
237 | error(-1, "Illegal line in bfrange block in ToUnicode CMap"); | 247 | error(-1, "Illegal entry in bfrange block in ToUnicode CMap"); |
238 | } | 248 | } |
239 | } | 249 | } |
240 | sMap[sMapLen].u[sMap[sMapLen].len - 1] += i; | 250 | sMap[sMapLen].u[sMap[sMapLen].len - 1] += i; |
241 | ++sMapLen; | 251 | ++sMapLen; |
242 | } | 252 | } |
243 | } | 253 | } |
244 | } else { | ||
245 | error(-1, "Illegal bfrange block in ToUnicode CMap"); | ||
246 | } | 254 | } |
247 | } else if (tok2 && !strcmp(tok2, "usecmap")) { | 255 | pst->getToken(tok1, sizeof(tok1), &n1); |
248 | if (tok1[0] == '/') { | 256 | } else { |
249 | name = new GString(tok1 + 1); | 257 | strcpy(tok1, tok2); |
250 | if ((f = globalParams->findToUnicodeFile(name))) { | ||
251 | parseCMap1((char *(*)(char *, int, void *))&getLine, f, nBits); | ||
252 | fclose(f); | ||
253 | } else { | ||
254 | error(-1, "Couldn't find ToUnicode CMap file for '%s'", | ||
255 | name->getCString()); | ||
256 | } | ||
257 | delete name; | ||
258 | } | ||
259 | } else if (tok2 && !strcmp(tok2, "beginbfchar")) { | ||
260 | inBFChar = gTrue; | ||
261 | } else if (tok2 && !strcmp(tok2, "beginbfrange")) { | ||
262 | inBFRange = gTrue; | ||
263 | } | 258 | } |
264 | } | 259 | } |
260 | delete pst; | ||
265 | } | 261 | } |
266 | 262 | ||
267 | CharCodeToUnicode::CharCodeToUnicode(GString *collectionA) { | 263 | CharCodeToUnicode::CharCodeToUnicode(GString *collectionA) { |