Diffstat (limited to 'noncore/unsupported/qpdf/xpdf/Lexer.cc') (more/less context) (ignore whitespace changes)
-rw-r--r-- | noncore/unsupported/qpdf/xpdf/Lexer.cc | 473 |
1 files changed, 473 insertions, 0 deletions
diff --git a/noncore/unsupported/qpdf/xpdf/Lexer.cc b/noncore/unsupported/qpdf/xpdf/Lexer.cc new file mode 100644 index 0000000..fff4bcb --- a/dev/null +++ b/noncore/unsupported/qpdf/xpdf/Lexer.cc | |||
@@ -0,0 +1,473 @@ | |||
1 | //======================================================================== | ||
2 | // | ||
3 | // Lexer.cc | ||
4 | // | ||
5 | // Copyright 1996 Derek B. Noonburg | ||
6 | // | ||
7 | //======================================================================== | ||
8 | |||
9 | #ifdef __GNUC__ | ||
10 | #pragma implementation | ||
11 | #endif | ||
12 | |||
13 | #include <aconf.h> | ||
14 | #include <stdlib.h> | ||
15 | #include <stddef.h> | ||
16 | #include <string.h> | ||
17 | #include <ctype.h> | ||
18 | #include "Lexer.h" | ||
19 | #include "Error.h" | ||
20 | |||
21 | //------------------------------------------------------------------------ | ||
22 | |||
// Character classification table, indexed by byte value (0..255).
//   1 : white space (0x00, 0x09, 0x0a, 0x0c, 0x0d, 0x20)
//   2 : one of  % ( ) / < > [ ] { }
//   0 : any other character
// Any nonzero entry (1 or 2) ends a name or command token.
static char specialChars[256] = {
  1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,   // 00-0f: NUL TAB LF FF CR
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 10-1f
  1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2,   // 20-2f: space % ( ) /
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0,   // 30-3f: < >
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 40-4f
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,   // 50-5f: [ ]
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 60-6f
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,   // 70-7f: { }
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 80-8f
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 90-9f
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // a0-af
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // b0-bf
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // c0-cf
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // d0-df
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // e0-ef
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0    // f0-ff
};
43 | |||
44 | //------------------------------------------------------------------------ | ||
45 | // Lexer | ||
46 | //------------------------------------------------------------------------ | ||
47 | |||
48 | Lexer::Lexer(XRef *xref, Stream *str) { | ||
49 | Object obj; | ||
50 | |||
51 | curStr.initStream(str); | ||
52 | streams = new Array(xref); | ||
53 | streams->add(curStr.copy(&obj)); | ||
54 | strPtr = 0; | ||
55 | freeArray = gTrue; | ||
56 | curStr.streamReset(); | ||
57 | } | ||
58 | |||
59 | Lexer::Lexer(XRef *xref, Object *obj) { | ||
60 | Object obj2; | ||
61 | |||
62 | if (obj->isStream()) { | ||
63 | streams = new Array(xref); | ||
64 | freeArray = gTrue; | ||
65 | streams->add(obj->copy(&obj2)); | ||
66 | } else { | ||
67 | streams = obj->getArray(); | ||
68 | freeArray = gFalse; | ||
69 | } | ||
70 | strPtr = 0; | ||
71 | if (streams->getLength() > 0) { | ||
72 | streams->get(strPtr, &curStr); | ||
73 | curStr.streamReset(); | ||
74 | } | ||
75 | } | ||
76 | |||
77 | Lexer::~Lexer() { | ||
78 | if (!curStr.isNone()) { | ||
79 | curStr.streamClose(); | ||
80 | curStr.free(); | ||
81 | } | ||
82 | if (freeArray) { | ||
83 | delete streams; | ||
84 | } | ||
85 | } | ||
86 | |||
87 | int Lexer::getChar() { | ||
88 | int c; | ||
89 | |||
90 | c = EOF; | ||
91 | while (!curStr.isNone() && (c = curStr.streamGetChar()) == EOF) { | ||
92 | curStr.streamClose(); | ||
93 | curStr.free(); | ||
94 | ++strPtr; | ||
95 | if (strPtr < streams->getLength()) { | ||
96 | streams->get(strPtr, &curStr); | ||
97 | curStr.streamReset(); | ||
98 | } | ||
99 | } | ||
100 | return c; | ||
101 | } | ||
102 | |||
103 | int Lexer::lookChar() { | ||
104 | if (curStr.isNone()) { | ||
105 | return EOF; | ||
106 | } | ||
107 | return curStr.streamLookChar(); | ||
108 | } | ||
109 | |||
110 | Object *Lexer::getObj(Object *obj) { | ||
111 | char *p; | ||
112 | int c, c2; | ||
113 | GBool comment, neg, done; | ||
114 | int numParen; | ||
115 | int xi; | ||
116 | fouble xf, scale; | ||
117 | GString *s; | ||
118 | int n, m; | ||
119 | |||
120 | // skip whitespace and comments | ||
121 | comment = gFalse; | ||
122 | while (1) { | ||
123 | if ((c = getChar()) == EOF) { | ||
124 | return obj->initEOF(); | ||
125 | } | ||
126 | if (comment) { | ||
127 | if (c == '\r' || c == '\n') | ||
128 | comment = gFalse; | ||
129 | } else if (c == '%') { | ||
130 | comment = gTrue; | ||
131 | } else if (specialChars[c] != 1) { | ||
132 | break; | ||
133 | } | ||
134 | } | ||
135 | |||
136 | // start reading token | ||
137 | switch (c) { | ||
138 | |||
139 | // number | ||
140 | case '0': case '1': case '2': case '3': case '4': | ||
141 | case '5': case '6': case '7': case '8': case '9': | ||
142 | case '-': case '.': | ||
143 | neg = gFalse; | ||
144 | xi = 0; | ||
145 | if (c == '-') { | ||
146 | neg = gTrue; | ||
147 | } else if (c == '.') { | ||
148 | goto doReal; | ||
149 | } else { | ||
150 | xi = c - '0'; | ||
151 | } | ||
152 | while (1) { | ||
153 | c = lookChar(); | ||
154 | if (isdigit(c)) { | ||
155 | getChar(); | ||
156 | xi = xi * 10 + (c - '0'); | ||
157 | } else if (c == '.') { | ||
158 | getChar(); | ||
159 | goto doReal; | ||
160 | } else { | ||
161 | break; | ||
162 | } | ||
163 | } | ||
164 | if (neg) | ||
165 | xi = -xi; | ||
166 | obj->initInt(xi); | ||
167 | break; | ||
168 | doReal: | ||
169 | xf = xi; | ||
170 | scale = 0.1; | ||
171 | while (1) { | ||
172 | c = lookChar(); | ||
173 | if (!isdigit(c)) { | ||
174 | break; | ||
175 | } | ||
176 | getChar(); | ||
177 | xf = xf + scale * (c - '0'); | ||
178 | scale *= 0.1; | ||
179 | } | ||
180 | if (neg) | ||
181 | xf = -xf; | ||
182 | obj->initReal(xf); | ||
183 | break; | ||
184 | |||
185 | // string | ||
186 | case '(': | ||
187 | p = tokBuf; | ||
188 | n = 0; | ||
189 | numParen = 1; | ||
190 | done = gFalse; | ||
191 | s = NULL; | ||
192 | do { | ||
193 | c2 = EOF; | ||
194 | switch (c = getChar()) { | ||
195 | |||
196 | case EOF: | ||
197 | #if 0 | ||
198 | // This breaks some PDF files, e.g., ones from Photoshop. | ||
199 | case '\r': | ||
200 | case '\n': | ||
201 | #endif | ||
202 | error(getPos(), "Unterminated string"); | ||
203 | done = gTrue; | ||
204 | break; | ||
205 | |||
206 | case '(': | ||
207 | ++numParen; | ||
208 | c2 = c; | ||
209 | break; | ||
210 | |||
211 | case ')': | ||
212 | if (--numParen == 0) { | ||
213 | done = gTrue; | ||
214 | } else { | ||
215 | c2 = c; | ||
216 | } | ||
217 | break; | ||
218 | |||
219 | case '\\': | ||
220 | switch (c = getChar()) { | ||
221 | case 'n': | ||
222 | c2 = '\n'; | ||
223 | break; | ||
224 | case 'r': | ||
225 | c2 = '\r'; | ||
226 | break; | ||
227 | case 't': | ||
228 | c2 = '\t'; | ||
229 | break; | ||
230 | case 'b': | ||
231 | c2 = '\b'; | ||
232 | break; | ||
233 | case 'f': | ||
234 | c2 = '\f'; | ||
235 | break; | ||
236 | case '\\': | ||
237 | case '(': | ||
238 | case ')': | ||
239 | c2 = c; | ||
240 | break; | ||
241 | case '0': case '1': case '2': case '3': | ||
242 | case '4': case '5': case '6': case '7': | ||
243 | c2 = c - '0'; | ||
244 | c = lookChar(); | ||
245 | if (c >= '0' && c <= '7') { | ||
246 | getChar(); | ||
247 | c2 = (c2 << 3) + (c - '0'); | ||
248 | c = lookChar(); | ||
249 | if (c >= '0' && c <= '7') { | ||
250 | getChar(); | ||
251 | c2 = (c2 << 3) + (c - '0'); | ||
252 | } | ||
253 | } | ||
254 | break; | ||
255 | case '\r': | ||
256 | c = lookChar(); | ||
257 | if (c == '\n') { | ||
258 | getChar(); | ||
259 | } | ||
260 | break; | ||
261 | case '\n': | ||
262 | break; | ||
263 | case EOF: | ||
264 | error(getPos(), "Unterminated string"); | ||
265 | done = gTrue; | ||
266 | break; | ||
267 | default: | ||
268 | c2 = c; | ||
269 | break; | ||
270 | } | ||
271 | break; | ||
272 | |||
273 | default: | ||
274 | c2 = c; | ||
275 | break; | ||
276 | } | ||
277 | |||
278 | if (c2 != EOF) { | ||
279 | if (n == tokBufSize) { | ||
280 | if (!s) | ||
281 | s = new GString(tokBuf, tokBufSize); | ||
282 | else | ||
283 | s->append(tokBuf, tokBufSize); | ||
284 | p = tokBuf; | ||
285 | n = 0; | ||
286 | } | ||
287 | *p++ = (char)c2; | ||
288 | ++n; | ||
289 | } | ||
290 | } while (!done); | ||
291 | if (!s) | ||
292 | s = new GString(tokBuf, n); | ||
293 | else | ||
294 | s->append(tokBuf, n); | ||
295 | obj->initString(s); | ||
296 | break; | ||
297 | |||
298 | // name | ||
299 | case '/': | ||
300 | p = tokBuf; | ||
301 | n = 0; | ||
302 | while ((c = lookChar()) != EOF && !specialChars[c]) { | ||
303 | getChar(); | ||
304 | if (c == '#') { | ||
305 | c2 = lookChar(); | ||
306 | if (c2 >= '0' && c2 <= '9') { | ||
307 | c = c2 - '0'; | ||
308 | } else if (c2 >= 'A' && c2 <= 'F') { | ||
309 | c = c2 - 'A' + 10; | ||
310 | } else if (c2 >= 'a' && c2 <= 'f') { | ||
311 | c = c2 - 'a' + 10; | ||
312 | } else { | ||
313 | goto notEscChar; | ||
314 | } | ||
315 | getChar(); | ||
316 | c <<= 4; | ||
317 | c2 = getChar(); | ||
318 | if (c2 >= '0' && c2 <= '9') { | ||
319 | c += c2 - '0'; | ||
320 | } else if (c2 >= 'A' && c2 <= 'F') { | ||
321 | c += c2 - 'A' + 10; | ||
322 | } else if (c2 >= 'a' && c2 <= 'f') { | ||
323 | c += c2 - 'a' + 10; | ||
324 | } else { | ||
325 | error(getPos(), "Illegal digit in hex char in name"); | ||
326 | } | ||
327 | } | ||
328 | notEscChar: | ||
329 | if (++n == tokBufSize) { | ||
330 | error(getPos(), "Name token too long"); | ||
331 | break; | ||
332 | } | ||
333 | *p++ = c; | ||
334 | } | ||
335 | *p = '\0'; | ||
336 | obj->initName(tokBuf); | ||
337 | break; | ||
338 | |||
339 | // array punctuation | ||
340 | case '[': | ||
341 | case ']': | ||
342 | tokBuf[0] = c; | ||
343 | tokBuf[1] = '\0'; | ||
344 | obj->initCmd(tokBuf); | ||
345 | break; | ||
346 | |||
347 | // hex string or dict punctuation | ||
348 | case '<': | ||
349 | c = lookChar(); | ||
350 | |||
351 | // dict punctuation | ||
352 | if (c == '<') { | ||
353 | getChar(); | ||
354 | tokBuf[0] = tokBuf[1] = '<'; | ||
355 | tokBuf[2] = '\0'; | ||
356 | obj->initCmd(tokBuf); | ||
357 | |||
358 | // hex string | ||
359 | } else { | ||
360 | p = tokBuf; | ||
361 | m = n = 0; | ||
362 | c2 = 0; | ||
363 | s = NULL; | ||
364 | while (1) { | ||
365 | c = getChar(); | ||
366 | if (c == '>') { | ||
367 | break; | ||
368 | } else if (c == EOF) { | ||
369 | error(getPos(), "Unterminated hex string"); | ||
370 | break; | ||
371 | } else if (specialChars[c] != 1) { | ||
372 | c2 = c2 << 4; | ||
373 | if (c >= '0' && c <= '9') | ||
374 | c2 += c - '0'; | ||
375 | else if (c >= 'A' && c <= 'F') | ||
376 | c2 += c - 'A' + 10; | ||
377 | else if (c >= 'a' && c <= 'f') | ||
378 | c2 += c - 'a' + 10; | ||
379 | else | ||
380 | error(getPos(), "Illegal character <%02x> in hex string", c); | ||
381 | if (++m == 2) { | ||
382 | if (n == tokBufSize) { | ||
383 | if (!s) | ||
384 | s = new GString(tokBuf, tokBufSize); | ||
385 | else | ||
386 | s->append(tokBuf, tokBufSize); | ||
387 | p = tokBuf; | ||
388 | n = 0; | ||
389 | } | ||
390 | *p++ = (char)c2; | ||
391 | ++n; | ||
392 | c2 = 0; | ||
393 | m = 0; | ||
394 | } | ||
395 | } | ||
396 | } | ||
397 | if (!s) | ||
398 | s = new GString(tokBuf, n); | ||
399 | else | ||
400 | s->append(tokBuf, n); | ||
401 | if (m == 1) | ||
402 | s->append((char)(c2 << 4)); | ||
403 | obj->initString(s); | ||
404 | } | ||
405 | break; | ||
406 | |||
407 | // dict punctuation | ||
408 | case '>': | ||
409 | c = lookChar(); | ||
410 | if (c == '>') { | ||
411 | getChar(); | ||
412 | tokBuf[0] = tokBuf[1] = '>'; | ||
413 | tokBuf[2] = '\0'; | ||
414 | obj->initCmd(tokBuf); | ||
415 | } else { | ||
416 | error(getPos(), "Illegal character '>'"); | ||
417 | obj->initError(); | ||
418 | } | ||
419 | break; | ||
420 | |||
421 | // error | ||
422 | case ')': | ||
423 | case '{': | ||
424 | case '}': | ||
425 | error(getPos(), "Illegal character '%c'", c); | ||
426 | obj->initError(); | ||
427 | break; | ||
428 | |||
429 | // command | ||
430 | default: | ||
431 | p = tokBuf; | ||
432 | *p++ = c; | ||
433 | n = 1; | ||
434 | while ((c = lookChar()) != EOF && !specialChars[c]) { | ||
435 | getChar(); | ||
436 | if (++n == tokBufSize) { | ||
437 | error(getPos(), "Command token too long"); | ||
438 | break; | ||
439 | } | ||
440 | *p++ = c; | ||
441 | } | ||
442 | *p = '\0'; | ||
443 | if (tokBuf[0] == 't' && !strcmp(tokBuf, "true")) { | ||
444 | obj->initBool(gTrue); | ||
445 | } else if (tokBuf[0] == 'f' && !strcmp(tokBuf, "false")) { | ||
446 | obj->initBool(gFalse); | ||
447 | } else if (tokBuf[0] == 'n' && !strcmp(tokBuf, "null")) { | ||
448 | obj->initNull(); | ||
449 | } else { | ||
450 | obj->initCmd(tokBuf); | ||
451 | } | ||
452 | break; | ||
453 | } | ||
454 | |||
455 | return obj; | ||
456 | } | ||
457 | |||
458 | void Lexer::skipToNextLine() { | ||
459 | int c; | ||
460 | |||
461 | while (1) { | ||
462 | c = getChar(); | ||
463 | if (c == EOF || c == '\n') { | ||
464 | return; | ||
465 | } | ||
466 | if (c == '\r') { | ||
467 | if ((c = lookChar()) == '\n') { | ||
468 | getChar(); | ||
469 | } | ||
470 | return; | ||
471 | } | ||
472 | } | ||
473 | } | ||