summaryrefslogtreecommitdiff
path: root/noncore/unsupported/qpdf/xpdf/Lexer.cc
Unidiff
Diffstat (limited to 'noncore/unsupported/qpdf/xpdf/Lexer.cc') (more/less context) (show whitespace changes)
-rw-r--r--noncore/unsupported/qpdf/xpdf/Lexer.cc473
1 files changed, 473 insertions, 0 deletions
diff --git a/noncore/unsupported/qpdf/xpdf/Lexer.cc b/noncore/unsupported/qpdf/xpdf/Lexer.cc
new file mode 100644
index 0000000..fff4bcb
--- a/dev/null
+++ b/noncore/unsupported/qpdf/xpdf/Lexer.cc
@@ -0,0 +1,473 @@
1//========================================================================
2//
3// Lexer.cc
4//
5// Copyright 1996 Derek B. Noonburg
6//
7//========================================================================
8
9#ifdef __GNUC__
10#pragma implementation
11#endif
12
13#include <aconf.h>
14#include <stdlib.h>
15#include <stddef.h>
16#include <string.h>
17#include <ctype.h>
18#include "Lexer.h"
19#include "Error.h"
20
21//------------------------------------------------------------------------
22
// Character classification table, indexed by byte value:
//   0 - ordinary character
//   1 - white space (which also terminates a name or command)
//   2 - delimiter that terminates a name or command
static char specialChars[256] = {
  1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,  // 00-0f: NUL \t \n \f \r
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 10-1f
  1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2,  // 20-2f: space % ( ) /
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0,  // 30-3f: < >
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 40-4f
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,  // 50-5f: [ ]
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 60-6f
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,  // 70-7f: { }
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 80-8f
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 90-9f
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // a0-af
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // b0-bf
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // c0-cf
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // d0-df
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // e0-ef
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0   // f0-ff
};
43
44//------------------------------------------------------------------------
45// Lexer
46//------------------------------------------------------------------------
47
48Lexer::Lexer(XRef *xref, Stream *str) {
49 Object obj;
50
51 curStr.initStream(str);
52 streams = new Array(xref);
53 streams->add(curStr.copy(&obj));
54 strPtr = 0;
55 freeArray = gTrue;
56 curStr.streamReset();
57}
58
59Lexer::Lexer(XRef *xref, Object *obj) {
60 Object obj2;
61
62 if (obj->isStream()) {
63 streams = new Array(xref);
64 freeArray = gTrue;
65 streams->add(obj->copy(&obj2));
66 } else {
67 streams = obj->getArray();
68 freeArray = gFalse;
69 }
70 strPtr = 0;
71 if (streams->getLength() > 0) {
72 streams->get(strPtr, &curStr);
73 curStr.streamReset();
74 }
75}
76
77Lexer::~Lexer() {
78 if (!curStr.isNone()) {
79 curStr.streamClose();
80 curStr.free();
81 }
82 if (freeArray) {
83 delete streams;
84 }
85}
86
87int Lexer::getChar() {
88 int c;
89
90 c = EOF;
91 while (!curStr.isNone() && (c = curStr.streamGetChar()) == EOF) {
92 curStr.streamClose();
93 curStr.free();
94 ++strPtr;
95 if (strPtr < streams->getLength()) {
96 streams->get(strPtr, &curStr);
97 curStr.streamReset();
98 }
99 }
100 return c;
101}
102
103int Lexer::lookChar() {
104 if (curStr.isNone()) {
105 return EOF;
106 }
107 return curStr.streamLookChar();
108}
109
110Object *Lexer::getObj(Object *obj) {
111 char *p;
112 int c, c2;
113 GBool comment, neg, done;
114 int numParen;
115 int xi;
116 fouble xf, scale;
117 GString *s;
118 int n, m;
119
120 // skip whitespace and comments
121 comment = gFalse;
122 while (1) {
123 if ((c = getChar()) == EOF) {
124 return obj->initEOF();
125 }
126 if (comment) {
127 if (c == '\r' || c == '\n')
128 comment = gFalse;
129 } else if (c == '%') {
130 comment = gTrue;
131 } else if (specialChars[c] != 1) {
132 break;
133 }
134 }
135
136 // start reading token
137 switch (c) {
138
139 // number
140 case '0': case '1': case '2': case '3': case '4':
141 case '5': case '6': case '7': case '8': case '9':
142 case '-': case '.':
143 neg = gFalse;
144 xi = 0;
145 if (c == '-') {
146 neg = gTrue;
147 } else if (c == '.') {
148 goto doReal;
149 } else {
150 xi = c - '0';
151 }
152 while (1) {
153 c = lookChar();
154 if (isdigit(c)) {
155 getChar();
156 xi = xi * 10 + (c - '0');
157 } else if (c == '.') {
158 getChar();
159 goto doReal;
160 } else {
161 break;
162 }
163 }
164 if (neg)
165 xi = -xi;
166 obj->initInt(xi);
167 break;
168 doReal:
169 xf = xi;
170 scale = 0.1;
171 while (1) {
172 c = lookChar();
173 if (!isdigit(c)) {
174 break;
175 }
176 getChar();
177 xf = xf + scale * (c - '0');
178 scale *= 0.1;
179 }
180 if (neg)
181 xf = -xf;
182 obj->initReal(xf);
183 break;
184
185 // string
186 case '(':
187 p = tokBuf;
188 n = 0;
189 numParen = 1;
190 done = gFalse;
191 s = NULL;
192 do {
193 c2 = EOF;
194 switch (c = getChar()) {
195
196 case EOF:
197#if 0
198 // This breaks some PDF files, e.g., ones from Photoshop.
199 case '\r':
200 case '\n':
201#endif
202 error(getPos(), "Unterminated string");
203 done = gTrue;
204 break;
205
206 case '(':
207 ++numParen;
208 c2 = c;
209 break;
210
211 case ')':
212 if (--numParen == 0) {
213 done = gTrue;
214 } else {
215 c2 = c;
216 }
217 break;
218
219 case '\\':
220 switch (c = getChar()) {
221 case 'n':
222 c2 = '\n';
223 break;
224 case 'r':
225 c2 = '\r';
226 break;
227 case 't':
228 c2 = '\t';
229 break;
230 case 'b':
231 c2 = '\b';
232 break;
233 case 'f':
234 c2 = '\f';
235 break;
236 case '\\':
237 case '(':
238 case ')':
239 c2 = c;
240 break;
241 case '0': case '1': case '2': case '3':
242 case '4': case '5': case '6': case '7':
243 c2 = c - '0';
244 c = lookChar();
245 if (c >= '0' && c <= '7') {
246 getChar();
247 c2 = (c2 << 3) + (c - '0');
248 c = lookChar();
249 if (c >= '0' && c <= '7') {
250 getChar();
251 c2 = (c2 << 3) + (c - '0');
252 }
253 }
254 break;
255 case '\r':
256 c = lookChar();
257 if (c == '\n') {
258 getChar();
259 }
260 break;
261 case '\n':
262 break;
263 case EOF:
264 error(getPos(), "Unterminated string");
265 done = gTrue;
266 break;
267 default:
268 c2 = c;
269 break;
270 }
271 break;
272
273 default:
274 c2 = c;
275 break;
276 }
277
278 if (c2 != EOF) {
279 if (n == tokBufSize) {
280 if (!s)
281 s = new GString(tokBuf, tokBufSize);
282 else
283 s->append(tokBuf, tokBufSize);
284 p = tokBuf;
285 n = 0;
286 }
287 *p++ = (char)c2;
288 ++n;
289 }
290 } while (!done);
291 if (!s)
292 s = new GString(tokBuf, n);
293 else
294 s->append(tokBuf, n);
295 obj->initString(s);
296 break;
297
298 // name
299 case '/':
300 p = tokBuf;
301 n = 0;
302 while ((c = lookChar()) != EOF && !specialChars[c]) {
303 getChar();
304 if (c == '#') {
305 c2 = lookChar();
306 if (c2 >= '0' && c2 <= '9') {
307 c = c2 - '0';
308 } else if (c2 >= 'A' && c2 <= 'F') {
309 c = c2 - 'A' + 10;
310 } else if (c2 >= 'a' && c2 <= 'f') {
311 c = c2 - 'a' + 10;
312 } else {
313 goto notEscChar;
314 }
315 getChar();
316 c <<= 4;
317 c2 = getChar();
318 if (c2 >= '0' && c2 <= '9') {
319 c += c2 - '0';
320 } else if (c2 >= 'A' && c2 <= 'F') {
321 c += c2 - 'A' + 10;
322 } else if (c2 >= 'a' && c2 <= 'f') {
323 c += c2 - 'a' + 10;
324 } else {
325 error(getPos(), "Illegal digit in hex char in name");
326 }
327 }
328 notEscChar:
329 if (++n == tokBufSize) {
330 error(getPos(), "Name token too long");
331 break;
332 }
333 *p++ = c;
334 }
335 *p = '\0';
336 obj->initName(tokBuf);
337 break;
338
339 // array punctuation
340 case '[':
341 case ']':
342 tokBuf[0] = c;
343 tokBuf[1] = '\0';
344 obj->initCmd(tokBuf);
345 break;
346
347 // hex string or dict punctuation
348 case '<':
349 c = lookChar();
350
351 // dict punctuation
352 if (c == '<') {
353 getChar();
354 tokBuf[0] = tokBuf[1] = '<';
355 tokBuf[2] = '\0';
356 obj->initCmd(tokBuf);
357
358 // hex string
359 } else {
360 p = tokBuf;
361 m = n = 0;
362 c2 = 0;
363 s = NULL;
364 while (1) {
365 c = getChar();
366 if (c == '>') {
367 break;
368 } else if (c == EOF) {
369 error(getPos(), "Unterminated hex string");
370 break;
371 } else if (specialChars[c] != 1) {
372 c2 = c2 << 4;
373 if (c >= '0' && c <= '9')
374 c2 += c - '0';
375 else if (c >= 'A' && c <= 'F')
376 c2 += c - 'A' + 10;
377 else if (c >= 'a' && c <= 'f')
378 c2 += c - 'a' + 10;
379 else
380 error(getPos(), "Illegal character <%02x> in hex string", c);
381 if (++m == 2) {
382 if (n == tokBufSize) {
383 if (!s)
384 s = new GString(tokBuf, tokBufSize);
385 else
386 s->append(tokBuf, tokBufSize);
387 p = tokBuf;
388 n = 0;
389 }
390 *p++ = (char)c2;
391 ++n;
392 c2 = 0;
393 m = 0;
394 }
395 }
396 }
397 if (!s)
398 s = new GString(tokBuf, n);
399 else
400 s->append(tokBuf, n);
401 if (m == 1)
402 s->append((char)(c2 << 4));
403 obj->initString(s);
404 }
405 break;
406
407 // dict punctuation
408 case '>':
409 c = lookChar();
410 if (c == '>') {
411 getChar();
412 tokBuf[0] = tokBuf[1] = '>';
413 tokBuf[2] = '\0';
414 obj->initCmd(tokBuf);
415 } else {
416 error(getPos(), "Illegal character '>'");
417 obj->initError();
418 }
419 break;
420
421 // error
422 case ')':
423 case '{':
424 case '}':
425 error(getPos(), "Illegal character '%c'", c);
426 obj->initError();
427 break;
428
429 // command
430 default:
431 p = tokBuf;
432 *p++ = c;
433 n = 1;
434 while ((c = lookChar()) != EOF && !specialChars[c]) {
435 getChar();
436 if (++n == tokBufSize) {
437 error(getPos(), "Command token too long");
438 break;
439 }
440 *p++ = c;
441 }
442 *p = '\0';
443 if (tokBuf[0] == 't' && !strcmp(tokBuf, "true")) {
444 obj->initBool(gTrue);
445 } else if (tokBuf[0] == 'f' && !strcmp(tokBuf, "false")) {
446 obj->initBool(gFalse);
447 } else if (tokBuf[0] == 'n' && !strcmp(tokBuf, "null")) {
448 obj->initNull();
449 } else {
450 obj->initCmd(tokBuf);
451 }
452 break;
453 }
454
455 return obj;
456}
457
458void Lexer::skipToNextLine() {
459 int c;
460
461 while (1) {
462 c = getChar();
463 if (c == EOF || c == '\n') {
464 return;
465 }
466 if (c == '\r') {
467 if ((c = lookChar()) == '\n') {
468 getChar();
469 }
470 return;
471 }
472 }
473}