summaryrefslogtreecommitdiff
path: root/noncore/unsupported/qpdf/xpdf/TextOutputDev.cc
Unidiff
Diffstat (limited to 'noncore/unsupported/qpdf/xpdf/TextOutputDev.cc') (more/less context) (show whitespace changes)
-rw-r--r-- noncore/unsupported/qpdf/xpdf/TextOutputDev.cc 686
1 files changed, 686 insertions, 0 deletions
diff --git a/noncore/unsupported/qpdf/xpdf/TextOutputDev.cc b/noncore/unsupported/qpdf/xpdf/TextOutputDev.cc
new file mode 100644
index 0000000..aa9366a
--- a/dev/null
+++ b/noncore/unsupported/qpdf/xpdf/TextOutputDev.cc
@@ -0,0 +1,686 @@
1//========================================================================
2//
3// TextOutputDev.cc
4//
5// Copyright 1997 Derek B. Noonburg
6//
7//========================================================================
8
9#ifdef __GNUC__
10#pragma implementation
11#endif
12
13#include <aconf.h>
14#include <stdio.h>
15#include <stdlib.h>
16#include <stddef.h>
17#include <math.h>
18#include <ctype.h>
19#include "GString.h"
20#include "gmem.h"
21#include "config.h"
22#include "Error.h"
23#include "GlobalParams.h"
24#include "UnicodeMap.h"
25#include "GfxState.h"
26#include "TextOutputDev.h"
27
28#ifdef MACOS
29// needed for setting type/creator of MacOS files
30#include "ICSupport.h"
31#endif
32
33//------------------------------------------------------------------------
34// TextString
35//------------------------------------------------------------------------
36
37TextString::TextString(GfxState *state, fouble fontSize) {
38 GfxFont *font;
39 fouble x, y;
40
41 state->transform(state->getCurX(), state->getCurY(), &x, &y);
42 if ((font = state->getFont())) {
43 yMin = y - font->getAscent() * fontSize;
44 yMax = y - font->getDescent() * fontSize;
45 } else {
46 // this means that the PDF file draws text without a current font,
47 // which should never happen
48 yMin = y - 0.95 * fontSize;
49 yMax = y + 0.35 * fontSize;
50 }
51 col = 0;
52 text = NULL;
53 xRight = NULL;
54 len = size = 0;
55 yxNext = NULL;
56 xyNext = NULL;
57}
58
59TextString::~TextString() {
60 gfree(text);
61 gfree(xRight);
62}
63
64void TextString::addChar(GfxState *state, fouble x, fouble y,
65 fouble dx, fouble dy, Unicode u) {
66 if (len == size) {
67 size += 16;
68 text = (Unicode *)grealloc(text, size * sizeof(Unicode));
69 xRight = (fouble *)grealloc(xRight, size * sizeof(fouble));
70 }
71 text[len] = u;
72 if (len == 0) {
73 xMin = x;
74 }
75 xMax = xRight[len] = x + dx;
76 ++len;
77}
78
79//------------------------------------------------------------------------
80// TextPage
81//------------------------------------------------------------------------
82
83TextPage::TextPage(GBool rawOrderA) {
84 rawOrder = rawOrderA;
85 curStr = NULL;
86 fontSize = 0;
87 yxStrings = NULL;
88 xyStrings = NULL;
89 yxCur1 = yxCur2 = NULL;
90 nest = 0;
91}
92
93TextPage::~TextPage() {
94 clear();
95}
96
97void TextPage::updateFont(GfxState *state) {
98 GfxFont *font;
99 fouble *fm;
100 char *name;
101 int code;
102
103 // adjust the font size
104 fontSize = state->getTransformedFontSize();
105 if ((font = state->getFont()) && font->getType() == fontType3) {
106 // This is a hack which makes it possible to deal with some Type 3
107 // fonts. The problem is that it's impossible to know what the
108 // base coordinate system used in the font is without actually
109 // rendering the font. This code tries to guess by looking at the
110 // width of the character 'm' (which breaks if the font is a
111 // subset that doesn't contain 'm').
112 for (code = 0; code < 256; ++code) {
113 if ((name = ((Gfx8BitFont *)font)->getCharName(code)) &&
114 name[0] == 'm' && name[1] == '\0') {
115 break;
116 }
117 }
118 if (code < 256) {
119 // 600 is a generic average 'm' width -- yes, this is a hack
120 fontSize *= ((Gfx8BitFont *)font)->getWidth(code) / 0.6;
121 }
122 fm = font->getFontMatrix();
123 if (fm[0] != 0) {
124 fontSize *= fabs(fm[3] / fm[0]);
125 }
126 }
127}
128
129void TextPage::beginString(GfxState *state) {
130 // This check is needed because Type 3 characters can contain
131 // text-drawing operations.
132 if (curStr) {
133 ++nest;
134 return;
135 }
136
137 curStr = new TextString(state, fontSize);
138}
139
140void TextPage::addChar(GfxState *state, fouble x, fouble y,
141 fouble dx, fouble dy, Unicode *u, int uLen) {
142 fouble x1, y1, w1, h1, dx2, dy2;
143 int n, i;
144
145 state->transform(x, y, &x1, &y1);
146 n = curStr->len;
147 if (n > 0 &&
148 x1 - curStr->xRight[n-1] > 0.1 * (curStr->yMax - curStr->yMin)) {
149 endString();
150 beginString(state);
151 }
152 state->textTransformDelta(state->getCharSpace() * state->getHorizScaling(),
153 0, &dx2, &dy2);
154 dx -= dx2;
155 dy -= dy2;
156 state->transformDelta(dx, dy, &w1, &h1);
157 w1 /= uLen;
158 h1 /= uLen;
159 for (i = 0; i < uLen; ++i) {
160 curStr->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, u[i]);
161 }
162}
163
164void TextPage::endString() {
165 TextString *p1, *p2;
166 fouble h, y1, y2;
167
168 // This check is needed because Type 3 characters can contain
169 // text-drawing operations.
170 if (nest > 0) {
171 --nest;
172 return;
173 }
174
175 // throw away zero-length strings -- they don't have valid xMin/xMax
176 // values, and they're useless anyway
177 if (curStr->len == 0) {
178 delete curStr;
179 curStr = NULL;
180 return;
181 }
182
183 // insert string in y-major list
184 h = curStr->yMax - curStr->yMin;
185 y1 = curStr->yMin + 0.5 * h;
186 y2 = curStr->yMin + 0.8 * h;
187 if (rawOrder) {
188 p1 = yxCur1;
189 p2 = NULL;
190 } else if ((!yxCur1 ||
191 (y1 >= yxCur1->yMin &&
192 (y2 >= yxCur1->yMax || curStr->xMax >= yxCur1->xMin))) &&
193 (!yxCur2 ||
194 (y1 < yxCur2->yMin ||
195 (y2 < yxCur2->yMax && curStr->xMax < yxCur2->xMin)))) {
196 p1 = yxCur1;
197 p2 = yxCur2;
198 } else {
199 for (p1 = NULL, p2 = yxStrings; p2; p1 = p2, p2 = p2->yxNext) {
200 if (y1 < p2->yMin || (y2 < p2->yMax && curStr->xMax < p2->xMin)) {
201 break;
202 }
203 }
204 yxCur2 = p2;
205 }
206 yxCur1 = curStr;
207 if (p1) {
208 p1->yxNext = curStr;
209 } else {
210 yxStrings = curStr;
211 }
212 curStr->yxNext = p2;
213 curStr = NULL;
214}
215
216void TextPage::coalesce() {
217 TextString *str1, *str2;
218 fouble space, d;
219 GBool addSpace;
220 int n, i;
221
222#if 0 //~ for debugging
223 for (str1 = yxStrings; str1; str1 = str1->yxNext) {
224 printf("x=%3d..%3d y=%3d..%3d size=%2d '",
225 (int)str1->xMin, (int)str1->xMax, (int)str1->yMin, (int)str1->yMax,
226 (int)(str1->yMax - str1->yMin));
227 for (i = 0; i < str1->len; ++i) {
228 fputc(str1->text[i] & 0xff, stdout);
229 }
230 printf("'\n");
231 }
232 printf("\n------------------------------------------------------------\n\n");
233#endif
234 str1 = yxStrings;
235 while (str1 && (str2 = str1->yxNext)) {
236 space = str1->yMax - str1->yMin;
237 d = str2->xMin - str1->xMax;
238 if (((rawOrder &&
239 ((str2->yMin >= str1->yMin && str2->yMin <= str1->yMax) ||
240 (str2->yMax >= str1->yMin && str2->yMax <= str1->yMax))) ||
241 (!rawOrder && str2->yMin < str1->yMax)) &&
242 d > -0.5 * space && d < space) {
243 n = str1->len + str2->len;
244 if ((addSpace = d > 0.1 * space)) {
245 ++n;
246 }
247 str1->size = (n + 15) & ~15;
248 str1->text = (Unicode *)grealloc(str1->text,
249 str1->size * sizeof(Unicode));
250 str1->xRight = (fouble *)grealloc(str1->xRight,
251 str1->size * sizeof(fouble));
252 if (addSpace) {
253 str1->text[str1->len] = 0x20;
254 str1->xRight[str1->len] = str2->xMin;
255 ++str1->len;
256 }
257 for (i = 0; i < str2->len; ++i) {
258 str1->text[str1->len] = str2->text[i];
259 str1->xRight[str1->len] = str2->xRight[i];
260 ++str1->len;
261 }
262 if (str2->xMax > str1->xMax) {
263 str1->xMax = str2->xMax;
264 }
265 if (str2->yMax > str1->yMax) {
266 str1->yMax = str2->yMax;
267 }
268 str1->yxNext = str2->yxNext;
269 delete str2;
270 } else {
271 str1 = str2;
272 }
273 }
274}
275
276GBool TextPage::findText(Unicode *s, int len,
277 GBool top, GBool bottom,
278 fouble *xMin, fouble *yMin,
279 fouble *xMax, fouble *yMax) {
280 TextString *str;
281 Unicode *p;
282 Unicode u1, u2;
283 int m, i, j;
284 fouble x;
285
286 // scan all strings on page
287 for (str = yxStrings; str; str = str->yxNext) {
288
289 // check: above top limit?
290 if (!top && (str->yMax < *yMin ||
291 (str->yMin < *yMin && str->xMax <= *xMin))) {
292 continue;
293 }
294
295 // check: below bottom limit?
296 if (!bottom && (str->yMin > *yMax ||
297 (str->yMax > *yMax && str->xMin >= *xMax))) {
298 return gFalse;
299 }
300
301 // search each position in this string
302 m = str->len;
303 for (i = 0, p = str->text; i <= m - len; ++i, ++p) {
304
305 // check: above top limit?
306 if (!top && str->yMin < *yMin) {
307 x = (((i == 0) ? str->xMin : str->xRight[i-1]) + str->xRight[i]) / 2;
308 if (x < *xMin) {
309 continue;
310 }
311 }
312
313 // check: below bottom limit?
314 if (!bottom && str->yMax > *yMax) {
315 x = (((i == 0) ? str->xMin : str->xRight[i-1]) + str->xRight[i]) / 2;
316 if (x > *xMax) {
317 return gFalse;
318 }
319 }
320
321 // compare the strings
322 for (j = 0; j < len; ++j) {
323#if 1 //~ this lowercases Latin A-Z only -- this will eventually be
324 //~ extended to handle other character sets
325 if (p[j] >= 0x41 && p[j] <= 0x5a) {
326 u1 = p[j] + 0x20;
327 } else {
328 u1 = p[j];
329 }
330 if (s[j] >= 0x41 && s[j] <= 0x5a) {
331 u2 = s[j] + 0x20;
332 } else {
333 u2 = s[j];
334 }
335#endif
336 if (u1 != u2) {
337 break;
338 }
339 }
340
341 // found it
342 if (j == len) {
343 *xMin = (i == 0) ? str->xMin : str->xRight[i-1];
344 *xMax = str->xRight[i + len - 1];
345 *yMin = str->yMin;
346 *yMax = str->yMax;
347 return gTrue;
348 }
349 }
350 }
351 return gFalse;
352}
353
354GString *TextPage::getText(fouble xMin, fouble yMin,
355 fouble xMax, fouble yMax) {
356 GString *s;
357 UnicodeMap *uMap;
358 char space[8], eol[16], buf[8];
359 int spaceLen, eolLen, n;
360 TextString *str1;
361 fouble x0, x1, x2, y;
362 fouble xPrev, yPrev;
363 int i1, i2, i;
364 GBool multiLine;
365
366 s = new GString();
367 if (!(uMap = globalParams->getTextEncoding())) {
368 return s;
369 }
370 spaceLen = uMap->mapUnicode(0x20, space, sizeof(space));
371 eolLen = 0; // make gcc happy
372 switch (globalParams->getTextEOL()) {
373 case eolUnix:
374 eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol));
375 break;
376 case eolDOS:
377 eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol));
378 eolLen += uMap->mapUnicode(0x0a, eol + eolLen, sizeof(eol) - eolLen);
379 break;
380 case eolMac:
381 eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol));
382 break;
383 }
384 xPrev = yPrev = 0;
385 multiLine = gFalse;
386 for (str1 = yxStrings; str1; str1 = str1->yxNext) {
387 y = 0.5 * (str1->yMin + str1->yMax);
388 if (y > yMax) {
389 break;
390 }
391 if (y > yMin && str1->xMin < xMax && str1->xMax > xMin) {
392 x0 = x1 = x2 = str1->xMin;
393 for (i1 = 0; i1 < str1->len; ++i1) {
394 x0 = (i1==0) ? str1->xMin : str1->xRight[i1-1];
395 x1 = str1->xRight[i1];
396 if (0.5 * (x0 + x1) >= xMin) {
397 break;
398 }
399 }
400 for (i2 = str1->len - 1; i2 > i1; --i2) {
401 x1 = (i2==0) ? str1->xMin : str1->xRight[i2-1];
402 x2 = str1->xRight[i2];
403 if (0.5 * (x1 + x2) <= xMax) {
404 break;
405 }
406 }
407 if (s->getLength() > 0) {
408 if (x0 < xPrev || str1->yMin > yPrev) {
409 s->append(eol, eolLen);
410 multiLine = gTrue;
411 } else {
412 for (i = 0; i < 4; ++i) {
413 s->append(space, spaceLen);
414 }
415 }
416 }
417 for (i = i1; i <= i2; ++i) {
418 n = uMap->mapUnicode(str1->text[i], buf, sizeof(buf));
419 s->append(buf, n);
420 }
421 xPrev = x2;
422 yPrev = str1->yMax;
423 }
424 }
425 if (multiLine) {
426 s->append(eol, eolLen);
427 }
428 uMap->decRefCnt();
429 return s;
430}
431
432void TextPage::dump(FILE *f) {
433 UnicodeMap *uMap;
434 char space[8], eol[16], eop[8], buf[8];
435 int spaceLen, eolLen, eopLen, n;
436 TextString *str1, *str2, *str3;
437 fouble yMin, yMax;
438 int col1, col2, d, i;
439
440 // get the output encoding
441 if (!(uMap = globalParams->getTextEncoding())) {
442 return;
443 }
444 spaceLen = uMap->mapUnicode(0x20, space, sizeof(space));
445 eolLen = 0; // make gcc happy
446 switch (globalParams->getTextEOL()) {
447 case eolUnix:
448 eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol));
449 break;
450 case eolDOS:
451 eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol));
452 eolLen += uMap->mapUnicode(0x0a, eol + eolLen, sizeof(eol) - eolLen);
453 break;
454 case eolMac:
455 eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol));
456 break;
457 }
458 eopLen = uMap->mapUnicode(0x0c, eop, sizeof(eop));
459
460 // build x-major list
461 xyStrings = NULL;
462 for (str1 = yxStrings; str1; str1 = str1->yxNext) {
463 for (str2 = NULL, str3 = xyStrings;
464 str3;
465 str2 = str3, str3 = str3->xyNext) {
466 if (str1->xMin < str3->xMin ||
467 (str1->xMin == str3->xMin && str1->yMin < str3->yMin)) {
468 break;
469 }
470 }
471 if (str2) {
472 str2->xyNext = str1;
473 } else {
474 xyStrings = str1;
475 }
476 str1->xyNext = str3;
477 }
478
479 // do column assignment
480 for (str1 = xyStrings; str1; str1 = str1->xyNext) {
481 col1 = 0;
482 for (str2 = xyStrings; str2 != str1; str2 = str2->xyNext) {
483 if (str1->xMin >= str2->xMax) {
484 col2 = str2->col + str2->len + 4;
485 if (col2 > col1) {
486 col1 = col2;
487 }
488 } else if (str1->xMin > str2->xMin) {
489 col2 = str2->col +
490 (int)(((str1->xMin - str2->xMin) / (str2->xMax - str2->xMin)) *
491 str2->len);
492 if (col2 > col1) {
493 col1 = col2;
494 }
495 }
496 }
497 str1->col = col1;
498 }
499
500#if 0 //~ for debugging
501 fprintf(f, "~~~~~~~~~~\n");
502 for (str1 = yxStrings; str1; str1 = str1->yxNext) {
503 fprintf(f, "(%4d,%4d) - (%4d,%4d) [%3d] '",
504 (int)str1->xMin, (int)str1->yMin,
505 (int)str1->xMax, (int)str1->yMax, str1->col);
506 for (i = 0; i < str1->len; ++i) {
507 fputc(str1->text[i] & 0xff, stdout);
508 }
509 printf("'\n");
510 }
511 fprintf(f, "~~~~~~~~~~\n");
512#endif
513
514 // output
515 col1 = 0;
516 yMax = yxStrings ? yxStrings->yMax : fouble(0);
517 for (str1 = yxStrings; str1; str1 = str1->yxNext) {
518
519 // line this string up with the correct column
520 if (rawOrder && col1 == 0) {
521 col1 = str1->col;
522 } else {
523 for (; col1 < str1->col; ++col1) {
524 fwrite(space, 1, spaceLen, f);
525 }
526 }
527
528 // print the string
529 for (i = 0; i < str1->len; ++i) {
530 if ((n = uMap->mapUnicode(str1->text[i], buf, sizeof(buf))) > 0) {
531 fwrite(buf, 1, n, f);
532 }
533 }
534
535 // increment column
536 col1 += str1->len;
537
538 // update yMax for this line
539 if (str1->yMax > yMax) {
540 yMax = str1->yMax;
541 }
542
543 // if we've hit the end of the line...
544 if (!(str1->yxNext &&
545 !(rawOrder && str1->yxNext->yMax < str1->yMin) &&
546 str1->yxNext->yMin < 0.2*str1->yMin + 0.8*str1->yMax &&
547 str1->yxNext->xMin >= str1->xMax)) {
548
549 // print a return
550 fwrite(eol, 1, eolLen, f);
551
552 // print extra vertical space if necessary
553 if (str1->yxNext) {
554
555 // find yMin for next line
556 yMin = str1->yxNext->yMin;
557 for (str2 = str1->yxNext; str2; str2 = str2->yxNext) {
558 if (str2->yMin < yMin) {
559 yMin = str2->yMin;
560 }
561 if (!(str2->yxNext && str2->yxNext->yMin < str2->yMax &&
562 str2->yxNext->xMin >= str2->xMax))
563 break;
564 }
565
566 // print the space
567 d = (int)((yMin - yMax) / (str1->yMax - str1->yMin) + 0.5);
568 // various things (weird font matrices) can result in bogus
569 // values here, so do a sanity check
570 if (rawOrder && d > 2) {
571 d = 2;
572 } else if (!rawOrder && d > 5) {
573 d = 5;
574 }
575 for (; d > 0; --d) {
576 fwrite(eol, 1, eolLen, f);
577 }
578 }
579
580 // set up for next line
581 col1 = 0;
582 yMax = str1->yxNext ? str1->yxNext->yMax : fouble(0);
583 }
584 }
585
586 // end of page
587 fwrite(eol, 1, eolLen, f);
588 fwrite(eop, 1, eopLen, f);
589 fwrite(eol, 1, eolLen, f);
590
591 uMap->decRefCnt();
592}
593
594void TextPage::clear() {
595 TextString *p1, *p2;
596
597 if (curStr) {
598 delete curStr;
599 curStr = NULL;
600 }
601 for (p1 = yxStrings; p1; p1 = p2) {
602 p2 = p1->yxNext;
603 delete p1;
604 }
605 yxStrings = NULL;
606 xyStrings = NULL;
607 yxCur1 = yxCur2 = NULL;
608}
609
610//------------------------------------------------------------------------
611// TextOutputDev
612//------------------------------------------------------------------------
613
614TextOutputDev::TextOutputDev(char *fileName, GBool rawOrderA, GBool append) {
615 text = NULL;
616 rawOrder = rawOrderA;
617 ok = gTrue;
618
619 // open file
620 needClose = gFalse;
621 if (fileName) {
622 if (!strcmp(fileName, "-")) {
623 f = stdout;
624 } else if ((f = fopen(fileName, append ? "a" : "w"))) {
625 needClose = gTrue;
626 } else {
627 error(-1, "Couldn't open text file '%s'", fileName);
628 ok = gFalse;
629 return;
630 }
631 } else {
632 f = NULL;
633 }
634
635 // set up text object
636 text = new TextPage(rawOrder);
637}
638
639TextOutputDev::~TextOutputDev() {
640 if (needClose) {
641#ifdef MACOS
642 ICS_MapRefNumAndAssign((short)f->handle);
643#endif
644 fclose(f);
645 }
646 if (text) {
647 delete text;
648 }
649}
650
651void TextOutputDev::startPage(int pageNum, GfxState *state) {
652 text->clear();
653}
654
655void TextOutputDev::endPage() {
656 text->coalesce();
657 if (f) {
658 text->dump(f);
659 }
660}
661
662void TextOutputDev::updateFont(GfxState *state) {
663 text->updateFont(state);
664}
665
666void TextOutputDev::beginString(GfxState *state, GString *s) {
667 text->beginString(state);
668}
669
670void TextOutputDev::endString(GfxState *state) {
671 text->endString();
672}
673
674void TextOutputDev::drawChar(GfxState *state, fouble x, fouble y,
675 fouble dx, fouble dy,
676 fouble originX, fouble originY,
677 CharCode c, Unicode *u, int uLen) {
678 text->addChar(state, x, y, dx, dy, u, uLen);
679}
680
681GBool TextOutputDev::findText(Unicode *s, int len,
682 GBool top, GBool bottom,
683 fouble *xMin, fouble *yMin,
684 fouble *xMax, fouble *yMax) {
685 return text->findText(s, len, top, bottom, xMin, yMin, xMax, yMax);
686}