From 98a1e3f36567639344f12932b629e526a8783aa8 Mon Sep 17 00:00:00 2001 From: sandman Date: Sat, 13 Apr 2002 00:47:20 +0000 Subject: CVS import of QPdf --- (limited to 'noncore/unsupported/qpdf/xpdf/TextOutputDev.cc') diff --git a/noncore/unsupported/qpdf/xpdf/TextOutputDev.cc b/noncore/unsupported/qpdf/xpdf/TextOutputDev.cc new file mode 100644 index 0000000..aa9366a --- a/dev/null +++ b/noncore/unsupported/qpdf/xpdf/TextOutputDev.cc @@ -0,0 +1,686 @@ +//======================================================================== +// +// TextOutputDev.cc +// +// Copyright 1997 Derek B. Noonburg +// +//======================================================================== + +#ifdef __GNUC__ +#pragma implementation +#endif + +#include +#include +#include +#include +#include +#include +#include "GString.h" +#include "gmem.h" +#include "config.h" +#include "Error.h" +#include "GlobalParams.h" +#include "UnicodeMap.h" +#include "GfxState.h" +#include "TextOutputDev.h" + +#ifdef MACOS +// needed for setting type/creator of MacOS files +#include "ICSupport.h" +#endif + +//------------------------------------------------------------------------ +// TextString +//------------------------------------------------------------------------ + +TextString::TextString(GfxState *state, fouble fontSize) { + GfxFont *font; + fouble x, y; + + state->transform(state->getCurX(), state->getCurY(), &x, &y); + if ((font = state->getFont())) { + yMin = y - font->getAscent() * fontSize; + yMax = y - font->getDescent() * fontSize; + } else { + // this means that the PDF file draws text without a current font, + // which should never happen + yMin = y - 0.95 * fontSize; + yMax = y + 0.35 * fontSize; + } + col = 0; + text = NULL; + xRight = NULL; + len = size = 0; + yxNext = NULL; + xyNext = NULL; +} + +TextString::~TextString() { + gfree(text); + gfree(xRight); +} + +void TextString::addChar(GfxState *state, fouble x, fouble y, + fouble dx, fouble dy, Unicode u) { + if (len == size) { + size += 16; + text = (Unicode *)grealloc(text, size * sizeof(Unicode)); + xRight = (fouble *)grealloc(xRight, size * sizeof(fouble)); + } + text[len] = u; + if (len == 0) { + xMin = x; + } + xMax = xRight[len] = x + dx; + ++len; +} + +//------------------------------------------------------------------------ +// TextPage +//------------------------------------------------------------------------ + +TextPage::TextPage(GBool rawOrderA) { + rawOrder = rawOrderA; + curStr = NULL; + fontSize = 0; + yxStrings = NULL; + xyStrings = NULL; + yxCur1 = yxCur2 = NULL; + nest = 0; +} + +TextPage::~TextPage() { + clear(); +} + +void TextPage::updateFont(GfxState *state) { + GfxFont *font; + fouble *fm; + char *name; + int code; + + // adjust the font size + fontSize = state->getTransformedFontSize(); + if ((font = state->getFont()) && font->getType() == fontType3) { + // This is a hack which makes it possible to deal with some Type 3 + // fonts. The problem is that it's impossible to know what the + // base coordinate system used in the font is without actually + // rendering the font. This code tries to guess by looking at the + // width of the character 'm' (which breaks if the font is a + // subset that doesn't contain 'm'). + for (code = 0; code < 256; ++code) { + if ((name = ((Gfx8BitFont *)font)->getCharName(code)) && + name[0] == 'm' && name[1] == '\0') { + break; + } + } + if (code < 256) { + // 600 is a generic average 'm' width -- yes, this is a hack + fontSize *= ((Gfx8BitFont *)font)->getWidth(code) / 0.6; + } + fm = font->getFontMatrix(); + if (fm[0] != 0) { + fontSize *= fabs(fm[3] / fm[0]); + } + } +} + +void TextPage::beginString(GfxState *state) { + // This check is needed because Type 3 characters can contain + // text-drawing operations. + if (curStr) { + ++nest; + return; + } + + curStr = new TextString(state, fontSize); +} + +void TextPage::addChar(GfxState *state, fouble x, fouble y, + fouble dx, fouble dy, Unicode *u, int uLen) { + fouble x1, y1, w1, h1, dx2, dy2; + int n, i; + + state->transform(x, y, &x1, &y1); + n = curStr->len; + if (n > 0 && + x1 - curStr->xRight[n-1] > 0.1 * (curStr->yMax - curStr->yMin)) { + endString(); + beginString(state); + } + state->textTransformDelta(state->getCharSpace() * state->getHorizScaling(), + 0, &dx2, &dy2); + dx -= dx2; + dy -= dy2; + state->transformDelta(dx, dy, &w1, &h1); + w1 /= uLen; + h1 /= uLen; + for (i = 0; i < uLen; ++i) { + curStr->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, u[i]); + } +} + +void TextPage::endString() { + TextString *p1, *p2; + fouble h, y1, y2; + + // This check is needed because Type 3 characters can contain + // text-drawing operations. + if (nest > 0) { + --nest; + return; + } + + // throw away zero-length strings -- they don't have valid xMin/xMax + // values, and they're useless anyway + if (curStr->len == 0) { + delete curStr; + curStr = NULL; + return; + } + + // insert string in y-major list + h = curStr->yMax - curStr->yMin; + y1 = curStr->yMin + 0.5 * h; + y2 = curStr->yMin + 0.8 * h; + if (rawOrder) { + p1 = yxCur1; + p2 = NULL; + } else if ((!yxCur1 || + (y1 >= yxCur1->yMin && + (y2 >= yxCur1->yMax || curStr->xMax >= yxCur1->xMin))) && + (!yxCur2 || + (y1 < yxCur2->yMin || + (y2 < yxCur2->yMax && curStr->xMax < yxCur2->xMin)))) { + p1 = yxCur1; + p2 = yxCur2; + } else { + for (p1 = NULL, p2 = yxStrings; p2; p1 = p2, p2 = p2->yxNext) { + if (y1 < p2->yMin || (y2 < p2->yMax && curStr->xMax < p2->xMin)) { + break; + } + } + yxCur2 = p2; + } + yxCur1 = curStr; + if (p1) { + p1->yxNext = curStr; + } else { + yxStrings = curStr; + } + curStr->yxNext = p2; + curStr = NULL; +} + +void TextPage::coalesce() { + TextString *str1, *str2; + fouble space, d; + GBool addSpace; + int n, i; + +#if 0 //~ for debugging + for (str1 = yxStrings; str1; str1 = str1->yxNext) { + printf("x=%3d..%3d y=%3d..%3d size=%2d '", + (int)str1->xMin, (int)str1->xMax, (int)str1->yMin, (int)str1->yMax, + (int)(str1->yMax - str1->yMin)); + for (i = 0; i < str1->len; ++i) { + fputc(str1->text[i] & 0xff, stdout); + } + printf("'\n"); + } + printf("\n------------------------------------------------------------\n\n"); +#endif + str1 = yxStrings; + while (str1 && (str2 = str1->yxNext)) { + space = str1->yMax - str1->yMin; + d = str2->xMin - str1->xMax; + if (((rawOrder && + ((str2->yMin >= str1->yMin && str2->yMin <= str1->yMax) || + (str2->yMax >= str1->yMin && str2->yMax <= str1->yMax))) || + (!rawOrder && str2->yMin < str1->yMax)) && + d > -0.5 * space && d < space) { + n = str1->len + str2->len; + if ((addSpace = d > 0.1 * space)) { + ++n; + } + str1->size = (n + 15) & ~15; + str1->text = (Unicode *)grealloc(str1->text, + str1->size * sizeof(Unicode)); + str1->xRight = (fouble *)grealloc(str1->xRight, + str1->size * sizeof(fouble)); + if (addSpace) { + str1->text[str1->len] = 0x20; + str1->xRight[str1->len] = str2->xMin; + ++str1->len; + } + for (i = 0; i < str2->len; ++i) { + str1->text[str1->len] = str2->text[i]; + str1->xRight[str1->len] = str2->xRight[i]; + ++str1->len; + } + if (str2->xMax > str1->xMax) { + str1->xMax = str2->xMax; + } + if (str2->yMax > str1->yMax) { + str1->yMax = str2->yMax; + } + str1->yxNext = str2->yxNext; + delete str2; + } else { + str1 = str2; + } + } +} + +GBool TextPage::findText(Unicode *s, int len, + GBool top, GBool bottom, + fouble *xMin, fouble *yMin, + fouble *xMax, fouble *yMax) { + TextString *str; + Unicode *p; + Unicode u1, u2; + int m, i, j; + fouble x; + + // scan all strings on page + for (str = yxStrings; str; str = str->yxNext) { + + // check: above top limit? + if (!top && (str->yMax < *yMin || + (str->yMin < *yMin && str->xMax <= *xMin))) { + continue; + } + + // check: below bottom limit? + if (!bottom && (str->yMin > *yMax || + (str->yMax > *yMax && str->xMin >= *xMax))) { + return gFalse; + } + + // search each position in this string + m = str->len; + for (i = 0, p = str->text; i <= m - len; ++i, ++p) { + + // check: above top limit? + if (!top && str->yMin < *yMin) { + x = (((i == 0) ? str->xMin : str->xRight[i-1]) + str->xRight[i]) / 2; + if (x < *xMin) { + continue; + } + } + + // check: below bottom limit? + if (!bottom && str->yMax > *yMax) { + x = (((i == 0) ? str->xMin : str->xRight[i-1]) + str->xRight[i]) / 2; + if (x > *xMax) { + return gFalse; + } + } + + // compare the strings + for (j = 0; j < len; ++j) { +#if 1 //~ this lowercases Latin A-Z only -- this will eventually be + //~ extended to handle other character sets + if (p[j] >= 0x41 && p[j] <= 0x5a) { + u1 = p[j] + 0x20; + } else { + u1 = p[j]; + } + if (s[j] >= 0x41 && s[j] <= 0x5a) { + u2 = s[j] + 0x20; + } else { + u2 = s[j]; + } +#endif + if (u1 != u2) { + break; + } + } + + // found it + if (j == len) { + *xMin = (i == 0) ? str->xMin : str->xRight[i-1]; + *xMax = str->xRight[i + len - 1]; + *yMin = str->yMin; + *yMax = str->yMax; + return gTrue; + } + } + } + return gFalse; +} + +GString *TextPage::getText(fouble xMin, fouble yMin, + fouble xMax, fouble yMax) { + GString *s; + UnicodeMap *uMap; + char space[8], eol[16], buf[8]; + int spaceLen, eolLen, n; + TextString *str1; + fouble x0, x1, x2, y; + fouble xPrev, yPrev; + int i1, i2, i; + GBool multiLine; + + s = new GString(); + if (!(uMap = globalParams->getTextEncoding())) { + return s; + } + spaceLen = uMap->mapUnicode(0x20, space, sizeof(space)); + eolLen = 0; // make gcc happy + switch (globalParams->getTextEOL()) { + case eolUnix: + eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol)); + break; + case eolDOS: + eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol)); + eolLen += uMap->mapUnicode(0x0a, eol + eolLen, sizeof(eol) - eolLen); + break; + case eolMac: + eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol)); + break; + } + xPrev = yPrev = 0; + multiLine = gFalse; + for (str1 = yxStrings; str1; str1 = str1->yxNext) { + y = 0.5 * (str1->yMin + str1->yMax); + if (y > yMax) { + break; + } + if (y > yMin && str1->xMin < xMax && str1->xMax > xMin) { + x0 = x1 = x2 = str1->xMin; + for (i1 = 0; i1 < str1->len; ++i1) { + x0 = (i1==0) ? str1->xMin : str1->xRight[i1-1]; + x1 = str1->xRight[i1]; + if (0.5 * (x0 + x1) >= xMin) { + break; + } + } + for (i2 = str1->len - 1; i2 > i1; --i2) { + x1 = (i2==0) ? str1->xMin : str1->xRight[i2-1]; + x2 = str1->xRight[i2]; + if (0.5 * (x1 + x2) <= xMax) { + break; + } + } + if (s->getLength() > 0) { + if (x0 < xPrev || str1->yMin > yPrev) { + s->append(eol, eolLen); + multiLine = gTrue; + } else { + for (i = 0; i < 4; ++i) { + s->append(space, spaceLen); + } + } + } + for (i = i1; i <= i2; ++i) { + n = uMap->mapUnicode(str1->text[i], buf, sizeof(buf)); + s->append(buf, n); + } + xPrev = x2; + yPrev = str1->yMax; + } + } + if (multiLine) { + s->append(eol, eolLen); + } + uMap->decRefCnt(); + return s; +} + +void TextPage::dump(FILE *f) { + UnicodeMap *uMap; + char space[8], eol[16], eop[8], buf[8]; + int spaceLen, eolLen, eopLen, n; + TextString *str1, *str2, *str3; + fouble yMin, yMax; + int col1, col2, d, i; + + // get the output encoding + if (!(uMap = globalParams->getTextEncoding())) { + return; + } + spaceLen = uMap->mapUnicode(0x20, space, sizeof(space)); + eolLen = 0; // make gcc happy + switch (globalParams->getTextEOL()) { + case eolUnix: + eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol)); + break; + case eolDOS: + eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol)); + eolLen += uMap->mapUnicode(0x0a, eol + eolLen, sizeof(eol) - eolLen); + break; + case eolMac: + eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol)); + break; + } + eopLen = uMap->mapUnicode(0x0c, eop, sizeof(eop)); + + // build x-major list + xyStrings = NULL; + for (str1 = yxStrings; str1; str1 = str1->yxNext) { + for (str2 = NULL, str3 = xyStrings; + str3; + str2 = str3, str3 = str3->xyNext) { + if (str1->xMin < str3->xMin || + (str1->xMin == str3->xMin && str1->yMin < str3->yMin)) { + break; + } + } + if (str2) { + str2->xyNext = str1; + } else { + xyStrings = str1; + } + str1->xyNext = str3; + } + + // do column assignment + for (str1 = xyStrings; str1; str1 = str1->xyNext) { + col1 = 0; + for (str2 = xyStrings; str2 != str1; str2 = str2->xyNext) { + if (str1->xMin >= str2->xMax) { + col2 = str2->col + str2->len + 4; + if (col2 > col1) { + col1 = col2; + } + } else if (str1->xMin > str2->xMin) { + col2 = str2->col + + (int)(((str1->xMin - str2->xMin) / (str2->xMax - str2->xMin)) * + str2->len); + if (col2 > col1) { + col1 = col2; + } + } + } + str1->col = col1; + } + +#if 0 //~ for debugging + fprintf(f, "~~~~~~~~~~\n"); + for (str1 = yxStrings; str1; str1 = str1->yxNext) { + fprintf(f, "(%4d,%4d) - (%4d,%4d) [%3d] '", + (int)str1->xMin, (int)str1->yMin, + (int)str1->xMax, (int)str1->yMax, str1->col); + for (i = 0; i < str1->len; ++i) { + fputc(str1->text[i] & 0xff, stdout); + } + printf("'\n"); + } + fprintf(f, "~~~~~~~~~~\n"); +#endif + + // output + col1 = 0; + yMax = yxStrings ? yxStrings->yMax : fouble(0); + for (str1 = yxStrings; str1; str1 = str1->yxNext) { + + // line this string up with the correct column + if (rawOrder && col1 == 0) { + col1 = str1->col; + } else { + for (; col1 < str1->col; ++col1) { + fwrite(space, 1, spaceLen, f); + } + } + + // print the string + for (i = 0; i < str1->len; ++i) { + if ((n = uMap->mapUnicode(str1->text[i], buf, sizeof(buf))) > 0) { + fwrite(buf, 1, n, f); + } + } + + // increment column + col1 += str1->len; + + // update yMax for this line + if (str1->yMax > yMax) { + yMax = str1->yMax; + } + + // if we've hit the end of the line... + if (!(str1->yxNext && + !(rawOrder && str1->yxNext->yMax < str1->yMin) && + str1->yxNext->yMin < 0.2*str1->yMin + 0.8*str1->yMax && + str1->yxNext->xMin >= str1->xMax)) { + + // print a return + fwrite(eol, 1, eolLen, f); + + // print extra vertical space if necessary + if (str1->yxNext) { + + // find yMin for next line + yMin = str1->yxNext->yMin; + for (str2 = str1->yxNext; str2; str2 = str2->yxNext) { + if (str2->yMin < yMin) { + yMin = str2->yMin; + } + if (!(str2->yxNext && str2->yxNext->yMin < str2->yMax && + str2->yxNext->xMin >= str2->xMax)) + break; + } + + // print the space + d = (int)((yMin - yMax) / (str1->yMax - str1->yMin) + 0.5); + // various things (weird font matrices) can result in bogus + // values here, so do a sanity check + if (rawOrder && d > 2) { + d = 2; + } else if (!rawOrder && d > 5) { + d = 5; + } + for (; d > 0; --d) { + fwrite(eol, 1, eolLen, f); + } + } + + // set up for next line + col1 = 0; + yMax = str1->yxNext ? str1->yxNext->yMax : fouble(0); + } + } + + // end of page + fwrite(eol, 1, eolLen, f); + fwrite(eop, 1, eopLen, f); + fwrite(eol, 1, eolLen, f); + + uMap->decRefCnt(); +} + +void TextPage::clear() { + TextString *p1, *p2; + + if (curStr) { + delete curStr; + curStr = NULL; + } + for (p1 = yxStrings; p1; p1 = p2) { + p2 = p1->yxNext; + delete p1; + } + yxStrings = NULL; + xyStrings = NULL; + yxCur1 = yxCur2 = NULL; +} + +//------------------------------------------------------------------------ +// TextOutputDev +//------------------------------------------------------------------------ + +TextOutputDev::TextOutputDev(char *fileName, GBool rawOrderA, GBool append) { + text = NULL; + rawOrder = rawOrderA; + ok = gTrue; + + // open file + needClose = gFalse; + if (fileName) { + if (!strcmp(fileName, "-")) { + f = stdout; + } else if ((f = fopen(fileName, append ? "a" : "w"))) { + needClose = gTrue; + } else { + error(-1, "Couldn't open text file '%s'", fileName); + ok = gFalse; + return; + } + } else { + f = NULL; + } + + // set up text object + text = new TextPage(rawOrder); +} + +TextOutputDev::~TextOutputDev() { + if (needClose) { +#ifdef MACOS + ICS_MapRefNumAndAssign((short)f->handle); +#endif + fclose(f); + } + if (text) { + delete text; + } +} + +void TextOutputDev::startPage(int pageNum, GfxState *state) { + text->clear(); +} + +void TextOutputDev::endPage() { + text->coalesce(); + if (f) { + text->dump(f); + } +} + +void TextOutputDev::updateFont(GfxState *state) { + text->updateFont(state); +} + +void TextOutputDev::beginString(GfxState *state, GString *s) { + text->beginString(state); +} + +void TextOutputDev::endString(GfxState *state) { + text->endString(); +} + +void TextOutputDev::drawChar(GfxState *state, fouble x, fouble y, + fouble dx, fouble dy, + fouble originX, fouble originY, + CharCode c, Unicode *u, int uLen) { + text->addChar(state, x, y, dx, dy, u, uLen); +} + +GBool TextOutputDev::findText(Unicode *s, int len, + GBool top, GBool bottom, + fouble *xMin, fouble *yMin, + fouble *xMax, fouble *yMax) { + return text->findText(s, len, top, bottom, xMin, yMin, xMax, yMax); +} -- cgit v0.9.0.2