From 98a1e3f36567639344f12932b629e526a8783aa8 Mon Sep 17 00:00:00 2001 From: sandman Date: Sat, 13 Apr 2002 00:47:20 +0000 Subject: CVS import of QPdf --- (limited to 'noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc') diff --git a/noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc b/noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc new file mode 100644 index 0000000..6793398 --- a/dev/null +++ b/noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc @@ -0,0 +1,394 @@ +//======================================================================== +// +// CharCodeToUnicode.cc +// +// Copyright 2001 Derek B. Noonburg +// +//======================================================================== + +#ifdef __GNUC__ +#pragma implementation +#endif + +#include +#include +#include +#include "gmem.h" +#include "gfile.h" +#include "GString.h" +#include "Error.h" +#include "GlobalParams.h" +#include "CharCodeToUnicode.h" + +//------------------------------------------------------------------------ + +#define maxUnicodeString 8 + +struct CharCodeToUnicodeString { + CharCode c; + Unicode u[maxUnicodeString]; + int len; +}; + +//------------------------------------------------------------------------ + +CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *collectionA) { + FILE *f; + Unicode *mapA; + CharCode size, mapLenA; + char buf[64]; + Unicode u; + CharCodeToUnicode *ctu; + + if (!(f = globalParams->getCIDToUnicodeFile(collectionA))) { + error(-1, "Couldn't find cidToUnicode file for the '%s' collection", + collectionA->getCString()); + return NULL; + } + + size = 32768; + mapA = (Unicode *)gmalloc(size * sizeof(Unicode)); + mapLenA = 0; + + while (getLine(buf, sizeof(buf), f)) { + if (mapLenA == size) { + size *= 2; + mapA = (Unicode *)grealloc(mapA, size * sizeof(Unicode)); + } + if (sscanf(buf, "%x", &u) == 1) { + mapA[mapLenA] = u; + } else { + error(-1, "Bad line (%d) in cidToUnicode file for the '%s' collection", + (int)(mapLenA + 1), collectionA->getCString()); + mapA[mapLenA] = 0; + } + ++mapLenA; + } + + ctu = new CharCodeToUnicode(collectionA->copy(), mapA, mapLenA, gTrue, + NULL, 0); + gfree(mapA); + return ctu; +} + +CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) { + return new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0); +} + +static char *getLineFromString(char *buf, int size, char **s) { + char c; + int i; + + i = 0; + while (i < size - 1 && **s) { + buf[i++] = c = *(*s)++; + if (c == '\x0a') { + break; + } + if (c == '\x0d') { + if (**s == '\x0a' && i < size - 1) { + buf[i++] = '\x0a'; + ++*s; + } + break; + } + } + buf[i] = '\0'; + if (i == 0) { + return NULL; + } + return buf; +} + +CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) { + CharCodeToUnicode *ctu; + char *p; + + ctu = new CharCodeToUnicode(NULL); + p = buf->getCString(); + ctu->parseCMap1((char *(*)(char *, int, void *))&getLineFromString, + &p, nBits); + return ctu; +} + +void CharCodeToUnicode::parseCMap1(char *(*getLineFunc)(char *, int, void *), + void *data, int nBits) { + char buf[256]; + GBool inBFChar, inBFRange; + char *tok1, *tok2, *tok3; + int nDigits, n1, n2, n3; + CharCode oldLen, i; + CharCode code1, code2; + Unicode u; + char uHex[5]; + int j; + GString *name; + FILE *f; + + nDigits = nBits / 4; + inBFChar = inBFRange = gFalse; + while ((*getLineFunc)(buf, sizeof(buf), data)) { + tok1 = strtok(buf, " \t\r\n"); + if (!tok1 || tok1[0] == '%') { + continue; + } + tok2 = strtok(NULL, " \t\r\n"); + tok3 = strtok(NULL, " \t\r\n"); + if (inBFChar) { + if (!strcmp(tok1, "endbfchar")) { + inBFChar = gFalse; + } else if (tok2) { + n1 = strlen(tok1); + n2 = strlen(tok2); + if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' && + tok2[0] == '<' && tok2[n2 - 1] == '>')) { + error(-1, "Illegal line in bfchar block in ToUnicode CMap"); + continue; + } + tok1[n1 - 1] = tok2[n2 - 1] = '\0'; + if (sscanf(tok1 + 1, "%x", &code1) != 1) { + error(-1, "Illegal line in bfchar block in ToUnicode CMap"); + continue; + } + if (code1 >= mapLen) { + oldLen = mapLen; + mapLen = (code1 + 256) & ~255; + map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode)); + for (i = oldLen; i < mapLen; ++i) { + map[i] = 0; + } + } + if (n2 == 6) { + if (sscanf(tok2 + 1, "%x", &u) != 1) { + error(-1, "Illegal line in bfchar block in ToUnicode CMap"); + continue; + } + map[code1] = u; + } else { + map[code1] = 0; + if (sMapLen == sMapSize) { + sMapSize += 8; + sMap = (CharCodeToUnicodeString *) + grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString)); + } + sMap[sMapLen].c = code1; + sMap[sMapLen].len = (n2 - 2) / 4; + for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) { + strncpy(uHex, tok2 + 1 + j*4, 4); + uHex[4] = '\0'; + if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) { + error(-1, "Illegal line in bfchar block in ToUnicode CMap"); + } + } + ++sMapLen; + } + } else { + error(-1, "Illegal bfchar block in ToUnicode CMap"); + } + } else if (inBFRange) { + if (!strcmp(tok1, "endbfrange")) { + inBFRange = gFalse; + } else if (tok2 && tok3) { + n1 = strlen(tok1); + n2 = strlen(tok2); + n3 = strlen(tok3); + if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' && + n2 == 2 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>' && + tok3[0] == '<' && tok3[n3 - 1] == '>')) { + error(-1, "Illegal line in bfrange block in ToUnicode CMap"); + continue; + } + tok1[n1 - 1] = tok2[n2 - 1] = tok3[n3 - 1] = '\0'; + if (sscanf(tok1 + 1, "%x", &code1) != 1 || + sscanf(tok2 + 1, "%x", &code2) != 1) { + error(-1, "Illegal line in bfrange block in ToUnicode CMap"); + continue; + } + if (code2 >= mapLen) { + oldLen = mapLen; + mapLen = (code2 + 256) & ~255; + map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode)); + for (i = oldLen; i < mapLen; ++i) { + map[i] = 0; + } + } + if (n3 == 6) { + if (sscanf(tok3 + 1, "%x", &u) != 1) { + error(-1, "Illegal line in bfrange block in ToUnicode CMap"); + continue; + } + for (; code1 <= code2; ++code1) { + map[code1] = u++; + } + } else { + if (sMapLen + (int)(code2 - code1 + 1) > sMapSize) { + sMapSize = (sMapSize + (code2 - code1 + 1) + 7) & ~7; + sMap = (CharCodeToUnicodeString *) + grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString)); + } + for (i = 0; code1 <= code2; ++code1, ++i) { + map[code1] = 0; + sMap[sMapLen].c = code1; + sMap[sMapLen].len = (n3 - 2) / 4; + for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) { + strncpy(uHex, tok3 + 1 + j*4, 4); + uHex[4] = '\0'; + if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) { + error(-1, "Illegal line in bfrange block in ToUnicode CMap"); + } + } + sMap[sMapLen].u[sMap[sMapLen].len - 1] += i; + ++sMapLen; + } + } + } else { + error(-1, "Illegal bfrange block in ToUnicode CMap"); + } + } else if (tok2 && !strcmp(tok2, "usecmap")) { + if (tok1[0] == '/') { + name = new GString(tok1 + 1); + if ((f = globalParams->findToUnicodeFile(name))) { + parseCMap1((char *(*)(char *, int, void *))&getLine, f, nBits); + fclose(f); + } else { + error(-1, "Couldn't find ToUnicode CMap file for '%s'", + name->getCString()); + } + delete name; + } + } else if (tok2 && !strcmp(tok2, "beginbfchar")) { + inBFChar = gTrue; + } else if (tok2 && !strcmp(tok2, "beginbfrange")) { + inBFRange = gTrue; + } + } +} + +CharCodeToUnicode::CharCodeToUnicode(GString *collectionA) { + CharCode i; + + collection = collectionA; + mapLen = 256; + map = (Unicode *)gmalloc(mapLen * sizeof(Unicode)); + for (i = 0; i < mapLen; ++i) { + map[i] = 0; + } + sMap = NULL; + sMapLen = sMapSize = 0; + refCnt = 1; +} + +CharCodeToUnicode::CharCodeToUnicode(GString *collectionA, Unicode *mapA, + CharCode mapLenA, GBool copyMap, + CharCodeToUnicodeString *sMapA, + int sMapLenA) { + collection = collectionA; + mapLen = mapLenA; + if (copyMap) { + map = (Unicode *)gmalloc(mapLen * sizeof(Unicode)); + memcpy(map, mapA, mapLen * sizeof(Unicode)); + } else { + map = mapA; + } + sMap = sMapA; + sMapLen = sMapSize = sMapLenA; + refCnt = 1; +} + +CharCodeToUnicode::~CharCodeToUnicode() { + if (collection) { + delete collection; + } + gfree(map); + if (sMap) { + gfree(sMap); + } +} + +void CharCodeToUnicode::incRefCnt() { + ++refCnt; +} + +void CharCodeToUnicode::decRefCnt() { + if (--refCnt == 0) { + delete this; + } +} + +GBool CharCodeToUnicode::match(GString *collectionA) { + return collection && !collection->cmp(collectionA); +} + +int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode *u, int size) { + int i, j; + + if (c >= mapLen) { + return 0; + } + if (map[c]) { + u[0] = map[c]; + return 1; + } + for (i = 0; i < sMapLen; ++i) { + if (sMap[i].c == c) { + for (j = 0; j < sMap[i].len && j < size; ++j) { + u[j] = sMap[i].u[j]; + } + return j; + } + } + return 0; +} + +//------------------------------------------------------------------------ + +CIDToUnicodeCache::CIDToUnicodeCache() { + int i; + + for (i = 0; i < cidToUnicodeCacheSize; ++i) { + cache[i] = NULL; + } +} + +CIDToUnicodeCache::~CIDToUnicodeCache() { + int i; + + for (i = 0; i < cidToUnicodeCacheSize; ++i) { + if (cache[i]) { + cache[i]->decRefCnt(); + } + } +} + +CharCodeToUnicode *CIDToUnicodeCache::getCIDToUnicode(GString *collection) { + CharCodeToUnicode *ctu; + int i, j; + + if (cache[0] && cache[0]->match(collection)) { + cache[0]->incRefCnt(); + return cache[0]; + } + for (i = 1; i < cidToUnicodeCacheSize; ++i) { + if (cache[i] && cache[i]->match(collection)) { + ctu = cache[i]; + for (j = i; j >= 1; --j) { + cache[j] = cache[j - 1]; + } + cache[0] = ctu; + ctu->incRefCnt(); + return ctu; + } + } + if ((ctu = CharCodeToUnicode::parseCIDToUnicode(collection))) { + if (cache[cidToUnicodeCacheSize - 1]) { + cache[cidToUnicodeCacheSize - 1]->decRefCnt(); + } + for (j = cidToUnicodeCacheSize - 1; j >= 1; --j) { + cache[j] = cache[j - 1]; + } + cache[0] = ctu; + ctu->incRefCnt(); + return ctu; + } + return NULL; +} -- cgit v0.9.0.2