//======================================================================== // // CharCodeToUnicode.cc // // Copyright 2001 Derek B. Noonburg // //======================================================================== #ifdef __GNUC__ #pragma implementation #endif #include #include #include #include "gmem.h" #include "gfile.h" #include "GString.h" #include "Error.h" #include "GlobalParams.h" #include "CharCodeToUnicode.h" //------------------------------------------------------------------------ #define maxUnicodeString 8 struct CharCodeToUnicodeString { CharCode c; Unicode u[maxUnicodeString]; int len; }; //------------------------------------------------------------------------ CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *collectionA) { FILE *f; Unicode *mapA; CharCode size, mapLenA; char buf[64]; Unicode u; CharCodeToUnicode *ctu; if (!(f = globalParams->getCIDToUnicodeFile(collectionA))) { error(-1, "Couldn't find cidToUnicode file for the '%s' collection", collectionA->getCString()); return NULL; } size = 32768; mapA = (Unicode *)gmalloc(size * sizeof(Unicode)); mapLenA = 0; while (getLine(buf, sizeof(buf), f)) { if (mapLenA == size) { size *= 2; mapA = (Unicode *)grealloc(mapA, size * sizeof(Unicode)); } if (sscanf(buf, "%x", &u) == 1) { mapA[mapLenA] = u; } else { error(-1, "Bad line (%d) in cidToUnicode file for the '%s' collection", (int)(mapLenA + 1), collectionA->getCString()); mapA[mapLenA] = 0; } ++mapLenA; } ctu = new CharCodeToUnicode(collectionA->copy(), mapA, mapLenA, gTrue, NULL, 0); gfree(mapA); return ctu; } CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) { return new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0); } static char *getLineFromString(char *buf, int size, char **s) { char c; int i; i = 0; while (i < size - 1 && **s) { buf[i++] = c = *(*s)++; if (c == '\x0a') { break; } if (c == '\x0d') { if (**s == '\x0a' && i < size - 1) { buf[i++] = '\x0a'; ++*s; } break; } } buf[i] = '\0'; if (i == 0) { return NULL; } return buf; } CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) { CharCodeToUnicode *ctu; char *p; ctu = new CharCodeToUnicode(NULL); p = buf->getCString(); ctu->parseCMap1((char *(*)(char *, int, void *))&getLineFromString, &p, nBits); return ctu; } void CharCodeToUnicode::parseCMap1(char *(*getLineFunc)(char *, int, void *), void *data, int nBits) { char buf[256]; GBool inBFChar, inBFRange; char *tok1, *tok2, *tok3; int nDigits, n1, n2, n3; CharCode oldLen, i; CharCode code1, code2; Unicode u; char uHex[5]; int j; GString *name; FILE *f; nDigits = nBits / 4; inBFChar = inBFRange = gFalse; while ((*getLineFunc)(buf, sizeof(buf), data)) { tok1 = strtok(buf, " \t\r\n"); if (!tok1 || tok1[0] == '%') { continue; } tok2 = strtok(NULL, " \t\r\n"); tok3 = strtok(NULL, " \t\r\n"); if (inBFChar) { if (!strcmp(tok1, "endbfchar")) { inBFChar = gFalse; } else if (tok2) { n1 = strlen(tok1); n2 = strlen(tok2); if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' && tok2[0] == '<' && tok2[n2 - 1] == '>')) { error(-1, "Illegal line in bfchar block in ToUnicode CMap"); continue; } tok1[n1 - 1] = tok2[n2 - 1] = '\0'; if (sscanf(tok1 + 1, "%x", &code1) != 1) { error(-1, "Illegal line in bfchar block in ToUnicode CMap"); continue; } if (code1 >= mapLen) { oldLen = mapLen; mapLen = (code1 + 256) & ~255; map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode)); for (i = oldLen; i < mapLen; ++i) { map[i] = 0; } } if (n2 == 6) { if (sscanf(tok2 + 1, "%x", &u) != 1) { error(-1, "Illegal line in bfchar block in ToUnicode CMap"); continue; } map[code1] = u; } else { map[code1] = 0; if (sMapLen == sMapSize) { sMapSize += 8; sMap = (CharCodeToUnicodeString *) grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString)); } sMap[sMapLen].c = code1; sMap[sMapLen].len = (n2 - 2) / 4; for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) { strncpy(uHex, tok2 + 1 + j*4, 4); uHex[4] = '\0'; if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) { error(-1, "Illegal line in bfchar block in ToUnicode CMap"); } } ++sMapLen; } } else { error(-1, "Illegal bfchar block in ToUnicode CMap"); } } else if (inBFRange) { if (!strcmp(tok1, "endbfrange")) { inBFRange = gFalse; } else if (tok2 && tok3) { n1 = strlen(tok1); n2 = strlen(tok2); n3 = strlen(tok3); if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' && n2 == 2 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>' && tok3[0] == '<' && tok3[n3 - 1] == '>')) { error(-1, "Illegal line in bfrange block in ToUnicode CMap"); continue; } tok1[n1 - 1] = tok2[n2 - 1] = tok3[n3 - 1] = '\0'; if (sscanf(tok1 + 1, "%x", &code1) != 1 || sscanf(tok2 + 1, "%x", &code2) != 1) { error(-1, "Illegal line in bfrange block in ToUnicode CMap"); continue; } if (code2 >= mapLen) { oldLen = mapLen; mapLen = (code2 + 256) & ~255; map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode)); for (i = oldLen; i < mapLen; ++i) { map[i] = 0; } } if (n3 == 6) { if (sscanf(tok3 + 1, "%x", &u) != 1) { error(-1, "Illegal line in bfrange block in ToUnicode CMap"); continue; } for (; code1 <= code2; ++code1) { map[code1] = u++; } } else { if (sMapLen + (int)(code2 - code1 + 1) > sMapSize) { sMapSize = (sMapSize + (code2 - code1 + 1) + 7) & ~7; sMap = (CharCodeToUnicodeString *) grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString)); } for (i = 0; code1 <= code2; ++code1, ++i) { map[code1] = 0; sMap[sMapLen].c = code1; sMap[sMapLen].len = (n3 - 2) / 4; for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) { strncpy(uHex, tok3 + 1 + j*4, 4); uHex[4] = '\0'; if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) { error(-1, "Illegal line in bfrange block in ToUnicode CMap"); } } sMap[sMapLen].u[sMap[sMapLen].len - 1] += i; ++sMapLen; } } } else { error(-1, "Illegal bfrange block in ToUnicode CMap"); } } else if (tok2 && !strcmp(tok2, "usecmap")) { if (tok1[0] == '/') { name = new GString(tok1 + 1); if ((f = globalParams->findToUnicodeFile(name))) { parseCMap1((char *(*)(char *, int, void *))&getLine, f, nBits); fclose(f); } else { error(-1, "Couldn't find ToUnicode CMap file for '%s'", name->getCString()); } delete name; } } else if (tok2 && !strcmp(tok2, "beginbfchar")) { inBFChar = gTrue; } else if (tok2 && !strcmp(tok2, "beginbfrange")) { inBFRange = gTrue; } } } CharCodeToUnicode::CharCodeToUnicode(GString *collectionA) { CharCode i; collection = collectionA; mapLen = 256; map = (Unicode *)gmalloc(mapLen * sizeof(Unicode)); for (i = 0; i < mapLen; ++i) { map[i] = 0; } sMap = NULL; sMapLen = sMapSize = 0; refCnt = 1; } CharCodeToUnicode::CharCodeToUnicode(GString *collectionA, Unicode *mapA, CharCode mapLenA, GBool copyMap, CharCodeToUnicodeString *sMapA, int sMapLenA) { collection = collectionA; mapLen = mapLenA; if (copyMap) { map = (Unicode *)gmalloc(mapLen * sizeof(Unicode)); memcpy(map, mapA, mapLen * sizeof(Unicode)); } else { map = mapA; } sMap = sMapA; sMapLen = sMapSize = sMapLenA; refCnt = 1; } CharCodeToUnicode::~CharCodeToUnicode() { if (collection) { delete collection; } gfree(map); if (sMap) { gfree(sMap); } } void CharCodeToUnicode::incRefCnt() { ++refCnt; } void CharCodeToUnicode::decRefCnt() { if (--refCnt == 0) { delete this; } } GBool CharCodeToUnicode::match(GString *collectionA) { return collection && !collection->cmp(collectionA); } int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode *u, int size) { int i, j; if (c >= mapLen) { return 0; } if (map[c]) { u[0] = map[c]; return 1; } for (i = 0; i < sMapLen; ++i) { if (sMap[i].c == c) { for (j = 0; j < sMap[i].len && j < size; ++j) { u[j] = sMap[i].u[j]; } return j; } } return 0; } //------------------------------------------------------------------------ CIDToUnicodeCache::CIDToUnicodeCache() { int i; for (i = 0; i < cidToUnicodeCacheSize; ++i) { cache[i] = NULL; } } CIDToUnicodeCache::~CIDToUnicodeCache() { int i; for (i = 0; i < cidToUnicodeCacheSize; ++i) { if (cache[i]) { cache[i]->decRefCnt(); } } } CharCodeToUnicode *CIDToUnicodeCache::getCIDToUnicode(GString *collection) { CharCodeToUnicode *ctu; int i, j; if (cache[0] && cache[0]->match(collection)) { cache[0]->incRefCnt(); return cache[0]; } for (i = 1; i < cidToUnicodeCacheSize; ++i) { if (cache[i] && cache[i]->match(collection)) { ctu = cache[i]; for (j = i; j >= 1; --j) { cache[j] = cache[j - 1]; } cache[0] = ctu; ctu->incRefCnt(); return ctu; } } if ((ctu = CharCodeToUnicode::parseCIDToUnicode(collection))) { if (cache[cidToUnicodeCacheSize - 1]) { cache[cidToUnicodeCacheSize - 1]->decRefCnt(); } for (j = cidToUnicodeCacheSize - 1; j >= 1; --j) { cache[j] = cache[j - 1]; } cache[0] = ctu; ctu->incRefCnt(); return ctu; } return NULL; }