summaryrefslogtreecommitdiff
path: root/noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc
Unidiff
Diffstat (limited to 'noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc') (more/less context) (ignore whitespace changes)
-rw-r--r--noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc160
1 files changed, 78 insertions, 82 deletions
diff --git a/noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc b/noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc
index 6793398..912981e 100644
--- a/noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc
+++ b/noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc
@@ -1,11 +1,11 @@
1//======================================================================== 1//========================================================================
2// 2//
3// CharCodeToUnicode.cc 3// CharCodeToUnicode.cc
4// 4//
5// Copyright 2001 Derek B. Noonburg 5// Copyright 2001-2002 Glyph & Cog, LLC
6// 6//
7//======================================================================== 7//========================================================================
8 8
9#ifdef __GNUC__ 9#ifdef __GNUC__
10#pragma implementation 10#pragma implementation
11#endif 11#endif
@@ -15,12 +15,13 @@
15#include <string.h> 15#include <string.h>
16#include "gmem.h" 16#include "gmem.h"
17#include "gfile.h" 17#include "gfile.h"
18#include "GString.h" 18#include "GString.h"
19#include "Error.h" 19#include "Error.h"
20#include "GlobalParams.h" 20#include "GlobalParams.h"
21#include "PSTokenizer.h"
21#include "CharCodeToUnicode.h" 22#include "CharCodeToUnicode.h"
22 23
23//------------------------------------------------------------------------ 24//------------------------------------------------------------------------
24 25
25#define maxUnicodeString 8 26#define maxUnicodeString 8
26 27
@@ -29,12 +30,32 @@ struct CharCodeToUnicodeString {
29 Unicode u[maxUnicodeString]; 30 Unicode u[maxUnicodeString];
30 int len; 31 int len;
31}; 32};
32 33
33//------------------------------------------------------------------------ 34//------------------------------------------------------------------------
34 35
36static int getCharFromString(void *data) {
37 char *p;
38 int c;
39
40 p = *(char **)data;
41 if (*p) {
42 c = *p++;
43 *(char **)data = p;
44 } else {
45 c = EOF;
46 }
47 return c;
48}
49
50static int getCharFromFile(void *data) {
51 return fgetc((FILE *)data);
52}
53
54//------------------------------------------------------------------------
55
35CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *collectionA) { 56CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *collectionA) {
36 FILE *f; 57 FILE *f;
37 Unicode *mapA; 58 Unicode *mapA;
38 CharCode size, mapLenA; 59 CharCode size, mapLenA;
39 char buf[64]; 60 char buf[64];
40 Unicode u; 61 Unicode u;
@@ -72,98 +93,83 @@ CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *collectionA) {
72} 93}
73 94
74CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) { 95CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) {
75 return new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0); 96 return new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0);
76} 97}
77 98
78static char *getLineFromString(char *buf, int size, char **s) {
79 char c;
80 int i;
81
82 i = 0;
83 while (i < size - 1 && **s) {
84 buf[i++] = c = *(*s)++;
85 if (c == '\x0a') {
86 break;
87 }
88 if (c == '\x0d') {
89 if (**s == '\x0a' && i < size - 1) {
90 buf[i++] = '\x0a';
91 ++*s;
92 }
93 break;
94 }
95 }
96 buf[i] = '\0';
97 if (i == 0) {
98 return NULL;
99 }
100 return buf;
101}
102
103CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) { 99CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) {
104 CharCodeToUnicode *ctu; 100 CharCodeToUnicode *ctu;
105 char *p; 101 char *p;
106 102
107 ctu = new CharCodeToUnicode(NULL); 103 ctu = new CharCodeToUnicode(NULL);
108 p = buf->getCString(); 104 p = buf->getCString();
109 ctu->parseCMap1((char *(*)(char *, int, void *))&getLineFromString, 105 ctu->parseCMap1(&getCharFromString, &p, nBits);
110 &p, nBits);
111 return ctu; 106 return ctu;
112} 107}
113 108
114void CharCodeToUnicode::parseCMap1(char *(*getLineFunc)(char *, int, void *), 109void CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data,
115 void *data, int nBits) { 110 int nBits) {
116 char buf[256]; 111 PSTokenizer *pst;
117 GBool inBFChar, inBFRange; 112 char tok1[256], tok2[256], tok3[256];
118 char *tok1, *tok2, *tok3;
119 int nDigits, n1, n2, n3; 113 int nDigits, n1, n2, n3;
120 CharCode oldLen, i; 114 CharCode oldLen, i;
121 CharCode code1, code2; 115 CharCode code1, code2;
122 Unicode u; 116 Unicode u;
123 char uHex[5]; 117 char uHex[5];
124 int j; 118 int j;
125 GString *name; 119 GString *name;
126 FILE *f; 120 FILE *f;
127 121
128 nDigits = nBits / 4; 122 nDigits = nBits / 4;
129 inBFChar = inBFRange = gFalse; 123 pst = new PSTokenizer(getCharFunc, data);
130 while ((*getLineFunc)(buf, sizeof(buf), data)) { 124 pst->getToken(tok1, sizeof(tok1), &n1);
131 tok1 = strtok(buf, " \t\r\n"); 125 while (pst->getToken(tok2, sizeof(tok2), &n2)) {
132 if (!tok1 || tok1[0] == '%') { 126 if (!strcmp(tok2, "usecmap")) {
133 continue; 127 if (tok1[0] == '/') {
134 } 128 name = new GString(tok1 + 1);
135 tok2 = strtok(NULL, " \t\r\n"); 129 if ((f = globalParams->findToUnicodeFile(name))) {
136 tok3 = strtok(NULL, " \t\r\n"); 130 parseCMap1(&getCharFromFile, f, nBits);
137 if (inBFChar) { 131 fclose(f);
138 if (!strcmp(tok1, "endbfchar")) { 132 } else {
139 inBFChar = gFalse; 133 error(-1, "Couldn't find ToUnicode CMap file for '%s'",
140 } else if (tok2) { 134 name->getCString());
141 n1 = strlen(tok1); 135 }
142 n2 = strlen(tok2); 136 delete name;
137 }
138 pst->getToken(tok1, sizeof(tok1), &n1);
139 } else if (!strcmp(tok2, "beginbfchar")) {
140 while (pst->getToken(tok1, sizeof(tok1), &n1)) {
141 if (!strcmp(tok1, "endbfchar")) {
142 break;
143 }
144 if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
145 !strcmp(tok2, "endbfchar")) {
146 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
147 break;
148 }
143 if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' && 149 if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
144 tok2[0] == '<' && tok2[n2 - 1] == '>')) { 150 tok2[0] == '<' && tok2[n2 - 1] == '>')) {
145 error(-1, "Illegal line in bfchar block in ToUnicode CMap"); 151 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
146 continue; 152 continue;
147 } 153 }
148 tok1[n1 - 1] = tok2[n2 - 1] = '\0'; 154 tok1[n1 - 1] = tok2[n2 - 1] = '\0';
149 if (sscanf(tok1 + 1, "%x", &code1) != 1) { 155 if (sscanf(tok1 + 1, "%x", &code1) != 1) {
150 error(-1, "Illegal line in bfchar block in ToUnicode CMap"); 156 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
151 continue; 157 continue;
152 } 158 }
153 if (code1 >= mapLen) { 159 if (code1 >= mapLen) {
154 oldLen = mapLen; 160 oldLen = mapLen;
155 mapLen = (code1 + 256) & ~255; 161 mapLen = (code1 + 256) & ~255;
156 map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode)); 162 map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode));
157 for (i = oldLen; i < mapLen; ++i) { 163 for (i = oldLen; i < mapLen; ++i) {
158 map[i] = 0; 164 map[i] = 0;
159 } 165 }
160 } 166 }
161 if (n2 == 6) { 167 if (n2 == 6) {
162 if (sscanf(tok2 + 1, "%x", &u) != 1) { 168 if (sscanf(tok2 + 1, "%x", &u) != 1) {
163 error(-1, "Illegal line in bfchar block in ToUnicode CMap"); 169 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
164 continue; 170 continue;
165 } 171 }
166 map[code1] = u; 172 map[code1] = u;
167 } else { 173 } else {
168 map[code1] = 0; 174 map[code1] = 0;
169 if (sMapLen == sMapSize) { 175 if (sMapLen == sMapSize) {
@@ -174,50 +180,54 @@ void CharCodeToUnicode::parseCMap1(char *(*getLineFunc)(char *, int, void *),
174 sMap[sMapLen].c = code1; 180 sMap[sMapLen].c = code1;
175 sMap[sMapLen].len = (n2 - 2) / 4; 181 sMap[sMapLen].len = (n2 - 2) / 4;
176 for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) { 182 for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) {
177 strncpy(uHex, tok2 + 1 + j*4, 4); 183 strncpy(uHex, tok2 + 1 + j*4, 4);
178 uHex[4] = '\0'; 184 uHex[4] = '\0';
179 if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) { 185 if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) {
180 error(-1, "Illegal line in bfchar block in ToUnicode CMap"); 186 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
181 } 187 }
182 } 188 }
183 ++sMapLen; 189 ++sMapLen;
184 } 190 }
185 } else {
186 error(-1, "Illegal bfchar block in ToUnicode CMap");
187 } 191 }
188 } else if (inBFRange) { 192 pst->getToken(tok1, sizeof(tok1), &n1);
189 if (!strcmp(tok1, "endbfrange")) { 193 } else if (!strcmp(tok2, "beginbfrange")) {
190 inBFRange = gFalse; 194 while (pst->getToken(tok1, sizeof(tok1), &n1)) {
191 } else if (tok2 && tok3) { 195 if (!strcmp(tok1, "endbfrange")) {
192 n1 = strlen(tok1); 196 break;
193 n2 = strlen(tok2); 197 }
194 n3 = strlen(tok3); 198 if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
199 !strcmp(tok2, "endbfrange") ||
200 !pst->getToken(tok3, sizeof(tok3), &n3) ||
201 !strcmp(tok3, "endbfrange")) {
202 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
203 break;
204 }
195 if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' && 205 if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
196 n2 == 2 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>' && 206 n2 == 2 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>' &&
197 tok3[0] == '<' && tok3[n3 - 1] == '>')) { 207 tok3[0] == '<' && tok3[n3 - 1] == '>')) {
198 error(-1, "Illegal line in bfrange block in ToUnicode CMap"); 208 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
199 continue; 209 continue;
200 } 210 }
201 tok1[n1 - 1] = tok2[n2 - 1] = tok3[n3 - 1] = '\0'; 211 tok1[n1 - 1] = tok2[n2 - 1] = tok3[n3 - 1] = '\0';
202 if (sscanf(tok1 + 1, "%x", &code1) != 1 || 212 if (sscanf(tok1 + 1, "%x", &code1) != 1 ||
203 sscanf(tok2 + 1, "%x", &code2) != 1) { 213 sscanf(tok2 + 1, "%x", &code2) != 1) {
204 error(-1, "Illegal line in bfrange block in ToUnicode CMap"); 214 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
205 continue; 215 continue;
206 } 216 }
207 if (code2 >= mapLen) { 217 if (code2 >= mapLen) {
208 oldLen = mapLen; 218 oldLen = mapLen;
209 mapLen = (code2 + 256) & ~255; 219 mapLen = (code2 + 256) & ~255;
210 map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode)); 220 map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode));
211 for (i = oldLen; i < mapLen; ++i) { 221 for (i = oldLen; i < mapLen; ++i) {
212 map[i] = 0; 222 map[i] = 0;
213 } 223 }
214 } 224 }
215 if (n3 == 6) { 225 if (n3 == 6) {
216 if (sscanf(tok3 + 1, "%x", &u) != 1) { 226 if (sscanf(tok3 + 1, "%x", &u) != 1) {
217 error(-1, "Illegal line in bfrange block in ToUnicode CMap"); 227 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
218 continue; 228 continue;
219 } 229 }
220 for (; code1 <= code2; ++code1) { 230 for (; code1 <= code2; ++code1) {
221 map[code1] = u++; 231 map[code1] = u++;
222 } 232 }
223 } else { 233 } else {
@@ -231,40 +241,26 @@ void CharCodeToUnicode::parseCMap1(char *(*getLineFunc)(char *, int, void *),
231 sMap[sMapLen].c = code1; 241 sMap[sMapLen].c = code1;
232 sMap[sMapLen].len = (n3 - 2) / 4; 242 sMap[sMapLen].len = (n3 - 2) / 4;
233 for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) { 243 for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) {
234 strncpy(uHex, tok3 + 1 + j*4, 4); 244 strncpy(uHex, tok3 + 1 + j*4, 4);
235 uHex[4] = '\0'; 245 uHex[4] = '\0';
236 if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) { 246 if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) {
237 error(-1, "Illegal line in bfrange block in ToUnicode CMap"); 247 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
238 } 248 }
239 } 249 }
240 sMap[sMapLen].u[sMap[sMapLen].len - 1] += i; 250 sMap[sMapLen].u[sMap[sMapLen].len - 1] += i;
241 ++sMapLen; 251 ++sMapLen;
242 } 252 }
243 } 253 }
244 } else {
245 error(-1, "Illegal bfrange block in ToUnicode CMap");
246 } 254 }
247 } else if (tok2 && !strcmp(tok2, "usecmap")) { 255 pst->getToken(tok1, sizeof(tok1), &n1);
248 if (tok1[0] == '/') { 256 } else {
249 name = new GString(tok1 + 1); 257 strcpy(tok1, tok2);
250 if ((f = globalParams->findToUnicodeFile(name))) {
251 parseCMap1((char *(*)(char *, int, void *))&getLine, f, nBits);
252 fclose(f);
253 } else {
254 error(-1, "Couldn't find ToUnicode CMap file for '%s'",
255 name->getCString());
256 }
257 delete name;
258 }
259 } else if (tok2 && !strcmp(tok2, "beginbfchar")) {
260 inBFChar = gTrue;
261 } else if (tok2 && !strcmp(tok2, "beginbfrange")) {
262 inBFRange = gTrue;
263 } 258 }
264 } 259 }
260 delete pst;
265} 261}
266 262
267CharCodeToUnicode::CharCodeToUnicode(GString *collectionA) { 263CharCodeToUnicode::CharCodeToUnicode(GString *collectionA) {
268 CharCode i; 264 CharCode i;
269 265
270 collection = collectionA; 266 collection = collectionA;