summaryrefslogtreecommitdiff
path: root/noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc
Unidiff
Diffstat (limited to 'noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc') (more/less context) (ignore whitespace changes)
-rw-r--r--noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc160
1 files changed, 78 insertions, 82 deletions
diff --git a/noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc b/noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc
index 6793398..912981e 100644
--- a/noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc
+++ b/noncore/unsupported/qpdf/xpdf/CharCodeToUnicode.cc
@@ -2,7 +2,7 @@
2// 2//
3// CharCodeToUnicode.cc 3// CharCodeToUnicode.cc
4// 4//
5// Copyright 2001 Derek B. Noonburg 5// Copyright 2001-2002 Glyph & Cog, LLC
6// 6//
7//======================================================================== 7//========================================================================
8 8
@@ -18,6 +18,7 @@
18#include "GString.h" 18#include "GString.h"
19#include "Error.h" 19#include "Error.h"
20#include "GlobalParams.h" 20#include "GlobalParams.h"
21#include "PSTokenizer.h"
21#include "CharCodeToUnicode.h" 22#include "CharCodeToUnicode.h"
22 23
23//------------------------------------------------------------------------ 24//------------------------------------------------------------------------
@@ -32,6 +33,26 @@ struct CharCodeToUnicodeString {
32 33
33//------------------------------------------------------------------------ 34//------------------------------------------------------------------------
34 35
36static int getCharFromString(void *data) {
37 char *p;
38 int c;
39
40 p = *(char **)data;
41 if (*p) {
42 c = *p++;
43 *(char **)data = p;
44 } else {
45 c = EOF;
46 }
47 return c;
48}
49
50static int getCharFromFile(void *data) {
51 return fgetc((FILE *)data);
52}
53
54//------------------------------------------------------------------------
55
35CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *collectionA) { 56CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *collectionA) {
36 FILE *f; 57 FILE *f;
37 Unicode *mapA; 58 Unicode *mapA;
@@ -75,47 +96,20 @@ CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) {
75 return new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0); 96 return new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0);
76} 97}
77 98
78static char *getLineFromString(char *buf, int size, char **s) {
79 char c;
80 int i;
81
82 i = 0;
83 while (i < size - 1 && **s) {
84 buf[i++] = c = *(*s)++;
85 if (c == '\x0a') {
86 break;
87 }
88 if (c == '\x0d') {
89 if (**s == '\x0a' && i < size - 1) {
90 buf[i++] = '\x0a';
91 ++*s;
92 }
93 break;
94 }
95 }
96 buf[i] = '\0';
97 if (i == 0) {
98 return NULL;
99 }
100 return buf;
101}
102
103CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) { 99CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) {
104 CharCodeToUnicode *ctu; 100 CharCodeToUnicode *ctu;
105 char *p; 101 char *p;
106 102
107 ctu = new CharCodeToUnicode(NULL); 103 ctu = new CharCodeToUnicode(NULL);
108 p = buf->getCString(); 104 p = buf->getCString();
109 ctu->parseCMap1((char *(*)(char *, int, void *))&getLineFromString, 105 ctu->parseCMap1(&getCharFromString, &p, nBits);
110 &p, nBits);
111 return ctu; 106 return ctu;
112} 107}
113 108
114void CharCodeToUnicode::parseCMap1(char *(*getLineFunc)(char *, int, void *), 109void CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data,
115 void *data, int nBits) { 110 int nBits) {
116 char buf[256]; 111 PSTokenizer *pst;
117 GBool inBFChar, inBFRange; 112 char tok1[256], tok2[256], tok3[256];
118 char *tok1, *tok2, *tok3;
119 int nDigits, n1, n2, n3; 113 int nDigits, n1, n2, n3;
120 CharCode oldLen, i; 114 CharCode oldLen, i;
121 CharCode code1, code2; 115 CharCode code1, code2;
@@ -126,28 +120,40 @@ void CharCodeToUnicode::parseCMap1(char *(*getLineFunc)(char *, int, void *),
126 FILE *f; 120 FILE *f;
127 121
128 nDigits = nBits / 4; 122 nDigits = nBits / 4;
129 inBFChar = inBFRange = gFalse; 123 pst = new PSTokenizer(getCharFunc, data);
130 while ((*getLineFunc)(buf, sizeof(buf), data)) { 124 pst->getToken(tok1, sizeof(tok1), &n1);
131 tok1 = strtok(buf, " \t\r\n"); 125 while (pst->getToken(tok2, sizeof(tok2), &n2)) {
132 if (!tok1 || tok1[0] == '%') { 126 if (!strcmp(tok2, "usecmap")) {
133 continue; 127 if (tok1[0] == '/') {
134 } 128 name = new GString(tok1 + 1);
135 tok2 = strtok(NULL, " \t\r\n"); 129 if ((f = globalParams->findToUnicodeFile(name))) {
136 tok3 = strtok(NULL, " \t\r\n"); 130 parseCMap1(&getCharFromFile, f, nBits);
137 if (inBFChar) { 131 fclose(f);
138 if (!strcmp(tok1, "endbfchar")) { 132 } else {
139 inBFChar = gFalse; 133 error(-1, "Couldn't find ToUnicode CMap file for '%s'",
140 } else if (tok2) { 134 name->getCString());
141 n1 = strlen(tok1); 135 }
142 n2 = strlen(tok2); 136 delete name;
137 }
138 pst->getToken(tok1, sizeof(tok1), &n1);
139 } else if (!strcmp(tok2, "beginbfchar")) {
140 while (pst->getToken(tok1, sizeof(tok1), &n1)) {
141 if (!strcmp(tok1, "endbfchar")) {
142 break;
143 }
144 if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
145 !strcmp(tok2, "endbfchar")) {
146 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
147 break;
148 }
143 if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' && 149 if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
144 tok2[0] == '<' && tok2[n2 - 1] == '>')) { 150 tok2[0] == '<' && tok2[n2 - 1] == '>')) {
145 error(-1, "Illegal line in bfchar block in ToUnicode CMap"); 151 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
146 continue; 152 continue;
147 } 153 }
148 tok1[n1 - 1] = tok2[n2 - 1] = '\0'; 154 tok1[n1 - 1] = tok2[n2 - 1] = '\0';
149 if (sscanf(tok1 + 1, "%x", &code1) != 1) { 155 if (sscanf(tok1 + 1, "%x", &code1) != 1) {
150 error(-1, "Illegal line in bfchar block in ToUnicode CMap"); 156 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
151 continue; 157 continue;
152 } 158 }
153 if (code1 >= mapLen) { 159 if (code1 >= mapLen) {
@@ -160,7 +166,7 @@ void CharCodeToUnicode::parseCMap1(char *(*getLineFunc)(char *, int, void *),
160 } 166 }
161 if (n2 == 6) { 167 if (n2 == 6) {
162 if (sscanf(tok2 + 1, "%x", &u) != 1) { 168 if (sscanf(tok2 + 1, "%x", &u) != 1) {
163 error(-1, "Illegal line in bfchar block in ToUnicode CMap"); 169 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
164 continue; 170 continue;
165 } 171 }
166 map[code1] = u; 172 map[code1] = u;
@@ -177,31 +183,35 @@ void CharCodeToUnicode::parseCMap1(char *(*getLineFunc)(char *, int, void *),
177 strncpy(uHex, tok2 + 1 + j*4, 4); 183 strncpy(uHex, tok2 + 1 + j*4, 4);
178 uHex[4] = '\0'; 184 uHex[4] = '\0';
179 if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) { 185 if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) {
180 error(-1, "Illegal line in bfchar block in ToUnicode CMap"); 186 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
181 } 187 }
182 } 188 }
183 ++sMapLen; 189 ++sMapLen;
184 } 190 }
185 } else {
186 error(-1, "Illegal bfchar block in ToUnicode CMap");
187 } 191 }
188 } else if (inBFRange) { 192 pst->getToken(tok1, sizeof(tok1), &n1);
189 if (!strcmp(tok1, "endbfrange")) { 193 } else if (!strcmp(tok2, "beginbfrange")) {
190 inBFRange = gFalse; 194 while (pst->getToken(tok1, sizeof(tok1), &n1)) {
191 } else if (tok2 && tok3) { 195 if (!strcmp(tok1, "endbfrange")) {
192 n1 = strlen(tok1); 196 break;
193 n2 = strlen(tok2); 197 }
194 n3 = strlen(tok3); 198 if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
199 !strcmp(tok2, "endbfrange") ||
200 !pst->getToken(tok3, sizeof(tok3), &n3) ||
201 !strcmp(tok3, "endbfrange")) {
202 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
203 break;
204 }
195 if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' && 205 if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
196 n2 == 2 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>' && 206 n2 == 2 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>' &&
197 tok3[0] == '<' && tok3[n3 - 1] == '>')) { 207 tok3[0] == '<' && tok3[n3 - 1] == '>')) {
198 error(-1, "Illegal line in bfrange block in ToUnicode CMap"); 208 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
199 continue; 209 continue;
200 } 210 }
201 tok1[n1 - 1] = tok2[n2 - 1] = tok3[n3 - 1] = '\0'; 211 tok1[n1 - 1] = tok2[n2 - 1] = tok3[n3 - 1] = '\0';
202 if (sscanf(tok1 + 1, "%x", &code1) != 1 || 212 if (sscanf(tok1 + 1, "%x", &code1) != 1 ||
203 sscanf(tok2 + 1, "%x", &code2) != 1) { 213 sscanf(tok2 + 1, "%x", &code2) != 1) {
204 error(-1, "Illegal line in bfrange block in ToUnicode CMap"); 214 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
205 continue; 215 continue;
206 } 216 }
207 if (code2 >= mapLen) { 217 if (code2 >= mapLen) {
@@ -214,7 +224,7 @@ void CharCodeToUnicode::parseCMap1(char *(*getLineFunc)(char *, int, void *),
214 } 224 }
215 if (n3 == 6) { 225 if (n3 == 6) {
216 if (sscanf(tok3 + 1, "%x", &u) != 1) { 226 if (sscanf(tok3 + 1, "%x", &u) != 1) {
217 error(-1, "Illegal line in bfrange block in ToUnicode CMap"); 227 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
218 continue; 228 continue;
219 } 229 }
220 for (; code1 <= code2; ++code1) { 230 for (; code1 <= code2; ++code1) {
@@ -234,34 +244,20 @@ void CharCodeToUnicode::parseCMap1(char *(*getLineFunc)(char *, int, void *),
234 strncpy(uHex, tok3 + 1 + j*4, 4); 244 strncpy(uHex, tok3 + 1 + j*4, 4);
235 uHex[4] = '\0'; 245 uHex[4] = '\0';
236 if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) { 246 if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) {
237 error(-1, "Illegal line in bfrange block in ToUnicode CMap"); 247 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
238 } 248 }
239 } 249 }
240 sMap[sMapLen].u[sMap[sMapLen].len - 1] += i; 250 sMap[sMapLen].u[sMap[sMapLen].len - 1] += i;
241 ++sMapLen; 251 ++sMapLen;
242 } 252 }
243 } 253 }
244 } else {
245 error(-1, "Illegal bfrange block in ToUnicode CMap");
246 } 254 }
247 } else if (tok2 && !strcmp(tok2, "usecmap")) { 255 pst->getToken(tok1, sizeof(tok1), &n1);
248 if (tok1[0] == '/') { 256 } else {
249 name = new GString(tok1 + 1); 257 strcpy(tok1, tok2);
250 if ((f = globalParams->findToUnicodeFile(name))) {
251 parseCMap1((char *(*)(char *, int, void *))&getLine, f, nBits);
252 fclose(f);
253 } else {
254 error(-1, "Couldn't find ToUnicode CMap file for '%s'",
255 name->getCString());
256 }
257 delete name;
258 }
259 } else if (tok2 && !strcmp(tok2, "beginbfchar")) {
260 inBFChar = gTrue;
261 } else if (tok2 && !strcmp(tok2, "beginbfrange")) {
262 inBFRange = gTrue;
263 } 258 }
264 } 259 }
260 delete pst;
265} 261}
266 262
267CharCodeToUnicode::CharCodeToUnicode(GString *collectionA) { 263CharCodeToUnicode::CharCodeToUnicode(GString *collectionA) {