Diffstat (limited to 'noncore/apps/opie-reader/striphtml.cpp') (more/less context) (ignore whitespace changes)
-rw-r--r-- | noncore/apps/opie-reader/striphtml.cpp | 1249 |
1 files changed, 1249 insertions, 0 deletions
diff --git a/noncore/apps/opie-reader/striphtml.cpp b/noncore/apps/opie-reader/striphtml.cpp new file mode 100644 index 0000000..9cd6a0d --- a/dev/null +++ b/noncore/apps/opie-reader/striphtml.cpp | |||
@@ -0,0 +1,1249 @@ | |||
1 | #include <qmap.h> | ||
2 | #include <qfileinfo.h> | ||
3 | #include <qtextstream.h> | ||
4 | #include <qdir.h> | ||
5 | #include "useqpe.h" | ||
6 | #ifdef USEQPE | ||
7 | #include <qpe/global.h> | ||
8 | #endif | ||
9 | #include "CDrawBuffer.h" | ||
10 | #include "striphtml.h" | ||
11 | #include "hrule.h" | ||
12 | #include "util.h" | ||
13 | |||
14 | #include <qregexp.h> | ||
15 | #include <qimage.h> | ||
16 | #include <qpixmap.h> | ||
17 | //#include <qprogressdialog.h> | ||
18 | //#include <qapplication.h> | ||
19 | |||
/*
 * Convert one hexadecimal digit character to its numeric value (0-15).
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'. Any other character yields 0,
 * so malformed colour strings degrade to black components rather than
 * producing garbage.
 */
static unsigned char h2i(unsigned char c)
{
    if ('0' <= c && c <= '9')
    {
        return c - '0';
    }
    if ('a' <= c && c <= 'f')
    {
        return c - 'a' + 10;
    }
    // Upper-case digits: not every caller lower-cases the attribute value
    // before handing it to parse_color().
    if ('A' <= c && c <= 'F')
    {
        return c - 'A' + 10;
    }
    return 0;
}
33 | |||
34 | static void parse_color(const QString& attr, unsigned char& r, unsigned char& g, unsigned char& b) | ||
35 | { | ||
36 | r = g = b = 0; | ||
37 | if (attr.length() >= 7 && attr[0] == '#') | ||
38 | { | ||
39 | r = h2i(attr[1].unicode()); | ||
40 | r = 16*r + h2i(attr[2].unicode()); | ||
41 | g = h2i(attr[3].unicode()); | ||
42 | g = 16*g + h2i(attr[4].unicode()); | ||
43 | b = h2i(attr[5].unicode()); | ||
44 | b = 16*b + h2i(attr[6].unicode()); | ||
45 | } | ||
46 | else if (attr == "red") | ||
47 | { | ||
48 | r = 255; | ||
49 | } | ||
50 | else if (attr == "green") | ||
51 | { | ||
52 | g = 255; | ||
53 | } | ||
54 | else if (attr == "blue") | ||
55 | { | ||
56 | b = 255; | ||
57 | } | ||
58 | else if (attr == "white") | ||
59 | { | ||
60 | r = g = b = 255; | ||
61 | } | ||
62 | else if (attr == "black") | ||
63 | { | ||
64 | r = g = b = 0; | ||
65 | } | ||
66 | else | ||
67 | { | ||
68 | qDebug("Don't understand colour \"%s\"", (const char*)attr); | ||
69 | } | ||
70 | } | ||
71 | |||
// Definition of the static navigation member; hyperlink() calls
// m_nav.back() on it to handle "history.back()" links.
CNavigation_base<htmlmark> striphtml::m_nav;
73 | |||
74 | void striphtml::skipblock(const QString& _ent) | ||
75 | { | ||
76 | tchar ch = '>'; | ||
77 | CStyle dummy; | ||
78 | QString ent; | ||
79 | unsigned long pos; | ||
80 | do | ||
81 | { | ||
82 | while (ch != '<' && ch != UEOF) | ||
83 | { | ||
84 | mygetch(ch, dummy, pos); | ||
85 | } | ||
86 | |||
87 | ch = skip_ws(); | ||
88 | |||
89 | ent = getname(ch, " >").lower(); | ||
90 | qDebug("Skipblock:%s", (const char*)ent); | ||
91 | } while (ent != _ent && ch != UEOF); | ||
92 | } | ||
93 | |||
94 | void striphtml::locate(unsigned int n) | ||
95 | { | ||
96 | m_inblock = false; | ||
97 | text_q = ""; | ||
98 | forcecentre = false; | ||
99 | ignorespace = false; | ||
100 | while (!stylestack.isEmpty()) stylestack.pop(); | ||
101 | currentstyle.unset(); | ||
102 | qDebug("striphtml:locating:%u", n); | ||
103 | parent->locate(n); | ||
104 | } | ||
105 | |||
106 | int striphtml::getpara(CBuffer& buff, unsigned long& startpos) | ||
107 | { | ||
108 | tchar ch; | ||
109 | CStyle sty; | ||
110 | unsigned long pos; | ||
111 | int i = 0; | ||
112 | parent->getch(ch, sty, startpos); | ||
113 | pos = startpos; | ||
114 | while (1) | ||
115 | { | ||
116 | if (ch == 10 && !isPre) | ||
117 | { | ||
118 | ch = ' '; | ||
119 | } | ||
120 | if (ch == UEOF) | ||
121 | { | ||
122 | // qDebug("EOF:%d:%u", i, pos); | ||
123 | buff[i] = 0; | ||
124 | if (i == 0) | ||
125 | { | ||
126 | i = -1; | ||
127 | } | ||
128 | return i; | ||
129 | } | ||
130 | else if (ch == '<') | ||
131 | { | ||
132 | tchar ch2 = skip_ws(); | ||
133 | QString ent = getname(ch2, " >"); | ||
134 | ent = ent.lower(); | ||
135 | // qDebug("ent:%s", (const char*)ent); | ||
136 | if (ent == "a") | ||
137 | { | ||
138 | buff[i++] = '<'; | ||
139 | buff[i++] = 'a'; | ||
140 | buff[i++] = ch2; | ||
141 | // buff[i] = 0; qDebug("ANCHOR:%s", (const char*)toQString(buff.data())); | ||
142 | } | ||
143 | else if (ent == "/a") | ||
144 | { | ||
145 | buff[i++] = '<'; | ||
146 | buff[i++] = '/'; | ||
147 | buff[i++] = 'a'; | ||
148 | buff[i++] = ch2; | ||
149 | // buff[i] = 0; qDebug("/ANCHOR:%s", (const char*)toQString(buff.data())); | ||
150 | } | ||
151 | else if (ent == "div") | ||
152 | { | ||
153 | // buff[i] = 0; qDebug("DIV:%s", (const char*)toQString(buff.data())); | ||
154 | if (i == 0) | ||
155 | { | ||
156 | buff[i++] = '<'; | ||
157 | buff[i++] = 'd'; | ||
158 | buff[i++] = 'i'; | ||
159 | buff[i++] = 'v'; | ||
160 | buff[i++] = ' '; | ||
161 | buff[i++] = ch2; | ||
162 | while (ch2 != '>' && ch2 != UEOF && i < 2048) | ||
163 | { | ||
164 | parent->getch(ch2, sty, pos); | ||
165 | buff[i++] = ch2; | ||
166 | } | ||
167 | } | ||
168 | else | ||
169 | { | ||
170 | locate(pos); | ||
171 | } | ||
172 | buff[i++] = 0; | ||
173 | // qDebug("DIV:%s", (const char*)toQString(buff.data())); | ||
174 | return i; | ||
175 | } | ||
176 | else if (ent == "p" || (ent[0] == 'h' && ent.length() == 2 && QString("123456789").find(ent[1]) != -1)) | ||
177 | { | ||
178 | buff[i++] = 0; | ||
179 | while (ch2 != '>' && ch2 != UEOF) | ||
180 | { | ||
181 | parent->getch(ch2, sty, pos); | ||
182 | } | ||
183 | return i; | ||
184 | } | ||
185 | else | ||
186 | { | ||
187 | while (ch2 != '>' && ch2 != UEOF) | ||
188 | { | ||
189 | parent->getch(ch2, sty, pos); | ||
190 | } | ||
191 | } | ||
192 | } | ||
193 | else | ||
194 | { | ||
195 | buff[i++] = ch; | ||
196 | } | ||
197 | parent->getch(ch, sty, pos); | ||
198 | } | ||
199 | } | ||
200 | |||
201 | QString striphtml::dehtml(const QString& _info) | ||
202 | { | ||
203 | QString info; | ||
204 | for (int i = 0; i < _info.length(); i++) | ||
205 | { | ||
206 | tchar ch = _info[i]; | ||
207 | if (ch == '%') | ||
208 | { | ||
209 | ch = 0; | ||
210 | for (int j = 0; j < 2; j++) | ||
211 | { | ||
212 | ch <<= 4; | ||
213 | tchar ch1 = _info[++i]; | ||
214 | if ('0' <= ch1 && ch1 <= '9') | ||
215 | { | ||
216 | ch += ch1 - '0'; | ||
217 | } | ||
218 | else if ('a' <= ch1 && ch1 <= 'f') | ||
219 | { | ||
220 | ch += ch1 - 'a' + 10; | ||
221 | } | ||
222 | else if ('A' <= ch1 && ch1 <= 'F') | ||
223 | { | ||
224 | ch += ch1 - 'A' + 10; | ||
225 | } | ||
226 | } | ||
227 | } | ||
228 | info += ch; | ||
229 | } | ||
230 | return info; | ||
231 | } | ||
232 | |||
233 | bool striphtml::findanchor(const QString& _info) | ||
234 | { | ||
235 | // QProgressDialog dlg("Finding link...", QString::null, 0, NULL, "progress", true); | ||
236 | // QProgressBar dlg(0); | ||
237 | #if defined(USEQPE) || defined(_WINDOWS) | ||
238 | QString info; | ||
239 | for (int i = 0; i < _info.length(); i++) | ||
240 | { | ||
241 | tchar ch = _info[i]; | ||
242 | if (QString(".^$[]*+?").find(ch) != -1) | ||
243 | { | ||
244 | info += '\\'; | ||
245 | } | ||
246 | info += ch; | ||
247 | } | ||
248 | #else | ||
249 | QString info = QRegExp::escape(_info); | ||
250 | #endif | ||
251 | qDebug("Adjusted searchstring:%s", (const char*)info); | ||
252 | QString sname("<[Aa][^>]*[ \t]+[Nn][Aa][Mm][Ee][ \t]*=[ \t]*\"?"); | ||
253 | sname += info + "\"?[ \t>]"; | ||
254 | QString sid("<[A-Za-z][^>]*[ \t]+[Ii][Dd][ \t]*=[ \t]*\"?"); | ||
255 | sid += info+"\"?[ \t>]"; | ||
256 | #ifdef USEQPE | ||
257 | QRegExp name(sname); | ||
258 | QRegExp id(sid); | ||
259 | #else | ||
260 | QRegExp name(sname+"|"+sid); | ||
261 | #endif | ||
262 | bool ret = true; | ||
263 | locate(0); | ||
264 | unsigned long pos = 0; | ||
265 | unsigned long startpos = 0; | ||
266 | int offset; | ||
267 | CBuffer test; | ||
268 | qDebug("striphtml::findanchor"); | ||
269 | // dlg.show(); | ||
270 | if (getpara(test, pos) >= 0) | ||
271 | { | ||
272 | while (1) | ||
273 | { | ||
274 | // qApp->processEvents(); | ||
275 | if ((offset = name.match(toQString(test.data()))) != -1) break; | ||
276 | #ifdef USEQPE | ||
277 | if ((offset = id.match(toQString(test.data()))) != -1) break; | ||
278 | #endif | ||
279 | if (getpara(test, pos) < 0) | ||
280 | { | ||
281 | locate(startpos); | ||
282 | qDebug("Not found"); | ||
283 | return false; | ||
284 | } | ||
285 | } | ||
286 | locate(pos); | ||
287 | qDebug("Found"); | ||
288 | ret = true; | ||
289 | } | ||
290 | else | ||
291 | { | ||
292 | locate(startpos); | ||
293 | qDebug("Not found"); | ||
294 | ret = false; | ||
295 | } | ||
296 | return ret; | ||
297 | } | ||
298 | |||
299 | striphtml::striphtml(const QString& _s) : entmap(NULL), isPre(false), currentid(0), lastch(0), currentfile(_s), indent(0), forcecentre(false), m_inblock(false), m_bchm(false), ignorespace(false) | ||
300 | { | ||
301 | href2filepos = new QMap<QString, unsigned long>; | ||
302 | id2href = new QMap<unsigned long, QString>; | ||
303 | } | ||
304 | |||
305 | striphtml::~striphtml() | ||
306 | { | ||
307 | if (entmap != NULL) delete entmap; | ||
308 | delete href2filepos; | ||
309 | delete id2href; | ||
310 | } | ||
311 | |||
312 | void striphtml::initentmap() | ||
313 | { | ||
314 | entmap = new QMap<QString, tchar>; | ||
315 | QString fname(QTReaderUtil::getPluginPath("data")); | ||
316 | QFileInfo fi; | ||
317 | fi.setFile(fname, "HTMLentities"); | ||
318 | if (fi.exists()) | ||
319 | { | ||
320 | fname = fi.absFilePath(); | ||
321 | |||
322 | QFile fl(fname); | ||
323 | if (fl.open(IO_ReadOnly)) | ||
324 | { | ||
325 | QTextStream t(&fl); | ||
326 | QString key, value; | ||
327 | while (!t.eof()) | ||
328 | { | ||
329 | QString data = t.readLine(); | ||
330 | int colon = data.find(':'); | ||
331 | if (colon > 0) | ||
332 | { | ||
333 | QString key = data.left(colon); | ||
334 | QString value = data.right(data.length()-colon-1); | ||
335 | bool ok; | ||
336 | int ret = value.toInt(&ok); | ||
337 | if (ok) | ||
338 | { | ||
339 | (*entmap)[key] = ret; | ||
340 | } | ||
341 | } | ||
342 | } | ||
343 | fl.close(); | ||
344 | } | ||
345 | } | ||
346 | } | ||
347 | |||
348 | unsigned short striphtml::skip_ws() | ||
349 | { | ||
350 | tchar ch; | ||
351 | CStyle sty; | ||
352 | unsigned long dummy; | ||
353 | do | ||
354 | { | ||
355 | mygetch(ch, sty, dummy); | ||
356 | } | ||
357 | while (ch < 33 && ch != UEOF); | ||
358 | return ch; | ||
359 | } | ||
360 | |||
361 | unsigned short striphtml::skip_ws_end() | ||
362 | { | ||
363 | unsigned long dummy; | ||
364 | return skip_ws_end(dummy); | ||
365 | } | ||
366 | |||
367 | unsigned short striphtml::skip_ws_end(unsigned long& pos) | ||
368 | { | ||
369 | tchar ch; | ||
370 | CStyle sty; | ||
371 | do | ||
372 | { | ||
373 | mygetch(ch, sty, pos); | ||
374 | } | ||
375 | while (ch != '>' && ch != UEOF); | ||
376 | return ch; | ||
377 | } | ||
378 | |||
379 | QString striphtml::getname(tchar& ch, const QString& nd) | ||
380 | { | ||
381 | QString nm = ""; | ||
382 | // nm += ch; | ||
383 | CStyle sty; | ||
384 | unsigned long dummy; | ||
385 | while (1) | ||
386 | { | ||
387 | // if ( QChar(ch).isLetterOrNumber() ) | ||
388 | if (ch != UEOF && nd.find(ch, 0, false) == -1 && nm.length() < 2048) | ||
389 | { | ||
390 | nm += ch; | ||
391 | } | ||
392 | else | ||
393 | { | ||
394 | break; | ||
395 | } | ||
396 | mygetch(ch, sty, dummy); | ||
397 | } | ||
398 | return nm; | ||
399 | } | ||
400 | |||
401 | QString striphtml::getattr(tchar& ch) | ||
402 | { | ||
403 | QString ref; | ||
404 | CStyle sty; | ||
405 | unsigned long pos; | ||
406 | if (ch == ' ') ch = skip_ws(); | ||
407 | if (ch == '=') | ||
408 | { | ||
409 | ch = skip_ws(); | ||
410 | if (ch == '"') | ||
411 | { | ||
412 | mygetch(ch, sty, pos); | ||
413 | ref = getname(ch, "\""); | ||
414 | ch = skip_ws(); | ||
415 | } | ||
416 | else | ||
417 | { | ||
418 | ref = getname(ch, " >"); | ||
419 | if (ch == ' ') ch = skip_ws(); | ||
420 | } | ||
421 | } | ||
422 | return ref; | ||
423 | } | ||
424 | |||
425 | linkType striphtml::hyperlink(unsigned int n, unsigned int, QString& w, QString& nm) | ||
426 | { | ||
427 | #if defined(USEQPE) || defined(_WINDOWS) | ||
428 | QMap<unsigned long, QString>::Iterator hrefit = id2href->find(n); | ||
429 | #else | ||
430 | QMap<unsigned long, QString>::iterator hrefit = id2href->find(n); | ||
431 | #endif | ||
432 | if (hrefit == id2href->end()) | ||
433 | { | ||
434 | return eNone; | ||
435 | } | ||
436 | QString href = *hrefit; | ||
437 | #if defined(USEQPE) || defined(_WINDOWS) | ||
438 | QMap<QString, unsigned long>::Iterator fpit = href2filepos->find(href); | ||
439 | #else | ||
440 | QMap<QString, unsigned long>::iterator fpit = href2filepos->find(href); | ||
441 | #endif | ||
442 | if (fpit == href2filepos->end()) | ||
443 | { | ||
444 | if (href == "history.back()") | ||
445 | { | ||
446 | QString fc = currentfile; | ||
447 | unsigned long loc; | ||
448 | htmlmark m(fc, loc); | ||
449 | linkType ret = (m_nav.back(m)) ? eFile : eNone; | ||
450 | if (fc == m.filename()) | ||
451 | { | ||
452 | if ((ret & eFile) != 0) | ||
453 | { | ||
454 | locate(m.posn()); | ||
455 | return eLink; | ||
456 | } | ||
457 | } | ||
458 | return eNone; | ||
459 | } | ||
460 | qDebug("Searching for %s", (const char*)href); | ||
461 | |||
462 | |||
463 | QString file, name; | ||
464 | |||
465 | int colon = href.find('#'); | ||
466 | if (colon >= 0) | ||
467 | { | ||
468 | file = dehtml(href.left(colon)); | ||
469 | name = dehtml(href.right(href.length()-colon-1)); | ||
470 | } | ||
471 | else | ||
472 | { | ||
473 | file = dehtml(href); | ||
474 | } | ||
475 | |||
476 | qDebug("File:%s", (const char*)file); | ||
477 | qDebug("Name:%s", (const char*)name); | ||
478 | |||
479 | |||
480 | if (file.isEmpty()) | ||
481 | { | ||
482 | fpit = href2filepos->find(name); | ||
483 | if (fpit != href2filepos->end()) | ||
484 | { | ||
485 | locate(*fpit); | ||
486 | return eLink; | ||
487 | } | ||
488 | else | ||
489 | { | ||
490 | // nm = QString("<a[^>]*name[ \t]*=[ \t]*\"") + name + "\""; | ||
491 | qDebug("Do a search for:%s", (const char*)name); | ||
492 | parent->locate(0); | ||
493 | findanchor(name); | ||
494 | return eLink; | ||
495 | } | ||
496 | } | ||
497 | else | ||
498 | // if (href.find('#') == -1) | ||
499 | { | ||
500 | if (m_bchm) | ||
501 | { | ||
502 | w = "/"+file; | ||
503 | nm = name; | ||
504 | return eFile; | ||
505 | } | ||
506 | else | ||
507 | { | ||
508 | QFileInfo f(currentfile); | ||
509 | QFileInfo f1(f.dir(true), file); | ||
510 | if (f1.exists()) | ||
511 | { | ||
512 | w = f1.absFilePath(); | ||
513 | nm = name; | ||
514 | } | ||
515 | else | ||
516 | { | ||
517 | w = file; | ||
518 | } | ||
519 | return (f1.exists() ? eFile : eNone); | ||
520 | } | ||
521 | } | ||
522 | return eNone; | ||
523 | } | ||
524 | locate(*fpit); | ||
525 | // parent->locate((*href2filepos)[(*id2href)[n]]); | ||
526 | return eLink; | ||
527 | } | ||
528 | /* | ||
529 | unsigned short striphtml::parse_m() | ||
530 | { | ||
531 | tchar ch; | ||
532 | CStyle sty; | ||
533 | unsigned long dummy; | ||
534 | mygetch(ch, sty, dummy); | ||
535 | if (ch == 'm' || ch == 'M') | ||
536 | { | ||
537 | ch = skip_ws_end(); | ||
538 | if (ch == '>') | ||
539 | { | ||
540 | return 0; | ||
541 | } | ||
542 | } | ||
543 | return ch; | ||
544 | } | ||
545 | */ | ||
546 | |||
547 | void striphtml::mygetch(tchar& ch, CStyle& sty, unsigned long& pos) | ||
548 | { | ||
549 | if (!text_q.isEmpty() && !m_inblock) | ||
550 | { | ||
551 | ch = text_q[0].unicode(); | ||
552 | text_q = text_q.right(text_q.length()-1); | ||
553 | } | ||
554 | else | ||
555 | { | ||
556 | parent->getch(ch, sty, pos); | ||
557 | if (ch == '<') | ||
558 | { | ||
559 | m_inblock = true; | ||
560 | } | ||
561 | if (ch == '>') | ||
562 | { | ||
563 | m_inblock = false; | ||
564 | } | ||
565 | } | ||
566 | if (ch == 10 && !isPre) | ||
567 | { | ||
568 | ch = ' '; | ||
569 | } | ||
570 | } | ||
571 | |||
572 | void striphtml::parse_paragraph(CStyle& currentstyle, tchar& ch, unsigned long pos) | ||
573 | { | ||
574 | /* | ||
575 | int count = 0; | ||
576 | for (CList<CStyle>::iterator iter = stylestack.begin(); iter != stylestack.end(); ++iter) | ||
577 | { | ||
578 | count++; | ||
579 | } | ||
580 | qDebug("Currently have %u styles", count); | ||
581 | */ | ||
582 | if (stylestack.isEmpty()) | ||
583 | { | ||
584 | currentstyle.unset(); | ||
585 | } | ||
586 | else | ||
587 | { | ||
588 | qDebug("Using stack style"); | ||
589 | currentstyle = stylestack.first(); | ||
590 | } | ||
591 | if (forcecentre) | ||
592 | { | ||
593 | currentstyle.setCentreJustify(); | ||
594 | } | ||
595 | if (ch == ' ') ch = skip_ws(); | ||
596 | while (ch != '>' && ch != UEOF) | ||
597 | { | ||
598 | QString ent = getname(ch, " =>").lower(); | ||
599 | QString attr = getattr(ch).lower(); | ||
600 | //qDebug("(Paragraph)Entity:%s Attr:%s", (const char*)ent, (const char*)attr); | ||
601 | if (ent == "align") | ||
602 | { | ||
603 | if (attr == "center") | ||
604 | { | ||
605 | currentstyle.setCentreJustify(); | ||
606 | } | ||
607 | if (attr == "right") | ||
608 | { | ||
609 | currentstyle.setRightJustify(); | ||
610 | } | ||
611 | } | ||
612 | if (ent == "id") | ||
613 | { | ||
614 | (*href2filepos)[attr] = pos; | ||
615 | } | ||
616 | if (ent == "bgcolor") | ||
617 | { | ||
618 | qDebug("Got paper colour:%s", (const char*)attr); | ||
619 | unsigned char r,g,b; | ||
620 | parse_color(attr, r, g, b); | ||
621 | currentstyle.setPaper(r, g, b); | ||
622 | } | ||
623 | if (ent == "color") | ||
624 | { | ||
625 | qDebug("Got foreground colour:%s", (const char*)attr); | ||
626 | unsigned char r,g,b; | ||
627 | parse_color(attr, r, g, b); | ||
628 | currentstyle.setColour(r, g, b); | ||
629 | } | ||
630 | if (ch == ' ') ch = skip_ws(); | ||
631 | } | ||
632 | ch = 10; | ||
633 | } | ||
634 | |||
635 | void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) | ||
636 | { | ||
637 | currentstyle.clearPicture(); | ||
638 | if (!q.isEmpty()) | ||
639 | { | ||
640 | ch = q[0].unicode(); | ||
641 | if (ch == '-') | ||
642 | { | ||
643 | tchar w = q[1].unicode(); | ||
644 | tchar h = q[2].unicode(); | ||
645 | unsigned char r = q[3].unicode(); | ||
646 | unsigned char g = q[4].unicode(); | ||
647 | unsigned char b = q[5].unicode(); | ||
648 | ch = '#'; | ||
649 | //qDebug("html:hrule<%u, %u>", w, h); | ||
650 | currentstyle.setPicture(false, hRule(w,h,r,g,b)); | ||
651 | q = q.right(q.length()-6); | ||
652 | } | ||
653 | else | ||
654 | { | ||
655 | q = q.right(q.length()-1); | ||
656 | } | ||
657 | sty = currentstyle; | ||
658 | return; | ||
659 | } | ||
660 | do | ||
661 | { | ||
662 | unsigned long npos; | ||
663 | CStyle dummy; | ||
664 | mygetch(ch, dummy, pos); | ||
665 | // if (ch == 10 && !isPre) ch = ' '; | ||
666 | while (ch == '<' && ch != UEOF) | ||
667 | { | ||
668 | ch = skip_ws(); | ||
669 | |||
670 | QString ent = getname(ch, " >").lower(); | ||
671 | |||
672 | // qDebug("Entity:%s", (const char*)ent); | ||
673 | |||
674 | if (ent == "a"/* || ent == "reference"*/) | ||
675 | { | ||
676 | if (ch == ' ') ch = skip_ws(); | ||
677 | bool fileposfound = false; | ||
678 | bool ishref = false; | ||
679 | unsigned int filepos = 0; | ||
680 | QString ref, name; | ||
681 | while (ch != '>' && ch != UEOF) | ||
682 | { | ||
683 | QString ent = getname(ch, " =>").lower(); | ||
684 | QString attr = getattr(ch); | ||
685 | qDebug("<A>Entity:%s Attr:%s", (const char*)ent, (const char*)attr); | ||
686 | if (ent == "name") | ||
687 | { | ||
688 | name = attr; | ||
689 | } | ||
690 | if (ent == "onclick") | ||
691 | { | ||
692 | int st = attr.find('\''); | ||
693 | int nd = attr.findRev('\''); | ||
694 | ref = attr.mid(st+1, nd-st-1); | ||
695 | ishref = true; | ||
696 | qDebug("Onclick:%s", (const char*)ref); | ||
697 | } | ||
698 | if (ent == "href") | ||
699 | { | ||
700 | ishref = true; | ||
701 | ref = attr; | ||
702 | } | ||
703 | if (ent == "filepos") | ||
704 | { | ||
705 | filepos = attr.toUInt(&fileposfound); | ||
706 | if (ref.isEmpty()) | ||
707 | { | ||
708 | ishref = true; | ||
709 | ref = attr; | ||
710 | } | ||
711 | } | ||
712 | if (ent == "title") | ||
713 | { | ||
714 | text_q = attr + "</a><p>"; | ||
715 | } | ||
716 | qDebug("<a %s=%s>", (const char*)ent, (const char*)ref); | ||
717 | } | ||
718 | if (ishref) | ||
719 | { | ||
720 | currentstyle.setColour(0,0,255); | ||
721 | currentstyle.setLink(true); | ||
722 | currentstyle.setData(currentid); | ||
723 | if (!text_q.isEmpty()) | ||
724 | { | ||
725 | currentstyle.setBold(); | ||
726 | currentstyle.setCentreJustify(); | ||
727 | } | ||
728 | (*id2href)[currentid] = ref; | ||
729 | currentid++; | ||
730 | |||
731 | |||
732 | if (fileposfound) | ||
733 | { | ||
734 | (*href2filepos)[ref] = filepos; | ||
735 | } | ||
736 | } | ||
737 | if (!name.isEmpty()) | ||
738 | { | ||
739 | (*href2filepos)[name] = pos; | ||
740 | } | ||
741 | } | ||
742 | else if (ent == "p") | ||
743 | { | ||
744 | parse_paragraph(currentstyle, ch, pos); | ||
745 | currentstyle.setExtraSpace(3); | ||
746 | continue; | ||
747 | } | ||
748 | else if (ent == "div") | ||
749 | { | ||
750 | parse_paragraph(currentstyle, ch, pos); | ||
751 | stylestack.push_front(currentstyle); | ||
752 | //indent = 0; | ||
753 | continue; | ||
754 | } | ||
755 | else if (ent == "sup") | ||
756 | { | ||
757 | currentstyle.setVOffset(-1); | ||
758 | } | ||
759 | else if (ent == "sup") | ||
760 | { | ||
761 | currentstyle.setVOffset(1); | ||
762 | } | ||
763 | else if (ent == "/sup" || ent == "/sub") | ||
764 | { | ||
765 | currentstyle.setVOffset(0); | ||
766 | } | ||
767 | else if (ent == "span") | ||
768 | { | ||
769 | if (ch == ' ') ch = skip_ws(); | ||
770 | while (ch != '>' && ch != UEOF) | ||
771 | { | ||
772 | QString ent = getname(ch, " =>").lower(); | ||
773 | QString attr = getattr(ch).lower(); | ||
774 | if (ent == "bgcolor") | ||
775 | { | ||
776 | qDebug("Got background colour:%s", (const char*)attr); | ||
777 | unsigned char r,g,b; | ||
778 | parse_color(attr, r, g, b); | ||
779 | currentstyle.setBackground(r, g, b); | ||
780 | } | ||
781 | if (ent == "color") | ||
782 | { | ||
783 | qDebug("Got foreground colour:%s", (const char*)attr); | ||
784 | unsigned char r,g,b; | ||
785 | parse_color(attr, r, g, b); | ||
786 | currentstyle.setColour(r, g, b); | ||
787 | } | ||
788 | } | ||
789 | stylestack.push_front(currentstyle); | ||
790 | } | ||
791 | else if (ent == "/span") | ||
792 | { | ||
793 | if (ch != '>') ch = skip_ws_end(); | ||
794 | currentstyle.setBackground(255, 255, 255); | ||
795 | currentstyle.setColour(0, 0, 0); | ||
796 | if (!stylestack.isEmpty()) | ||
797 | { | ||
798 | stylestack.pop(); | ||
799 | } | ||
800 | } | ||
801 | else if (ent == "pre") | ||
802 | { | ||
803 | isPre = true; | ||
804 | currentstyle.setMono(); | ||
805 | } | ||
806 | else if (ent == "tt") | ||
807 | { | ||
808 | currentstyle.setMono(); | ||
809 | } | ||
810 | else if (ent == "b" || ent == "strong") | ||
811 | { | ||
812 | currentstyle.setBold(); | ||
813 | } | ||
814 | else if (ent == "u") | ||
815 | { | ||
816 | currentstyle.setUnderline(); | ||
817 | } | ||
818 | else if (ent == "/u") | ||
819 | { | ||
820 | currentstyle.unsetUnderline(); | ||
821 | } | ||
822 | else if (ent == "blockquote") | ||
823 | { | ||
824 | if (ch != '>') ch = skip_ws_end(); | ||
825 | ch = 10; | ||
826 | currentstyle.setLeftMargin(30); | ||
827 | currentstyle.setRightMargin(30); | ||
828 | continue; | ||
829 | } | ||
830 | else if (ent == "br" || ent == "br/") | ||
831 | { | ||
832 | if (ch != '>') ch = skip_ws_end(); | ||
833 | ch = 10; | ||
834 | continue; | ||
835 | } | ||
836 | else if (ent == "mbp:pagebreak") | ||
837 | { | ||
838 | /* | ||
839 | if (ch != '>') ch = skip_ws_end(pos); | ||
840 | q += 10; | ||
841 | q += QChar(UEOF); | ||
842 | ch = 10; | ||
843 | continue; | ||
844 | */ | ||
845 | ch = 6; | ||
846 | // currentstyle.setTop(); | ||
847 | continue; | ||
848 | } | ||
849 | else if (ent == "center") | ||
850 | { | ||
851 | //forcecentre = true; | ||
852 | qDebug("setting centre"); | ||
853 | currentstyle.setCentreJustify(); | ||
854 | ch = 10; | ||
855 | continue; | ||
856 | } | ||
857 | else if (ent == "/center") | ||
858 | { | ||
859 | qDebug("unsetting centre"); | ||
860 | forcecentre = false; | ||
861 | } | ||
862 | else if (ent == "li") | ||
863 | { | ||
864 | if (ch != '>') ch = skip_ws_end(); | ||
865 | ch = 10; | ||
866 | if (m_listtype[indent % m_cmaxdepth] == 1) | ||
867 | { | ||
868 | q.setNum(m_ctr[indent % m_cmaxdepth]++); | ||
869 | } | ||
870 | else | ||
871 | { | ||
872 | q += QChar(8226); | ||
873 | } | ||
874 | q += ' '; | ||
875 | currentstyle.setLeftMargin(6*indent); | ||
876 | qDebug("Setting indent:%d", indent); | ||
877 | continue; | ||
878 | } | ||
879 | else if (ent == "ul") | ||
880 | { | ||
881 | indent++; | ||
882 | m_listtype[indent % m_cmaxdepth] = 0; | ||
883 | } | ||
884 | else if (ent == "/ul") | ||
885 | { | ||
886 | indent--; | ||
887 | } | ||
888 | else if (ent == "ol") | ||
889 | { | ||
890 | indent++; | ||
891 | m_listtype[indent % m_cmaxdepth] = 1; | ||
892 | m_ctr[indent % m_cmaxdepth] = 1; | ||
893 | } | ||
894 | else if (ent == "/ol") | ||
895 | { | ||
896 | indent--; | ||
897 | } | ||
898 | else if (ent == "i") | ||
899 | { | ||
900 | currentstyle.setItalic(); | ||
901 | } | ||
902 | else if (ent == "em") | ||
903 | { | ||
904 | currentstyle.setItalic(); | ||
905 | } | ||
906 | else if (ent == "small") | ||
907 | { | ||
908 | currentstyle.setFontSize(-2); | ||
909 | } | ||
910 | else if (ent == "/small") | ||
911 | { | ||
912 | currentstyle.setFontSize(0); | ||
913 | } | ||
914 | else if (ent == "big") | ||
915 | { | ||
916 | currentstyle.setFontSize(2); | ||
917 | } | ||
918 | else if (ent == "/big") | ||
919 | { | ||
920 | currentstyle.setFontSize(0); | ||
921 | } | ||
922 | else if (ent[0] == 'h' && ent.length() == 2 && QString("123456789").find(ent[1]) != -1) | ||
923 | { | ||
924 | indent = 0; | ||
925 | if (ent[1] == '1') | ||
926 | { | ||
927 | parse_paragraph(currentstyle, ch, pos); | ||
928 | currentstyle.setFontSize(3); | ||
929 | currentstyle.setExtraSpace(8); | ||
930 | currentstyle.setBold(); | ||
931 | // currentstyle.setExtraSpace(10); | ||
932 | continue; | ||
933 | } | ||
934 | else if (ent[1] == '2') | ||
935 | { | ||
936 | parse_paragraph(currentstyle, ch, pos); | ||
937 | currentstyle.setFontSize(2); | ||
938 | currentstyle.setExtraSpace(6); | ||
939 | currentstyle.setBold(); | ||
940 | // currentstyle.setExtraSpace(10); | ||
941 | continue; | ||
942 | } | ||
943 | else if (ent[1] == '3') | ||
944 | { | ||
945 | parse_paragraph(currentstyle, ch, pos); | ||
946 | currentstyle.setFontSize(1); | ||
947 | currentstyle.setExtraSpace(4); | ||
948 | currentstyle.setBold(); | ||
949 | // currentstyle.setExtraSpace(10); | ||
950 | continue; | ||
951 | } | ||
952 | else | ||
953 | { | ||
954 | parse_paragraph(currentstyle, ch, pos); | ||
955 | currentstyle.setExtraSpace(4); | ||
956 | currentstyle.setBold(); | ||
957 | // currentstyle.setExtraSpace(10); | ||
958 | continue; | ||
959 | } | ||
960 | } | ||
961 | |||
962 | |||
963 | else if (ent == "/a") | ||
964 | { | ||
965 | currentstyle.setColour(0,0,0); | ||
966 | currentstyle.setLink(false); | ||
967 | } | ||
968 | else if (ent == "/pre") | ||
969 | { | ||
970 | currentstyle.unsetMono(); | ||
971 | isPre = false; | ||
972 | } | ||
973 | else if (ent == "/tt") | ||
974 | { | ||
975 | currentstyle.unsetMono(); | ||
976 | } | ||
977 | else if (ent == "/b" || ent == "/strong") | ||
978 | { | ||
979 | currentstyle.unsetBold(); | ||
980 | } | ||
981 | else if (ent == "/i") | ||
982 | { | ||
983 | currentstyle.unsetItalic(); | ||
984 | } | ||
985 | else if (ent == "/em") | ||
986 | { | ||
987 | currentstyle.unsetItalic(); | ||
988 | } | ||
989 | else if (ent == "/div") | ||
990 | { | ||
991 | currentstyle.unset(); | ||
992 | if (ch != '>') ch = skip_ws_end(); | ||
993 | ch = 10; | ||
994 | if (!stylestack.isEmpty()) | ||
995 | { | ||
996 | stylestack.pop(); | ||
997 | } | ||
998 | continue; | ||
999 | } | ||
1000 | else if (ent == "tr") | ||
1001 | { | ||
1002 | if (ch != '>') ch = skip_ws_end(); | ||
1003 | ch = 10; | ||
1004 | q += '-'; | ||
1005 | q += QChar(parent->getwidth()); | ||
1006 | q += 2; | ||
1007 | q += '\0'; | ||
1008 | q += '\0'; | ||
1009 | q += '\0'; | ||
1010 | continue; | ||
1011 | } | ||
1012 | else if (ent == "td") | ||
1013 | { | ||
1014 | if (ch != '>') ch = skip_ws_end(); | ||
1015 | ignorespace = false; | ||
1016 | } | ||
1017 | else if (ent == "/td") | ||
1018 | { | ||
1019 | ignorespace = true; | ||
1020 | // parse_paragraph(currentstyle, ch, pos); | ||
1021 | //stylestack.push_front(currentstyle); | ||
1022 | if (ch != '>') ch = skip_ws_end(); | ||
1023 | // ch = '|'; | ||
1024 | //continue; | ||
1025 | ch = 10; | ||
1026 | q += '-'; | ||
1027 | q += QChar(parent->getwidth()); | ||
1028 | q += 1; | ||
1029 | q += '\0'; | ||
1030 | q += '\0'; | ||
1031 | q += '\0'; | ||
1032 | continue; | ||
1033 | } | ||
1034 | /* | ||
1035 | else if (ent == "/td") | ||
1036 | { | ||
1037 | currentstyle.unset(); | ||
1038 | if (ch != '>') ch = skip_ws_end(); | ||
1039 | if (!stylestack.isEmpty()) | ||
1040 | { | ||
1041 | stylestack.pop(); | ||
1042 | } | ||
1043 | // ch = 10; | ||
1044 | continue; | ||
1045 | } | ||
1046 | */ | ||
1047 | else if (ent[0] == '/' && ent.length() == 3 && ent[1] == 'h' && QString("123456789").find(ent[2]) != -1) | ||
1048 | { | ||
1049 | currentstyle.unset(); | ||
1050 | if (ch != '>') ch = skip_ws_end(); | ||
1051 | ch = 10; | ||
1052 | continue; | ||
1053 | } | ||
1054 | |||
1055 | |||
1056 | |||
1057 | else if (ent == "table" || ent == "/table") | ||
1058 | { | ||
1059 | currentstyle.unset(); | ||
1060 | ignorespace = (ent == "table"); | ||
1061 | if (ch == ' ') ch = skip_ws(); | ||
1062 | while (ch != '>' && ch != UEOF) | ||
1063 | { | ||
1064 | QString ent = getname(ch, " =>").lower(); | ||
1065 | QString attr = getattr(ch); | ||
1066 | qDebug("<table>Entity:%s Attr:%s", (const char*)ent, (const char*)attr); | ||
1067 | } | ||
1068 | if (ch != '>') ch = skip_ws_end(); | ||
1069 | |||
1070 | ch = 10; | ||
1071 | q += '-'; | ||
1072 | q += QChar(parent->getwidth()); | ||
1073 | q += 2; | ||
1074 | q += '\0'; | ||
1075 | q += '\0'; | ||
1076 | q += '\0'; | ||
1077 | continue; | ||
1078 | } | ||
1079 | else if (ent == "hr") | ||
1080 | { | ||
1081 | if (ch == ' ') ch = skip_ws(); | ||
1082 | // if (stylestack.isEmpty()) | ||
1083 | // { | ||
1084 | currentstyle.unset(); | ||
1085 | // } | ||
1086 | /* | ||
1087 | else | ||
1088 | { | ||
1089 | qDebug("Using stack style"); | ||
1090 | currentstyle = stylestack.first(); | ||
1091 | } | ||
1092 | */ | ||
1093 | unsigned char red = 0, green = 0, blue = 0; | ||
1094 | while (ch != '>' && ch != UEOF) | ||
1095 | { | ||
1096 | QString ent = getname(ch, " =>").lower(); | ||
1097 | QString attr = getattr(ch); | ||
1098 | if (ent == "color") | ||
1099 | { | ||
1100 | parse_color(attr, red, green, blue); | ||
1101 | } | ||
1102 | qDebug("<hr>Entity:%s Attr:%s", (const char*)ent, (const char*)attr); | ||
1103 | } | ||
1104 | if (ch != '>') ch = skip_ws_end(); | ||
1105 | ch = 10; | ||
1106 | q += '-'; | ||
1107 | q += QChar(parent->getwidth()); | ||
1108 | q += 3; | ||
1109 | q += red; | ||
1110 | q += green; | ||
1111 | q += blue; | ||
1112 | continue; | ||
1113 | } | ||
1114 | |||
1115 | |||
1116 | |||
1117 | else if (ent == "img") | ||
1118 | { | ||
1119 | if (ch == ' ') ch = skip_ws(); | ||
1120 | while (ch != '>' && ch != UEOF) | ||
1121 | { | ||
1122 | QString ent = getname(ch, " =>").lower(); | ||
1123 | QString attr = getattr(ch); | ||
1124 | qDebug("<img>Entity:%s Attr:%s", (const char*)ent, (const char*)attr); | ||
1125 | if (ent == "src") | ||
1126 | { | ||
1127 | if (m_bchm) | ||
1128 | { | ||
1129 | QImage* img = parent->getPicture(attr); | ||
1130 | if (img != NULL) | ||
1131 | { | ||
1132 | currentstyle.setPicture(true, img); | ||
1133 | } | ||
1134 | } | ||
1135 | else | ||
1136 | { | ||
1137 | QFileInfo f(currentfile); | ||
1138 | QFileInfo f1(f.dir(true), attr); | ||
1139 | QPixmap pm; | ||
1140 | if (pm.load(f1.absFilePath())) | ||
1141 | { | ||
1142 | QImage* img = new QImage(pm.convertToImage()); | ||
1143 | currentstyle.setPicture(true, img); | ||
1144 | } | ||
1145 | } | ||
1146 | } | ||
1147 | if (ent == "recindex") | ||
1148 | { | ||
1149 | bool ok; | ||
1150 | unsigned int picindex = attr.toUInt(&ok); | ||
1151 | qDebug("Looking for image at %u", picindex); | ||
1152 | QImage* img = parent->getPicture(picindex); | ||
1153 | if (img != NULL) | ||
1154 | { | ||
1155 | currentstyle.setPicture(true, img); | ||
1156 | } | ||
1157 | else | ||
1158 | { | ||
1159 | qDebug("No image found"); | ||
1160 | } | ||
1161 | } | ||
1162 | } | ||
1163 | if (ch != '>') ch = skip_ws_end(); | ||
1164 | ch = '#'; | ||
1165 | break; | ||
1166 | } | ||
1167 | else if (ent.left(2) == "dc") | ||
1168 | { | ||
1169 | QString nd("/"); | ||
1170 | skipblock(nd+ent); | ||
1171 | } | ||
1172 | else if (ent == "metadata") | ||
1173 | { | ||
1174 | // skipblock("/metadata"); | ||
1175 | } | ||
1176 | /* | ||
1177 | else if (ent == "metadata") | ||
1178 | { | ||
1179 | currentstyle.setFontSize(-2); | ||
1180 | } | ||
1181 | else if (ent == "/metadata") | ||
1182 | { | ||
1183 | currentstyle.unset(); | ||
1184 | ch = 10; | ||
1185 | continue; | ||
1186 | } | ||
1187 | */ | ||
1188 | else | ||
1189 | { | ||
1190 | if (ent[0] != '/') | ||
1191 | qDebug("Not handling:%s", (const char*)ent); | ||
1192 | } | ||
1193 | |||
1194 | if (ch != '>') ch = skip_ws_end(); | ||
1195 | mygetch(ch, dummy, npos); | ||
1196 | } | ||
1197 | if (ch == '&') | ||
1198 | { | ||
1199 | mygetch(ch, dummy, npos); | ||
1200 | if (ch == '#') | ||
1201 | { | ||
1202 | int id = 0; | ||
1203 | mygetch(ch, dummy, npos); | ||
1204 | while (ch != ';' && ch != UEOF) | ||
1205 | { | ||
1206 | id = 10*id+ch-'0'; | ||
1207 | mygetch(ch, dummy, npos); | ||
1208 | } | ||
1209 | ch = id; | ||
1210 | } | ||
1211 | else | ||
1212 | { | ||
1213 | QString en; | ||
1214 | en += ch; | ||
1215 | mygetch(ch, dummy, npos); | ||
1216 | while (ch != ';' && ch != UEOF) | ||
1217 | { | ||
1218 | en += ch; | ||
1219 | mygetch(ch, dummy, npos); | ||
1220 | } | ||
1221 | if (entmap == NULL) initentmap(); | ||
1222 | #if defined(USEQPE) || defined(_WINDOWS) | ||
1223 | QMap<QString, tchar>::Iterator it = entmap->find(en); | ||
1224 | #else | ||
1225 | QMap<QString, tchar>::iterator it = entmap->find(en); | ||
1226 | #endif | ||
1227 | if (it != entmap->end()) | ||
1228 | { | ||
1229 | ch = *it; | ||
1230 | } | ||
1231 | else | ||
1232 | { | ||
1233 | ch = '.'; | ||
1234 | } | ||
1235 | } | ||
1236 | } | ||
1237 | // sty = (dummy == ucFontBase) ? currentstyle : dummy; | ||
1238 | sty = currentstyle; | ||
1239 | } | ||
1240 | while (!isPre && (lastch == ' ' || lastch == 10 || ignorespace) && ch == ' '); | ||
1241 | // lastch = ch; | ||
1242 | lastch = ch; | ||
1243 | return; | ||
1244 | } | ||
1245 | |||
1246 | extern "C" | ||
1247 | { | ||
1248 | CFilter* newfilter(const QString& s) { return new striphtml(s); } | ||
1249 | } | ||