summaryrefslogtreecommitdiff
path: root/noncore/apps/opie-reader/striphtml.cpp
Unidiff
Diffstat (limited to 'noncore/apps/opie-reader/striphtml.cpp') (more/less context) (ignore whitespace changes)
-rw-r--r--noncore/apps/opie-reader/striphtml.cpp252
1 files changed, 209 insertions, 43 deletions
diff --git a/noncore/apps/opie-reader/striphtml.cpp b/noncore/apps/opie-reader/striphtml.cpp
index e86402b..c434dbb 100644
--- a/noncore/apps/opie-reader/striphtml.cpp
+++ b/noncore/apps/opie-reader/striphtml.cpp
@@ -9,5 +9,4 @@
9#include "striphtml.h" 9#include "striphtml.h"
10#include "hrule.h" 10#include "hrule.h"
11#include "util.h"
12 11
13#include <qregexp.h> 12#include <qregexp.h>
@@ -91,13 +90,21 @@ void striphtml::skipblock(const QString& _ent)
91} 90}
92 91
93void striphtml::locate(unsigned int n) 92void striphtml::reset()
94{ 93{
95 m_inblock = false; 94 m_inblock = false;
96 text_q = ""; 95 text_q = "";
96 q = "";
97 tablenesteddepth = 0;
97 forcecentre = false; 98 forcecentre = false;
98 ignorespace = false; 99 ignorespace = false;
100 indent = 0;
99 while (!stylestack.isEmpty()) stylestack.pop(); 101 while (!stylestack.isEmpty()) stylestack.pop();
100 currentstyle.unset(); 102 currentstyle.unset();
103}
104
105void striphtml::locate(unsigned int n)
106{
101 qDebug("striphtml:locating:%u", n); 107 qDebug("striphtml:locating:%u", n);
108 reset();
102 parent->locate(n); 109 parent->locate(n);
103} 110}
@@ -234,4 +241,11 @@ bool striphtml::findanchor(const QString& _info)
234 // QProgressDialog dlg("Finding link...", QString::null, 0, NULL, "progress", true); 241 // QProgressDialog dlg("Finding link...", QString::null, 0, NULL, "progress", true);
235 // QProgressBar dlg(0); 242 // QProgressBar dlg(0);
243 if (parent->findanchor(_info))
244 {
245 reset();
246 return true;
247 }
248 qDebug("Using html find");
249 parent->locate(parent->startSection());
236#if defined(USEQPE) || defined(_WINDOWS) 250#if defined(USEQPE) || defined(_WINDOWS)
237 QString info; 251 QString info;
@@ -296,5 +310,5 @@ bool striphtml::findanchor(const QString& _info)
296} 310}
297 311
298striphtml::striphtml(const QString& _s) : entmap(NULL), isPre(false), currentid(0), lastch(0), currentfile(_s), indent(0), forcecentre(false), m_inblock(false), m_bchm(false), ignorespace(false) 312striphtml::striphtml(const QString& _s) : entmap(NULL), isPre(false), currentid(0), lastch(0), currentfile(_s), indent(0), forcecentre(false), m_inblock(false), m_bchm(false), ignorespace(false), tablenesteddepth(0)
299{ 313{
300 href2filepos = new QMap<QString, unsigned long>; 314 href2filepos = new QMap<QString, unsigned long>;
@@ -312,5 +326,15 @@ void striphtml::initentmap()
312{ 326{
313 entmap = new QMap<QString, tchar>; 327 entmap = new QMap<QString, tchar>;
314 QString fname(QTReaderUtil::getPluginPath("data")); 328#ifdef USEQPE
329#ifdef OPIE
330 QString fname(getenv("OPIEDIR"));
331#else
332 QString fname(getenv("QTDIR"));
333#endif
334 fname += "/plugins/reader/data";
335#else
336 QString fname(getenv("READERDIR"));
337 fname += "/data";
338#endif
315 QFileInfo fi; 339 QFileInfo fi;
316 fi.setFile(fname, "HTMLentities"); 340 fi.setFile(fname, "HTMLentities");
@@ -413,4 +437,10 @@ QString striphtml::getattr(tchar& ch)
413 ch = skip_ws(); 437 ch = skip_ws();
414 } 438 }
439 else if (ch == '\'')
440 {
441 mygetch(ch, sty, pos);
442 ref = getname(ch, "\'");
443 ch = skip_ws();
444 }
415 else 445 else
416 { 446 {
@@ -479,4 +509,9 @@ linkType striphtml::hyperlink(unsigned int n, unsigned int, QString& w, QString&
479 if (file.isEmpty()) 509 if (file.isEmpty())
480 { 510 {
511 if (parent->findanchor(name))
512 {
513 reset();
514 return eLink;
515 }
481 fpit = href2filepos->find(name); 516 fpit = href2filepos->find(name);
482 if (fpit != href2filepos->end()) 517 if (fpit != href2filepos->end())
@@ -489,5 +524,4 @@ linkType striphtml::hyperlink(unsigned int n, unsigned int, QString& w, QString&
489 // nm = QString("<a[^>]*name[ \t]*=[ \t]*\"") + name + "\""; 524 // nm = QString("<a[^>]*name[ \t]*=[ \t]*\"") + name + "\"";
490 qDebug("Do a search for:%s", (const char*)name); 525 qDebug("Do a search for:%s", (const char*)name);
491 parent->locate(0);
492 findanchor(name); 526 findanchor(name);
493 return eLink; 527 return eLink;
@@ -499,5 +533,5 @@ linkType striphtml::hyperlink(unsigned int n, unsigned int, QString& w, QString&
499 if (m_bchm) 533 if (m_bchm)
500 { 534 {
501 w = "/"+file; 535 w = file;
502 nm = name; 536 nm = name;
503 return eFile; 537 return eFile;
@@ -565,5 +599,29 @@ void striphtml::mygetch(tchar& ch, CStyle& sty, unsigned long& pos)
565 if (ch == 10 && !isPre) 599 if (ch == 10 && !isPre)
566 { 600 {
601#ifdef REMOVE_LF_BEFORE_ENDTAG
602 parent->getch(ch, sty, pos);
603 if (ch == '<')
604 {
605 parent->getch(ch, sty, pos);
606 if (ch == '/')
607 {
608 ch = '<';
609 text_q += '/';
610 }
611 else
612 {
613 text_q += '<';
614 text_q += ch;
615 ch = ' ';
616 }
617 }
618 else
619 {
620 text_q += ch;
621 ch = ' ';
622 }
623#else
567 ch = ' '; 624 ch = ' ';
625#endif
568 } 626 }
569} 627}
@@ -585,5 +643,4 @@ void striphtml::parse_paragraph(CStyle& currentstyle, tchar& ch, unsigned long p
585 else 643 else
586 { 644 {
587 qDebug("Using stack style");
588 currentstyle = stylestack.first(); 645 currentstyle = stylestack.first();
589 } 646 }
@@ -608,4 +665,8 @@ void striphtml::parse_paragraph(CStyle& currentstyle, tchar& ch, unsigned long p
608 currentstyle.setRightJustify(); 665 currentstyle.setRightJustify();
609 } 666 }
667 if (attr == "justify")
668 {
669 currentstyle.setFullJustify();
670 }
610 } 671 }
611 if (ent == "id") 672 if (ent == "id")
@@ -655,4 +716,5 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos)
655 } 716 }
656 sty = currentstyle; 717 sty = currentstyle;
718 lastch = ch;
657 return; 719 return;
658 } 720 }
@@ -662,9 +724,7 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos)
662 CStyle dummy; 724 CStyle dummy;
663 mygetch(ch, dummy, pos); 725 mygetch(ch, dummy, pos);
664 // if (ch == 10 && !isPre) ch = ' ';
665 while (ch == '<' && ch != UEOF) 726 while (ch == '<' && ch != UEOF)
666 { 727 {
667 ch = skip_ws(); 728 ch = skip_ws();
668
669 QString ent = getname(ch, " >").lower(); 729 QString ent = getname(ch, " >").lower();
670 730
@@ -682,5 +742,5 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos)
682 QString ent = getname(ch, " =>").lower(); 742 QString ent = getname(ch, " =>").lower();
683 QString attr = getattr(ch); 743 QString attr = getattr(ch);
684 qDebug("<A>Entity:%s Attr:%s", (const char*)ent, (const char*)attr); 744 //qDebug("<A>Entity:%s Attr:%s", (const char*)ent, (const char*)attr);
685 if (ent == "name") 745 if (ent == "name")
686 { 746 {
@@ -713,5 +773,5 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos)
713 text_q = attr + "</a><p>"; 773 text_q = attr + "</a><p>";
714 } 774 }
715 qDebug("<a %s=%s>", (const char*)ent, (const char*)ref); 775 //qDebug("<a %s=%s>", (const char*)ent, (const char*)ref);
716 } 776 }
717 if (ishref) 777 if (ishref)
@@ -748,6 +808,7 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos)
748 { 808 {
749 parse_paragraph(currentstyle, ch, pos); 809 parse_paragraph(currentstyle, ch, pos);
750 stylestack.push_front(currentstyle); 810 stylestack.push_front(currentstyle);
751 //indent = 0; 811 currentstyle.setExtraSpace(16);
812 //indent = 0;
752 continue; 813 continue;
753 } 814 }
@@ -801,4 +862,5 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos)
801 { 862 {
802 isPre = true; 863 isPre = true;
864 currentstyle.setNoJustify();
803 currentstyle.setMono(); 865 currentstyle.setMono();
804 } 866 }
@@ -823,4 +885,5 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos)
823 if (ch != '>') ch = skip_ws_end(); 885 if (ch != '>') ch = skip_ws_end();
824 ch = 10; 886 ch = 10;
887 currentstyle.setExtraSpace(0);
825 currentstyle.setLeftMargin(30); 888 currentstyle.setLeftMargin(30);
826 currentstyle.setRightMargin(30); 889 currentstyle.setRightMargin(30);
@@ -831,4 +894,6 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos)
831 if (ch != '>') ch = skip_ws_end(); 894 if (ch != '>') ch = skip_ws_end();
832 ch = 10; 895 ch = 10;
896 currentstyle.setExtraSpace(0);
897 lastch = 0;
833 continue; 898 continue;
834 } 899 }
@@ -862,4 +927,5 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos)
862 { 927 {
863 if (ch != '>') ch = skip_ws_end(); 928 if (ch != '>') ch = skip_ws_end();
929 lastch = 0;
864 ch = 10; 930 ch = 10;
865 if (m_listtype[indent % m_cmaxdepth] == 1) 931 if (m_listtype[indent % m_cmaxdepth] == 1)
@@ -919,4 +985,10 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos)
919 currentstyle.setFontSize(0); 985 currentstyle.setFontSize(0);
920 } 986 }
987 else if (ent[0] == '/' && ent[1] == 'h' && ent.length() == 3 && QString("123456789").find(ent[2]) != -1)
988 {
989 parse_paragraph(currentstyle, ch, pos);
990 currentstyle.setExtraSpace(3);
991 continue;
992 }
921 else if (ent[0] == 'h' && ent.length() == 2 && QString("123456789").find(ent[1]) != -1) 993 else if (ent[0] == 'h' && ent.length() == 2 && QString("123456789").find(ent[1]) != -1)
922 { 994 {
@@ -929,5 +1001,4 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos)
929 currentstyle.setBold(); 1001 currentstyle.setBold();
930 // currentstyle.setExtraSpace(10); 1002 // currentstyle.setExtraSpace(10);
931 continue;
932 } 1003 }
933 else if (ent[1] == '2') 1004 else if (ent[1] == '2')
@@ -938,5 +1009,4 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos)
938 currentstyle.setBold(); 1009 currentstyle.setBold();
939 // currentstyle.setExtraSpace(10); 1010 // currentstyle.setExtraSpace(10);
940 continue;
941 } 1011 }
942 else if (ent[1] == '3') 1012 else if (ent[1] == '3')
@@ -947,5 +1017,4 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos)
947 currentstyle.setBold(); 1017 currentstyle.setBold();
948 // currentstyle.setExtraSpace(10); 1018 // currentstyle.setExtraSpace(10);
949 continue;
950 } 1019 }
951 else 1020 else
@@ -955,6 +1024,7 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos)
955 currentstyle.setBold(); 1024 currentstyle.setBold();
956 // currentstyle.setExtraSpace(10); 1025 // currentstyle.setExtraSpace(10);
957 continue;
958 } 1026 }
1027 ch = 10;
1028 continue;
959 } 1029 }
960 1030
@@ -1048,14 +1118,23 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos)
1048 currentstyle.unset(); 1118 currentstyle.unset();
1049 if (ch != '>') ch = skip_ws_end(); 1119 if (ch != '>') ch = skip_ws_end();
1050 ch = 10; 1120 //ch = 10;
1051 continue; 1121 //continue;
1052 } 1122 }
1053
1054
1055
1056 else if (ent == "table" || ent == "/table") 1123 else if (ent == "table" || ent == "/table")
1057 { 1124 {
1058 currentstyle.unset(); 1125 currentstyle.unset();
1059 ignorespace = (ent == "table"); 1126 ignorespace = (ent == "table");
1127 if (ent == "table")
1128 {
1129 if (tablenesteddepth++ == 0) currentstyle.setTable(pos);
1130 }
1131 else
1132 {
1133 if (--tablenesteddepth <= 0)
1134 {
1135 tablenesteddepth = 0;
1136 currentstyle.setTable(0xffffffff);
1137 }
1138 }
1060 if (ch == ' ') ch = skip_ws(); 1139 if (ch == ' ') ch = skip_ws();
1061 while (ch != '>' && ch != UEOF) 1140 while (ch != '>' && ch != UEOF)
@@ -1067,8 +1146,12 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos)
1067 if (ch != '>') ch = skip_ws_end(); 1146 if (ch != '>') ch = skip_ws_end();
1068 1147
1148 currentstyle.setLeftMargin(6*tablenesteddepth);
1149
1150
1151 lastch = 0; // Anything but 10
1069 ch = 10; 1152 ch = 10;
1070 q += '-'; 1153 q += '-';
1071 q += QChar(parent->getwidth()); 1154 q += QChar(parent->getwidth());
1072 q += 2; 1155 q += 3;
1073 q += '\0'; 1156 q += '\0';
1074 q += '\0'; 1157 q += '\0';
@@ -1078,16 +1161,6 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos)
1078 else if (ent == "hr") 1161 else if (ent == "hr")
1079 { 1162 {
1163 //bool isPageBreak = false;
1080 if (ch == ' ') ch = skip_ws(); 1164 if (ch == ' ') ch = skip_ws();
1081 // if (stylestack.isEmpty())
1082 // {
1083 currentstyle.unset();
1084 // }
1085 /*
1086 else
1087 {
1088 qDebug("Using stack style");
1089 currentstyle = stylestack.first();
1090 }
1091 */
1092 unsigned char red = 0, green = 0, blue = 0; 1165 unsigned char red = 0, green = 0, blue = 0;
1093 while (ch != '>' && ch != UEOF) 1166 while (ch != '>' && ch != UEOF)
@@ -1099,14 +1172,44 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos)
1099 parse_color(attr, red, green, blue); 1172 parse_color(attr, red, green, blue);
1100 } 1173 }
1174 /*
1175 if (ent == "size")
1176 {
1177 if (attr == "0")
1178 {
1179 isPageBreak = true;
1180 }
1181 }
1182 */
1101 qDebug("<hr>Entity:%s Attr:%s", (const char*)ent, (const char*)attr); 1183 qDebug("<hr>Entity:%s Attr:%s", (const char*)ent, (const char*)attr);
1102 } 1184 }
1103 if (ch != '>') ch = skip_ws_end(); 1185 if (ch != '>') ch = skip_ws_end();
1104 ch = 10; 1186 /*
1105 q += '-'; 1187 if (isPageBreak)
1106 q += QChar(parent->getwidth()); 1188 {
1107 q += 3; 1189 ch = UEOF;
1108 q += red; 1190 }
1109 q += green; 1191 else
1110 q += blue; 1192 {
1193 */
1194 // if (stylestack.isEmpty())
1195 // {
1196 currentstyle.unset();
1197 // }
1198 /*
1199 else
1200 {
1201 qDebug("Using stack style");
1202 currentstyle = stylestack.first();
1203 }
1204 */
1205 lastch = 0; //Anything but 10 or ' '
1206 ch = 10;
1207 q += '-';
1208 q += QChar(parent->getwidth());
1209 q += 3;
1210 q += red;
1211 q += green;
1212 q += blue;
1213
1111 continue; 1214 continue;
1112 } 1215 }
@@ -1124,4 +1227,5 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos)
1124 if (ent == "src") 1227 if (ent == "src")
1125 { 1228 {
1229 /*
1126 if (m_bchm) 1230 if (m_bchm)
1127 { 1231 {
@@ -1132,4 +1236,12 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos)
1132 } 1236 }
1133 } 1237 }
1238 */
1239
1240
1241 QImage* img = parent->getPicture(attr);
1242 if (img != NULL)
1243 {
1244 currentstyle.setPicture(true, img);
1245 }
1134 else 1246 else
1135 { 1247 {
@@ -1173,4 +1285,12 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos)
1173 // skipblock("/metadata"); 1285 // skipblock("/metadata");
1174 } 1286 }
1287 else if (ent == "title")
1288 {
1289 skipblock("/title");
1290 }
1291 else if (ent == "head")
1292 {
1293 skipblock("/head");
1294 }
1175 /* 1295 /*
1176 else if (ent == "metadata") 1296 else if (ent == "metadata")
@@ -1192,5 +1312,8 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos)
1192 1312
1193 if (ch != '>') ch = skip_ws_end(); 1313 if (ch != '>') ch = skip_ws_end();
1194 mygetch(ch, dummy, npos); 1314 if (ent[0] == '/')
1315 mygetch(ch, dummy, pos);
1316 else
1317 mygetch(ch, dummy, npos);
1195 } 1318 }
1196 if (ch == '&') 1319 if (ch == '&')
@@ -1235,7 +1358,11 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos)
1235 } 1358 }
1236 // sty = (dummy == ucFontBase) ? currentstyle : dummy; 1359 // sty = (dummy == ucFontBase) ? currentstyle : dummy;
1360 if (lastch == 10 && ch == 10 && sty.getExtraSpace() > currentstyle.getExtraSpace())
1361 {
1362 currentstyle.setExtraSpace(sty.getExtraSpace());
1363 }
1237 sty = currentstyle; 1364 sty = currentstyle;
1238 } 1365 }
1239 while (!isPre && (lastch == ' ' || lastch == 10 || ignorespace) && ch == ' '); 1366 while (!isPre && (((lastch == ' ' || lastch == 10 || ignorespace) && ch == ' ') || ((ch == 10) && (lastch == 10))));
1240 // lastch = ch; 1367 // lastch = ch;
1241 lastch = ch; 1368 lastch = ch;
@@ -1243,4 +1370,43 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos)
1243} 1370}
1244 1371
1372QString striphtml::getTableAsHtml(unsigned long loc)
1373{
1374 qDebug("striphtml::getTableAsHtml");
1375 QString ret;
1376 tchar ch(0);
1377 CStyle sty;
1378 unsigned long pos;
1379 locate(loc);
1380 int endpos(0);
1381 QString endmarker("</table>");
1382 QString startmarker("<table");
1383 int startpos(0);
1384 int depth(0);
1385 while (ch != UEOF)
1386 {
1387 parent->getch(ch, sty, pos);
1388 QChar qc(ch);
1389 ret += qc;
1390 if (qc.lower() == endmarker[endpos])
1391 {
1392 if ((++endpos >= endmarker.length()) && (--depth <= 0)) break;
1393 }
1394 else
1395 {
1396 endpos = 0;
1397 }
1398 if (qc.lower() == startmarker[startpos])
1399 {
1400 if (++startpos >= startmarker.length()) ++depth;
1401 }
1402 else
1403 {
1404 startpos = 0;
1405 }
1406 }
1407 return ret;
1408}
1409
1410
1245extern "C" 1411extern "C"
1246{ 1412{