Diffstat (limited to 'noncore/apps/opie-reader/striphtml.cpp') (more/less context) (ignore whitespace changes)
-rw-r--r-- | noncore/apps/opie-reader/striphtml.cpp | 252 |
1 files changed, 209 insertions, 43 deletions
diff --git a/noncore/apps/opie-reader/striphtml.cpp b/noncore/apps/opie-reader/striphtml.cpp index e86402b..c434dbb 100644 --- a/noncore/apps/opie-reader/striphtml.cpp +++ b/noncore/apps/opie-reader/striphtml.cpp @@ -7,9 +7,8 @@ #endif #include "CDrawBuffer.h" #include "striphtml.h" #include "hrule.h" -#include "util.h" #include <qregexp.h> #include <qimage.h> #include <qpixmap.h> @@ -89,17 +88,25 @@ void striphtml::skipblock(const QString& _ent) qDebug("Skipblock:%s", (const char*)ent); } while (ent != _ent && ch != UEOF); } -void striphtml::locate(unsigned int n) +void striphtml::reset() { m_inblock = false; text_q = ""; + q = ""; + tablenesteddepth = 0; forcecentre = false; ignorespace = false; + indent = 0; while (!stylestack.isEmpty()) stylestack.pop(); currentstyle.unset(); +} + +void striphtml::locate(unsigned int n) +{ qDebug("striphtml:locating:%u", n); + reset(); parent->locate(n); } int striphtml::getpara(CBuffer& buff, unsigned long& startpos) @@ -232,8 +239,15 @@ QString striphtml::dehtml(const QString& _info) bool striphtml::findanchor(const QString& _info) { // QProgressDialog dlg("Finding link...", QString::null, 0, NULL, "progress", true); // QProgressBar dlg(0); + if (parent->findanchor(_info)) + { + reset(); + return true; + } + qDebug("Using html find"); + parent->locate(parent->startSection()); #if defined(USEQPE) || defined(_WINDOWS) QString info; for (int i = 0; i < _info.length(); i++) { @@ -294,9 +308,9 @@ bool striphtml::findanchor(const QString& _info) } return ret; } -striphtml::striphtml(const QString& _s) : entmap(NULL), isPre(false), currentid(0), lastch(0), currentfile(_s), indent(0), forcecentre(false), m_inblock(false), m_bchm(false), ignorespace(false) +striphtml::striphtml(const QString& _s) : entmap(NULL), isPre(false), currentid(0), lastch(0), currentfile(_s), indent(0), forcecentre(false), m_inblock(false), m_bchm(false), ignorespace(false), tablenesteddepth(0) { href2filepos = new QMap<QString, unsigned long>; id2href = new QMap<unsigned long, QString>; } @@ -310,9 +324,19 @@ striphtml::~striphtml() void striphtml::initentmap() { entmap = new QMap<QString, tchar>; - QString fname(QTReaderUtil::getPluginPath("data")); +#ifdef USEQPE +#ifdef OPIE + QString fname(getenv("OPIEDIR")); +#else + QString fname(getenv("QTDIR")); +#endif + fname += "/plugins/reader/data"; +#else + QString fname(getenv("READERDIR")); + fname += "/data"; +#endif QFileInfo fi; fi.setFile(fname, "HTMLentities"); if (fi.exists()) { @@ -411,8 +435,14 @@ QString striphtml::getattr(tchar& ch) mygetch(ch, sty, pos); ref = getname(ch, "\""); ch = skip_ws(); } + else if (ch == '\'') + { + mygetch(ch, sty, pos); + ref = getname(ch, "\'"); + ch = skip_ws(); + } else { ref = getname(ch, " >"); if (ch == ' ') ch = skip_ws(); @@ -477,8 +507,13 @@ linkType striphtml::hyperlink(unsigned int n, unsigned int, QString& w, QString& if (file.isEmpty()) { + if (parent->findanchor(name)) + { + reset(); + return eLink; + } fpit = href2filepos->find(name); if (fpit != href2filepos->end()) { locate(*fpit); @@ -487,9 +522,8 @@ linkType striphtml::hyperlink(unsigned int n, unsigned int, QString& w, QString& else { // nm = QString("<a[^>]*name[ \t]*=[ \t]*\"") + name + "\""; qDebug("Do a search for:%s", (const char*)name); - parent->locate(0); findanchor(name); return eLink; } } @@ -497,9 +531,9 @@ linkType striphtml::hyperlink(unsigned int n, unsigned int, QString& w, QString& // if (href.find('#') == -1) { if (m_bchm) { - w = "/"+file; + w = file; nm = name; return eFile; } else @@ -563,9 +597,33 @@ void striphtml::mygetch(tchar& ch, CStyle& sty, unsigned long& pos) } } if (ch == 10 && !isPre) { +#ifdef REMOVE_LF_BEFORE_ENDTAG + parent->getch(ch, sty, pos); + if (ch == '<') + { + parent->getch(ch, sty, pos); + if (ch == '/') + { + ch = '<'; + text_q += '/'; + } + else + { + text_q += '<'; + text_q += ch; + ch = ' '; + } + } + else + { + text_q += ch; + ch = ' '; + } +#else ch = ' '; +#endif } } void striphtml::parse_paragraph(CStyle& currentstyle, tchar& ch, unsigned long pos) @@ -583,9 +641,8 @@ void striphtml::parse_paragraph(CStyle& currentstyle, tchar& ch, unsigned long p currentstyle.unset(); } else { - qDebug("Using stack style"); currentstyle = stylestack.first(); } if (forcecentre) { @@ -606,8 +663,12 @@ void striphtml::parse_paragraph(CStyle& currentstyle, tchar& ch, unsigned long p if (attr == "right") { currentstyle.setRightJustify(); } + if (attr == "justify") + { + currentstyle.setFullJustify(); + } } if (ent == "id") { (*href2filepos)[attr] = pos; @@ -653,20 +714,19 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) { q = q.right(q.length()-1); } sty = currentstyle; + lastch = ch; return; } do { unsigned long npos; CStyle dummy; mygetch(ch, dummy, pos); - // if (ch == 10 && !isPre) ch = ' '; while (ch == '<' && ch != UEOF) { ch = skip_ws(); - QString ent = getname(ch, " >").lower(); // qDebug("Entity:%s", (const char*)ent); @@ -680,9 +740,9 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) while (ch != '>' && ch != UEOF) { QString ent = getname(ch, " =>").lower(); QString attr = getattr(ch); - qDebug("<A>Entity:%s Attr:%s", (const char*)ent, (const char*)attr); + //qDebug("<A>Entity:%s Attr:%s", (const char*)ent, (const char*)attr); if (ent == "name") { name = attr; } @@ -711,9 +771,9 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) if (ent == "title") { text_q = attr + "</a><p>"; } - qDebug("<a %s=%s>", (const char*)ent, (const char*)ref); + //qDebug("<a %s=%s>", (const char*)ent, (const char*)ref); } if (ishref) { currentstyle.setColour(0,0,255); @@ -746,10 +806,11 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) } else if (ent == "div") { parse_paragraph(currentstyle, ch, pos); - stylestack.push_front(currentstyle); - //indent = 0; + stylestack.push_front(currentstyle); + currentstyle.setExtraSpace(16); + //indent = 0; continue; } else if (ent == "sup") { @@ -799,8 +860,9 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) } else if (ent == "pre") { isPre = true; + currentstyle.setNoJustify(); currentstyle.setMono(); } else if (ent == "tt") { @@ -821,16 +883,19 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) else if (ent == "blockquote") { if (ch != '>') ch = skip_ws_end(); ch = 10; + currentstyle.setExtraSpace(0); currentstyle.setLeftMargin(30); currentstyle.setRightMargin(30); continue; } else if (ent == "br" || ent == "br/") { if (ch != '>') ch = skip_ws_end(); ch = 10; + currentstyle.setExtraSpace(0); + lastch = 0; continue; } else if (ent == "mbp:pagebreak") { @@ -860,8 +925,9 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) } else if (ent == "li") { if (ch != '>') ch = skip_ws_end(); + lastch = 0; ch = 10; if (m_listtype[indent % m_cmaxdepth] == 1) { q.setNum(m_ctr[indent % m_cmaxdepth]++); @@ -917,8 +983,14 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) else if (ent == "/big") { currentstyle.setFontSize(0); } + else if (ent[0] == '/' && ent[1] == 'h' && ent.length() == 3 && QString("123456789").find(ent[2]) != -1) + { + parse_paragraph(currentstyle, ch, pos); + currentstyle.setExtraSpace(3); + continue; + } else if (ent[0] == 'h' && ent.length() == 2 && QString("123456789").find(ent[1]) != -1) { indent = 0; if (ent[1] == '1') @@ -927,36 +999,34 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) currentstyle.setFontSize(3); currentstyle.setExtraSpace(8); currentstyle.setBold(); // currentstyle.setExtraSpace(10); - continue; } else if (ent[1] == '2') { parse_paragraph(currentstyle, ch, pos); currentstyle.setFontSize(2); currentstyle.setExtraSpace(6); currentstyle.setBold(); // currentstyle.setExtraSpace(10); - continue; } else if (ent[1] == '3') { parse_paragraph(currentstyle, ch, pos); currentstyle.setFontSize(1); currentstyle.setExtraSpace(4); currentstyle.setBold(); // currentstyle.setExtraSpace(10); - continue; } else { parse_paragraph(currentstyle, ch, pos); currentstyle.setExtraSpace(4); currentstyle.setBold(); // currentstyle.setExtraSpace(10); - continue; } + ch = 10; + continue; } else if (ent == "/a") @@ -1046,18 +1116,27 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) else if (ent[0] == '/' && ent.length() == 3 && ent[1] == 'h' && QString("123456789").find(ent[2]) != -1) { currentstyle.unset(); if (ch != '>') ch = skip_ws_end(); - ch = 10; - continue; + //ch = 10; + //continue; } - - - else if (ent == "table" || ent == "/table") { currentstyle.unset(); ignorespace = (ent == "table"); + if (ent == "table") + { + if (tablenesteddepth++ == 0) currentstyle.setTable(pos); + } + else + { + if (--tablenesteddepth <= 0) + { + tablenesteddepth = 0; + currentstyle.setTable(0xffffffff); + } + } if (ch == ' ') ch = skip_ws(); while (ch != '>' && ch != UEOF) { QString ent = getname(ch, " =>").lower(); @@ -1065,31 +1144,25 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) qDebug("<table>Entity:%s Attr:%s", (const char*)ent, (const char*)attr); } if (ch != '>') ch = skip_ws_end(); + currentstyle.setLeftMargin(6*tablenesteddepth); + + + lastch = 0; // Anything but 10 ch = 10; q += '-'; q += QChar(parent->getwidth()); - q += 2; + q += 3; q += '\0'; q += '\0'; q += '\0'; continue; } else if (ent == "hr") { + //bool isPageBreak = false; if (ch == ' ') ch = skip_ws(); - // if (stylestack.isEmpty()) - // { - currentstyle.unset(); - // } - /* - else - { - qDebug("Using stack style"); - currentstyle = stylestack.first(); - } - */ unsigned char red = 0, green = 0, blue = 0; while (ch != '>' && ch != UEOF) { QString ent = getname(ch, " =>").lower(); @@ -1097,18 +1170,48 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) if (ent == "color") { parse_color(attr, red, green, blue); } + /* + if (ent == "size") + { + if (attr == "0") + { + isPageBreak = true; + } + } + */ qDebug("<hr>Entity:%s Attr:%s", (const char*)ent, (const char*)attr); } if (ch != '>') ch = skip_ws_end(); - ch = 10; - q += '-'; - q += QChar(parent->getwidth()); - q += 3; - q += red; - q += green; - q += blue; + /* + if (isPageBreak) + { + ch = UEOF; + } + else + { + */ + // if (stylestack.isEmpty()) + // { + currentstyle.unset(); + // } + /* + else + { + qDebug("Using stack style"); + currentstyle = stylestack.first(); + } + */ + lastch = 0; //Anything but 10 or ' ' + ch = 10; + q += '-'; + q += QChar(parent->getwidth()); + q += 3; + q += red; + q += green; + q += blue; + continue; } @@ -1122,16 +1225,25 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) QString attr = getattr(ch); qDebug("<img>Entity:%s Attr:%s", (const char*)ent, (const char*)attr); if (ent == "src") { + /* if (m_bchm) { QImage* img = parent->getPicture(attr); if (img != NULL) { currentstyle.setPicture(true, img); } } + */ + + + QImage* img = parent->getPicture(attr); + if (img != NULL) + { + currentstyle.setPicture(true, img); + } else { QFileInfo f(currentfile); QFileInfo f1(f.dir(true), attr); @@ -1171,8 +1283,16 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) else if (ent == "metadata") { // skipblock("/metadata"); } + else if (ent == "title") + { + skipblock("/title"); + } + else if (ent == "head") + { + skipblock("/head"); + } /* else if (ent == "metadata") { currentstyle.setFontSize(-2); @@ -1190,9 +1310,12 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) qDebug("Not handling:%s", (const char*)ent); } if (ch != '>') ch = skip_ws_end(); - mygetch(ch, dummy, npos); + if (ent[0] == '/') + mygetch(ch, dummy, pos); + else + mygetch(ch, dummy, npos); } if (ch == '&') { mygetch(ch, dummy, npos); @@ -1233,16 +1356,59 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) } } } // sty = (dummy == ucFontBase) ? currentstyle : dummy; + if (lastch == 10 && ch == 10 && sty.getExtraSpace() > currentstyle.getExtraSpace()) + { + currentstyle.setExtraSpace(sty.getExtraSpace()); + } sty = currentstyle; } - while (!isPre && (lastch == ' ' || lastch == 10 || ignorespace) && ch == ' '); + while (!isPre && (((lastch == ' ' || lastch == 10 || ignorespace) && ch == ' ') || ((ch == 10) && (lastch == 10)))); // lastch = ch; lastch = ch; return; } +QString striphtml::getTableAsHtml(unsigned long loc) +{ + qDebug("striphtml::getTableAsHtml"); + QString ret; + tchar ch(0); + CStyle sty; + unsigned long pos; + locate(loc); + int endpos(0); + QString endmarker("</table>"); + QString startmarker("<table"); + int startpos(0); + int depth(0); + while (ch != UEOF) + { + parent->getch(ch, sty, pos); + QChar qc(ch); + ret += qc; + if (qc.lower() == endmarker[endpos]) + { + if ((++endpos >= endmarker.length()) && (--depth <= 0)) break; + } + else + { + endpos = 0; + } + if (qc.lower() == startmarker[startpos]) + { + if (++startpos >= startmarker.length()) ++depth; + } + else + { + startpos = 0; + } + } + return ret; +} + + extern "C" { CFilter* newfilter(const QString& s) { return new striphtml(s); } } |