author | pohly <pohly> | 2005-05-05 14:39:33 (UTC) |
---|---|---|
committer | pohly <pohly> | 2005-05-05 14:39:33 (UTC) |
commit | 39fbfd5eb7e45d73d38e8a2ce9437a3d7e1b8e91 (patch) (side-by-side diff) | |
tree | 96e66fdc18dca4d4ab8611133e072f57dea224b9 /noncore/apps/opie-reader/striphtml.cpp | |
parent | 279fc4fd1986074acbadd3a8e86fcf3968a8dd5c (diff) | |
download | opie-39fbfd5eb7e45d73d38e8a2ce9437a3d7e1b8e91.zip opie-39fbfd5eb7e45d73d38e8a2ce9437a3d7e1b8e91.tar.gz opie-39fbfd5eb7e45d73d38e8a2ce9437a3d7e1b8e91.tar.bz2 |
new opie-reader sources with support for ArriereGo, Reb input and flite output plugins
Diffstat (limited to 'noncore/apps/opie-reader/striphtml.cpp') (more/less context) (ignore whitespace changes)
-rw-r--r-- | noncore/apps/opie-reader/striphtml.cpp | 252 |
1 files changed, 209 insertions, 43 deletions
diff --git a/noncore/apps/opie-reader/striphtml.cpp b/noncore/apps/opie-reader/striphtml.cpp index e86402b..c434dbb 100644 --- a/noncore/apps/opie-reader/striphtml.cpp +++ b/noncore/apps/opie-reader/striphtml.cpp @@ -9,5 +9,4 @@ #include "striphtml.h" #include "hrule.h" -#include "util.h" #include <qregexp.h> @@ -91,13 +90,21 @@ void striphtml::skipblock(const QString& _ent) } -void striphtml::locate(unsigned int n) +void striphtml::reset() { m_inblock = false; text_q = ""; + q = ""; + tablenesteddepth = 0; forcecentre = false; ignorespace = false; + indent = 0; while (!stylestack.isEmpty()) stylestack.pop(); currentstyle.unset(); +} + +void striphtml::locate(unsigned int n) +{ qDebug("striphtml:locating:%u", n); + reset(); parent->locate(n); } @@ -234,4 +241,11 @@ bool striphtml::findanchor(const QString& _info) // QProgressDialog dlg("Finding link...", QString::null, 0, NULL, "progress", true); // QProgressBar dlg(0); + if (parent->findanchor(_info)) + { + reset(); + return true; + } + qDebug("Using html find"); + parent->locate(parent->startSection()); #if defined(USEQPE) || defined(_WINDOWS) QString info; @@ -296,5 +310,5 @@ bool striphtml::findanchor(const QString& _info) } -striphtml::striphtml(const QString& _s) : entmap(NULL), isPre(false), currentid(0), lastch(0), currentfile(_s), indent(0), forcecentre(false), m_inblock(false), m_bchm(false), ignorespace(false) +striphtml::striphtml(const QString& _s) : entmap(NULL), isPre(false), currentid(0), lastch(0), currentfile(_s), indent(0), forcecentre(false), m_inblock(false), m_bchm(false), ignorespace(false), tablenesteddepth(0) { href2filepos = new QMap<QString, unsigned long>; @@ -312,5 +326,15 @@ void striphtml::initentmap() { entmap = new QMap<QString, tchar>; - QString fname(QTReaderUtil::getPluginPath("data")); +#ifdef USEQPE +#ifdef OPIE + QString fname(getenv("OPIEDIR")); +#else + QString fname(getenv("QTDIR")); +#endif + fname += "/plugins/reader/data"; +#else + QString fname(getenv("READERDIR")); + fname += "/data"; +#endif QFileInfo fi; fi.setFile(fname, "HTMLentities"); @@ -413,4 +437,10 @@ QString striphtml::getattr(tchar& ch) ch = skip_ws(); } + else if (ch == '\'') + { + mygetch(ch, sty, pos); + ref = getname(ch, "\'"); + ch = skip_ws(); + } else { @@ -479,4 +509,9 @@ linkType striphtml::hyperlink(unsigned int n, unsigned int, QString& w, QString& if (file.isEmpty()) { + if (parent->findanchor(name)) + { + reset(); + return eLink; + } fpit = href2filepos->find(name); if (fpit != href2filepos->end()) @@ -489,5 +524,4 @@ linkType striphtml::hyperlink(unsigned int n, unsigned int, QString& w, QString& // nm = QString("<a[^>]*name[ \t]*=[ \t]*\"") + name + "\""; qDebug("Do a search for:%s", (const char*)name); - parent->locate(0); findanchor(name); return eLink; @@ -499,5 +533,5 @@ linkType striphtml::hyperlink(unsigned int n, unsigned int, QString& w, QString& if (m_bchm) { - w = "/"+file; + w = file; nm = name; return eFile; @@ -565,5 +599,29 @@ void striphtml::mygetch(tchar& ch, CStyle& sty, unsigned long& pos) if (ch == 10 && !isPre) { +#ifdef REMOVE_LF_BEFORE_ENDTAG + parent->getch(ch, sty, pos); + if (ch == '<') + { + parent->getch(ch, sty, pos); + if (ch == '/') + { + ch = '<'; + text_q += '/'; + } + else + { + text_q += '<'; + text_q += ch; + ch = ' '; + } + } + else + { + text_q += ch; + ch = ' '; + } +#else ch = ' '; +#endif } } @@ -585,5 +643,4 @@ void striphtml::parse_paragraph(CStyle& currentstyle, tchar& ch, unsigned long p else { - qDebug("Using stack style"); currentstyle = stylestack.first(); } @@ -608,4 +665,8 @@ void striphtml::parse_paragraph(CStyle& currentstyle, tchar& ch, unsigned long p currentstyle.setRightJustify(); } + if (attr == "justify") + { + currentstyle.setFullJustify(); + } } if (ent == "id") @@ -655,4 +716,5 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) } sty = currentstyle; + lastch = ch; return; } @@ -662,9 +724,7 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) CStyle dummy; mygetch(ch, dummy, pos); - // if (ch == 10 && !isPre) ch = ' '; while (ch == '<' && ch != UEOF) { ch = skip_ws(); - QString ent = getname(ch, " >").lower(); @@ -682,5 +742,5 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) QString ent = getname(ch, " =>").lower(); QString attr = getattr(ch); - qDebug("<A>Entity:%s Attr:%s", (const char*)ent, (const char*)attr); + //qDebug("<A>Entity:%s Attr:%s", (const char*)ent, (const char*)attr); if (ent == "name") { @@ -713,5 +773,5 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) text_q = attr + "</a><p>"; } - qDebug("<a %s=%s>", (const char*)ent, (const char*)ref); + //qDebug("<a %s=%s>", (const char*)ent, (const char*)ref); } if (ishref) @@ -748,6 +808,7 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) { parse_paragraph(currentstyle, ch, pos); - stylestack.push_front(currentstyle); - //indent = 0; + stylestack.push_front(currentstyle); + currentstyle.setExtraSpace(16); + //indent = 0; continue; } @@ -801,4 +862,5 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) { isPre = true; + currentstyle.setNoJustify(); currentstyle.setMono(); } @@ -823,4 +885,5 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) if (ch != '>') ch = skip_ws_end(); ch = 10; + currentstyle.setExtraSpace(0); currentstyle.setLeftMargin(30); currentstyle.setRightMargin(30); @@ -831,4 +894,6 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) if (ch != '>') ch = skip_ws_end(); ch = 10; + currentstyle.setExtraSpace(0); + lastch = 0; continue; } @@ -862,4 +927,5 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) { if (ch != '>') ch = skip_ws_end(); + lastch = 0; ch = 10; if (m_listtype[indent % m_cmaxdepth] == 1) @@ -919,4 +985,10 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) currentstyle.setFontSize(0); } + else if (ent[0] == '/' && ent[1] == 'h' && ent.length() == 3 && QString("123456789").find(ent[2]) != -1) + { + parse_paragraph(currentstyle, ch, pos); + currentstyle.setExtraSpace(3); + continue; + } else if (ent[0] == 'h' && ent.length() == 2 && QString("123456789").find(ent[1]) != -1) { @@ -929,5 +1001,4 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) currentstyle.setBold(); // currentstyle.setExtraSpace(10); - continue; } else if (ent[1] == '2') @@ -938,5 +1009,4 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) currentstyle.setBold(); // currentstyle.setExtraSpace(10); - continue; } else if (ent[1] == '3') @@ -947,5 +1017,4 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) currentstyle.setBold(); // currentstyle.setExtraSpace(10); - continue; } else @@ -955,6 +1024,7 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) currentstyle.setBold(); // currentstyle.setExtraSpace(10); - continue; } + ch = 10; + continue; } @@ -1048,14 +1118,23 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) currentstyle.unset(); if (ch != '>') ch = skip_ws_end(); - ch = 10; - continue; + //ch = 10; + //continue; } - - - else if (ent == "table" || ent == "/table") { currentstyle.unset(); ignorespace = (ent == "table"); + if (ent == "table") + { + if (tablenesteddepth++ == 0) currentstyle.setTable(pos); + } + else + { + if (--tablenesteddepth <= 0) + { + tablenesteddepth = 0; + currentstyle.setTable(0xffffffff); + } + } if (ch == ' ') ch = skip_ws(); while (ch != '>' && ch != UEOF) @@ -1067,8 +1146,12 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) if (ch != '>') ch = skip_ws_end(); + currentstyle.setLeftMargin(6*tablenesteddepth); + + + lastch = 0; // Anything but 10 ch = 10; q += '-'; q += QChar(parent->getwidth()); - q += 2; + q += 3; q += '\0'; q += '\0'; @@ -1078,16 +1161,6 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) else if (ent == "hr") { + //bool isPageBreak = false; if (ch == ' ') ch = skip_ws(); - // if (stylestack.isEmpty()) - // { - currentstyle.unset(); - // } - /* - else - { - qDebug("Using stack style"); - currentstyle = stylestack.first(); - } - */ unsigned char red = 0, green = 0, blue = 0; while (ch != '>' && ch != UEOF) @@ -1099,14 +1172,44 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) parse_color(attr, red, green, blue); } + /* + if (ent == "size") + { + if (attr == "0") + { + isPageBreak = true; + } + } + */ qDebug("<hr>Entity:%s Attr:%s", (const char*)ent, (const char*)attr); } if (ch != '>') ch = skip_ws_end(); - ch = 10; - q += '-'; - q += QChar(parent->getwidth()); - q += 3; - q += red; - q += green; - q += blue; + /* + if (isPageBreak) + { + ch = UEOF; + } + else + { + */ + // if (stylestack.isEmpty()) + // { + currentstyle.unset(); + // } + /* + else + { + qDebug("Using stack style"); + currentstyle = stylestack.first(); + } + */ + lastch = 0; //Anything but 10 or ' ' + ch = 10; + q += '-'; + q += QChar(parent->getwidth()); + q += 3; + q += red; + q += green; + q += blue; + continue; } @@ -1124,4 +1227,5 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) if (ent == "src") { + /* if (m_bchm) { @@ -1132,4 +1236,12 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) } } + */ + + + QImage* img = parent->getPicture(attr); + if (img != NULL) + { + currentstyle.setPicture(true, img); + } else { @@ -1173,4 +1285,12 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) // skipblock("/metadata"); } + else if (ent == "title") + { + skipblock("/title"); + } + else if (ent == "head") + { + skipblock("/head"); + } /* else if (ent == "metadata") @@ -1192,5 +1312,8 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) if (ch != '>') ch = skip_ws_end(); - mygetch(ch, dummy, npos); + if (ent[0] == '/') + mygetch(ch, dummy, pos); + else + mygetch(ch, dummy, npos); } if (ch == '&') @@ -1235,7 +1358,11 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) } // sty = (dummy == ucFontBase) ? currentstyle : dummy; + if (lastch == 10 && ch == 10 && sty.getExtraSpace() > currentstyle.getExtraSpace()) + { + currentstyle.setExtraSpace(sty.getExtraSpace()); + } sty = currentstyle; } - while (!isPre && (lastch == ' ' || lastch == 10 || ignorespace) && ch == ' '); + while (!isPre && (((lastch == ' ' || lastch == 10 || ignorespace) && ch == ' ') || ((ch == 10) && (lastch == 10)))); // lastch = ch; lastch = ch; @@ -1243,4 +1370,43 @@ void striphtml::getch(tchar& ch, CStyle& sty, unsigned long& pos) } +QString striphtml::getTableAsHtml(unsigned long loc) +{ + qDebug("striphtml::getTableAsHtml"); + QString ret; + tchar ch(0); + CStyle sty; + unsigned long pos; + locate(loc); + int endpos(0); + QString endmarker("</table>"); + QString startmarker("<table"); + int startpos(0); + int depth(0); + while (ch != UEOF) + { + parent->getch(ch, sty, pos); + QChar qc(ch); + ret += qc; + if (qc.lower() == endmarker[endpos]) + { + if ((++endpos >= endmarker.length()) && (--depth <= 0)) break; + } + else + { + endpos = 0; + } + if (qc.lower() == startmarker[startpos]) + { + if (++startpos >= startmarker.length()) ++depth; + } + else + { + startpos = 0; + } + } + return ret; +} + + extern "C" { |