From 73253e93327cf4ef0932de1b4afb56af22a0f37e Mon Sep 17 00:00:00 2001 From: pohly Date: Tue, 24 Aug 2004 20:52:45 +0000 Subject: updated source to opie-reader 0.7g --- (limited to 'noncore/apps/opie-reader/CRegExp.cpp') diff --git a/noncore/apps/opie-reader/CRegExp.cpp b/noncore/apps/opie-reader/CRegExp.cpp new file mode 100644 index 0000000..77dc2dc --- a/dev/null +++ b/noncore/apps/opie-reader/CRegExp.cpp @@ -0,0 +1,599 @@ +#include "CRegExp.h" + +//#include +#include +//#include + + +tchar CRegExpFilt::escapedchar(tchar c) +{ + switch (c) + { + case '\\': + return '\\'; + break; + case '"': + return '\"'; + break; + case 'a': + return '\a'; + break; + case 'b': + return '\b'; + break; + case 'f': + return '\f'; + break; + case 'n': + return '\n'; + break; + case 'r': + return '\r'; + break; + case 't': + return '\t'; + break; + case 'v': + return '\v'; + break; + default: + return c; + break; + } +} + +void CRegExpFilt::regchar(tchar c, bool insens) +{ + if (insens) + { + tchar t = upper(c); + CV[t] = 0; + t = lower(c); + CV[t] = 0; + } + else + { + CV[c] = 0; + } +} + +void CRegExpFilt::prepreprocessing(const QString& pat, bool insens) +{ + for (unsigned int p = 0; p < pat.length(); p++) + { +#ifdef _WINDOWS + switch (pat.at(p).unicode()) +#else + switch (pat[p].unicode()) +#endif + { + case '{': + { + break; + } + case '}': + { + break; + } + case '^': + { + break; + } + case '.' : + { + break; + } + case '#': + { + p++; +#ifdef _WINDOWS + while ('0' <= pat.at(p).unicode() && pat.at(p).unicode() <= '9') +#else + while ('0' <= pat[p].unicode() && pat[p].unicode() <= '9') +#endif + { + } + p--; + break; + } + case '\\' : + { +#ifdef _WINDOWS + tchar c = escapedchar(pat.at(++p).unicode()); +#else + tchar c = escapedchar(pat[++p].unicode()); +#endif + regchar(c, insens); + break; + } + + case '[' : + { + tchar clast; + bool invert = false; + tchar c; +#ifdef _WINDOWS + if (pat.at(p+1).unicode() == '^') +#else + if (pat[p+1].unicode() == '^') +#endif + { + p++; + invert = true; + } +#ifdef _WINDOWS + while ((c = pat.at(++p).unicode()) != ']') +#else + while ((c = pat[++p].unicode()) != ']') +#endif + { + if (c == '\\') + { +#ifdef _WINDOWS + c = escapedchar(pat.at(++p).unicode()); +#else + c = escapedchar(pat[++p].unicode()); +#endif + if (c == ']') break; + } + if (c == '-') + { +#ifdef _WINDOWS + c = pat.at(++p).unicode(); +#else + c = pat[++p].unicode(); +#endif + for (tchar j = clast; j <= c; j++) + { + regchar(j, insens); + } + } + else + { + regchar(c, insens); + } + clast = c; + } + break; + } + default : + { +#ifdef _WINDOWS + regchar(pat.at(p).unicode(), insens); +#else + regchar(pat[p].unicode(), insens); +#endif + break; + } + } + } + /* + for (iter i = CV.begin(); i != CV.end(); ++i) + { + printf("Pre: [%u]\n", i.first()); + } + */ + CV[0] = 0; +} + +unsigned int CRegExpFilt::preprocessing(const QString& pat, bool insens) +{ + prepreprocessing(pat, insens); + qDebug("PrePreProcessing done"); + unsigned int p, m; + bool inkeep = false; + keep = 0; + replace = 0; + for (unsigned int j = 0; j < WORD_SIZE; j++) + { + bit[j] = (1 << (WORD_SIZE -j -1)); + lfcnt[j] = 0; + } + + for (p = 0, m = 0; p < pat.length(); p++) + { + qDebug("m is %u", m); + if (inkeep) keep |= bit[m]; +#ifdef _WINDOWS + switch (pat.at(p).unicode()) +#else + switch (pat[p].unicode()) +#endif + { + case '{': + { + inkeep = true; + break; + } + case '}': + { + keep ^= bit[m]; + inkeep = false; + break; + } + case '^': + { + replace |= bit[m]; + lfcnt[m]++; + break; + } + case '.' : + { + for (iter j = CV.begin(); j != CV.end(); ++j) CV[j.first()] |= bit[m]; + m++; + break; + } + case '#': + { + if (m > 0) + { + p++; + int count = 0; +#ifdef _WINDOWS + while ('0' <= pat.at(p).unicode() && pat.at(p).unicode() <= '9') +#else + while ('0' <= pat[p].unicode() && pat[p].unicode() <= '9') +#endif + { +#ifdef _WINDOWS + count = 10*count + pat.at(p++).unicode() - '0'; +#else + count = 10*count + pat[p++].unicode() - '0'; +#endif + } + p--; + count = count-1; + unsigned int mask = 0; + for (unsigned int i = m; i < m+count; i++) + { + mask |= bit[i]; + } + + for (iter it = CV.begin(); it != CV.end(); ++it) + { + if (CV[it.first()] & bit[m-1]) + { + CV[it.first()] |= mask; + } + } + if (keep & bit[m-1]) keep |= mask; + m += count; + } + else + { + p++; + } + break; + } + case '\\' : + { +#ifdef _WINDOWS + tchar c = escapedchar(pat.at(++p).unicode()); +#else + tchar c = escapedchar(pat[++p].unicode()); +#endif + if (insens) + { + CV[upper(c)] |= bit[m]; + CV[lower(c)] |= bit[m]; + } + else + { + CV[c] |= bit[m]; + } + m++; + break; + } + + case '[' : + { + tchar c, clast; + bool invert = false; +#ifdef _WINDOWS + if (pat.at(p+1).unicode() == '^') +#else + if (pat[p+1].unicode() == '^') +#endif + { + p++; + invert = true; + } +#ifdef _WINDOWS + while ((c = pat.at(++p).unicode()) != ']') +#else + while ((c = pat[++p].unicode()) != ']') +#endif + { + if (c == '\\') + { +#ifdef _WINDOWS + c = escapedchar(pat.at(++p).unicode()); +#else + c = escapedchar(pat[++p].unicode()); +#endif + if (c == ']') break; + } + if (c == '-') + { +#ifdef _WINDOWS + c = pat.at(++p).unicode(); +#else + c = pat[++p].unicode(); +#endif + for (tchar j = clast; j <= c; j++) + { + if (insens) + { + iter it; + if ((it = CV.find(upper(j))) != CV.end()) + CV[it] |= bit[m]; + else + CV[0] |= bit[m]; + if ((it = CV.find(lower(j))) != CV.end()) + CV[it] |= bit[m]; + else + CV[0] |= bit[m]; + } + else + { + iter it; + if ((it = CV.find(j)) != CV.end()) + CV[it] |= bit[m]; + else + { + CV[0] |= bit[m]; + } + } + } + } + else + { + if (insens) + { + iter it; + if ((it = CV.find(upper(c))) != CV.end()) + CV[it] |= bit[m]; + else + CV[0] |= bit[m]; + if ((it = CV.find(lower(c))) != CV.end()) + CV[it] |= bit[m]; + else + CV[0] |= bit[m]; + } + else + { + iter it; + if ((it = CV.find(c)) != CV.end()) + CV[it] |= bit[m]; + else + CV[0] |= bit[m]; + } + } + clast = c; + } + if (invert) + { + for (iter i = CV.begin(); i != CV.end(); ++i) + { + CV[i.first()] ^= bit[m]; + } + } + m++; + break; + } + default : + { +#ifdef _WINDOWS + tchar c = pat.at(p).unicode(); +#else + tchar c = pat[p].unicode(); +#endif + if (insens) + { + CV[upper(c)] |= bit[m]; + CV[lower(c)] |= bit[m]; + } + else CV[c] |= bit[m]; + m++; + break; + } + } + } + qDebug("Returning:%u",m); + return m; +} + +bool CRegExpFilt::empty() +{ + return m_outQueue.empty(); +} + +tchar CRegExpFilt::pop() +{ + return m_outQueue.pop(); +} + +bool CRegExpFilt::addch(tchar ch) +{ + word[cur] = ch; + cur = (cur+1)%patlength; + if (len < patlength) len++; + + unsigned int cv = 0; + iter it; + if ((it = CV.find(ch)) == CV.end()) + { + cv = CV[0]; + } + else + { + cv = CV[it]; + } + + R = ((R >> 1) | bit_0) & cv; /* Exact matches */ + if (R & endpos) + { + for (unsigned int i = 0; i < patlength; i++) + { + if (replace & bit[i]) + { + for (unsigned int j = 0; j < lfcnt[i]; j++) + { + m_outQueue.push(10); + } + } + if (keep & bit[i]) + { + m_outQueue.push(word[(cur+i)%patlength]); + // putchar('*'); + // putchar(i + '0'); + } + len = 0; + } + return true; + } + else + { + if (len == patlength) + { + tchar ch = word[cur]; + if (ch == 10) ch = ' '; + m_outQueue.push(ch); + } + return false; + } +} + +void CRegExpFilt::restart() +{ + R = 0; + len = 0; +} + +CRegExpFilt::CRegExpFilt(const QString& pat, bool insensflag) : CV(300) +{ + cur = 0; + patlength = preprocessing(pat, insensflag); + qDebug("Preprocesing done:%u", patlength); + endpos = bit[patlength-1]; + bit_0 = bit[0]; + + restart(); + + qDebug("Pattern: %s:%u", (const char*)pat, patlength); + +} + + +CRegExpFilt::~CRegExpFilt() +{ +} + +#ifdef NOWAYISTHISDEFINED +void reportmatch(tchar *line, /*tchar *text,*/ unsigned int mtype, unsigned int lino) +{ + /* + tchar *text = line + strlen(line); + + tchar *ptr = line; + if (mtype == 0) + printf("Exact match at line number %u.\n", lino); + else + printf("%u error match at line number %u.\n", mtype, lino); + while (ptr < text) putchar(*ptr++); + printf("%c[4m^%c[24m%s\n", 27, 27, ptr); + */ +} + + +void usage(void) +{ + printf("Usage: CRegExpFilt [-i] pattern/a file\n"); +} + +int getline(tchar *s,int lim,FILE *f) +{ + int c, i; + + for (i = 0; i < lim-1 && (c = getc(f)) != EOF && c != '\n'; ) + { + s[i++] = (tchar)c; + } + s[i] = '\0'; + return ((c == EOF && i == 0) ? -1 : i); +} + +#define BUF_SIZE 256 + +int main(int argc, char **argv) +{ + unsigned int lino = 0; + unsigned int blino = 0; + bool insens = false; + int len; + tchar line[BUF_SIZE]; + FILE *inf; + + /* Error checking of cmd ln args! */ + if (argc < 3) + {usage(); return 10; } + /* Corresponds to requiring a minimum of 3 matches */ + for (len = 1; len < argc-2; len++) + { + if (argv[len][0] != '-') + {usage(); return 10; } + else switch (argv[len][1]) + { + case 'i' : + { + insens = true; + break; + } + default : + {usage(); return 10;} + } + } + + tchar* pattern = new tchar[strlen(argv[argc-2])+1]; + + for (int i = 0; (pattern[i] = argv[argc-2][i]) != 0; i++); + + + + CRegExpFilt test(pattern, insens); + + delete [] pattern; + + inf = fopen(argv[argc-1], "r"); + if (!inf) + { + printf("file not found\n"); + return 10; + } + + while ((len = getline(line, BUF_SIZE, inf)) >= 0) + { + lino++; + bool ret = false; + + { + tchar *textend = line+len; + tchar *text = line; + while (text < textend) + { + ret |= test.addch(*text++); + } + ret |= test.addch('\n'); + while (!test.empty()) + { + putchar(test.pop()); + } + } + // inswt = test.addch(line, len); + if (ret) reportmatch(line, 0, lino); + } + fclose(inf); + // CloseSTDLIB(); + return 0; +} +#endif -- cgit v0.9.0.2