summary | refs | log | tree | commit | diff
path: root/noncore/apps/opie-reader/CRegExp.cpp
Unidiff
Diffstat (limited to 'noncore/apps/opie-reader/CRegExp.cpp') (more/less context) (show whitespace changes)
-rw-r--r-- noncore/apps/opie-reader/CRegExp.cpp 599
1 files changed, 599 insertions, 0 deletions
diff --git a/noncore/apps/opie-reader/CRegExp.cpp b/noncore/apps/opie-reader/CRegExp.cpp
new file mode 100644
index 0000000..77dc2dc
--- a/dev/null
+++ b/noncore/apps/opie-reader/CRegExp.cpp
@@ -0,0 +1,599 @@
1#include "CRegExp.h"
2
3//#include <stdio.h>
4#include <stdlib.h>
5//#include <string.h>
6
7
8tchar CRegExpFilt::escapedchar(tchar c)
9{
10 switch (c)
11 {
12 case '\\':
13 return '\\';
14 break;
15 case '"':
16 return '\"';
17 break;
18 case 'a':
19 return '\a';
20 break;
21 case 'b':
22 return '\b';
23 break;
24 case 'f':
25 return '\f';
26 break;
27 case 'n':
28 return '\n';
29 break;
30 case 'r':
31 return '\r';
32 break;
33 case 't':
34 return '\t';
35 break;
36 case 'v':
37 return '\v';
38 break;
39 default:
40 return c;
41 break;
42 }
43}
44
45void CRegExpFilt::regchar(tchar c, bool insens)
46{
47 if (insens)
48 {
49 tchar t = upper(c);
50 CV[t] = 0;
51 t = lower(c);
52 CV[t] = 0;
53 }
54 else
55 {
56 CV[c] = 0;
57 }
58}
59
60void CRegExpFilt::prepreprocessing(const QString& pat, bool insens)
61{
62 for (unsigned int p = 0; p < pat.length(); p++)
63 {
64#ifdef _WINDOWS
65 switch (pat.at(p).unicode())
66#else
67 switch (pat[p].unicode())
68#endif
69 {
70 case '{':
71 {
72 break;
73 }
74 case '}':
75 {
76 break;
77 }
78 case '^':
79 {
80 break;
81 }
82 case '.' :
83 {
84 break;
85 }
86 case '#':
87 {
88 p++;
89#ifdef _WINDOWS
90 while ('0' <= pat.at(p).unicode() && pat.at(p).unicode() <= '9')
91#else
92 while ('0' <= pat[p].unicode() && pat[p].unicode() <= '9')
93#endif
94 {
95 }
96 p--;
97 break;
98 }
99 case '\\' :
100 {
101#ifdef _WINDOWS
102 tchar c = escapedchar(pat.at(++p).unicode());
103#else
104 tchar c = escapedchar(pat[++p].unicode());
105#endif
106 regchar(c, insens);
107 break;
108 }
109
110 case '[' :
111 {
112 tchar clast;
113 bool invert = false;
114 tchar c;
115#ifdef _WINDOWS
116 if (pat.at(p+1).unicode() == '^')
117#else
118 if (pat[p+1].unicode() == '^')
119#endif
120 {
121 p++;
122 invert = true;
123 }
124#ifdef _WINDOWS
125 while ((c = pat.at(++p).unicode()) != ']')
126#else
127 while ((c = pat[++p].unicode()) != ']')
128#endif
129 {
130 if (c == '\\')
131 {
132#ifdef _WINDOWS
133 c = escapedchar(pat.at(++p).unicode());
134#else
135 c = escapedchar(pat[++p].unicode());
136#endif
137 if (c == ']') break;
138 }
139 if (c == '-')
140 {
141#ifdef _WINDOWS
142 c = pat.at(++p).unicode();
143#else
144 c = pat[++p].unicode();
145#endif
146 for (tchar j = clast; j <= c; j++)
147 {
148 regchar(j, insens);
149 }
150 }
151 else
152 {
153 regchar(c, insens);
154 }
155 clast = c;
156 }
157 break;
158 }
159 default :
160 {
161#ifdef _WINDOWS
162 regchar(pat.at(p).unicode(), insens);
163#else
164 regchar(pat[p].unicode(), insens);
165#endif
166 break;
167 }
168 }
169 }
170 /*
171 for (iter i = CV.begin(); i != CV.end(); ++i)
172 {
173 printf("Pre: [%u]\n", i.first());
174 }
175 */
176 CV[0] = 0;
177}
178
179unsigned int CRegExpFilt::preprocessing(const QString& pat, bool insens)
180{
181 prepreprocessing(pat, insens);
182 qDebug("PrePreProcessing done");
183 unsigned int p, m;
184 bool inkeep = false;
185 keep = 0;
186 replace = 0;
187 for (unsigned int j = 0; j < WORD_SIZE; j++)
188 {
189 bit[j] = (1 << (WORD_SIZE -j -1));
190 lfcnt[j] = 0;
191 }
192
193 for (p = 0, m = 0; p < pat.length(); p++)
194 {
195 qDebug("m is %u", m);
196 if (inkeep) keep |= bit[m];
197#ifdef _WINDOWS
198 switch (pat.at(p).unicode())
199#else
200 switch (pat[p].unicode())
201#endif
202 {
203 case '{':
204 {
205 inkeep = true;
206 break;
207 }
208 case '}':
209 {
210 keep ^= bit[m];
211 inkeep = false;
212 break;
213 }
214 case '^':
215 {
216 replace |= bit[m];
217 lfcnt[m]++;
218 break;
219 }
220 case '.' :
221 {
222 for (iter j = CV.begin(); j != CV.end(); ++j) CV[j.first()] |= bit[m];
223 m++;
224 break;
225 }
226 case '#':
227 {
228 if (m > 0)
229 {
230 p++;
231 int count = 0;
232#ifdef _WINDOWS
233 while ('0' <= pat.at(p).unicode() && pat.at(p).unicode() <= '9')
234#else
235 while ('0' <= pat[p].unicode() && pat[p].unicode() <= '9')
236#endif
237 {
238#ifdef _WINDOWS
239 count = 10*count + pat.at(p++).unicode() - '0';
240#else
241 count = 10*count + pat[p++].unicode() - '0';
242#endif
243 }
244 p--;
245 count = count-1;
246 unsigned int mask = 0;
247 for (unsigned int i = m; i < m+count; i++)
248 {
249 mask |= bit[i];
250 }
251
252 for (iter it = CV.begin(); it != CV.end(); ++it)
253 {
254 if (CV[it.first()] & bit[m-1])
255 {
256 CV[it.first()] |= mask;
257 }
258 }
259 if (keep & bit[m-1]) keep |= mask;
260 m += count;
261 }
262 else
263 {
264 p++;
265 }
266 break;
267 }
268 case '\\' :
269 {
270#ifdef _WINDOWS
271 tchar c = escapedchar(pat.at(++p).unicode());
272#else
273 tchar c = escapedchar(pat[++p].unicode());
274#endif
275 if (insens)
276 {
277 CV[upper(c)] |= bit[m];
278 CV[lower(c)] |= bit[m];
279 }
280 else
281 {
282 CV[c] |= bit[m];
283 }
284 m++;
285 break;
286 }
287
288 case '[' :
289 {
290 tchar c, clast;
291 bool invert = false;
292#ifdef _WINDOWS
293 if (pat.at(p+1).unicode() == '^')
294#else
295 if (pat[p+1].unicode() == '^')
296#endif
297 {
298 p++;
299 invert = true;
300 }
301#ifdef _WINDOWS
302 while ((c = pat.at(++p).unicode()) != ']')
303#else
304 while ((c = pat[++p].unicode()) != ']')
305#endif
306 {
307 if (c == '\\')
308 {
309#ifdef _WINDOWS
310 c = escapedchar(pat.at(++p).unicode());
311#else
312 c = escapedchar(pat[++p].unicode());
313#endif
314 if (c == ']') break;
315 }
316 if (c == '-')
317 {
318#ifdef _WINDOWS
319 c = pat.at(++p).unicode();
320#else
321 c = pat[++p].unicode();
322#endif
323 for (tchar j = clast; j <= c; j++)
324 {
325 if (insens)
326 {
327 iter it;
328 if ((it = CV.find(upper(j))) != CV.end())
329 CV[it] |= bit[m];
330 else
331 CV[0] |= bit[m];
332 if ((it = CV.find(lower(j))) != CV.end())
333 CV[it] |= bit[m];
334 else
335 CV[0] |= bit[m];
336 }
337 else
338 {
339 iter it;
340 if ((it = CV.find(j)) != CV.end())
341 CV[it] |= bit[m];
342 else
343 {
344 CV[0] |= bit[m];
345 }
346 }
347 }
348 }
349 else
350 {
351 if (insens)
352 {
353 iter it;
354 if ((it = CV.find(upper(c))) != CV.end())
355 CV[it] |= bit[m];
356 else
357 CV[0] |= bit[m];
358 if ((it = CV.find(lower(c))) != CV.end())
359 CV[it] |= bit[m];
360 else
361 CV[0] |= bit[m];
362 }
363 else
364 {
365 iter it;
366 if ((it = CV.find(c)) != CV.end())
367 CV[it] |= bit[m];
368 else
369 CV[0] |= bit[m];
370 }
371 }
372 clast = c;
373 }
374 if (invert)
375 {
376 for (iter i = CV.begin(); i != CV.end(); ++i)
377 {
378 CV[i.first()] ^= bit[m];
379 }
380 }
381 m++;
382 break;
383 }
384 default :
385 {
386#ifdef _WINDOWS
387 tchar c = pat.at(p).unicode();
388#else
389 tchar c = pat[p].unicode();
390#endif
391 if (insens)
392 {
393 CV[upper(c)] |= bit[m];
394 CV[lower(c)] |= bit[m];
395 }
396 else CV[c] |= bit[m];
397 m++;
398 break;
399 }
400 }
401 }
402 qDebug("Returning:%u",m);
403 return m;
404}
405
406bool CRegExpFilt::empty()
407{
408 return m_outQueue.empty();
409}
410
411tchar CRegExpFilt::pop()
412{
413 return m_outQueue.pop();
414}
415
416bool CRegExpFilt::addch(tchar ch)
417{
418 word[cur] = ch;
419 cur = (cur+1)%patlength;
420 if (len < patlength) len++;
421
422 unsigned int cv = 0;
423 iter it;
424 if ((it = CV.find(ch)) == CV.end())
425 {
426 cv = CV[0];
427 }
428 else
429 {
430 cv = CV[it];
431 }
432
433 R = ((R >> 1) | bit_0) & cv; /* Exact matches */
434 if (R & endpos)
435 {
436 for (unsigned int i = 0; i < patlength; i++)
437 {
438 if (replace & bit[i])
439 {
440 for (unsigned int j = 0; j < lfcnt[i]; j++)
441 {
442 m_outQueue.push(10);
443 }
444 }
445 if (keep & bit[i])
446 {
447 m_outQueue.push(word[(cur+i)%patlength]);
448 // putchar('*');
449 // putchar(i + '0');
450 }
451 len = 0;
452 }
453 return true;
454 }
455 else
456 {
457 if (len == patlength)
458 {
459 tchar ch = word[cur];
460 if (ch == 10) ch = ' ';
461 m_outQueue.push(ch);
462 }
463 return false;
464 }
465}
466
467void CRegExpFilt::restart()
468{
469 R = 0;
470 len = 0;
471}
472
473CRegExpFilt::CRegExpFilt(const QString& pat, bool insensflag) : CV(300)
474{
475 cur = 0;
476 patlength = preprocessing(pat, insensflag);
477 qDebug("Preprocesing done:%u", patlength);
478 endpos = bit[patlength-1];
479 bit_0 = bit[0];
480
481 restart();
482
483 qDebug("Pattern: %s:%u", (const char*)pat, patlength);
484
485}
486
487
488CRegExpFilt::~CRegExpFilt()
489{
490}
491
492#ifdef NOWAYISTHISDEFINED
493void reportmatch(tchar *line, /*tchar *text,*/ unsigned int mtype, unsigned int lino)
494{
495 /*
496 tchar *text = line + strlen(line);
497
498 tchar *ptr = line;
499 if (mtype == 0)
500 printf("Exact match at line number %u.\n", lino);
501 else
502 printf("%u error match at line number %u.\n", mtype, lino);
503 while (ptr < text) putchar(*ptr++);
504 printf("%c[4m^%c[24m%s\n", 27, 27, ptr);
505 */
506}
507
508
/** Print the command-line synopsis of the test driver to standard output. */
void usage(void)
{
    static const char synopsis[] = "Usage: CRegExpFilt [-i] pattern/a file\n";
    printf("%s", synopsis);
}
513
514int getline(tchar *s,int lim,FILE *f)
515{
516 int c, i;
517
518 for (i = 0; i < lim-1 && (c = getc(f)) != EOF && c != '\n'; )
519 {
520 s[i++] = (tchar)c;
521 }
522 s[i] = '\0';
523 return ((c == EOF && i == 0) ? -1 : i);
524}
525
526 #define BUF_SIZE256
527
528int main(int argc, char **argv)
529{
530 unsigned int lino = 0;
531 unsigned int blino = 0;
532 bool insens = false;
533 int len;
534 tchar line[BUF_SIZE];
535 FILE *inf;
536
537 /* Error checking of cmd ln args! */
538 if (argc < 3)
539 {usage(); return 10; }
540 /* Corresponds to requiring a minimum of 3 matches */
541 for (len = 1; len < argc-2; len++)
542 {
543 if (argv[len][0] != '-')
544 {usage(); return 10; }
545 else switch (argv[len][1])
546 {
547 case 'i' :
548 {
549 insens = true;
550 break;
551 }
552 default :
553 {usage(); return 10;}
554 }
555 }
556
557 tchar* pattern = new tchar[strlen(argv[argc-2])+1];
558
559 for (int i = 0; (pattern[i] = argv[argc-2][i]) != 0; i++);
560
561
562
563 CRegExpFilt test(pattern, insens);
564
565 delete [] pattern;
566
567 inf = fopen(argv[argc-1], "r");
568 if (!inf)
569 {
570 printf("file not found\n");
571 return 10;
572 }
573
574 while ((len = getline(line, BUF_SIZE, inf)) >= 0)
575 {
576 lino++;
577 bool ret = false;
578
579 {
580 tchar *textend = line+len;
581 tchar *text = line;
582 while (text < textend)
583 {
584 ret |= test.addch(*text++);
585 }
586 ret |= test.addch('\n');
587 while (!test.empty())
588 {
589 putchar(test.pop());
590 }
591 }
592 // inswt = test.addch(line, len);
593 if (ret) reportmatch(line, 0, lino);
594 }
595 fclose(inf);
596 //CloseSTDLIB();
597 return 0;
598}
599#endif