summaryrefslogtreecommitdiff
path: root/noncore/apps/tinykate/libkate/qt3back/qregexp3.cpp
Unidiff
Diffstat (limited to 'noncore/apps/tinykate/libkate/qt3back/qregexp3.cpp') (more/less context) (show whitespace changes)
-rw-r--r--noncore/apps/tinykate/libkate/qt3back/qregexp3.cpp80
1 files changed, 43 insertions, 37 deletions
diff --git a/noncore/apps/tinykate/libkate/qt3back/qregexp3.cpp b/noncore/apps/tinykate/libkate/qt3back/qregexp3.cpp
index a2c680f..78635b2 100644
--- a/noncore/apps/tinykate/libkate/qt3back/qregexp3.cpp
+++ b/noncore/apps/tinykate/libkate/qt3back/qregexp3.cpp
@@ -17,62 +17,68 @@
17** GNU General Public License version 2 as published by the Free Software 17** GNU General Public License version 2 as published by the Free Software
18** Foundation and appearing in the file LICENSE.GPL included in the 18** Foundation and appearing in the file LICENSE.GPL included in the
19** packaging of this file. 19** packaging of this file.
20** 20**
21** Licensees holding valid Qt Enterprise Edition or Qt Professional Edition 21** Licensees holding valid Qt Enterprise Edition or Qt Professional Edition
22** licenses may use this file in accordance with the Qt Commercial License 22** licenses may use this file in accordance with the Qt Commercial License
23** Agreement provided with the Software. 23** Agreement provided with the Software.
24** 24**
25** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE 25** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
26** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 26** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
27** 27**
28** See http://www.trolltech.com/pricing.html or email sales@trolltech.com for 28** See http://www.trolltech.com/pricing.html or email sales@trolltech.com for
29** information about Qt Commercial License Agreements. 29** information about Qt Commercial License Agreements.
30** See http://www.trolltech.com/qpl/ for QPL licensing information. 30** See http://www.trolltech.com/qpl/ for QPL licensing information.
31** See http://www.trolltech.com/gpl/ for GPL licensing information. 31** See http://www.trolltech.com/gpl/ for GPL licensing information.
32** 32**
33** Contact info@trolltech.com if any conditions of this licensing are 33** Contact info@trolltech.com if any conditions of this licensing are
34** not clear to you. 34** not clear to you.
35** 35**
36**********************************************************************/ 36**********************************************************************/
37#if QT_VERSION >=300 37#if QT_VERSION >=300
38#error QRegExp3 is now in QT 3 use QRegExp instead 38#error QRegExp3 is now in QT 3 use QRegExp instead
39#endif 39#endif
40 40
41#include "qarray.h"
42#include "qbitarray.h"
43#include "qcache.h"
44#include "qintdict.h"
45#include "qmap.h"
46#if QT_VERSION < 300 41#if QT_VERSION < 300
47#include "./qregexp3.h" 42#include "./qregexp3.h"
48#else 43#else
49#include "qregexp.h" 44#include "qregexp.h"
50#endif 45#endif
51#include "qstring.h"
52#include "qtl.h"
53#include "qvector.h"
54 46
47/* OPIE */
48#include <opie2/odebug.h>
49
50/* QT */
51#include <qarray.h>
52#include <qbitarray.h>
53#include <qcache.h>
54#include <qintdict.h>
55#include <qmap.h>
56#include <qstring.h>
57#include <qtl.h>
58#include <qvector.h>
59
60/* STD */
55#include <limits.h> 61#include <limits.h>
56 62
57/* 63/*
58 WARNING! Be sure to read qregexp.tex before modifying this file. 64 WARNING! Be sure to read qregexp.tex before modifying this file.
59*/ 65*/
60 66
61/*! 67/*!
62 \class QRegExp3 qregexp.h 68 \class QRegExp3 qregexp.h
63 69
64 \brief The QRegExp class provides pattern matching using regular expressions. 70 \brief The QRegExp class provides pattern matching using regular expressions.
65 71
66 \ingroup tools 72 \ingroup tools
67 \ingroup misc 73 \ingroup misc
68 \ingroup shared 74 \ingroup shared
69 75
70 76
71 Regular expressions, "regexps", provide a way to find patterns 77 Regular expressions, "regexps", provide a way to find patterns
72 within text. This is useful in many contexts, for example: 78 within text. This is useful in many contexts, for example:
73 79
74 <ol> 80 <ol>
75 <li>\e Validation. A regexp can be used to check whether a piece of 81 <li>\e Validation. A regexp can be used to check whether a piece of
76 text meets some criteria, e.g. is an integer or contains no 82 text meets some criteria, e.g. is an integer or contains no
77 whitespace. 83 whitespace.
78 <li>\e Searching. Regexps provide a much more powerful means of 84 <li>\e Searching. Regexps provide a much more powerful means of
@@ -1410,92 +1416,92 @@ void QRegExpEngine::heuristicallyChooseHeuristic()
1410 ( goodLateStart - goodEarlyStart ); 1416 ( goodLateStart - goodEarlyStart );
1411 1417
1412 /* 1418 /*
1413 Less magic formula: We pick a couple of characters at random, and check 1419 Less magic formula: We pick a couple of characters at random, and check
1414 whether they are good or bad. 1420 whether they are good or bad.
1415 */ 1421 */
1416 int badCharScore = 0; 1422 int badCharScore = 0;
1417 int step = QMAX( 1, NumBadChars / 32 ); 1423 int step = QMAX( 1, NumBadChars / 32 );
1418 for ( i = 1; i < NumBadChars; i += step ) { 1424 for ( i = 1; i < NumBadChars; i += step ) {
1419 if ( occ1[i] == NoOccurrence ) 1425 if ( occ1[i] == NoOccurrence )
1420 badCharScore += minl; 1426 badCharScore += minl;
1421 else 1427 else
1422 badCharScore += occ1[i]; 1428 badCharScore += occ1[i];
1423 } 1429 }
1424 badCharScore /= minl; 1430 badCharScore /= minl;
1425 1431
1426 useGoodStringHeuristic = ( goodStringScore > badCharScore ); 1432 useGoodStringHeuristic = ( goodStringScore > badCharScore );
1427} 1433}
1428#endif 1434#endif
1429 1435
1430#if defined(QT_DEBUG) 1436#if defined(QT_DEBUG)
1431void QRegExpEngine::dump() const 1437void QRegExpEngine::dump() const
1432{ 1438{
1433 int i, j; 1439 int i, j;
1434 qDebug( "Case %ssensitive engine", cs ? "" : "in" ); 1440 odebug << "Case " << (cs ? "" : "in") << "sensitive engine" << oendl;
1435 qDebug( " States" ); 1441 odebug << " States" << oendl;
1436 for ( i = 0; i < ns; i++ ) { 1442 for ( i = 0; i < ns; i++ ) {
1437 qDebug( " %d%s", i, 1443 odebug << " " << i
1438 i == InitialState ? " (initial)" : 1444 << (i == InitialState ? " (initial)" : i == FinalState ? " (final)" : "") << oendl;
1439 i == FinalState ? " (final)" : "" ); 1445
1440#ifndef QT_NO_REGEXP_CAPTURE 1446#ifndef QT_NO_REGEXP_CAPTURE
1441 qDebug( " in atom %d", s[i]->atom ); 1447 odebug << " in atom " << s[i]->atom << oendl;
1442#endif 1448#endif
1443 int m = s[i]->match; 1449 int m = s[i]->match;
1444 if ( (m & CharClassBit) != 0 ) { 1450 if ( (m & CharClassBit) != 0 ) {
1445 qDebug( " match character class %d", m ^ CharClassBit ); 1451 odebug << " match character class " << (m ^ CharClassBit) << oendl;
1446#ifndef QT_NO_REGEXP_CCLASS 1452#ifndef QT_NO_REGEXP_CCLASS
1447 cl[m ^ CharClassBit]->dump(); 1453 cl[m ^ CharClassBit]->dump();
1448#else 1454#else
1449 qDebug( " negative character class" ); 1455 odebug << " negative character class" << oendl;
1450#endif 1456#endif
1451 } else if ( (m & BackRefBit) != 0 ) { 1457 } else if ( (m & BackRefBit) != 0 ) {
1452 qDebug( " match back-reference %d", m ^ BackRefBit ); 1458 odebug << " match back-reference " << (m ^ BackRefBit) << oendl;
1453 } else if ( m >= 0x20 && m <= 0x7e ) { 1459 } else if ( m >= 0x20 && m <= 0x7e ) {
1454 qDebug( " match 0x%.4x (%c)", m, m ); 1460 odebug << " match " << QString().sprintf( "0x%.4x", m) << " (" << m << ")" << oendl;
1461
1455 } else { 1462 } else {
1456 qDebug( " match 0x%.4x", m ); 1463 odebug << " match " << QString().sprintf( "0x%.4x", m) << oendl;
1457 } 1464 }
1458 for ( j = 0; j < (int) s[i]->outs.size(); j++ ) { 1465 for ( j = 0; j < (int) s[i]->outs.size(); j++ ) {
1459 int next = s[i]->outs[j]; 1466 int next = s[i]->outs[j];
1460 qDebug( " -> %d", next ); 1467 odebug << " -> " << next << oendl;
1461 if ( s[i]->reenter != 0 && s[i]->reenter->contains(next) ) 1468 if ( s[i]->reenter != 0 && s[i]->reenter->contains(next) )
1462 qDebug( " [reenter %d]", (*s[i]->reenter)[next] ); 1469 odebug << " [reenter " << (*s[i]->reenter)[next] << "]" << oendl;
1463 if ( s[i]->anchors != 0 && at(*s[i]->anchors, next) != 0 ) 1470 if ( s[i]->anchors != 0 && at(*s[i]->anchors, next) != 0 )
1464 qDebug( " [anchors 0x%.8x]", (*s[i]->anchors)[next] ); 1471 odebug << " [anchors " << QString().sprintf( "0x%.8x]", (*s[i]->anchors)[next] ) << oendl;
1465 } 1472 }
1466 } 1473 }
1467#ifndef QT_NO_REGEXP_CAPTURE 1474#ifndef QT_NO_REGEXP_CAPTURE
1468 if ( nf > 0 ) { 1475 if ( nf > 0 ) {
1469 qDebug( " Atom Parent Capture" ); 1476 odebug << " Atom Parent Capture" << oendl;
1470 for ( i = 0; i < nf; i++ ) 1477 for ( i = 0; i < nf; i++ )
1471 qDebug( " %6d %6d %6d", i, f[i].parent, f[i].capture ); 1478 odebug << QString().sprintf(" %6d %6d %6d", i, f[i].parent, f[i].capture ) << oendl;
1472 } 1479 }
1473#endif 1480#endif
1474#ifndef QT_NO_REGEXP_ANCHOR_ALT 1481#ifndef QT_NO_REGEXP_ANCHOR_ALT
1475 for ( i = 0; i < (int) aa.size(); i++ ) 1482 for ( i = 0; i < (int) aa.size(); i++ )
1476 qDebug( " Anchor alternation 0x%.8x: 0x%.8x 0x%.9x", i, aa[i].a, 1483 odebug << QString().sprintf(" Anchor alternation 0x%.8x: 0x%.8x 0x%.9x", i, aa[i].a, aa[i].b ) << oendl;
1477 aa[i].b );
1478#endif 1484#endif
1479} 1485}
1480#endif 1486#endif
1481 1487
1482void QRegExpEngine::setup( bool caseSensitive ) 1488void QRegExpEngine::setup( bool caseSensitive )
1483{ 1489{
1484#ifndef QT_NO_REGEXP_OPTIM 1490#ifndef QT_NO_REGEXP_OPTIM
1485 if ( engCount++ == 0 ) { 1491 if ( engCount++ == 0 ) {
1486 noOccurrences = new QArray<int>( NumBadChars ); 1492 noOccurrences = new QArray<int>( NumBadChars );
1487 firstOccurrenceAtZero = new QArray<int>( NumBadChars ); 1493 firstOccurrenceAtZero = new QArray<int>( NumBadChars );
1488 noOccurrences->fill( NoOccurrence ); 1494 noOccurrences->fill( NoOccurrence );
1489 firstOccurrenceAtZero->fill( 0 ); 1495 firstOccurrenceAtZero->fill( 0 );
1490 } 1496 }
1491#endif 1497#endif
1492 s.setAutoDelete( TRUE ); 1498 s.setAutoDelete( TRUE );
1493 s.resize( 32 ); 1499 s.resize( 32 );
1494 ns = 0; 1500 ns = 0;
1495#ifndef QT_NO_REGEXP_CAPTURE 1501#ifndef QT_NO_REGEXP_CAPTURE
1496 f.resize( 32 ); 1502 f.resize( 32 );
1497 nf = 0; 1503 nf = 0;
1498 cf = -1; 1504 cf = -1;
1499#endif 1505#endif
1500 realncap = 0; 1506 realncap = 0;
1501 ncap = 0; 1507 ncap = 0;
@@ -2175,55 +2181,55 @@ void QRegExpEngine::CharClass::addRange( ushort from, ushort to )
2175 } 2181 }
2176#endif 2182#endif
2177} 2183}
2178 2184
2179bool QRegExpEngine::CharClass::in( QChar ch ) const 2185bool QRegExpEngine::CharClass::in( QChar ch ) const
2180{ 2186{
2181#ifndef QT_NO_REGEXP_OPTIM 2187#ifndef QT_NO_REGEXP_OPTIM
2182 if ( occ1[BadChar(ch)] == NoOccurrence ) 2188 if ( occ1[BadChar(ch)] == NoOccurrence )
2183 return n; 2189 return n;
2184#endif 2190#endif
2185 2191
2186 if ( c != 0 && (c & (1 << (int) ch.category())) != 0 ) 2192 if ( c != 0 && (c & (1 << (int) ch.category())) != 0 )
2187 return !n; 2193 return !n;
2188 for ( int i = 0; i < (int) r.size(); i++ ) { 2194 for ( int i = 0; i < (int) r.size(); i++ ) {
2189 if ( ch.unicode() >= r[i].from && ch.unicode() <= r[i].to ) 2195 if ( ch.unicode() >= r[i].from && ch.unicode() <= r[i].to )
2190 return !n; 2196 return !n;
2191 } 2197 }
2192 return n; 2198 return n;
2193} 2199}
2194 2200
2195#if defined(QT_DEBUG) 2201#if defined(QT_DEBUG)
2196void QRegExpEngine::CharClass::dump() const 2202void QRegExpEngine::CharClass::dump() const
2197{ 2203{
2198 int i; 2204 int i;
2199 qDebug( " %stive character class", n ? "nega" : "posi" ); 2205 odebug << " " << (n ? "nega" : "posi") << "tive character class" << oendl;
2200#ifndef QT_NO_REGEXP_CCLASS 2206#ifndef QT_NO_REGEXP_CCLASS
2201 if ( c != 0 ) 2207 if ( c != 0 )
2202 qDebug( " categories 0x%.8x", c ); 2208 odebug << QString().sprintf(" categories 0x%.8x", c ) << oendl;
2203#endif 2209#endif
2204 for ( i = 0; i < (int) r.size(); i++ ) 2210 for ( i = 0; i < (int) r.size(); i++ )
2205 qDebug( " 0x%.4x through 0x%.4x", r[i].from, r[i].to ); 2211 odebug << QString().sprintf(" 0x%.4x through 0x%.4x", r[i].from, r[i].to ) << oendl;
2206} 2212}
2207#endif 2213#endif
2208#endif 2214#endif
2209 2215
2210QRegExpEngine::Box::Box( QRegExpEngine *engine ) 2216QRegExpEngine::Box::Box( QRegExpEngine *engine )
2211 : eng( engine ), skipanchors( 0 ) 2217 : eng( engine ), skipanchors( 0 )
2212#ifndef QT_NO_REGEXP_OPTIM 2218#ifndef QT_NO_REGEXP_OPTIM
2213 , earlyStart( 0 ), lateStart( 0 ), maxl( 0 ), occ1( *noOccurrences ) 2219 , earlyStart( 0 ), lateStart( 0 ), maxl( 0 ), occ1( *noOccurrences )
2214#endif 2220#endif
2215{ 2221{
2216 minl = 0; 2222 minl = 0;
2217} 2223}
2218 2224
2219QRegExpEngine::Box& QRegExpEngine::Box::operator=( const Box& b ) 2225QRegExpEngine::Box& QRegExpEngine::Box::operator=( const Box& b )
2220{ 2226{
2221 eng = b.eng; 2227 eng = b.eng;
2222 ls = b.ls; 2228 ls = b.ls;
2223 rs = b.rs; 2229 rs = b.rs;
2224 lanchors = b.lanchors; 2230 lanchors = b.lanchors;
2225 ranchors = b.ranchors; 2231 ranchors = b.ranchors;
2226 skipanchors = b.skipanchors; 2232 skipanchors = b.skipanchors;
2227#ifndef QT_NO_REGEXP_OPTIM 2233#ifndef QT_NO_REGEXP_OPTIM
2228 earlyStart = b.earlyStart; 2234 earlyStart = b.earlyStart;
2229 lateStart = b.lateStart; 2235 lateStart = b.lateStart;
@@ -2425,64 +2431,64 @@ void QRegExpEngine::Box::setupHeuristics()
2425 2431
2426 /* 2432 /*
2427 A regular expression such as 112|1 has occ1['2'] = 2 and minl = 1 at this 2433 A regular expression such as 112|1 has occ1['2'] = 2 and minl = 1 at this
2428 point. An entry of occ1 has to be at most minl or infinity for the rest 2434 point. An entry of occ1 has to be at most minl or infinity for the rest
2429 of the algorithm to go well. 2435 of the algorithm to go well.
2430 2436
2431 We waited until here before normalizing these cases (instead of doing it 2437 We waited until here before normalizing these cases (instead of doing it
2432 in Box::orx()) because sometimes things improve by themselves; consider 2438 in Box::orx()) because sometimes things improve by themselves; consider
2433 (112|1)34. 2439 (112|1)34.
2434 */ 2440 */
2435 for ( int i = 0; i < NumBadChars; i++ ) { 2441 for ( int i = 0; i < NumBadChars; i++ ) {
2436 if ( occ1[i] != NoOccurrence && occ1[i] >= minl ) 2442 if ( occ1[i] != NoOccurrence && occ1[i] >= minl )
2437 occ1[i] = minl; 2443 occ1[i] = minl;
2438 } 2444 }
2439 eng->setupBadCharHeuristic( minl, occ1 ); 2445 eng->setupBadCharHeuristic( minl, occ1 );
2440 2446
2441 eng->heuristicallyChooseHeuristic(); 2447 eng->heuristicallyChooseHeuristic();
2442} 2448}
2443#endif 2449#endif
2444 2450
2445#if defined(QT_DEBUG) 2451#if defined(QT_DEBUG)
2446void QRegExpEngine::Box::dump() const 2452void QRegExpEngine::Box::dump() const
2447{ 2453{
2448 int i; 2454 int i;
2449 qDebug( "Box of at least %d character%s", minl, minl == 1 ? "" : "s" ); 2455 odebug << "Box of at least " << minl << " character" << (minl == 1 ? "" : "s") << oendl;
2450 qDebug( " Left states:" ); 2456 odebug << " Left states:" << oendl;
2451 for ( i = 0; i < (int) ls.size(); i++ ) { 2457 for ( i = 0; i < (int) ls.size(); i++ ) {
2452 if ( at(lanchors, ls[i]) == 0 ) 2458 if ( at(lanchors, ls[i]) == 0 )
2453 qDebug( " %d", ls[i] ); 2459 odebug << " " << ls[i] << oendl;
2454 else 2460 else
2455 qDebug( " %d [anchors 0x%.8x]", ls[i], lanchors[ls[i]] ); 2461 odebug << " " << ls[i] << QString().sprintf(" [anchors 0x%.8x]", lanchors[ls[i]]) << oendl;
2456 } 2462 }
2457 qDebug( " Right states:" ); 2463 odebug << " Right states:" << oendl;
2458 for ( i = 0; i < (int) rs.size(); i++ ) { 2464 for ( i = 0; i < (int) rs.size(); i++ ) {
2459 if ( at(ranchors, ls[i]) == 0 ) 2465 if ( at(ranchors, ls[i]) == 0 )
2460 qDebug( " %d", rs[i] ); 2466 odebug << " " << rs[i] << oendl;
2461 else 2467 else
2462 qDebug( " %d [anchors 0x%.8x]", rs[i], ranchors[rs[i]] ); 2468 odebug << " " << rs[i] << QString().sprintf(" [anchors 0x%.8x]", ranchors[rs[i]]) << oendl;
2463 } 2469 }
2464 qDebug( " Skip anchors: 0x%.8x", skipanchors ); 2470 odebug << QString().sprintf(" Skip anchors: 0x%.8x", skipanchors) << oendl;
2465} 2471}
2466#endif 2472#endif
2467 2473
2468void QRegExpEngine::Box::addAnchorsToEngine( const Box& to ) const 2474void QRegExpEngine::Box::addAnchorsToEngine( const Box& to ) const
2469{ 2475{
2470 for ( int i = 0; i < (int) to.ls.size(); i++ ) { 2476 for ( int i = 0; i < (int) to.ls.size(); i++ ) {
2471 for ( int j = 0; j < (int) rs.size(); j++ ) { 2477 for ( int j = 0; j < (int) rs.size(); j++ ) {
2472 int a = eng->anchorConcatenation( at(ranchors, rs[j]), 2478 int a = eng->anchorConcatenation( at(ranchors, rs[j]),
2473 at(to.lanchors, to.ls[i]) ); 2479 at(to.lanchors, to.ls[i]) );
2474 eng->addAnchors( rs[j], to.ls[i], a ); 2480 eng->addAnchors( rs[j], to.ls[i], a );
2475 } 2481 }
2476 } 2482 }
2477} 2483}
2478 2484
2479int QRegExpEngine::getChar() 2485int QRegExpEngine::getChar()
2480{ 2486{
2481 return ( yyPos == yyLen ) ? EOS : yyIn[yyPos++].unicode(); 2487 return ( yyPos == yyLen ) ? EOS : yyIn[yyPos++].unicode();
2482} 2488}
2483 2489
2484int QRegExpEngine::getEscape() 2490int QRegExpEngine::getEscape()
2485{ 2491{
2486#ifndef QT_NO_REGEXP_ESCAPE 2492#ifndef QT_NO_REGEXP_ESCAPE
2487 const char tab[] = "afnrtv"; // no b, as \b means word boundary 2493 const char tab[] = "afnrtv"; // no b, as \b means word boundary
2488 const char backTab[] = "\a\f\n\r\t\v"; 2494 const char backTab[] = "\a\f\n\r\t\v";