author | llornkcor <llornkcor> | 2003-07-10 02:40:10 (UTC) |
---|---|---|
committer | llornkcor <llornkcor> | 2003-07-10 02:40:10 (UTC) |
commit | 155d68c1e7d7dc0fed2534ac43d6d77ce2781f55 (patch) (side-by-side diff) | |
tree | e6edaa5a7040fe6c224c3943d1094dcf02e4f74c /qmake/tools/qregexp.cpp | |
parent | 86703e8a5527ef114facd02c005b6b3a7e62e263 (diff) | |
download | opie-155d68c1e7d7dc0fed2534ac43d6d77ce2781f55.zip opie-155d68c1e7d7dc0fed2534ac43d6d77ce2781f55.tar.gz opie-155d68c1e7d7dc0fed2534ac43d6d77ce2781f55.tar.bz2 |
update qmake to 1.05a
-rw-r--r-- | qmake/tools/qregexp.cpp | 70 |
1 files changed, 39 insertions, 31 deletions
diff --git a/qmake/tools/qregexp.cpp b/qmake/tools/qregexp.cpp index 500efed..0c1f060 100644 --- a/qmake/tools/qregexp.cpp +++ b/qmake/tools/qregexp.cpp @@ -258,25 +258,25 @@ \row \i <b>\\xhhhh</b> \i This matches the Unicode character corresponding to the hexadecimal number hhhh (between 0x0000 and 0xFFFF). \0ooo (i.e., \zero ooo) matches the ASCII/Latin-1 character corresponding to the octal number ooo (between 0 and 0377). \row \i <b>. (dot)</b> \i This matches any character (including newline). \row \i <b>\\d</b> - \i This matches a digit (see QChar::isDigit()). + \i This matches a digit (QChar::isDigit()). \row \i <b>\\D</b> \i This matches a non-digit. \row \i <b>\\s</b> - \i This matches a whitespace (see QChar::isSpace()). + \i This matches a whitespace (QChar::isSpace()). \row \i <b>\\S</b> \i This matches a non-whitespace. \row \i <b>\\w</b> - \i This matches a word character (see QChar::isLetterOrNumber()). + \i This matches a word character (QChar::isLetterOrNumber() or '_'). \row \i <b>\\W</b> \i This matches a non-word character. \row \i <b>\\n</b> \i The n-th \link #capturing-text backreference \endlink, e.g. \1, \2, etc. \endtable \e {Note that the C++ compiler transforms backslashes in strings @@ -542,17 +542,24 @@ Perl's $& is cap(0) or capturedTexts()[0]. There are no QRegExp equivalents for $`, $' or $+. Perl's capturing variables, $1, $2, ... correspond to cap(1) or capturedTexts()[1], cap(2) or capturedTexts()[2], etc. To substitute a pattern use QString::replace(). Perl's extended \c{/x} syntax is not supported, nor are - regexp comments (?#comment) or directives, e.g. (?i). + directives, e.g. (?i), or regexp comments, e.g. (?#comment). On + the other hand, C++'s rules for literal strings can be used to + achieve the same: + \code + QRegExp mark( "\\b" // word boundary + "[Mm]ark" // the word we want to match + ); + \endcode Both zero-width positive and zero-width negative lookahead assertions (?=pattern) and (?!pattern) are supported with the same syntax as Perl. Perl's lookbehind assertions, "independent" subexpressions and conditional expressions are not supported. Non-capturing parentheses are also supported, with the same (?:pattern) syntax. @@ -672,21 +679,21 @@ QStringList field = QStringList::split( "\t", str ); \endcode Here field[0] is the company, field[1] the web address and so on. To imitate the matching of a shell we can use wildcard mode. \code - QRegExp rx( "*.html" ); // invalid regexp: * doesn't quantify anything - rx.setWildcard( TRUE ); // now it's a valid wildcard regexp - rx.search( "index.html" ); // returns 0 (matched at position 0) - rx.search( "default.htm" ); // returns -1 (no match) - rx.search( "readme.txt" ); // returns -1 (no match) + QRegExp rx( "*.html" ); // invalid regexp: * doesn't quantify anything + rx.setWildcard( TRUE ); // now it's a valid wildcard regexp + rx.exactMatch( "index.html" ); // returns TRUE + rx.exactMatch( "default.htm" ); // returns FALSE + rx.exactMatch( "readme.txt" ); // returns FALSE \endcode Wildcard matching can be convenient because of its simplicity, but any wildcard regexp can be defined using full regexps, e.g. <b>.*\.html$</b>. Notice that we can't match both \c .html and \c .htm files with a wildcard unless we use <b>*.htm*</b> which will also match 'test.html.bak'. A full regexp gives us the precision we need, <b>.*\\.html?$</b>. @@ -710,16 +717,21 @@ const int NumBadChars = 64; #define BadChar( ch ) ( (ch).unicode() % NumBadChars ) const int NoOccurrence = INT_MAX; const int EmptyCapture = INT_MAX; const int InftyLen = INT_MAX; const int InftyRep = 1025; const int EOS = -1; +static bool isWord( QChar ch ) +{ + return ch.isLetterOrNumber() || ch == QChar( '_' ); +} + /* Merges two QMemArrays of ints and puts the result into the first one. */ static void mergeInto( QMemArray<int> *a, const QMemArray<int>& b ) { int asize = a->size(); int bsize = b.size(); if ( asize == 0 ) { @@ -1675,19 +1687,19 @@ bool QRegExpEngine::testAnchor( int i, int a, const int *capBegin ) if ( mmPos + i != mmLen ) return FALSE; } #ifndef QT_NO_REGEXP_ESCAPE if ( (a & (Anchor_Word | Anchor_NonWord)) != 0 ) { bool before = FALSE; bool after = FALSE; if ( mmPos + i != 0 ) - before = mmIn[mmPos + i - 1].isLetterOrNumber(); + before = isWord( mmIn[mmPos + i - 1] ); if ( mmPos + i != mmLen ) - after = mmIn[mmPos + i].isLetterOrNumber(); + after = isWord( mmIn[mmPos + i] ); if ( (a & Anchor_Word) != 0 && (before == after) ) return FALSE; if ( (a & Anchor_NonWord) != 0 && (before != after) ) return FALSE; } #endif #ifndef QT_NO_REGEXP_LOOKAHEAD bool catchx = TRUE; @@ -2627,17 +2639,24 @@ int QRegExpEngine::getEscape() // see QChar::isSpace() yyCharClass->addCategories( 0x7ffff87f ); yyCharClass->addRange( 0x0000, 0x0008 ); yyCharClass->addRange( 0x000e, 0x001f ); yyCharClass->addRange( 0x007f, 0x009f ); return Tok_CharClass; case 'W': // see QChar::isLetterOrNumber() - yyCharClass->addCategories( 0x7ff07f8f ); + yyCharClass->addCategories( 0x7fe07f8f ); + yyCharClass->addRange( 0x203f, 0x2040 ); + yyCharClass->addSingleton( 0x2040 ); + yyCharClass->addSingleton( 0x30fb ); + yyCharClass->addRange( 0xfe33, 0xfe34 ); + yyCharClass->addRange( 0xfe4d, 0xfe4f ); + yyCharClass->addSingleton( 0xff3f ); + yyCharClass->addSingleton( 0xff65 ); return Tok_CharClass; #endif #ifndef QT_NO_REGEXP_ESCAPE case 'b': return Tok_Word; #endif #ifndef QT_NO_REGEXP_CCLASS case 'd': @@ -2647,16 +2666,17 @@ int QRegExpEngine::getEscape() case 's': // see QChar::isSpace() yyCharClass->addCategories( 0x00000380 ); yyCharClass->addRange( 0x0009, 0x000d ); return Tok_CharClass; case 'w': // see QChar::isLetterOrNumber() yyCharClass->addCategories( 0x000f8070 ); + yyCharClass->addSingleton( 0x005f ); // '_' return Tok_CharClass; #endif #ifndef QT_NO_REGEXP_ESCAPE case 'x': val = 0; for ( i = 0; i < 4; i++ ) { low = QChar( yyCh ).lower(); if ( low >= '0' && low <= '9' ) @@ -3178,37 +3198,39 @@ static QCache<QRegExpEngine> *engineCache = 0; static QSingleCleanupHandler<QCache<QRegExpEngine> > cleanup_cache; #endif static QRegExpEngine *newEngine( const QString& pattern, bool caseSensitive ) { #ifndef QT_NO_REGEXP_OPTIM if ( engineCache != 0 ) { #ifdef QT_THREAD_SUPPORT - QMutexLocker locker( qt_global_mutexpool->get( &engineCache ) ); + QMutexLocker locker( qt_global_mutexpool ? + qt_global_mutexpool->get( &engineCache ) : 0 ); #endif QRegExpEngine *eng = engineCache->take( pattern ); if ( eng == 0 || eng->caseSensitive() != caseSensitive ) { delete eng; } else { eng->ref(); return eng; } } #endif return new QRegExpEngine( pattern, caseSensitive ); } static void derefEngine( QRegExpEngine *eng, const QString& pattern ) { - if ( eng != 0 && eng->deref() ) { -#ifndef QT_NO_REGEXP_OPTIM #ifdef QT_THREAD_SUPPORT - QMutexLocker locker( qt_global_mutexpool->get( &engineCache ) ); + QMutexLocker locker( qt_global_mutexpool ? + qt_global_mutexpool->get( &engineCache ) : 0 ); #endif + if ( eng != 0 && eng->deref() ) { +#ifndef QT_NO_REGEXP_OPTIM if ( engineCache == 0 ) { engineCache = new QCache<QRegExpEngine>; engineCache->setAutoDelete( TRUE ); cleanup_cache.set( &engineCache ); } if ( !pattern.isNull() && engineCache->insert(pattern, eng, 4 + pattern.length() / 4) ) return; @@ -3560,23 +3582,16 @@ int QRegExp::match( const QString& str, int index, int *len, { int pos = search( str, index, indexIsStart ? CaretAtOffset : CaretAtZero ); if ( len != 0 ) *len = matchedLength(); return pos; } #endif // QT_NO_COMPAT -/*! - \overload - - This convenience function searches with a \c CaretMode of \c - CaretAtZero which is the most common usage. -*/ - int QRegExp::search( const QString& str, int offset ) const { return search( str, offset, CaretAtZero ); } /*! Attempts to find a match in \a str from position \a offset (0 by default). If \a offset is -1, the search starts at the last @@ -3620,23 +3635,16 @@ int QRegExp::search( const QString& str, int offset, CaretMode caretMode ) const priv->capturedCache.clear(); #endif priv->captured = eng->match( str, offset, priv->min, FALSE, caretIndex(offset, caretMode) ); return priv->captured[0]; } -/*! - \overload - - This convenience function searches with a \c CaretMode of \c - CaretAtZero which is the most common usage. -*/ - int QRegExp::searchRev( const QString& str, int offset ) const { return searchRev( str, offset, CaretAtZero ); } /*! Attempts to find a match backwards in \a str from position \a offset. If \a offset is -1 (the default), the search starts at the @@ -3689,17 +3697,17 @@ int QRegExp::searchRev( const QString& str, int offset, \sa exactMatch() search() searchRev() */ int QRegExp::matchedLength() const { return priv->captured[1]; } #ifndef QT_NO_REGEXP_CAPTURE -/*! +/*! Returns the number of captures contained in the regular expression. */ int QRegExp::numCaptures() const { return eng->numCaptures(); } |