summary | refs | log | tree | commit | diff
path: root/qmake/tools/qregexp.cpp
Unidiff
Diffstat (limited to 'qmake/tools/qregexp.cpp') (more/less context) (show whitespace changes)
-rw-r--r-- qmake/tools/qregexp.cpp 64
1 files changed, 36 insertions, 28 deletions
diff --git a/qmake/tools/qregexp.cpp b/qmake/tools/qregexp.cpp
index 500efed..0c1f060 100644
--- a/qmake/tools/qregexp.cpp
+++ b/qmake/tools/qregexp.cpp
@@ -258,25 +258,25 @@
258 \row \i <b>\\xhhhh</b> 258 \row \i <b>\\xhhhh</b>
259 \i This matches the Unicode character corresponding to the 259 \i This matches the Unicode character corresponding to the
260 hexadecimal number hhhh (between 0x0000 and 0xFFFF). \0ooo 260 hexadecimal number hhhh (between 0x0000 and 0xFFFF). \0ooo
261 (i.e., \zero ooo) matches the ASCII/Latin-1 character 261 (i.e., \zero ooo) matches the ASCII/Latin-1 character
262 corresponding to the octal number ooo (between 0 and 0377). 262 corresponding to the octal number ooo (between 0 and 0377).
263 \row \i <b>. (dot)</b> 263 \row \i <b>. (dot)</b>
264 \i This matches any character (including newline). 264 \i This matches any character (including newline).
265 \row \i <b>\\d</b> 265 \row \i <b>\\d</b>
266 \i This matches a digit (see QChar::isDigit()). 266 \i This matches a digit (QChar::isDigit()).
267 \row \i <b>\\D</b> 267 \row \i <b>\\D</b>
268 \i This matches a non-digit. 268 \i This matches a non-digit.
269 \row \i <b>\\s</b> 269 \row \i <b>\\s</b>
270 \i This matches a whitespace (see QChar::isSpace()). 270 \i This matches a whitespace (QChar::isSpace()).
271 \row \i <b>\\S</b> 271 \row \i <b>\\S</b>
272 \i This matches a non-whitespace. 272 \i This matches a non-whitespace.
273 \row \i <b>\\w</b> 273 \row \i <b>\\w</b>
274 \i This matches a word character (see QChar::isLetterOrNumber()). 274 \i This matches a word character (QChar::isLetterOrNumber() or '_').
275 \row \i <b>\\W</b> 275 \row \i <b>\\W</b>
276 \i This matches a non-word character. 276 \i This matches a non-word character.
277 \row \i <b>\\n</b> 277 \row \i <b>\\n</b>
278 \i The n-th \link #capturing-text backreference \endlink, 278 \i The n-th \link #capturing-text backreference \endlink,
279 e.g. \1, \2, etc. 279 e.g. \1, \2, etc.
280 \endtable 280 \endtable
281 281
282 \e {Note that the C++ compiler transforms backslashes in strings 282 \e {Note that the C++ compiler transforms backslashes in strings
@@ -542,17 +542,24 @@
542 Perl's $& is cap(0) or capturedTexts()[0]. There are no QRegExp 542 Perl's $& is cap(0) or capturedTexts()[0]. There are no QRegExp
543 equivalents for $`, $' or $+. Perl's capturing variables, $1, $2, 543 equivalents for $`, $' or $+. Perl's capturing variables, $1, $2,
544 ... correspond to cap(1) or capturedTexts()[1], cap(2) or 544 ... correspond to cap(1) or capturedTexts()[1], cap(2) or
545 capturedTexts()[2], etc. 545 capturedTexts()[2], etc.
546 546
547 To substitute a pattern use QString::replace(). 547 To substitute a pattern use QString::replace().
548 548
549 Perl's extended \c{/x} syntax is not supported, nor are 549 Perl's extended \c{/x} syntax is not supported, nor are
550 regexp comments (?#comment) or directives, e.g. (?i). 550 directives, e.g. (?i), or regexp comments, e.g. (?#comment). On
551 the other hand, C++'s rules for literal strings can be used to
552 achieve the same:
553 \code
554 QRegExp mark( "\\b" // word boundary
555 "[Mm]ark" // the word we want to match
556 );
557 \endcode
551 558
552 Both zero-width positive and zero-width negative lookahead 559 Both zero-width positive and zero-width negative lookahead
553 assertions (?=pattern) and (?!pattern) are supported with the same 560 assertions (?=pattern) and (?!pattern) are supported with the same
554 syntax as Perl. Perl's lookbehind assertions, "independent" 561 syntax as Perl. Perl's lookbehind assertions, "independent"
555 subexpressions and conditional expressions are not supported. 562 subexpressions and conditional expressions are not supported.
556 563
557 Non-capturing parentheses are also supported, with the same 564 Non-capturing parentheses are also supported, with the same
558 (?:pattern) syntax. 565 (?:pattern) syntax.
@@ -674,19 +681,19 @@
674 681
675 Here field[0] is the company, field[1] the web address and so on. 682 Here field[0] is the company, field[1] the web address and so on.
676 683
677 To imitate the matching of a shell we can use wildcard mode. 684 To imitate the matching of a shell we can use wildcard mode.
678 685
679 \code 686 \code
680 QRegExp rx( "*.html" ); // invalid regexp: * doesn't quantify anything 687 QRegExp rx( "*.html" ); // invalid regexp: * doesn't quantify anything
681 rx.setWildcard( TRUE ); // now it's a valid wildcard regexp 688 rx.setWildcard( TRUE ); // now it's a valid wildcard regexp
682 rx.search( "index.html" ); // returns 0 (matched at position 0) 689 rx.exactMatch( "index.html" ); // returns TRUE
683 rx.search( "default.htm" ); // returns -1 (no match) 690 rx.exactMatch( "default.htm" ); // returns FALSE
684 rx.search( "readme.txt" ); // returns -1 (no match) 691 rx.exactMatch( "readme.txt" ); // returns FALSE
685 \endcode 692 \endcode
686 693
687 Wildcard matching can be convenient because of its simplicity, but 694 Wildcard matching can be convenient because of its simplicity, but
688 any wildcard regexp can be defined using full regexps, e.g. 695 any wildcard regexp can be defined using full regexps, e.g.
689 <b>.*\.html$</b>. Notice that we can't match both \c .html and \c 696 <b>.*\.html$</b>. Notice that we can't match both \c .html and \c
690 .htm files with a wildcard unless we use <b>*.htm*</b> which will 697 .htm files with a wildcard unless we use <b>*.htm*</b> which will
691 also match 'test.html.bak'. A full regexp gives us the precision 698 also match 'test.html.bak'. A full regexp gives us the precision
692 we need, <b>.*\\.html?$</b>. 699 we need, <b>.*\\.html?$</b>.
@@ -710,16 +717,21 @@ const int NumBadChars = 64;
710#define BadChar( ch ) ( (ch).unicode() % NumBadChars ) 717#define BadChar( ch ) ( (ch).unicode() % NumBadChars )
711 718
712const int NoOccurrence = INT_MAX; 719const int NoOccurrence = INT_MAX;
713const int EmptyCapture = INT_MAX; 720const int EmptyCapture = INT_MAX;
714const int InftyLen = INT_MAX; 721const int InftyLen = INT_MAX;
715const int InftyRep = 1025; 722const int InftyRep = 1025;
716const int EOS = -1; 723const int EOS = -1;
717 724
725static bool isWord( QChar ch )
726{
727 return ch.isLetterOrNumber() || ch == QChar( '_' );
728}
729
718/* 730/*
719 Merges two QMemArrays of ints and puts the result into the first one. 731 Merges two QMemArrays of ints and puts the result into the first one.
720*/ 732*/
721static void mergeInto( QMemArray<int> *a, const QMemArray<int>& b ) 733static void mergeInto( QMemArray<int> *a, const QMemArray<int>& b )
722{ 734{
723 int asize = a->size(); 735 int asize = a->size();
724 int bsize = b.size(); 736 int bsize = b.size();
725 if ( asize == 0 ) { 737 if ( asize == 0 ) {
@@ -1675,19 +1687,19 @@ bool QRegExpEngine::testAnchor( int i, int a, const int *capBegin )
1675 if ( mmPos + i != mmLen ) 1687 if ( mmPos + i != mmLen )
1676 return FALSE; 1688 return FALSE;
1677 } 1689 }
1678#ifndef QT_NO_REGEXP_ESCAPE 1690#ifndef QT_NO_REGEXP_ESCAPE
1679 if ( (a & (Anchor_Word | Anchor_NonWord)) != 0 ) { 1691 if ( (a & (Anchor_Word | Anchor_NonWord)) != 0 ) {
1680 bool before = FALSE; 1692 bool before = FALSE;
1681 bool after = FALSE; 1693 bool after = FALSE;
1682 if ( mmPos + i != 0 ) 1694 if ( mmPos + i != 0 )
1683 before = mmIn[mmPos + i - 1].isLetterOrNumber(); 1695 before = isWord( mmIn[mmPos + i - 1] );
1684 if ( mmPos + i != mmLen ) 1696 if ( mmPos + i != mmLen )
1685 after = mmIn[mmPos + i].isLetterOrNumber(); 1697 after = isWord( mmIn[mmPos + i] );
1686 if ( (a & Anchor_Word) != 0 && (before == after) ) 1698 if ( (a & Anchor_Word) != 0 && (before == after) )
1687 return FALSE; 1699 return FALSE;
1688 if ( (a & Anchor_NonWord) != 0 && (before != after) ) 1700 if ( (a & Anchor_NonWord) != 0 && (before != after) )
1689 return FALSE; 1701 return FALSE;
1690 } 1702 }
1691#endif 1703#endif
1692#ifndef QT_NO_REGEXP_LOOKAHEAD 1704#ifndef QT_NO_REGEXP_LOOKAHEAD
1693 bool catchx = TRUE; 1705 bool catchx = TRUE;
@@ -2627,17 +2639,24 @@ int QRegExpEngine::getEscape()
2627 // see QChar::isSpace() 2639 // see QChar::isSpace()
2628 yyCharClass->addCategories( 0x7ffff87f ); 2640 yyCharClass->addCategories( 0x7ffff87f );
2629 yyCharClass->addRange( 0x0000, 0x0008 ); 2641 yyCharClass->addRange( 0x0000, 0x0008 );
2630 yyCharClass->addRange( 0x000e, 0x001f ); 2642 yyCharClass->addRange( 0x000e, 0x001f );
2631 yyCharClass->addRange( 0x007f, 0x009f ); 2643 yyCharClass->addRange( 0x007f, 0x009f );
2632 return Tok_CharClass; 2644 return Tok_CharClass;
2633 case 'W': 2645 case 'W':
2634 // see QChar::isLetterOrNumber() 2646 // see QChar::isLetterOrNumber()
2635 yyCharClass->addCategories( 0x7ff07f8f ); 2647 yyCharClass->addCategories( 0x7fe07f8f );
2648 yyCharClass->addRange( 0x203f, 0x2040 );
2649 yyCharClass->addSingleton( 0x2040 );
2650 yyCharClass->addSingleton( 0x30fb );
2651 yyCharClass->addRange( 0xfe33, 0xfe34 );
2652 yyCharClass->addRange( 0xfe4d, 0xfe4f );
2653 yyCharClass->addSingleton( 0xff3f );
2654 yyCharClass->addSingleton( 0xff65 );
2636 return Tok_CharClass; 2655 return Tok_CharClass;
2637#endif 2656#endif
2638#ifndef QT_NO_REGEXP_ESCAPE 2657#ifndef QT_NO_REGEXP_ESCAPE
2639 case 'b': 2658 case 'b':
2640 return Tok_Word; 2659 return Tok_Word;
2641#endif 2660#endif
2642#ifndef QT_NO_REGEXP_CCLASS 2661#ifndef QT_NO_REGEXP_CCLASS
2643 case 'd': 2662 case 'd':
@@ -2647,16 +2666,17 @@ int QRegExpEngine::getEscape()
2647 case 's': 2666 case 's':
2648 // see QChar::isSpace() 2667 // see QChar::isSpace()
2649 yyCharClass->addCategories( 0x00000380 ); 2668 yyCharClass->addCategories( 0x00000380 );
2650 yyCharClass->addRange( 0x0009, 0x000d ); 2669 yyCharClass->addRange( 0x0009, 0x000d );
2651 return Tok_CharClass; 2670 return Tok_CharClass;
2652 case 'w': 2671 case 'w':
2653 // see QChar::isLetterOrNumber() 2672 // see QChar::isLetterOrNumber()
2654 yyCharClass->addCategories( 0x000f8070 ); 2673 yyCharClass->addCategories( 0x000f8070 );
2674 yyCharClass->addSingleton( 0x005f ); // '_'
2655 return Tok_CharClass; 2675 return Tok_CharClass;
2656#endif 2676#endif
2657#ifndef QT_NO_REGEXP_ESCAPE 2677#ifndef QT_NO_REGEXP_ESCAPE
2658 case 'x': 2678 case 'x':
2659 val = 0; 2679 val = 0;
2660 for ( i = 0; i < 4; i++ ) { 2680 for ( i = 0; i < 4; i++ ) {
2661 low = QChar( yyCh ).lower(); 2681 low = QChar( yyCh ).lower();
2662 if ( low >= '0' && low <= '9' ) 2682 if ( low >= '0' && low <= '9' )
@@ -3178,37 +3198,39 @@ static QCache<QRegExpEngine> *engineCache = 0;
3178static QSingleCleanupHandler<QCache<QRegExpEngine> > cleanup_cache; 3198static QSingleCleanupHandler<QCache<QRegExpEngine> > cleanup_cache;
3179#endif 3199#endif
3180 3200
3181static QRegExpEngine *newEngine( const QString& pattern, bool caseSensitive ) 3201static QRegExpEngine *newEngine( const QString& pattern, bool caseSensitive )
3182{ 3202{
3183#ifndef QT_NO_REGEXP_OPTIM 3203#ifndef QT_NO_REGEXP_OPTIM
3184 if ( engineCache != 0 ) { 3204 if ( engineCache != 0 ) {
3185#ifdef QT_THREAD_SUPPORT 3205#ifdef QT_THREAD_SUPPORT
3186 QMutexLocker locker( qt_global_mutexpool->get( &engineCache ) ); 3206 QMutexLocker locker( qt_global_mutexpool ?
3207 qt_global_mutexpool->get( &engineCache ) : 0 );
3187#endif 3208#endif
3188 QRegExpEngine *eng = engineCache->take( pattern ); 3209 QRegExpEngine *eng = engineCache->take( pattern );
3189 if ( eng == 0 || eng->caseSensitive() != caseSensitive ) { 3210 if ( eng == 0 || eng->caseSensitive() != caseSensitive ) {
3190 delete eng; 3211 delete eng;
3191 } else { 3212 } else {
3192 eng->ref(); 3213 eng->ref();
3193 return eng; 3214 return eng;
3194 } 3215 }
3195 } 3216 }
3196#endif 3217#endif
3197 return new QRegExpEngine( pattern, caseSensitive ); 3218 return new QRegExpEngine( pattern, caseSensitive );
3198} 3219}
3199 3220
3200static void derefEngine( QRegExpEngine *eng, const QString& pattern ) 3221static void derefEngine( QRegExpEngine *eng, const QString& pattern )
3201{ 3222{
3202 if ( eng != 0 && eng->deref() ) {
3203#ifndef QT_NO_REGEXP_OPTIM
3204#ifdef QT_THREAD_SUPPORT 3223#ifdef QT_THREAD_SUPPORT
3205 QMutexLocker locker( qt_global_mutexpool->get( &engineCache ) ); 3224 QMutexLocker locker( qt_global_mutexpool ?
3225 qt_global_mutexpool->get( &engineCache ) : 0 );
3206#endif 3226#endif
3227 if ( eng != 0 && eng->deref() ) {
3228#ifndef QT_NO_REGEXP_OPTIM
3207 if ( engineCache == 0 ) { 3229 if ( engineCache == 0 ) {
3208 engineCache = new QCache<QRegExpEngine>; 3230 engineCache = new QCache<QRegExpEngine>;
3209 engineCache->setAutoDelete( TRUE ); 3231 engineCache->setAutoDelete( TRUE );
3210 cleanup_cache.set( &engineCache ); 3232 cleanup_cache.set( &engineCache );
3211 } 3233 }
3212 if ( !pattern.isNull() && 3234 if ( !pattern.isNull() &&
3213 engineCache->insert(pattern, eng, 4 + pattern.length() / 4) ) 3235 engineCache->insert(pattern, eng, 4 + pattern.length() / 4) )
3214 return; 3236 return;
@@ -3560,23 +3582,16 @@ int QRegExp::match( const QString& str, int index, int *len,
3560{ 3582{
3561 int pos = search( str, index, indexIsStart ? CaretAtOffset : CaretAtZero ); 3583 int pos = search( str, index, indexIsStart ? CaretAtOffset : CaretAtZero );
3562 if ( len != 0 ) 3584 if ( len != 0 )
3563 *len = matchedLength(); 3585 *len = matchedLength();
3564 return pos; 3586 return pos;
3565} 3587}
3566#endif // QT_NO_COMPAT 3588#endif // QT_NO_COMPAT
3567 3589
3568/*!
3569 \overload
3570
3571 This convenience function searches with a \c CaretMode of \c
3572 CaretAtZero which is the most common usage.
3573*/
3574
3575int QRegExp::search( const QString& str, int offset ) const 3590int QRegExp::search( const QString& str, int offset ) const
3576{ 3591{
3577 return search( str, offset, CaretAtZero ); 3592 return search( str, offset, CaretAtZero );
3578} 3593}
3579 3594
3580/*! 3595/*!
3581 Attempts to find a match in \a str from position \a offset (0 by 3596 Attempts to find a match in \a str from position \a offset (0 by
3582 default). If \a offset is -1, the search starts at the last 3597 default). If \a offset is -1, the search starts at the last
@@ -3620,23 +3635,16 @@ int QRegExp::search( const QString& str, int offset, CaretMode caretMode ) const
3620 priv->capturedCache.clear(); 3635 priv->capturedCache.clear();
3621#endif 3636#endif
3622 priv->captured = eng->match( str, offset, priv->min, FALSE, 3637 priv->captured = eng->match( str, offset, priv->min, FALSE,
3623 caretIndex(offset, caretMode) ); 3638 caretIndex(offset, caretMode) );
3624 return priv->captured[0]; 3639 return priv->captured[0];
3625} 3640}
3626 3641
3627 3642
3628/*!
3629 \overload
3630
3631 This convenience function searches with a \c CaretMode of \c
3632 CaretAtZero which is the most common usage.
3633*/
3634
3635int QRegExp::searchRev( const QString& str, int offset ) const 3643int QRegExp::searchRev( const QString& str, int offset ) const
3636{ 3644{
3637 return searchRev( str, offset, CaretAtZero ); 3645 return searchRev( str, offset, CaretAtZero );
3638} 3646}
3639 3647
3640/*! 3648/*!
3641 Attempts to find a match backwards in \a str from position \a 3649 Attempts to find a match backwards in \a str from position \a
3642 offset. If \a offset is -1 (the default), the search starts at the 3650 offset. If \a offset is -1 (the default), the search starts at the