-rw-r--r-- | qmake/tools/qregexp.cpp | 70 |
1 files changed, 39 insertions, 31 deletions
diff --git a/qmake/tools/qregexp.cpp b/qmake/tools/qregexp.cpp
index 500efed..0c1f060 100644
--- a/qmake/tools/qregexp.cpp
+++ b/qmake/tools/qregexp.cpp | |||
@@ -260,21 +260,21 @@ | |||
260 | hexadecimal number hhhh (between 0x0000 and 0xFFFF). \0ooo | 260 | hexadecimal number hhhh (between 0x0000 and 0xFFFF). \0ooo |
261 | (i.e., \zero ooo) matches the ASCII/Latin-1 character | 261 | (i.e., \zero ooo) matches the ASCII/Latin-1 character |
262 | corresponding to the octal number ooo (between 0 and 0377). | 262 | corresponding to the octal number ooo (between 0 and 0377). |
263 | \row \i <b>. (dot)</b> | 263 | \row \i <b>. (dot)</b> |
264 | \i This matches any character (including newline). | 264 | \i This matches any character (including newline). |
265 | \row \i <b>\\d</b> | 265 | \row \i <b>\\d</b> |
266 | \i This matches a digit (see QChar::isDigit()). | 266 | \i This matches a digit (QChar::isDigit()). |
267 | \row \i <b>\\D</b> | 267 | \row \i <b>\\D</b> |
268 | \i This matches a non-digit. | 268 | \i This matches a non-digit. |
269 | \row \i <b>\\s</b> | 269 | \row \i <b>\\s</b> |
270 | \i This matches a whitespace (see QChar::isSpace()). | 270 | \i This matches a whitespace (QChar::isSpace()). |
271 | \row \i <b>\\S</b> | 271 | \row \i <b>\\S</b> |
272 | \i This matches a non-whitespace. | 272 | \i This matches a non-whitespace. |
273 | \row \i <b>\\w</b> | 273 | \row \i <b>\\w</b> |
274 | \i This matches a word character (see QChar::isLetterOrNumber()). | 274 | \i This matches a word character (QChar::isLetterOrNumber() or '_'). |
275 | \row \i <b>\\W</b> | 275 | \row \i <b>\\W</b> |
276 | \i This matches a non-word character. | 276 | \i This matches a non-word character. |
277 | \row \i <b>\\n</b> | 277 | \row \i <b>\\n</b> |
278 | \i The n-th \link #capturing-text backreference \endlink, | 278 | \i The n-th \link #capturing-text backreference \endlink, |
279 | e.g. \1, \2, etc. | 279 | e.g. \1, \2, etc. |
280 | \endtable | 280 | \endtable |
@@ -544,13 +544,20 @@ | |||
544 | ... correspond to cap(1) or capturedTexts()[1], cap(2) or | 544 | ... correspond to cap(1) or capturedTexts()[1], cap(2) or |
545 | capturedTexts()[2], etc. | 545 | capturedTexts()[2], etc. |
546 | 546 | ||
547 | To substitute a pattern use QString::replace(). | 547 | To substitute a pattern use QString::replace(). |
548 | 548 | ||
549 | Perl's extended \c{/x} syntax is not supported, nor are | 549 | Perl's extended \c{/x} syntax is not supported, nor are |
550 | regexp comments (?#comment) or directives, e.g. (?i). | 550 | directives, e.g. (?i), or regexp comments, e.g. (?#comment). On |
551 | the other hand, C++'s rules for literal strings can be used to | ||
552 | achieve the same: | ||
553 | \code | ||
554 | QRegExp mark( "\\b" // word boundary | ||
555 | "[Mm]ark" // the word we want to match | ||
556 | ); | ||
557 | \endcode | ||
551 | 558 | ||
552 | Both zero-width positive and zero-width negative lookahead | 559 | Both zero-width positive and zero-width negative lookahead |
553 | assertions (?=pattern) and (?!pattern) are supported with the same | 560 | assertions (?=pattern) and (?!pattern) are supported with the same |
554 | syntax as Perl. Perl's lookbehind assertions, "independent" | 561 | syntax as Perl. Perl's lookbehind assertions, "independent" |
555 | subexpressions and conditional expressions are not supported. | 562 | subexpressions and conditional expressions are not supported. |
556 | 563 | ||
@@ -674,17 +681,17 @@ | |||
674 | 681 | ||
675 | Here field[0] is the company, field[1] the web address and so on. | 682 | Here field[0] is the company, field[1] the web address and so on. |
676 | 683 | ||
677 | To imitate the matching of a shell we can use wildcard mode. | 684 | To imitate the matching of a shell we can use wildcard mode. |
678 | 685 | ||
679 | \code | 686 | \code |
680 | QRegExp rx( "*.html" ); // invalid regexp: * doesn't quantify anything | 687 | QRegExp rx( "*.html" ); // invalid regexp: * doesn't quantify anything |
681 | rx.setWildcard( TRUE ); // now it's a valid wildcard regexp | 688 | rx.setWildcard( TRUE ); // now it's a valid wildcard regexp |
682 | rx.search( "index.html" ); // returns 0 (matched at position 0) | 689 | rx.exactMatch( "index.html" ); // returns TRUE |
683 | rx.search( "default.htm" ); // returns -1 (no match) | 690 | rx.exactMatch( "default.htm" ); // returns FALSE |
684 | rx.search( "readme.txt" ); // returns -1 (no match) | 691 | rx.exactMatch( "readme.txt" ); // returns FALSE |
685 | \endcode | 692 | \endcode |
686 | 693 | ||
687 | Wildcard matching can be convenient because of its simplicity, but | 694 | Wildcard matching can be convenient because of its simplicity, but |
688 | any wildcard regexp can be defined using full regexps, e.g. | 695 | any wildcard regexp can be defined using full regexps, e.g. |
689 | <b>.*\.html$</b>. Notice that we can't match both \c .html and \c | 696 | <b>.*\.html$</b>. Notice that we can't match both \c .html and \c |
690 | .htm files with a wildcard unless we use <b>*.htm*</b> which will | 697 | .htm files with a wildcard unless we use <b>*.htm*</b> which will |
@@ -712,12 +719,17 @@ const int NumBadChars = 64; | |||
712 | const int NoOccurrence = INT_MAX; | 719 | const int NoOccurrence = INT_MAX; |
713 | const int EmptyCapture = INT_MAX; | 720 | const int EmptyCapture = INT_MAX; |
714 | const int InftyLen = INT_MAX; | 721 | const int InftyLen = INT_MAX; |
715 | const int InftyRep = 1025; | 722 | const int InftyRep = 1025; |
716 | const int EOS = -1; | 723 | const int EOS = -1; |
717 | 724 | ||
725 | static bool isWord( QChar ch ) | ||
726 | { | ||
727 | return ch.isLetterOrNumber() || ch == QChar( '_' ); | ||
728 | } | ||
729 | |||
718 | /* | 730 | /* |
719 | Merges two QMemArrays of ints and puts the result into the first one. | 731 | Merges two QMemArrays of ints and puts the result into the first one. |
720 | */ | 732 | */ |
721 | static void mergeInto( QMemArray<int> *a, const QMemArray<int>& b ) | 733 | static void mergeInto( QMemArray<int> *a, const QMemArray<int>& b ) |
722 | { | 734 | { |
723 | int asize = a->size(); | 735 | int asize = a->size(); |
@@ -1677,15 +1689,15 @@ bool QRegExpEngine::testAnchor( int i, int a, const int *capBegin ) | |||
1677 | } | 1689 | } |
1678 | #ifndef QT_NO_REGEXP_ESCAPE | 1690 | #ifndef QT_NO_REGEXP_ESCAPE |
1679 | if ( (a & (Anchor_Word | Anchor_NonWord)) != 0 ) { | 1691 | if ( (a & (Anchor_Word | Anchor_NonWord)) != 0 ) { |
1680 | bool before = FALSE; | 1692 | bool before = FALSE; |
1681 | bool after = FALSE; | 1693 | bool after = FALSE; |
1682 | if ( mmPos + i != 0 ) | 1694 | if ( mmPos + i != 0 ) |
1683 | before = mmIn[mmPos + i - 1].isLetterOrNumber(); | 1695 | before = isWord( mmIn[mmPos + i - 1] ); |
1684 | if ( mmPos + i != mmLen ) | 1696 | if ( mmPos + i != mmLen ) |
1685 | after = mmIn[mmPos + i].isLetterOrNumber(); | 1697 | after = isWord( mmIn[mmPos + i] ); |
1686 | if ( (a & Anchor_Word) != 0 && (before == after) ) | 1698 | if ( (a & Anchor_Word) != 0 && (before == after) ) |
1687 | return FALSE; | 1699 | return FALSE; |
1688 | if ( (a & Anchor_NonWord) != 0 && (before != after) ) | 1700 | if ( (a & Anchor_NonWord) != 0 && (before != after) ) |
1689 | return FALSE; | 1701 | return FALSE; |
1690 | } | 1702 | } |
1691 | #endif | 1703 | #endif |
@@ -2629,13 +2641,20 @@ int QRegExpEngine::getEscape() | |||
2629 | yyCharClass->addRange( 0x0000, 0x0008 ); | 2641 | yyCharClass->addRange( 0x0000, 0x0008 ); |
2630 | yyCharClass->addRange( 0x000e, 0x001f ); | 2642 | yyCharClass->addRange( 0x000e, 0x001f ); |
2631 | yyCharClass->addRange( 0x007f, 0x009f ); | 2643 | yyCharClass->addRange( 0x007f, 0x009f ); |
2632 | return Tok_CharClass; | 2644 | return Tok_CharClass; |
2633 | case 'W': | 2645 | case 'W': |
2634 | // see QChar::isLetterOrNumber() | 2646 | // see QChar::isLetterOrNumber() |
2635 | yyCharClass->addCategories( 0x7ff07f8f ); | 2647 | yyCharClass->addCategories( 0x7fe07f8f ); |
2648 | yyCharClass->addRange( 0x203f, 0x2040 ); | ||
2649 | yyCharClass->addSingleton( 0x2040 ); | ||
2650 | yyCharClass->addSingleton( 0x30fb ); | ||
2651 | yyCharClass->addRange( 0xfe33, 0xfe34 ); | ||
2652 | yyCharClass->addRange( 0xfe4d, 0xfe4f ); | ||
2653 | yyCharClass->addSingleton( 0xff3f ); | ||
2654 | yyCharClass->addSingleton( 0xff65 ); | ||
2636 | return Tok_CharClass; | 2655 | return Tok_CharClass; |
2637 | #endif | 2656 | #endif |
2638 | #ifndef QT_NO_REGEXP_ESCAPE | 2657 | #ifndef QT_NO_REGEXP_ESCAPE |
2639 | case 'b': | 2658 | case 'b': |
2640 | return Tok_Word; | 2659 | return Tok_Word; |
2641 | #endif | 2660 | #endif |
@@ -2649,12 +2668,13 @@ int QRegExpEngine::getEscape() | |||
2649 | yyCharClass->addCategories( 0x00000380 ); | 2668 | yyCharClass->addCategories( 0x00000380 ); |
2650 | yyCharClass->addRange( 0x0009, 0x000d ); | 2669 | yyCharClass->addRange( 0x0009, 0x000d ); |
2651 | return Tok_CharClass; | 2670 | return Tok_CharClass; |
2652 | case 'w': | 2671 | case 'w': |
2653 | // see QChar::isLetterOrNumber() | 2672 | // see QChar::isLetterOrNumber() |
2654 | yyCharClass->addCategories( 0x000f8070 ); | 2673 | yyCharClass->addCategories( 0x000f8070 ); |
2674 | yyCharClass->addSingleton( 0x005f ); // '_' | ||
2655 | return Tok_CharClass; | 2675 | return Tok_CharClass; |
2656 | #endif | 2676 | #endif |
2657 | #ifndef QT_NO_REGEXP_ESCAPE | 2677 | #ifndef QT_NO_REGEXP_ESCAPE |
2658 | case 'x': | 2678 | case 'x': |
2659 | val = 0; | 2679 | val = 0; |
2660 | for ( i = 0; i < 4; i++ ) { | 2680 | for ( i = 0; i < 4; i++ ) { |
@@ -3180,13 +3200,14 @@ static QSingleCleanupHandler<QCache<QRegExpEngine> > cleanup_cache; | |||
3180 | 3200 | ||
3181 | static QRegExpEngine *newEngine( const QString& pattern, bool caseSensitive ) | 3201 | static QRegExpEngine *newEngine( const QString& pattern, bool caseSensitive ) |
3182 | { | 3202 | { |
3183 | #ifndef QT_NO_REGEXP_OPTIM | 3203 | #ifndef QT_NO_REGEXP_OPTIM |
3184 | if ( engineCache != 0 ) { | 3204 | if ( engineCache != 0 ) { |
3185 | #ifdef QT_THREAD_SUPPORT | 3205 | #ifdef QT_THREAD_SUPPORT |
3186 | QMutexLocker locker( qt_global_mutexpool->get( &engineCache ) ); | 3206 | QMutexLocker locker( qt_global_mutexpool ? |
3207 | qt_global_mutexpool->get( &engineCache ) : 0 ); | ||
3187 | #endif | 3208 | #endif |
3188 | QRegExpEngine *eng = engineCache->take( pattern ); | 3209 | QRegExpEngine *eng = engineCache->take( pattern ); |
3189 | if ( eng == 0 || eng->caseSensitive() != caseSensitive ) { | 3210 | if ( eng == 0 || eng->caseSensitive() != caseSensitive ) { |
3190 | delete eng; | 3211 | delete eng; |
3191 | } else { | 3212 | } else { |
3192 | eng->ref(); | 3213 | eng->ref(); |
@@ -3196,17 +3217,18 @@ static QRegExpEngine *newEngine( const QString& pattern, bool caseSensitive ) | |||
3196 | #endif | 3217 | #endif |
3197 | return new QRegExpEngine( pattern, caseSensitive ); | 3218 | return new QRegExpEngine( pattern, caseSensitive ); |
3198 | } | 3219 | } |
3199 | 3220 | ||
3200 | static void derefEngine( QRegExpEngine *eng, const QString& pattern ) | 3221 | static void derefEngine( QRegExpEngine *eng, const QString& pattern ) |
3201 | { | 3222 | { |
3202 | if ( eng != 0 && eng->deref() ) { | ||
3203 | #ifndef QT_NO_REGEXP_OPTIM | ||
3204 | #ifdef QT_THREAD_SUPPORT | 3223 | #ifdef QT_THREAD_SUPPORT |
3205 | QMutexLocker locker( qt_global_mutexpool->get( &engineCache ) ); | 3224 | QMutexLocker locker( qt_global_mutexpool ? |
3225 | qt_global_mutexpool->get( &engineCache ) : 0 ); | ||
3206 | #endif | 3226 | #endif |
3227 | if ( eng != 0 && eng->deref() ) { | ||
3228 | #ifndef QT_NO_REGEXP_OPTIM | ||
3207 | if ( engineCache == 0 ) { | 3229 | if ( engineCache == 0 ) { |
3208 | engineCache = new QCache<QRegExpEngine>; | 3230 | engineCache = new QCache<QRegExpEngine>; |
3209 | engineCache->setAutoDelete( TRUE ); | 3231 | engineCache->setAutoDelete( TRUE ); |
3210 | cleanup_cache.set( &engineCache ); | 3232 | cleanup_cache.set( &engineCache ); |
3211 | } | 3233 | } |
3212 | if ( !pattern.isNull() && | 3234 | if ( !pattern.isNull() && |
@@ -3562,19 +3584,12 @@ int QRegExp::match( const QString& str, int index, int *len, | |||
3562 | if ( len != 0 ) | 3584 | if ( len != 0 ) |
3563 | *len = matchedLength(); | 3585 | *len = matchedLength(); |
3564 | return pos; | 3586 | return pos; |
3565 | } | 3587 | } |
3566 | #endif // QT_NO_COMPAT | 3588 | #endif // QT_NO_COMPAT |
3567 | 3589 | ||
3568 | /*! | ||
3569 | \overload | ||
3570 | |||
3571 | This convenience function searches with a \c CaretMode of \c | ||
3572 | CaretAtZero which is the most common usage. | ||
3573 | */ | ||
3574 | |||
3575 | int QRegExp::search( const QString& str, int offset ) const | 3590 | int QRegExp::search( const QString& str, int offset ) const |
3576 | { | 3591 | { |
3577 | return search( str, offset, CaretAtZero ); | 3592 | return search( str, offset, CaretAtZero ); |
3578 | } | 3593 | } |
3579 | 3594 | ||
3580 | /*! | 3595 | /*! |
@@ -3622,19 +3637,12 @@ int QRegExp::search( const QString& str, int offset, CaretMode caretMode ) const | |||
3622 | priv->captured = eng->match( str, offset, priv->min, FALSE, | 3637 | priv->captured = eng->match( str, offset, priv->min, FALSE, |
3623 | caretIndex(offset, caretMode) ); | 3638 | caretIndex(offset, caretMode) ); |
3624 | return priv->captured[0]; | 3639 | return priv->captured[0]; |
3625 | } | 3640 | } |
3626 | 3641 | ||
3627 | 3642 | ||
3628 | /*! | ||
3629 | \overload | ||
3630 | |||
3631 | This convenience function searches with a \c CaretMode of \c | ||
3632 | CaretAtZero which is the most common usage. | ||
3633 | */ | ||
3634 | |||
3635 | int QRegExp::searchRev( const QString& str, int offset ) const | 3643 | int QRegExp::searchRev( const QString& str, int offset ) const |
3636 | { | 3644 | { |
3637 | return searchRev( str, offset, CaretAtZero ); | 3645 | return searchRev( str, offset, CaretAtZero ); |
3638 | } | 3646 | } |
3639 | 3647 | ||
3640 | /*! | 3648 | /*! |
@@ -3691,13 +3699,13 @@ int QRegExp::searchRev( const QString& str, int offset, | |||
3691 | int QRegExp::matchedLength() const | 3699 | int QRegExp::matchedLength() const |
3692 | { | 3700 | { |
3693 | return priv->captured[1]; | 3701 | return priv->captured[1]; |
3694 | } | 3702 | } |
3695 | 3703 | ||
3696 | #ifndef QT_NO_REGEXP_CAPTURE | 3704 | #ifndef QT_NO_REGEXP_CAPTURE |
3697 | /*! | 3705 | /*! |
3698 | Returns the number of captures contained in the regular expression. | 3706 | Returns the number of captures contained in the regular expression. |
3699 | */ | 3707 | */ |
3700 | int QRegExp::numCaptures() const | 3708 | int QRegExp::numCaptures() const |
3701 | { | 3709 | { |
3702 | return eng->numCaptures(); | 3710 | return eng->numCaptures(); |
3703 | } | 3711 | } |