-rw-r--r-- | qmake/tools/qregexp.cpp | 70 |
1 files changed, 39 insertions, 31 deletions
diff --git a/qmake/tools/qregexp.cpp b/qmake/tools/qregexp.cpp index 500efed..0c1f060 100644 --- a/qmake/tools/qregexp.cpp +++ b/qmake/tools/qregexp.cpp | |||
@@ -242,57 +242,57 @@ | |||
242 | \i A character that follows a backslash matches the character | 242 | \i A character that follows a backslash matches the character |
243 | itself except where mentioned below. For example if you | 243 | itself except where mentioned below. For example if you |
244 | wished to match a literal caret at the beginning of a string | 244 | wished to match a literal caret at the beginning of a string |
245 | you would write <b>\^</b>. | 245 | you would write <b>\^</b>. |
246 | \row \i <b>\\a</b> | 246 | \row \i <b>\\a</b> |
247 | \i This matches the ASCII bell character (BEL, 0x07). | 247 | \i This matches the ASCII bell character (BEL, 0x07). |
248 | \row \i <b>\\f</b> | 248 | \row \i <b>\\f</b> |
249 | \i This matches the ASCII form feed character (FF, 0x0C). | 249 | \i This matches the ASCII form feed character (FF, 0x0C). |
250 | \row \i <b>\\n</b> | 250 | \row \i <b>\\n</b> |
251 | \i This matches the ASCII line feed character (LF, 0x0A, Unix newline). | 251 | \i This matches the ASCII line feed character (LF, 0x0A, Unix newline). |
252 | \row \i <b>\\r</b> | 252 | \row \i <b>\\r</b> |
253 | \i This matches the ASCII carriage return character (CR, 0x0D). | 253 | \i This matches the ASCII carriage return character (CR, 0x0D). |
254 | \row \i <b>\\t</b> | 254 | \row \i <b>\\t</b> |
255 | \i This matches the ASCII horizontal tab character (HT, 0x09). | 255 | \i This matches the ASCII horizontal tab character (HT, 0x09). |
256 | \row \i <b>\\v</b> | 256 | \row \i <b>\\v</b> |
257 | \i This matches the ASCII vertical tab character (VT, 0x0B). | 257 | \i This matches the ASCII vertical tab character (VT, 0x0B). |
258 | \row \i <b>\\xhhhh</b> | 258 | \row \i <b>\\xhhhh</b> |
259 | \i This matches the Unicode character corresponding to the | 259 | \i This matches the Unicode character corresponding to the |
260 | hexadecimal number hhhh (between 0x0000 and 0xFFFF). \0ooo | 260 | hexadecimal number hhhh (between 0x0000 and 0xFFFF). \0ooo |
261 | (i.e., \zero ooo) matches the ASCII/Latin-1 character | 261 | (i.e., \zero ooo) matches the ASCII/Latin-1 character |
262 | corresponding to the octal number ooo (between 0 and 0377). | 262 | corresponding to the octal number ooo (between 0 and 0377). |
263 | \row \i <b>. (dot)</b> | 263 | \row \i <b>. (dot)</b> |
264 | \i This matches any character (including newline). | 264 | \i This matches any character (including newline). |
265 | \row \i <b>\\d</b> | 265 | \row \i <b>\\d</b> |
266 | \i This matches a digit (see QChar::isDigit()). | 266 | \i This matches a digit (QChar::isDigit()). |
267 | \row \i <b>\\D</b> | 267 | \row \i <b>\\D</b> |
268 | \i This matches a non-digit. | 268 | \i This matches a non-digit. |
269 | \row \i <b>\\s</b> | 269 | \row \i <b>\\s</b> |
270 | \i This matches a whitespace (see QChar::isSpace()). | 270 | \i This matches a whitespace (QChar::isSpace()). |
271 | \row \i <b>\\S</b> | 271 | \row \i <b>\\S</b> |
272 | \i This matches a non-whitespace. | 272 | \i This matches a non-whitespace. |
273 | \row \i <b>\\w</b> | 273 | \row \i <b>\\w</b> |
274 | \i This matches a word character (see QChar::isLetterOrNumber()). | 274 | \i This matches a word character (QChar::isLetterOrNumber() or '_'). |
275 | \row \i <b>\\W</b> | 275 | \row \i <b>\\W</b> |
276 | \i This matches a non-word character. | 276 | \i This matches a non-word character. |
277 | \row \i <b>\\n</b> | 277 | \row \i <b>\\n</b> |
278 | \i The n-th \link #capturing-text backreference \endlink, | 278 | \i The n-th \link #capturing-text backreference \endlink, |
279 | e.g. \1, \2, etc. | 279 | e.g. \1, \2, etc. |
280 | \endtable | 280 | \endtable |
281 | 281 | ||
282 | \e {Note that the C++ compiler transforms backslashes in strings, | 282 | \e {Note that the C++ compiler transforms backslashes in strings, |
283 | so to include a <b>\\</b> in a regexp you will need to enter it | 283 | so to include a <b>\\</b> in a regexp you will need to enter it |
284 | twice, i.e. <b>\\\\</b>.} | 284 | twice, i.e. <b>\\\\</b>.} |
285 | 285 | ||
286 | \target sets-of-characters | 286 | \target sets-of-characters |
287 | \section1 Sets of Characters | 287 | \section1 Sets of Characters |
288 | 288 | ||
289 | Square brackets are used to match any character in the set of | 289 | Square brackets are used to match any character in the set of |
290 | characters contained within the square brackets. All the character | 290 | characters contained within the square brackets. All the character |
291 | set abbreviations described above can be used within square | 291 | set abbreviations described above can be used within square |
292 | brackets. Apart from the character set abbreviations and the | 292 | brackets. Apart from the character set abbreviations and the |
293 | following two exceptions no characters have special meanings in | 293 | following two exceptions no characters have special meanings in |
294 | square brackets. | 294 | square brackets. |
295 | 295 | ||
296 | \table | 296 | \table |
297 | \row \i <b>^</b> | 297 | \row \i <b>^</b> |
298 | \i The caret negates the character set if it occurs as the | 298 | \i The caret negates the character set if it occurs as the |
@@ -526,49 +526,56 @@ | |||
526 | The equivalent of Perl's \c{/i} option is | 526 | The equivalent of Perl's \c{/i} option is |
527 | setCaseSensitive(FALSE). | 527 | setCaseSensitive(FALSE). |
528 | 528 | ||
529 | Perl's \c{/g} option can be emulated using a \link | 529 | Perl's \c{/g} option can be emulated using a \link |
530 | #cap_in_a_loop loop \endlink. | 530 | #cap_in_a_loop loop \endlink. |
531 | 531 | ||
532 | In QRegExp <b>.</b> matches any character, therefore all QRegExp | 532 | In QRegExp <b>.</b> matches any character, therefore all QRegExp |
533 | regexps have the equivalent of Perl's \c{/s} option. QRegExp | 533 | regexps have the equivalent of Perl's \c{/s} option. QRegExp |
534 | does not have an equivalent to Perl's \c{/m} option, but this | 534 | does not have an equivalent to Perl's \c{/m} option, but this |
535 | can be emulated in various ways, for example by splitting the input | 535 | can be emulated in various ways, for example by splitting the input |
536 | into lines or by looping with a regexp that searches for newlines. | 536 | into lines or by looping with a regexp that searches for newlines. |
537 | 537 | ||
538 | Because QRegExp is string oriented there are no \A, \Z or \z | 538 | Because QRegExp is string oriented there are no \A, \Z or \z |
539 | assertions. The \G assertion is not supported but can be emulated | 539 | assertions. The \G assertion is not supported but can be emulated |
540 | in a loop. | 540 | in a loop. |
541 | 541 | ||
542 | Perl's $& is cap(0) or capturedTexts()[0]. There are no QRegExp | 542 | Perl's $& is cap(0) or capturedTexts()[0]. There are no QRegExp |
543 | equivalents for $`, $' or $+. Perl's capturing variables, $1, $2, | 543 | equivalents for $`, $' or $+. Perl's capturing variables, $1, $2, |
544 | ... correspond to cap(1) or capturedTexts()[1], cap(2) or | 544 | ... correspond to cap(1) or capturedTexts()[1], cap(2) or |
545 | capturedTexts()[2], etc. | 545 | capturedTexts()[2], etc. |
546 | 546 | ||
547 | To substitute a pattern use QString::replace(). | 547 | To substitute a pattern use QString::replace(). |
548 | 548 | ||
549 | Perl's extended \c{/x} syntax is not supported, nor are | 549 | Perl's extended \c{/x} syntax is not supported, nor are |
550 | regexp comments (?#comment) or directives, e.g. (?i). | 550 | directives, e.g. (?i), or regexp comments, e.g. (?#comment). On |
551 | the other hand, C++'s rules for literal strings can be used to | ||
552 | achieve the same effect: | ||
553 | \code | ||
554 | QRegExp mark( "\\b" // word boundary | ||
555 | "[Mm]ark" // the word we want to match | ||
556 | ); | ||
557 | \endcode | ||
551 | 558 | ||
552 | Both zero-width positive and zero-width negative lookahead | 559 | Both zero-width positive and zero-width negative lookahead |
553 | assertions (?=pattern) and (?!pattern) are supported with the same | 560 | assertions (?=pattern) and (?!pattern) are supported with the same |
554 | syntax as Perl. Perl's lookbehind assertions, "independent" | 561 | syntax as Perl. Perl's lookbehind assertions, "independent" |
555 | subexpressions and conditional expressions are not supported. | 562 | subexpressions and conditional expressions are not supported. |
556 | 563 | ||
557 | Non-capturing parentheses are also supported, with the same | 564 | Non-capturing parentheses are also supported, with the same |
558 | (?:pattern) syntax. | 565 | (?:pattern) syntax. |
559 | 566 | ||
560 | See QStringList::split() and QStringList::join() for equivalents | 567 | See QStringList::split() and QStringList::join() for equivalents |
561 | to Perl's split and join functions. | 568 | to Perl's split and join functions. |
562 | 569 | ||
563 | Note: because C++ transforms \\'s they must be written \e twice in | 570 | Note: because C++ transforms \\'s they must be written \e twice in |
564 | code, e.g. <b>\\b</b> must be written <b>\\\\b</b>. | 571 | code, e.g. <b>\\b</b> must be written <b>\\\\b</b>. |
565 | 572 | ||
566 | \target code-examples | 573 | \target code-examples |
567 | \section1 Code Examples | 574 | \section1 Code Examples |
568 | 575 | ||
569 | \code | 576 | \code |
570 | QRegExp rx( "^\\d\\d?$" ); // match integers 0 to 99 | 577 | QRegExp rx( "^\\d\\d?$" ); // match integers 0 to 99 |
571 | rx.search( "123" ); // returns -1 (no match) | 578 | rx.search( "123" ); // returns -1 (no match) |
572 | rx.search( "-6" ); // returns -1 (no match) | 579 | rx.search( "-6" ); // returns -1 (no match) |
573 | rx.search( "6" ); // returns 0 (matched at position 0) | 580 | rx.search( "6" ); // returns 0 (matched at position 0) |
574 | \endcode | 581 | \endcode |
@@ -656,86 +663,91 @@ | |||
656 | if ( rx.search( str ) != -1 ) { | 663 | if ( rx.search( str ) != -1 ) { |
657 | company = rx.cap( 1 ); | 664 | company = rx.cap( 1 ); |
658 | web = rx.cap( 2 ); | 665 | web = rx.cap( 2 ); |
659 | country = rx.cap( 3 ); | 666 | country = rx.cap( 3 ); |
660 | } | 667 | } |
661 | \endcode | 668 | \endcode |
662 | 669 | ||
663 | In this example our input lines have the format company name, web | 670 | In this example our input lines have the format company name, web |
664 | address and country. Unfortunately the regexp is rather long and | 671 | address and country. Unfortunately the regexp is rather long and |
665 | not very versatile -- the code will break if we add any more | 672 | not very versatile -- the code will break if we add any more |
666 | fields. A simpler and better solution is to look for the | 673 | fields. A simpler and better solution is to look for the |
667 | separator, '\t' in this case, and take the surrounding text. The | 674 | separator, '\t' in this case, and take the surrounding text. The |
668 | QStringList split() function can take a separator string or regexp | 675 | QStringList split() function can take a separator string or regexp |
669 | as an argument and split a string accordingly. | 676 | as an argument and split a string accordingly. |
670 | 677 | ||
671 | \code | 678 | \code |
672 | QStringList field = QStringList::split( "\t", str ); | 679 | QStringList field = QStringList::split( "\t", str ); |
673 | \endcode | 680 | \endcode |
674 | 681 | ||
675 | Here field[0] is the company, field[1] the web address and so on. | 682 | Here field[0] is the company, field[1] the web address and so on. |
676 | 683 | ||
677 | To imitate the matching of a shell we can use wildcard mode. | 684 | To imitate the matching of a shell we can use wildcard mode. |
678 | 685 | ||
679 | \code | 686 | \code |
680 | QRegExp rx( "*.html" ); // invalid regexp: * doesn't quantify anything | 687 | QRegExp rx( "*.html" ); // invalid regexp: * doesn't quantify anything |
681 | rx.setWildcard( TRUE ); // now it's a valid wildcard regexp | 688 | rx.setWildcard( TRUE ); // now it's a valid wildcard regexp |
682 | rx.search( "index.html" ); // returns 0 (matched at position 0) | 689 | rx.exactMatch( "index.html" ); // returns TRUE |
683 | rx.search( "default.htm" ); // returns -1 (no match) | 690 | rx.exactMatch( "default.htm" ); // returns FALSE |
684 | rx.search( "readme.txt" ); // returns -1 (no match) | 691 | rx.exactMatch( "readme.txt" ); // returns FALSE |
685 | \endcode | 692 | \endcode |
686 | 693 | ||
687 | Wildcard matching can be convenient because of its simplicity, but | 694 | Wildcard matching can be convenient because of its simplicity, but |
688 | any wildcard regexp can be defined using full regexps, e.g. | 695 | any wildcard regexp can be defined using full regexps, e.g. |
689 | <b>.*\.html$</b>. Notice that we can't match both \c .html and \c | 696 | <b>.*\.html$</b>. Notice that we can't match both \c .html and \c |
690 | .htm files with a wildcard unless we use <b>*.htm*</b> which will | 697 | .htm files with a wildcard unless we use <b>*.htm*</b> which will |
691 | also match 'test.html.bak'. A full regexp gives us the precision | 698 | also match 'test.html.bak'. A full regexp gives us the precision |
692 | we need, <b>.*\\.html?$</b>. | 699 | we need, <b>.*\\.html?$</b>. |
693 | 700 | ||
694 | QRegExp can match case insensitively using setCaseSensitive(), and | 701 | QRegExp can match case insensitively using setCaseSensitive(), and |
695 | can use non-greedy matching, see setMinimal(). By default QRegExp | 702 | can use non-greedy matching, see setMinimal(). By default QRegExp |
696 | uses full regexps but this can be changed with setWildcard(). | 703 | uses full regexps but this can be changed with setWildcard(). |
697 | Searching can be forward with search() or backward with | 704 | Searching can be forward with search() or backward with |
698 | searchRev(). Captured text can be accessed using capturedTexts() | 705 | searchRev(). Captured text can be accessed using capturedTexts() |
699 | which returns a string list of all captured strings, or using | 706 | which returns a string list of all captured strings, or using |
700 | cap() which returns the captured string for the given index. The | 707 | cap() which returns the captured string for the given index. The |
701 | pos() function takes a match index and returns the position in the | 708 | pos() function takes a match index and returns the position in the |
702 | string where the match was made (or -1 if there was no match). | 709 | string where the match was made (or -1 if there was no match). |
703 | 710 | ||
704 | \sa QRegExpValidator QString QStringList | 711 | \sa QRegExpValidator QString QStringList |
705 | 712 | ||
706 | \target member-function-documentation | 713 | \target member-function-documentation |
707 | */ | 714 | */ |
708 | 715 | ||
709 | const int NumBadChars = 64; | 716 | const int NumBadChars = 64; |
710 | #define BadChar( ch ) ( (ch).unicode() % NumBadChars ) | 717 | #define BadChar( ch ) ( (ch).unicode() % NumBadChars ) |
711 | 718 | ||
712 | const int NoOccurrence = INT_MAX; | 719 | const int NoOccurrence = INT_MAX; |
713 | const int EmptyCapture = INT_MAX; | 720 | const int EmptyCapture = INT_MAX; |
714 | const int InftyLen = INT_MAX; | 721 | const int InftyLen = INT_MAX; |
715 | const int InftyRep = 1025; | 722 | const int InftyRep = 1025; |
716 | const int EOS = -1; | 723 | const int EOS = -1; |
717 | 724 | ||
725 | static bool isWord( QChar ch ) | ||
726 | { | ||
727 | return ch.isLetterOrNumber() || ch == QChar( '_' ); | ||
728 | } | ||
729 | |||
718 | /* | 730 | /* |
719 | Merges two QMemArrays of ints and puts the result into the first one. | 731 | Merges two QMemArrays of ints and puts the result into the first one. |
720 | */ | 732 | */ |
721 | static void mergeInto( QMemArray<int> *a, const QMemArray<int>& b ) | 733 | static void mergeInto( QMemArray<int> *a, const QMemArray<int>& b ) |
722 | { | 734 | { |
723 | int asize = a->size(); | 735 | int asize = a->size(); |
724 | int bsize = b.size(); | 736 | int bsize = b.size(); |
725 | if ( asize == 0 ) { | 737 | if ( asize == 0 ) { |
726 | *a = b.copy(); | 738 | *a = b.copy(); |
727 | #ifndef QT_NO_REGEXP_OPTIM | 739 | #ifndef QT_NO_REGEXP_OPTIM |
728 | } else if ( bsize == 1 && (*a)[asize - 1] < b[0] ) { | 740 | } else if ( bsize == 1 && (*a)[asize - 1] < b[0] ) { |
729 | a->resize( asize + 1 ); | 741 | a->resize( asize + 1 ); |
730 | (*a)[asize] = b[0]; | 742 | (*a)[asize] = b[0]; |
731 | #endif | 743 | #endif |
732 | } else if ( bsize >= 1 ) { | 744 | } else if ( bsize >= 1 ) { |
733 | int csize = asize + bsize; | 745 | int csize = asize + bsize; |
734 | QMemArray<int> c( csize ); | 746 | QMemArray<int> c( csize ); |
735 | int i = 0, j = 0, k = 0; | 747 | int i = 0, j = 0, k = 0; |
736 | while ( i < asize ) { | 748 | while ( i < asize ) { |
737 | if ( j < bsize ) { | 749 | if ( j < bsize ) { |
738 | if ( (*a)[i] == b[j] ) { | 750 | if ( (*a)[i] == b[j] ) { |
739 | i++; | 751 | i++; |
740 | csize--; | 752 | csize--; |
741 | } else if ( (*a)[i] < b[j] ) { | 753 | } else if ( (*a)[i] < b[j] ) { |
@@ -1659,51 +1671,51 @@ bool QRegExpEngine::isBetterCapture( const int *begin1, const int *end1, | |||
1659 | bool QRegExpEngine::testAnchor( int i, int a, const int *capBegin ) | 1671 | bool QRegExpEngine::testAnchor( int i, int a, const int *capBegin ) |
1660 | { | 1672 | { |
1661 | int j; | 1673 | int j; |
1662 | 1674 | ||
1663 | #ifndef QT_NO_REGEXP_ANCHOR_ALT | 1675 | #ifndef QT_NO_REGEXP_ANCHOR_ALT |
1664 | if ( (a & Anchor_Alternation) != 0 ) { | 1676 | if ( (a & Anchor_Alternation) != 0 ) { |
1665 | return testAnchor( i, aa[a ^ Anchor_Alternation].a, capBegin ) || | 1677 | return testAnchor( i, aa[a ^ Anchor_Alternation].a, capBegin ) || |
1666 | testAnchor( i, aa[a ^ Anchor_Alternation].b, capBegin ); | 1678 | testAnchor( i, aa[a ^ Anchor_Alternation].b, capBegin ); |
1667 | } | 1679 | } |
1668 | #endif | 1680 | #endif |
1669 | 1681 | ||
1670 | if ( (a & Anchor_Caret) != 0 ) { | 1682 | if ( (a & Anchor_Caret) != 0 ) { |
1671 | if ( mmPos + i != mmCaretPos ) | 1683 | if ( mmPos + i != mmCaretPos ) |
1672 | return FALSE; | 1684 | return FALSE; |
1673 | } | 1685 | } |
1674 | if ( (a & Anchor_Dollar) != 0 ) { | 1686 | if ( (a & Anchor_Dollar) != 0 ) { |
1675 | if ( mmPos + i != mmLen ) | 1687 | if ( mmPos + i != mmLen ) |
1676 | return FALSE; | 1688 | return FALSE; |
1677 | } | 1689 | } |
1678 | #ifndef QT_NO_REGEXP_ESCAPE | 1690 | #ifndef QT_NO_REGEXP_ESCAPE |
1679 | if ( (a & (Anchor_Word | Anchor_NonWord)) != 0 ) { | 1691 | if ( (a & (Anchor_Word | Anchor_NonWord)) != 0 ) { |
1680 | bool before = FALSE; | 1692 | bool before = FALSE; |
1681 | bool after = FALSE; | 1693 | bool after = FALSE; |
1682 | if ( mmPos + i != 0 ) | 1694 | if ( mmPos + i != 0 ) |
1683 | before = mmIn[mmPos + i - 1].isLetterOrNumber(); | 1695 | before = isWord( mmIn[mmPos + i - 1] ); |
1684 | if ( mmPos + i != mmLen ) | 1696 | if ( mmPos + i != mmLen ) |
1685 | after = mmIn[mmPos + i].isLetterOrNumber(); | 1697 | after = isWord( mmIn[mmPos + i] ); |
1686 | if ( (a & Anchor_Word) != 0 && (before == after) ) | 1698 | if ( (a & Anchor_Word) != 0 && (before == after) ) |
1687 | return FALSE; | 1699 | return FALSE; |
1688 | if ( (a & Anchor_NonWord) != 0 && (before != after) ) | 1700 | if ( (a & Anchor_NonWord) != 0 && (before != after) ) |
1689 | return FALSE; | 1701 | return FALSE; |
1690 | } | 1702 | } |
1691 | #endif | 1703 | #endif |
1692 | #ifndef QT_NO_REGEXP_LOOKAHEAD | 1704 | #ifndef QT_NO_REGEXP_LOOKAHEAD |
1693 | bool catchx = TRUE; | 1705 | bool catchx = TRUE; |
1694 | 1706 | ||
1695 | if ( (a & Anchor_LookaheadMask) != 0 ) { | 1707 | if ( (a & Anchor_LookaheadMask) != 0 ) { |
1696 | QConstString cstr = QConstString( (QChar *) mmIn + mmPos + i, | 1708 | QConstString cstr = QConstString( (QChar *) mmIn + mmPos + i, |
1697 | mmLen - mmPos - i ); | 1709 | mmLen - mmPos - i ); |
1698 | for ( j = 0; j < (int) ahead.size(); j++ ) { | 1710 | for ( j = 0; j < (int) ahead.size(); j++ ) { |
1699 | if ( (a & (Anchor_FirstLookahead << j)) != 0 ) { | 1711 | if ( (a & (Anchor_FirstLookahead << j)) != 0 ) { |
1700 | catchx = ahead[j]->eng->match( cstr.string(), 0, TRUE, TRUE, | 1712 | catchx = ahead[j]->eng->match( cstr.string(), 0, TRUE, TRUE, |
1701 | mmCaretPos - mmPos - i )[0] == 0; | 1713 | mmCaretPos - mmPos - i )[0] == 0; |
1702 | if ( catchx == ahead[j]->neg ) | 1714 | if ( catchx == ahead[j]->neg ) |
1703 | return FALSE; | 1715 | return FALSE; |
1704 | } | 1716 | } |
1705 | } | 1717 | } |
1706 | } | 1718 | } |
1707 | #endif | 1719 | #endif |
1708 | #ifndef QT_NO_REGEXP_CAPTURE | 1720 | #ifndef QT_NO_REGEXP_CAPTURE |
1709 | #ifndef QT_NO_REGEXP_BACKREF | 1721 | #ifndef QT_NO_REGEXP_BACKREF |
@@ -2611,68 +2623,76 @@ int QRegExpEngine::getEscape() | |||
2611 | yyCh = getChar(); | 2623 | yyCh = getChar(); |
2612 | } | 2624 | } |
2613 | if ( (val & ~0377) != 0 ) | 2625 | if ( (val & ~0377) != 0 ) |
2614 | error( RXERR_OCTAL ); | 2626 | error( RXERR_OCTAL ); |
2615 | return Tok_Char | val; | 2627 | return Tok_Char | val; |
2616 | #endif | 2628 | #endif |
2617 | #ifndef QT_NO_REGEXP_ESCAPE | 2629 | #ifndef QT_NO_REGEXP_ESCAPE |
2618 | case 'B': | 2630 | case 'B': |
2619 | return Tok_NonWord; | 2631 | return Tok_NonWord; |
2620 | #endif | 2632 | #endif |
2621 | #ifndef QT_NO_REGEXP_CCLASS | 2633 | #ifndef QT_NO_REGEXP_CCLASS |
2622 | case 'D': | 2634 | case 'D': |
2623 | // see QChar::isDigit() | 2635 | // see QChar::isDigit() |
2624 | yyCharClass->addCategories( 0x7fffffef ); | 2636 | yyCharClass->addCategories( 0x7fffffef ); |
2625 | return Tok_CharClass; | 2637 | return Tok_CharClass; |
2626 | case 'S': | 2638 | case 'S': |
2627 | // see QChar::isSpace() | 2639 | // see QChar::isSpace() |
2628 | yyCharClass->addCategories( 0x7ffff87f ); | 2640 | yyCharClass->addCategories( 0x7ffff87f ); |
2629 | yyCharClass->addRange( 0x0000, 0x0008 ); | 2641 | yyCharClass->addRange( 0x0000, 0x0008 ); |
2630 | yyCharClass->addRange( 0x000e, 0x001f ); | 2642 | yyCharClass->addRange( 0x000e, 0x001f ); |
2631 | yyCharClass->addRange( 0x007f, 0x009f ); | 2643 | yyCharClass->addRange( 0x007f, 0x009f ); |
2632 | return Tok_CharClass; | 2644 | return Tok_CharClass; |
2633 | case 'W': | 2645 | case 'W': |
2634 | // see QChar::isLetterOrNumber() | 2646 | // see QChar::isLetterOrNumber() |
2635 | yyCharClass->addCategories( 0x7ff07f8f ); | 2647 | yyCharClass->addCategories( 0x7fe07f8f ); |
2648 | yyCharClass->addRange( 0x203f, 0x2040 ); | ||
2649 | yyCharClass->addSingleton( 0x2040 ); | ||
2650 | yyCharClass->addSingleton( 0x30fb ); | ||
2651 | yyCharClass->addRange( 0xfe33, 0xfe34 ); | ||
2652 | yyCharClass->addRange( 0xfe4d, 0xfe4f ); | ||
2653 | yyCharClass->addSingleton( 0xff3f ); | ||
2654 | yyCharClass->addSingleton( 0xff65 ); | ||
2636 | return Tok_CharClass; | 2655 | return Tok_CharClass; |
2637 | #endif | 2656 | #endif |
2638 | #ifndef QT_NO_REGEXP_ESCAPE | 2657 | #ifndef QT_NO_REGEXP_ESCAPE |
2639 | case 'b': | 2658 | case 'b': |
2640 | return Tok_Word; | 2659 | return Tok_Word; |
2641 | #endif | 2660 | #endif |
2642 | #ifndef QT_NO_REGEXP_CCLASS | 2661 | #ifndef QT_NO_REGEXP_CCLASS |
2643 | case 'd': | 2662 | case 'd': |
2644 | // see QChar::isDigit() | 2663 | // see QChar::isDigit() |
2645 | yyCharClass->addCategories( 0x00000010 ); | 2664 | yyCharClass->addCategories( 0x00000010 ); |
2646 | return Tok_CharClass; | 2665 | return Tok_CharClass; |
2647 | case 's': | 2666 | case 's': |
2648 | // see QChar::isSpace() | 2667 | // see QChar::isSpace() |
2649 | yyCharClass->addCategories( 0x00000380 ); | 2668 | yyCharClass->addCategories( 0x00000380 ); |
2650 | yyCharClass->addRange( 0x0009, 0x000d ); | 2669 | yyCharClass->addRange( 0x0009, 0x000d ); |
2651 | return Tok_CharClass; | 2670 | return Tok_CharClass; |
2652 | case 'w': | 2671 | case 'w': |
2653 | // see QChar::isLetterOrNumber() | 2672 | // see QChar::isLetterOrNumber() |
2654 | yyCharClass->addCategories( 0x000f8070 ); | 2673 | yyCharClass->addCategories( 0x000f8070 ); |
2674 | yyCharClass->addSingleton( 0x005f ); // '_' | ||
2655 | return Tok_CharClass; | 2675 | return Tok_CharClass; |
2656 | #endif | 2676 | #endif |
2657 | #ifndef QT_NO_REGEXP_ESCAPE | 2677 | #ifndef QT_NO_REGEXP_ESCAPE |
2658 | case 'x': | 2678 | case 'x': |
2659 | val = 0; | 2679 | val = 0; |
2660 | for ( i = 0; i < 4; i++ ) { | 2680 | for ( i = 0; i < 4; i++ ) { |
2661 | low = QChar( yyCh ).lower(); | 2681 | low = QChar( yyCh ).lower(); |
2662 | if ( low >= '0' && low <= '9' ) | 2682 | if ( low >= '0' && low <= '9' ) |
2663 | val = ( val << 4 ) | ( low - '0' ); | 2683 | val = ( val << 4 ) | ( low - '0' ); |
2664 | else if ( low >= 'a' && low <= 'f' ) | 2684 | else if ( low >= 'a' && low <= 'f' ) |
2665 | val = ( val << 4 ) | ( low - 'a' + 10 ); | 2685 | val = ( val << 4 ) | ( low - 'a' + 10 ); |
2666 | else | 2686 | else |
2667 | break; | 2687 | break; |
2668 | yyCh = getChar(); | 2688 | yyCh = getChar(); |
2669 | } | 2689 | } |
2670 | return Tok_Char | val; | 2690 | return Tok_Char | val; |
2671 | #endif | 2691 | #endif |
2672 | default: | 2692 | default: |
2673 | if ( prevCh >= '1' && prevCh <= '9' ) { | 2693 | if ( prevCh >= '1' && prevCh <= '9' ) { |
2674 | #ifndef QT_NO_REGEXP_BACKREF | 2694 | #ifndef QT_NO_REGEXP_BACKREF |
2675 | val = prevCh - '0'; | 2695 | val = prevCh - '0'; |
2676 | while ( yyCh >= '0' && yyCh <= '9' ) { | 2696 | while ( yyCh >= '0' && yyCh <= '9' ) { |
2677 | val = ( val *= 10 ) | ( yyCh - '0' ); | 2697 | val = ( val *= 10 ) | ( yyCh - '0' ); |
2678 | yyCh = getChar(); | 2698 | yyCh = getChar(); |
@@ -3162,69 +3182,71 @@ struct QRegExpPrivate | |||
3162 | QString rxpattern; // regular-expression pattern | 3182 | QString rxpattern; // regular-expression pattern |
3163 | #ifndef QT_NO_REGEXP_WILDCARD | 3183 | #ifndef QT_NO_REGEXP_WILDCARD |
3164 | bool wc; // wildcard mode? | 3184 | bool wc; // wildcard mode? |
3165 | #endif | 3185 | #endif |
3166 | bool min; // minimal matching? (instead of maximal) | 3186 | bool min; // minimal matching? (instead of maximal) |
3167 | #ifndef QT_NO_REGEXP_CAPTURE | 3187 | #ifndef QT_NO_REGEXP_CAPTURE |
3168 | QString t; // last string passed to QRegExp::search() or searchRev() | 3188 | QString t; // last string passed to QRegExp::search() or searchRev() |
3169 | QStringList capturedCache; // what QRegExp::capturedTexts() returned last | 3189 | QStringList capturedCache; // what QRegExp::capturedTexts() returned last |
3170 | #endif | 3190 | #endif |
3171 | QMemArray<int> captured; // what QRegExpEngine::search() returned last | 3191 | QMemArray<int> captured; // what QRegExpEngine::search() returned last |
3172 | 3192 | ||
3173 | QRegExpPrivate() { captured.fill( -1, 2 ); } | 3193 | QRegExpPrivate() { captured.fill( -1, 2 ); } |
3174 | }; | 3194 | }; |
3175 | 3195 | ||
3176 | #ifndef QT_NO_REGEXP_OPTIM | 3196 | #ifndef QT_NO_REGEXP_OPTIM |
3177 | static QCache<QRegExpEngine> *engineCache = 0; | 3197 | static QCache<QRegExpEngine> *engineCache = 0; |
3178 | static QSingleCleanupHandler<QCache<QRegExpEngine> > cleanup_cache; | 3198 | static QSingleCleanupHandler<QCache<QRegExpEngine> > cleanup_cache; |
3179 | #endif | 3199 | #endif |
3180 | 3200 | ||
3181 | static QRegExpEngine *newEngine( const QString& pattern, bool caseSensitive ) | 3201 | static QRegExpEngine *newEngine( const QString& pattern, bool caseSensitive ) |
3182 | { | 3202 | { |
3183 | #ifndef QT_NO_REGEXP_OPTIM | 3203 | #ifndef QT_NO_REGEXP_OPTIM |
3184 | if ( engineCache != 0 ) { | 3204 | if ( engineCache != 0 ) { |
3185 | #ifdef QT_THREAD_SUPPORT | 3205 | #ifdef QT_THREAD_SUPPORT |
3186 | QMutexLocker locker( qt_global_mutexpool->get( &engineCache ) ); | 3206 | QMutexLocker locker( qt_global_mutexpool ? |
3207 | qt_global_mutexpool->get( &engineCache ) : 0 ); | ||
3187 | #endif | 3208 | #endif |
3188 | QRegExpEngine *eng = engineCache->take( pattern ); | 3209 | QRegExpEngine *eng = engineCache->take( pattern ); |
3189 | if ( eng == 0 || eng->caseSensitive() != caseSensitive ) { | 3210 | if ( eng == 0 || eng->caseSensitive() != caseSensitive ) { |
3190 | delete eng; | 3211 | delete eng; |
3191 | } else { | 3212 | } else { |
3192 | eng->ref(); | 3213 | eng->ref(); |
3193 | return eng; | 3214 | return eng; |
3194 | } | 3215 | } |
3195 | } | 3216 | } |
3196 | #endif | 3217 | #endif |
3197 | return new QRegExpEngine( pattern, caseSensitive ); | 3218 | return new QRegExpEngine( pattern, caseSensitive ); |
3198 | } | 3219 | } |
3199 | 3220 | ||
3200 | static void derefEngine( QRegExpEngine *eng, const QString& pattern ) | 3221 | static void derefEngine( QRegExpEngine *eng, const QString& pattern ) |
3201 | { | 3222 | { |
3202 | if ( eng != 0 && eng->deref() ) { | ||
3203 | #ifndef QT_NO_REGEXP_OPTIM | ||
3204 | #ifdef QT_THREAD_SUPPORT | 3223 | #ifdef QT_THREAD_SUPPORT |
3205 | QMutexLocker locker( qt_global_mutexpool->get( &engineCache ) ); | 3224 | QMutexLocker locker( qt_global_mutexpool ? |
3225 | qt_global_mutexpool->get( &engineCache ) : 0 ); | ||
3206 | #endif | 3226 | #endif |
3227 | if ( eng != 0 && eng->deref() ) { | ||
3228 | #ifndef QT_NO_REGEXP_OPTIM | ||
3207 | if ( engineCache == 0 ) { | 3229 | if ( engineCache == 0 ) { |
3208 | engineCache = new QCache<QRegExpEngine>; | 3230 | engineCache = new QCache<QRegExpEngine>; |
3209 | engineCache->setAutoDelete( TRUE ); | 3231 | engineCache->setAutoDelete( TRUE ); |
3210 | cleanup_cache.set( &engineCache ); | 3232 | cleanup_cache.set( &engineCache ); |
3211 | } | 3233 | } |
3212 | if ( !pattern.isNull() && | 3234 | if ( !pattern.isNull() && |
3213 | engineCache->insert(pattern, eng, 4 + pattern.length() / 4) ) | 3235 | engineCache->insert(pattern, eng, 4 + pattern.length() / 4) ) |
3214 | return; | 3236 | return; |
3215 | #else | 3237 | #else |
3216 | Q_UNUSED( pattern ); | 3238 | Q_UNUSED( pattern ); |
3217 | #endif | 3239 | #endif |
3218 | delete eng; | 3240 | delete eng; |
3219 | } | 3241 | } |
3220 | } | 3242 | } |
3221 | 3243 | ||
3222 | /*! | 3244 | /*! |
3223 | \enum QRegExp::CaretMode | 3245 | \enum QRegExp::CaretMode |
3224 | 3246 | ||
3225 | The CaretMode enum defines the different meanings of the caret | 3247 | The CaretMode enum defines the different meanings of the caret |
3226 | (<b>^</b>) in a regular expression. The possible values are: | 3248 | (<b>^</b>) in a regular expression. The possible values are: |
3227 | 3249 | ||
3228 | \value CaretAtZero | 3250 | \value CaretAtZero |
3229 | The caret corresponds to index 0 in the searched string. | 3251 | The caret corresponds to index 0 in the searched string. |
3230 | 3252 | ||
@@ -3544,55 +3566,48 @@ bool QRegExp::exactMatch( const QString& str ) const | |||
3544 | Attempts to match in \a str, starting from position \a index. | 3566 | Attempts to match in \a str, starting from position \a index. |
3545 | Returns the position of the match, or -1 if there was no match. | 3567 | Returns the position of the match, or -1 if there was no match. |
3546 | 3568 | ||
3547 | The length of the match is stored in \a *len, unless \a len is a | 3569 | The length of the match is stored in \a *len, unless \a len is a |
3548 | null pointer. | 3570 | null pointer. |
3549 | 3571 | ||
3550 | If \a indexIsStart is TRUE (the default), the position \a index in | 3572 | If \a indexIsStart is TRUE (the default), the position \a index in |
3551 | the string will match the start of string anchor, <b>^</b>, in the | 3573 | the string will match the start of string anchor, <b>^</b>, in the |
3552 | regexp, if present. Otherwise, position 0 in \a str will match. | 3574 | regexp, if present. Otherwise, position 0 in \a str will match. |
3553 | 3575 | ||
3554 | Use search() and matchedLength() instead of this function. | 3576 | Use search() and matchedLength() instead of this function. |
3555 | 3577 | ||
3556 | \sa QString::mid() QConstString | 3578 | \sa QString::mid() QConstString |
3557 | */ | 3579 | */ |
3558 | int QRegExp::match( const QString& str, int index, int *len, | 3580 | int QRegExp::match( const QString& str, int index, int *len, |
3559 | bool indexIsStart ) const | 3581 | bool indexIsStart ) const |
3560 | { | 3582 | { |
3561 | int pos = search( str, index, indexIsStart ? CaretAtOffset : CaretAtZero ); | 3583 | int pos = search( str, index, indexIsStart ? CaretAtOffset : CaretAtZero ); |
3562 | if ( len != 0 ) | 3584 | if ( len != 0 ) |
3563 | *len = matchedLength(); | 3585 | *len = matchedLength(); |
3564 | return pos; | 3586 | return pos; |
3565 | } | 3587 | } |
3566 | #endif // QT_NO_COMPAT | 3588 | #endif // QT_NO_COMPAT |
3567 | 3589 | ||
3568 | /*! | ||
3569 | \overload | ||
3570 | |||
3571 | This convenience function searches with a \c CaretMode of \c | ||
3572 | CaretAtZero which is the most common usage. | ||
3573 | */ | ||
3574 | |||
3575 | int QRegExp::search( const QString& str, int offset ) const | 3590 | int QRegExp::search( const QString& str, int offset ) const |
3576 | { | 3591 | { |
3577 | return search( str, offset, CaretAtZero ); | 3592 | return search( str, offset, CaretAtZero ); |
3578 | } | 3593 | } |
3579 | 3594 | ||
3580 | /*! | 3595 | /*! |
3581 | Attempts to find a match in \a str from position \a offset (0 by | 3596 | Attempts to find a match in \a str from position \a offset (0 by |
3582 | default). If \a offset is -1, the search starts at the last | 3597 | default). If \a offset is -1, the search starts at the last |
3583 | character; if -2, at the next to last character; etc. | 3598 | character; if -2, at the next to last character; etc. |
3584 | 3599 | ||
3585 | Returns the position of the first match, or -1 if there was no | 3600 | Returns the position of the first match, or -1 if there was no |
3586 | match. | 3601 | match. |
3587 | 3602 | ||
3588 | The \a caretMode parameter can be used to instruct whether <b>^</b> | 3603 | The \a caretMode parameter can be used to instruct whether <b>^</b> |
3589 | should match at index 0 or at \a offset. | 3604 | should match at index 0 or at \a offset. |
3590 | 3605 | ||
3591 | You might prefer to use QString::find(), QString::contains() or | 3606 | You might prefer to use QString::find(), QString::contains() or |
3592 | even QStringList::grep(). To replace matches use | 3607 | even QStringList::grep(). To replace matches use |
3593 | QString::replace(). | 3608 | QString::replace(). |
3594 | 3609 | ||
3595 | Example: | 3610 | Example: |
3596 | \code | 3611 | \code |
3597 | QString str = "offsets: 1.23 .50 71.00 6.00"; | 3612 | QString str = "offsets: 1.23 .50 71.00 6.00"; |
3598 | QRegExp rx( "\\d*\\.\\d+" ); // primitive floating point matching | 3613 | QRegExp rx( "\\d*\\.\\d+" ); // primitive floating point matching |
@@ -3604,55 +3619,48 @@ int QRegExp::search( const QString& str, int offset ) const | |||
3604 | } | 3619 | } |
3605 | // pos will be 9, 14, 18 and finally 24; count will end up as 4 | 3620 | // pos will be 9, 14, 18 and finally 24; count will end up as 4 |
3606 | \endcode | 3621 | \endcode |
3607 | 3622 | ||
3608 | Although const, this function sets matchedLength(), | 3623 | Although const, this function sets matchedLength(), |
3609 | capturedTexts() and pos(). | 3624 | capturedTexts() and pos(). |
3610 | 3625 | ||
3611 | \sa searchRev() exactMatch() | 3626 | \sa searchRev() exactMatch() |
3612 | */ | 3627 | */ |
3613 | 3628 | ||
3614 | int QRegExp::search( const QString& str, int offset, CaretMode caretMode ) const | 3629 | int QRegExp::search( const QString& str, int offset, CaretMode caretMode ) const |
3615 | { | 3630 | { |
3616 | if ( offset < 0 ) | 3631 | if ( offset < 0 ) |
3617 | offset += str.length(); | 3632 | offset += str.length(); |
3618 | #ifndef QT_NO_REGEXP_CAPTURE | 3633 | #ifndef QT_NO_REGEXP_CAPTURE |
3619 | priv->t = str; | 3634 | priv->t = str; |
3620 | priv->capturedCache.clear(); | 3635 | priv->capturedCache.clear(); |
3621 | #endif | 3636 | #endif |
3622 | priv->captured = eng->match( str, offset, priv->min, FALSE, | 3637 | priv->captured = eng->match( str, offset, priv->min, FALSE, |
3623 | caretIndex(offset, caretMode) ); | 3638 | caretIndex(offset, caretMode) ); |
3624 | return priv->captured[0]; | 3639 | return priv->captured[0]; |
3625 | } | 3640 | } |
3626 | 3641 | ||
3627 | 3642 | ||
3628 | /*! | ||
3629 | \overload | ||
3630 | |||
3631 | This convenience function searches with a \c CaretMode of \c | ||
3632 | CaretAtZero which is the most common usage. | ||
3633 | */ | ||
3634 | |||
3635 | int QRegExp::searchRev( const QString& str, int offset ) const | 3643 | int QRegExp::searchRev( const QString& str, int offset ) const |
3636 | { | 3644 | { |
3637 | return searchRev( str, offset, CaretAtZero ); | 3645 | return searchRev( str, offset, CaretAtZero ); |
3638 | } | 3646 | } |
3639 | 3647 | ||
3640 | /*! | 3648 | /*! |
3641 | Attempts to find a match backwards in \a str from position \a | 3649 | Attempts to find a match backwards in \a str from position \a |
3642 | offset. If \a offset is -1 (the default), the search starts at the | 3650 | offset. If \a offset is -1 (the default), the search starts at the |
3643 | last character; if -2, at the next to last character; etc. | 3651 | last character; if -2, at the next to last character; etc. |
3644 | 3652 | ||
3645 | Returns the position of the first match, or -1 if there was no | 3653 | Returns the position of the first match, or -1 if there was no |
3646 | match. | 3654 | match. |
3647 | 3655 | ||
3648 | The \a caretMode parameter can be used to instruct whether <b>^</b> | 3656 | The \a caretMode parameter can be used to instruct whether <b>^</b> |
3649 | should match at index 0 or at \a offset. | 3657 | should match at index 0 or at \a offset. |
3650 | 3658 | ||
3651 | Although const, this function sets matchedLength(), | 3659 | Although const, this function sets matchedLength(), |
3652 | capturedTexts() and pos(). | 3660 | capturedTexts() and pos(). |
3653 | 3661 | ||
3654 | \warning Searching backwards is much slower than searching | 3662 | \warning Searching backwards is much slower than searching |
3655 | forwards. | 3663 | forwards. |
3656 | 3664 | ||
3657 | \sa search() exactMatch() | 3665 | \sa search() exactMatch() |
3658 | */ | 3666 | */ |
@@ -3673,49 +3681,49 @@ int QRegExp::searchRev( const QString& str, int offset, | |||
3673 | } | 3681 | } |
3674 | 3682 | ||
3675 | while ( offset >= 0 ) { | 3683 | while ( offset >= 0 ) { |
3676 | priv->captured = eng->match( str, offset, priv->min, TRUE, | 3684 | priv->captured = eng->match( str, offset, priv->min, TRUE, |
3677 | caretIndex(offset, caretMode) ); | 3685 | caretIndex(offset, caretMode) ); |
3678 | if ( priv->captured[0] == offset ) | 3686 | if ( priv->captured[0] == offset ) |
3679 | return offset; | 3687 | return offset; |
3680 | offset--; | 3688 | offset--; |
3681 | } | 3689 | } |
3682 | return -1; | 3690 | return -1; |
3683 | } | 3691 | } |
3684 | 3692 | ||
3685 | /*! | 3693 | /*! |
3686 | Returns the length of the last matched string, or -1 if there was | 3694 | Returns the length of the last matched string, or -1 if there was |
3687 | no match. | 3695 | no match. |
3688 | 3696 | ||
3689 | \sa exactMatch() search() searchRev() | 3697 | \sa exactMatch() search() searchRev() |
3690 | */ | 3698 | */ |
3691 | int QRegExp::matchedLength() const | 3699 | int QRegExp::matchedLength() const |
3692 | { | 3700 | { |
3693 | return priv->captured[1]; | 3701 | return priv->captured[1]; |
3694 | } | 3702 | } |
3695 | 3703 | ||
3696 | #ifndef QT_NO_REGEXP_CAPTURE | 3704 | #ifndef QT_NO_REGEXP_CAPTURE |
3697 | /*! | 3705 | /*! |
3698 | Returns the number of captures contained in the regular expression. | 3706 | Returns the number of captures contained in the regular expression. |
3699 | */ | 3707 | */ |
3700 | int QRegExp::numCaptures() const | 3708 | int QRegExp::numCaptures() const |
3701 | { | 3709 | { |
3702 | return eng->numCaptures(); | 3710 | return eng->numCaptures(); |
3703 | } | 3711 | } |
3704 | 3712 | ||
3705 | 3713 | ||
3706 | 3714 | ||
3707 | /*! | 3715 | /*! |
3708 | Returns a list of the captured text strings. | 3716 | Returns a list of the captured text strings. |
3709 | 3717 | ||
3710 | The first string in the list is the entire matched string. Each | 3718 | The first string in the list is the entire matched string. Each |
3711 | subsequent list element contains a string that matched a | 3719 | subsequent list element contains a string that matched a |
3712 | (capturing) subexpression of the regexp. | 3720 | (capturing) subexpression of the regexp. |
3713 | 3721 | ||
3714 | For example: | 3722 | For example: |
3715 | \code | 3723 | \code |
3716 | QRegExp rx( "(\\d+)(\\s*)(cm|inch(es)?)" ); | 3724 | QRegExp rx( "(\\d+)(\\s*)(cm|inch(es)?)" ); |
3717 | int pos = rx.search( "Length: 36 inches" ); | 3725 | int pos = rx.search( "Length: 36 inches" ); |
3718 | QStringList list = rx.capturedTexts(); | 3726 | QStringList list = rx.capturedTexts(); |
3719 | // list is now ( "36 inches", "36", " ", "inches", "es" ) | 3727 | // list is now ( "36 inches", "36", " ", "inches", "es" ) |
3720 | \endcode | 3728 | \endcode |
3721 | 3729 | ||