summaryrefslogtreecommitdiff
path: root/qmake/tools/qregexp.cpp
Unidiff
Diffstat (limited to 'qmake/tools/qregexp.cpp') (more/less context) (ignore whitespace changes)
-rw-r--r--qmake/tools/qregexp.cpp70
1 files changed, 39 insertions, 31 deletions
diff --git a/qmake/tools/qregexp.cpp b/qmake/tools/qregexp.cpp
index 500efed..0c1f060 100644
--- a/qmake/tools/qregexp.cpp
+++ b/qmake/tools/qregexp.cpp
@@ -242,57 +242,57 @@
242 \i A character that follows a backslash matches the character 242 \i A character that follows a backslash matches the character
243 itself except where mentioned below. For example if you 243 itself except where mentioned below. For example if you
244 wished to match a literal caret at the beginning of a string 244 wished to match a literal caret at the beginning of a string
245 you would write <b>\^</b>. 245 you would write <b>\^</b>.
246 \row \i <b>\\a</b> 246 \row \i <b>\\a</b>
247 \i This matches the ASCII bell character (BEL, 0x07). 247 \i This matches the ASCII bell character (BEL, 0x07).
248 \row \i <b>\\f</b> 248 \row \i <b>\\f</b>
249 \i This matches the ASCII form feed character (FF, 0x0C). 249 \i This matches the ASCII form feed character (FF, 0x0C).
250 \row \i <b>\\n</b> 250 \row \i <b>\\n</b>
251 \i This matches the ASCII line feed character (LF, 0x0A, Unix newline). 251 \i This matches the ASCII line feed character (LF, 0x0A, Unix newline).
252 \row \i <b>\\r</b> 252 \row \i <b>\\r</b>
253 \i This matches the ASCII carriage return character (CR, 0x0D). 253 \i This matches the ASCII carriage return character (CR, 0x0D).
254 \row \i <b>\\t</b> 254 \row \i <b>\\t</b>
255 \i This matches the ASCII horizontal tab character (HT, 0x09). 255 \i This matches the ASCII horizontal tab character (HT, 0x09).
256 \row \i <b>\\v</b> 256 \row \i <b>\\v</b>
257 \i This matches the ASCII vertical tab character (VT, 0x0B). 257 \i This matches the ASCII vertical tab character (VT, 0x0B).
258 \row \i <b>\\xhhhh</b> 258 \row \i <b>\\xhhhh</b>
259 \i This matches the Unicode character corresponding to the 259 \i This matches the Unicode character corresponding to the
260 hexadecimal number hhhh (between 0x0000 and 0xFFFF). \0ooo 260 hexadecimal number hhhh (between 0x0000 and 0xFFFF). \0ooo
261 (i.e., \zero ooo) matches the ASCII/Latin-1 character 261 (i.e., \zero ooo) matches the ASCII/Latin-1 character
262 corresponding to the octal number ooo (between 0 and 0377). 262 corresponding to the octal number ooo (between 0 and 0377).
263 \row \i <b>. (dot)</b> 263 \row \i <b>. (dot)</b>
264 \i This matches any character (including newline). 264 \i This matches any character (including newline).
265 \row \i <b>\\d</b> 265 \row \i <b>\\d</b>
266 \i This matches a digit (see QChar::isDigit()). 266 \i This matches a digit (QChar::isDigit()).
267 \row \i <b>\\D</b> 267 \row \i <b>\\D</b>
268 \i This matches a non-digit. 268 \i This matches a non-digit.
269 \row \i <b>\\s</b> 269 \row \i <b>\\s</b>
270 \i This matches a whitespace character (see QChar::isSpace()). 270 \i This matches a whitespace character (QChar::isSpace()).
271 \row \i <b>\\S</b> 271 \row \i <b>\\S</b>
272 \i This matches a non-whitespace character. 272 \i This matches a non-whitespace character.
273 \row \i <b>\\w</b> 273 \row \i <b>\\w</b>
274 \i This matches a word character (see QChar::isLetterOrNumber()). 274 \i This matches a word character (QChar::isLetterOrNumber() or '_').
275 \row \i <b>\\W</b> 275 \row \i <b>\\W</b>
276 \i This matches a non-word character. 276 \i This matches a non-word character.
277 \row \i <b>\\n</b> 277 \row \i <b>\\n</b>
278 \i The n-th \link #capturing-text backreference \endlink, 278 \i The n-th \link #capturing-text backreference \endlink,
279 e.g. \1, \2, etc. 279 e.g. \1, \2, etc.
280 \endtable 280 \endtable
281 281
282 \e {Note that the C++ compiler transforms backslashes in strings 282 \e {Note that the C++ compiler transforms backslashes in strings
283 so to include a <b>\\</b> in a regexp you will need to enter it 283 so to include a <b>\\</b> in a regexp you will need to enter it
284 twice, i.e. <b>\\\\</b>.} 284 twice, i.e. <b>\\\\</b>.}
285 285
286 \target sets-of-characters 286 \target sets-of-characters
287 \section1 Sets of Characters 287 \section1 Sets of Characters
288 288
289 Square brackets are used to match any character in the set of 289 Square brackets are used to match any character in the set of
290 characters contained within the square brackets. All the character 290 characters contained within the square brackets. All the character
291 set abbreviations described above can be used within square 291 set abbreviations described above can be used within square
292 brackets. Apart from the character set abbreviations and the 292 brackets. Apart from the character set abbreviations and the
293 following two exceptions no characters have special meanings in 293 following two exceptions no characters have special meanings in
294 square brackets. 294 square brackets.
295 295
296 \table 296 \table
297 \row \i <b>^</b> 297 \row \i <b>^</b>
298 \i The caret negates the character set if it occurs as the 298 \i The caret negates the character set if it occurs as the
@@ -526,49 +526,56 @@
526 The equivalent of Perl's \c{/i} option is 526 The equivalent of Perl's \c{/i} option is
527 setCaseSensitive(FALSE). 527 setCaseSensitive(FALSE).
528 528
529 Perl's \c{/g} option can be emulated using a \link 529 Perl's \c{/g} option can be emulated using a \link
530 #cap_in_a_loop loop \endlink. 530 #cap_in_a_loop loop \endlink.
531 531
532 In QRegExp <b>.</b> matches any character, therefore all QRegExp 532 In QRegExp <b>.</b> matches any character, therefore all QRegExp
533 regexps have the equivalent of Perl's \c{/s} option. QRegExp 533 regexps have the equivalent of Perl's \c{/s} option. QRegExp
534 does not have an equivalent to Perl's \c{/m} option, but this 534 does not have an equivalent to Perl's \c{/m} option, but this
535 can be emulated in various ways, for example by splitting the input 535 can be emulated in various ways, for example by splitting the input
536 into lines or by looping with a regexp that searches for newlines. 536 into lines or by looping with a regexp that searches for newlines.
537 537
538 Because QRegExp is string oriented, there are no \A, \Z or \z 538 Because QRegExp is string oriented, there are no \A, \Z or \z
539 assertions. The \G assertion is not supported but can be emulated 539 assertions. The \G assertion is not supported but can be emulated
540 in a loop. 540 in a loop.
541 541
542 Perl's $& is cap(0) or capturedTexts()[0]. There are no QRegExp 542 Perl's $& is cap(0) or capturedTexts()[0]. There are no QRegExp
543 equivalents for $`, $' or $+. Perl's capturing variables, $1, $2, 543 equivalents for $`, $' or $+. Perl's capturing variables, $1, $2,
544 ... correspond to cap(1) or capturedTexts()[1], cap(2) or 544 ... correspond to cap(1) or capturedTexts()[1], cap(2) or
545 capturedTexts()[2], etc. 545 capturedTexts()[2], etc.
546 546
547 To substitute a pattern use QString::replace(). 547 To substitute a pattern use QString::replace().
548 548
549 Perl's extended \c{/x} syntax is not supported, nor are 549 Perl's extended \c{/x} syntax is not supported, nor are
550 regexp comments (?#comment) or directives, e.g. (?i). 550 directives, e.g. (?i), or regexp comments, e.g. (?#comment). On
551 the other hand, C++'s rules for literal strings can be used to
552 achieve the same:
553 \code
554 QRegExp mark( "\\b" // word boundary
555 "[Mm]ark" // the word we want to match
556 );
557 \endcode
551 558
552 Both zero-width positive and zero-width negative lookahead 559 Both zero-width positive and zero-width negative lookahead
553 assertions (?=pattern) and (?!pattern) are supported with the same 560 assertions (?=pattern) and (?!pattern) are supported with the same
554 syntax as Perl. Perl's lookbehind assertions, "independent" 561 syntax as Perl. Perl's lookbehind assertions, "independent"
555 subexpressions and conditional expressions are not supported. 562 subexpressions and conditional expressions are not supported.
556 563
557 Non-capturing parentheses are also supported, with the same 564 Non-capturing parentheses are also supported, with the same
558 (?:pattern) syntax. 565 (?:pattern) syntax.
559 566
560 See QStringList::split() and QStringList::join() for equivalents 567 See QStringList::split() and QStringList::join() for equivalents
561 to Perl's split and join functions. 568 to Perl's split and join functions.
562 569
563 Note: because C++ transforms \\'s they must be written \e twice in 570 Note: because C++ transforms \\'s they must be written \e twice in
564 code, e.g. <b>\\b</b> must be written <b>\\\\b</b>. 571 code, e.g. <b>\\b</b> must be written <b>\\\\b</b>.
565 572
566 \target code-examples 573 \target code-examples
567 \section1 Code Examples 574 \section1 Code Examples
568 575
569 \code 576 \code
570 QRegExp rx( "^\\d\\d?$" ); // match integers 0 to 99 577 QRegExp rx( "^\\d\\d?$" ); // match integers 0 to 99
571 rx.search( "123" ); // returns -1 (no match) 578 rx.search( "123" ); // returns -1 (no match)
572 rx.search( "-6" ); // returns -1 (no match) 579 rx.search( "-6" ); // returns -1 (no match)
573 rx.search( "6" ); // returns 0 (matched at position 0) 580 rx.search( "6" ); // returns 0 (matched at position 0)
574 \endcode 581 \endcode
@@ -656,86 +663,91 @@
656 if ( rx.search( str ) != -1 ) { 663 if ( rx.search( str ) != -1 ) {
657 company = rx.cap( 1 ); 664 company = rx.cap( 1 );
658 web = rx.cap( 2 ); 665 web = rx.cap( 2 );
659 country = rx.cap( 3 ); 666 country = rx.cap( 3 );
660 } 667 }
661 \endcode 668 \endcode
662 669
663 In this example our input lines have the format company name, web 670 In this example our input lines have the format company name, web
664 address and country. Unfortunately the regexp is rather long and 671 address and country. Unfortunately the regexp is rather long and
665 not very versatile -- the code will break if we add any more 672 not very versatile -- the code will break if we add any more
666 fields. A simpler and better solution is to look for the 673 fields. A simpler and better solution is to look for the
667 separator, '\t' in this case, and take the surrounding text. The 674 separator, '\t' in this case, and take the surrounding text. The
668 QStringList split() function can take a separator string or regexp 675 QStringList split() function can take a separator string or regexp
669 as an argument and split a string accordingly. 676 as an argument and split a string accordingly.
670 677
671 \code 678 \code
672 QStringList field = QStringList::split( "\t", str ); 679 QStringList field = QStringList::split( "\t", str );
673 \endcode 680 \endcode
674 681
675 Here field[0] is the company, field[1] the web address and so on. 682 Here field[0] is the company, field[1] the web address and so on.
676 683
677 To imitate the matching of a shell we can use wildcard mode. 684 To imitate the matching of a shell we can use wildcard mode.
678 685
679 \code 686 \code
680 QRegExp rx( "*.html" ); // invalid regexp: * doesn't quantify anything 687 QRegExp rx( "*.html" ); // invalid regexp: * doesn't quantify anything
681 rx.setWildcard( TRUE ); // now it's a valid wildcard regexp 688 rx.setWildcard( TRUE ); // now it's a valid wildcard regexp
682 rx.search( "index.html" ); // returns 0 (matched at position 0) 689 rx.exactMatch( "index.html" ); // returns TRUE
683 rx.search( "default.htm" ); // returns -1 (no match) 690 rx.exactMatch( "default.htm" ); // returns FALSE
684 rx.search( "readme.txt" ); // returns -1 (no match) 691 rx.exactMatch( "readme.txt" ); // returns FALSE
685 \endcode 692 \endcode
686 693
687 Wildcard matching can be convenient because of its simplicity, but 694 Wildcard matching can be convenient because of its simplicity, but
688 any wildcard regexp can be defined using full regexps, e.g. 695 any wildcard regexp can be defined using full regexps, e.g.
689 <b>.*\\.html$</b>. Notice that we can't match both \c .html and \c 696 <b>.*\\.html$</b>. Notice that we can't match both \c .html and \c
690 .htm files with a wildcard unless we use <b>*.htm*</b> which will 697 .htm files with a wildcard unless we use <b>*.htm*</b> which will
691 also match 'test.html.bak'. A full regexp gives us the precision 698 also match 'test.html.bak'. A full regexp gives us the precision
692 we need, <b>.*\\.html?$</b>. 699 we need, <b>.*\\.html?$</b>.
693 700
694 QRegExp can match case insensitively using setCaseSensitive(), and 701 QRegExp can match case insensitively using setCaseSensitive(), and
695 can use non-greedy matching, see setMinimal(). By default QRegExp 702 can use non-greedy matching, see setMinimal(). By default QRegExp
696 uses full regexps but this can be changed with setWildcard(). 703 uses full regexps but this can be changed with setWildcard().
697 Searching can be forward with search() or backward with 704 Searching can be forward with search() or backward with
698 searchRev(). Captured text can be accessed using capturedTexts() 705 searchRev(). Captured text can be accessed using capturedTexts()
699 which returns a string list of all captured strings, or using 706 which returns a string list of all captured strings, or using
700 cap() which returns the captured string for the given index. The 707 cap() which returns the captured string for the given index. The
701 pos() function takes a match index and returns the position in the 708 pos() function takes a match index and returns the position in the
702 string where the match was made (or -1 if there was no match). 709 string where the match was made (or -1 if there was no match).
703 710
704 \sa QRegExpValidator QString QStringList 711 \sa QRegExpValidator QString QStringList
705 712
706 \target member-function-documentation 713 \target member-function-documentation
707*/ 714*/
708 715
709const int NumBadChars = 64; 716const int NumBadChars = 64;
710#define BadChar( ch ) ( (ch).unicode() % NumBadChars ) 717#define BadChar( ch ) ( (ch).unicode() % NumBadChars )
711 718
712const int NoOccurrence = INT_MAX; 719const int NoOccurrence = INT_MAX;
713const int EmptyCapture = INT_MAX; 720const int EmptyCapture = INT_MAX;
714const int InftyLen = INT_MAX; 721const int InftyLen = INT_MAX;
715const int InftyRep = 1025; 722const int InftyRep = 1025;
716const int EOS = -1; 723const int EOS = -1;
717 724
725static bool isWord( QChar ch )
726{
727 return ch.isLetterOrNumber() || ch == QChar( '_' );
728}
729
718/* 730/*
719 Merges two QMemArrays of ints and puts the result into the first one. 731 Merges two QMemArrays of ints and puts the result into the first one.
720*/ 732*/
721static void mergeInto( QMemArray<int> *a, const QMemArray<int>& b ) 733static void mergeInto( QMemArray<int> *a, const QMemArray<int>& b )
722{ 734{
723 int asize = a->size(); 735 int asize = a->size();
724 int bsize = b.size(); 736 int bsize = b.size();
725 if ( asize == 0 ) { 737 if ( asize == 0 ) {
726 *a = b.copy(); 738 *a = b.copy();
727#ifndef QT_NO_REGEXP_OPTIM 739#ifndef QT_NO_REGEXP_OPTIM
728 } else if ( bsize == 1 && (*a)[asize - 1] < b[0] ) { 740 } else if ( bsize == 1 && (*a)[asize - 1] < b[0] ) {
729 a->resize( asize + 1 ); 741 a->resize( asize + 1 );
730 (*a)[asize] = b[0]; 742 (*a)[asize] = b[0];
731#endif 743#endif
732 } else if ( bsize >= 1 ) { 744 } else if ( bsize >= 1 ) {
733 int csize = asize + bsize; 745 int csize = asize + bsize;
734 QMemArray<int> c( csize ); 746 QMemArray<int> c( csize );
735 int i = 0, j = 0, k = 0; 747 int i = 0, j = 0, k = 0;
736 while ( i < asize ) { 748 while ( i < asize ) {
737 if ( j < bsize ) { 749 if ( j < bsize ) {
738 if ( (*a)[i] == b[j] ) { 750 if ( (*a)[i] == b[j] ) {
739 i++; 751 i++;
740 csize--; 752 csize--;
741 } else if ( (*a)[i] < b[j] ) { 753 } else if ( (*a)[i] < b[j] ) {
@@ -1659,51 +1671,51 @@ bool QRegExpEngine::isBetterCapture( const int *begin1, const int *end1,
1659bool QRegExpEngine::testAnchor( int i, int a, const int *capBegin ) 1671bool QRegExpEngine::testAnchor( int i, int a, const int *capBegin )
1660{ 1672{
1661 int j; 1673 int j;
1662 1674
1663#ifndef QT_NO_REGEXP_ANCHOR_ALT 1675#ifndef QT_NO_REGEXP_ANCHOR_ALT
1664 if ( (a & Anchor_Alternation) != 0 ) { 1676 if ( (a & Anchor_Alternation) != 0 ) {
1665 return testAnchor( i, aa[a ^ Anchor_Alternation].a, capBegin ) || 1677 return testAnchor( i, aa[a ^ Anchor_Alternation].a, capBegin ) ||
1666 testAnchor( i, aa[a ^ Anchor_Alternation].b, capBegin ); 1678 testAnchor( i, aa[a ^ Anchor_Alternation].b, capBegin );
1667 } 1679 }
1668#endif 1680#endif
1669 1681
1670 if ( (a & Anchor_Caret) != 0 ) { 1682 if ( (a & Anchor_Caret) != 0 ) {
1671 if ( mmPos + i != mmCaretPos ) 1683 if ( mmPos + i != mmCaretPos )
1672 return FALSE; 1684 return FALSE;
1673 } 1685 }
1674 if ( (a & Anchor_Dollar) != 0 ) { 1686 if ( (a & Anchor_Dollar) != 0 ) {
1675 if ( mmPos + i != mmLen ) 1687 if ( mmPos + i != mmLen )
1676 return FALSE; 1688 return FALSE;
1677 } 1689 }
1678#ifndef QT_NO_REGEXP_ESCAPE 1690#ifndef QT_NO_REGEXP_ESCAPE
1679 if ( (a & (Anchor_Word | Anchor_NonWord)) != 0 ) { 1691 if ( (a & (Anchor_Word | Anchor_NonWord)) != 0 ) {
1680 bool before = FALSE; 1692 bool before = FALSE;
1681 bool after = FALSE; 1693 bool after = FALSE;
1682 if ( mmPos + i != 0 ) 1694 if ( mmPos + i != 0 )
1683 before = mmIn[mmPos + i - 1].isLetterOrNumber(); 1695 before = isWord( mmIn[mmPos + i - 1] );
1684 if ( mmPos + i != mmLen ) 1696 if ( mmPos + i != mmLen )
1685 after = mmIn[mmPos + i].isLetterOrNumber(); 1697 after = isWord( mmIn[mmPos + i] );
1686 if ( (a & Anchor_Word) != 0 && (before == after) ) 1698 if ( (a & Anchor_Word) != 0 && (before == after) )
1687 return FALSE; 1699 return FALSE;
1688 if ( (a & Anchor_NonWord) != 0 && (before != after) ) 1700 if ( (a & Anchor_NonWord) != 0 && (before != after) )
1689 return FALSE; 1701 return FALSE;
1690 } 1702 }
1691#endif 1703#endif
1692#ifndef QT_NO_REGEXP_LOOKAHEAD 1704#ifndef QT_NO_REGEXP_LOOKAHEAD
1693 bool catchx = TRUE; 1705 bool catchx = TRUE;
1694 1706
1695 if ( (a & Anchor_LookaheadMask) != 0 ) { 1707 if ( (a & Anchor_LookaheadMask) != 0 ) {
1696 QConstString cstr = QConstString( (QChar *) mmIn + mmPos + i, 1708 QConstString cstr = QConstString( (QChar *) mmIn + mmPos + i,
1697 mmLen - mmPos - i ); 1709 mmLen - mmPos - i );
1698 for ( j = 0; j < (int) ahead.size(); j++ ) { 1710 for ( j = 0; j < (int) ahead.size(); j++ ) {
1699 if ( (a & (Anchor_FirstLookahead << j)) != 0 ) { 1711 if ( (a & (Anchor_FirstLookahead << j)) != 0 ) {
1700 catchx = ahead[j]->eng->match( cstr.string(), 0, TRUE, TRUE, 1712 catchx = ahead[j]->eng->match( cstr.string(), 0, TRUE, TRUE,
1701 mmCaretPos - mmPos - i )[0] == 0; 1713 mmCaretPos - mmPos - i )[0] == 0;
1702 if ( catchx == ahead[j]->neg ) 1714 if ( catchx == ahead[j]->neg )
1703 return FALSE; 1715 return FALSE;
1704 } 1716 }
1705 } 1717 }
1706 } 1718 }
1707#endif 1719#endif
1708#ifndef QT_NO_REGEXP_CAPTURE 1720#ifndef QT_NO_REGEXP_CAPTURE
1709#ifndef QT_NO_REGEXP_BACKREF 1721#ifndef QT_NO_REGEXP_BACKREF
@@ -2611,68 +2623,76 @@ int QRegExpEngine::getEscape()
2611 yyCh = getChar(); 2623 yyCh = getChar();
2612 } 2624 }
2613 if ( (val & ~0377) != 0 ) 2625 if ( (val & ~0377) != 0 )
2614 error( RXERR_OCTAL ); 2626 error( RXERR_OCTAL );
2615 return Tok_Char | val; 2627 return Tok_Char | val;
2616#endif 2628#endif
2617#ifndef QT_NO_REGEXP_ESCAPE 2629#ifndef QT_NO_REGEXP_ESCAPE
2618 case 'B': 2630 case 'B':
2619 return Tok_NonWord; 2631 return Tok_NonWord;
2620#endif 2632#endif
2621#ifndef QT_NO_REGEXP_CCLASS 2633#ifndef QT_NO_REGEXP_CCLASS
2622 case 'D': 2634 case 'D':
2623 // see QChar::isDigit() 2635 // see QChar::isDigit()
2624 yyCharClass->addCategories( 0x7fffffef ); 2636 yyCharClass->addCategories( 0x7fffffef );
2625 return Tok_CharClass; 2637 return Tok_CharClass;
2626 case 'S': 2638 case 'S':
2627 // see QChar::isSpace() 2639 // see QChar::isSpace()
2628 yyCharClass->addCategories( 0x7ffff87f ); 2640 yyCharClass->addCategories( 0x7ffff87f );
2629 yyCharClass->addRange( 0x0000, 0x0008 ); 2641 yyCharClass->addRange( 0x0000, 0x0008 );
2630 yyCharClass->addRange( 0x000e, 0x001f ); 2642 yyCharClass->addRange( 0x000e, 0x001f );
2631 yyCharClass->addRange( 0x007f, 0x009f ); 2643 yyCharClass->addRange( 0x007f, 0x009f );
2632 return Tok_CharClass; 2644 return Tok_CharClass;
2633 case 'W': 2645 case 'W':
2634 // see QChar::isLetterOrNumber() 2646 // see QChar::isLetterOrNumber()
2635 yyCharClass->addCategories( 0x7ff07f8f ); 2647 yyCharClass->addCategories( 0x7fe07f8f );
2648 yyCharClass->addRange( 0x203f, 0x2040 );
2649 yyCharClass->addSingleton( 0x2040 );
2650 yyCharClass->addSingleton( 0x30fb );
2651 yyCharClass->addRange( 0xfe33, 0xfe34 );
2652 yyCharClass->addRange( 0xfe4d, 0xfe4f );
2653 yyCharClass->addSingleton( 0xff3f );
2654 yyCharClass->addSingleton( 0xff65 );
2636 return Tok_CharClass; 2655 return Tok_CharClass;
2637#endif 2656#endif
2638#ifndef QT_NO_REGEXP_ESCAPE 2657#ifndef QT_NO_REGEXP_ESCAPE
2639 case 'b': 2658 case 'b':
2640 return Tok_Word; 2659 return Tok_Word;
2641#endif 2660#endif
2642#ifndef QT_NO_REGEXP_CCLASS 2661#ifndef QT_NO_REGEXP_CCLASS
2643 case 'd': 2662 case 'd':
2644 // see QChar::isDigit() 2663 // see QChar::isDigit()
2645 yyCharClass->addCategories( 0x00000010 ); 2664 yyCharClass->addCategories( 0x00000010 );
2646 return Tok_CharClass; 2665 return Tok_CharClass;
2647 case 's': 2666 case 's':
2648 // see QChar::isSpace() 2667 // see QChar::isSpace()
2649 yyCharClass->addCategories( 0x00000380 ); 2668 yyCharClass->addCategories( 0x00000380 );
2650 yyCharClass->addRange( 0x0009, 0x000d ); 2669 yyCharClass->addRange( 0x0009, 0x000d );
2651 return Tok_CharClass; 2670 return Tok_CharClass;
2652 case 'w': 2671 case 'w':
2653 // see QChar::isLetterOrNumber() 2672 // see QChar::isLetterOrNumber()
2654 yyCharClass->addCategories( 0x000f8070 ); 2673 yyCharClass->addCategories( 0x000f8070 );
2674 yyCharClass->addSingleton( 0x005f ); // '_'
2655 return Tok_CharClass; 2675 return Tok_CharClass;
2656#endif 2676#endif
2657#ifndef QT_NO_REGEXP_ESCAPE 2677#ifndef QT_NO_REGEXP_ESCAPE
2658 case 'x': 2678 case 'x':
2659 val = 0; 2679 val = 0;
2660 for ( i = 0; i < 4; i++ ) { 2680 for ( i = 0; i < 4; i++ ) {
2661 low = QChar( yyCh ).lower(); 2681 low = QChar( yyCh ).lower();
2662 if ( low >= '0' && low <= '9' ) 2682 if ( low >= '0' && low <= '9' )
2663 val = ( val << 4 ) | ( low - '0' ); 2683 val = ( val << 4 ) | ( low - '0' );
2664 else if ( low >= 'a' && low <= 'f' ) 2684 else if ( low >= 'a' && low <= 'f' )
2665 val = ( val << 4 ) | ( low - 'a' + 10 ); 2685 val = ( val << 4 ) | ( low - 'a' + 10 );
2666 else 2686 else
2667 break; 2687 break;
2668 yyCh = getChar(); 2688 yyCh = getChar();
2669 } 2689 }
2670 return Tok_Char | val; 2690 return Tok_Char | val;
2671#endif 2691#endif
2672 default: 2692 default:
2673 if ( prevCh >= '1' && prevCh <= '9' ) { 2693 if ( prevCh >= '1' && prevCh <= '9' ) {
2674#ifndef QT_NO_REGEXP_BACKREF 2694#ifndef QT_NO_REGEXP_BACKREF
2675 val = prevCh - '0'; 2695 val = prevCh - '0';
2676 while ( yyCh >= '0' && yyCh <= '9' ) { 2696 while ( yyCh >= '0' && yyCh <= '9' ) {
2677 val = ( val *= 10 ) | ( yyCh - '0' ); 2697 val = ( val *= 10 ) | ( yyCh - '0' );
2678 yyCh = getChar(); 2698 yyCh = getChar();
@@ -3162,69 +3182,71 @@ struct QRegExpPrivate
3162 QString rxpattern; // regular-expression pattern 3182 QString rxpattern; // regular-expression pattern
3163#ifndef QT_NO_REGEXP_WILDCARD 3183#ifndef QT_NO_REGEXP_WILDCARD
3164 bool wc; // wildcard mode? 3184 bool wc; // wildcard mode?
3165#endif 3185#endif
3166 bool min; // minimal matching? (instead of maximal) 3186 bool min; // minimal matching? (instead of maximal)
3167#ifndef QT_NO_REGEXP_CAPTURE 3187#ifndef QT_NO_REGEXP_CAPTURE
3168 QString t; // last string passed to QRegExp::search() or searchRev() 3188 QString t; // last string passed to QRegExp::search() or searchRev()
3169 QStringList capturedCache; // what QRegExp::capturedTexts() returned last 3189 QStringList capturedCache; // what QRegExp::capturedTexts() returned last
3170#endif 3190#endif
3171 QMemArray<int> captured; // what QRegExpEngine::search() returned last 3191 QMemArray<int> captured; // what QRegExpEngine::search() returned last
3172 3192
3173 QRegExpPrivate() { captured.fill( -1, 2 ); } 3193 QRegExpPrivate() { captured.fill( -1, 2 ); }
3174}; 3194};
3175 3195
3176#ifndef QT_NO_REGEXP_OPTIM 3196#ifndef QT_NO_REGEXP_OPTIM
3177static QCache<QRegExpEngine> *engineCache = 0; 3197static QCache<QRegExpEngine> *engineCache = 0;
3178static QSingleCleanupHandler<QCache<QRegExpEngine> > cleanup_cache; 3198static QSingleCleanupHandler<QCache<QRegExpEngine> > cleanup_cache;
3179#endif 3199#endif
3180 3200
3181static QRegExpEngine *newEngine( const QString& pattern, bool caseSensitive ) 3201static QRegExpEngine *newEngine( const QString& pattern, bool caseSensitive )
3182{ 3202{
3183#ifndef QT_NO_REGEXP_OPTIM 3203#ifndef QT_NO_REGEXP_OPTIM
3184 if ( engineCache != 0 ) { 3204 if ( engineCache != 0 ) {
3185#ifdef QT_THREAD_SUPPORT 3205#ifdef QT_THREAD_SUPPORT
3186 QMutexLocker locker( qt_global_mutexpool->get( &engineCache ) ); 3206 QMutexLocker locker( qt_global_mutexpool ?
3207 qt_global_mutexpool->get( &engineCache ) : 0 );
3187#endif 3208#endif
3188 QRegExpEngine *eng = engineCache->take( pattern ); 3209 QRegExpEngine *eng = engineCache->take( pattern );
3189 if ( eng == 0 || eng->caseSensitive() != caseSensitive ) { 3210 if ( eng == 0 || eng->caseSensitive() != caseSensitive ) {
3190 delete eng; 3211 delete eng;
3191 } else { 3212 } else {
3192 eng->ref(); 3213 eng->ref();
3193 return eng; 3214 return eng;
3194 } 3215 }
3195 } 3216 }
3196#endif 3217#endif
3197 return new QRegExpEngine( pattern, caseSensitive ); 3218 return new QRegExpEngine( pattern, caseSensitive );
3198} 3219}
3199 3220
3200static void derefEngine( QRegExpEngine *eng, const QString& pattern ) 3221static void derefEngine( QRegExpEngine *eng, const QString& pattern )
3201{ 3222{
3202 if ( eng != 0 && eng->deref() ) {
3203#ifndef QT_NO_REGEXP_OPTIM
3204#ifdef QT_THREAD_SUPPORT 3223#ifdef QT_THREAD_SUPPORT
3205 QMutexLocker locker( qt_global_mutexpool->get( &engineCache ) ); 3224 QMutexLocker locker( qt_global_mutexpool ?
3225 qt_global_mutexpool->get( &engineCache ) : 0 );
3206#endif 3226#endif
3227 if ( eng != 0 && eng->deref() ) {
3228#ifndef QT_NO_REGEXP_OPTIM
3207 if ( engineCache == 0 ) { 3229 if ( engineCache == 0 ) {
3208 engineCache = new QCache<QRegExpEngine>; 3230 engineCache = new QCache<QRegExpEngine>;
3209 engineCache->setAutoDelete( TRUE ); 3231 engineCache->setAutoDelete( TRUE );
3210 cleanup_cache.set( &engineCache ); 3232 cleanup_cache.set( &engineCache );
3211 } 3233 }
3212 if ( !pattern.isNull() && 3234 if ( !pattern.isNull() &&
3213 engineCache->insert(pattern, eng, 4 + pattern.length() / 4) ) 3235 engineCache->insert(pattern, eng, 4 + pattern.length() / 4) )
3214 return; 3236 return;
3215#else 3237#else
3216 Q_UNUSED( pattern ); 3238 Q_UNUSED( pattern );
3217#endif 3239#endif
3218 delete eng; 3240 delete eng;
3219 } 3241 }
3220} 3242}
3221 3243
3222/*! 3244/*!
3223 \enum QRegExp::CaretMode 3245 \enum QRegExp::CaretMode
3224 3246
3225 The CaretMode enum defines the different meanings of the caret 3247 The CaretMode enum defines the different meanings of the caret
3226 (<b>^</b>) in a regular expression. The possible values are: 3248 (<b>^</b>) in a regular expression. The possible values are:
3227 3249
3228 \value CaretAtZero 3250 \value CaretAtZero
3229 The caret corresponds to index 0 in the searched string. 3251 The caret corresponds to index 0 in the searched string.
3230 3252
@@ -3544,55 +3566,48 @@ bool QRegExp::exactMatch( const QString& str ) const
3544 Attempts to match in \a str, starting from position \a index. 3566 Attempts to match in \a str, starting from position \a index.
3545 Returns the position of the match, or -1 if there was no match. 3567 Returns the position of the match, or -1 if there was no match.
3546 3568
3547 The length of the match is stored in \a *len, unless \a len is a 3569 The length of the match is stored in \a *len, unless \a len is a
3548 null pointer. 3570 null pointer.
3549 3571
3550 If \a indexIsStart is TRUE (the default), the position \a index in 3572 If \a indexIsStart is TRUE (the default), the position \a index in
3551 the string will match the start of string anchor, <b>^</b>, in the 3573 the string will match the start of string anchor, <b>^</b>, in the
3552 regexp, if present. Otherwise, position 0 in \a str will match. 3574 regexp, if present. Otherwise, position 0 in \a str will match.
3553 3575
3554 Use search() and matchedLength() instead of this function. 3576 Use search() and matchedLength() instead of this function.
3555 3577
3556 \sa QString::mid() QConstString 3578 \sa QString::mid() QConstString
3557*/ 3579*/
3558int QRegExp::match( const QString& str, int index, int *len, 3580int QRegExp::match( const QString& str, int index, int *len,
3559 bool indexIsStart ) const 3581 bool indexIsStart ) const
3560{ 3582{
3561 int pos = search( str, index, indexIsStart ? CaretAtOffset : CaretAtZero ); 3583 int pos = search( str, index, indexIsStart ? CaretAtOffset : CaretAtZero );
3562 if ( len != 0 ) 3584 if ( len != 0 )
3563 *len = matchedLength(); 3585 *len = matchedLength();
3564 return pos; 3586 return pos;
3565} 3587}
3566#endif // QT_NO_COMPAT 3588#endif // QT_NO_COMPAT
3567 3589
3568/*!
3569 \overload
3570
3571 This convenience function searches with a \c CaretMode of \c
3572 CaretAtZero which is the most common usage.
3573*/
3574
3575int QRegExp::search( const QString& str, int offset ) const 3590int QRegExp::search( const QString& str, int offset ) const
3576{ 3591{
3577 return search( str, offset, CaretAtZero ); 3592 return search( str, offset, CaretAtZero );
3578} 3593}
3579 3594
3580/*! 3595/*!
3581 Attempts to find a match in \a str from position \a offset (0 by 3596 Attempts to find a match in \a str from position \a offset (0 by
3582 default). If \a offset is -1, the search starts at the last 3597 default). If \a offset is -1, the search starts at the last
3583 character; if -2, at the next to last character; etc. 3598 character; if -2, at the next to last character; etc.
3584 3599
3585 Returns the position of the first match, or -1 if there was no 3600 Returns the position of the first match, or -1 if there was no
3586 match. 3601 match.
3587 3602
3588 The \a caretMode parameter can be used to specify whether <b>^</b> 3603 The \a caretMode parameter can be used to specify whether <b>^</b>
3589 should match at index 0 or at \a offset. 3604 should match at index 0 or at \a offset.
3590 3605
3591 You might prefer to use QString::find(), QString::contains() or 3606 You might prefer to use QString::find(), QString::contains() or
3592 even QStringList::grep(). To replace matches use 3607 even QStringList::grep(). To replace matches use
3593 QString::replace(). 3608 QString::replace().
3594 3609
3595 Example: 3610 Example:
3596 \code 3611 \code
3597 QString str = "offsets: 1.23 .50 71.00 6.00"; 3612 QString str = "offsets: 1.23 .50 71.00 6.00";
3598 QRegExp rx( "\\d*\\.\\d+" ); // primitive floating point matching 3613 QRegExp rx( "\\d*\\.\\d+" ); // primitive floating point matching
@@ -3604,55 +3619,48 @@ int QRegExp::search( const QString& str, int offset ) const
3604 } 3619 }
3605 // pos will be 9, 14, 18 and finally 24; count will end up as 4 3620 // pos will be 9, 14, 18 and finally 24; count will end up as 4
3606 \endcode 3621 \endcode
3607 3622
3608 Although const, this function sets matchedLength(), 3623 Although const, this function sets matchedLength(),
3609 capturedTexts() and pos(). 3624 capturedTexts() and pos().
3610 3625
3611 \sa searchRev() exactMatch() 3626 \sa searchRev() exactMatch()
3612*/ 3627*/
3613 3628
3614int QRegExp::search( const QString& str, int offset, CaretMode caretMode ) const 3629int QRegExp::search( const QString& str, int offset, CaretMode caretMode ) const
3615{ 3630{
3616 if ( offset < 0 ) 3631 if ( offset < 0 )
3617 offset += str.length(); 3632 offset += str.length();
3618#ifndef QT_NO_REGEXP_CAPTURE 3633#ifndef QT_NO_REGEXP_CAPTURE
3619 priv->t = str; 3634 priv->t = str;
3620 priv->capturedCache.clear(); 3635 priv->capturedCache.clear();
3621#endif 3636#endif
3622 priv->captured = eng->match( str, offset, priv->min, FALSE, 3637 priv->captured = eng->match( str, offset, priv->min, FALSE,
3623 caretIndex(offset, caretMode) ); 3638 caretIndex(offset, caretMode) );
3624 return priv->captured[0]; 3639 return priv->captured[0];
3625} 3640}
3626 3641
3627 3642
3628/*!
3629 \overload
3630
3631 This convenience function searches with a \c CaretMode of \c
3632 CaretAtZero which is the most common usage.
3633*/
3634
3635int QRegExp::searchRev( const QString& str, int offset ) const 3643int QRegExp::searchRev( const QString& str, int offset ) const
3636{ 3644{
3637 return searchRev( str, offset, CaretAtZero ); 3645 return searchRev( str, offset, CaretAtZero );
3638} 3646}
3639 3647
3640/*! 3648/*!
3641 Attempts to find a match backwards in \a str from position \a 3649 Attempts to find a match backwards in \a str from position \a
3642 offset. If \a offset is -1 (the default), the search starts at the 3650 offset. If \a offset is -1 (the default), the search starts at the
3643 last character; if -2, at the next to last character; etc. 3651 last character; if -2, at the next to last character; etc.
3644 3652
3645 Returns the position of the first match, or -1 if there was no 3653 Returns the position of the first match, or -1 if there was no
3646 match. 3654 match.
3647 3655
3648 The \a caretMode parameter can be used to instruct whether <b>^</b> 3656 The \a caretMode parameter can be used to instruct whether <b>^</b>
3649 should match at index 0 or at \a offset. 3657 should match at index 0 or at \a offset.
3650 3658
3651 Although const, this function sets matchedLength(), 3659 Although const, this function sets matchedLength(),
3652 capturedTexts() and pos(). 3660 capturedTexts() and pos().
3653 3661
3654 \warning Searching backwards is much slower than searching 3662 \warning Searching backwards is much slower than searching
3655 forwards. 3663 forwards.
3656 3664
3657 \sa search() exactMatch() 3665 \sa search() exactMatch()
3658*/ 3666*/
@@ -3673,49 +3681,49 @@ int QRegExp::searchRev( const QString& str, int offset,
3673 } 3681 }
3674 3682
3675 while ( offset >= 0 ) { 3683 while ( offset >= 0 ) {
3676 priv->captured = eng->match( str, offset, priv->min, TRUE, 3684 priv->captured = eng->match( str, offset, priv->min, TRUE,
3677 caretIndex(offset, caretMode) ); 3685 caretIndex(offset, caretMode) );
3678 if ( priv->captured[0] == offset ) 3686 if ( priv->captured[0] == offset )
3679 return offset; 3687 return offset;
3680 offset--; 3688 offset--;
3681 } 3689 }
3682 return -1; 3690 return -1;
3683} 3691}
3684 3692
3685/*! 3693/*!
3686 Returns the length of the last matched string, or -1 if there was 3694 Returns the length of the last matched string, or -1 if there was
3687 no match. 3695 no match.
3688 3696
3689 \sa exactMatch() search() searchRev() 3697 \sa exactMatch() search() searchRev()
3690*/ 3698*/
3691int QRegExp::matchedLength() const 3699int QRegExp::matchedLength() const
3692{ 3700{
3693 return priv->captured[1]; 3701 return priv->captured[1];
3694} 3702}
3695 3703
3696#ifndef QT_NO_REGEXP_CAPTURE 3704#ifndef QT_NO_REGEXP_CAPTURE
3697/*! 3705/*!
3698 Returns the number of captures contained in the regular expression. 3706 Returns the number of captures contained in the regular expression.
3699 */ 3707 */
3700int QRegExp::numCaptures() const 3708int QRegExp::numCaptures() const
3701{ 3709{
3702 return eng->numCaptures(); 3710 return eng->numCaptures();
3703} 3711}
3704 3712
3705 3713
3706 3714
3707/*! 3715/*!
3708 Returns a list of the captured text strings. 3716 Returns a list of the captured text strings.
3709 3717
3710 The first string in the list is the entire matched string. Each 3718 The first string in the list is the entire matched string. Each
3711 subsequent list element contains a string that matched a 3719 subsequent list element contains a string that matched a
3712 (capturing) subexpression of the regexp. 3720 (capturing) subexpression of the regexp.
3713 3721
3714 For example: 3722 For example:
3715 \code 3723 \code
3716 QRegExp rx( "(\\d+)(\\s*)(cm|inch(es)?)" ); 3724 QRegExp rx( "(\\d+)(\\s*)(cm|inch(es)?)" );
3717 int pos = rx.search( "Length: 36 inches" ); 3725 int pos = rx.search( "Length: 36 inches" );
3718 QStringList list = rx.capturedTexts(); 3726 QStringList list = rx.capturedTexts();
3719 // list is now ( "36 inches", "36", " ", "inches", "es" ) 3727 // list is now ( "36 inches", "36", " ", "inches", "es" )
3720 \endcode 3728 \endcode
3721 3729