summaryrefslogtreecommitdiff
path: root/noncore/apps/tinykate/libkate/qt3back/qregexp3.cpp
Unidiff
Diffstat (limited to 'noncore/apps/tinykate/libkate/qt3back/qregexp3.cpp') (more/less context) (ignore whitespace changes)
-rw-r--r--noncore/apps/tinykate/libkate/qt3back/qregexp3.cpp2282
1 files changed, 1144 insertions, 1138 deletions
diff --git a/noncore/apps/tinykate/libkate/qt3back/qregexp3.cpp b/noncore/apps/tinykate/libkate/qt3back/qregexp3.cpp
index a2c680f..78635b2 100644
--- a/noncore/apps/tinykate/libkate/qt3back/qregexp3.cpp
+++ b/noncore/apps/tinykate/libkate/qt3back/qregexp3.cpp
@@ -35,26 +35,32 @@
35** 35**
36**********************************************************************/ 36**********************************************************************/
37#if QT_VERSION >=300 37#if QT_VERSION >=300
38#error QRegExp3 is now in QT 3 use QRegExp instead 38#error QRegExp3 is now in QT 3 use QRegExp instead
39#endif 39#endif
40 40
41#include "qarray.h"
42#include "qbitarray.h"
43#include "qcache.h"
44#include "qintdict.h"
45#include "qmap.h"
46#if QT_VERSION < 300 41#if QT_VERSION < 300
47#include "./qregexp3.h" 42#include "./qregexp3.h"
48#else 43#else
49#include "qregexp.h" 44#include "qregexp.h"
50#endif 45#endif
51#include "qstring.h"
52#include "qtl.h"
53#include "qvector.h"
54 46
47/* OPIE */
48#include <opie2/odebug.h>
49
50/* QT */
51#include <qarray.h>
52#include <qbitarray.h>
53#include <qcache.h>
54#include <qintdict.h>
55#include <qmap.h>
56#include <qstring.h>
57#include <qtl.h>
58#include <qvector.h>
59
60/* STD */
55#include <limits.h> 61#include <limits.h>
56 62
57/* 63/*
58 WARNING! Be sure to read qregexp.tex before modifying this file. 64 WARNING! Be sure to read qregexp.tex before modifying this file.
59*/ 65*/
60 66
@@ -512,38 +518,38 @@
512 Note: because C++ transforms \\'s they must be written \e twice in 518 Note: because C++ transforms \\'s they must be written \e twice in
513 code, e.g. <b>\\</b><b>b</b> must be written <b>\\</b><b>\\</b><b>b</b>. 519 code, e.g. <b>\\</b><b>b</b> must be written <b>\\</b><b>\\</b><b>b</b>.
514 520
515 <a name="code-examples"><b>Code Examples</b></a> 521 <a name="code-examples"><b>Code Examples</b></a>
516 522
517 \code 523 \code
518 QRegExp rx( "^\\d\\d?$" );// Match integers 0 to 99 524 QRegExp rx( "^\\d\\d?$" ); // Match integers 0 to 99
519 rx.search( "123" ); // Returns -1 (no match) 525 rx.search( "123" ); // Returns -1 (no match)
520 rx.search( "-6" ); // Returns -1 (no match) 526 rx.search( "-6" ); // Returns -1 (no match)
521 rx.search( "6" ); // Returns 0 (matched at position 0) 527 rx.search( "6" ); // Returns 0 (matched at position 0)
522 \endcode 528 \endcode
523 529
524 The third string matches '<u>6</u>'. This is a simple validation 530 The third string matches '<u>6</u>'. This is a simple validation
525 regexp for integers in the range 0 to 99. 531 regexp for integers in the range 0 to 99.
526 532
527 \code 533 \code
528 QRegExp rx( "^\\S+$" );// Match strings which have no whitespace 534 QRegExp rx( "^\\S+$" ); // Match strings which have no whitespace
529 rx.search( "Hello world" );// Returns -1 (no match) 535 rx.search( "Hello world" ); // Returns -1 (no match)
530 rx.search( "This_is-OK" );// Returns 0 (matched at position 0) 536 rx.search( "This_is-OK" ); // Returns 0 (matched at position 0)
531 \endcode 537 \endcode
532 538
533 The second string matches '<u>This_is-OK</u>'. We've used the 539 The second string matches '<u>This_is-OK</u>'. We've used the
534 character set abbreviation '\S' (non-whitespace) and the anchors to 540 character set abbreviation '\S' (non-whitespace) and the anchors to
535 match strings which contain no whitespace. 541 match strings which contain no whitespace.
536 542
537 In the following example we match strings containing 'mail' or 543 In the following example we match strings containing 'mail' or
538 'letter' or 'correspondence' but only match whole words i.e. not 544 'letter' or 'correspondence' but only match whole words i.e. not
539 'email' 545 'email'
540 546
541 \code 547 \code
542 QRegExp rx( "\\b(mail|letter|correspondence)\\b" ); 548 QRegExp rx( "\\b(mail|letter|correspondence)\\b" );
543 rx.search( "I sent you an email" ); // Returns -1 (no match) 549 rx.search( "I sent you an email" ); // Returns -1 (no match)
544 rx.search( "Please write the letter" ); // Returns 17 (matched at position 17) 550 rx.search( "Please write the letter" ); // Returns 17 (matched at position 17)
545 \endcode 551 \endcode
546 552
547 The second string matches "Please write the <u>letter</u>". The word 553 The second string matches "Please write the <u>letter</u>". The word
548 'letter' is also captured (because of the parentheses). We can see 554 'letter' is also captured (because of the parentheses). We can see
549 what text we've captured like this: 555 what text we've captured like this:
@@ -555,13 +561,13 @@
555 This will capture the text from the first set of capturing 561 This will capture the text from the first set of capturing
556 parentheses (counting capturing left parentheses from left to 562 parentheses (counting capturing left parentheses from left to
557 right). The parentheses are counted from 1 since cap( 0 ) is the 563 right). The parentheses are counted from 1 since cap( 0 ) is the
558 whole matched regexp (equivalent to '&' in most regexp engines). 564 whole matched regexp (equivalent to '&' in most regexp engines).
559 565
560 \code 566 \code
561 QRegExp rx( "&(?!amp;)" ); // Match ampersands but not &amp; 567 QRegExp rx( "&(?!amp;)" ); // Match ampersands but not &amp;
562 QString line1 = "This & that"; 568 QString line1 = "This & that";
563 line1.replace( rx, "&amp;" ); 569 line1.replace( rx, "&amp;" );
564 // line1 == "This &amp; that" 570 // line1 == "This &amp; that"
565 QString line2 = "His &amp; hers & theirs"; 571 QString line2 = "His &amp; hers & theirs";
566 line2.replace( rx, "&amp;" ); 572 line2.replace( rx, "&amp;" );
567 // line2 == "His &amp; hers &amp; theirs" 573 // line2 == "His &amp; hers &amp; theirs"
@@ -569,21 +575,21 @@
569 575
570 Here we've passed the QRegExp to QString's replace() function to 576 Here we've passed the QRegExp to QString's replace() function to
571 replace the matched text with new text. 577 replace the matched text with new text.
572 578
573 \code 579 \code
574 QString str = "One Eric another Eirik, and an Ericsson. How many Eiriks, Eric?"; 580 QString str = "One Eric another Eirik, and an Ericsson. How many Eiriks, Eric?";
575 QRegExp rx( "\\b(Eric|Eirik)\\b" );// Match Eric or Eirik 581 QRegExp rx( "\\b(Eric|Eirik)\\b" ); // Match Eric or Eirik
576 int pos = 0; // Where we are in the string 582 int pos = 0; // Where we are in the string
577 int count = 0; // How many Eric and Eirik's we've counted 583 int count = 0; // How many Eric and Eirik's we've counted
578 while ( pos >= 0 ) { 584 while ( pos >= 0 ) {
579 pos = rx.search( str, pos ); 585 pos = rx.search( str, pos );
580 if ( pos >= 0 ) { 586 if ( pos >= 0 ) {
581 pos++;// Move along in str 587 pos++; // Move along in str
582 count++;// Count our Eric or Eirik 588 count++; // Count our Eric or Eirik
583 } 589 }
584 } 590 }
585 \endcode 591 \endcode
586 592
587 We've used the search() function to repeatedly match the regexp in 593 We've used the search() function to repeatedly match the regexp in
588 the string. Note that instead of moving forward by one character at 594 the string. Note that instead of moving forward by one character at
589 a time <tt>pos++</tt> we could have written <tt>pos += 595 a time <tt>pos++</tt> we could have written <tt>pos +=
@@ -598,15 +604,15 @@
598 604
599 \code 605 \code
600 str = "Trolltech AS\twww.trolltech.com\tNorway"; 606 str = "Trolltech AS\twww.trolltech.com\tNorway";
601 QString company, web, country; 607 QString company, web, country;
602 rx.setPattern( "^([^\t]+)\t([^\t]+)\t([^\t]+)$" ); 608 rx.setPattern( "^([^\t]+)\t([^\t]+)\t([^\t]+)$" );
603 if ( rx.search( str ) != -1 ) { 609 if ( rx.search( str ) != -1 ) {
604 company = rx.cap( 1 ); 610 company = rx.cap( 1 );
605 web= rx.cap( 2 ); 611 web = rx.cap( 2 );
606 country = rx.cap( 3 ); 612 country = rx.cap( 3 );
607 } 613 }
608 \endcode 614 \endcode
609 615
610 In this example our input lines have the format company name, web 616 In this example our input lines have the format company name, web
611 address and country. Unfortunately the regexp is rather long and not 617 address and country. Unfortunately the regexp is rather long and not
612 very versatile -- the code will break if we add any more fields. A 618 very versatile -- the code will break if we add any more fields. A
@@ -621,17 +627,17 @@
621 627
622 Here field[0] is the company, field[1] the web address and so on. 628 Here field[0] is the company, field[1] the web address and so on.
623 629
624 To imitate the matching of a shell we can use wildcard mode. 630 To imitate the matching of a shell we can use wildcard mode.
625 631
626 \code 632 \code
627 QRegExp rx( "*.html" );// Invalid regexp: * doesn't quantify anything 633 QRegExp rx( "*.html" ); // Invalid regexp: * doesn't quantify anything
628 rx.setWildcard( TRUE );// Now it's a valid wildcard regexp 634 rx.setWildcard( TRUE ); // Now it's a valid wildcard regexp
629 rx.search( "index.html" );// Returns 0 (matched at position 0) 635 rx.search( "index.html" ); // Returns 0 (matched at position 0)
630 rx.search( "default.htm" );// Returns -1 (no match) 636 rx.search( "default.htm" ); // Returns -1 (no match)
631 rx.search( "readme.txt" );// Returns -1 (no match) 637 rx.search( "readme.txt" ); // Returns -1 (no match)
632 \endcode 638 \endcode
633 639
634 Wildcard matching can be convenient because of its simplicity, but 640 Wildcard matching can be convenient because of its simplicity, but
635 any wildcard regex can be defined using full regexps, e.g. 641 any wildcard regex can be defined using full regexps, e.g.
636 <b>.*\.html$</b>. Notice that we can't match both \c .html and \c 642 <b>.*\.html$</b>. Notice that we can't match both \c .html and \c
637 .htm files with a wildcard unless we use <b>*.htm*</b> which will 643 .htm files with a wildcard unless we use <b>*.htm*</b> which will
@@ -673,67 +679,67 @@ static QArray<int> *firstOccurrenceAtZero = 0;
673*/ 679*/
674static void mergeInto( QArray<int> *a, const QArray<int>& b ) 680static void mergeInto( QArray<int> *a, const QArray<int>& b )
675{ 681{
676 int asize = a->size(); 682 int asize = a->size();
677 int bsize = b.size(); 683 int bsize = b.size();
678 if ( asize == 0 ) { 684 if ( asize == 0 ) {
679 *a = b.copy(); 685 *a = b.copy();
680#ifndef QT_NO_REGEXP_OPTIM 686#ifndef QT_NO_REGEXP_OPTIM
681 } else if ( bsize == 1 && (*a)[asize - 1] < b[0] ) { 687 } else if ( bsize == 1 && (*a)[asize - 1] < b[0] ) {
682 a->resize( asize + 1 ); 688 a->resize( asize + 1 );
683 (*a)[asize] = b[0]; 689 (*a)[asize] = b[0];
684#endif 690#endif
685 } else if ( bsize >= 1 ) { 691 } else if ( bsize >= 1 ) {
686 int csize = asize + bsize; 692 int csize = asize + bsize;
687 QArray<int> c( csize ); 693 QArray<int> c( csize );
688 int i = 0, j = 0, k = 0; 694 int i = 0, j = 0, k = 0;
689 while ( i < asize ) { 695 while ( i < asize ) {
690 if ( j < bsize ) { 696 if ( j < bsize ) {
691 if ( (*a)[i] == b[j] ) { 697 if ( (*a)[i] == b[j] ) {
692 i++; 698 i++;
693 csize--; 699 csize--;
694 } else if ( (*a)[i] < b[j] ) { 700 } else if ( (*a)[i] < b[j] ) {
695 c[k++] = (*a)[i++]; 701 c[k++] = (*a)[i++];
696 } else { 702 } else {
697 c[k++] = b[j++]; 703 c[k++] = b[j++];
698 } 704 }
699 } else { 705 } else {
700 memcpy( c.data() + k, (*a).data() + i, 706 memcpy( c.data() + k, (*a).data() + i,
701 (asize - i) * sizeof(int) ); 707 (asize - i) * sizeof(int) );
702 break; 708 break;
703 } 709 }
704 } 710 }
705 c.resize( csize ); 711 c.resize( csize );
706 if ( j < bsize ) 712 if ( j < bsize )
707 memcpy( c.data() + k, b.data() + j, (bsize - j) * sizeof(int) ); 713 memcpy( c.data() + k, b.data() + j, (bsize - j) * sizeof(int) );
708 *a = c; 714 *a = c;
709 } 715 }
710} 716}
711 717
712/* 718/*
713 Merges two disjoint QMaps of (int, int) pairs and puts the result into the 719 Merges two disjoint QMaps of (int, int) pairs and puts the result into the
714 first one. 720 first one.
715*/ 721*/
716static void mergeInto( QMap<int, int> *a, const QMap<int, int>& b ) 722static void mergeInto( QMap<int, int> *a, const QMap<int, int>& b )
717{ 723{
718 QMap<int, int>::ConstIterator it; 724 QMap<int, int>::ConstIterator it;
719 for ( it = b.begin(); it != b.end(); ++it ) 725 for ( it = b.begin(); it != b.end(); ++it )
720 a->insert( it.key(), *it ); 726 a->insert( it.key(), *it );
721} 727}
722 728
723/* 729/*
724 Returns the value associated to key k in QMap m of (int, int) pairs, or 0 if 730 Returns the value associated to key k in QMap m of (int, int) pairs, or 0 if
725 no such value is explicitly present. 731 no such value is explicitly present.
726*/ 732*/
727static int at( const QMap<int, int>& m, int k ) 733static int at( const QMap<int, int>& m, int k )
728{ 734{
729 QMap<int, int>::ConstIterator it = m.find( k ); 735 QMap<int, int>::ConstIterator it = m.find( k );
730 if ( it == m.end() ) 736 if ( it == m.end() )
731 return 0; 737 return 0;
732 else 738 else
733 return *it; 739 return *it;
734} 740}
735 741
736#ifndef QT_NO_REGEXP_WILDCARD 742#ifndef QT_NO_REGEXP_WILDCARD
737/* 743/*
738 Translates a wildcard pattern to an equivalent regular expression pattern 744 Translates a wildcard pattern to an equivalent regular expression pattern
739 (e.g., *.cpp to .*\.cpp). 745 (e.g., *.cpp to .*\.cpp).
@@ -741,50 +747,50 @@ static int at( const QMap<int, int>& m, int k )
741static QString wc2rx( const QString& wc ) 747static QString wc2rx( const QString& wc )
742{ 748{
743 int wclen = wc.length(); 749 int wclen = wc.length();
744 QString rx = QString::fromLatin1( "" ); 750 QString rx = QString::fromLatin1( "" );
745 int i = 0; 751 int i = 0;
746 while ( i < wclen ) { 752 while ( i < wclen ) {
747 QChar c = wc[i++]; 753 QChar c = wc[i++];
748 switch ( c.unicode() ) { 754 switch ( c.unicode() ) {
749 case '*': 755 case '*':
750 rx += QString::fromLatin1( ".*" ); 756 rx += QString::fromLatin1( ".*" );
751 break; 757 break;
752 case '?': 758 case '?':
753 rx += QChar( '.' ); 759 rx += QChar( '.' );
754 break; 760 break;
755 case '$': 761 case '$':
756 case '(': 762 case '(':
757 case ')': 763 case ')':
758 case '+': 764 case '+':
759 case '.': 765 case '.':
760 case '\\': 766 case '\\':
761 case '^': 767 case '^':
762 case '{': 768 case '{':
763 case '|': 769 case '|':
764 case '}': 770 case '}':
765 rx += QChar( '\\' ); 771 rx += QChar( '\\' );
766 rx += c; 772 rx += c;
767 break; 773 break;
768 case '[': 774 case '[':
769 rx += c; 775 rx += c;
770 if ( wc[i] == QChar('^') ) 776 if ( wc[i] == QChar('^') )
771 rx += wc[i++]; 777 rx += wc[i++];
772 if ( i < wclen ) { 778 if ( i < wclen ) {
773 if ( rx[i] == ']' ) 779 if ( rx[i] == ']' )
774 rx += wc[i++]; 780 rx += wc[i++];
775 while ( i < wclen && wc[i] != QChar(']') ) { 781 while ( i < wclen && wc[i] != QChar(']') ) {
776 if ( wc[i] == '\\' ) 782 if ( wc[i] == '\\' )
777 rx += QChar( '\\' ); 783 rx += QChar( '\\' );
778 rx += wc[i++]; 784 rx += wc[i++];
779 } 785 }
780 } 786 }
781 break; 787 break;
782 default: 788 default:
783 rx += c; 789 rx += c;
784 } 790 }
785 } 791 }
786 return rx; 792 return rx;
787} 793}
788#endif 794#endif
789 795
790/* 796/*
@@ -799,60 +805,60 @@ public:
799 The class CharClass represents a set of characters, such as can be found 805 The class CharClass represents a set of characters, such as can be found
800 in regular expressions (e.g., [a-z] denotes the set {a, b, ..., z}). 806 in regular expressions (e.g., [a-z] denotes the set {a, b, ..., z}).
801 */ 807 */
802 class CharClass 808 class CharClass
803 { 809 {
804 public: 810 public:
805 CharClass(); 811 CharClass();
806 CharClass( const CharClass& cc ) { operator=( cc ); } 812 CharClass( const CharClass& cc ) { operator=( cc ); }
807 813
808 CharClass& operator=( const CharClass& cc ); 814 CharClass& operator=( const CharClass& cc );
809 815
810 void clear(); 816 void clear();
811 bool negative() const { return n; } 817 bool negative() const { return n; }
812 void setNegative( bool negative ); 818 void setNegative( bool negative );
813 void addCategories( int cats ); 819 void addCategories( int cats );
814 void addRange( ushort from, ushort to ); 820 void addRange( ushort from, ushort to );
815 void addSingleton( ushort ch ) { addRange( ch, ch ); } 821 void addSingleton( ushort ch ) { addRange( ch, ch ); }
816 822
817 bool in( QChar ch ) const; 823 bool in( QChar ch ) const;
818#ifndef QT_NO_REGEXP_OPTIM 824#ifndef QT_NO_REGEXP_OPTIM
819 const QArray<int>& firstOccurrence() const { return occ1; } 825 const QArray<int>& firstOccurrence() const { return occ1; }
820#endif 826#endif
821 827
822#if defined(QT_DEBUG) 828#if defined(QT_DEBUG)
823 void dump() const; 829 void dump() const;
824#endif 830#endif
825 831
826 private: 832 private:
827 /* 833 /*
828 The struct Range represents a range of characters (e.g., [0-9] denotes 834 The struct Range represents a range of characters (e.g., [0-9] denotes
829 range 48 to 57). 835 range 48 to 57).
830 */ 836 */
831 struct Range 837 struct Range
832 { 838 {
833 ushort from; // 48 839 ushort from; // 48
834 ushort to; // 57 840 ushort to; // 57
835 }; 841 };
836 842
837 int c; // character classes 843 int c; // character classes
838 QArray<Range> r; // character ranges 844 QArray<Range> r; // character ranges
839 bool n; // negative? 845 bool n; // negative?
840#ifndef QT_NO_REGEXP_OPTIM 846#ifndef QT_NO_REGEXP_OPTIM
841 QArray<int> occ1; // first-occurrence array 847 QArray<int> occ1; // first-occurrence array
842#endif 848#endif
843 }; 849 };
844#else 850#else
845 struct CharClass 851 struct CharClass
846 { 852 {
847 int x; // dummy 853 int x; // dummy
848 854
849#ifndef QT_NO_REGEXP_OPTIM 855#ifndef QT_NO_REGEXP_OPTIM
850 const QArray<int>& firstOccurrence() const { 856 const QArray<int>& firstOccurrence() const {
851 return *firstOccurrenceAtZero; 857 return *firstOccurrenceAtZero;
852 } 858 }
853#endif 859#endif
854 }; 860 };
855#endif 861#endif
856 862
857 QRegExpEngine( bool caseSensitive ) { setup( caseSensitive ); } 863 QRegExpEngine( bool caseSensitive ) { setup( caseSensitive ); }
858 QRegExpEngine( const QString& rx, bool caseSensitive ); 864 QRegExpEngine( const QString& rx, bool caseSensitive );
@@ -861,25 +867,25 @@ public:
861#endif 867#endif
862 868
863 bool isValid() const { return valid; } 869 bool isValid() const { return valid; }
864 bool caseSensitive() const { return cs; } 870 bool caseSensitive() const { return cs; }
865 int numCaptures() const { return realncap; } 871 int numCaptures() const { return realncap; }
866 QArray<int> match( const QString& str, int pos, bool minimal, 872 QArray<int> match( const QString& str, int pos, bool minimal,
867 bool oneTest ); 873 bool oneTest );
868 int matchedLength() const { return mmMatchedLen; } 874 int matchedLength() const { return mmMatchedLen; }
869 875
870 int createState( QChar ch ); 876 int createState( QChar ch );
871 int createState( const CharClass& cc ); 877 int createState( const CharClass& cc );
872#ifndef QT_NO_REGEXP_BACKREF 878#ifndef QT_NO_REGEXP_BACKREF
873 int createState( int bref ); 879 int createState( int bref );
874#endif 880#endif
875 881
876 void addCatTransitions( const QArray<int>& from, const QArray<int>& to ); 882 void addCatTransitions( const QArray<int>& from, const QArray<int>& to );
877#ifndef QT_NO_REGEXP_CAPTURE 883#ifndef QT_NO_REGEXP_CAPTURE
878 void addPlusTransitions( const QArray<int>& from, const QArray<int>& to, 884 void addPlusTransitions( const QArray<int>& from, const QArray<int>& to,
879 int atom ); 885 int atom );
880#endif 886#endif
881 887
882#ifndef QT_NO_REGEXP_ANCHOR_ALT 888#ifndef QT_NO_REGEXP_ANCHOR_ALT
883 int anchorAlternation( int a, int b ); 889 int anchorAlternation( int a, int b );
884 int anchorConcatenation( int a, int b ); 890 int anchorConcatenation( int a, int b );
885#else 891#else
@@ -887,13 +893,13 @@ public:
887 int anchorConcatenation( int a, int b ) { return a | b; } 893 int anchorConcatenation( int a, int b ) { return a | b; }
888#endif 894#endif
889 void addAnchors( int from, int to, int a ); 895 void addAnchors( int from, int to, int a );
890 896
891#ifndef QT_NO_REGEXP_OPTIM 897#ifndef QT_NO_REGEXP_OPTIM
892 void setupGoodStringHeuristic( int earlyStart, int lateStart, 898 void setupGoodStringHeuristic( int earlyStart, int lateStart,
893 const QString& str ); 899 const QString& str );
894 void setupBadCharHeuristic( int minLen, const QArray<int>& firstOcc ); 900 void setupBadCharHeuristic( int minLen, const QArray<int>& firstOcc );
895 void heuristicallyChooseHeuristic(); 901 void heuristicallyChooseHeuristic();
896#endif 902#endif
897 903
898#if defined(QT_DEBUG) 904#if defined(QT_DEBUG)
899 void dump() const; 905 void dump() const;
@@ -907,98 +913,98 @@ private:
907 characters matched are stored in the state instead of on the transitions, 913 characters matched are stored in the state instead of on the transitions,
908 something possible for an automaton constructed from a regular expression. 914 something possible for an automaton constructed from a regular expression.
909 */ 915 */
910 struct State 916 struct State
911 { 917 {
912#ifndef QT_NO_REGEXP_CAPTURE 918#ifndef QT_NO_REGEXP_CAPTURE
913 int atom; // which atom does this state belong to? 919 int atom; // which atom does this state belong to?
914#endif 920#endif
915 int match; // what does it match? (see CharClassBit and BackRefBit) 921 int match; // what does it match? (see CharClassBit and BackRefBit)
916 QArray<int> outs; // out-transitions 922 QArray<int> outs; // out-transitions
917 QMap<int, int> *reenter; // atoms reentered when transiting out 923 QMap<int, int> *reenter; // atoms reentered when transiting out
918 QMap<int, int> *anchors; // anchors met when transiting out 924 QMap<int, int> *anchors; // anchors met when transiting out
919 925
920#ifndef QT_NO_REGEXP_CAPTURE 926#ifndef QT_NO_REGEXP_CAPTURE
921 State( int a, int m ) 927 State( int a, int m )
922 : atom( a ), match( m ), reenter( 0 ), anchors( 0 ) { } 928 : atom( a ), match( m ), reenter( 0 ), anchors( 0 ) { }
923#else 929#else
924 State( int m ) 930 State( int m )
925 : match( m ), reenter( 0 ), anchors( 0 ) { } 931 : match( m ), reenter( 0 ), anchors( 0 ) { }
926#endif 932#endif
927 ~State() { delete reenter; delete anchors; } 933 ~State() { delete reenter; delete anchors; }
928 }; 934 };
929 935
930#ifndef QT_NO_REGEXP_LOOKAHEAD 936#ifndef QT_NO_REGEXP_LOOKAHEAD
931 /* 937 /*
932 The struct Lookahead represents a lookahead a la Perl (e.g., (?=foo) and 938 The struct Lookahead represents a lookahead a la Perl (e.g., (?=foo) and
933 (?!bar)). 939 (?!bar)).
934 */ 940 */
935 struct Lookahead 941 struct Lookahead
936 { 942 {
937 QRegExpEngine *eng; // NFA representing the embedded regular expression 943 QRegExpEngine *eng; // NFA representing the embedded regular expression
938 bool neg; // negative lookahead? 944 bool neg; // negative lookahead?
939 945
940 Lookahead( QRegExpEngine *eng0, bool neg0 ) 946 Lookahead( QRegExpEngine *eng0, bool neg0 )
941 : eng( eng0 ), neg( neg0 ) { } 947 : eng( eng0 ), neg( neg0 ) { }
942 ~Lookahead() { delete eng; } 948 ~Lookahead() { delete eng; }
943 }; 949 };
944#endif 950#endif
945 951
946#ifndef QT_NO_REGEXP_CAPTURE 952#ifndef QT_NO_REGEXP_CAPTURE
947 /* 953 /*
948 The struct Atom represents one node in the hierarchy of regular expression 954 The struct Atom represents one node in the hierarchy of regular expression
949 atoms. 955 atoms.
950 */ 956 */
951 struct Atom 957 struct Atom
952 { 958 {
953 int parent; // index of parent in array of atoms 959 int parent; // index of parent in array of atoms
954 int capture; // index of capture, from 1 to ncap 960 int capture; // index of capture, from 1 to ncap
955 }; 961 };
956#endif 962#endif
957 963
958#ifndef QT_NO_REGEXP_ANCHOR_ALT 964#ifndef QT_NO_REGEXP_ANCHOR_ALT
959 /* 965 /*
960 The struct AnchorAlternation represents a pair of anchors with OR 966 The struct AnchorAlternation represents a pair of anchors with OR
961 semantics. 967 semantics.
962 */ 968 */
963 struct AnchorAlternation 969 struct AnchorAlternation
964 { 970 {
965 int a; // this anchor... 971 int a; // this anchor...
966 int b; // ...or this one 972 int b; // ...or this one
967 }; 973 };
968#endif 974#endif
969 975
970 enum { InitialState = 0, FinalState = 1 }; 976 enum { InitialState = 0, FinalState = 1 };
971 void setup( bool caseSensitive ); 977 void setup( bool caseSensitive );
972 int setupState( int match ); 978 int setupState( int match );
973 979
974 /* 980 /*
975 Let's hope that 13 lookaheads and 14 back-references are enough. 981 Let's hope that 13 lookaheads and 14 back-references are enough.
976 */ 982 */
977 enum { MaxLookaheads = 13, MaxBackRefs = 14 }; 983 enum { MaxLookaheads = 13, MaxBackRefs = 14 };
978 enum { Anchor_Dollar = 0x00000001, Anchor_Caret = 0x00000002, 984 enum { Anchor_Dollar = 0x00000001, Anchor_Caret = 0x00000002,
979 Anchor_Word = 0x00000004, Anchor_NonWord = 0x00000008, 985 Anchor_Word = 0x00000004, Anchor_NonWord = 0x00000008,
980 Anchor_FirstLookahead = 0x00000010, 986 Anchor_FirstLookahead = 0x00000010,
981 Anchor_BackRef1Empty = Anchor_FirstLookahead << MaxLookaheads, 987 Anchor_BackRef1Empty = Anchor_FirstLookahead << MaxLookaheads,
982 Anchor_BackRef0Empty = Anchor_BackRef1Empty >> 1, 988 Anchor_BackRef0Empty = Anchor_BackRef1Empty >> 1,
983 Anchor_Alternation = Anchor_BackRef1Empty << MaxBackRefs, 989 Anchor_Alternation = Anchor_BackRef1Empty << MaxBackRefs,
984 990
985 Anchor_LookaheadMask = ( Anchor_FirstLookahead - 1 ) ^ 991 Anchor_LookaheadMask = ( Anchor_FirstLookahead - 1 ) ^
986 ( (Anchor_FirstLookahead << MaxLookaheads) - 1 ) }; 992 ( (Anchor_FirstLookahead << MaxLookaheads) - 1 ) };
987#ifndef QT_NO_REGEXP_CAPTURE 993#ifndef QT_NO_REGEXP_CAPTURE
988 int startAtom( bool capture ); 994 int startAtom( bool capture );
989 void finishAtom( int atom ) { cf = f[atom].parent; } 995 void finishAtom( int atom ) { cf = f[atom].parent; }
990#endif 996#endif
991 997
992#ifndef QT_NO_REGEXP_LOOKAHEAD 998#ifndef QT_NO_REGEXP_LOOKAHEAD
993 int addLookahead( QRegExpEngine *eng, bool negative ); 999 int addLookahead( QRegExpEngine *eng, bool negative );
994#endif 1000#endif
995 1001
996#ifndef QT_NO_REGEXP_CAPTURE 1002#ifndef QT_NO_REGEXP_CAPTURE
997 bool isBetterCapture( const int *begin1, const int *end1, const int *begin2, 1003 bool isBetterCapture( const int *begin1, const int *end1, const int *begin2,
998 const int *end2 ); 1004 const int *end2 );
999#endif 1005#endif
1000 bool testAnchor( int i, int a, const int *capBegin ); 1006 bool testAnchor( int i, int a, const int *capBegin );
1001 1007
1002#ifndef QT_NO_REGEXP_OPTIM 1008#ifndef QT_NO_REGEXP_OPTIM
1003 bool goodStringMatch(); 1009 bool goodStringMatch();
1004 bool badCharMatch(); 1010 bool badCharMatch();
@@ -1052,70 +1058,70 @@ private:
1052 1058
1053 Its interface is ugly for performance reasons. 1059 Its interface is ugly for performance reasons.
1054 */ 1060 */
1055 class Box 1061 class Box
1056 { 1062 {
1057 public: 1063 public:
1058 Box( QRegExpEngine *engine ); 1064 Box( QRegExpEngine *engine );
1059 Box( const Box& b ) { operator=( b ); } 1065 Box( const Box& b ) { operator=( b ); }
1060 1066
1061 Box& operator=( const Box& b ); 1067 Box& operator=( const Box& b );
1062 1068
1063 void clear() { operator=(Box(eng)); } 1069 void clear() { operator=(Box(eng)); }
1064 void set( QChar ch ); 1070 void set( QChar ch );
1065 void set( const CharClass& cc ); 1071 void set( const CharClass& cc );
1066#ifndef QT_NO_REGEXP_BACKREF 1072#ifndef QT_NO_REGEXP_BACKREF
1067 void set( int bref ); 1073 void set( int bref );
1068#endif 1074#endif
1069 1075
1070 void cat( const Box& b ); 1076 void cat( const Box& b );
1071 void orx( const Box& b ); 1077 void orx( const Box& b );
1072 void plus( int atom ); 1078 void plus( int atom );
1073 void opt(); 1079 void opt();
1074 void catAnchor( int a ); 1080 void catAnchor( int a );
1075#ifndef QT_NO_REGEXP_OPTIM 1081#ifndef QT_NO_REGEXP_OPTIM
1076 void setupHeuristics(); 1082 void setupHeuristics();
1077#endif 1083#endif
1078 1084
1079#if defined(QT_DEBUG) 1085#if defined(QT_DEBUG)
1080 void dump() const; 1086 void dump() const;
1081#endif 1087#endif
1082 1088
1083 private: 1089 private:
1084 void addAnchorsToEngine( const Box& to ) const; 1090 void addAnchorsToEngine( const Box& to ) const;
1085 1091
1086 QRegExpEngine *eng; // the automaton under construction 1092 QRegExpEngine *eng; // the automaton under construction
1087 QArray<int> ls; // the left states (firstpos) 1093 QArray<int> ls; // the left states (firstpos)
1088 QArray<int> rs; // the right states (lastpos) 1094 QArray<int> rs; // the right states (lastpos)
1089 QMap<int, int> lanchors; // the left anchors 1095 QMap<int, int> lanchors; // the left anchors
1090 QMap<int, int> ranchors; // the right anchors 1096 QMap<int, int> ranchors; // the right anchors
1091 int skipanchors; // the anchors to match if the box is skipped 1097 int skipanchors; // the anchors to match if the box is skipped
1092 1098
1093#ifndef QT_NO_REGEXP_OPTIM 1099#ifndef QT_NO_REGEXP_OPTIM
1094 int earlyStart; // the index where str can first occur 1100 int earlyStart; // the index where str can first occur
1095 int lateStart; // the index where str can last occur 1101 int lateStart; // the index where str can last occur
1096 QString str; // a string that has to occur in any match 1102 QString str; // a string that has to occur in any match
1097 QString leftStr; // a string occurring at the left of this box 1103 QString leftStr; // a string occurring at the left of this box
1098 QString rightStr; // a string occurring at the right of this box 1104 QString rightStr; // a string occurring at the right of this box
1099 int maxl; // the maximum length of this box (possibly InftyLen) 1105 int maxl; // the maximum length of this box (possibly InftyLen)
1100#endif 1106#endif
1101 1107
1102 int minl; // the minimum length of this box 1108 int minl; // the minimum length of this box
1103#ifndef QT_NO_REGEXP_OPTIM 1109#ifndef QT_NO_REGEXP_OPTIM
1104 QArray<int> occ1; // first-occurrence array 1110 QArray<int> occ1; // first-occurrence array
1105#endif 1111#endif
1106 }; 1112 };
1107 friend class Box; 1113 friend class Box;
1108 1114
1109 /* 1115 /*
1110 This is the lexical analyzer for regular expressions. 1116 This is the lexical analyzer for regular expressions.
1111 */ 1117 */
1112 enum { Tok_Eos, Tok_Dollar, Tok_LeftParen, Tok_MagicLeftParen, 1118 enum { Tok_Eos, Tok_Dollar, Tok_LeftParen, Tok_MagicLeftParen,
1113 Tok_PosLookahead, Tok_NegLookahead, Tok_RightParen, Tok_CharClass, 1119 Tok_PosLookahead, Tok_NegLookahead, Tok_RightParen, Tok_CharClass,
1114 Tok_Caret, Tok_Quantifier, Tok_Bar, Tok_Word, Tok_NonWord, 1120 Tok_Caret, Tok_Quantifier, Tok_Bar, Tok_Word, Tok_NonWord,
1115 Tok_Char = 0x10000, Tok_BackRef = 0x20000 }; 1121 Tok_Char = 0x10000, Tok_BackRef = 0x20000 };
1116 int getChar(); 1122 int getChar();
1117 int getEscape(); 1123 int getEscape();
1118#ifndef QT_NO_REGEXP_INTERVAL 1124#ifndef QT_NO_REGEXP_INTERVAL
1119 int getRep( int def ); 1125 int getRep( int def );
1120#endif 1126#endif
1121#ifndef QT_NO_REGEXP_LOOKAHEAD 1127#ifndef QT_NO_REGEXP_LOOKAHEAD
@@ -1186,64 +1192,64 @@ QRegExpEngine::QRegExpEngine( const QString& rx, bool caseSensitive )
1186} 1192}
1187 1193
1188#ifndef QT_NO_REGEXP_OPTIM 1194#ifndef QT_NO_REGEXP_OPTIM
1189QRegExpEngine::~QRegExpEngine() 1195QRegExpEngine::~QRegExpEngine()
1190{ 1196{
1191 if ( --engCount == 0 ) { 1197 if ( --engCount == 0 ) {
1192 delete noOccurrences; 1198 delete noOccurrences;
1193 noOccurrences = 0; 1199 noOccurrences = 0;
1194 delete firstOccurrenceAtZero; 1200 delete firstOccurrenceAtZero;
1195 firstOccurrenceAtZero = 0; 1201 firstOccurrenceAtZero = 0;
1196 } 1202 }
1197} 1203}
1198#endif 1204#endif
1199 1205
1200/* 1206/*
1201 Tries to match in str and returns an array of (begin, length) pairs for 1207 Tries to match in str and returns an array of (begin, length) pairs for
1202 captured text. If there is no match, all pairs are (-1, -1). 1208 captured text. If there is no match, all pairs are (-1, -1).
1203*/ 1209*/
1204QArray<int> QRegExpEngine::match( const QString& str, int pos, bool minimal, 1210QArray<int> QRegExpEngine::match( const QString& str, int pos, bool minimal,
1205 bool oneTest ) 1211 bool oneTest )
1206{ 1212{
1207 mmStr = &str; 1213 mmStr = &str;
1208 mmIn = str.unicode(); 1214 mmIn = str.unicode();
1209 if ( mmIn == 0 ) 1215 if ( mmIn == 0 )
1210 mmIn = &QChar::null; 1216 mmIn = &QChar::null;
1211 mmPos = pos; 1217 mmPos = pos;
1212 mmLen = str.length(); 1218 mmLen = str.length();
1213 mmMinimal = minimal; 1219 mmMinimal = minimal;
1214 mmMatchedLen = 0; 1220 mmMatchedLen = 0;
1215 1221
1216 bool matched = FALSE; 1222 bool matched = FALSE;
1217 if ( valid && mmPos >= 0 && mmPos <= mmLen ) { 1223 if ( valid && mmPos >= 0 && mmPos <= mmLen ) {
1218#ifndef QT_NO_REGEXP_OPTIM 1224#ifndef QT_NO_REGEXP_OPTIM
1219 if ( mmPos <= mmLen - minl ) { 1225 if ( mmPos <= mmLen - minl ) {
1220 if ( caretAnchored || oneTest ) 1226 if ( caretAnchored || oneTest )
1221 matched = matchHere(); 1227 matched = matchHere();
1222 else if ( useGoodStringHeuristic ) 1228 else if ( useGoodStringHeuristic )
1223 matched = goodStringMatch(); 1229 matched = goodStringMatch();
1224 else 1230 else
1225 matched = badCharMatch(); 1231 matched = badCharMatch();
1226 } 1232 }
1227#else 1233#else
1228 matched = oneTest ? matchHere() : bruteMatch(); 1234 matched = oneTest ? matchHere() : bruteMatch();
1229#endif 1235#endif
1230 } 1236 }
1231 1237
1232 if ( matched ) { 1238 if ( matched ) {
1233 mmCaptured.detach(); 1239 mmCaptured.detach();
1234 mmCaptured[0] = mmPos; 1240 mmCaptured[0] = mmPos;
1235 mmCaptured[1] = mmMatchedLen; 1241 mmCaptured[1] = mmMatchedLen;
1236 for ( int j = 0; j < realncap; j++ ) { 1242 for ( int j = 0; j < realncap; j++ ) {
1237 int len = mmCapEnd[j] - mmCapBegin[j]; 1243 int len = mmCapEnd[j] - mmCapBegin[j];
1238 mmCaptured[2 + 2 * j] = len > 0 ? mmPos + mmCapBegin[j] : 0; 1244 mmCaptured[2 + 2 * j] = len > 0 ? mmPos + mmCapBegin[j] : 0;
1239 mmCaptured[2 + 2 * j + 1] = len; 1245 mmCaptured[2 + 2 * j + 1] = len;
1240 } 1246 }
1241 return mmCaptured; 1247 return mmCaptured;
1242 } else { 1248 } else {
1243 return mmCapturedNoMatch; 1249 return mmCapturedNoMatch;
1244 } 1250 }
1245} 1251}
1246 1252
1247/* 1253/*
1248 The three following functions add one state to the automaton and return the 1254 The three following functions add one state to the automaton and return the
1249 number of the state. 1255 number of the state.
@@ -1268,17 +1274,17 @@ int QRegExpEngine::createState( const CharClass& cc )
1268} 1274}
1269 1275
1270#ifndef QT_NO_REGEXP_BACKREF 1276#ifndef QT_NO_REGEXP_BACKREF
1271int QRegExpEngine::createState( int bref ) 1277int QRegExpEngine::createState( int bref )
1272{ 1278{
1273 if ( bref > nbrefs ) { 1279 if ( bref > nbrefs ) {
1274 nbrefs = bref; 1280 nbrefs = bref;
1275 if ( nbrefs > MaxBackRefs ) { 1281 if ( nbrefs > MaxBackRefs ) {
1276 yyError = TRUE; 1282 yyError = TRUE;
1277 return 0; 1283 return 0;
1278 } 1284 }
1279 } 1285 }
1280 return setupState( BackRefBit | bref ); 1286 return setupState( BackRefBit | bref );
1281} 1287}
1282#endif 1288#endif
1283 1289
1284/* 1290/*
@@ -1286,49 +1292,49 @@ int QRegExpEngine::createState( int bref )
1286 (i, j) where i is found in from, and j is found in to. 1292 (i, j) where i is found in from, and j is found in to.
1287 1293
1288 Cat-transitions are distinguished from plus-transitions for capturing. 1294 Cat-transitions are distinguished from plus-transitions for capturing.
1289*/ 1295*/
1290 1296
1291void QRegExpEngine::addCatTransitions( const QArray<int>& from, 1297void QRegExpEngine::addCatTransitions( const QArray<int>& from,
1292 const QArray<int>& to ) 1298 const QArray<int>& to )
1293{ 1299{
1294 for ( int i = 0; i < (int) from.size(); i++ ) { 1300 for ( int i = 0; i < (int) from.size(); i++ ) {
1295 State *st = s[from[i]]; 1301 State *st = s[from[i]];
1296 mergeInto( &st->outs, to ); 1302 mergeInto( &st->outs, to );
1297 } 1303 }
1298} 1304}
1299 1305
1300#ifndef QT_NO_REGEXP_CAPTURE 1306#ifndef QT_NO_REGEXP_CAPTURE
1301void QRegExpEngine::addPlusTransitions( const QArray<int>& from, 1307void QRegExpEngine::addPlusTransitions( const QArray<int>& from,
1302 const QArray<int>& to, int atom ) 1308 const QArray<int>& to, int atom )
1303{ 1309{
1304 for ( int i = 0; i < (int) from.size(); i++ ) { 1310 for ( int i = 0; i < (int) from.size(); i++ ) {
1305 State *st = s[from[i]]; 1311 State *st = s[from[i]];
1306 QArray<int> oldOuts = st->outs.copy(); 1312 QArray<int> oldOuts = st->outs.copy();
1307 mergeInto( &st->outs, to ); 1313 mergeInto( &st->outs, to );
1308 if ( f[atom].capture >= 0 ) { 1314 if ( f[atom].capture >= 0 ) {
1309 if ( st->reenter == 0 ) 1315 if ( st->reenter == 0 )
1310 st->reenter = new QMap<int, int>; 1316 st->reenter = new QMap<int, int>;
1311 for ( int j = 0; j < (int) to.size(); j++ ) { 1317 for ( int j = 0; j < (int) to.size(); j++ ) {
1312 if ( !st->reenter->contains(to[j]) && 1318 if ( !st->reenter->contains(to[j]) &&
1313 oldOuts.bsearch(to[j]) < 0 ) 1319 oldOuts.bsearch(to[j]) < 0 )
1314 st->reenter->insert( to[j], atom ); 1320 st->reenter->insert( to[j], atom );
1315 } 1321 }
1316 } 1322 }
1317 } 1323 }
1318} 1324}
1319#endif 1325#endif
1320 1326
1321#ifndef QT_NO_REGEXP_ANCHOR_ALT 1327#ifndef QT_NO_REGEXP_ANCHOR_ALT
1322/* 1328/*
1323 Returns an anchor that means a OR b. 1329 Returns an anchor that means a OR b.
1324*/ 1330*/
1325int QRegExpEngine::anchorAlternation( int a, int b ) 1331int QRegExpEngine::anchorAlternation( int a, int b )
1326{ 1332{
1327 if ( ((a & b) == a || (a & b) == b) && ((a | b) & Anchor_Alternation) == 0 ) 1333 if ( ((a & b) == a || (a & b) == b) && ((a | b) & Anchor_Alternation) == 0 )
1328 return a & b; 1334 return a & b;
1329 1335
1330 int n = aa.size(); 1336 int n = aa.size();
1331 aa.resize( n + 1 ); 1337 aa.resize( n + 1 );
1332 aa[n].a = a; 1338 aa[n].a = a;
1333 aa[n].b = b; 1339 aa[n].b = b;
1334 return Anchor_Alternation | n; 1340 return Anchor_Alternation | n;
@@ -1337,15 +1343,15 @@ int QRegExpEngine::anchorAlternation( int a, int b )
1337/* 1343/*
1338 Returns an anchor that means a AND b. 1344 Returns an anchor that means a AND b.
1339*/ 1345*/
1340int QRegExpEngine::anchorConcatenation( int a, int b ) 1346int QRegExpEngine::anchorConcatenation( int a, int b )
1341{ 1347{
1342 if ( ((a | b) & Anchor_Alternation) == 0 ) 1348 if ( ((a | b) & Anchor_Alternation) == 0 )
1343 return a | b; 1349 return a | b;
1344 if ( (b & Anchor_Alternation) != 0 ) 1350 if ( (b & Anchor_Alternation) != 0 )
1345 qSwap( a, b ); 1351 qSwap( a, b );
1346 int aprime = anchorConcatenation( aa[a ^ Anchor_Alternation].a, b ); 1352 int aprime = anchorConcatenation( aa[a ^ Anchor_Alternation].a, b );
1347 int bprime = anchorConcatenation( aa[a ^ Anchor_Alternation].b, b ); 1353 int bprime = anchorConcatenation( aa[a ^ Anchor_Alternation].b, b );
1348 return anchorAlternation( aprime, bprime ); 1354 return anchorAlternation( aprime, bprime );
1349} 1355}
1350#endif 1356#endif
1351 1357
@@ -1353,34 +1359,34 @@ int QRegExpEngine::anchorConcatenation( int a, int b )
1353 Adds anchor a on a transition characterised by its from state and its to state. 1359 Adds anchor a on a transition characterised by its from state and its to state.
1354*/ 1360*/
1355void QRegExpEngine::addAnchors( int from, int to, int a ) 1361void QRegExpEngine::addAnchors( int from, int to, int a )
1356{ 1362{
1357 State *st = s[from]; 1363 State *st = s[from];
1358 if ( st->anchors == 0 ) 1364 if ( st->anchors == 0 )
1359 st->anchors = new QMap<int, int>; 1365 st->anchors = new QMap<int, int>;
1360 if ( st->anchors->contains(to) ) 1366 if ( st->anchors->contains(to) )
1361 a = anchorAlternation( (*st->anchors)[to], a ); 1367 a = anchorAlternation( (*st->anchors)[to], a );
1362 st->anchors->insert( to, a ); 1368 st->anchors->insert( to, a );
1363} 1369}
1364 1370
1365#ifndef QT_NO_REGEXP_OPTIM 1371#ifndef QT_NO_REGEXP_OPTIM
1366/* 1372/*
1367 The two following functions provide the engine with the information needed by 1373 The two following functions provide the engine with the information needed by
1368 its matching heuristics. 1374 its matching heuristics.
1369*/ 1375*/
1370 1376
1371void QRegExpEngine::setupGoodStringHeuristic( int earlyStart, int lateStart, 1377void QRegExpEngine::setupGoodStringHeuristic( int earlyStart, int lateStart,
1372 const QString& str ) 1378 const QString& str )
1373{ 1379{
1374 goodEarlyStart = earlyStart; 1380 goodEarlyStart = earlyStart;
1375 goodLateStart = lateStart; 1381 goodLateStart = lateStart;
1376 goodStr = cs ? str : str.lower(); 1382 goodStr = cs ? str : str.lower();
1377} 1383}
1378 1384
1379void QRegExpEngine::setupBadCharHeuristic( int minLen, 1385void QRegExpEngine::setupBadCharHeuristic( int minLen,
1380 const QArray<int>& firstOcc ) 1386 const QArray<int>& firstOcc )
1381{ 1387{
1382 minl = minLen; 1388 minl = minLen;
1383 occ1 = cs ? firstOcc : *firstOccurrenceAtZero; 1389 occ1 = cs ? firstOcc : *firstOccurrenceAtZero;
1384} 1390}
1385 1391
1386/* 1392/*
@@ -1397,99 +1403,99 @@ void QRegExpEngine::setupBadCharHeuristic( int minLen,
1397*/ 1403*/
1398void QRegExpEngine::heuristicallyChooseHeuristic() 1404void QRegExpEngine::heuristicallyChooseHeuristic()
1399{ 1405{
1400 int i; 1406 int i;
1401 1407
1402 if ( minl == 0 ) 1408 if ( minl == 0 )
1403 return; 1409 return;
1404 1410
1405 /* 1411 /*
1406 Magic formula: The good string has to constitute a good proportion of the 1412 Magic formula: The good string has to constitute a good proportion of the
1407 minimum-length string, and appear at a more-or-less known index. 1413 minimum-length string, and appear at a more-or-less known index.
1408 */ 1414 */
1409 int goodStringScore = ( 64 * goodStr.length() / minl ) - 1415 int goodStringScore = ( 64 * goodStr.length() / minl ) -
1410 ( goodLateStart - goodEarlyStart ); 1416 ( goodLateStart - goodEarlyStart );
1411 1417
1412 /* 1418 /*
1413 Less magic formula: We pick a couple of characters at random, and check 1419 Less magic formula: We pick a couple of characters at random, and check
1414 whether they are good or bad. 1420 whether they are good or bad.
1415 */ 1421 */
1416 int badCharScore = 0; 1422 int badCharScore = 0;
1417 int step = QMAX( 1, NumBadChars / 32 ); 1423 int step = QMAX( 1, NumBadChars / 32 );
1418 for ( i = 1; i < NumBadChars; i += step ) { 1424 for ( i = 1; i < NumBadChars; i += step ) {
1419 if ( occ1[i] == NoOccurrence ) 1425 if ( occ1[i] == NoOccurrence )
1420 badCharScore += minl; 1426 badCharScore += minl;
1421 else 1427 else
1422 badCharScore += occ1[i]; 1428 badCharScore += occ1[i];
1423 } 1429 }
1424 badCharScore /= minl; 1430 badCharScore /= minl;
1425 1431
1426 useGoodStringHeuristic = ( goodStringScore > badCharScore ); 1432 useGoodStringHeuristic = ( goodStringScore > badCharScore );
1427} 1433}
1428#endif 1434#endif
1429 1435
1430#if defined(QT_DEBUG) 1436#if defined(QT_DEBUG)
1431void QRegExpEngine::dump() const 1437void QRegExpEngine::dump() const
1432{ 1438{
1433 int i, j; 1439 int i, j;
1434 qDebug( "Case %ssensitive engine", cs ? "" : "in" ); 1440 odebug << "Case " << (cs ? "" : "in") << "sensitive engine" << oendl;
1435 qDebug( " States" ); 1441 odebug << " States" << oendl;
1436 for ( i = 0; i < ns; i++ ) { 1442 for ( i = 0; i < ns; i++ ) {
1437 qDebug( " %d%s", i, 1443 odebug << " " << i
1438 i == InitialState ? " (initial)" : 1444 << (i == InitialState ? " (initial)" : i == FinalState ? " (final)" : "") << oendl;
1439 i == FinalState ? " (final)" : "" ); 1445
1440#ifndef QT_NO_REGEXP_CAPTURE 1446#ifndef QT_NO_REGEXP_CAPTURE
1441 qDebug( " in atom %d", s[i]->atom ); 1447 odebug << " in atom " << s[i]->atom << oendl;
1442#endif 1448#endif
1443 int m = s[i]->match; 1449 int m = s[i]->match;
1444 if ( (m & CharClassBit) != 0 ) { 1450 if ( (m & CharClassBit) != 0 ) {
1445 qDebug( " match character class %d", m ^ CharClassBit ); 1451 odebug << " match character class " << (m ^ CharClassBit) << oendl;
1446#ifndef QT_NO_REGEXP_CCLASS 1452#ifndef QT_NO_REGEXP_CCLASS
1447 cl[m ^ CharClassBit]->dump(); 1453 cl[m ^ CharClassBit]->dump();
1448#else 1454#else
1449 qDebug( " negative character class" ); 1455 odebug << " negative character class" << oendl;
1450#endif 1456#endif
1451 } else if ( (m & BackRefBit) != 0 ) { 1457 } else if ( (m & BackRefBit) != 0 ) {
1452 qDebug( " match back-reference %d", m ^ BackRefBit ); 1458 odebug << " match back-reference " << (m ^ BackRefBit) << oendl;
1453 } else if ( m >= 0x20 && m <= 0x7e ) { 1459 } else if ( m >= 0x20 && m <= 0x7e ) {
1454 qDebug( " match 0x%.4x (%c)", m, m ); 1460 odebug << " match " << QString().sprintf( "0x%.4x", m) << " (" << m << ")" << oendl;
1455 } else { 1461
1456 qDebug( " match 0x%.4x", m ); 1462 } else {
1457 } 1463 odebug << " match " << QString().sprintf( "0x%.4x", m) << oendl;
1458 for ( j = 0; j < (int) s[i]->outs.size(); j++ ) { 1464 }
1459 int next = s[i]->outs[j]; 1465 for ( j = 0; j < (int) s[i]->outs.size(); j++ ) {
1460 qDebug( " -> %d", next ); 1466 int next = s[i]->outs[j];
1461 if ( s[i]->reenter != 0 && s[i]->reenter->contains(next) ) 1467 odebug << " -> " << next << oendl;
1462 qDebug( " [reenter %d]", (*s[i]->reenter)[next] ); 1468 if ( s[i]->reenter != 0 && s[i]->reenter->contains(next) )
1463 if ( s[i]->anchors != 0 && at(*s[i]->anchors, next) != 0 ) 1469 odebug << " [reenter " << (*s[i]->reenter)[next] << "]" << oendl;
1464 qDebug( " [anchors 0x%.8x]", (*s[i]->anchors)[next] ); 1470 if ( s[i]->anchors != 0 && at(*s[i]->anchors, next) != 0 )
1465 } 1471 odebug << " [anchors " << QString().sprintf( "0x%.8x]", (*s[i]->anchors)[next] ) << oendl;
1472 }
1466 } 1473 }
1467#ifndef QT_NO_REGEXP_CAPTURE 1474#ifndef QT_NO_REGEXP_CAPTURE
1468 if ( nf > 0 ) { 1475 if ( nf > 0 ) {
1469 qDebug( " Atom Parent Capture" ); 1476 odebug << " Atom Parent Capture" << oendl;
1470 for ( i = 0; i < nf; i++ ) 1477 for ( i = 0; i < nf; i++ )
1471 qDebug( " %6d %6d %6d", i, f[i].parent, f[i].capture ); 1478 odebug << QString().sprintf(" %6d %6d %6d", i, f[i].parent, f[i].capture ) << oendl;
1472 } 1479 }
1473#endif 1480#endif
1474#ifndef QT_NO_REGEXP_ANCHOR_ALT 1481#ifndef QT_NO_REGEXP_ANCHOR_ALT
1475 for ( i = 0; i < (int) aa.size(); i++ ) 1482 for ( i = 0; i < (int) aa.size(); i++ )
1476 qDebug( " Anchor alternation 0x%.8x: 0x%.8x 0x%.9x", i, aa[i].a, 1483 odebug << QString().sprintf(" Anchor alternation 0x%.8x: 0x%.8x 0x%.9x", i, aa[i].a, aa[i].b ) << oendl;
1477 aa[i].b );
1478#endif 1484#endif
1479} 1485}
1480#endif 1486#endif
1481 1487
1482void QRegExpEngine::setup( bool caseSensitive ) 1488void QRegExpEngine::setup( bool caseSensitive )
1483{ 1489{
1484#ifndef QT_NO_REGEXP_OPTIM 1490#ifndef QT_NO_REGEXP_OPTIM
1485 if ( engCount++ == 0 ) { 1491 if ( engCount++ == 0 ) {
1486 noOccurrences = new QArray<int>( NumBadChars ); 1492 noOccurrences = new QArray<int>( NumBadChars );
1487 firstOccurrenceAtZero = new QArray<int>( NumBadChars ); 1493 firstOccurrenceAtZero = new QArray<int>( NumBadChars );
1488 noOccurrences->fill( NoOccurrence ); 1494 noOccurrences->fill( NoOccurrence );
1489 firstOccurrenceAtZero->fill( 0 ); 1495 firstOccurrenceAtZero->fill( 0 );
1490 } 1496 }
1491#endif 1497#endif
1492 s.setAutoDelete( TRUE ); 1498 s.setAutoDelete( TRUE );
1493 s.resize( 32 ); 1499 s.resize( 32 );
1494 ns = 0; 1500 ns = 0;
1495#ifndef QT_NO_REGEXP_CAPTURE 1501#ifndef QT_NO_REGEXP_CAPTURE
@@ -1521,13 +1527,13 @@ void QRegExpEngine::setup( bool caseSensitive )
1521 mmCapturedNoMatch.fill( -1, 2 ); 1527 mmCapturedNoMatch.fill( -1, 2 );
1522} 1528}
1523 1529
1524int QRegExpEngine::setupState( int match ) 1530int QRegExpEngine::setupState( int match )
1525{ 1531{
1526 if ( (ns & (ns + 1)) == 0 && ns + 1 >= (int) s.size() ) 1532 if ( (ns & (ns + 1)) == 0 && ns + 1 >= (int) s.size() )
1527 s.resize( (ns + 1) << 1 ); 1533 s.resize( (ns + 1) << 1 );
1528#ifndef QT_NO_REGEXP_CAPTURE 1534#ifndef QT_NO_REGEXP_CAPTURE
1529 s.insert( ns, new State(cf, match) ); 1535 s.insert( ns, new State(cf, match) );
1530#else 1536#else
1531 s.insert( ns, new State(match) ); 1537 s.insert( ns, new State(match) );
1532#endif 1538#endif
1533 return ns++; 1539 return ns++;
@@ -1539,13 +1545,13 @@ int QRegExpEngine::setupState( int match )
1539 When a state is created, it is assigned to the current atom. The information 1545 When a state is created, it is assigned to the current atom. The information
1540 is later used for capturing. 1546 is later used for capturing.
1541*/ 1547*/
1542int QRegExpEngine::startAtom( bool capture ) 1548int QRegExpEngine::startAtom( bool capture )
1543{ 1549{
1544 if ( (nf & (nf + 1)) == 0 && nf + 1 >= (int) f.size() ) 1550 if ( (nf & (nf + 1)) == 0 && nf + 1 >= (int) f.size() )
1545 f.resize( (nf + 1) << 1 ); 1551 f.resize( (nf + 1) << 1 );
1546 f[nf].parent = cf; 1552 f[nf].parent = cf;
1547 cf = nf++; 1553 cf = nf++;
1548 f[cf].capture = capture ? ncap++ : -1; 1554 f[cf].capture = capture ? ncap++ : -1;
1549 return cf; 1555 return cf;
1550} 1556}
1551#endif 1557#endif
@@ -1555,35 +1561,35 @@ int QRegExpEngine::startAtom( bool capture )
1555 Creates a lookahead anchor. 1561 Creates a lookahead anchor.
1556*/ 1562*/
1557int QRegExpEngine::addLookahead( QRegExpEngine *eng, bool negative ) 1563int QRegExpEngine::addLookahead( QRegExpEngine *eng, bool negative )
1558{ 1564{
1559 int n = ahead.size(); 1565 int n = ahead.size();
1560 if ( n == MaxLookaheads ) { 1566 if ( n == MaxLookaheads ) {
1561 yyError = TRUE; 1567 yyError = TRUE;
1562 return 0; 1568 return 0;
1563 } 1569 }
1564 ahead.resize( n + 1 ); 1570 ahead.resize( n + 1 );
1565 ahead.insert( n, new Lookahead(eng, negative) ); 1571 ahead.insert( n, new Lookahead(eng, negative) );
1566 return Anchor_FirstLookahead << n; 1572 return Anchor_FirstLookahead << n;
1567} 1573}
1568#endif 1574#endif
1569 1575
1570#ifndef QT_NO_REGEXP_CAPTURE 1576#ifndef QT_NO_REGEXP_CAPTURE
1571/* 1577/*
1572 We want the longest leftmost captures. 1578 We want the longest leftmost captures.
1573*/ 1579*/
1574bool QRegExpEngine::isBetterCapture( const int *begin1, const int *end1, 1580bool QRegExpEngine::isBetterCapture( const int *begin1, const int *end1,
1575 const int *begin2, const int *end2 ) 1581 const int *begin2, const int *end2 )
1576{ 1582{
1577 for ( int i = 0; i < ncap; i++ ) { 1583 for ( int i = 0; i < ncap; i++ ) {
1578 int delta = begin2[i] - begin1[i]; // it has to start early... 1584 int delta = begin2[i] - begin1[i]; // it has to start early...
1579 if ( delta == 0 ) 1585 if ( delta == 0 )
1580 delta = end1[i] - end2[i]; // ...and end late (like a party) 1586 delta = end1[i] - end2[i]; // ...and end late (like a party)
1581 1587
1582 if ( delta != 0 ) 1588 if ( delta != 0 )
1583 return delta > 0; 1589 return delta > 0;
1584 } 1590 }
1585 return FALSE; 1591 return FALSE;
1586} 1592}
1587#endif 1593#endif
1588 1594
1589/* 1595/*
@@ -1593,61 +1599,61 @@ bool QRegExpEngine::isBetterCapture( const int *begin1, const int *end1,
1593bool QRegExpEngine::testAnchor( int i, int a, const int *capBegin ) 1599bool QRegExpEngine::testAnchor( int i, int a, const int *capBegin )
1594{ 1600{
1595 int j; 1601 int j;
1596 1602
1597#ifndef QT_NO_REGEXP_ANCHOR_ALT 1603#ifndef QT_NO_REGEXP_ANCHOR_ALT
1598 if ( (a & Anchor_Alternation) != 0 ) { 1604 if ( (a & Anchor_Alternation) != 0 ) {
1599 return testAnchor( i, aa[a ^ Anchor_Alternation].a, capBegin ) || 1605 return testAnchor( i, aa[a ^ Anchor_Alternation].a, capBegin ) ||
1600 testAnchor( i, aa[a ^ Anchor_Alternation].b, capBegin ); 1606 testAnchor( i, aa[a ^ Anchor_Alternation].b, capBegin );
1601 } 1607 }
1602#endif 1608#endif
1603 1609
1604 if ( (a & Anchor_Caret) != 0 ) { 1610 if ( (a & Anchor_Caret) != 0 ) {
1605 if ( mmPos + i != 0 ) 1611 if ( mmPos + i != 0 )
1606 return FALSE; 1612 return FALSE;
1607 } 1613 }
1608 if ( (a & Anchor_Dollar) != 0 ) { 1614 if ( (a & Anchor_Dollar) != 0 ) {
1609 if ( mmPos + i != mmLen ) 1615 if ( mmPos + i != mmLen )
1610 return FALSE; 1616 return FALSE;
1611 } 1617 }
1612#ifndef QT_NO_REGEXP_ESCAPE 1618#ifndef QT_NO_REGEXP_ESCAPE
1613 if ( (a & (Anchor_Word | Anchor_NonWord)) != 0 ) { 1619 if ( (a & (Anchor_Word | Anchor_NonWord)) != 0 ) {
1614 bool before = FALSE, after = FALSE; 1620 bool before = FALSE, after = FALSE;
1615 if ( mmPos + i != 0 ) 1621 if ( mmPos + i != 0 )
1616 before = mmIn[mmPos + i - 1].isLetterOrNumber(); 1622 before = mmIn[mmPos + i - 1].isLetterOrNumber();
1617 if ( mmPos + i != mmLen ) 1623 if ( mmPos + i != mmLen )
1618 after = mmIn[mmPos + i].isLetterOrNumber(); 1624 after = mmIn[mmPos + i].isLetterOrNumber();
1619 if ( (a & Anchor_Word) != 0 && (before == after) ) 1625 if ( (a & Anchor_Word) != 0 && (before == after) )
1620 return FALSE; 1626 return FALSE;
1621 if ( (a & Anchor_NonWord) != 0 && (before != after) ) 1627 if ( (a & Anchor_NonWord) != 0 && (before != after) )
1622 return FALSE; 1628 return FALSE;
1623 } 1629 }
1624#endif 1630#endif
1625#ifndef QT_NO_REGEXP_LOOKAHEAD 1631#ifndef QT_NO_REGEXP_LOOKAHEAD
1626 bool catchx = TRUE; 1632 bool catchx = TRUE;
1627 1633
1628 if ( (a & Anchor_LookaheadMask) != 0 ) { 1634 if ( (a & Anchor_LookaheadMask) != 0 ) {
1629 QConstString cstr = QConstString( (QChar *) mmIn + mmPos + i, 1635 QConstString cstr = QConstString( (QChar *) mmIn + mmPos + i,
1630 mmLen - mmPos - i ); 1636 mmLen - mmPos - i );
1631 for ( j = 0; j < (int) ahead.size(); j++ ) { 1637 for ( j = 0; j < (int) ahead.size(); j++ ) {
1632 if ( (a & (Anchor_FirstLookahead << j)) != 0 ) { 1638 if ( (a & (Anchor_FirstLookahead << j)) != 0 ) {
1633 catchx = ( ahead[j]->eng->match(cstr.string(), 0, TRUE, 1639 catchx = ( ahead[j]->eng->match(cstr.string(), 0, TRUE,
1634 TRUE)[0] == 0 ); 1640 TRUE)[0] == 0 );
1635 if ( catchx == ahead[j]->neg ) 1641 if ( catchx == ahead[j]->neg )
1636 return FALSE; 1642 return FALSE;
1637 } 1643 }
1638 } 1644 }
1639 } 1645 }
1640#endif 1646#endif
1641#ifndef QT_NO_REGEXP_CAPTURE 1647#ifndef QT_NO_REGEXP_CAPTURE
1642#ifndef QT_NO_REGEXP_BACKREF 1648#ifndef QT_NO_REGEXP_BACKREF
1643 for ( j = 0; j < nbrefs; j++ ) { 1649 for ( j = 0; j < nbrefs; j++ ) {
1644 if ( (a & (Anchor_BackRef1Empty << j)) != 0 ) { 1650 if ( (a & (Anchor_BackRef1Empty << j)) != 0 ) {
1645 if ( capBegin[j] != EmptyCapture ) 1651 if ( capBegin[j] != EmptyCapture )
1646 return FALSE; 1652 return FALSE;
1647 } 1653 }
1648 } 1654 }
1649#endif 1655#endif
1650#endif 1656#endif
1651 return TRUE; 1657 return TRUE;
1652} 1658}
1653 1659
@@ -1660,23 +1666,23 @@ bool QRegExpEngine::testAnchor( int i, int a, const int *capBegin )
1660 1666
1661bool QRegExpEngine::goodStringMatch() 1667bool QRegExpEngine::goodStringMatch()
1662{ 1668{
1663 int k = mmPos + goodEarlyStart; 1669 int k = mmPos + goodEarlyStart;
1664 1670
1665 while ( (k = mmStr->find(goodStr, k, cs)) != -1 ) { 1671 while ( (k = mmStr->find(goodStr, k, cs)) != -1 ) {
1666 int from = k - goodLateStart; 1672 int from = k - goodLateStart;
1667 int to = k - goodEarlyStart; 1673 int to = k - goodEarlyStart;
1668 if ( from > mmPos ) 1674 if ( from > mmPos )
1669 mmPos = from; 1675 mmPos = from;
1670 1676
1671 while ( mmPos <= to ) { 1677 while ( mmPos <= to ) {
1672 if ( matchHere() ) 1678 if ( matchHere() )
1673 return TRUE; 1679 return TRUE;
1674 mmPos++; 1680 mmPos++;
1675 } 1681 }
1676 k++; 1682 k++;
1677 } 1683 }
1678 return FALSE; 1684 return FALSE;
1679} 1685}
1680 1686
1681bool QRegExpEngine::badCharMatch() 1687bool QRegExpEngine::badCharMatch()
1682{ 1688{
@@ -1688,70 +1694,70 @@ bool QRegExpEngine::badCharMatch()
1688 1694
1689 /* 1695 /*
1690 Set up the slide table, used for the bad-character heuristic, using 1696 Set up the slide table, used for the bad-character heuristic, using
1691 the table of first occurrence of each character. 1697 the table of first occurrence of each character.
1692 */ 1698 */
1693 for ( i = 0; i < minl; i++ ) { 1699 for ( i = 0; i < minl; i++ ) {
1694 int sk = occ1[BadChar(mmIn[mmPos + i])]; 1700 int sk = occ1[BadChar(mmIn[mmPos + i])];
1695 if ( sk == NoOccurrence ) 1701 if ( sk == NoOccurrence )
1696 sk = i + 1; 1702 sk = i + 1;
1697 if ( sk > 0 ) { 1703 if ( sk > 0 ) {
1698 int k = i + 1 - sk; 1704 int k = i + 1 - sk;
1699 if ( k < 0 ) { 1705 if ( k < 0 ) {
1700 sk = i + 1; 1706 sk = i + 1;
1701 k = 0; 1707 k = 0;
1702 } 1708 }
1703 if ( sk > mmSlideTab[k] ) 1709 if ( sk > mmSlideTab[k] )
1704 mmSlideTab[k] = sk; 1710 mmSlideTab[k] = sk;
1705 } 1711 }
1706 } 1712 }
1707 1713
1708 if ( mmPos > lastPos ) 1714 if ( mmPos > lastPos )
1709 return FALSE; 1715 return FALSE;
1710 1716
1711 while ( TRUE ) { 1717 while ( TRUE ) {
1712 if ( ++slideNext >= mmSlideTabSize ) 1718 if ( ++slideNext >= mmSlideTabSize )
1713 slideNext = 0; 1719 slideNext = 0;
1714 if ( mmSlideTab[slideHead] > 0 ) { 1720 if ( mmSlideTab[slideHead] > 0 ) {
1715 if ( mmSlideTab[slideHead] - 1 > mmSlideTab[slideNext] ) 1721 if ( mmSlideTab[slideHead] - 1 > mmSlideTab[slideNext] )
1716 mmSlideTab[slideNext] = mmSlideTab[slideHead] - 1; 1722 mmSlideTab[slideNext] = mmSlideTab[slideHead] - 1;
1717 mmSlideTab[slideHead] = 0; 1723 mmSlideTab[slideHead] = 0;
1718 } else { 1724 } else {
1719 if ( matchHere() ) 1725 if ( matchHere() )
1720 return TRUE; 1726 return TRUE;
1721 } 1727 }
1722 1728
1723 if ( mmPos == lastPos ) 1729 if ( mmPos == lastPos )
1724 break; 1730 break;
1725 1731
1726 /* 1732 /*
1727 Update the slide table. This code has much in common with the 1733 Update the slide table. This code has much in common with the
1728 initialization code. 1734 initialization code.
1729 */ 1735 */
1730 int sk = occ1[BadChar(mmIn[mmPos + minl])]; 1736 int sk = occ1[BadChar(mmIn[mmPos + minl])];
1731 if ( sk == NoOccurrence ) { 1737 if ( sk == NoOccurrence ) {
1732 mmSlideTab[slideNext] = minl; 1738 mmSlideTab[slideNext] = minl;
1733 } else if ( sk > 0 ) { 1739 } else if ( sk > 0 ) {
1734 int k = slideNext + minl - sk; 1740 int k = slideNext + minl - sk;
1735 if ( k >= mmSlideTabSize ) 1741 if ( k >= mmSlideTabSize )
1736 k -= mmSlideTabSize; 1742 k -= mmSlideTabSize;
1737 if ( sk > mmSlideTab[k] ) 1743 if ( sk > mmSlideTab[k] )
1738 mmSlideTab[k] = sk; 1744 mmSlideTab[k] = sk;
1739 } 1745 }
1740 slideHead = slideNext; 1746 slideHead = slideNext;
1741 mmPos++; 1747 mmPos++;
1742 } 1748 }
1743 return FALSE; 1749 return FALSE;
1744} 1750}
1745#else 1751#else
1746bool QRegExpEngine::bruteMatch() 1752bool QRegExpEngine::bruteMatch()
1747{ 1753{
1748 while ( mmPos <= mmLen ) { 1754 while ( mmPos <= mmLen ) {
1749 if ( matchHere() ) 1755 if ( matchHere() )
1750 return TRUE; 1756 return TRUE;
1751 mmPos++; 1757 mmPos++;
1752 } 1758 }
1753 return FALSE; 1759 return FALSE;
1754} 1760}
1755#endif 1761#endif
1756 1762
1757/* 1763/*
@@ -1765,343 +1771,343 @@ bool QRegExpEngine::matchHere()
1765 1771
1766 mmMatchedLen = -1; 1772 mmMatchedLen = -1;
1767 mmCurStack[0] = InitialState; 1773 mmCurStack[0] = InitialState;
1768 1774
1769#ifndef QT_NO_REGEXP_CAPTURE 1775#ifndef QT_NO_REGEXP_CAPTURE
1770 if ( ncap > 0 ) { 1776 if ( ncap > 0 ) {
1771 for ( j = 0; j < ncap; j++ ) { 1777 for ( j = 0; j < ncap; j++ ) {
1772 mmCurCapBegin[j] = EmptyCapture; 1778 mmCurCapBegin[j] = EmptyCapture;
1773 mmCurCapEnd[j] = EmptyCapture; 1779 mmCurCapEnd[j] = EmptyCapture;
1774 } 1780 }
1775 } 1781 }
1776#endif 1782#endif
1777 1783
1778#ifndef QT_NO_REGEXP_BACKREF 1784#ifndef QT_NO_REGEXP_BACKREF
1779 int *zzZ = 0; 1785 int *zzZ = 0;
1780 1786
1781 while ( (ncur > 0 || mmSleeping.count() > 0) && i <= mmLen - mmPos && 1787 while ( (ncur > 0 || mmSleeping.count() > 0) && i <= mmLen - mmPos &&
1782 !match ) 1788 !match )
1783#else 1789#else
1784 while ( ncur > 0 && i <= mmLen - mmPos && !match ) 1790 while ( ncur > 0 && i <= mmLen - mmPos && !match )
1785#endif 1791#endif
1786 { 1792 {
1787 int ch = ( i < mmLen - mmPos ) ? mmIn[mmPos + i].unicode() : 0; 1793 int ch = ( i < mmLen - mmPos ) ? mmIn[mmPos + i].unicode() : 0;
1788 for ( j = 0; j < ncur; j++ ) { 1794 for ( j = 0; j < ncur; j++ ) {
1789 int cur = mmCurStack[j]; 1795 int cur = mmCurStack[j];
1790 State *scur = s[cur]; 1796 State *scur = s[cur];
1791 QArray<int>& outs = scur->outs; 1797 QArray<int>& outs = scur->outs;
1792 for ( k = 0; k < (int) outs.size(); k++ ) { 1798 for ( k = 0; k < (int) outs.size(); k++ ) {
1793 int next = outs[k]; 1799 int next = outs[k];
1794 State *snext = s[next]; 1800 State *snext = s[next];
1795 bool in = TRUE; 1801 bool in = TRUE;
1796#ifndef QT_NO_REGEXP_BACKREF 1802#ifndef QT_NO_REGEXP_BACKREF
1797 int needSomeSleep = 0; 1803 int needSomeSleep = 0;
1798#endif 1804#endif
1799 1805
1800 /* 1806 /*
1801 First, check if the anchors are anchored properly. 1807 First, check if the anchors are anchored properly.
1802 */ 1808 */
1803 if ( scur->anchors != 0 ) { 1809 if ( scur->anchors != 0 ) {
1804 int a = at( *scur->anchors, next ); 1810 int a = at( *scur->anchors, next );
1805 if ( a != 0 && !testAnchor(i, a, mmCurCapBegin + j * ncap) ) 1811 if ( a != 0 && !testAnchor(i, a, mmCurCapBegin + j * ncap) )
1806 in = FALSE; 1812 in = FALSE;
1807 } 1813 }
1808 /* 1814 /*
1809 If indeed they are, check if the input character is correct 1815 If indeed they are, check if the input character is correct
1810 for this transition. 1816 for this transition.
1811 */ 1817 */
1812 if ( in ) { 1818 if ( in ) {
1813 m = snext->match; 1819 m = snext->match;
1814 if ( (m & (CharClassBit | BackRefBit)) == 0 ) { 1820 if ( (m & (CharClassBit | BackRefBit)) == 0 ) {
1815 if ( cs ) 1821 if ( cs )
1816 in = ( m == ch ); 1822 in = ( m == ch );
1817 else 1823 else
1818 in = ( QChar(m).lower() == QChar(ch).lower() ); 1824 in = ( QChar(m).lower() == QChar(ch).lower() );
1819 } else if ( next == FinalState ) { 1825 } else if ( next == FinalState ) {
1820 mmMatchedLen = i; 1826 mmMatchedLen = i;
1821 match = mmMinimal; 1827 match = mmMinimal;
1822 in = TRUE; 1828 in = TRUE;
1823 } else if ( (m & CharClassBit) != 0 ) { 1829 } else if ( (m & CharClassBit) != 0 ) {
1824#ifndef QT_NO_REGEXP_CCLASS 1830#ifndef QT_NO_REGEXP_CCLASS
1825 const CharClass *cc = cl[m ^ CharClassBit]; 1831 const CharClass *cc = cl[m ^ CharClassBit];
1826 if ( cs ) 1832 if ( cs )
1827 in = cc->in( ch ); 1833 in = cc->in( ch );
1828 else if ( cc->negative() ) 1834 else if ( cc->negative() )
1829 in = cc->in( QChar(ch).lower() ) && 1835 in = cc->in( QChar(ch).lower() ) &&
1830 cc->in( QChar(ch).upper() ); 1836 cc->in( QChar(ch).upper() );
1831 else 1837 else
1832 in = cc->in( QChar(ch).lower() ) || 1838 in = cc->in( QChar(ch).lower() ) ||
1833 cc->in( QChar(ch).upper() ); 1839 cc->in( QChar(ch).upper() );
1834#endif 1840#endif
1835#ifndef QT_NO_REGEXP_BACKREF 1841#ifndef QT_NO_REGEXP_BACKREF
1836 } else { /* ( (m & BackRefBit) != 0 ) */ 1842 } else { /* ( (m & BackRefBit) != 0 ) */
1837 int bref = m ^ BackRefBit; 1843 int bref = m ^ BackRefBit;
1838 int ell = j * ncap + ( bref - 1 ); 1844 int ell = j * ncap + ( bref - 1 );
1839 1845
1840 in = bref <= ncap && mmCurCapBegin[ell] != EmptyCapture; 1846 in = bref <= ncap && mmCurCapBegin[ell] != EmptyCapture;
1841 if ( in ) { 1847 if ( in ) {
1842 if ( cs ) 1848 if ( cs )
1843 in = ( mmIn[mmPos + mmCurCapBegin[ell]] 1849 in = ( mmIn[mmPos + mmCurCapBegin[ell]]
1844 == QChar(ch) ); 1850 == QChar(ch) );
1845 else 1851 else
1846 in = ( mmIn[mmPos + mmCurCapBegin[ell]].lower() 1852 in = ( mmIn[mmPos + mmCurCapBegin[ell]].lower()
1847 == QChar(ch).lower() ); 1853 == QChar(ch).lower() );
1848 } 1854 }
1849 1855
1850 if ( in ) { 1856 if ( in ) {
1851 int delta; 1857 int delta;
1852 if ( mmCurCapEnd[ell] == EmptyCapture ) 1858 if ( mmCurCapEnd[ell] == EmptyCapture )
1853 delta = i - mmCurCapBegin[ell]; 1859 delta = i - mmCurCapBegin[ell];
1854 else 1860 else
1855 delta = mmCurCapEnd[ell] - mmCurCapBegin[ell]; 1861 delta = mmCurCapEnd[ell] - mmCurCapBegin[ell];
1856 1862
1857 in = ( delta <= mmLen - mmPos ); 1863 in = ( delta <= mmLen - mmPos );
1858 if ( in && delta > 1 ) { 1864 if ( in && delta > 1 ) {
1859 int n; 1865 int n;
1860 if ( cs ) { 1866 if ( cs ) {
1861 for ( n = 1; n < delta; n++ ) { 1867 for ( n = 1; n < delta; n++ ) {
1862 if ( mmIn[mmPos + 1868 if ( mmIn[mmPos +
1863 mmCurCapBegin[ell] + n] != 1869 mmCurCapBegin[ell] + n] !=
1864 mmIn[mmPos + i + n] ) 1870 mmIn[mmPos + i + n] )
1865 break; 1871 break;
1866 } 1872 }
1867 } else { 1873 } else {
1868 for ( n = 1; n < delta; n++ ) { 1874 for ( n = 1; n < delta; n++ ) {
1869 QChar a = mmIn[mmPos + 1875 QChar a = mmIn[mmPos +
1870 mmCurCapBegin[ell] + n]; 1876 mmCurCapBegin[ell] + n];
1871 QChar b = mmIn[mmPos + i + n]; 1877 QChar b = mmIn[mmPos + i + n];
1872 if ( a.lower() != b.lower() ) 1878 if ( a.lower() != b.lower() )
1873 break; 1879 break;
1874 } 1880 }
1875 } 1881 }
1876 in = ( n == delta ); 1882 in = ( n == delta );
1877 if ( in ) 1883 if ( in )
1878 needSomeSleep = delta - 1; 1884 needSomeSleep = delta - 1;
1879 } 1885 }
1880 } 1886 }
1881#endif 1887#endif
1882 } 1888 }
1883 } 1889 }
1884 1890
1885 /* 1891 /*
1886 All is right. We must now update our data structures. 1892 All is right. We must now update our data structures.
1887 */ 1893 */
1888 if ( in ) { 1894 if ( in ) {
1889#ifndef QT_NO_REGEXP_CAPTURE 1895#ifndef QT_NO_REGEXP_CAPTURE
1890 int *capBegin, *capEnd; 1896 int *capBegin, *capEnd;
1891#endif 1897#endif
1892 /* 1898 /*
1893 If the next state was not encountered yet, all is fine. 1899 If the next state was not encountered yet, all is fine.
1894 */ 1900 */
1895 if ( (m = mmInNextStack[next]) == -1 ) { 1901 if ( (m = mmInNextStack[next]) == -1 ) {
1896 m = nnext++; 1902 m = nnext++;
1897 mmNextStack[m] = next; 1903 mmNextStack[m] = next;
1898 mmInNextStack[next] = m; 1904 mmInNextStack[next] = m;
1899#ifndef QT_NO_REGEXP_CAPTURE 1905#ifndef QT_NO_REGEXP_CAPTURE
1900 capBegin = mmNextCapBegin + m * ncap; 1906 capBegin = mmNextCapBegin + m * ncap;
1901 capEnd = mmNextCapEnd + m * ncap; 1907 capEnd = mmNextCapEnd + m * ncap;
1902 1908
1903 /* 1909 /*
1904 Otherwise, we'll first maintain captures in temporary 1910 Otherwise, we'll first maintain captures in temporary
1905 arrays, and decide at the end whether it's best to keep 1911 arrays, and decide at the end whether it's best to keep
1906 the previous capture zones or the new ones. 1912 the previous capture zones or the new ones.
1907 */ 1913 */
1908 } else { 1914 } else {
1909 capBegin = mmTempCapBegin; 1915 capBegin = mmTempCapBegin;
1910 capEnd = mmTempCapEnd; 1916 capEnd = mmTempCapEnd;
1911#endif 1917#endif
1912 } 1918 }
1913 1919
1914#ifndef QT_NO_REGEXP_CAPTURE 1920#ifndef QT_NO_REGEXP_CAPTURE
1915 /* 1921 /*
1916 Updating the capture zones is much of a task. 1922 Updating the capture zones is much of a task.
1917 */ 1923 */
1918 if ( ncap > 0 ) { 1924 if ( ncap > 0 ) {
1919 memcpy( capBegin, mmCurCapBegin + j * ncap, 1925 memcpy( capBegin, mmCurCapBegin + j * ncap,
1920 ncap * sizeof(int) ); 1926 ncap * sizeof(int) );
1921 memcpy( capEnd, mmCurCapEnd + j * ncap, 1927 memcpy( capEnd, mmCurCapEnd + j * ncap,
1922 ncap * sizeof(int) ); 1928 ncap * sizeof(int) );
1923 int c = scur->atom, n = snext->atom; 1929 int c = scur->atom, n = snext->atom;
1924 int p = -1, q = -1; 1930 int p = -1, q = -1;
1925 int cap; 1931 int cap;
1926 1932
1927 /* 1933 /*
1928 Lemma 1. For any x in the range [0..nf), we have 1934 Lemma 1. For any x in the range [0..nf), we have
1929 f[x].parent < x. 1935 f[x].parent < x.
1930 1936
1931 Proof. By looking at startAtom(), it is clear that 1937 Proof. By looking at startAtom(), it is clear that
1932 cf < nf holds all the time, and thus that 1938 cf < nf holds all the time, and thus that
1933 f[nf].parent < nf. 1939 f[nf].parent < nf.
1934 */ 1940 */
1935 1941
1936 /* 1942 /*
1937 If we are reentering an atom, we empty all capture 1943 If we are reentering an atom, we empty all capture
1938 zones inside it. 1944 zones inside it.
1939 */ 1945 */
1940 if ( scur->reenter != 0 && 1946 if ( scur->reenter != 0 &&
1941 (q = at(*scur->reenter, next)) != 0 ) { 1947 (q = at(*scur->reenter, next)) != 0 ) {
1942 QBitArray b; 1948 QBitArray b;
1943 b.fill( FALSE, nf ); 1949 b.fill( FALSE, nf );
1944 b.setBit( q, TRUE ); 1950 b.setBit( q, TRUE );
1945 for ( int ell = q + 1; ell < nf; ell++ ) { 1951 for ( int ell = q + 1; ell < nf; ell++ ) {
1946 if ( b.testBit(f[ell].parent) ) { 1952 if ( b.testBit(f[ell].parent) ) {
1947 b.setBit( ell, TRUE ); 1953 b.setBit( ell, TRUE );
1948 cap = f[ell].capture; 1954 cap = f[ell].capture;
1949 if ( cap >= 0 ) { 1955 if ( cap >= 0 ) {
1950 capBegin[cap] = EmptyCapture; 1956 capBegin[cap] = EmptyCapture;
1951 capEnd[cap] = EmptyCapture; 1957 capEnd[cap] = EmptyCapture;
1952 } 1958 }
1953 } 1959 }
1954 } 1960 }
1955 p = f[q].parent; 1961 p = f[q].parent;
1956 1962
1957 /* 1963 /*
1958 Otherwise, close the capture zones we are leaving. 1964 Otherwise, close the capture zones we are leaving.
1959 We are leaving f[c].capture, f[f[c].parent].capture, 1965 We are leaving f[c].capture, f[f[c].parent].capture,
1960 f[f[f[c].parent].parent].capture, ..., until 1966 f[f[f[c].parent].parent].capture, ..., until
1961 f[x].capture, with x such that f[x].parent is the 1967 f[x].capture, with x such that f[x].parent is the
1962 youngest common ancestor for c and n. 1968 youngest common ancestor for c and n.
1963 1969
1964 We go up along c's and n's ancestry until we find x. 1970 We go up along c's and n's ancestry until we find x.
1965 */ 1971 */
1966 } else { 1972 } else {
1967 p = c; 1973 p = c;
1968 q = n; 1974 q = n;
1969 while ( p != q ) { 1975 while ( p != q ) {
1970 if ( p > q ) { 1976 if ( p > q ) {
1971 cap = f[p].capture; 1977 cap = f[p].capture;
1972 if ( cap >= 0 ) { 1978 if ( cap >= 0 ) {
1973 if ( capBegin[cap] == i ) { 1979 if ( capBegin[cap] == i ) {
1974 capBegin[cap] = EmptyCapture; 1980 capBegin[cap] = EmptyCapture;
1975 capEnd[cap] = EmptyCapture; 1981 capEnd[cap] = EmptyCapture;
1976 } else { 1982 } else {
1977 capEnd[cap] = i; 1983 capEnd[cap] = i;
1978 } 1984 }
1979 } 1985 }
1980 p = f[p].parent; 1986 p = f[p].parent;
1981 } else { 1987 } else {
1982 q = f[q].parent; 1988 q = f[q].parent;
1983 } 1989 }
1984 } 1990 }
1985 } 1991 }
1986 1992
1987 /* 1993 /*
1988 In any case, we now open the capture zones we are 1994 In any case, we now open the capture zones we are
1989 entering. We work upwards from n until we reach p 1995 entering. We work upwards from n until we reach p
1990 (the parent of the atom we reenter or the youngest 1996 (the parent of the atom we reenter or the youngest
1991 common ancestor). 1997 common ancestor).
1992 */ 1998 */
1993 while ( n > p ) { 1999 while ( n > p ) {
1994 cap = f[n].capture; 2000 cap = f[n].capture;
1995 if ( cap >= 0 ) { 2001 if ( cap >= 0 ) {
1996 capBegin[cap] = i; 2002 capBegin[cap] = i;
1997 capEnd[cap] = EmptyCapture; 2003 capEnd[cap] = EmptyCapture;
1998 } 2004 }
1999 n = f[n].parent; 2005 n = f[n].parent;
2000 } 2006 }
2001 /* 2007 /*
2002 If the next state was already in mmNextStack, we must 2008 If the next state was already in mmNextStack, we must
2003 choose carefully which capture zones we want to keep. 2009 choose carefully which capture zones we want to keep.
2004 */ 2010 */
2005 if ( capBegin == mmTempCapBegin && 2011 if ( capBegin == mmTempCapBegin &&
2006 isBetterCapture(capBegin, capEnd, 2012 isBetterCapture(capBegin, capEnd,
2007 mmNextCapBegin + m * ncap, 2013 mmNextCapBegin + m * ncap,
2008 mmNextCapEnd + m * ncap) ) { 2014 mmNextCapEnd + m * ncap) ) {
2009 memcpy( mmNextCapBegin + m * ncap, capBegin, 2015 memcpy( mmNextCapBegin + m * ncap, capBegin,
2010 ncap * sizeof(int) ); 2016 ncap * sizeof(int) );
2011 memcpy( mmNextCapEnd + m * ncap, capEnd, 2017 memcpy( mmNextCapEnd + m * ncap, capEnd,
2012 ncap * sizeof(int) ); 2018 ncap * sizeof(int) );
2013 } 2019 }
2014 } 2020 }
2015#ifndef QT_NO_REGEXP_BACKREF 2021#ifndef QT_NO_REGEXP_BACKREF
2016 /* 2022 /*
2017 We are done with updating the capture zones. It's now 2023 We are done with updating the capture zones. It's now
2018 time to put the next state to sleep, if it needs to, and 2024 time to put the next state to sleep, if it needs to, and
2019 to remove it from mmNextStack. 2025 to remove it from mmNextStack.
2020 */ 2026 */
2021 if ( needSomeSleep > 0 ) { 2027 if ( needSomeSleep > 0 ) {
2022 zzZ = new int[1 + 2 * ncap]; 2028 zzZ = new int[1 + 2 * ncap];
2023 zzZ[0] = next; 2029 zzZ[0] = next;
2024 if ( ncap > 0 ) { 2030 if ( ncap > 0 ) {
2025 memcpy( zzZ + 1, capBegin, ncap * sizeof(int) ); 2031 memcpy( zzZ + 1, capBegin, ncap * sizeof(int) );
2026 memcpy( zzZ + 1 + ncap, capEnd, 2032 memcpy( zzZ + 1 + ncap, capEnd,
2027 ncap * sizeof(int) ); 2033 ncap * sizeof(int) );
2028 } 2034 }
2029 mmInNextStack[mmNextStack[--nnext]] = -1; 2035 mmInNextStack[mmNextStack[--nnext]] = -1;
2030 mmSleeping.insert( i + needSomeSleep, zzZ ); 2036 mmSleeping.insert( i + needSomeSleep, zzZ );
2031 } 2037 }
2032#endif 2038#endif
2033#endif 2039#endif
2034 } 2040 }
2035 } 2041 }
2036 } 2042 }
2037#ifndef QT_NO_REGEXP_CAPTURE 2043#ifndef QT_NO_REGEXP_CAPTURE
2038 /* 2044 /*
2039 If we reached the final state, hurray! Copy the captured zone. 2045 If we reached the final state, hurray! Copy the captured zone.
2040 */ 2046 */
2041 if ( ncap > 0 && (m = mmInNextStack[FinalState]) != -1 ) { 2047 if ( ncap > 0 && (m = mmInNextStack[FinalState]) != -1 ) {
2042 memcpy( mmCapBegin, mmNextCapBegin + m * ncap, ncap * sizeof(int) ); 2048 memcpy( mmCapBegin, mmNextCapBegin + m * ncap, ncap * sizeof(int) );
2043 memcpy( mmCapEnd, mmNextCapEnd + m * ncap, ncap * sizeof(int) ); 2049 memcpy( mmCapEnd, mmNextCapEnd + m * ncap, ncap * sizeof(int) );
2044 } 2050 }
2045#ifndef QT_NO_REGEXP_BACKREF 2051#ifndef QT_NO_REGEXP_BACKREF
2046 /* 2052 /*
2047 It's time to wake up the sleepers. 2053 It's time to wake up the sleepers.
2048 */ 2054 */
2049 if ( mmSleeping.count() > 0 ) { 2055 if ( mmSleeping.count() > 0 ) {
2050 while ( (zzZ = mmSleeping.take(i)) != 0 ) { 2056 while ( (zzZ = mmSleeping.take(i)) != 0 ) {
2051 int next = zzZ[0]; 2057 int next = zzZ[0];
2052 int *capBegin = zzZ + 1; 2058 int *capBegin = zzZ + 1;
2053 int *capEnd = zzZ + 1 + ncap; 2059 int *capEnd = zzZ + 1 + ncap;
2054 bool copyOver = TRUE; 2060 bool copyOver = TRUE;
2055 2061
2056 if ( (m = mmInNextStack[zzZ[0]]) == -1 ) { 2062 if ( (m = mmInNextStack[zzZ[0]]) == -1 ) {
2057 m = nnext++; 2063 m = nnext++;
2058 mmNextStack[m] = next; 2064 mmNextStack[m] = next;
2059 mmInNextStack[next] = m; 2065 mmInNextStack[next] = m;
2060 } else { 2066 } else {
2061 copyOver = isBetterCapture( mmNextCapBegin + m * ncap, 2067 copyOver = isBetterCapture( mmNextCapBegin + m * ncap,
2062 mmNextCapEnd + m * ncap, 2068 mmNextCapEnd + m * ncap,
2063 capBegin, capEnd ); 2069 capBegin, capEnd );
2064 } 2070 }
2065 if ( copyOver ) { 2071 if ( copyOver ) {
2066 memcpy( mmNextCapBegin + m * ncap, capBegin, 2072 memcpy( mmNextCapBegin + m * ncap, capBegin,
2067 ncap * sizeof(int) ); 2073 ncap * sizeof(int) );
2068 memcpy( mmNextCapEnd + m * ncap, capEnd, 2074 memcpy( mmNextCapEnd + m * ncap, capEnd,
2069 ncap * sizeof(int) ); 2075 ncap * sizeof(int) );
2070 } 2076 }
2071 delete[] zzZ; 2077 delete[] zzZ;
2072 } 2078 }
2073 } 2079 }
2074#endif 2080#endif
2075#endif 2081#endif
2076 for ( j = 0; j < nnext; j++ ) 2082 for ( j = 0; j < nnext; j++ )
2077 mmInNextStack[mmNextStack[j]] = -1; 2083 mmInNextStack[mmNextStack[j]] = -1;
2078 2084
2079 qSwap( mmCurStack, mmNextStack ); 2085 qSwap( mmCurStack, mmNextStack );
2080#ifndef QT_NO_REGEXP_CAPTURE 2086#ifndef QT_NO_REGEXP_CAPTURE
2081 qSwap( mmCurCapBegin, mmNextCapBegin ); 2087 qSwap( mmCurCapBegin, mmNextCapBegin );
2082 qSwap( mmCurCapEnd, mmNextCapEnd ); 2088 qSwap( mmCurCapEnd, mmNextCapEnd );
2083#endif 2089#endif
2084 ncur = nnext; 2090 ncur = nnext;
2085 nnext = 0; 2091 nnext = 0;
2086 i++; 2092 i++;
2087 } 2093 }
2088 2094
2089#ifndef QT_NO_REGEXP_BACKREF 2095#ifndef QT_NO_REGEXP_BACKREF
2090 /* 2096 /*
2091 If minimal matching is enabled, we might have some sleepers left. 2097 If minimal matching is enabled, we might have some sleepers left.
2092 */ 2098 */
2093 while ( !mmSleeping.isEmpty() ) { 2099 while ( !mmSleeping.isEmpty() ) {
2094 zzZ = mmSleeping.take( *QIntDictIterator<int>(mmSleeping) ); 2100 zzZ = mmSleeping.take( *QIntDictIterator<int>(mmSleeping) );
2095 delete[] zzZ; 2101 delete[] zzZ;
2096 } 2102 }
2097#endif 2103#endif
2098 2104
2099 match = ( mmMatchedLen >= 0 ); 2105 match = ( mmMatchedLen >= 0 );
2100 if ( !match ) 2106 if ( !match )
2101 mmMatchedLen = i - 1; 2107 mmMatchedLen = i - 1;
2102 return match; 2108 return match;
2103} 2109}
2104 2110
2105#ifndef QT_NO_REGEXP_CCLASS 2111#ifndef QT_NO_REGEXP_CCLASS
2106 2112
2107QRegExpEngine::CharClass::CharClass() 2113QRegExpEngine::CharClass::CharClass()
@@ -2110,13 +2116,13 @@ QRegExpEngine::CharClass::CharClass()
2110 , occ1( *noOccurrences ) 2116 , occ1( *noOccurrences )
2111#endif 2117#endif
2112{ 2118{
2113} 2119}
2114 2120
2115QRegExpEngine::CharClass& QRegExpEngine::CharClass::operator=( 2121QRegExpEngine::CharClass& QRegExpEngine::CharClass::operator=(
2116 const CharClass& cc ) 2122 const CharClass& cc )
2117{ 2123{
2118 c = cc.c; 2124 c = cc.c;
2119 r = cc.r.copy(); 2125 r = cc.r.copy();
2120 n = cc.n; 2126 n = cc.n;
2121#ifndef QT_NO_REGEXP_OPTIM 2127#ifndef QT_NO_REGEXP_OPTIM
2122 occ1 = cc.occ1; 2128 occ1 = cc.occ1;
@@ -2147,65 +2153,65 @@ void QRegExpEngine::CharClass::addCategories( int cats )
2147#endif 2153#endif
2148} 2154}
2149 2155
2150void QRegExpEngine::CharClass::addRange( ushort from, ushort to ) 2156void QRegExpEngine::CharClass::addRange( ushort from, ushort to )
2151{ 2157{
2152 if ( from > to ) 2158 if ( from > to )
2153 qSwap( from, to ); 2159 qSwap( from, to );
2154 int n = r.size(); 2160 int n = r.size();
2155 r.resize( n + 1 ); 2161 r.resize( n + 1 );
2156 r[n].from = from; 2162 r[n].from = from;
2157 r[n].to = to; 2163 r[n].to = to;
2158 2164
2159#ifndef QT_NO_REGEXP_OPTIM 2165#ifndef QT_NO_REGEXP_OPTIM
2160 int i; 2166 int i;
2161 2167
2162 if ( to - from < NumBadChars ) { 2168 if ( to - from < NumBadChars ) {
2163 occ1.detach(); 2169 occ1.detach();
2164 if ( from % NumBadChars <= to % NumBadChars ) { 2170 if ( from % NumBadChars <= to % NumBadChars ) {
2165 for ( i = from % NumBadChars; i <= to % NumBadChars; i++ ) 2171 for ( i = from % NumBadChars; i <= to % NumBadChars; i++ )
2166 occ1[i] = 0; 2172 occ1[i] = 0;
2167 } else {
2168 for ( i = 0; i <= to % NumBadChars; i++ )
2169 occ1[i] = 0;
2170 for ( i = from % NumBadChars; i < NumBadChars; i++ )
2171 occ1[i] = 0;
2172 }
2173 } else { 2173 } else {
2174 occ1 = *firstOccurrenceAtZero; 2174 for ( i = 0; i <= to % NumBadChars; i++ )
2175 occ1[i] = 0;
2176 for ( i = from % NumBadChars; i < NumBadChars; i++ )
2177 occ1[i] = 0;
2178 }
2179 } else {
2180 occ1 = *firstOccurrenceAtZero;
2175 } 2181 }
2176#endif 2182#endif
2177} 2183}
2178 2184
2179bool QRegExpEngine::CharClass::in( QChar ch ) const 2185bool QRegExpEngine::CharClass::in( QChar ch ) const
2180{ 2186{
2181#ifndef QT_NO_REGEXP_OPTIM 2187#ifndef QT_NO_REGEXP_OPTIM
2182 if ( occ1[BadChar(ch)] == NoOccurrence ) 2188 if ( occ1[BadChar(ch)] == NoOccurrence )
2183 return n; 2189 return n;
2184#endif 2190#endif
2185 2191
2186 if ( c != 0 && (c & (1 << (int) ch.category())) != 0 ) 2192 if ( c != 0 && (c & (1 << (int) ch.category())) != 0 )
2187 return !n; 2193 return !n;
2188 for ( int i = 0; i < (int) r.size(); i++ ) { 2194 for ( int i = 0; i < (int) r.size(); i++ ) {
2189 if ( ch.unicode() >= r[i].from && ch.unicode() <= r[i].to ) 2195 if ( ch.unicode() >= r[i].from && ch.unicode() <= r[i].to )
2190 return !n; 2196 return !n;
2191 } 2197 }
2192 return n; 2198 return n;
2193} 2199}
2194 2200
2195#if defined(QT_DEBUG) 2201#if defined(QT_DEBUG)
2196void QRegExpEngine::CharClass::dump() const 2202void QRegExpEngine::CharClass::dump() const
2197{ 2203{
2198 int i; 2204 int i;
2199 qDebug( " %stive character class", n ? "nega" : "posi" ); 2205 odebug << " " << (n ? "nega" : "posi") << "tive character class" << oendl;
2200#ifndef QT_NO_REGEXP_CCLASS 2206#ifndef QT_NO_REGEXP_CCLASS
2201 if ( c != 0 ) 2207 if ( c != 0 )
2202 qDebug( " categories 0x%.8x", c ); 2208 odebug << QString().sprintf(" categories 0x%.8x", c ) << oendl;
2203#endif 2209#endif
2204 for ( i = 0; i < (int) r.size(); i++ ) 2210 for ( i = 0; i < (int) r.size(); i++ )
2205 qDebug( " 0x%.4x through 0x%.4x", r[i].from, r[i].to ); 2211 odebug << QString().sprintf(" 0x%.4x through 0x%.4x", r[i].from, r[i].to ) << oendl;
2206} 2212}
2207#endif 2213#endif
2208#endif 2214#endif
2209 2215
2210QRegExpEngine::Box::Box( QRegExpEngine *engine ) 2216QRegExpEngine::Box::Box( QRegExpEngine *engine )
2211 : eng( engine ), skipanchors( 0 ) 2217 : eng( engine ), skipanchors( 0 )
@@ -2272,88 +2278,88 @@ void QRegExpEngine::Box::set( int bref )
2272{ 2278{
2273 ls.resize( 1 ); 2279 ls.resize( 1 );
2274 ls[0] = eng->createState( bref ); 2280 ls[0] = eng->createState( bref );
2275 rs = ls; 2281 rs = ls;
2276 rs.detach(); 2282 rs.detach();
2277 if ( bref >= 1 && bref <= MaxBackRefs ) 2283 if ( bref >= 1 && bref <= MaxBackRefs )
2278 skipanchors = Anchor_BackRef0Empty << bref; 2284 skipanchors = Anchor_BackRef0Empty << bref;
2279#ifndef QT_NO_REGEXP_OPTIM 2285#ifndef QT_NO_REGEXP_OPTIM
2280 maxl = InftyLen; 2286 maxl = InftyLen;
2281#endif 2287#endif
2282 minl = 0; 2288 minl = 0;
2283} 2289}
2284#endif 2290#endif
2285 2291
2286void QRegExpEngine::Box::cat( const Box& b ) 2292void QRegExpEngine::Box::cat( const Box& b )
2287{ 2293{
2288 eng->addCatTransitions( rs, b.ls ); 2294 eng->addCatTransitions( rs, b.ls );
2289 addAnchorsToEngine( b ); 2295 addAnchorsToEngine( b );
2290 if ( minl == 0 ) { 2296 if ( minl == 0 ) {
2291 mergeInto( &lanchors, b.lanchors ); 2297 mergeInto( &lanchors, b.lanchors );
2292 if ( skipanchors != 0 ) { 2298 if ( skipanchors != 0 ) {
2293 for ( int i = 0; i < (int) b.ls.size(); i++ ) { 2299 for ( int i = 0; i < (int) b.ls.size(); i++ ) {
2294 int a = eng->anchorConcatenation( at(lanchors, b.ls[i]), 2300 int a = eng->anchorConcatenation( at(lanchors, b.ls[i]),
2295 skipanchors ); 2301 skipanchors );
2296 lanchors.insert( b.ls[i], a ); 2302 lanchors.insert( b.ls[i], a );
2297 } 2303 }
2298 } 2304 }
2299 mergeInto( &ls, b.ls ); 2305 mergeInto( &ls, b.ls );
2300 } 2306 }
2301 if ( b.minl == 0 ) { 2307 if ( b.minl == 0 ) {
2302 mergeInto( &ranchors, b.ranchors ); 2308 mergeInto( &ranchors, b.ranchors );
2303 if ( b.skipanchors != 0 ) { 2309 if ( b.skipanchors != 0 ) {
2304 for ( int i = 0; i < (int) rs.size(); i++ ) { 2310 for ( int i = 0; i < (int) rs.size(); i++ ) {
2305 int a = eng->anchorConcatenation( at(ranchors, rs[i]), 2311 int a = eng->anchorConcatenation( at(ranchors, rs[i]),
2306 b.skipanchors ); 2312 b.skipanchors );
2307 ranchors.insert( rs[i], a ); 2313 ranchors.insert( rs[i], a );
2308 } 2314 }
2309 } 2315 }
2310 mergeInto( &rs, b.rs ); 2316 mergeInto( &rs, b.rs );
2311 } else { 2317 } else {
2312 ranchors = b.ranchors; 2318 ranchors = b.ranchors;
2313 rs = b.rs; 2319 rs = b.rs;
2314 } 2320 }
2315 2321
2316#ifndef QT_NO_REGEXP_OPTIM 2322#ifndef QT_NO_REGEXP_OPTIM
2317 if ( maxl != InftyLen ) { 2323 if ( maxl != InftyLen ) {
2318 if ( rightStr.length() + b.leftStr.length() > 2324 if ( rightStr.length() + b.leftStr.length() >
2319 QMAX(str.length(), b.str.length()) ) { 2325 QMAX(str.length(), b.str.length()) ) {
2320 earlyStart = minl - rightStr.length(); 2326 earlyStart = minl - rightStr.length();
2321 lateStart = maxl - rightStr.length(); 2327 lateStart = maxl - rightStr.length();
2322 str = rightStr + b.leftStr; 2328 str = rightStr + b.leftStr;
2323 } else if ( b.str.length() > str.length() ) { 2329 } else if ( b.str.length() > str.length() ) {
2324 earlyStart = minl + b.earlyStart; 2330 earlyStart = minl + b.earlyStart;
2325 lateStart = maxl + b.lateStart; 2331 lateStart = maxl + b.lateStart;
2326 str = b.str; 2332 str = b.str;
2327 } 2333 }
2328 } 2334 }
2329 2335
2330 if ( (int) leftStr.length() == maxl ) 2336 if ( (int) leftStr.length() == maxl )
2331 leftStr += b.leftStr; 2337 leftStr += b.leftStr;
2332 if ( (int) b.rightStr.length() == b.maxl ) 2338 if ( (int) b.rightStr.length() == b.maxl )
2333 rightStr += b.rightStr; 2339 rightStr += b.rightStr;
2334 else 2340 else
2335 rightStr = b.rightStr; 2341 rightStr = b.rightStr;
2336 2342
2337 if ( maxl == InftyLen || b.maxl == InftyLen ) 2343 if ( maxl == InftyLen || b.maxl == InftyLen )
2338 maxl = InftyLen; 2344 maxl = InftyLen;
2339 else 2345 else
2340 maxl += b.maxl; 2346 maxl += b.maxl;
2341 2347
2342 occ1.detach(); 2348 occ1.detach();
2343 for ( int i = 0; i < NumBadChars; i++ ) { 2349 for ( int i = 0; i < NumBadChars; i++ ) {
2344 if ( b.occ1[i] != NoOccurrence && minl + b.occ1[i] < occ1[i] ) 2350 if ( b.occ1[i] != NoOccurrence && minl + b.occ1[i] < occ1[i] )
2345 occ1[i] = minl + b.occ1[i]; 2351 occ1[i] = minl + b.occ1[i];
2346 } 2352 }
2347#endif 2353#endif
2348 2354
2349 minl += b.minl; 2355 minl += b.minl;
2350 if ( minl == 0 ) 2356 if ( minl == 0 )
2351 skipanchors = eng->anchorConcatenation( skipanchors, b.skipanchors ); 2357 skipanchors = eng->anchorConcatenation( skipanchors, b.skipanchors );
2352 else 2358 else
2353 skipanchors = 0; 2359 skipanchors = 0;
2354} 2360}
2355 2361
2356void QRegExpEngine::Box::orx( const Box& b ) 2362void QRegExpEngine::Box::orx( const Box& b )
2357{ 2363{
2358 mergeInto( &ls, b.ls ); 2364 mergeInto( &ls, b.ls );
2359 mergeInto( &lanchors, b.lanchors ); 2365 mergeInto( &lanchors, b.lanchors );
@@ -2361,25 +2367,25 @@ void QRegExpEngine::Box::orx( const Box& b )
2361 mergeInto( &ranchors, b.ranchors ); 2367 mergeInto( &ranchors, b.ranchors );
2362 skipanchors = eng->anchorAlternation( skipanchors, b.skipanchors ); 2368 skipanchors = eng->anchorAlternation( skipanchors, b.skipanchors );
2363 2369
2364#ifndef QT_NO_REGEXP_OPTIM 2370#ifndef QT_NO_REGEXP_OPTIM
2365 occ1.detach(); 2371 occ1.detach();
2366 for ( int i = 0; i < NumBadChars; i++ ) { 2372 for ( int i = 0; i < NumBadChars; i++ ) {
2367 if ( occ1[i] > b.occ1[i] ) 2373 if ( occ1[i] > b.occ1[i] )
2368 occ1[i] = b.occ1[i]; 2374 occ1[i] = b.occ1[i];
2369 } 2375 }
2370 earlyStart = 0; 2376 earlyStart = 0;
2371 lateStart = 0; 2377 lateStart = 0;
2372 str = QString::null; 2378 str = QString::null;
2373 leftStr = QString::null; 2379 leftStr = QString::null;
2374 rightStr = QString::null; 2380 rightStr = QString::null;
2375 if ( b.maxl > maxl ) 2381 if ( b.maxl > maxl )
2376 maxl = b.maxl; 2382 maxl = b.maxl;
2377#endif 2383#endif
2378 if ( b.minl < minl ) 2384 if ( b.minl < minl )
2379 minl = b.minl; 2385 minl = b.minl;
2380} 2386}
2381 2387
2382void QRegExpEngine::Box::plus( int atom ) 2388void QRegExpEngine::Box::plus( int atom )
2383{ 2389{
2384#ifndef QT_NO_REGEXP_CAPTURE 2390#ifndef QT_NO_REGEXP_CAPTURE
2385 eng->addPlusTransitions( rs, ls, atom ); 2391 eng->addPlusTransitions( rs, ls, atom );
@@ -2406,18 +2412,18 @@ void QRegExpEngine::Box::opt()
2406 minl = 0; 2412 minl = 0;
2407} 2413}
2408 2414
2409void QRegExpEngine::Box::catAnchor( int a ) 2415void QRegExpEngine::Box::catAnchor( int a )
2410{ 2416{
2411 if ( a != 0 ) { 2417 if ( a != 0 ) {
2412 for ( int i = 0; i < (int) rs.size(); i++ ) { 2418 for ( int i = 0; i < (int) rs.size(); i++ ) {
2413 a = eng->anchorConcatenation( at(ranchors, rs[i]), a ); 2419 a = eng->anchorConcatenation( at(ranchors, rs[i]), a );
2414 ranchors.insert( rs[i], a ); 2420 ranchors.insert( rs[i], a );
2415 } 2421 }
2416 if ( minl == 0 ) 2422 if ( minl == 0 )
2417 skipanchors = eng->anchorConcatenation( skipanchors, a ); 2423 skipanchors = eng->anchorConcatenation( skipanchors, a );
2418 } 2424 }
2419} 2425}
2420 2426
2421#ifndef QT_NO_REGEXP_OPTIM 2427#ifndef QT_NO_REGEXP_OPTIM
2422void QRegExpEngine::Box::setupHeuristics() 2428void QRegExpEngine::Box::setupHeuristics()
2423{ 2429{
@@ -2430,52 +2436,52 @@ void QRegExpEngine::Box::setupHeuristics()
2430 2436
2431 We waited until here before normalizing these cases (instead of doing it 2437 We waited until here before normalizing these cases (instead of doing it
2432 in Box::orx()) because sometimes things improve by themselves; consider 2438 in Box::orx()) because sometimes things improve by themselves; consider
2433 (112|1)34. 2439 (112|1)34.
2434 */ 2440 */
2435 for ( int i = 0; i < NumBadChars; i++ ) { 2441 for ( int i = 0; i < NumBadChars; i++ ) {
2436 if ( occ1[i] != NoOccurrence && occ1[i] >= minl ) 2442 if ( occ1[i] != NoOccurrence && occ1[i] >= minl )
2437 occ1[i] = minl; 2443 occ1[i] = minl;
2438 } 2444 }
2439 eng->setupBadCharHeuristic( minl, occ1 ); 2445 eng->setupBadCharHeuristic( minl, occ1 );
2440 2446
2441 eng->heuristicallyChooseHeuristic(); 2447 eng->heuristicallyChooseHeuristic();
2442} 2448}
2443#endif 2449#endif
2444 2450
2445#if defined(QT_DEBUG) 2451#if defined(QT_DEBUG)
2446void QRegExpEngine::Box::dump() const 2452void QRegExpEngine::Box::dump() const
2447{ 2453{
2448 int i; 2454 int i;
2449 qDebug( "Box of at least %d character%s", minl, minl == 1 ? "" : "s" ); 2455 odebug << "Box of at least " << minl << " character" << (minl == 1 ? "" : "s") << oendl;
2450 qDebug( " Left states:" ); 2456 odebug << " Left states:" << oendl;
2451 for ( i = 0; i < (int) ls.size(); i++ ) { 2457 for ( i = 0; i < (int) ls.size(); i++ ) {
2452 if ( at(lanchors, ls[i]) == 0 ) 2458 if ( at(lanchors, ls[i]) == 0 )
2453 qDebug( " %d", ls[i] ); 2459 odebug << " " << ls[i] << oendl;
2454 else 2460 else
2455 qDebug( " %d [anchors 0x%.8x]", ls[i], lanchors[ls[i]] ); 2461 odebug << " " << ls[i] << QString().sprintf(" [anchors 0x%.8x]", lanchors[ls[i]]) << oendl;
2456 } 2462 }
2457 qDebug( " Right states:" ); 2463 odebug << " Right states:" << oendl;
2458 for ( i = 0; i < (int) rs.size(); i++ ) { 2464 for ( i = 0; i < (int) rs.size(); i++ ) {
2459 if ( at(ranchors, ls[i]) == 0 ) 2465 if ( at(ranchors, ls[i]) == 0 )
2460 qDebug( " %d", rs[i] ); 2466 odebug << " " << rs[i] << oendl;
2461 else 2467 else
2462 qDebug( " %d [anchors 0x%.8x]", rs[i], ranchors[rs[i]] ); 2468 odebug << " " << rs[i] << QString().sprintf(" [anchors 0x%.8x]", ranchors[rs[i]]) << oendl;
2463 } 2469 }
2464 qDebug( " Skip anchors: 0x%.8x", skipanchors ); 2470 odebug << QString().sprintf(" Skip anchors: 0x%.8x", skipanchors) << oendl;
2465} 2471}
2466#endif 2472#endif
2467 2473
2468void QRegExpEngine::Box::addAnchorsToEngine( const Box& to ) const 2474void QRegExpEngine::Box::addAnchorsToEngine( const Box& to ) const
2469{ 2475{
2470 for ( int i = 0; i < (int) to.ls.size(); i++ ) { 2476 for ( int i = 0; i < (int) to.ls.size(); i++ ) {
2471 for ( int j = 0; j < (int) rs.size(); j++ ) { 2477 for ( int j = 0; j < (int) rs.size(); j++ ) {
2472 int a = eng->anchorConcatenation( at(ranchors, rs[j]), 2478 int a = eng->anchorConcatenation( at(ranchors, rs[j]),
2473 at(to.lanchors, to.ls[i]) ); 2479 at(to.lanchors, to.ls[i]) );
2474 eng->addAnchors( rs[j], to.ls[i], a ); 2480 eng->addAnchors( rs[j], to.ls[i], a );
2475 } 2481 }
2476 } 2482 }
2477} 2483}
2478 2484
2479int QRegExpEngine::getChar() 2485int QRegExpEngine::getChar()
2480{ 2486{
2481 return ( yyPos == yyLen ) ? EOS : yyIn[yyPos++].unicode(); 2487 return ( yyPos == yyLen ) ? EOS : yyIn[yyPos++].unicode();
@@ -2490,137 +2496,137 @@ int QRegExpEngine::getEscape()
2490 int i; 2496 int i;
2491#endif 2497#endif
2492 ushort val; 2498 ushort val;
2493 int prevCh = yyCh; 2499 int prevCh = yyCh;
2494 2500
2495 if ( prevCh == EOS ) { 2501 if ( prevCh == EOS ) {
2496 yyError = TRUE; 2502 yyError = TRUE;
2497 return Tok_Char | '\\'; 2503 return Tok_Char | '\\';
2498 } 2504 }
2499 yyCh = getChar(); 2505 yyCh = getChar();
2500#ifndef QT_NO_REGEXP_ESCAPE 2506#ifndef QT_NO_REGEXP_ESCAPE
2501 if ( (prevCh & ~0xff) == 0 ) { 2507 if ( (prevCh & ~0xff) == 0 ) {
2502 const char *p = strchr( tab, prevCh ); 2508 const char *p = strchr( tab, prevCh );
2503 if ( p != 0 ) 2509 if ( p != 0 )
2504 return Tok_Char | backTab[p - tab]; 2510 return Tok_Char | backTab[p - tab];
2505 } 2511 }
2506#endif 2512#endif
2507 2513
2508 switch ( prevCh ) { 2514 switch ( prevCh ) {
2509#ifndef QT_NO_REGEXP_ESCAPE 2515#ifndef QT_NO_REGEXP_ESCAPE
2510 case '0': 2516 case '0':
2511 val = 0; 2517 val = 0;
2512 for ( i = 0; i < 3; i++ ) { 2518 for ( i = 0; i < 3; i++ ) {
2513 if ( yyCh >= '0' && yyCh <= '7' ) 2519 if ( yyCh >= '0' && yyCh <= '7' )
2514 val = ( val << 3 ) | ( yyCh - '0' ); 2520 val = ( val << 3 ) | ( yyCh - '0' );
2515 else 2521 else
2516 break; 2522 break;
2517 yyCh = getChar(); 2523 yyCh = getChar();
2518 } 2524 }
2519 if ( (val & ~0377) != 0 ) 2525 if ( (val & ~0377) != 0 )
2520 yyError = TRUE; 2526 yyError = TRUE;
2521 return Tok_Char | val; 2527 return Tok_Char | val;
2522#endif 2528#endif
2523#ifndef QT_NO_REGEXP_ESCAPE 2529#ifndef QT_NO_REGEXP_ESCAPE
2524 case 'B': 2530 case 'B':
2525 return Tok_NonWord; 2531 return Tok_NonWord;
2526#endif 2532#endif
2527#ifndef QT_NO_REGEXP_CCLASS 2533#ifndef QT_NO_REGEXP_CCLASS
2528 case 'D': 2534 case 'D':
2529 // see QChar::isDigit() 2535 // see QChar::isDigit()
2530 yyCharClass->addCategories( 0x7fffffef ); 2536 yyCharClass->addCategories( 0x7fffffef );
2531 return Tok_CharClass; 2537 return Tok_CharClass;
2532 case 'S': 2538 case 'S':
2533 // see QChar::isSpace() 2539 // see QChar::isSpace()
2534 yyCharClass->addCategories( 0x7ffff87f ); 2540 yyCharClass->addCategories( 0x7ffff87f );
2535 yyCharClass->addRange( 0x0000, 0x0008 ); 2541 yyCharClass->addRange( 0x0000, 0x0008 );
2536 yyCharClass->addRange( 0x000e, 0x001f ); 2542 yyCharClass->addRange( 0x000e, 0x001f );
2537 yyCharClass->addRange( 0x007f, 0x009f ); 2543 yyCharClass->addRange( 0x007f, 0x009f );
2538 return Tok_CharClass; 2544 return Tok_CharClass;
2539 case 'W': 2545 case 'W':
2540 // see QChar::isLetterOrNumber() 2546 // see QChar::isLetterOrNumber()
2541 yyCharClass->addCategories( 0x7ff07f8f ); 2547 yyCharClass->addCategories( 0x7ff07f8f );
2542 return Tok_CharClass; 2548 return Tok_CharClass;
2543#endif 2549#endif
2544#ifndef QT_NO_REGEXP_ESCAPE 2550#ifndef QT_NO_REGEXP_ESCAPE
2545 case 'b': 2551 case 'b':
2546 return Tok_Word; 2552 return Tok_Word;
2547#endif 2553#endif
2548#ifndef QT_NO_REGEXP_CCLASS 2554#ifndef QT_NO_REGEXP_CCLASS
2549 case 'd': 2555 case 'd':
2550 // see QChar::isDigit() 2556 // see QChar::isDigit()
2551 yyCharClass->addCategories( 0x00000010 ); 2557 yyCharClass->addCategories( 0x00000010 );
2552 return Tok_CharClass; 2558 return Tok_CharClass;
2553 case 's': 2559 case 's':
2554 // see QChar::isSpace() 2560 // see QChar::isSpace()
2555 yyCharClass->addCategories( 0x00000380 ); 2561 yyCharClass->addCategories( 0x00000380 );
2556 yyCharClass->addRange( 0x0009, 0x000d ); 2562 yyCharClass->addRange( 0x0009, 0x000d );
2557 return Tok_CharClass; 2563 return Tok_CharClass;
2558 case 'w': 2564 case 'w':
2559 // see QChar::isLetterOrNumber() 2565 // see QChar::isLetterOrNumber()
2560 yyCharClass->addCategories( 0x000f8070 ); 2566 yyCharClass->addCategories( 0x000f8070 );
2561 return Tok_CharClass; 2567 return Tok_CharClass;
2562#endif 2568#endif
2563#ifndef QT_NO_REGEXP_ESCAPE 2569#ifndef QT_NO_REGEXP_ESCAPE
2564 case 'x': 2570 case 'x':
2565 val = 0; 2571 val = 0;
2566 for ( i = 0; i < 4; i++ ) { 2572 for ( i = 0; i < 4; i++ ) {
2567 low = QChar( yyCh ).lower(); 2573 low = QChar( yyCh ).lower();
2568 if ( low >= '0' && low <= '9' ) 2574 if ( low >= '0' && low <= '9' )
2569 val = ( val << 4 ) | ( low - '0' ); 2575 val = ( val << 4 ) | ( low - '0' );
2570 else if ( low >= 'a' && low <= 'f' ) 2576 else if ( low >= 'a' && low <= 'f' )
2571 val = ( val << 4 ) | ( low - 'a' + 10 ); 2577 val = ( val << 4 ) | ( low - 'a' + 10 );
2572 else 2578 else
2573 break; 2579 break;
2574 yyCh = getChar(); 2580 yyCh = getChar();
2575 } 2581 }
2576 return Tok_Char | val; 2582 return Tok_Char | val;
2577#endif 2583#endif
2578 default: 2584 default:
2579 if ( prevCh >= '1' && prevCh <= '9' ) { 2585 if ( prevCh >= '1' && prevCh <= '9' ) {
2580#ifndef QT_NO_REGEXP_BACKREF 2586#ifndef QT_NO_REGEXP_BACKREF
2581 val = prevCh - '0'; 2587 val = prevCh - '0';
2582 while ( yyCh >= '0' && yyCh <= '9' ) { 2588 while ( yyCh >= '0' && yyCh <= '9' ) {
2583 val = ( val *= 10 ) | ( yyCh - '0' ); 2589 val = ( val *= 10 ) | ( yyCh - '0' );
2584 yyCh = getChar(); 2590 yyCh = getChar();
2585 } 2591 }
2586 return Tok_BackRef | val; 2592 return Tok_BackRef | val;
2587#else 2593#else
2588 yyError = TRUE; 2594 yyError = TRUE;
2589#endif 2595#endif
2590 } 2596 }
2591 return Tok_Char | prevCh; 2597 return Tok_Char | prevCh;
2592 } 2598 }
2593} 2599}
2594 2600
2595#ifndef QT_NO_REGEXP_INTERVAL 2601#ifndef QT_NO_REGEXP_INTERVAL
2596int QRegExpEngine::getRep( int def ) 2602int QRegExpEngine::getRep( int def )
2597{ 2603{
2598 if ( yyCh >= '0' && yyCh <= '9' ) { 2604 if ( yyCh >= '0' && yyCh <= '9' ) {
2599 int rep = 0; 2605 int rep = 0;
2600 do { 2606 do {
2601 rep = 10 * rep + yyCh - '0'; 2607 rep = 10 * rep + yyCh - '0';
2602 if ( rep >= InftyRep ) { 2608 if ( rep >= InftyRep ) {
2603 yyError = TRUE; 2609 yyError = TRUE;
2604 rep = def; 2610 rep = def;
2605 } 2611 }
2606 yyCh = getChar(); 2612 yyCh = getChar();
2607 } while ( yyCh >= '0' && yyCh <= '9' ); 2613 } while ( yyCh >= '0' && yyCh <= '9' );
2608 return rep; 2614 return rep;
2609 } else { 2615 } else {
2610 return def; 2616 return def;
2611 } 2617 }
2612} 2618}
2613#endif 2619#endif
2614 2620
2615#ifndef QT_NO_REGEXP_LOOKAHEAD 2621#ifndef QT_NO_REGEXP_LOOKAHEAD
2616void QRegExpEngine::skipChars( int n ) 2622void QRegExpEngine::skipChars( int n )
2617{ 2623{
2618 if ( n > 0 ) { 2624 if ( n > 0 ) {
2619 yyPos += n - 1; 2625 yyPos += n - 1;
2620 yyCh = getChar(); 2626 yyCh = getChar();
2621 } 2627 }
2622} 2628}
2623#endif 2629#endif
2624 2630
2625void QRegExpEngine::startTokenizer( const QChar *rx, int len ) 2631void QRegExpEngine::startTokenizer( const QChar *rx, int len )
2626{ 2632{
@@ -2651,147 +2657,147 @@ int QRegExpEngine::getToken()
2651#endif 2657#endif
2652 yyMinRep = 0; 2658 yyMinRep = 0;
2653 yyMaxRep = 0; 2659 yyMaxRep = 0;
2654 yyCh = getChar(); 2660 yyCh = getChar();
2655 switch ( prevCh ) { 2661 switch ( prevCh ) {
2656 case EOS: 2662 case EOS:
2657 yyPos0 = yyPos; 2663 yyPos0 = yyPos;
2658 return Tok_Eos; 2664 return Tok_Eos;
2659 case '$': 2665 case '$':
2660 return Tok_Dollar; 2666 return Tok_Dollar;
2661 case '(': 2667 case '(':
2662 if ( yyCh == '?' ) { 2668 if ( yyCh == '?' ) {
2663 prevCh = getChar(); 2669 prevCh = getChar();
2664 yyCh = getChar(); 2670 yyCh = getChar();
2665 switch ( prevCh ) { 2671 switch ( prevCh ) {
2666#ifndef QT_NO_REGEXP_LOOKAHEAD 2672#ifndef QT_NO_REGEXP_LOOKAHEAD
2667 case '!': 2673 case '!':
2668 return Tok_NegLookahead; 2674 return Tok_NegLookahead;
2669 case '=': 2675 case '=':
2670 return Tok_PosLookahead; 2676 return Tok_PosLookahead;
2671#endif 2677#endif
2672 case ':': 2678 case ':':
2673 return Tok_MagicLeftParen; 2679 return Tok_MagicLeftParen;
2674 default: 2680 default:
2675 yyError = TRUE; 2681 yyError = TRUE;
2676 return Tok_MagicLeftParen; 2682 return Tok_MagicLeftParen;
2677 } 2683 }
2678 } else { 2684 } else {
2679 return Tok_LeftParen; 2685 return Tok_LeftParen;
2680 } 2686 }
2681 case ')': 2687 case ')':
2682 return Tok_RightParen; 2688 return Tok_RightParen;
2683 case '*': 2689 case '*':
2684 yyMinRep = 0; 2690 yyMinRep = 0;
2685 yyMaxRep = InftyRep; 2691 yyMaxRep = InftyRep;
2686 return Tok_Quantifier; 2692 return Tok_Quantifier;
2687 case '+': 2693 case '+':
2688 yyMinRep = 1; 2694 yyMinRep = 1;
2689 yyMaxRep = InftyRep; 2695 yyMaxRep = InftyRep;
2690 return Tok_Quantifier; 2696 return Tok_Quantifier;
2691 case '.': 2697 case '.':
2692#ifndef QT_NO_REGEXP_CCLASS 2698#ifndef QT_NO_REGEXP_CCLASS
2693 yyCharClass->setNegative( TRUE ); 2699 yyCharClass->setNegative( TRUE );
2694#endif 2700#endif
2695 return Tok_CharClass; 2701 return Tok_CharClass;
2696 case '?': 2702 case '?':
2697 yyMinRep = 0; 2703 yyMinRep = 0;
2698 yyMaxRep = 1; 2704 yyMaxRep = 1;
2699 return Tok_Quantifier; 2705 return Tok_Quantifier;
2700 case '[': 2706 case '[':
2701#ifndef QT_NO_REGEXP_CCLASS 2707#ifndef QT_NO_REGEXP_CCLASS
2702 if ( yyCh == '^' ) { 2708 if ( yyCh == '^' ) {
2703 yyCharClass->setNegative( TRUE ); 2709 yyCharClass->setNegative( TRUE );
2704 yyCh = getChar(); 2710 yyCh = getChar();
2705 } 2711 }
2706 charPending = FALSE; 2712 charPending = FALSE;
2707 rangePending = FALSE; 2713 rangePending = FALSE;
2708 do { 2714 do {
2709 if ( yyCh == '-' && charPending && !rangePending ) { 2715 if ( yyCh == '-' && charPending && !rangePending ) {
2710 rangePending = TRUE; 2716 rangePending = TRUE;
2711 yyCh = getChar(); 2717 yyCh = getChar();
2712 } else { 2718 } else {
2713 if ( charPending && !rangePending ) { 2719 if ( charPending && !rangePending ) {
2714 yyCharClass->addSingleton( pendingCh ); 2720 yyCharClass->addSingleton( pendingCh );
2715 charPending = FALSE; 2721 charPending = FALSE;
2716 } 2722 }
2717 if ( yyCh == '\\' ) { 2723 if ( yyCh == '\\' ) {
2718 yyCh = getChar(); 2724 yyCh = getChar();
2719 tok = getEscape(); 2725 tok = getEscape();
2720 if ( tok == Tok_Word ) 2726 if ( tok == Tok_Word )
2721 tok = '\b'; 2727 tok = '\b';
2722 } else { 2728 } else {
2723 tok = Tok_Char | yyCh; 2729 tok = Tok_Char | yyCh;
2724 yyCh = getChar(); 2730 yyCh = getChar();
2725 } 2731 }
2726 if ( tok == Tok_CharClass ) { 2732 if ( tok == Tok_CharClass ) {
2727 if ( rangePending ) { 2733 if ( rangePending ) {
2728 yyCharClass->addSingleton( '-' ); 2734 yyCharClass->addSingleton( '-' );
2729 yyCharClass->addSingleton( pendingCh ); 2735 yyCharClass->addSingleton( pendingCh );
2730 charPending = FALSE; 2736 charPending = FALSE;
2731 rangePending = FALSE; 2737 rangePending = FALSE;
2732 } 2738 }
2733 } else if ( (tok & Tok_Char) != 0 ) { 2739 } else if ( (tok & Tok_Char) != 0 ) {
2734 if ( rangePending ) { 2740 if ( rangePending ) {
2735 yyCharClass->addRange( pendingCh, tok ^ Tok_Char ); 2741 yyCharClass->addRange( pendingCh, tok ^ Tok_Char );
2736 charPending = FALSE; 2742 charPending = FALSE;
2737 rangePending = FALSE; 2743 rangePending = FALSE;
2738 } else { 2744 } else {
2739 pendingCh = tok ^ Tok_Char; 2745 pendingCh = tok ^ Tok_Char;
2740 charPending = TRUE; 2746 charPending = TRUE;
2741 } 2747 }
2742 } else { 2748 } else {
2743 yyError = TRUE; 2749 yyError = TRUE;
2744 } 2750 }
2745 } 2751 }
2746 } while ( yyCh != ']' && yyCh != EOS ); 2752 } while ( yyCh != ']' && yyCh != EOS );
2747 if ( rangePending ) 2753 if ( rangePending )
2748 yyCharClass->addSingleton( '-' ); 2754 yyCharClass->addSingleton( '-' );
2749 if ( charPending ) 2755 if ( charPending )
2750 yyCharClass->addSingleton( pendingCh ); 2756 yyCharClass->addSingleton( pendingCh );
2751 if ( yyCh == EOS ) 2757 if ( yyCh == EOS )
2752 yyError = TRUE; 2758 yyError = TRUE;
2753 else 2759 else
2754 yyCh = getChar(); 2760 yyCh = getChar();
2755 return Tok_CharClass; 2761 return Tok_CharClass;
2756#else 2762#else
2757 yyError = TRUE; 2763 yyError = TRUE;
2758 return Tok_Char | '['; 2764 return Tok_Char | '[';
2759#endif 2765#endif
2760 case '\\': 2766 case '\\':
2761 return getEscape(); 2767 return getEscape();
2762 case ']': 2768 case ']':
2763 yyError = TRUE; 2769 yyError = TRUE;
2764 return Tok_Char | ']'; 2770 return Tok_Char | ']';
2765 case '^': 2771 case '^':
2766 return Tok_Caret; 2772 return Tok_Caret;
2767#ifndef QT_NO_REGEXP_INTERVAL 2773#ifndef QT_NO_REGEXP_INTERVAL
2768 case '{': 2774 case '{':
2769 yyMinRep = getRep( 0 ); 2775 yyMinRep = getRep( 0 );
2770 yyMaxRep = yyMinRep; 2776 yyMaxRep = yyMinRep;
2771 if ( yyCh == ',' ) { 2777 if ( yyCh == ',' ) {
2772 yyCh = getChar(); 2778 yyCh = getChar();
2773 yyMaxRep = getRep( InftyRep ); 2779 yyMaxRep = getRep( InftyRep );
2774 } 2780 }
2775 if ( yyMaxRep < yyMinRep ) 2781 if ( yyMaxRep < yyMinRep )
2776 qSwap( yyMinRep, yyMaxRep ); 2782 qSwap( yyMinRep, yyMaxRep );
2777 if ( yyCh != '}' ) 2783 if ( yyCh != '}' )
2778 yyError = TRUE; 2784 yyError = TRUE;
2779 yyCh = getChar(); 2785 yyCh = getChar();
2780 return Tok_Quantifier; 2786 return Tok_Quantifier;
2781#else 2787#else
2782 yyError = TRUE; 2788 yyError = TRUE;
2783 return Tok_Char | '{'; 2789 return Tok_Char | '{';
2784#endif 2790#endif
2785 case '|': 2791 case '|':
2786 return Tok_Bar; 2792 return Tok_Bar;
2787 case '}': 2793 case '}':
2788 yyError = TRUE; 2794 yyError = TRUE;
2789 return Tok_Char | '}'; 2795 return Tok_Char | '}';
2790 default: 2796 default:
2791 return Tok_Char | prevCh; 2797 return Tok_Char | prevCh;
2792 } 2798 }
2793} 2799}
2794 2800
2795int QRegExpEngine::parse( const QChar *pattern, int len ) 2801int QRegExpEngine::parse( const QChar *pattern, int len )
2796{ 2802{
2797 valid = TRUE; 2803 valid = TRUE;
@@ -2825,13 +2831,13 @@ int QRegExpEngine::parse( const QChar *pattern, int len )
2825 delete yyCharClass; 2831 delete yyCharClass;
2826 yyCharClass = 0; 2832 yyCharClass = 0;
2827 2833
2828 realncap = ncap; 2834 realncap = ncap;
2829#ifndef QT_NO_REGEXP_BACKREF 2835#ifndef QT_NO_REGEXP_BACKREF
2830 if ( nbrefs > ncap ) 2836 if ( nbrefs > ncap )
2831 ncap = nbrefs; 2837 ncap = nbrefs;
2832#endif 2838#endif
2833 2839
2834 mmCaptured.resize( 2 + 2 * realncap ); 2840 mmCaptured.resize( 2 + 2 * realncap );
2835 mmCapturedNoMatch.fill( -1, 2 + 2 * realncap ); 2841 mmCapturedNoMatch.fill( -1, 2 + 2 * realncap );
2836 2842
2837 /* 2843 /*
@@ -2860,30 +2866,30 @@ int QRegExpEngine::parse( const QChar *pattern, int len )
2860 mmCapBegin = mmTempCapBegin + 2 * ncap; 2866 mmCapBegin = mmTempCapBegin + 2 * ncap;
2861 mmCapEnd = mmTempCapBegin + 3 * ncap; 2867 mmCapEnd = mmTempCapBegin + 3 * ncap;
2862 2868
2863 mmSlideTab = mmTempCapBegin + 4 * ncap; 2869 mmSlideTab = mmTempCapBegin + 4 * ncap;
2864 2870
2865 if ( yyError ) 2871 if ( yyError )
2866 return -1; 2872 return -1;
2867 2873
2868#ifndef QT_NO_REGEXP_OPTIM 2874#ifndef QT_NO_REGEXP_OPTIM
2869 State *sinit = s[InitialState]; 2875 State *sinit = s[InitialState];
2870 caretAnchored = ( sinit->anchors != 0 ); 2876 caretAnchored = ( sinit->anchors != 0 );
2871 if ( caretAnchored ) { 2877 if ( caretAnchored ) {
2872 QMap<int, int>& anchors = *sinit->anchors; 2878 QMap<int, int>& anchors = *sinit->anchors;
2873 QMap<int, int>::ConstIterator a; 2879 QMap<int, int>::ConstIterator a;
2874 for ( a = anchors.begin(); a != anchors.end(); ++a ) { 2880 for ( a = anchors.begin(); a != anchors.end(); ++a ) {
2875#ifndef QT_NO_REGEXP_ANCHOR_ALT 2881#ifndef QT_NO_REGEXP_ANCHOR_ALT
2876 if ( (*a & Anchor_Alternation) != 0 ) 2882 if ( (*a & Anchor_Alternation) != 0 )
2877 break; 2883 break;
2878#endif 2884#endif
2879 if ( (*a & Anchor_Caret) == 0 ) { 2885 if ( (*a & Anchor_Caret) == 0 ) {
2880 caretAnchored = FALSE; 2886 caretAnchored = FALSE;
2881 break; 2887 break;
2882 } 2888 }
2883 } 2889 }
2884 } 2890 }
2885#endif 2891#endif
2886 return yyPos0; 2892 return yyPos0;
2887} 2893}
2888 2894
2889void QRegExpEngine::parseAtom( Box *box ) 2895void QRegExpEngine::parseAtom( Box *box )
@@ -2893,60 +2899,60 @@ void QRegExpEngine::parseAtom( Box *box )
2893 bool neg; 2899 bool neg;
2894 int len; 2900 int len;
2895#endif 2901#endif
2896 2902
2897 switch ( yyTok ) { 2903 switch ( yyTok ) {
2898 case Tok_Dollar: 2904 case Tok_Dollar:
2899 box->catAnchor( Anchor_Dollar ); 2905 box->catAnchor( Anchor_Dollar );
2900 break; 2906 break;
2901 case Tok_Caret: 2907 case Tok_Caret:
2902 box->catAnchor( Anchor_Caret ); 2908 box->catAnchor( Anchor_Caret );
2903 break; 2909 break;
2904#ifndef QT_NO_REGEXP_LOOKAHEAD 2910#ifndef QT_NO_REGEXP_LOOKAHEAD
2905 case Tok_PosLookahead: 2911 case Tok_PosLookahead:
2906 case Tok_NegLookahead: 2912 case Tok_NegLookahead:
2907 neg = ( yyTok == Tok_NegLookahead ); 2913 neg = ( yyTok == Tok_NegLookahead );
2908 eng = new QRegExpEngine( cs ); 2914 eng = new QRegExpEngine( cs );
2909 len = eng->parse( yyIn + yyPos - 1, yyLen - yyPos + 1 ); 2915 len = eng->parse( yyIn + yyPos - 1, yyLen - yyPos + 1 );
2910 if ( len >= 0 ) 2916 if ( len >= 0 )
2911 skipChars( len ); 2917 skipChars( len );
2912 else 2918 else
2913 yyError = TRUE; 2919 yyError = TRUE;
2914 box->catAnchor( addLookahead(eng, neg) ); 2920 box->catAnchor( addLookahead(eng, neg) );
2915 yyTok = getToken(); 2921 yyTok = getToken();
2916 if ( yyTok != Tok_RightParen ) 2922 if ( yyTok != Tok_RightParen )
2917 yyError = TRUE; 2923 yyError = TRUE;
2918 break; 2924 break;
2919#endif 2925#endif
2920#ifndef QT_NO_REGEXP_ESCAPE 2926#ifndef QT_NO_REGEXP_ESCAPE
2921 case Tok_Word: 2927 case Tok_Word:
2922 box->catAnchor( Anchor_Word ); 2928 box->catAnchor( Anchor_Word );
2923 break; 2929 break;
2924 case Tok_NonWord: 2930 case Tok_NonWord:
2925 box->catAnchor( Anchor_NonWord ); 2931 box->catAnchor( Anchor_NonWord );
2926 break; 2932 break;
2927#endif 2933#endif
2928 case Tok_LeftParen: 2934 case Tok_LeftParen:
2929 case Tok_MagicLeftParen: 2935 case Tok_MagicLeftParen:
2930 yyTok = getToken(); 2936 yyTok = getToken();
2931 parseExpression( box ); 2937 parseExpression( box );
2932 if ( yyTok != Tok_RightParen ) 2938 if ( yyTok != Tok_RightParen )
2933 yyError = TRUE; 2939 yyError = TRUE;
2934 break; 2940 break;
2935 case Tok_CharClass: 2941 case Tok_CharClass:
2936 box->set( *yyCharClass ); 2942 box->set( *yyCharClass );
2937 break; 2943 break;
2938 default: 2944 default:
2939 if ( (yyTok & Tok_Char) != 0 ) 2945 if ( (yyTok & Tok_Char) != 0 )
2940 box->set( QChar(yyTok ^ Tok_Char) ); 2946 box->set( QChar(yyTok ^ Tok_Char) );
2941#ifndef QT_NO_REGEXP_BACKREF 2947#ifndef QT_NO_REGEXP_BACKREF
2942 else if ( (yyTok & Tok_BackRef) != 0 ) 2948 else if ( (yyTok & Tok_BackRef) != 0 )
2943 box->set( yyTok ^ Tok_BackRef ); 2949 box->set( yyTok ^ Tok_BackRef );
2944#endif 2950#endif
2945 else 2951 else
2946 yyError = TRUE; 2952 yyError = TRUE;
2947 } 2953 }
2948 yyTok = getToken(); 2954 yyTok = getToken();
2949} 2955}
2950 2956
2951void QRegExpEngine::parseFactor( Box *box ) 2957void QRegExpEngine::parseFactor( Box *box )
2952{ 2958{
@@ -2955,98 +2961,98 @@ void QRegExpEngine::parseFactor( Box *box )
2955#else 2961#else
2956 static const int atom = 0; 2962 static const int atom = 0;
2957#endif 2963#endif
2958 2964
2959#ifndef QT_NO_REGEXP_INTERVAL 2965#ifndef QT_NO_REGEXP_INTERVAL
2960#define YYREDO() \ 2966#define YYREDO() \
2961 yyIn = in, yyPos0 = pos0, yyPos = pos, yyLen = len, yyCh = ch, \ 2967 yyIn = in, yyPos0 = pos0, yyPos = pos, yyLen = len, yyCh = ch, \
2962 *yyCharClass = charClass, yyMinRep = 0, yyMaxRep = 0, yyTok = tok 2968 *yyCharClass = charClass, yyMinRep = 0, yyMaxRep = 0, yyTok = tok
2963 2969
2964 const QChar *in = yyIn; 2970 const QChar *in = yyIn;
2965 int pos0 = yyPos0; 2971 int pos0 = yyPos0;
2966 int pos = yyPos; 2972 int pos = yyPos;
2967 int len = yyLen; 2973 int len = yyLen;
2968 int ch = yyCh; 2974 int ch = yyCh;
2969 CharClass charClass; 2975 CharClass charClass;
2970 if ( yyTok == Tok_CharClass ) 2976 if ( yyTok == Tok_CharClass )
2971 charClass = *yyCharClass; 2977 charClass = *yyCharClass;
2972 int tok = yyTok; 2978 int tok = yyTok;
2973 bool mayCapture = yyMayCapture; 2979 bool mayCapture = yyMayCapture;
2974#endif 2980#endif
2975 2981
2976 parseAtom( box ); 2982 parseAtom( box );
2977#ifndef QT_NO_REGEXP_CAPTURE 2983#ifndef QT_NO_REGEXP_CAPTURE
2978 finishAtom( atom ); 2984 finishAtom( atom );
2979#endif 2985#endif
2980 2986
2981 if ( yyTok == Tok_Quantifier ) { 2987 if ( yyTok == Tok_Quantifier ) {
2982 if ( yyMaxRep == InftyRep ) { 2988 if ( yyMaxRep == InftyRep ) {
2983 box->plus( atom ); 2989 box->plus( atom );
2984#ifndef QT_NO_REGEXP_INTERVAL 2990#ifndef QT_NO_REGEXP_INTERVAL
2985 } else if ( yyMaxRep == 0 ) { 2991 } else if ( yyMaxRep == 0 ) {
2986 box->clear(); 2992 box->clear();
2987#endif 2993#endif
2988 } 2994 }
2989 if ( yyMinRep == 0 ) 2995 if ( yyMinRep == 0 )
2990 box->opt(); 2996 box->opt();
2991 2997
2992#ifndef QT_NO_REGEXP_INTERVAL 2998#ifndef QT_NO_REGEXP_INTERVAL
2993 yyMayCapture = FALSE; 2999 yyMayCapture = FALSE;
2994 int alpha = ( yyMinRep == 0 ) ? 0 : yyMinRep - 1; 3000 int alpha = ( yyMinRep == 0 ) ? 0 : yyMinRep - 1;
2995 int beta = ( yyMaxRep == InftyRep ) ? 0 : yyMaxRep - ( alpha + 1 ); 3001 int beta = ( yyMaxRep == InftyRep ) ? 0 : yyMaxRep - ( alpha + 1 );
2996 3002
2997 Box rightBox( this ); 3003 Box rightBox( this );
2998 int i; 3004 int i;
2999 3005
3000 for ( i = 0; i < beta; i++ ) { 3006 for ( i = 0; i < beta; i++ ) {
3001 YYREDO(); 3007 YYREDO();
3002 Box leftBox( this ); 3008 Box leftBox( this );
3003 parseAtom( &leftBox ); 3009 parseAtom( &leftBox );
3004 leftBox.cat( rightBox ); 3010 leftBox.cat( rightBox );
3005 leftBox.opt(); 3011 leftBox.opt();
3006 rightBox = leftBox; 3012 rightBox = leftBox;
3007 } 3013 }
3008 for ( i = 0; i < alpha; i++ ) { 3014 for ( i = 0; i < alpha; i++ ) {
3009 YYREDO(); 3015 YYREDO();
3010 Box leftBox( this ); 3016 Box leftBox( this );
3011 parseAtom( &leftBox ); 3017 parseAtom( &leftBox );
3012 leftBox.cat( rightBox ); 3018 leftBox.cat( rightBox );
3013 rightBox = leftBox; 3019 rightBox = leftBox;
3014 } 3020 }
3015 rightBox.cat( *box ); 3021 rightBox.cat( *box );
3016 *box = rightBox; 3022 *box = rightBox;
3017#endif 3023#endif
3018 yyTok = getToken(); 3024 yyTok = getToken();
3019#ifndef QT_NO_REGEXP_INTERVAL 3025#ifndef QT_NO_REGEXP_INTERVAL
3020 yyMayCapture = mayCapture; 3026 yyMayCapture = mayCapture;
3021#endif 3027#endif
3022 } 3028 }
3023#undef YYREDO 3029#undef YYREDO
3024} 3030}
3025 3031
3026void QRegExpEngine::parseTerm( Box *box ) 3032void QRegExpEngine::parseTerm( Box *box )
3027{ 3033{
3028#ifndef QT_NO_REGEXP_OPTIM 3034#ifndef QT_NO_REGEXP_OPTIM
3029 if ( yyTok != Tok_Eos && yyTok != Tok_RightParen && yyTok != Tok_Bar ) 3035 if ( yyTok != Tok_Eos && yyTok != Tok_RightParen && yyTok != Tok_Bar )
3030 parseFactor( box ); 3036 parseFactor( box );
3031#endif 3037#endif
3032 while ( yyTok != Tok_Eos && yyTok != Tok_RightParen && yyTok != Tok_Bar ) { 3038 while ( yyTok != Tok_Eos && yyTok != Tok_RightParen && yyTok != Tok_Bar ) {
3033 Box rightBox( this ); 3039 Box rightBox( this );
3034 parseFactor( &rightBox ); 3040 parseFactor( &rightBox );
3035 box->cat( rightBox ); 3041 box->cat( rightBox );
3036 } 3042 }
3037} 3043}
3038 3044
3039void QRegExpEngine::parseExpression( Box *box ) 3045void QRegExpEngine::parseExpression( Box *box )
3040{ 3046{
3041 parseTerm( box ); 3047 parseTerm( box );
3042 while ( yyTok == Tok_Bar ) { 3048 while ( yyTok == Tok_Bar ) {
3043 Box rightBox( this ); 3049 Box rightBox( this );
3044 yyTok = getToken(); 3050 yyTok = getToken();
3045 parseTerm( &rightBox ); 3051 parseTerm( &rightBox );
3046 box->orx( rightBox ); 3052 box->orx( rightBox );
3047 } 3053 }
3048} 3054}
3049 3055
3050/* 3056/*
3051 The class QRegExpPrivate contains the private data of a regular expression 3057 The class QRegExpPrivate contains the private data of a regular expression
3052 other than the automaton. It makes it possible for many QRegExp objects to 3058 other than the automaton. It makes it possible for many QRegExp objects to
@@ -3074,39 +3080,39 @@ static QCache<QRegExpEngine> *engineCache = 0;
3074#endif 3080#endif
3075 3081
3076static QRegExpEngine *newEngine( const QString& pattern, bool caseSensitive ) 3082static QRegExpEngine *newEngine( const QString& pattern, bool caseSensitive )
3077{ 3083{
3078#ifndef QT_NO_REGEXP_OPTIM 3084#ifndef QT_NO_REGEXP_OPTIM
3079 if ( engineCache != 0 ) { 3085 if ( engineCache != 0 ) {
3080 QRegExpEngine *eng = engineCache->take( pattern ); 3086 QRegExpEngine *eng = engineCache->take( pattern );
3081 if ( eng == 0 || eng->caseSensitive() != caseSensitive ) { 3087 if ( eng == 0 || eng->caseSensitive() != caseSensitive ) {
3082 delete eng; 3088 delete eng;
3083 } else { 3089 } else {
3084 eng->ref(); 3090 eng->ref();
3085 return eng; 3091 return eng;
3086 } 3092 }
3087 } 3093 }
3088#endif 3094#endif
3089 return new QRegExpEngine( pattern, caseSensitive ); 3095 return new QRegExpEngine( pattern, caseSensitive );
3090} 3096}
3091 3097
3092static void derefEngine( QRegExpEngine *eng, const QString& pattern ) 3098static void derefEngine( QRegExpEngine *eng, const QString& pattern )
3093{ 3099{
3094 if ( eng != 0 && eng->deref() ) { 3100 if ( eng != 0 && eng->deref() ) {
3095#ifndef QT_NO_REGEXP_OPTIM 3101#ifndef QT_NO_REGEXP_OPTIM
3096 if ( engineCache == 0 ) { 3102 if ( engineCache == 0 ) {
3097 engineCache = new QCache<QRegExpEngine>; 3103 engineCache = new QCache<QRegExpEngine>;
3098 engineCache->setAutoDelete( TRUE ); 3104 engineCache->setAutoDelete( TRUE );
3099 } 3105 }
3100 if ( !pattern.isNull() && 3106 if ( !pattern.isNull() &&
3101 engineCache->insert(pattern, eng, 4 + pattern.length() / 4) ) 3107 engineCache->insert(pattern, eng, 4 + pattern.length() / 4) )
3102 return; 3108 return;
3103#else 3109#else
3104 Q_UNUSED( pattern ); 3110 Q_UNUSED( pattern );
3105#endif 3111#endif
3106 delete eng; 3112 delete eng;
3107 } 3113 }
3108} 3114}
3109 3115
3110/*! 3116/*!
3111 Constructs an empty regexp. 3117 Constructs an empty regexp.
3112 3118
@@ -3198,17 +3204,17 @@ QRegExp3& QRegExp3::operator=( const QRegExp3& rx )
3198 and the same settings for case sensitivity, wildcard and minimal 3204 and the same settings for case sensitivity, wildcard and minimal
3199 matching. 3205 matching.
3200*/ 3206*/
3201bool QRegExp3::operator==( const QRegExp3& rx ) const 3207bool QRegExp3::operator==( const QRegExp3& rx ) const
3202{ 3208{
3203 return priv->pattern == rx.priv->pattern && 3209 return priv->pattern == rx.priv->pattern &&
3204 eng->caseSensitive() == rx.eng->caseSensitive() && 3210 eng->caseSensitive() == rx.eng->caseSensitive() &&
3205#ifndef QT_NO_REGEXP_WILDCARD 3211#ifndef QT_NO_REGEXP_WILDCARD
3206 priv->wc == rx.priv->wc && 3212 priv->wc == rx.priv->wc &&
3207#endif 3213#endif
3208 priv->min == rx.priv->min; 3214 priv->min == rx.priv->min;
3209} 3215}
3210 3216
3211/*! \fn bool QRegExp3::operator!=( const QRegExp& rx ) const 3217/*! \fn bool QRegExp3::operator!=( const QRegExp& rx ) const
3212 3218
3213 Returns TRUE if this regular expression is not equal to \a rx, otherwise 3219 Returns TRUE if this regular expression is not equal to \a rx, otherwise
3214 FALSE. 3220 FALSE.
@@ -3269,14 +3275,14 @@ QString QRegExp3::pattern() const
3269 3275
3270 \sa pattern() 3276 \sa pattern()
3271*/ 3277*/
3272void QRegExp3::setPattern( const QString& pattern ) 3278void QRegExp3::setPattern( const QString& pattern )
3273{ 3279{
3274 if ( priv->pattern != pattern ) { 3280 if ( priv->pattern != pattern ) {
3275 priv->pattern = pattern; 3281 priv->pattern = pattern;
3276 compile( caseSensitive() ); 3282 compile( caseSensitive() );
3277 } 3283 }
3278} 3284}
3279 3285
3280/*! 3286/*!
3281 Returns TRUE if case sensitivity is enabled, otherwise FALSE. The default is 3287 Returns TRUE if case sensitivity is enabled, otherwise FALSE. The default is
3282 TRUE. 3288 TRUE.
@@ -3296,13 +3302,13 @@ bool QRegExp3::caseSensitive() const
3296 3302
3297 \sa caseSensitive() 3303 \sa caseSensitive()
3298*/ 3304*/
3299void QRegExp3::setCaseSensitive( bool sensitive ) 3305void QRegExp3::setCaseSensitive( bool sensitive )
3300{ 3306{
3301 if ( sensitive != eng->caseSensitive() ) 3307 if ( sensitive != eng->caseSensitive() )
3302 compile( sensitive ); 3308 compile( sensitive );
3303} 3309}
3304 3310
3305#ifndef QT_NO_REGEXP_WILDCARD 3311#ifndef QT_NO_REGEXP_WILDCARD
3306/*! 3312/*!
3307 Returns TRUE if wildcard mode is enabled, otherwise FALSE. The default is 3313 Returns TRUE if wildcard mode is enabled, otherwise FALSE. The default is
3308 FALSE. 3314 FALSE.
@@ -3325,14 +3331,14 @@ bool QRegExp3::wildcard() const
3325 3331
3326 \sa wildcard() 3332 \sa wildcard()
3327*/ 3333*/
3328void QRegExp3::setWildcard( bool wildcard ) 3334void QRegExp3::setWildcard( bool wildcard )
3329{ 3335{
3330 if ( wildcard != priv->wc ) { 3336 if ( wildcard != priv->wc ) {
3331 priv->wc = wildcard; 3337 priv->wc = wildcard;
3332 compile( caseSensitive() ); 3338 compile( caseSensitive() );
3333 } 3339 }
3334} 3340}
3335#endif 3341#endif
3336 3342
3337/*! Returns TRUE if minimal (non-greedy) matching is enabled, otherwise 3343/*! Returns TRUE if minimal (non-greedy) matching is enabled, otherwise
3338 returns FALSE. 3344 returns FALSE.
@@ -3388,29 +3394,29 @@ bool QRegExp3::exactMatch( const QString& str )
3388 priv->t = str; 3394 priv->t = str;
3389 priv->capturedCache.clear(); 3395 priv->capturedCache.clear();
3390#endif 3396#endif
3391 3397
3392 priv->captured = eng->match( str, 0, priv->min, TRUE ); 3398 priv->captured = eng->match( str, 0, priv->min, TRUE );
3393 if ( priv->captured[1] == (int) str.length() ) { 3399 if ( priv->captured[1] == (int) str.length() ) {
3394 return TRUE; 3400 return TRUE;
3395 } else { 3401 } else {
3396 priv->captured.detach(); 3402 priv->captured.detach();
3397 priv->captured[0] = 0; 3403 priv->captured[0] = 0;
3398 priv->captured[1] = eng->matchedLength(); 3404 priv->captured[1] = eng->matchedLength();
3399 return FALSE; 3405 return FALSE;
3400 } 3406 }
3401} 3407}
3402 3408
3403/*! \overload 3409/*! \overload
3404 3410
3405 This version does not set matchedLength(), capturedTexts() and friends. 3411 This version does not set matchedLength(), capturedTexts() and friends.
3406*/ 3412*/
3407bool QRegExp3::exactMatch( const QString& str ) const 3413bool QRegExp3::exactMatch( const QString& str ) const
3408{ 3414{
3409 return eng->match(str, 0, priv->min, TRUE)[0] == 0 && 3415 return eng->match(str, 0, priv->min, TRUE)[0] == 0 &&
3410 eng->matchedLength() == (int) str.length(); 3416 eng->matchedLength() == (int) str.length();
3411} 3417}
3412 3418
3413/*! \obsolete 3419/*! \obsolete
3414 3420
3415 Attempts to match in \a str, starting from position \a index. Returns the 3421 Attempts to match in \a str, starting from position \a index. Returns the
3416 position of the match, or -1 if there was no match. 3422 position of the match, or -1 if there was no match.
@@ -3426,35 +3432,35 @@ bool QRegExp3::exactMatch( const QString& str ) const
3426 If you really need the \a indexIsStart functionality, try this: 3432 If you really need the \a indexIsStart functionality, try this:
3427 3433
3428 \code 3434 \code
3429 QRegExp3 rx( "some pattern" ); 3435 QRegExp3 rx( "some pattern" );
3430 int pos = rx.search( str.mid( index ) ); 3436 int pos = rx.search( str.mid( index ) );
3431 if ( pos != -1 ) 3437 if ( pos != -1 )
3432 pos += index; 3438 pos += index;
3433 int len = rx.matchedLength(); 3439 int len = rx.matchedLength();
3434 \endcode 3440 \endcode
3435*/ 3441*/
3436#ifndef QT_NO_COMPAT 3442#ifndef QT_NO_COMPAT
3437int QRegExp3::match( const QString& str, int index, int *len, 3443int QRegExp3::match( const QString& str, int index, int *len,
3438 bool indexIsStart ) 3444 bool indexIsStart )
3439{ 3445{
3440 int pos; 3446 int pos;
3441 if ( indexIsStart ) { 3447 if ( indexIsStart ) {
3442 pos = search( str.mid(index) ); 3448 pos = search( str.mid(index) );
3443 if ( pos >= 0 ) { 3449 if ( pos >= 0 ) {
3444 pos += index; 3450 pos += index;
3445 if ( len != 0 ) 3451 if ( len != 0 )
3446 *len = matchedLength(); 3452 *len = matchedLength();
3447 } else { 3453 } else {
3448 if ( len != 0 ) 3454 if ( len != 0 )
3449 *len = 0; 3455 *len = 0;
3450 } 3456 }
3451 } else { 3457 } else {
3452 pos = search( str, index ); 3458 pos = search( str, index );
3453 if ( len != 0 ) 3459 if ( len != 0 )
3454 *len = matchedLength(); 3460 *len = matchedLength();
3455 } 3461 }
3456 return pos; 3462 return pos;
3457} 3463}
3458#endif 3464#endif
3459 3465
3460/*! 3466/*!
@@ -3471,64 +3477,64 @@ int QRegExp3::match( const QString& str, int index, int *len,
3471 \code 3477 \code
3472 QString str = "offsets: 1.23 .50 71.00 6.00"; 3478 QString str = "offsets: 1.23 .50 71.00 6.00";
3473 QRegExp3 rx( "\\d*\\.\\d+" ); // very simple floating point matching 3479 QRegExp3 rx( "\\d*\\.\\d+" ); // very simple floating point matching
3474 int count = 0; 3480 int count = 0;
3475 int pos = 0; 3481 int pos = 0;
3476 while ( pos >= 0 ) { 3482 while ( pos >= 0 ) {
3477 pos = rx.search( str, pos ); 3483 pos = rx.search( str, pos );
3478 count++; 3484 count++;
3479 } 3485 }
3480 // pos will be 9, 14, 18 and finally 24; count will end up as 4. 3486 // pos will be 9, 14, 18 and finally 24; count will end up as 4.
3481 \endcode 3487 \endcode
3482 3488
3483 \sa searchRev() match() matchedLength() capturedTexts() 3489 \sa searchRev() match() matchedLength() capturedTexts()
3484*/ 3490*/
3485// QChar versions 3491// QChar versions
3486 3492
3487#ifdef QCHAR_SUPPORT 3493#ifdef QCHAR_SUPPORT
3488const QString makeString(const QChar *str) 3494const QString makeString(const QChar *str)
3489{ 3495{
3490// A sentinel value checked in case the QChar *ptr is never null terminated 3496// A sentinel value checked in case the QChar *ptr is never null terminated
3491 const uint MAXLENGTH=65535; 3497 const uint MAXLENGTH=65535;
3492 3498
3493 const QChar *s=str; 3499 const QChar *s=str;
3494 uint i=0; 3500 uint i=0;
3495 while(i < MAXLENGTH && *s != QChar::null) { i++;s++ ;} 3501 while(i < MAXLENGTH && *s != QChar::null) { i++;s++ ;}
3496 return QString(str,i); 3502 return QString(str,i);
3497 3503
3498} 3504}
3499int QRegExp3::search(const QChar *str,int start) 3505int QRegExp3::search(const QChar *str,int start)
3500{ 3506{
3501 return search(makeString(str),start); 3507 return search(makeString(str),start);
3502} 3508}
3503int QRegExp3::search(const QChar *str,int start) const 3509int QRegExp3::search(const QChar *str,int start) const
3504{ 3510{
3505 return search(makeString(str),start); 3511 return search(makeString(str),start);
3506} 3512}
3507int QRegExp3::searchRev(const QChar *str,int start) 3513int QRegExp3::searchRev(const QChar *str,int start)
3508{ 3514{
3509 return searchRev(makeString(str),start); 3515 return searchRev(makeString(str),start);
3510} 3516}
3511int QRegExp3::searchRev(const QChar *str,int start) const 3517int QRegExp3::searchRev(const QChar *str,int start) const
3512{ 3518{
3513 return searchRev(makeString(str),start); 3519 return searchRev(makeString(str),start);
3514} 3520}
3515bool QRegExp3::exactMatch(const QChar *str) 3521bool QRegExp3::exactMatch(const QChar *str)
3516{ 3522{
3517 return exactMatch(makeString(str)); 3523 return exactMatch(makeString(str));
3518} 3524}
3519bool QRegExp3::exactMatch(const QChar *str) const 3525bool QRegExp3::exactMatch(const QChar *str) const
3520{ 3526{
3521 return exactMatch(makeString(str)); 3527 return exactMatch(makeString(str));
3522} 3528}
3523#endif // QCHAR_SUPPORT 3529#endif // QCHAR_SUPPORT
3524 3530
3525int QRegExp3::search( const QString& str, int start ) 3531int QRegExp3::search( const QString& str, int start )
3526{ 3532{
3527 if ( start < 0 ) 3533 if ( start < 0 )
3528 start += str.length(); 3534 start += str.length();
3529#ifndef QT_NO_REGEXP_CAPTURE 3535#ifndef QT_NO_REGEXP_CAPTURE
3530 priv->t = str; 3536 priv->t = str;
3531 priv->capturedCache.clear(); 3537 priv->capturedCache.clear();
3532#endif 3538#endif
3533 priv->captured = eng->match( str, start, priv->min, FALSE ); 3539 priv->captured = eng->match( str, start, priv->min, FALSE );
3534 return priv->captured[0]; 3540 return priv->captured[0];
@@ -3538,13 +3544,13 @@ int QRegExp3::search( const QString& str, int start )
3538 3544
3539 This version does not set matchedLength(), capturedTexts() and friends. 3545 This version does not set matchedLength(), capturedTexts() and friends.
3540*/ 3546*/
3541int QRegExp3::search( const QString& str, int start ) const 3547int QRegExp3::search( const QString& str, int start ) const
3542{ 3548{
3543 if ( start < 0 ) 3549 if ( start < 0 )
3544 start += str.length(); 3550 start += str.length();
3545 return eng->match( str, start, priv->min, FALSE )[0]; 3551 return eng->match( str, start, priv->min, FALSE )[0];
3546} 3552}
3547 3553
3548/*! 3554/*!
3549 Attempts to find a match backwards in \a str from position \a start. If 3555 Attempts to find a match backwards in \a str from position \a start. If
3550 \a start is -1 (the default), the search starts at the last character; if -2, 3556 \a start is -1 (the default), the search starts at the last character; if -2,
@@ -3556,47 +3562,47 @@ int QRegExp3::search( const QString& str, int start ) const
3556 3562
3557 \sa search() matchedLength() capturedTexts() 3563 \sa search() matchedLength() capturedTexts()
3558*/ 3564*/
3559int QRegExp3::searchRev( const QString& str, int start ) 3565int QRegExp3::searchRev( const QString& str, int start )
3560{ 3566{
3561 if ( start < 0 ) 3567 if ( start < 0 )
3562 start += str.length(); 3568 start += str.length();
3563#ifndef QT_NO_REGEXP_CAPTURE 3569#ifndef QT_NO_REGEXP_CAPTURE
3564 priv->t = str; 3570 priv->t = str;
3565 priv->capturedCache.clear(); 3571 priv->capturedCache.clear();
3566#endif 3572#endif
3567 if ( start < 0 || start > (int) str.length() ) { 3573 if ( start < 0 || start > (int) str.length() ) {
3568 priv->captured.detach(); 3574 priv->captured.detach();
3569 priv->captured.fill( -1 ); 3575 priv->captured.fill( -1 );
3570 return -1; 3576 return -1;
3571 } 3577 }
3572 3578
3573 while ( start >= 0 ) { 3579 while ( start >= 0 ) {
3574 priv->captured = eng->match( str, start, priv->min, TRUE ); 3580 priv->captured = eng->match( str, start, priv->min, TRUE );
3575 if ( priv->captured[0] == start ) 3581 if ( priv->captured[0] == start )
3576 return start; 3582 return start;
3577 start--; 3583 start--;
3578 } 3584 }
3579 return -1; 3585 return -1;
3580} 3586}
3581 3587
3582/*! \overload 3588/*! \overload
3583 3589
3584 This version does not set matchedLength(), capturedText() and friends. 3590 This version does not set matchedLength(), capturedText() and friends.
3585*/ 3591*/
3586int QRegExp3::searchRev( const QString& str, int start ) const 3592int QRegExp3::searchRev( const QString& str, int start ) const
3587{ 3593{
3588 if ( start < 0 ) 3594 if ( start < 0 )
3589 start += str.length(); 3595 start += str.length();
3590 if ( start < 0 || start > (int) str.length() ) 3596 if ( start < 0 || start > (int) str.length() )
3591 return -1; 3597 return -1;
3592 3598
3593 while ( start >= 0 ) { 3599 while ( start >= 0 ) {
3594 if ( eng->match(str, start, priv->min, TRUE)[0] == start ) 3600 if ( eng->match(str, start, priv->min, TRUE)[0] == start )
3595 return start; 3601 return start;
3596 start--; 3602 start--;
3597 } 3603 }
3598 return -1; 3604 return -1;
3599} 3605}
3600 3606
3601/*! 3607/*!
3602 Returns the length of the last matched string, or -1 if there was no match. 3608 Returns the length of the last matched string, or -1 if there was no match.
@@ -3653,37 +3659,37 @@ int QRegExp3::matchedLength()
3653 3659
3654 \sa cap() pos() 3660 \sa cap() pos()
3655*/ 3661*/
3656QStringList QRegExp3::capturedTexts() 3662QStringList QRegExp3::capturedTexts()
3657{ 3663{
3658 if ( priv->capturedCache.isEmpty() ) { 3664 if ( priv->capturedCache.isEmpty() ) {
3659 for ( int i = 0; i < (int) priv->captured.size(); i += 2 ) { 3665 for ( int i = 0; i < (int) priv->captured.size(); i += 2 ) {
3660 QString m; 3666 QString m;
3661 if ( priv->captured[i + 1] == 0 ) 3667 if ( priv->captured[i + 1] == 0 )
3662 m = QString::fromLatin1( "" ); 3668 m = QString::fromLatin1( "" );
3663 else if ( priv->captured[i] >= 0 ) 3669 else if ( priv->captured[i] >= 0 )
3664 m = priv->t.mid( priv->captured[i], 3670 m = priv->t.mid( priv->captured[i],
3665 priv->captured[i + 1] ); 3671 priv->captured[i + 1] );
3666 priv->capturedCache.append( m ); 3672 priv->capturedCache.append( m );
3667 } 3673 }
3668 priv->t = QString::null; 3674 priv->t = QString::null;
3669 } 3675 }
3670 return priv->capturedCache; 3676 return priv->capturedCache;
3671} 3677}
3672 3678
3673/*! Returns the text captured by the \a nth subexpression. The entire match 3679/*! Returns the text captured by the \a nth subexpression. The entire match
3674 has index 0 and the parenthesised subexpressions have indices starting 3680 has index 0 and the parenthesised subexpressions have indices starting
3675 from 1 (excluding non-capturing parenthesis). 3681 from 1 (excluding non-capturing parenthesis).
3676 3682
3677 \code 3683 \code
3678 QRegExp3 rxlen( "(\\d+)(?:\\s*)(cm|inch)" ); 3684 QRegExp3 rxlen( "(\\d+)(?:\\s*)(cm|inch)" );
3679 int pos = rxlen.search( "Length: 189cm" ); 3685 int pos = rxlen.search( "Length: 189cm" );
3680 if ( pos > -1 ) { 3686 if ( pos > -1 ) {
3681 QString value = rxlen.cap( 1 );// "189" 3687 QString value = rxlen.cap( 1 ); // "189"
3682 QString unit = rxlen.cap( 2 ); // "cm" 3688 QString unit = rxlen.cap( 2 ); // "cm"
3683 // ... 3689 // ...
3684 } 3690 }
3685 \endcode 3691 \endcode
3686 3692
3687 <a name="cap_in_a_loop"> 3693 <a name="cap_in_a_loop">
3688 Some patterns may lead to a number of matches which cannot be 3694 Some patterns may lead to a number of matches which cannot be
3689 determined in advance, for example:</a> 3695 determined in advance, for example:</a>
@@ -3691,17 +3697,17 @@ QStringList QRegExp3::capturedTexts()
3691 \code 3697 \code
3692 QRegExp3 rx( "(\\d+)" ); 3698 QRegExp3 rx( "(\\d+)" );
3693 str = "Offsets: 12 14 99 231 7"; 3699 str = "Offsets: 12 14 99 231 7";
3694 QStringList list; 3700 QStringList list;
3695 pos = 0; 3701 pos = 0;
3696 while ( pos >= 0 ) { 3702 while ( pos >= 0 ) {
3697 pos = rx.search( str, pos ); 3703 pos = rx.search( str, pos );
3698 if ( pos > -1 ) { 3704 if ( pos > -1 ) {
3699 list += rx.cap( 1 ); 3705 list += rx.cap( 1 );
3700 pos += rx.matchedLength(); 3706 pos += rx.matchedLength();
3701 } 3707 }
3702 } 3708 }
3703 // list contains: ( "12", "14", "99", "231", "7" ). 3709 // list contains: ( "12", "14", "99", "231", "7" ).
3704 \endcode 3710 \endcode
3705 3711
3706 The order of elements matched by cap() is as follows. The first 3712 The order of elements matched by cap() is as follows. The first
3707 element, cap( 0 ), is the entire matching string. Each subsequent 3713 element, cap( 0 ), is the entire matching string. Each subsequent
@@ -3711,25 +3717,25 @@ QStringList QRegExp3::capturedTexts()
3711 3717
3712 \sa search() pos() capturedTexts() 3718 \sa search() pos() capturedTexts()
3713*/ 3719*/
3714QString QRegExp3::cap( int nth ) 3720QString QRegExp3::cap( int nth )
3715{ 3721{
3716 if ( nth < 0 || nth >= (int) priv->captured.size() / 2 ) 3722 if ( nth < 0 || nth >= (int) priv->captured.size() / 2 )
3717 return QString::null; 3723 return QString::null;
3718 else 3724 else
3719 return capturedTexts()[nth]; 3725 return capturedTexts()[nth];
3720} 3726}
3721 3727
3722/*! Returns the position of the \a nth captured text in the 3728/*! Returns the position of the \a nth captured text in the
3723 searched string. If \a nth is 0 (the default), pos() returns the 3729 searched string. If \a nth is 0 (the default), pos() returns the
3724 position of the whole match. 3730 position of the whole match.
3725 3731
3726 Example: 3732 Example:
3727 \code 3733 \code
3728 QRegExp3 rx( "/([a-z]+)/([a-z]+)" ); 3734 QRegExp3 rx( "/([a-z]+)/([a-z]+)" );
3729 rx.search( "Output /dev/null" );// Returns 7 (position of /dev/null) 3735 rx.search( "Output /dev/null" ); // Returns 7 (position of /dev/null)
3730 rx.pos( 0 ); // Returns 7 (position of /dev/null) 3736 rx.pos( 0 ); // Returns 7 (position of /dev/null)
3731 rx.pos( 1 ); // Returns 8 (position of dev) 3737 rx.pos( 1 ); // Returns 8 (position of dev)
3732 rx.pos( 2 ); // Returns 12 (position of null) 3738 rx.pos( 2 ); // Returns 12 (position of null)
3733 \endcode 3739 \endcode
3734 3740
3735 Note that pos() returns -1 for zero-length matches. (For example, if 3741 Note that pos() returns -1 for zero-length matches. (For example, if
@@ -3738,28 +3744,28 @@ QString QRegExp3::cap( int nth )
3738 3744
3739 \sa capturedTexts() cap() 3745 \sa capturedTexts() cap()
3740*/ 3746*/
3741int QRegExp3::pos( int nth ) 3747int QRegExp3::pos( int nth )
3742{ 3748{
3743 if ( nth < 0 || nth >= (int) priv->captured.size() / 2 ) 3749 if ( nth < 0 || nth >= (int) priv->captured.size() / 2 )
3744 return -1; 3750 return -1;
3745 else 3751 else
3746 return priv->captured[2 * nth]; 3752 return priv->captured[2 * nth];
3747} 3753}
3748#endif 3754#endif
3749 3755
3750void QRegExp3::compile( bool caseSensitive ) 3756void QRegExp3::compile( bool caseSensitive )
3751{ 3757{
3752 derefEngine( eng, priv->rxpattern ); 3758 derefEngine( eng, priv->rxpattern );
3753#ifndef QT_NO_REGEXP_WILDCARD 3759#ifndef QT_NO_REGEXP_WILDCARD
3754 if ( priv->wc ) 3760 if ( priv->wc )
3755 priv->rxpattern = wc2rx( priv->pattern ); 3761 priv->rxpattern = wc2rx( priv->pattern );
3756 else 3762 else
3757#endif 3763#endif
3758 priv->rxpattern = priv->pattern.isNull() ? QString::fromLatin1( "" ) 3764 priv->rxpattern = priv->pattern.isNull() ? QString::fromLatin1( "" )
3759 : priv->pattern; 3765 : priv->pattern;
3760 eng = newEngine( priv->rxpattern, caseSensitive ); 3766 eng = newEngine( priv->rxpattern, caseSensitive );
3761#ifndef QT_NO_REGEXP_CAPTURE 3767#ifndef QT_NO_REGEXP_CAPTURE
3762 priv->t = QString::null; 3768 priv->t = QString::null;
3763 priv->capturedCache.clear(); 3769 priv->capturedCache.clear();
3764#endif 3770#endif
3765 priv->captured.detach(); 3771 priv->captured.detach();