1 files changed, 39 insertions, 31 deletions
diff --git a/qmake/tools/qregexp.cpp b/qmake/tools/qregexp.cpp
index 500efed..0c1f060 100644
--- a/qmake/tools/qregexp.cpp
+++ b/qmake/tools/qregexp.cpp
@@ -254,33 +254,33 @@
     \row \i <b>\\t</b>
 	 \i This matches the ASCII horizontal tab character (HT, 0x09).
     \row \i <b>\\v</b>
 	 \i This matches the ASCII vertical tab character (VT, 0x0B).
     \row \i <b>\\xhhhh</b>
 	 \i This matches the Unicode character corresponding to the
 	 hexadecimal number hhhh (between 0x0000 and 0xFFFF). \0ooo
 	 (i.e., \zero ooo) matches the ASCII/Latin-1 character
 	 corresponding to the octal number ooo (between 0 and 0377).
     \row \i <b>. (dot)</b>
 	 \i This matches any character (including newline).
     \row \i <b>\\d</b>
-	 \i This matches a digit (see QChar::isDigit()).
+	 \i This matches a digit (QChar::isDigit()).
     \row \i <b>\\D</b>
 	 \i This matches a non-digit.
     \row \i <b>\\s</b>
-	 \i This matches a whitespace (see QChar::isSpace()).
+	 \i This matches a whitespace (QChar::isSpace()).
     \row \i <b>\\S</b>
 	 \i This matches a non-whitespace.
     \row \i <b>\\w</b>
-	 \i This matches a word character (see QChar::isLetterOrNumber()).
+	 \i This matches a word character (QChar::isLetterOrNumber() or '_').
     \row \i <b>\\W</b>
 	 \i This matches a non-word character.
     \row \i <b>\\n</b>
 	 \i The n-th \link #capturing-text backreference \endlink,
 	 e.g. \1, \2, etc.
     \endtable
 
     \e {Note that the C++ compiler transforms backslashes in strings
     so to include a <b>\\</b> in a regexp you will need to enter it
     twice, i.e. <b>\\\\</b>.}
 
     \target sets-of-characters
@@ -538,25 +538,32 @@
     Because QRegExp is string oriented there are no \A, \Z or \z
     assertions. The \G assertion is not supported but can be emulated
     in a loop.
 
     Perl's $& is cap(0) or capturedTexts()[0]. There are no QRegExp
     equivalents for $`, $' or $+. Perl's capturing variables, $1, $2,
     ... correspond to cap(1) or capturedTexts()[1], cap(2) or
     capturedTexts()[2], etc.
 
     To substitute a pattern use QString::replace().
 
     Perl's extended \c{/x} syntax is not supported, nor are
-    regexp comments (?#comment) or directives, e.g. (?i).
+    directives, e.g. (?i), or regexp comments, e.g. (?#comment).
+    However, adjacent C++ string literals are concatenated, so a
+    regexp can be split across lines and commented piece by piece:
+    \code
+    QRegExp mark( "\\b" // word boundary
+		  "[Mm]ark" // the word we want to match
+		);
+    \endcode
 
     Both zero-width positive and zero-width negative lookahead
     assertions (?=pattern) and (?!pattern) are supported with the same
     syntax as Perl. Perl's lookbehind assertions, "independent"
     subexpressions and conditional expressions are not supported.
 
     Non-capturing parentheses are also supported, with the same
     (?:pattern) syntax.
 
     See QStringList::split() and QStringList::join() for equivalents
     to Perl's split and join functions.
 
@@ -668,29 +675,29 @@
     QStringList split() function can take a separator string or regexp
     as an argument and split a string accordingly.
 
     \code
     QStringList field = QStringList::split( "\t", str );
     \endcode
 
     Here field[0] is the company, field[1] the web address and so on.
 
     To imitate the matching of a shell we can use wildcard mode.
 
     \code
-    QRegExp rx( "*.html" );     // invalid regexp: * doesn't quantify anything
-    rx.setWildcard( TRUE );     // now it's a valid wildcard regexp
-    rx.search( "index.html" );  // returns 0 (matched at position 0)
-    rx.search( "default.htm" ); // returns -1 (no match)
-    rx.search( "readme.txt" );  // returns -1 (no match)
+    QRegExp rx( "*.html" );         // invalid regexp: * doesn't quantify anything
+    rx.setWildcard( TRUE );         // now it's a valid wildcard regexp
+    rx.exactMatch( "index.html" );  // returns TRUE
+    rx.exactMatch( "default.htm" ); // returns FALSE
+    rx.exactMatch( "readme.txt" );  // returns FALSE
     \endcode
 
     Wildcard matching can be convenient because of its simplicity, but
     any wildcard regexp can be defined using full regexps, e.g.
     <b>.*\.html$</b>. Notice that we can't match both \c .html and \c
     .htm files with a wildcard unless we use <b>*.htm*</b> which will
     also match 'test.html.bak'. A full regexp gives us the precision
     we need, <b>.*\\.html?$</b>.
 
     QRegExp can match case insensitively using setCaseSensitive(), and
     can use non-greedy matching, see setMinimal(). By default QRegExp
     uses full regexps but this can be changed with setWildcard().
@@ -706,24 +713,29 @@
     \target member-function-documentation
 */
 
 const int NumBadChars = 64;
 #define BadChar( ch ) ( (ch).unicode() % NumBadChars )
 
 const int NoOccurrence = INT_MAX;
 const int EmptyCapture = INT_MAX;
 const int InftyLen = INT_MAX;
 const int InftyRep = 1025;
 const int EOS = -1;
 
+static bool isWord( QChar ch ) // word character: letter, number or '_'
+{
+    return ch.isLetterOrNumber() || ch == QChar( '_' );
+}
+
 /*
   Merges two QMemArrays of ints and puts the result into the first one.
 */
 static void mergeInto( QMemArray<int> *a, const QMemArray<int>& b )
 {
     int asize = a->size();
     int bsize = b.size();
     if ( asize == 0 ) {
 	*a = b.copy();
 #ifndef QT_NO_REGEXP_OPTIM
     } else if ( bsize == 1 && (*a)[asize - 1] < b[0] ) {
 	a->resize( asize + 1 );
@@ -1671,27 +1683,27 @@ bool QRegExpEngine::testAnchor( int i, int a, const int *capBegin )
 	if ( mmPos + i != mmCaretPos )
 	    return FALSE;
     }
     if ( (a & Anchor_Dollar) != 0 ) {
 	if ( mmPos + i != mmLen )
 	    return FALSE;
     }
 #ifndef QT_NO_REGEXP_ESCAPE
     if ( (a & (Anchor_Word | Anchor_NonWord)) != 0 ) {
 	bool before = FALSE;
 	bool after = FALSE;
 	if ( mmPos + i != 0 )
-	    before = mmIn[mmPos + i - 1].isLetterOrNumber();
+	    before = isWord( mmIn[mmPos + i - 1] );
 	if ( mmPos + i != mmLen )
-	    after = mmIn[mmPos + i].isLetterOrNumber();
+	    after = isWord( mmIn[mmPos + i] );
 	if ( (a & Anchor_Word) != 0 && (before == after) )
 	    return FALSE;
 	if ( (a & Anchor_NonWord) != 0 && (before != after) )
 	    return FALSE;
     }
 #endif
 #ifndef QT_NO_REGEXP_LOOKAHEAD
     bool catchx = TRUE;
 
     if ( (a & Anchor_LookaheadMask) != 0 ) {
 	QConstString cstr = QConstString( (QChar *) mmIn + mmPos + i,
 					   mmLen - mmPos - i );
@@ -2623,44 +2635,52 @@ int QRegExpEngine::getEscape()
 	// see QChar::isDigit()
 	yyCharClass->addCategories( 0x7fffffef );
 	return Tok_CharClass;
     case 'S':
 	// see QChar::isSpace()
 	yyCharClass->addCategories( 0x7ffff87f );
 	yyCharClass->addRange( 0x0000, 0x0008 );
 	yyCharClass->addRange( 0x000e, 0x001f );
 	yyCharClass->addRange( 0x007f, 0x009f );
 	return Tok_CharClass;
     case 'W':
 	// see QChar::isLetterOrNumber()
-	yyCharClass->addCategories( 0x7ff07f8f );
+	yyCharClass->addCategories( 0x7fe07f8f );
+	yyCharClass->addRange( 0x203f, 0x2040 );
+	yyCharClass->addSingleton( 0x2040 );
+	yyCharClass->addSingleton( 0x30fb );
+	yyCharClass->addRange( 0xfe33, 0xfe34 );
+	yyCharClass->addRange( 0xfe4d, 0xfe4f );
+	yyCharClass->addSingleton( 0xff3f );
+	yyCharClass->addSingleton( 0xff65 );
 	return Tok_CharClass;
 #endif
 #ifndef QT_NO_REGEXP_ESCAPE
     case 'b':
 	return Tok_Word;
 #endif
 #ifndef QT_NO_REGEXP_CCLASS
     case 'd':
 	// see QChar::isDigit()
 	yyCharClass->addCategories( 0x00000010 );
 	return Tok_CharClass;
     case 's':
 	// see QChar::isSpace()
 	yyCharClass->addCategories( 0x00000380 );
 	yyCharClass->addRange( 0x0009, 0x000d );
 	return Tok_CharClass;
     case 'w':
 	// see QChar::isLetterOrNumber()
 	yyCharClass->addCategories( 0x000f8070 );
+	yyCharClass->addSingleton( 0x005f ); // '_'
 	return Tok_CharClass;
 #endif
 #ifndef QT_NO_REGEXP_ESCAPE
     case 'x':
 	val = 0;
 	for ( i = 0; i < 4; i++ ) {
 	    low = QChar( yyCh ).lower();
 	    if ( low >= '0' && low <= '9' )
 		val = ( val << 4 ) | ( low - '0' );
 	    else if ( low >= 'a' && low <= 'f' )
 		val = ( val << 4 ) | ( low - 'a' + 10 );
 	    else
@@ -3174,45 +3194,47 @@ struct QRegExpPrivate
 };
 
 #ifndef QT_NO_REGEXP_OPTIM
 static QCache<QRegExpEngine> *engineCache = 0;
 static QSingleCleanupHandler<QCache<QRegExpEngine> > cleanup_cache;
 #endif
 
 static QRegExpEngine *newEngine( const QString& pattern, bool caseSensitive )
 {
 #ifndef QT_NO_REGEXP_OPTIM
     if ( engineCache != 0 ) {
 #ifdef QT_THREAD_SUPPORT
-	QMutexLocker locker( qt_global_mutexpool->get( &engineCache ) );
+	QMutexLocker locker( qt_global_mutexpool ?
+			     qt_global_mutexpool->get( &engineCache ) : 0 );
 #endif
 	QRegExpEngine *eng = engineCache->take( pattern );
 	if ( eng == 0 || eng->caseSensitive() != caseSensitive ) {
 	    delete eng;
 	} else {
 	    eng->ref();
 	    return eng;
 	}
     }
 #endif
     return new QRegExpEngine( pattern, caseSensitive );
 }
 
 static void derefEngine( QRegExpEngine *eng, const QString& pattern )
 {
-    if ( eng != 0 && eng->deref() ) {
-#ifndef QT_NO_REGEXP_OPTIM
 #ifdef QT_THREAD_SUPPORT
-	QMutexLocker locker( qt_global_mutexpool->get( &engineCache ) );
+    QMutexLocker locker( qt_global_mutexpool ?
+			 qt_global_mutexpool->get( &engineCache ) : 0 );
 #endif
+    if ( eng != 0 && eng->deref() ) {
+#ifndef QT_NO_REGEXP_OPTIM
 	if ( engineCache == 0 ) {
 	    engineCache = new QCache<QRegExpEngine>;
 	    engineCache->setAutoDelete( TRUE );
 	    cleanup_cache.set( &engineCache );
 	}
 	if ( !pattern.isNull() &&
 	     engineCache->insert(pattern, eng, 4 + pattern.length() / 4) )
 	    return;
 #else
 	Q_UNUSED( pattern );
 #endif
 	delete eng;
@@ -3556,31 +3578,24 @@ bool QRegExp::exactMatch( const QString& str ) const
   \sa QString::mid() QConstString
 */
 int QRegExp::match( const QString& str, int index, int *len,
 		    bool indexIsStart ) const
 {
     int pos = search( str, index, indexIsStart ? CaretAtOffset : CaretAtZero );
     if ( len != 0 )
 	*len = matchedLength();
     return pos;
 }
 #endif // QT_NO_COMPAT
 
-/*!
-    \overload
-
-    This convenience function searches with a \c CaretMode of \c
-    CaretAtZero which is the most common usage.
-*/
-
 int QRegExp::search( const QString& str, int offset ) const
 {
     return search( str, offset, CaretAtZero );
 }
 
 /*!
     Attempts to find a match in \a str from position \a offset (0 by
     default). If \a offset is -1, the search starts at the last
     character; if -2, at the next to last character; etc.
 
     Returns the position of the first match, or -1 if there was no
     match.
@@ -3616,31 +3631,24 @@ int QRegExp::search( const QString& str, int offset, CaretMode caretMode ) const
     if ( offset < 0 )
 	offset += str.length();
 #ifndef QT_NO_REGEXP_CAPTURE
     priv->t = str;
     priv->capturedCache.clear();
 #endif
     priv->captured = eng->match( str, offset, priv->min, FALSE,
 				 caretIndex(offset, caretMode) );
     return priv->captured[0];
 }
 
 
-/*!
-    \overload
-
-    This convenience function searches with a \c CaretMode of \c
-    CaretAtZero which is the most common usage.
-*/
-
 int QRegExp::searchRev( const QString& str, int offset ) const
 {
     return searchRev( str, offset, CaretAtZero );
 }
 
 /*!
     Attempts to find a match backwards in \a str from position \a
     offset. If \a offset is -1 (the default), the search starts at the
     last character; if -2, at the next to last character; etc.
 
     Returns the position of the first match, or -1 if there was no
     match.
@@ -3685,25 +3693,25 @@ int QRegExp::searchRev( const QString& str, int offset,
 /*!
     Returns the length of the last matched string, or -1 if there was
     no match.
 
     \sa exactMatch() search() searchRev()
 */
 int QRegExp::matchedLength() const
 {
     return priv->captured[1];
 }
 
 #ifndef QT_NO_REGEXP_CAPTURE
-/*! 
+/*!
   Returns the number of captures contained in the regular expression.
  */
 int QRegExp::numCaptures() const
 {
     return eng->numCaptures();
 }
 
 
 
 /*!
     Returns a list of the captured text strings.