summaryrefslogtreecommitdiff
path: root/qmake/tools/qregexp.cpp
Side-by-side diff
Diffstat (limited to 'qmake/tools/qregexp.cpp') (more/less context) (ignore whitespace changes)
-rw-r--r--qmake/tools/qregexp.cpp70
1 files changed, 39 insertions, 31 deletions
diff --git a/qmake/tools/qregexp.cpp b/qmake/tools/qregexp.cpp
index 500efed..0c1f060 100644
--- a/qmake/tools/qregexp.cpp
+++ b/qmake/tools/qregexp.cpp
@@ -260,21 +260,21 @@
hexadecimal number hhhh (between 0x0000 and 0xFFFF). \0ooo
(i.e., \zero ooo) matches the ASCII/Latin-1 character
corresponding to the octal number ooo (between 0 and 0377).
\row \i <b>. (dot)</b>
\i This matches any character (including newline).
\row \i <b>\\d</b>
- \i This matches a digit (see QChar::isDigit()).
+ \i This matches a digit (QChar::isDigit()).
\row \i <b>\\D</b>
\i This matches a non-digit.
\row \i <b>\\s</b>
- \i This matches a whitespace (see QChar::isSpace()).
+ \i This matches a whitespace (QChar::isSpace()).
\row \i <b>\\S</b>
\i This matches a non-whitespace.
\row \i <b>\\w</b>
- \i This matches a word character (see QChar::isLetterOrNumber()).
+ \i This matches a word character (QChar::isLetterOrNumber() or '_').
\row \i <b>\\W</b>
\i This matches a non-word character.
\row \i <b>\\n</b>
\i The n-th \link #capturing-text backreference \endlink,
e.g. \1, \2, etc.
\endtable
@@ -544,13 +544,20 @@
... correspond to cap(1) or capturedTexts()[1], cap(2) or
capturedTexts()[2], etc.
To substitute a pattern use QString::replace().
Perl's extended \c{/x} syntax is not supported, nor are
- regexp comments (?#comment) or directives, e.g. (?i).
+ directives, e.g. (?i), or regexp comments, e.g. (?#comment). On
+ the other hand, C++'s rules for literal strings can be used to
+ achieve the same:
+ \code
+ QRegExp mark( "\\b" // word boundary
+ "[Mm]ark" // the word we want to match
+ );
+ \endcode
Both zero-width positive and zero-width negative lookahead
assertions (?=pattern) and (?!pattern) are supported with the same
syntax as Perl. Perl's lookbehind assertions, "independent"
subexpressions and conditional expressions are not supported.
@@ -674,17 +681,17 @@
Here field[0] is the company, field[1] the web address and so on.
To imitate the matching of a shell we can use wildcard mode.
\code
- QRegExp rx( "*.html" ); // invalid regexp: * doesn't quantify anything
- rx.setWildcard( TRUE ); // now it's a valid wildcard regexp
- rx.search( "index.html" ); // returns 0 (matched at position 0)
- rx.search( "default.htm" ); // returns -1 (no match)
- rx.search( "readme.txt" ); // returns -1 (no match)
+ QRegExp rx( "*.html" ); // invalid regexp: * doesn't quantify anything
+ rx.setWildcard( TRUE ); // now it's a valid wildcard regexp
+ rx.exactMatch( "index.html" ); // returns TRUE
+ rx.exactMatch( "default.htm" ); // returns FALSE
+ rx.exactMatch( "readme.txt" ); // returns FALSE
\endcode
Wildcard matching can be convenient because of its simplicity, but
any wildcard regexp can be defined using full regexps, e.g.
<b>.*\.html$</b>. Notice that we can't match both \c .html and \c
.htm files with a wildcard unless we use <b>*.htm*</b> which will
@@ -712,12 +719,17 @@ const int NumBadChars = 64;
const int NoOccurrence = INT_MAX;
const int EmptyCapture = INT_MAX;
const int InftyLen = INT_MAX;
const int InftyRep = 1025;
const int EOS = -1;
+static bool isWord( QChar ch )
+{
+ return ch.isLetterOrNumber() || ch == QChar( '_' );
+}
+
/*
Merges two QMemArrays of ints and puts the result into the first one.
*/
static void mergeInto( QMemArray<int> *a, const QMemArray<int>& b )
{
int asize = a->size();
@@ -1677,15 +1689,15 @@ bool QRegExpEngine::testAnchor( int i, int a, const int *capBegin )
}
#ifndef QT_NO_REGEXP_ESCAPE
if ( (a & (Anchor_Word | Anchor_NonWord)) != 0 ) {
bool before = FALSE;
bool after = FALSE;
if ( mmPos + i != 0 )
- before = mmIn[mmPos + i - 1].isLetterOrNumber();
+ before = isWord( mmIn[mmPos + i - 1] );
if ( mmPos + i != mmLen )
- after = mmIn[mmPos + i].isLetterOrNumber();
+ after = isWord( mmIn[mmPos + i] );
if ( (a & Anchor_Word) != 0 && (before == after) )
return FALSE;
if ( (a & Anchor_NonWord) != 0 && (before != after) )
return FALSE;
}
#endif
@@ -2629,13 +2641,20 @@ int QRegExpEngine::getEscape()
yyCharClass->addRange( 0x0000, 0x0008 );
yyCharClass->addRange( 0x000e, 0x001f );
yyCharClass->addRange( 0x007f, 0x009f );
return Tok_CharClass;
case 'W':
// see QChar::isLetterOrNumber()
- yyCharClass->addCategories( 0x7ff07f8f );
+ yyCharClass->addCategories( 0x7fe07f8f );
+ yyCharClass->addRange( 0x203f, 0x2040 );
+ yyCharClass->addSingleton( 0x2040 );
+ yyCharClass->addSingleton( 0x30fb );
+ yyCharClass->addRange( 0xfe33, 0xfe34 );
+ yyCharClass->addRange( 0xfe4d, 0xfe4f );
+ yyCharClass->addSingleton( 0xff3f );
+ yyCharClass->addSingleton( 0xff65 );
return Tok_CharClass;
#endif
#ifndef QT_NO_REGEXP_ESCAPE
case 'b':
return Tok_Word;
#endif
@@ -2649,12 +2668,13 @@ int QRegExpEngine::getEscape()
yyCharClass->addCategories( 0x00000380 );
yyCharClass->addRange( 0x0009, 0x000d );
return Tok_CharClass;
case 'w':
// see QChar::isLetterOrNumber()
yyCharClass->addCategories( 0x000f8070 );
+ yyCharClass->addSingleton( 0x005f ); // '_'
return Tok_CharClass;
#endif
#ifndef QT_NO_REGEXP_ESCAPE
case 'x':
val = 0;
for ( i = 0; i < 4; i++ ) {
@@ -3180,13 +3200,14 @@ static QSingleCleanupHandler<QCache<QRegExpEngine> > cleanup_cache;
static QRegExpEngine *newEngine( const QString& pattern, bool caseSensitive )
{
#ifndef QT_NO_REGEXP_OPTIM
if ( engineCache != 0 ) {
#ifdef QT_THREAD_SUPPORT
- QMutexLocker locker( qt_global_mutexpool->get( &engineCache ) );
+ QMutexLocker locker( qt_global_mutexpool ?
+ qt_global_mutexpool->get( &engineCache ) : 0 );
#endif
QRegExpEngine *eng = engineCache->take( pattern );
if ( eng == 0 || eng->caseSensitive() != caseSensitive ) {
delete eng;
} else {
eng->ref();
@@ -3196,17 +3217,18 @@ static QRegExpEngine *newEngine( const QString& pattern, bool caseSensitive )
#endif
return new QRegExpEngine( pattern, caseSensitive );
}
static void derefEngine( QRegExpEngine *eng, const QString& pattern )
{
- if ( eng != 0 && eng->deref() ) {
-#ifndef QT_NO_REGEXP_OPTIM
#ifdef QT_THREAD_SUPPORT
- QMutexLocker locker( qt_global_mutexpool->get( &engineCache ) );
+ QMutexLocker locker( qt_global_mutexpool ?
+ qt_global_mutexpool->get( &engineCache ) : 0 );
#endif
+ if ( eng != 0 && eng->deref() ) {
+#ifndef QT_NO_REGEXP_OPTIM
if ( engineCache == 0 ) {
engineCache = new QCache<QRegExpEngine>;
engineCache->setAutoDelete( TRUE );
cleanup_cache.set( &engineCache );
}
if ( !pattern.isNull() &&
@@ -3562,19 +3584,12 @@ int QRegExp::match( const QString& str, int index, int *len,
if ( len != 0 )
*len = matchedLength();
return pos;
}
#endif // QT_NO_COMPAT
-/*!
- \overload
-
- This convenience function searches with a \c CaretMode of \c
- CaretAtZero which is the most common usage.
-*/
-
int QRegExp::search( const QString& str, int offset ) const
{
return search( str, offset, CaretAtZero );
}
/*!
@@ -3622,19 +3637,12 @@ int QRegExp::search( const QString& str, int offset, CaretMode caretMode ) const
priv->captured = eng->match( str, offset, priv->min, FALSE,
caretIndex(offset, caretMode) );
return priv->captured[0];
}
-/*!
- \overload
-
- This convenience function searches with a \c CaretMode of \c
- CaretAtZero which is the most common usage.
-*/
-
int QRegExp::searchRev( const QString& str, int offset ) const
{
return searchRev( str, offset, CaretAtZero );
}
/*!
@@ -3691,13 +3699,13 @@ int QRegExp::searchRev( const QString& str, int offset,
int QRegExp::matchedLength() const
{
return priv->captured[1];
}
#ifndef QT_NO_REGEXP_CAPTURE
-/*!
+/*!
Returns the number of captures contained in the regular expression.
*/
int QRegExp::numCaptures() const
{
return eng->numCaptures();
}