summaryrefslogtreecommitdiff
path: root/qmake/tools/qregexp.cpp
Side-by-side diff
Diffstat (limited to 'qmake/tools/qregexp.cpp') (more/less context) (ignore whitespace changes)
-rw-r--r--qmake/tools/qregexp.cpp70
1 files changed, 39 insertions, 31 deletions
diff --git a/qmake/tools/qregexp.cpp b/qmake/tools/qregexp.cpp
index 500efed..0c1f060 100644
--- a/qmake/tools/qregexp.cpp
+++ b/qmake/tools/qregexp.cpp
@@ -254,33 +254,33 @@
\row \i <b>\\t</b>
\i This matches the ASCII horizontal tab character (HT, 0x09).
\row \i <b>\\v</b>
\i This matches the ASCII vertical tab character (VT, 0x0B).
\row \i <b>\\xhhhh</b>
\i This matches the Unicode character corresponding to the
hexadecimal number hhhh (between 0x0000 and 0xFFFF). \0ooo
(i.e., \zero ooo) matches the ASCII/Latin-1 character
corresponding to the octal number ooo (between 0 and 0377).
\row \i <b>. (dot)</b>
\i This matches any character (including newline).
\row \i <b>\\d</b>
- \i This matches a digit (see QChar::isDigit()).
+ \i This matches a digit (QChar::isDigit()).
\row \i <b>\\D</b>
\i This matches a non-digit.
\row \i <b>\\s</b>
- \i This matches a whitespace (see QChar::isSpace()).
+ \i This matches a whitespace character (QChar::isSpace()).
\row \i <b>\\S</b>
\i This matches a non-whitespace.
\row \i <b>\\w</b>
- \i This matches a word character (see QChar::isLetterOrNumber()).
+ \i This matches a word character (QChar::isLetterOrNumber() or '_').
\row \i <b>\\W</b>
\i This matches a non-word character.
\row \i <b>\\n</b>
\i The n-th \link #capturing-text backreference \endlink,
e.g. \1, \2, etc.
\endtable
\e {Note that the C++ compiler transforms backslashes in strings
so to include a <b>\\</b> in a regexp you will need to enter it
twice, i.e. <b>\\\\</b>.}
\target sets-of-characters
@@ -538,25 +538,32 @@
Because QRegExp is string oriented there are no \A, \Z or \z
assertions. The \G assertion is not supported but can be emulated
in a loop.
Perl's $& is cap(0) or capturedTexts()[0]. There are no QRegExp
equivalents for $`, $' or $+. Perl's capturing variables, $1, $2,
... correspond to cap(1) or capturedTexts()[1], cap(2) or
capturedTexts()[2], etc.
To substitute a pattern use QString::replace().
Perl's extended \c{/x} syntax is not supported, nor are
- regexp comments (?#comment) or directives, e.g. (?i).
+ directives, e.g. (?i), or regexp comments, e.g. (?#comment).
+ However, because C++ concatenates adjacent string literals, a
+ pattern can be split across lines and annotated with C++ comments:
+ \code
+ QRegExp mark( "\\b" // word boundary
+ "[Mm]ark" // the word we want to match
+ );
+ \endcode
Both zero-width positive and zero-width negative lookahead
assertions (?=pattern) and (?!pattern) are supported with the same
syntax as Perl. Perl's lookbehind assertions, "independent"
subexpressions and conditional expressions are not supported.
Non-capturing parentheses are also supported, with the same
(?:pattern) syntax.
See QStringList::split() and QStringList::join() for equivalents
to Perl's split and join functions.
@@ -668,29 +675,29 @@
QStringList split() function can take a separator string or regexp
as an argument and split a string accordingly.
\code
QStringList field = QStringList::split( "\t", str );
\endcode
Here field[0] is the company, field[1] the web address and so on.
To imitate the matching of a shell we can use wildcard mode.
\code
- QRegExp rx( "*.html" ); // invalid regexp: * doesn't quantify anything
- rx.setWildcard( TRUE ); // now it's a valid wildcard regexp
- rx.search( "index.html" ); // returns 0 (matched at position 0)
- rx.search( "default.htm" ); // returns -1 (no match)
- rx.search( "readme.txt" ); // returns -1 (no match)
+ QRegExp rx( "*.html" ); // invalid regexp: * doesn't quantify anything
+ rx.setWildcard( TRUE ); // now it's a valid wildcard regexp
+ rx.exactMatch( "index.html" ); // returns TRUE
+ rx.exactMatch( "default.htm" ); // returns FALSE
+ rx.exactMatch( "readme.txt" ); // returns FALSE
\endcode
Wildcard matching can be convenient because of its simplicity, but
any wildcard regexp can be defined using full regexps, e.g.
<b>.*\\.html$</b>. Notice that we can't match both \c .html and \c
.htm files with a wildcard unless we use <b>*.htm*</b> which will
also match 'test.html.bak'. A full regexp gives us the precision
we need, <b>.*\\.html?$</b>.
QRegExp can match case insensitively using setCaseSensitive(), and
can use non-greedy matching, see setMinimal(). By default QRegExp
uses full regexps but this can be changed with setWildcard().
@@ -706,24 +713,29 @@
\target member-function-documentation
*/
// Modulus used by BadChar(); BadChar() therefore yields values below 64.
const int NumBadChars = 64;
// Reduces a character's unicode() value modulo NumBadChars.
#define BadChar( ch ) ( (ch).unicode() % NumBadChars )
// NoOccurrence, EmptyCapture and InftyLen all share the value INT_MAX.
const int NoOccurrence = INT_MAX;
const int EmptyCapture = INT_MAX;
const int InftyLen = INT_MAX;
const int InftyRep = 1025;
const int EOS = -1;
/*
  Returns whether ch is a letter, a number or an underscore ('_').
*/
+static bool isWord( QChar ch )
+{
+ return ch.isLetterOrNumber() || ch == QChar( '_' );
+}
+
/*
Merges two QMemArrays of ints and puts the result into the first one.
*/
static void mergeInto( QMemArray<int> *a, const QMemArray<int>& b )
{
int asize = a->size();
int bsize = b.size();
if ( asize == 0 ) {
*a = b.copy();
#ifndef QT_NO_REGEXP_OPTIM
} else if ( bsize == 1 && (*a)[asize - 1] < b[0] ) {
a->resize( asize + 1 );
@@ -1671,27 +1683,27 @@ bool QRegExpEngine::testAnchor( int i, int a, const int *capBegin )
if ( mmPos + i != mmCaretPos )
return FALSE;
}
if ( (a & Anchor_Dollar) != 0 ) {
if ( mmPos + i != mmLen )
return FALSE;
}
#ifndef QT_NO_REGEXP_ESCAPE
if ( (a & (Anchor_Word | Anchor_NonWord)) != 0 ) {
bool before = FALSE;
bool after = FALSE;
if ( mmPos + i != 0 )
- before = mmIn[mmPos + i - 1].isLetterOrNumber();
+ before = isWord( mmIn[mmPos + i - 1] );
if ( mmPos + i != mmLen )
- after = mmIn[mmPos + i].isLetterOrNumber();
+ after = isWord( mmIn[mmPos + i] );
if ( (a & Anchor_Word) != 0 && (before == after) )
return FALSE;
if ( (a & Anchor_NonWord) != 0 && (before != after) )
return FALSE;
}
#endif
#ifndef QT_NO_REGEXP_LOOKAHEAD
bool catchx = TRUE;
if ( (a & Anchor_LookaheadMask) != 0 ) {
QConstString cstr = QConstString( (QChar *) mmIn + mmPos + i,
mmLen - mmPos - i );
@@ -2623,44 +2635,52 @@ int QRegExpEngine::getEscape()
// see QChar::isDigit()
yyCharClass->addCategories( 0x7fffffef );
return Tok_CharClass;
case 'S':
// see QChar::isSpace()
yyCharClass->addCategories( 0x7ffff87f );
yyCharClass->addRange( 0x0000, 0x0008 );
yyCharClass->addRange( 0x000e, 0x001f );
yyCharClass->addRange( 0x007f, 0x009f );
return Tok_CharClass;
case 'W':
// complement of \w: see QChar::isLetterOrNumber(); '_' is not matched
- yyCharClass->addCategories( 0x7ff07f8f );
+ yyCharClass->addCategories( 0x7fe07f8f );
+ yyCharClass->addRange( 0x203f, 0x2040 );
+ yyCharClass->addSingleton( 0x2040 );
+ yyCharClass->addSingleton( 0x30fb );
+ yyCharClass->addRange( 0xfe33, 0xfe34 );
+ yyCharClass->addRange( 0xfe4d, 0xfe4f );
+ yyCharClass->addSingleton( 0xff3f );
+ yyCharClass->addSingleton( 0xff65 );
return Tok_CharClass;
#endif
#ifndef QT_NO_REGEXP_ESCAPE
case 'b':
return Tok_Word;
#endif
#ifndef QT_NO_REGEXP_CCLASS
case 'd':
// see QChar::isDigit()
yyCharClass->addCategories( 0x00000010 );
return Tok_CharClass;
case 's':
// see QChar::isSpace()
yyCharClass->addCategories( 0x00000380 );
yyCharClass->addRange( 0x0009, 0x000d );
return Tok_CharClass;
case 'w':
// see QChar::isLetterOrNumber(); '_' also counts as a word character
yyCharClass->addCategories( 0x000f8070 );
+ yyCharClass->addSingleton( 0x005f ); // '_'
return Tok_CharClass;
#endif
#ifndef QT_NO_REGEXP_ESCAPE
case 'x':
val = 0;
for ( i = 0; i < 4; i++ ) {
low = QChar( yyCh ).lower();
if ( low >= '0' && low <= '9' )
val = ( val << 4 ) | ( low - '0' );
else if ( low >= 'a' && low <= 'f' )
val = ( val << 4 ) | ( low - 'a' + 10 );
else
@@ -3174,45 +3194,47 @@ struct QRegExpPrivate
};
#ifndef QT_NO_REGEXP_OPTIM
static QCache<QRegExpEngine> *engineCache = 0;
static QSingleCleanupHandler<QCache<QRegExpEngine> > cleanup_cache;
#endif
/*
  Returns an engine for pattern with the requested case sensitivity.

  Unless QT_NO_REGEXP_OPTIM is defined, an engine stored in
  engineCache under pattern is taken out of the cache and reused when
  its case sensitivity matches; in every other case a new
  QRegExpEngine is allocated.
*/
static QRegExpEngine *newEngine( const QString& pattern, bool caseSensitive )
{
#ifndef QT_NO_REGEXP_OPTIM
if ( engineCache != 0 ) {
#ifdef QT_THREAD_SUPPORT
- QMutexLocker locker( qt_global_mutexpool->get( &engineCache ) );
+ QMutexLocker locker( qt_global_mutexpool ?
+ qt_global_mutexpool->get( &engineCache ) : 0 );
#endif
QRegExpEngine *eng = engineCache->take( pattern );
// No cached engine (deleting 0 is a no-op) or one compiled with the
// other case sensitivity: discard it and fall through to the
// allocation at the end of the function.
if ( eng == 0 || eng->caseSensitive() != caseSensitive ) {
delete eng;
} else {
// Reuse the cached engine; the caller receives its own reference.
eng->ref();
return eng;
}
}
#endif
return new QRegExpEngine( pattern, caseSensitive );
}
static void derefEngine( QRegExpEngine *eng, const QString& pattern )
{
- if ( eng != 0 && eng->deref() ) {
-#ifndef QT_NO_REGEXP_OPTIM
#ifdef QT_THREAD_SUPPORT
- QMutexLocker locker( qt_global_mutexpool->get( &engineCache ) );
+ QMutexLocker locker( qt_global_mutexpool ?
+ qt_global_mutexpool->get( &engineCache ) : 0 );
#endif
+ if ( eng != 0 && eng->deref() ) {
+#ifndef QT_NO_REGEXP_OPTIM
if ( engineCache == 0 ) {
engineCache = new QCache<QRegExpEngine>;
engineCache->setAutoDelete( TRUE );
cleanup_cache.set( &engineCache );
}
if ( !pattern.isNull() &&
engineCache->insert(pattern, eng, 4 + pattern.length() / 4) )
return;
#else
Q_UNUSED( pattern );
#endif
delete eng;
@@ -3556,31 +3578,24 @@ bool QRegExp::exactMatch( const QString& str ) const
\sa QString::mid() QConstString
*/
int QRegExp::match( const QString& str, int index, int *len,
bool indexIsStart ) const
{
int pos = search( str, index, indexIsStart ? CaretAtOffset : CaretAtZero );
if ( len != 0 )
*len = matchedLength();
return pos;
}
#endif // QT_NO_COMPAT
-/*!
- \overload
-
- This convenience function searches with a \c CaretMode of \c
- CaretAtZero which is the most common usage.
-*/
-
int QRegExp::search( const QString& str, int offset ) const
{
return search( str, offset, CaretAtZero );
}
/*!
Attempts to find a match in \a str from position \a offset (0 by
default). If \a offset is -1, the search starts at the last
character; if -2, at the next to last character; etc.
Returns the position of the first match, or -1 if there was no
match.
@@ -3616,31 +3631,24 @@ int QRegExp::search( const QString& str, int offset, CaretMode caretMode ) const
if ( offset < 0 )
offset += str.length();
#ifndef QT_NO_REGEXP_CAPTURE
priv->t = str;
priv->capturedCache.clear();
#endif
priv->captured = eng->match( str, offset, priv->min, FALSE,
caretIndex(offset, caretMode) );
return priv->captured[0];
}
-/*!
- \overload
-
- This convenience function searches with a \c CaretMode of \c
- CaretAtZero which is the most common usage.
-*/
-
int QRegExp::searchRev( const QString& str, int offset ) const
{
return searchRev( str, offset, CaretAtZero );
}
/*!
Attempts to find a match backwards in \a str from position \a
offset. If \a offset is -1 (the default), the search starts at the
last character; if -2, at the next to last character; etc.
Returns the position of the first match, or -1 if there was no
match.
@@ -3685,25 +3693,25 @@ int QRegExp::searchRev( const QString& str, int offset,
/*!
Returns the length of the last matched string, or -1 if there was
no match.
\sa exactMatch() search() searchRev()
*/
int QRegExp::matchedLength() const
{
    // Slot 1 of the capture array holds the length of the whole match
    // (slot 0, returned by search(), holds its position).
    const int length = priv->captured[1];
    return length;
}
#ifndef QT_NO_REGEXP_CAPTURE
-/*!
+/*!
Returns the number of captures contained in the regular expression.
*/
int QRegExp::numCaptures() const
{
    // The count is owned by the compiled engine; simply report it.
    const int count = eng->numCaptures();
    return count;
}
/*!
Returns a list of the captured text strings.