initial osx portosx/0.0

Signed-off-by: Michael Krelin <hacker@klever.net>
author: Michael Krelin <hacker@klever.net> 2012-12-08 21:19:17 (UTC)
committer: Michael Krelin <hacker@klever.net> 2012-12-11 21:59:29 (UTC)
commit: 8808689fe340bec6e90ab13dd502292b0579cf1f (patch) (side-by-side diff)
tree: 45b7c863151341f687b74e40bffcbd7ab5468783 /shared-code/RegEx.cpp
parent: 6e7e413ca364d79673e523c09767c18e7cff1bec (diff)
download: pumpkin-8808689fe340bec6e90ab13dd502292b0579cf1f.zip
pumpkin-8808689fe340bec6e90ab13dd502292b0579cf1f.tar.gz
pumpkin-8808689fe340bec6e90ab13dd502292b0579cf1f.tar.bz2
1 files changed, 0 insertions, 1697 deletions
diff --git a/shared-code/RegEx.cpp b/shared-code/RegEx.cpp
deleted file mode 100644
index b7bab62..0000000
--- a/shared-code/RegEx.cpp
+++ b/dev/null
@@ -1,1697 +0,0 @@
-#include "../stdafx.h"
-#include "RegEx.h"
-
-#define	isWordableChar(c) (isalnum(c) || (c)=='_')
-
-BOOL CRegEx::Compile(LPCTSTR regex,int flags)
-{
-	ASSERT(!((flags&regExtended) && (flags&regLiteral)));
-	m_Matches.RemoveAll();
-	m_Strip.RemoveAll();
-	m_Strip.SetSize(0,15);
-	m_Pattern=regex;
-	m_ParsePointer=0;
-	m_Error=0;
-	m_Sets.RemoveAll();
-	m_Flags=flags;
-	m_iFlags=0;
-	m_BOLs=m_EOLs=0;
-	m_Subexps = 0;
-	m_Categories=1;	// 0 is 'everything else'
-	m_bBackRefs=FALSE;
-	memset(m_Category,0,sizeof(m_Category));
-	
-	// Go ahead.
-	m_Error || m_Strip.Add(CSop(CSop::opEnd));
-	if(flags&regExtended){
-		ParseERE();
-	}else if(flags&regLiteral){
-		ParseLiteral();
-	}else{
-		ParseBRE();
-	}
-	m_Error || m_Strip.Add(CSop(CSop::opEnd));
-	Categorize();
-	m_Strip.FreeExtra();
-	FigureMust();
-	m_Pluses=CountPluses();
-	if(m_iFlags&iflagsBad){
-		m_Error || (m_Error=regeAssert);
-		// ??? point to nuls? ;-)
-	}
-	// We may wish to free some memory here if we're erroneous (ie. m_Error..)
-	m_ParseParens.RemoveAll();
-#ifdef _DEBUG
-	if(m_Error){
-	CString tmp;
-		tmp.Format("RE: ParseError: %d\n",m_Error);
-		TRACE0(tmp);
-	}
-//	DumpStrip(afxDump);
-#endif
-	return (m_bCompiled=(!m_Error));
-}
-
-BOOL CRegEx::Match(LPCTSTR src,int flags)
-{
-	if(!m_bCompiled)
-		return FALSE;
-	if(m_iFlags&iflagsBad)
-		return FALSE;
-	m_Input=src;
-	m_mFlags=flags;
-	m_mPointer=m_Input;
-	m_mBegin=m_Input;
-	m_mEnd=&m_mBegin[m_Input.GetLength()];
-	ASSERT(m_mPointer<=m_mEnd);
-	m_Matches.RemoveAll();
-	if(!m_Must.IsEmpty()){
-		if(m_Input.Find(m_Must)<0)
-			return FALSE;
-	}
-	// Go ahead..
-int stripLen = m_Strip.GetSize();
-	m_mLastPos.SetSize(0);
-	for(int tmp=0;tmp<stripLen;tmp++)
-		m_Strip[tmp].m_MatchData=0;
-LPCTSTR beginp = m_mBegin;
-LPCTSTR endp;
-	for(;;){
-		endp = MatchFast(beginp);
-		if(!endp)
-			return FALSE;
-		if((m_mFlags&regNoSubExpressions) && !m_bBackRefs)
-			break;
-		ASSERT(m_cOldP);
-		for(;;){
-			endp = MatchSlow(m_cOldP,m_mEnd,1,stripLen-1);
-			if(endp)
-				break;
-			ASSERT(m_cOldP<m_mEnd);
-			m_cOldP++;
-		}
-		if((m_mFlags&regOneMatch) && !m_bBackRefs)
-			break;
-		// Oh, his, we want the subexpression..
-		m_Matches.SetSize(m_Subexps+1);
-	LPCTSTR dp;
-		if(!m_bBackRefs && !(m_mFlags&regBackRefs)){
-			dp = MatchDissect(m_cOldP,endp,1,stripLen-1);
-		}else{
-			if(m_Pluses>0 && !m_mLastPos.GetSize())
-				m_mLastPos.SetSize(m_Pluses);
-			dp = MatchBackRef(m_cOldP,endp,1,stripLen-1,0);
-		}
-		if(dp)
-			break;
-		// Uh.. oh..  we couldn't find a subexpression-level match
-		ASSERT(m_bBackRefs);
-		ASSERT(m_Pluses==0 || m_mLastPos.GetSize());
-		for(;;){
-			if(dp || endp <= m_cOldP)
-				break;	// defeat.. ?
-			endp = MatchSlow(m_cOldP,endp-1,1,stripLen-1);
-			if(!endp)
-				break;	// defeat.. ?
-			// Try it on a shorter possibility..
-#ifdef _DEBUG
-			for(tmp=1;tmp<=m_Subexps;tmp++)
-				ASSERT(m_Matches[tmp].m_Begin<0 && m_Matches[tmp].m_End<0);
-#endif
-			dp = MatchBackRef(m_cOldP,endp,1,stripLen-1,0);
-		}
-		ASSERT((!dp) || dp==endp);
-		if(dp)	// Found a shorter one..
-			break;
-		// Despite initial appearances, there is no match here
-		beginp = m_cOldP+1;
-		ASSERT(beginp<=m_mEnd);
-	}
-	// Fill in the detail if so requested..
-	if(!(m_mFlags&regNoSubExpressions)){
-		if(!m_Matches.GetSize())
-			m_Matches.SetSize(1);
-		m_Matches[0].m_Begin=m_cOldP-m_mBegin;
-		m_Matches[0].m_End=endp-m_mBegin;
-	}
-	m_mLastPos.RemoveAll();
-	return TRUE;
-}
-
-CString CRegEx::Replace(LPCTSTR src,LPCTSTR rep,int flags)
-{
-	// ***
-	return CString();
-}
-
-void CRegEx::ParseERE(int stop)
-{
-UCHAR c;
-BOOL first=TRUE;
-int prevF, prevB;
-	for(;;){
-	int co = m_Strip.GetSize();
-		while(m_ParsePointer < m_Pattern.GetLength() && ((c=m_Pattern[m_ParsePointer])!='|') && c!=stop)
-			ParseEREexp();
-		if(m_Strip.GetSize()==co){
-			m_Error || (m_Error=regeEmpty);
-			// ??? point to nuls?
-		}
-		if(m_ParsePointer>=m_Pattern.GetLength() || m_Pattern[m_ParsePointer]!='|')
-			break;
-		else
-			m_ParsePointer++;
-		if(first){
-			StripInsert(co,CSop(CSop::opChoice0,m_Strip.GetSize()-co+1));
-			prevF = prevB = co;
-			first=FALSE;
-		}
-		m_Error || m_Strip.Add(CSop(CSop::opOr0,m_Strip.GetSize()-prevB));
-		prevB = m_Strip.GetSize()-1;
-		m_Error || (m_Strip[prevF].m_Operand=m_Strip.GetSize()-prevF);
-		prevF = m_Strip.GetSize();
-		m_Error || m_Strip.Add(CSop(CSop::opOr1,0));	// offset isn't really right.. very so..
-	}
-	if(!first){
-		m_Error || (m_Strip[prevF].m_Operand=m_Strip.GetSize()-prevF);
-		m_Error || m_Strip.Add(CSop(CSop::opChoice1,m_Strip.GetSize()-prevB));
-	}
-	ASSERT(m_ParsePointer >= m_Pattern.GetLength() || m_Pattern[m_ParsePointer]==stop);
-}
-
-void CRegEx::ParseEREexp()
-{
-	ASSERT(m_ParsePointer < m_Pattern.GetLength());
-UCHAR c = m_Pattern[m_ParsePointer++];
-int pos = m_Strip.GetSize();
-int subno;
-int count, count2;
-BOOL wascaret=FALSE;
-	switch(c){
-	case '(':
-		if(!(m_ParsePointer<m_Pattern.GetLength())){
-			TRACE0("RE: '(' at the end of the pattern\n");
-			if(!m_Error)
-				m_Error = regeParen;
-			// ??? point to nuls?
-		}
-		m_Subexps++;
-		subno=m_Subexps;
-		m_ParseParens.SetAtGrow(m_Subexps,CParenthesis(m_Strip.GetSize()));
-		m_Error || m_Strip.Add(CSop(CSop::opLeftParen,subno));
-		if(m_ParsePointer>=m_Pattern.GetLength() || m_Pattern[m_ParsePointer]!=')')
-			ParseERE(')');
-		VERIFY(m_ParseParens[m_Subexps].m_End = m_Strip.GetSize());
-		m_Error || m_Strip.Add(CSop(CSop::opRightParen,subno));
-		if(m_ParsePointer >= m_Pattern.GetLength() || m_Pattern[m_ParsePointer++]!=')'){
-			TRACE0("RE: No matching ')'\n");
-			if(!m_Error)
-				m_Error = regeParen;
-			// ??? point to nuls?
-		}
-		break;
-	case '^':
-		m_Error || m_Strip.Add(CSop(CSop::opBOL));
-		m_iFlags|=iflagsUseBOL;
-		m_BOLs++;
-		wascaret=TRUE;
-		break;
-	case '$':
-		m_Error || m_Strip.Add(CSop(CSop::opEOL));
-		m_iFlags|=iflagsUseEOL;
-		m_EOLs++;
-		break;
-	case '|':
-		TRACE0("RE: '|' outside of expression\n");
-		if(!m_Error)
-			m_Error = regeEmpty;
-		// ??? point to nuls?
-		break;
-	case '*':
-	case '+':
-	case '?':
-		TRACE0("RE: '*'/'+'/'?' with no previous expression\n");
-		if(!m_Error)
-			m_Error = regeBadRepeat;
-		// ??? point to nuls?
-		break;
-	case '.':
-		if(m_Flags&regNewLine)
-			EmitNonNewLineAny();
-		else
-			m_Error || m_Strip.Add(CSop(CSop::opAny));
-		break;
-	case '[':
-		ParseBracket();
-		break;
-	case '\\':
-		if(m_ParsePointer >= m_Pattern.GetLength()){
-			TRACE0("RE: '\\' at the end of the pattern\n");
-			if(!m_Error)
-				m_Error = regeEscape;
-			// ??? point to nuls?
-		}else{
-			c = m_Pattern[m_ParsePointer++];
-			EmitOrdinary(c);
-		}
-		break;
-	case '{':
-		if(m_ParsePointer >= m_Pattern.GetLength() || !isdigit(m_Pattern[m_ParsePointer])){
-			TRACE0("RE: '{' with no repeat count\n");
-			if(!m_Error)
-				m_Error = regeBadRepeat;
-			// ??? point to nuls?
-		}
-		// Fallthrough..
-	default:
-		EmitOrdinary(c);
-		break;
-	}
-	if(m_ParsePointer >= m_Pattern.GetLength())
-		return;
-	c = m_Pattern[m_ParsePointer];
-	// Call a '{' repetition if followed by a digit
-	if (!(c=='*' || c=='+' || c=='?' || ( c=='{' && (m_ParsePointer+1) < m_Pattern.GetLength() && isdigit(m_Pattern[m_ParsePointer+1])) ))
-		return; // No repetitor - done.
-	m_ParsePointer++;
-	if(wascaret){
-		TRACE0("RE: repetitive '^' detected\n");
-		if(!m_Error)
-			m_Error = regeBadRepeat;
-		// ??? point to nuls?
-	}
-	switch(c){
-	case '*':	// Implemeted as +?
-		// + expression
-		StripInsert(pos,CSop(CSop::opPlus0,m_Strip.GetSize()-pos+1));
-		m_Error || m_Strip.Add(CSop(CSop::opPlus1,m_Strip.GetSize()-pos));
-		// ? expression
-		StripInsert(pos,CSop(CSop::opQuest0,m_Strip.GetSize()-pos+1));
-		m_Error || m_Strip.Add(CSop(CSop::opQuest1,m_Strip.GetSize()-pos));
-		break;
-	case '+':
-		// + expression
-		StripInsert(pos,CSop(CSop::opPlus0,m_Strip.GetSize()-pos+1));
-		m_Error || m_Strip.Add(CSop(CSop::opPlus1,m_Strip.GetSize()-pos));
-		break;
-	case '?':
-		// Kludge - emit y? as (y|) until subtle bug gets fixed :-)
-		StripInsert(pos,CSop(CSop::opChoice0,m_Strip.GetSize()-pos+1));
-		m_Error || m_Strip.Add(CSop(CSop::opOr0,m_Strip.GetSize()-pos));
-		m_Error || (m_Strip[pos].m_Operand=m_Strip.GetSize()-pos);
-		m_Error || m_Strip.Add(CSop(CSop::opOr1,1));
-		m_Error || m_Strip.Add(CSop(CSop::opChoice1,2));
-		break;
-	case '{':
-		count = ParseCount();
-		if(m_ParsePointer < m_Pattern.GetLength() && m_Pattern[m_ParsePointer]==','){
-			m_ParsePointer++;
-			if(isdigit(m_Pattern[m_ParsePointer])){ // HHH Personally, I doubt it is always available
-				count2=ParseCount();
-				if(!(count<=count2)){
-					TRACE0("RE: Disbalanced counts in '{}' repeater\n");
-					m_Error || (m_Error=regeBadBrace);
-					// ??? point to nuls?
-				}
-			}else // Single number with comma
-				count2=256; // Infinity
-		}else // Single number
-			count2=count;
-		EmitRepeat(pos,count,count2);
-		if(m_ParsePointer >= m_Pattern.GetLength() || m_Pattern[m_ParsePointer]!='}'){
-			// No '}'..
-			TRACE0("RE: No immediately following '}' of '{' expression\n");
-			while(m_ParsePointer < m_Pattern.GetLength() && m_Pattern[m_ParsePointer]!='}')
-				m_ParsePointer++;
-			if(m_ParsePointer >= m_Pattern.GetLength()){
-				TRACE0("RE: No closing '}' found\n");
-				m_Error || (m_Error=regeBrace);
-			}else
-				m_Error || (m_Error=regeBadBrace);
-			// ??? point to nuls?
-		}else
-			m_ParsePointer++;
-		break;
-	}
-	if(m_ParsePointer >= m_Pattern.GetLength())
-		return;
-	c = m_Pattern[m_ParsePointer];
-	if(!(c=='*' || c=='+' || c=='?' || (c=='{' && (m_ParsePointer+1)<m_Pattern.GetLength() && isdigit(m_Pattern[m_ParsePointer+1]))))
-		return;
-	TRACE0("RE: Double repeater\n");
-	m_Error || (m_Error=regeBadRepeat);
-	// ??? point to nuls?
-}
-
-void CRegEx::StripInsert(int pos,CSop& sop)
-{
-	if(m_Error)
-		return;
-int sn = m_Strip.GetSize();
-	m_Strip.InsertAt(pos,sop);
-	for(int tmp=1;tmp<m_ParseParens.GetSize();tmp++){
-		if(m_ParseParens[tmp].m_Begin>=pos)
-			m_ParseParens[tmp].m_Begin++;
-		if(m_ParseParens[tmp].m_End>=pos)
-			m_ParseParens[tmp].m_End++;
-	}
-}
-
-void CRegEx::EmitOrdinary(UCHAR c)
-{
-	if(m_Flags&regIgnoreCase && isalpha(c) && (tolower(c) !=toupper(c))){
-		// Emit both cases
-	CString savePattern = m_Pattern;
-	int savePointer = m_ParsePointer;
-		m_Pattern=c;
-		m_Pattern+=']';
-		m_ParsePointer=0;
-		ParseBracket();
-		m_Pattern=savePattern;
-		m_ParsePointer=savePointer;
-	}else{
-		m_Error || m_Strip.Add(CSop(CSop::opChar,c));
-		if(!m_Category[(BYTE)c])
-			m_Category[(BYTE)c]=m_Categories++;
-	}
-}
-
-void CRegEx::EmitNonNewLineAny()
-{
-	// Kludges're going on and on..
-CString savePattern = m_Pattern;
-int savePointer = m_ParsePointer;
-	m_Pattern="^\n]";
-	m_ParsePointer=0;
-	ParseBracket();
-	m_Pattern=savePattern;
-	m_ParsePointer=savePointer;
-}
-
-int CRegEx::ParseCount()
-{
-BOOL nonEmpty=FALSE;
-int rv = 0;
-UCHAR c;
-	while(m_ParsePointer < m_Pattern.GetLength() && isdigit(c=m_Pattern[m_ParsePointer]) && rv <=255){
-		rv = rv*10 + c-'0';
-		nonEmpty=TRUE;
-		m_ParsePointer++;
-	}
-	if(rv>255 || !nonEmpty){
-		m_Error || (m_Error=regeBadBrace);
-		// ??? point to nuls?
-	}
-	return rv;
-}
-
-void CRegEx::ParseBracket()
-{
-	// Dept. of truly sickening special case kludges
-	if((m_ParsePointer+5) < m_Pattern.GetLength() && !m_Pattern.Mid(m_ParsePointer,6).Compare("[:<]]")){
-		m_Error || m_Strip.Add(CSop(CSop::opBOW));
-		m_ParsePointer+=6;
-		return;
-	}
-	if((m_ParsePointer+5) < m_Pattern.GetLength() && !m_Pattern.Mid(m_ParsePointer,6).Compare("[:>]]")){
-		m_Error || m_Strip.Add(CSop(CSop::opEOW));
-		m_ParsePointer+=6;
-		return;
-	}
-BOOL invert=TRUE;
-	if(m_ParsePointer < m_Pattern.GetLength() && m_Pattern[m_ParsePointer]=='^')
-		m_ParsePointer++;
-	else
-		invert=FALSE;
-CSet cset;
-	if(m_ParsePointer < m_Pattern.GetLength()){
-		switch(m_Pattern[m_ParsePointer]){
-		case ']':
-		case '-':
-			cset.Add(m_Pattern[m_ParsePointer]);
-			m_ParsePointer++;
-			break;
-		}
-	}
-	while(m_ParsePointer < m_Pattern.GetLength() && m_Pattern[m_ParsePointer]!=']' && !((m_ParsePointer+1)<m_Pattern.GetLength() && !m_Pattern.Mid(m_ParsePointer,2).Compare("-]")))
-		ParseBracketTerm(cset);
-	if(m_ParsePointer < m_Pattern.GetLength() && m_Pattern[m_ParsePointer]=='-'){
-		m_ParsePointer++;
-		cset.Add('-');
-	}
-	if(m_ParsePointer < m_Pattern.GetLength() && m_Pattern[m_ParsePointer]==']')
-		m_ParsePointer++;
-	else{
-		m_Error || (m_Error=regeBracket);
-		// ???  point to nuls?
-		return;
-	}
-	if(m_Flags&regIgnoreCase){
-		for(int tmp=CSet::size-1;tmp>=0;tmp--){
-			if(cset.IsIn(tmp) && isalpha(tmp) && (toupper(tmp)!=tolower(tmp)))
-				cset.Add(isupper(tmp)?tolower(tmp):toupper(tmp));
-		}
-		/*
-		if(!cset->m_Multis.IsEmpty())
-			cset.CollatingCase();
-		 */
-	}
-	if(invert){
-		for(int tmp=CSet::size-1;tmp>=0;tmp--)
-			if(cset.IsIn(tmp))
-				cset.Sub(tmp);
-			else
-				cset.Add(tmp);
-		if(m_Flags&regNewLine)
-			cset.Sub('\n');
-		/*
-		if(!cset.m_Multis.IsEmpty())
-			cset.CollatingInvert();
-		*/
-	}
-UCHAR c = cset.GetOnly();
-	if(c){
-		EmitOrdinary(c);
-	}else
-		m_Error || m_Strip.Add(CSop(CSop::opAnyOf,StoreSet(cset)));
-}
-
-void CRegEx::CSet::Add(UCHAR c)
-{
-	m_Set[(BYTE)c]=TRUE;
-	m_Hash+=c;
-}
-
-BOOL CRegEx::CSet::IsIn(UCHAR c)
-{
-	return m_Set[(BYTE)c];
-}
-
-void CRegEx::CSet::Sub(UCHAR c)
-{
-	m_Set[(BYTE)c]=FALSE;
-	m_Hash-=c;
-}
-
-UCHAR CRegEx::CSet::GetOnly()
-{
-int rv = 0;
-UCHAR only = 0;
-	for(int tmp=0;tmp<size;tmp++){
-		rv+=m_Set[tmp]?(only=tmp,1):0;
-	}
-	return (rv==1)?only:0;
-}
-
-int CRegEx::StoreSet(CSet& cset)
-{
-	for(int tmp=0;tmp<m_Sets.GetSize();tmp++)
-		if(m_Sets[tmp]==cset)
-			return tmp;
-	return m_Sets.Add(cset);
-}
-
-void CRegEx::ParseBracketTerm(CSet& cset)
-{
-UCHAR c;
-	switch((m_ParsePointer<m_Pattern.GetLength())?m_Pattern[m_ParsePointer]:0){
-	case '[':
-		c = ((m_ParsePointer+1)<m_Pattern.GetLength())?m_Pattern[m_ParsePointer+1]:0;
-		break;
-	case '-':
-		m_Error || (m_Error=regeRange);
-		// ??? point to nuls?
-		return;
-	default:
-		c = 0;
-		break;
-	}
-	switch(c){
-	case ':':	// Character class
-		m_ParsePointer+=2;
-		if(m_ParsePointer>=m_Pattern.GetLength()){
-			m_Error || (m_Error=regeBracket);
-			// ??? point to nuls?
-		}
-		c = m_Pattern[m_ParsePointer];
-		if(c== '-' || c==']'){
-			m_Error || (m_Error=regeCType);
-			// ??? point to nuls?
-		}
-		ParseBracketCClass(cset);
-		if(m_ParsePointer>=m_Pattern.GetLength()){
-			m_Error || (m_Error=regeBracket);
-			// ??? point to nuls?
-		}
-		if((m_ParsePointer+1)>=m_Pattern.GetLength() || (m_Pattern.Mid(m_ParsePointer,2).Compare(":]"))){
-			m_Error || (m_Error=regeCType);
-			// ??? point to nuls?
-		}else
-			m_ParsePointer+=2;
-		break;
-	case '=':	// Equivalence class
-		m_ParsePointer+=2;
-		if(m_ParsePointer >= m_Pattern.GetLength()){
-			m_Error || (m_Error=regeBracket);
-			// ??? point to nuls?
-		}
-		c = m_Pattern[m_ParsePointer];
-		if(c== '-' || c==']'){
-			m_Error || (m_Error=regeCollate);
-			// ??? point to nuls?
-		}
-		ParseBracketEClass(cset);
-		if((m_ParsePointer+1)>=m_Pattern.GetLength() || (m_Pattern.Mid(m_ParsePointer,2).Compare("=]"))){
-			m_Error || (m_Error=regeCollate);
-			// ??? point to nuls?
-		}else
-			m_ParsePointer+=2;
-		break;
-	default:	// Symbol, character or range
-		{
-		UCHAR start, finish;
-			start =  ParseBracketSymbol();
-			if((m_ParsePointer<m_Pattern.GetLength() && m_Pattern[m_ParsePointer]=='-') /*&& (m_ParsePointer+1)<m_Pattern.GetLength() && m_Pattern[m_ParsePointer+1]==']'*/){
-				// I believe the expression above is seetwo..
-				// range.
-				m_ParsePointer++;
-				if(m_ParsePointer<m_Pattern.GetLength() && m_Pattern[m_ParsePointer]=='-'){
-					m_ParsePointer++;
-					finish='-';
-				}else
-					finish=ParseBracketSymbol();
-			}else
-				finish=start;
-			if(((BYTE)start)>((BYTE)finish)){
-				m_Error || (m_Error=regeRange);
-				// ??? point to nuls?
-			}
-			for(int tmp=start;tmp<=(BYTE)finish;tmp++)
-				cset.Add(tmp);
-		}
-		break;
-	}
-}
-
-void CRegEx::ParseBracketCClass(CSet& cset)
-{
-static struct	{
-	char *className;
-	char *classChars;
-}	cc[] = {
-	{"alnum","ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"},
-	{"alpha","ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"},
-	{"blank"," \t"},
-	{"cntrl","\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\25\26\27\30\31\32\33\34\35\36\37\177"},
-	{"digit","0123456789"},
-	{"graph","ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"},
-	{"lower","abcdefghijklmnopqrstuvwxyz"},
-	{"print","ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ "},
-	{"punct","!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"},
-	{"space","\t\n\v\f\r "},
-	{"upper","ABCDEFGHIJKLMNOPQRSTUVWXYZ"},
-	{"xdigit","0123456789ABCDEFabcdef"}
-};
-CString cclass;
-UCHAR c;
-	while(m_ParsePointer < m_Pattern.GetLength() && isalpha(c=m_Pattern[m_ParsePointer])){
-		cclass+=c;
-		m_ParsePointer++;
-	}
-char *classChars = NULL;
-	for(int tmp=0;tmp<(sizeof(cc)/sizeof(cc[0]));tmp++){
-		if(!cclass.CompareNoCase(cc[tmp].className)){
-			classChars=cc[tmp].classChars;
-			break;
-		}
-	}
-	if(!classChars){
-		m_Error || (m_Error=regeCType);
-		// ??? point to nuls?
-		return;
-	}
-	while(*classChars)
-		cset.Add(*(classChars++));
-	// --- multis
-}
-
-void CRegEx::ParseBracketEClass(CSet& cset)
-{
-	cset.Add(ParseBracketCollatingElement('='));;
-}
-
-UCHAR CRegEx::ParseBracketCollatingElement(UCHAR term)
-{
-static struct	{
-	char *entityName;
-	char entity;
-}	cc[] = { {"NUL",'\0'},{"SOH",'\001'},{"STX",'\002'},{"ETX",'\003'},{"EOT",'\004'},{"ENQ",'\005'},{"ACK",'\006'},{"BEL",'\007'},{"alert",'\007'},{"BS",'\010'},{"backspace",'\b'},{"HT",'\011'},{"tab",'\t'},{"LF",'\012'},{"newline",'\n'},{"VT",'\013'},{"vertical-tab",'\v'},{"FF",'\014'},{"form-feed",'\f'},{"CR",'\015'},{"carriage-return",'\r'},{"SO",'\016'},{"SI",'\017'},{"DLE",'\020'},{"DC1",'\021'},{"DC2",'\022'},{"DC3",'\023'},{"DC4",'\024'},{"NAK",'\025'},{"SYN",'\026'},{"ETB",'\027'},{"CAN",'\030'},{"EM",'\031'},{"SUB",'\032'},{"ESC",'\033'},{"IS4",'\034'},{"FS",'\034'},{"IS3",'\035'},{"GS",'\035'},{"IS2",'\036'},{"RS",'\036'},{"IS1",'\037'},{"US",'\037'},{"space",' '},{"exclamation-mark",'!'},{"quotation-mark",'"'},{"number-sign",'#'},{"dollar-sign",'$'},{"percent-sign",'%'},{"ampersand",'&'},{"apostrophe",'\''},{"left-parenthesis",'('},{"right-parenthesis",')'},{"asterisk",'*'},{"plus-sign",'+'},{"comma",','},{"hyphen",'-'},{"hyphen-minus",'-'},{"period",'.'},{"full-stop",'.'},{"slash",'/'},{"solidus",'/'},{"zero",'0'},{"one",'1'},{"two",'2'},{"three",'3'},{"four",'4'},{"five",'5'},{"six",'6'},{"seven",'7'},{"eight",'8'},{"nine",'9'},{"colon",':'},{"semicolon",';'},{"less-than-sign",'<'},{"equals-sign",'='},{"greater-than-sign",'>'},{"question-mark",'?'},{"commercial-at",'@'},{"left-square-bracket",'['},{"backslash",'\\'},{"reverse-solidus",'\\'},{"right-square-bracket",']'},{"circumflex",'^'},{"circumflex-accent",'^'},{"underscore",'_'},{"low-line",'_'},{"grave-accent",'`'},{"left-brace",'{'},{"left-curly-bracket",'{'},{"vertical-line",'|'},{"right-brace",'}'},{"right-curly-bracket",'}'},{"tilde",'~'},{"DEL",'\177'} };
-CString seeTwo;
-	seeTwo=term;
-	seeTwo+=']';
-CString entityName;
-	while(m_ParsePointer<m_Pattern.GetLength() && !((m_ParsePointer+1)<m_Pattern.GetLength() && !m_Pattern.Mid(m_ParsePointer,2).Compare(seeTwo)))
-		entityName+=m_Pattern[m_ParsePointer++];
-	if(m_ParsePointer>=m_Pattern.GetLength()){
-		m_Error || (m_Error=regeBracket);
-		// ??? point to nuls?
-		return 0;
-	}
-	for(int tmp=0;tmp<(sizeof(cc)/sizeof(cc[0]));tmp++)
-		if(!entityName.CompareNoCase(cc[tmp].entityName))
-			return cc[tmp].entity;
-	if(entityName.GetLength()==1)
-		return entityName[0];
-	m_Error || (m_Error=regeCollate);
-	// ??? point to nuls?
-	return 0;
-}
-
-UCHAR CRegEx::ParseBracketSymbol()
-{
-	if(m_ParsePointer>=m_Pattern.GetLength()){
-		m_Error || (m_Error=regeBracket);
-		// ??? point to nuls?
-	}
-	if((m_ParsePointer+1)<m_Pattern.GetLength() && !m_Pattern.Mid(m_ParsePointer,2).Compare("[."))
-		m_ParsePointer+=2;
-	else
-		return m_Pattern[m_ParsePointer++];
-	// Collating symbol
-UCHAR rv = ParseBracketCollatingElement('.');
-	if((m_ParsePointer+1)<m_Pattern.GetLength() && !m_Pattern.Mid(m_ParsePointer,2).Compare("[."))
-		m_ParsePointer+=2;
-	else{
-		m_Error || (m_Error=regeCollate);
-		// ??? point to nuls?
-	}
-	return rv;
-}
-
-void CRegEx::EmitRepeat(int pos,int from,int to)
-{
-	if(m_Error)
-		return;
-	ASSERT(from<=to);
-int finish = m_Strip.GetSize();
-int copy;
-#define	N 2
-#define INF 3
-#define REP(f,t) ((f)*8+(t))
-#define MAP(n) (((n)<=1)?(n):((n)==256)?INF:N)
-	switch(REP(MAP(from),MAP(to))){
-	case REP(0,0):	// must be user doing ths??
-		m_Strip.SetSize(pos);
-		break;
-	case REP(0,1):	// as in '?'
-	case REP(0,N):	// as in '{1,n}?'
-	case REP(0,INF):	// as in '{1,}?'
-		// Kludge - emit y? as (y|) until something gets fixed..
-		StripInsert(pos,CSop(CSop::opChoice0,pos));
-		EmitRepeat(pos+1,1,to);
-		m_Error || m_Strip.Add(CSop(CSop::opOr0,m_Strip.GetSize()-pos));
-		m_Error || (m_Strip[pos].m_Operand=m_Strip.GetSize()-pos);
-		m_Error || m_Strip.Add(CSop(CSop::opOr1,1));
-		m_Error || m_Strip.Add(CSop(CSop::opChoice1,2));
-		break;
-	case REP(1,1):
-		break;
-	case REP(1,N):	// as in 'x?x{1,n-1}'
-		// Kludge again..
-		StripInsert(pos,CSop(CSop::opChoice0,pos));
-		m_Error || m_Strip.Add(CSop(CSop::opOr0,m_Strip.GetSize()-pos));
-		m_Error || (m_Strip[pos].m_Operand=m_Strip.GetSize()-pos);
-		m_Error || m_Strip.Add(CSop(CSop::opOr1,1));
-		m_Error || m_Strip.Add(CSop(CSop::opChoice1,2));
-		copy = StripDuplicate(pos+1,finish+1);
-		ASSERT(copy==(finish+4));
-		EmitRepeat(copy,1,to-1);
-		break;
-	case REP(1,INF): // as in '+'
-		StripInsert(pos,CSop(CSop::opPlus0,pos));
-		m_Error || m_Strip.Add(CSop(CSop::opPlus1,m_Strip.GetSize()-pos));
-		break;
-	case REP(N,N):	// as in 'xx{from-1,to-1}'
-		copy = StripDuplicate(pos,finish);
-		EmitRepeat(copy,from-1,to-1);
-		break;
-	case REP(N,INF): // as in 'xx{n-1,}'
-		copy = StripDuplicate(pos,finish);
-		EmitRepeat(copy,from-1,to);
-		break;
-#ifndef NDEBUG
-	default:
-		ASSERT(FALSE);
-		break;
-#endif
-	}
-#undef MAP
-#undef REP
-#undef INF
-#undef N
-}
-
-int CRegEx::StripDuplicate(int from,int to)
-{
-int rv = m_Strip.GetSize();
-	ASSERT(from<=to);
-	if(from==to)
-		return rv;
-	//  Maybe should be optimized for copying the whole thing.
-	for(int tmp=from;tmp<to;tmp++)
-		m_Strip.Add(m_Strip[tmp]);
-	return rv;
-}
-
-void CRegEx::Categorize()
-{
-	if(m_Error)
-		return;
-	for(int tmp=0;tmp<(sizeof(m_Category)/sizeof(m_Category[0]));tmp++)
-		if((!m_Category[tmp]) && IsInSets(tmp)){
-		int cat = m_Categories++;
-			m_Category[tmp]=cat;
-			for(int c=tmp+1;c<(sizeof(m_Category)/sizeof(m_Category[0]));c++)
-				if((!m_Category[c]) && IsInSameSets(tmp,c))
-					m_Category[c]=cat;
-		}
-}
-
-BOOL CRegEx::IsInSets(UCHAR c)
-{
-	for(int tmp=0;tmp<m_Sets.GetSize();tmp++)
-		if(m_Sets[tmp].IsIn(c))
-			return TRUE;
-	return FALSE;
-}
-
-BOOL CRegEx::IsInSameSets(UCHAR c1,UCHAR c2)
-{
-	for(int tmp=0;tmp<m_Sets.GetSize();tmp++)
-		if(m_Sets[tmp].IsIn(c1)!=m_Sets[tmp].IsIn(c2))
-			return FALSE;
-	return TRUE;
-}
-
-void CRegEx::FigureMust()
-{
-	if(m_Error)
-		return;
-	m_Must.Empty();
-int stripLen = m_Strip.GetSize();
-int seqStart, seqLength = 0;
-int mustStart, mustLength = 0;
-	for(int tmp=1;tmp<stripLen;tmp++){
-		switch(m_Strip[tmp].m_Operator){
-		case CSop::opChar:
-			if(!seqLength)
-				seqStart=tmp;
-			seqLength++;
-			break;
-		case CSop::opPlus0:
-		case CSop::opLeftParen:
-		case CSop::opRightParen:
-			break;	// Break, since they don't break the sequence
-		case CSop::opQuest0:
-		case CSop::opChoice0:
-			// These ones we skip.
-			do{
-				tmp+=m_Strip[tmp].m_Operand;
-				// I still think it could be ASSERTed..
-				if(m_Strip[tmp].m_Operator!=CSop::opQuest1 && m_Strip[tmp].m_Operator!=CSop::opChoice1 && m_Strip[tmp].m_Operator!=CSop::opOr1){
-					m_iFlags|=iflagsBad;
-					return;
-				}
-			}while(m_Strip[tmp].m_Operator!=CSop::opQuest1 && m_Strip[tmp].m_Operator!=CSop::opChoice1);
-			// Fallthrough..
-		default:
-			// End of sequence
-			if(seqLength>mustLength){
-				mustStart=seqStart;
-				mustLength=seqLength;
-			}
-			seqLength=0;
-			break;
-		}
-	}	// Hmm.. originally it's meant to be do while not opEnd.. 
-	if(!mustLength)
-		return;
-	// Turn into string, but, wait, personally I'm sure it could be put in the main loop.. or maybe not..
-	for(tmp=0;tmp<seqLength;tmp++){
-		while(m_Strip[seqStart+tmp].m_Operator!=CSop::opChar)
-			ASSERT(tmp<seqLength);
-		m_Must+=m_Strip[tmp].m_Operand;
-	}
-}
-
-int CRegEx::CountPluses()
-{
-	if(m_Error)
-		return 0;
-int stripLen = m_Strip.GetSize();
-int rv = 0;
-int nest = 0;
-	for(int tmp=0;tmp<stripLen;tmp++){
-		switch(m_Strip[tmp].m_Operator){
-		case CSop::opPlus0:
-			nest++;
-			break;
-		case CSop::opPlus1:
-			if(nest>rv)
-				rv = nest;
-			nest--;
-			break;
-		}
-	}	// Again, originally we were supposed to scan till opEnd..
-	if(nest)
-		m_iFlags|=iflagsBad;	// Could this be an ASSERTion?
-	return rv;
-}
-
-void CRegEx::ParseLiteral()
-{
-	if(!m_Pattern.GetLength()){
-		m_Error || (m_Error=regeEmpty);
-		// ??? point to nuls?
-	}
-	while(m_ParsePointer < m_Pattern.GetLength())
-		EmitOrdinary(m_Pattern[m_ParsePointer++]);
-}
-
-void CRegEx::ParseBRE(int stopa,int stopb)
-{
-int start = m_Strip.GetSize();
-BOOL first=TRUE;
-BOOL wasDollar=FALSE;
-	if(m_ParsePointer<m_Pattern.GetLength() && m_Pattern[m_ParsePointer]=='^'){
-		m_ParsePointer++;
-		m_Error || m_Strip.Add(CSop(CSop::opBOL,0));
-		m_iFlags|=iflagsUseBOL;
-		m_BOLs++;
-	}
-CString stopstr;
-	if(stopa){
-		stopstr+=stopa;
-		if(stopb)
-			stopstr+=stopb;
-	}
-	while(m_ParsePointer < m_Pattern.GetLength() && !((m_ParsePointer+1)<m_Pattern.GetLength() && !m_Pattern.Mid(m_ParsePointer,2).Compare(stopstr))){
-		wasDollar = ParseBREexp(first);
-		first=FALSE;
-	}
-	if(wasDollar){	// Trailing anchor that was..
-		m_Strip.SetSize(m_Strip.GetSize()-1);
-		m_Error || m_Strip.Add(CSop(CSop::opEOL,0));
-		m_iFlags|=iflagsUseEOL;
-		m_EOLs++;
-	}
-	if(m_Strip.GetSize()==start){
-		m_Error || (m_Error=regeEmpty);
-		// ??? point to nuls?
-	}
-}
-
-BOOL CRegEx::ParseBREexp(BOOL ordinaryStar)
-{
-int subno;
-int pos = m_Strip.GetSize();
-	ASSERT(m_ParsePointer<m_Pattern.GetLength());
-int c = m_Pattern[m_ParsePointer++];
-	if(c=='\\'){
-		if(m_ParsePointer>=m_Pattern.GetLength()){
-			m_Error || (m_Error=regeEscape);
-			// ??? point to nuls
-		}else
-			c = 0x100|m_Pattern[m_ParsePointer++];
-	}
-	switch(c){
-	case '.':
-		if(m_Flags&regNewLine)
-			EmitNonNewLineAny();
-		else
-			m_Error || m_Strip.Add(CSop(CSop::opAny,0));
-		break;
-	case '[':
-		ParseBracket();
-		break;
-	case 0x100|'{':
-		m_Error || (m_Error=regeBadRepeat);
-		// ??? point to nuls?
-		break;
-	case 0x100|'(':
-		m_Subexps++;
-		subno=m_Subexps;
-		m_ParseParens.SetAtGrow(m_Subexps,CParenthesis(m_Strip.GetSize()));
-		m_Error || m_Strip.Add(CSop(CSop::opLeftParen,subno));
-		if(m_ParsePointer<m_Pattern.GetLength() && !((m_ParsePointer+1)<m_Pattern.GetLength() && !m_Pattern.Mid(m_ParsePointer,2).Compare("\\)")))
-			ParseBRE('\\',')');
-		VERIFY(m_ParseParens[m_Subexps].m_End = m_Strip.GetSize());
-		m_Error || m_Strip.Add(CSop(CSop::opRightParen,subno));
-		if((m_ParsePointer+1) < m_Pattern.GetLength() && !m_Pattern.Mid(m_ParsePointer,2).Compare("\\)"))
-			m_ParsePointer+=2;
-		else{
-			m_Error || (m_Error=regeParen);
-			// ??? point to nuls?
-		}
-		break;
-	case 0x100|')':
-	case 0x100|'}':
-		// Can this possibly happen?!
-		m_Error || (m_Error=regeParen);
-		// ??? point to nuls?
-		break;
-	case 0x100|'1':
-	case 0x100|'2':
-	case 0x100|'3':
-	case 0x100|'4':
-	case 0x100|'5':
-	case 0x100|'6':
-	case 0x100|'7':
-	case 0x100|'8':
-	case 0x100|'9':
-		{
-		int i = (c&0xFF)-'0';
-			if(i < m_ParseParens.GetSize() && m_ParseParens[i].m_End){
-				m_Error || m_Strip.Add(CSop(CSop::opBackRef0,i));
-				ASSERT(m_ParseParens[i].m_Begin);
-				ASSERT(m_Strip[m_ParseParens[i].m_Begin].m_Operator==CSop::opLeftParen);
-				ASSERT(m_Strip[m_ParseParens[i].m_End].m_Operator==CSop::opRightParen);
-				StripDuplicate(m_ParseParens[i].m_Begin+1,m_ParseParens[i].m_End);
-				m_Error || m_Strip.Add(CSop(CSop::opBackRef1,i));
-			}else{
-				m_Error || (m_Error=regeSubReg);
-				// ??? point to nuls?
-			}
-			m_bBackRefs=TRUE;
-		}
-		break;
-	case '*':
-		if(!ordinaryStar){
-			m_Error || (m_Error=regeBadRepeat);
-			// ??? point to nuls?
-		}
-		// Fallthrough..
-	default:
-		EmitOrdinary(c&0xFF);
-		break;
-	}
-	if(m_ParsePointer<m_Pattern.GetLength() && m_Pattern[m_ParsePointer]=='*'){
-		m_ParsePointer++;
-		// as in '+?'
-		StripInsert(pos,CSop(CSop::opPlus0,m_Strip.GetSize()-pos+1));
-		m_Error || m_Strip.Add(CSop(CSop::opPlus1,m_Strip.GetSize()-pos));
-		StripInsert(pos,CSop(CSop::opQuest0,m_Strip.GetSize()-pos+1));
-		m_Error || m_Strip.Add(CSop(CSop::opQuest1,m_Strip.GetSize()-pos));
-	}else if ((m_ParsePointer+1)<m_Pattern.GetLength() && !m_Pattern.Mid(m_ParsePointer,2).Compare("\\{")){
-		m_ParsePointer+=2;
-	int count = ParseCount();
-	int count2;
-		if(m_ParsePointer<m_Pattern.GetLength() && m_Pattern[m_ParsePointer]==','){
-			m_ParsePointer++;
-			if(m_ParsePointer<m_Pattern.GetLength() && isdigit(m_Pattern[m_ParsePointer])){
-				count2=ParseCount();
-				if(count>count2){
-					m_Error || (m_Error=regeBadBrace);
-					// ??? poin to nuls?
-				}
-			}else // Single number with comma
-				count2=256;
-		}else // Single number
-			count2=count;
-		EmitRepeat(pos,count,count2);
-		if((m_ParsePointer+1)>=m_Pattern.GetLength() || m_Pattern.Mid(m_ParsePointer,2).Compare("\\}")){
-			while(m_ParsePointer<m_Pattern.GetLength() && !((m_ParsePointer+1)<m_Pattern.GetLength() && !m_Pattern.Mid(m_ParsePointer,2).Compare("\\}")))
-				m_ParsePointer++;
-			if(m_ParsePointer>=m_Pattern.GetLength()){
-				m_Error || (m_Error=regeBrace);
-				// ??? point to nuls?
-			}
-			m_Error || (m_Error=regeBadBrace);
-		}else
-			m_ParsePointer+=2;
-	}else if(c=='$')
-		return TRUE;
-	return FALSE;
-}
-
-CRegEx::CRegEx()
-{
-	m_bCompiled=FALSE;
-}
-
-LPCTSTR CRegEx::MatchFast(LPCTSTR begin)
-{
-	MatchStatesClear(CSop::stCurrent);
-	m_Strip[1].m_MatchData|=CSop::stCurrent;
-int stripLen = m_Strip.GetSize();
-	MatchStep(1,stripLen-1,CSop::stCurrent,charNothing,CSop::stCurrent);
-	MatchStatesCopy(CSop::stFresh,CSop::stCurrent);
-LPCTSTR coldp = NULL;
-LPCTSTR p = begin;
-int c = (begin==m_mBegin)?charOut:((int)(BYTE)m_mPointer[-1]);
-	for(;;){
-		// next character..
-	int lastc = c;
-		c = (p==m_mEnd)?charOut:(int)*(BYTE*)p;
-		if(MatchStatesEqual(CSop::stCurrent,CSop::stFresh))
-			coldp=p;
-		// Is there an EOL and/or BOL between lastc and c? - they ask..
-	int	flagc=0;
-	int	i = 0;
-		if((lastc=='\n' && m_Flags&regNewLine) || (lastc==charOut && !(m_mFlags&regNotBOL))){
-			flagc=charBOL;
-			i=m_BOLs;
-		}
-		if((c=='\n' && m_Flags&regNewLine) || (c==charOut && !(m_mFlags&regNotEOL))){
-			flagc=(flagc==charBOL)?charBOLEOL:charEOL;
-			i+=m_EOLs;
-		}
-		if(i){
-			for(;i>0;i--)
-				MatchStep(1,stripLen-1,CSop::stCurrent,flagc,CSop::stCurrent);
-		}
-		// What about a word boundary? - they wonder..
-		if((flagc==charBOL || (lastc!=charOut && !isWordableChar(c))) && (c!=charOut && isWordableChar(c)))
-			flagc = charBOW;
-		if((lastc!=charOut && isWordableChar(lastc)) && (flagc==charEOL || (c!=charOut && !isWordableChar(c))))
-			flagc = charEOW;
-		if(flagc==charBOW || flagc==charEOW){
-			MatchStep(1,stripLen-1,CSop::stCurrent,flagc,CSop::stCurrent);
-		}
-		// Are we done? Now WE wonder..
-		if((m_Strip[stripLen-1].m_MatchData&CSop::stCurrent) || p==m_mEnd)
-			break;	// They insist I need to note break out.. Okay, I do..
-		// Nope, we're not done. We have to face this character..
-		MatchStatesCopy(CSop::stTemp,CSop::stCurrent);
-		MatchStatesCopy(CSop::stCurrent,CSop::stFresh);
-		ASSERT(c!=charOut);
-		MatchStep(1,stripLen-1,CSop::stTemp,c,CSop::stCurrent);
-		p++;
-	}
-	ASSERT(coldp);
-	m_cOldP=coldp;	// *** I believe this variable can be changed 'in-place'
-	if(m_Strip[stripLen-1].m_MatchData&CSop::stCurrent)
-		return &p[1];
-	else
-		return NULL;
-}
-
-void CRegEx::MatchStatesClear(BYTE mask)
-{
-int stripLen = m_Strip.GetSize();
-	for(int tmp=0;tmp<stripLen;tmp++)
-		m_Strip[tmp].m_MatchData&=~mask;
-}
-
-void CRegEx::MatchStep(int from,int to,BYTE maskBefore,int charCode,BYTE maskAfter)
-{
-BOOL i;
-int look;
-int here = from;
-	for(int pc=from;pc!=to;pc++,here++){
-	CSop s=m_Strip[pc];
-		switch(s.m_Operator){
-		case CSop::opEnd:
-			ASSERT(pc==(to-1));
-			break;
-		case CSop::opChar:
-			// Only characters can match..
-			ASSERT((charCode<charOut) || charCode!=s.m_Operand);
-			if(charCode==s.m_Operand)
-				m_Strip[here+1].m_MatchData|=(m_Strip[here].m_MatchData&maskBefore)?maskAfter:0;
-			break;
-		case CSop::opBOL:
-			if(charCode==charBOL || charCode==charBOLEOL)
-				m_Strip[here+1].m_MatchData|=(m_Strip[here].m_MatchData&maskBefore)?maskAfter:0;
-			break;
-		case CSop::opEOL:
-			if(charCode==charEOL || charCode==charBOLEOL)
-				m_Strip[here+1].m_MatchData|=(m_Strip[here].m_MatchData&maskBefore)?maskAfter:0;
-			break;
-		case CSop::opBOW:
-			if(charCode==charBOW)
-				m_Strip[here+1].m_MatchData|=(m_Strip[here].m_MatchData&maskBefore)?maskAfter:0;
-			break;
-		case CSop::opEOW:
-			if(charCode==charEOW)
-				m_Strip[here+1].m_MatchData|=(m_Strip[here].m_MatchData&maskBefore)?maskAfter:0;
-			break;
-		case CSop::opAny:
-			if(charCode<charOut)
-				m_Strip[here+1].m_MatchData|=(m_Strip[here].m_MatchData&maskBefore)?maskAfter:0;
-			break;
-		case CSop::opAnyOf:
-			if(charCode<charOut && m_Sets[s.m_Operand].m_Set[charCode])
-				m_Strip[here+1].m_MatchData|=(m_Strip[here].m_MatchData&maskBefore)?maskAfter:0;
-			break;
-		case CSop::opBackRef0:	// Ignored here..
-		case CSop::opBackRef1:
-			m_Strip[here+1].m_MatchData|=(m_Strip[here].m_MatchData&maskAfter)?maskAfter:0;
-			break;
-		case CSop::opPlus0:
-			// Forward, this is just an empty, comment says..
-			m_Strip[here+1].m_MatchData|=(m_Strip[here].m_MatchData&maskAfter)?maskAfter:0;
-			break;
-		case CSop::opPlus1:
-			m_Strip[here+1].m_MatchData|=(m_Strip[here].m_MatchData&maskAfter)?maskAfter:0;
-			i =(m_Strip[here-s.m_Operand].m_MatchData&maskAfter)!=0;
-			m_Strip[here-s.m_Operand].m_MatchData|=(m_Strip[here].m_MatchData&maskAfter)?maskAfter:0;
-			if(!i && (m_Strip[here-s.m_Operand].m_MatchData&maskAfter)){
-				// oho, must reconsider loop body, comment says.. what's so 'oho' about it?
-				pc-=s.m_Operand+1;
-				here=pc;
-			}
-			break;
-		case CSop::opQuest0:
-			// two branches, both forward..
-			m_Strip[here+1].m_MatchData|=(m_Strip[here].m_MatchData&maskAfter)?maskAfter:0;
-			m_Strip[here+s.m_Operand].m_MatchData|=(m_Strip[here].m_MatchData&maskAfter)?maskAfter:0;
-			break;
-		case CSop::opQuest1:
-			// just an empty.. aren't we tired of justanempties?
-			m_Strip[here+1].m_MatchData|=(m_Strip[here].m_MatchData&maskAfter)?maskAfter:0;
-			break;
-		case CSop::opLeftParen: // they say it's not significan there..
-		case CSop::opRightParen:
-			m_Strip[here+1].m_MatchData|=(m_Strip[here].m_MatchData&maskAfter)?maskAfter:0;
-			break;
-		case CSop::opChoice0:	// mark the first two branches..
-			m_Strip[here+1].m_MatchData|=(m_Strip[here].m_MatchData&maskAfter)?maskAfter:0;
-			ASSERT(m_Strip[pc+s.m_Operand].m_Operator==CSop::opOr1);
-			m_Strip[here+s.m_Operand].m_MatchData|=(m_Strip[here].m_MatchData&maskAfter)?maskAfter:0;
-			break;
-		case CSop::opOr0:	// done a branch, find the end of choice..
-			if(m_Strip[here].m_MatchData&maskAfter){
-				for(look=1;(s=m_Strip[pc+look]).m_Operator!=CSop::opChoice1;look+=s.m_Operand)
-					ASSERT(s.m_Operator==CSop::opOr1);
-				m_Strip[here+look].m_MatchData|=(m_Strip[here].m_MatchData&maskAfter)?maskAfter:0;
-			}
-			break;
-		case CSop::opOr1: // Propagate Choice's marking..
-			m_Strip[here+1].m_MatchData|=(m_Strip[here].m_MatchData&maskAfter)?maskAfter:0;
-			if(m_Strip[pc+s.m_Operand].m_Operator!=CSop::opChoice1){
-				ASSERT(m_Strip[pc+s.m_Operand].m_Operator==CSop::opOr1);
-				m_Strip[here+s.m_Operand].m_MatchData|=(m_Strip[here].m_MatchData&maskAfter)?maskAfter:0;
-			}
-			break;
-		case CSop::opChoice1: // Just empty.. :-)..
-			m_Strip[here+1].m_MatchData|=(m_Strip[here].m_MatchData&maskAfter)?maskAfter:0;
-			break;
-#ifdef _DEBUG
-		default:
-			ASSERT(FALSE);
-			break;
-#endif
-		}
-	}
-}
-
-void CRegEx::MatchStatesCopy(BYTE dst,BYTE src)
-{
-int stripLen = m_Strip.GetSize();
-	for(int tmp=0;tmp<stripLen;tmp++){
-		// I believe this can be optimized, easily..
-		m_Strip[tmp].m_MatchData&=~dst;
-		m_Strip[tmp].m_MatchData|=(m_Strip[tmp].m_MatchData&src)?dst:0;
-	}
-}
-
-BOOL CRegEx::MatchStatesEqual(BYTE m1,BYTE m2)
-{
-int stripLen = m_Strip.GetSize();
-	for(int tmp=0;tmp<stripLen;tmp++){
-	BYTE mm = m_Strip[tmp].m_MatchData;
-		if(((mm&m1) && (mm&m2)) || !(mm&(m1|m2)))
-			continue;
-		return FALSE;
-	}
-	return TRUE;
-}
-
-LPCTSTR CRegEx::MatchSlow(LPCTSTR begin,LPCTSTR end,int from,int to)
-{
-	MatchStatesClear(CSop::stCurrent);
-	m_Strip[from].m_MatchData|=CSop::stCurrent;
-	MatchStep(from,to,CSop::stCurrent,charNothing,CSop::stCurrent);
-LPCTSTR mp = NULL;
-int c = (m_mBegin==m_mPointer)?charOut:(int)(BYTE)begin[-1];
-LPCTSTR p = begin;
-	for(;;){
-		// next character..
-	int lastc = c;
-	c = (p==m_mEnd)?charOut:(int)*(BYTE*)p;
-		// Now we start to wonder if there is an EOL and/or BOL between lastc and c
-	int flagc = 0;
-	int i = 0;
-		if((lastc=='\n' && m_Flags&regNewLine) || (lastc==charOut && !(m_mFlags&regNotBOL))){
-			flagc = charBOL;
-			i = m_BOLs;
-		}
-		if((c=='\n' && m_Flags&regNewLine) || (c==charOut && !(m_mFlags&regNotEOL))){
-			flagc = (flagc==charBOL)?charBOLEOL:charEOL;
-			i+=m_EOLs;
-		}
-		if(i){
-			for(;i>0;i--)
-				MatchStep(from,to,CSop::stCurrent,flagc,CSop::stCurrent);
-		}
-		// Now we wonder about word boundaries..
-		if((flagc==charBOL || (lastc!=charOut && !isWordableChar(lastc))) && (c!=charOut && isWordableChar(c)))
-			flagc=charBOW;
-		if((lastc!=charOut && isWordableChar(lastc)) && (flagc==charEOL || (c!=charOut && !isWordableChar(c))))
-			flagc=charEOW;
-		if(flagc==charBOW || flagc==charEOW){
-			MatchStep(from,to,CSop::stCurrent,flagc,CSop::stCurrent);
-		}
-		// Are we done we all wonder??
-		if(m_Strip[to].m_MatchData&CSop::stCurrent)
-			mp=p;
-		if(MatchStatesEqual(CSop::stCurrent,CSop::stEmpty) || p==end)
-			break;	// Again, we're obliged to note break out. We do.
-		// Sucks.. we have to face this character..
-		MatchStatesCopy(CSop::stTemp,CSop::stCurrent);
-		MatchStatesCopy(CSop::stCurrent,CSop::stEmpty);
-		ASSERT(c!=charOut);
-		MatchStep(from,to,CSop::stTemp,c,CSop::stCurrent);
-		MatchStep(from,to,CSop::stCurrent,charNothing,CSop::stCurrent);
-		p++;
-	}
-	return mp;
-}
-
-LPCTSTR CRegEx::MatchDissect(LPCTSTR begin,LPCTSTR end,int from,int to)
-{
-LPCTSTR sp = begin, dp;
-LPCTSTR stp, rest, tail, ssp, oldssp, sep;
-int ssub, esub;
-int es;
-int i;
-	for(int ss=from;ss<to;ss = es){
-		// Identify end of SubRE
-		es = ss;
-		switch(m_Strip[es].m_Operator){
-		case CSop::opPlus0:
-		case CSop::opQuest0:
-			es+=m_Strip[es].m_Operand;
-			break;
-		case CSop::opChoice0:
-			while(m_Strip[es].m_Operator!=CSop::opChoice1)
-				es+=m_Strip[es].m_Operand;
-			break;
-		}
-		es++;
-		// Figure out what it matched
-		switch(m_Strip[ss].m_Operator){
-		case CSop::opEnd:
-			ASSERT(FALSE);
-			break;
-		case CSop::opChar:
-			sp++;
-			break;
-		case CSop::opBOL:
-		case CSop::opEOL:
-		case CSop::opBOW:
-		case CSop::opEOW:
-			break;
-		case CSop::opAny:
-		case CSop::opAnyOf:
-			sp++;
-			break;
-		case CSop::opBackRef0:
-		case CSop::opBackRef1:
-			ASSERT(FALSE);
-			break;
-			// Cases where lenght of match is hard to find..
-		case CSop::opQuest0:
-			stp=end;
-			for(;;){
-				// How long could this one be??
-				rest = MatchSlow(sp,stp,ss,es);
-				ASSERT(rest);	// It did match.. It should've..
-				// Could the rest match the rest? (good question)
-				tail = MatchSlow(rest,end,es,to);
-				if(tail==end)
-					break;	// yup.
-				// nope, try a shorter match for this one..
-				stp=rest-1;
-				ASSERT(stp>=sp);	// It did work.. It should've..
-			}
-			ssub=ss+1;
-			esub=es-1;
-			// Did innards match?
-			if(MatchSlow(sp,rest,ssub,esub)){
-				dp = MatchDissect(sp,rest,ssub,esub);
-				ASSERT(dp==rest);
-			}else	// nope..
-				ASSERT(sp==rest);
-			sp = rest;
-			break;
-		case CSop::opPlus0:
-			stp=end;
-			for(;;){
-				// How long could this one be??
-				rest = MatchSlow(sp,stp,ss,es);
-				ASSERT(rest);	// It did.. It should've..
-				// Could the rest match the rest?
-				tail = MatchSlow(rest,end,es,to);
-				if(tail==end)
-					break;	// yup.
-				// nope..
-				stp=rest-1;
-				ASSERT(stp>=sp);	// It should've worked, we believe..
-			}
-			ssub=ss+1;
-			esub=es-1;
-			ssp=sp;
-			oldssp=ssp;
-			for(;;){	// Find last match of innards..
-				sep  = MatchSlow(ssp,rest,ssub,esub);
-				if((!sep) || sep==ssp)
-					break; // Failed or matched nothin'
-				oldssp=ssp;
-				ssp=sep;
-			}
-			if(!sep){
-				// Last successfull..
-				sep=ssp;
-				ssp=oldssp;
-			}
-			ASSERT(sep=rest);	// Must exhaust substring they say..
-			VERIFY(MatchSlow(ssp,sep,ssub,esub)==rest);	// Assert or verify - that is the question..
-			dp = MatchDissect(ssp,sep,ssub,esub);
-			ASSERT(dp==sep);
-			sp=rest;
-			break;
-		case CSop::opChoice0:
-			stp = end;
-			for(;;){
-				// how long..
-				rest = MatchSlow(sp,stp,ss,es);
-				ASSERT(rest);
-				// Could it..
-				tail = MatchSlow(rest,end,es,to);
-				if(tail==end)
-					break;	// y
-				// n
-				stp = rest-1;
-				ASSERT(stp>=sp);
-			}
-			ssub=ss+1;
-			esub=ss+m_Strip[ss].m_Operand-1;
-			ASSERT(m_Strip[esub].m_Operator==CSop::opOr0);
-			for(;;){	// Find first matching branch..
-				if(MatchSlow(sp,rest,ssub,esub)==rest)
-					break;
-				// this one missed, try next..
-				ASSERT(m_Strip[esub].m_Operator==CSop::opOr0);
-				esub++;
-				ASSERT(m_Strip[esub].m_Operator==CSop::opOr1);
-				ssub=esub+1;
-				esub+=m_Strip[esub].m_Operand;
-				if(m_Strip[esub].m_Operator==CSop::opOr1)
-					esub--;
-				else
-					ASSERT(m_Strip[esub].m_Operator==CSop::opChoice1);
-			}
-			dp = MatchDissect(sp,rest,ssub,esub);
-			ASSERT(dp==rest);
-			sp=rest;
-			break;
-		case CSop::opPlus1:
-		case CSop::opQuest1:
-		case CSop::opOr0:
-		case CSop::opOr1:
-		case CSop::opChoice1:
-			ASSERT(FALSE);
-			break;
-		case CSop::opLeftParen:
-			i = m_Strip[ss].m_Operand;
-			ASSERT(0<i && i<=m_Subexps);
-			m_Matches[i].m_Begin=sp-m_mBegin;
-			break;
-		case CSop::opRightParen:
-			i = m_Strip[ss].m_Operand;
-			ASSERT(0<i && i<=m_Subexps);
-			m_Matches[i].m_End=sp-m_mBegin;
-			break;
-#ifdef _DEBUG
-		default:	// Uh.. oh..
-			ASSERT(FALSE);
-			break;
-#endif
-		}
-	}
-	ASSERT(sp==end);
-	return sp;
-}
-
-LPCTSTR CRegEx::MatchBackRef(LPCTSTR begin,LPCTSTR end,int from,int to,int level)
-{
-LPCTSTR sp = begin;
-BOOL hard = FALSE;
-	// Get as far as we can as long as it's easy
-	for(int ss=from;!hard && ss<to;ss++){
-	CSop s = m_Strip[ss];
-		switch(s.m_Operator){
-		case CSop::opChar:
-			if(sp==end || *sp++ != s.m_Operand)
-				return NULL;
-			break;
-		case CSop::opAny:
-			if(sp==end)
-				return NULL;
-			sp++;	// I'm sure this ++ could be embedded in previous expression..
-			break;
-		case CSop::opAnyOf:
-			if(sp==end || !m_Sets[s.m_Operand].IsIn(*sp++))
-				return NULL;
-			break;
-		case CSop::opBOL:
-			if(!((sp==m_mBegin && !(m_mFlags&regNotBOL)) || (sp<m_mEnd && *(sp-1)=='\n' && (m_Flags&regNewLine))))
-				return NULL;
-			break;
-		case CSop::opEOL:
-			if(!((sp==m_mEnd && !(m_mFlags&regNotEOL)) || (sp<m_mEnd && *sp=='\n' && (m_Flags&regNewLine))))
-				return NULL;
-			break;
-		case CSop::opBOW:
-			if(!(((sp==m_mBegin && !(m_mFlags&regNotBOL)) || (sp<m_mEnd && *(sp-1)=='\n' && (m_Flags&regNewLine)) || (sp>m_mBegin && !isWordableChar(*(sp-1)))) && (sp<m_mEnd && isWordableChar(*sp))))
-				return NULL;
-			break;
-		case CSop::opEOW:
-			if(!(((sp==m_mEnd && !(m_mFlags&regNotEOL)) || (sp<m_mEnd && *sp=='\n' && (m_Flags&regNewLine)) || (sp<m_mEnd && !isWordableChar(*sp))) && (sp>m_mBegin && isWordableChar(*(sp-1)))))
-				return NULL;
-			break;
-		case CSop::opQuest1:
-			break;
-		case CSop::opOr0:	// Matches null, but needs to skip
-			ss++;
-			s = m_Strip[ss];
-			do{
-				ASSERT(s.m_Operator==CSop::opOr1);
-				ss+=s.m_Operand;
-			}while((s=m_Strip[ss]).m_Operator!=CSop::opChoice1);
-			// Now we should note that ss++ gets us past the Choice1..
-			break;
-		default:
-			// Have to make a choice..
-			hard=TRUE;
-			break;
-		}
-	}
-	if(!hard){	// That was it..
-		if(sp!=end)
-			return NULL;
-		return sp;
-	}
-	ss--;	// Adjust for ther for's final increment..
-	// Hard stuff.. is going on and on..
-CSop s = m_Strip[ss];
-int i, len, offsave;
-int ssub,esub;
-LPCTSTR ssp, dp;
-	switch(s.m_Operator){
-	case CSop::opBackRef0:	// The vilest depths they say.. If I only knew what the 'viles' stands for..
-		i = s.m_Operand;
-		ASSERT(0<i && i<=m_Subexps);
-		if(m_Matches[i].m_End<0)
-			return NULL;
-		ASSERT(m_Matches[i].m_Begin>=0);
-		len = m_Matches[i].GetLength();
-		ASSERT((end-m_mBegin)>=len);
-		if(sp>end-len)
-			return NULL;	// Not enough left to match..
-		ssp = m_mBegin+m_Matches[i].m_Begin;
-		if(memcmp(sp,ssp,len))
-			return NULL;
-		while(m_Strip[ss]!=CSop(CSop::opBackRef1,i))
-			ss++;
-		return MatchBackRef(sp+len,end,ss+1,to,level-1);
-		break;
-	case CSop::opQuest0:	// to null or not they wonder..
-		dp = MatchBackRef(sp,end,ss+1,to,level);
-		if(dp)
-			return dp;	// not..
-		return MatchBackRef(sp,end,ss+s.m_Operand+1,to,level-1);
-		break;
-	case CSop::opPlus0:
-		ASSERT(m_mLastPos.GetSize());
-		ASSERT(level+1 <= m_Pluses);
-		m_mLastPos[level+1]=sp;
-		return MatchBackRef(sp,end,ss+1,to,level+1);
-		break;
-	case CSop::opPlus1:
-		if(sp == m_mLastPos[level])	// Last pass matched null
-			return MatchBackRef(sp,end,ss+1,to,level-1);
-		// Try another pass..
-		m_mLastPos[level]=sp;
-		dp = MatchBackRef(sp,end,ss-s.m_Operand+1,to,level);
-		if(dp)
-			return dp;
-		return MatchBackRef(sp,end,ss+1,to,level-1);
-		break;
-	case CSop::opChoice0:	// find the right one, ifany
-		ssub = ss+1;
-		esub = ss+s.m_Operand-1;
-		ASSERT(m_Strip[esub].m_Operator==CSop::opOr0);
-		for(;;){	// Find first matching branch.
-			dp = MatchBackRef(sp,end,ssub,esub,level);
-			if(dp)
-				return dp;
-			// This one missed, try next one..
-			if(m_Strip[esub].m_Operator==CSop::opChoice1)
-				return NULL;	// There is none..
-			esub++;
-			ASSERT(m_Strip[esub].m_Operator==CSop::opOr1);
-			ssub=esub+1;
-			esub+=m_Strip[esub].m_Operand;
-			if(m_Strip[esub].m_Operator==CSop::opOr1)
-				esub--;
-			else
-				ASSERT(m_Strip[esub].m_Operator==CSop::opChoice1);
-		}
-		break;
-	case CSop::opLeftParen:	// Must undo assignment if rest fails..
-		i = s.m_Operand;
-		ASSERT(0<i && i<=m_Subexps);
-		offsave = m_Matches[i].m_Begin;
-		m_Matches[i].m_Begin = sp-m_mBegin;
-		dp = MatchBackRef(sp,end,ss+1,to,level);
-		if(dp)
-			return dp;
-		m_Matches[i].m_Begin=offsave;
-		return NULL;
-		break;
-	case CSop::opRightParen: // Must undo assignment if rest fails..
-		i = s.m_Operand;
-		ASSERT(0<i && i<=m_Subexps);
-		offsave = m_Matches[i].m_End;
-		m_Matches[i].m_End = sp-m_mBegin;
-		dp = MatchBackRef(sp,end,ss+1,to,level);
-		if(dp)
-			return dp;
-		m_Matches[i].m_End = offsave;
-		return NULL;
-		break;
-#ifdef	_DEBUG
-	default:
-		ASSERT(FALSE);
-		break;
-#endif
-	}
-	ASSERT(FALSE);
-	return NULL;	// Anyway - we can never get here..
-}
-
-#ifdef	_DEBUG
-void CRegEx::CSop::Dump(CDumpContext& dc)
-{
-	switch(m_Operator){
-	case opEnd:
-		dc << "end";
-		break;
-	case opChar:
-		dc << "char('" << (char)m_Operand << "')";
-		break;
-	case opBOL:
-		dc << "BOL";
-		break;
-	case opEOL:
-		dc << "EOL";
-		break;
-	case opAny:
-		dc << "any";
-		break;
-	case opAnyOf:
-		dc << "anyOf(" << m_Operand << ")";
-		break;
-	case opBackRef0:
-		dc << "[ backref(" << m_Operand << ")";
-		break;
-	case opBackRef1:
-		dc << "] backref(" << m_Operand << ")";
-		break;
-	case opPlus0:
-		dc << "[ + (" << m_Operand << ")";
-		break;
-	case opPlus1:
-		dc << "] + (" << m_Operand << ")";
-		break;
-	case opQuest0:
-		dc << "[ ? (" << m_Operand << ")";
-		break;
-	case opQuest1:
-		dc << "] ? (" << m_Operand << ")";
-		break;
-	case opLeftParen:
-		dc << "[ (  (" << m_Operand << ")";
-		break;
-	case opRightParen:
-		dc << "] )  (" << m_Operand << ")";
-		break;
-	case opChoice0:
-		dc << "[ choice (" << m_Operand << ")";
-		break;
-	case opOr0:
-		dc << "[ | (" << m_Operand << ")";
-		break;
-	case opOr1:
-		dc << "] | (" << m_Operand << ")";
-		break;
-	case opChoice1:
-		dc << "] choice (" << m_Operand << ")";
-		break;
-	case opBOW:
-		dc << "BOW";
-		break;
-	case opEOW:
-		dc << "EOW";
-		break;
-	}
-}
-void CRegEx::DumpStrip(CDumpContext& dc)
-{
-	for(int tmp=0;tmp<m_Strip.GetSize();tmp++)
-		dc << tmp << ": " << m_Strip[tmp] << ";\n";
-}
-#endif
-
-
-CString CRegEx::GetMatch(int match)
-{
-CString rv;
-	if(!m_Matches.GetSize())
-		return rv;
-	ASSERT(m_Matches[0].m_Begin<m_Input.GetLength() && m_Matches[0].m_End<=m_Input.GetLength());
-	if(match==matchPreMatch)
-		return m_Input.Left(m_Matches[0].m_Begin);
-	if(match==matchPostMatch)
-		return m_Input.Mid(m_Matches[0].m_End);
-	if(match<0 || match >= m_Matches.GetSize())
-		return rv;
-	if(m_Matches[match].m_Begin<0 || m_Matches[match].m_End<0)
-		return rv;
-	ASSERT(m_Matches[match].m_Begin<m_Input.GetLength() && m_Matches[match].m_End<=m_Input.GetLength());
-	rv = m_Input.Mid(m_Matches[match].m_Begin,m_Matches[match].m_End-m_Matches[match].m_Begin);
-	return rv;
-}
author	Michael Krelin <hacker@klever.net>	2012-12-08 21:19:17 (UTC)
committer	Michael Krelin <hacker@klever.net>	2012-12-11 21:59:29 (UTC)
commit	8808689fe340bec6e90ab13dd502292b0579cf1f (patch) (side-by-side diff)
tree	45b7c863151341f687b74e40bffcbd7ab5468783 /shared-code/RegEx.cpp
parent	6e7e413ca364d79673e523c09767c18e7cff1bec (diff)
download	pumpkin-8808689fe340bec6e90ab13dd502292b0579cf1f.zip pumpkin-8808689fe340bec6e90ab13dd502292b0579cf1f.tar.gz pumpkin-8808689fe340bec6e90ab13dd502292b0579cf1f.tar.bz2