From 955d4e00adc9f39ab93bf21f07506eb75b013c70 Mon Sep 17 00:00:00 2001 From: Michael Krelin Date: Mon, 05 Jul 2004 01:53:09 +0000 Subject: initial commit into svn repository git-svn-id: http://svn.klever.net/kin/T42/trunk@1 fe716a7a-6dde-0310-88d9-d003556173a8 --- (limited to 'shared-code/RegEx.h') diff --git a/shared-code/RegEx.h b/shared-code/RegEx.h new file mode 100644 index 0000000..2534768 --- a/dev/null +++ b/shared-code/RegEx.h @@ -0,0 +1,158 @@ +#ifndef __REGEX_H +#define __REGEX_H + +class CRegEx { +public: + CString GetMatch(int match=0); + CString m_Input; + struct CMatch { + CMatch() : m_Begin(-1), m_End(-1) {} + int GetLength() { return m_End-m_Begin; } + int m_Begin; + int m_End; + }; + typedef CArray CMatchBox; + enum { + matchMatch = 0, + matchPreMatch = -1, + matchPostMatch = -2 + }; + CMatchBox m_Matches; + enum { + charOut = 256, + charBOL, charEOL, charBOLEOL, charNothing, charBOW, charEOW, + charMaxCode = charEOW, + charNNChars = (charMaxCode-255) + }; + int m_mFlags; + enum { + regeSuccess = 0, + regeNoMatch = 1, regeBadPattern, regeCollate, regeCType, regeEscape, regeSubReg, regeBracket, + regeParen, regeBrace, regeBadBrace, regeRange, regeSpace, regeBadRepeat, regeEmpty, regeAssert, + regeInvArg + }; + int m_Error; + CRegEx(); + BOOL m_bCompiled; + CString m_Pattern; + BOOL m_bBackRefs; + int m_Pluses; + CString m_Must; + BYTE m_Category[CHAR_MAX-CHAR_MIN+1]; + int m_Categories; + int m_EOLs; + int m_BOLs; + int m_iFlags; + int m_Subexps; + struct CSop { + void Dump(CDumpContext& dc); + CSop() {} + CSop(BYTE op,DWORD opnd=0) : m_Operator(op), m_Operand(opnd) {} + BOOL operator==(CSop& other) {return m_Operator==other.m_Operator && m_Operand==other.m_Operand;} + BOOL operator!=(CSop& other) { return !((*this)==other);} + enum { + opEnd = 1, opChar, opBOL, opEOL, opAny, opAnyOf, opBackRef0, opBackRef1, + opPlus0, opPlus1, opQuest0, opQuest1, opLeftParen, opRightParen, opChoice0, + opOr0, opOr1, opChoice1, opBOW, opEOW + }; + BYTE m_Operator; + DWORD m_Operand; + enum { + stCurrent = 1, stFresh = 2, stTemp = 4, stEmpty = 8 + }; + BYTE m_MatchData; + }; + typedef CArray CStrip; + CStrip m_Strip; + int m_Flags; + struct CSet { + CSet() : m_Hash(0) { memset(m_Set,0,sizeof(m_Set)); } + CSet(CSet& src) { (*this)=src; } + CSet& operator=(CSet& src) { memmove(this,&src,sizeof(*this)); return *this; } + BOOL operator==(CSet& other) { if(m_Hash!=other.m_Hash)return FALSE;return !memcmp(m_Set,other.m_Set,sizeof(m_Set)); } + enum { + size = (CHAR_MAX-CHAR_MIN+1) + }; + BOOL m_Set[size]; + BYTE m_Hash; + public: + UCHAR GetOnly(); + void Sub(UCHAR c); + BOOL IsIn(UCHAR c); + void Add(UCHAR c); + }; + typedef CArray CSets; + CSets m_Sets; + enum { + // Compile flags + regBasic = 0, regExtended = 1, + regIgnoreCase = 2, + regNoSubExpressions = 4, // Also works for matching. + regNewLine = 16, + regLiteral = 32, + // Match Flags + regNotBOL = 1, + regNotEOL = 2, + regOneMatch=64, + regBackRefs=128, + // iFlags + iflagsUseBOL=1, iflagsUseEOL=2, iflagsBad=4 + }; + CString Replace(LPCTSTR src,LPCTSTR rep,int flags=0); + BOOL Match(LPCTSTR src,int flags=0); + BOOL Compile(LPCTSTR regex,int flags=0); +private: +#ifdef _DEBUG + void DumpStrip(CDumpContext& dc); +#endif + LPCTSTR MatchBackRef(LPCTSTR begin,LPCTSTR end,int from,int to,int level); + typedef CArray CStrPtrArray; + CStrPtrArray m_mLastPos; + LPCTSTR MatchDissect(LPCTSTR begin,LPCTSTR end,int from,int to); + LPCTSTR MatchSlow(LPCTSTR begin,LPCTSTR end,int from,int to); + LPCTSTR m_cOldP; + BOOL MatchStatesEqual(BYTE m1,BYTE m2); + LPCTSTR m_mBegin; + void MatchStatesCopy(BYTE dst,BYTE src); + void MatchStep(int from,int to,BYTE maskBefore,int charCode,BYTE maskAfter); + void MatchStatesClear(BYTE mask); + LPCTSTR MatchFast(LPCTSTR begin); + LPCTSTR m_mEnd; + LPCTSTR m_mPointer; + BOOL ParseBREexp(BOOL ordinaryStar); + void ParseBRE(int stopa=0,int stopb=0); + void ParseLiteral(); + int CountPluses(); + void FigureMust(); + BOOL IsInSameSets(UCHAR c1,UCHAR c2); + BOOL IsInSets(UCHAR c); + void Categorize(); + int StripDuplicate(int from,int to); + void EmitRepeat(int pos,int from,int to); + UCHAR ParseBracketSymbol(); + UCHAR ParseBracketCollatingElement(UCHAR term); + void ParseBracketEClass(CSet& cset); + void ParseBracketCClass(CSet& cset); + void ParseBracketTerm(CSet& cset); + int StoreSet(CSet& cset); + void ParseBracket(); + int ParseCount(); + void EmitNonNewLineAny(); + void EmitOrdinary(UCHAR c); + void StripInsert(int pos,CSop& sop); + void ParseEREexp(); + void ParseERE(int stop=0); + struct CParenthesis { + long m_Begin; + long m_End; + CParenthesis(long b=0,long e=0) : m_Begin(b), m_End(e) {} + }; + typedef CArray CParens; + CParens m_ParseParens; + int m_ParsePointer; +}; +#ifdef _DEBUG +inline CDumpContext& operator<<(CDumpContext& dc, CRegEx::CSop& sop) { sop.Dump(dc); return dc; } +#endif + +#endif // __REGEX_H \ No newline at end of file -- cgit v0.9.0.2