summaryrefslogtreecommitdiffabout
path: root/shared-code/RegEx.h
blob: 25347688e9ba37ec065843fe5a773de18609fb8a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#ifndef __REGEX_H
#define __REGEX_H

class CRegEx	{
public:
	CString GetMatch(int match=0);
	CString m_Input;
	struct	CMatch	{
		CMatch() : m_Begin(-1), m_End(-1) {}
		int GetLength() { return m_End-m_Begin; }
		int m_Begin;
		int m_End;
	};
	typedef	CArray<CMatch,CMatch&> CMatchBox;
	enum	{
		matchMatch = 0,
		matchPreMatch = -1,
		matchPostMatch = -2
	};
	CMatchBox m_Matches;
	enum	{
		charOut = 256,
		charBOL, charEOL, charBOLEOL, charNothing, charBOW, charEOW,
		charMaxCode = charEOW,
		charNNChars = (charMaxCode-255)
	};
	int m_mFlags;
	enum	{
		regeSuccess = 0,
		regeNoMatch = 1, regeBadPattern, regeCollate, regeCType, regeEscape, regeSubReg, regeBracket,
		regeParen, regeBrace, regeBadBrace, regeRange, regeSpace, regeBadRepeat, regeEmpty, regeAssert,
		regeInvArg
	};
	int m_Error;
	CRegEx();
	BOOL m_bCompiled;
	CString m_Pattern;
	BOOL m_bBackRefs;
	int m_Pluses;
	CString m_Must;
	BYTE m_Category[CHAR_MAX-CHAR_MIN+1];
	int m_Categories;
	int m_EOLs;
	int m_BOLs;
	int m_iFlags;
	int m_Subexps;
	struct CSop {
		void Dump(CDumpContext& dc);
		CSop()	{}
		CSop(BYTE op,DWORD opnd=0) : m_Operator(op), m_Operand(opnd) {}
		BOOL operator==(CSop& other) {return m_Operator==other.m_Operator && m_Operand==other.m_Operand;}
		BOOL operator!=(CSop& other) { return !((*this)==other);}
		enum	{
			opEnd = 1, opChar, opBOL, opEOL, opAny, opAnyOf, opBackRef0, opBackRef1,
			opPlus0, opPlus1, opQuest0, opQuest1, opLeftParen, opRightParen, opChoice0,
			opOr0, opOr1, opChoice1, opBOW, opEOW
		};
		BYTE m_Operator;
		DWORD m_Operand;
		enum	{
			stCurrent = 1, stFresh = 2, stTemp = 4, stEmpty = 8
		};
		BYTE m_MatchData;
	};
	typedef CArray<CSop,CSop&> CStrip;
	CStrip m_Strip;
	int m_Flags;
	struct CSet	{
		CSet() : m_Hash(0) { memset(m_Set,0,sizeof(m_Set)); }
		CSet(CSet& src) { (*this)=src; }
		CSet& operator=(CSet& src) { memmove(this,&src,sizeof(*this)); return *this; }
		BOOL operator==(CSet& other) { if(m_Hash!=other.m_Hash)return FALSE;return !memcmp(m_Set,other.m_Set,sizeof(m_Set)); }
		enum	{
			size = (CHAR_MAX-CHAR_MIN+1)
		};
		BOOL m_Set[size];
		BYTE m_Hash;
	public:
		UCHAR GetOnly();
		void Sub(UCHAR c);
		BOOL IsIn(UCHAR c);
		void Add(UCHAR c);
	};
	typedef CArray<CSet,CSet&> CSets;
	CSets m_Sets;
	enum	{
		// Compile flags
		regBasic = 0, regExtended = 1,
		regIgnoreCase = 2,
		regNoSubExpressions = 4,	// Also works for matching.
		regNewLine = 16,
		regLiteral = 32,
		// Match Flags
		regNotBOL = 1,
		regNotEOL = 2,
		regOneMatch=64,
		regBackRefs=128,
		// iFlags
		iflagsUseBOL=1, iflagsUseEOL=2, iflagsBad=4
	};
	CString Replace(LPCTSTR src,LPCTSTR rep,int flags=0);
	BOOL Match(LPCTSTR src,int flags=0);
	BOOL Compile(LPCTSTR regex,int flags=0);
private:
#ifdef	_DEBUG
	void DumpStrip(CDumpContext& dc);
#endif
	LPCTSTR MatchBackRef(LPCTSTR begin,LPCTSTR end,int from,int to,int level);
	typedef CArray<LPCTSTR,LPCTSTR> CStrPtrArray;
	CStrPtrArray m_mLastPos;
	LPCTSTR MatchDissect(LPCTSTR begin,LPCTSTR end,int from,int to);
	LPCTSTR MatchSlow(LPCTSTR begin,LPCTSTR end,int from,int to);
	LPCTSTR m_cOldP;
	BOOL MatchStatesEqual(BYTE m1,BYTE m2);
	LPCTSTR m_mBegin;
	void MatchStatesCopy(BYTE dst,BYTE src);
	void MatchStep(int from,int to,BYTE maskBefore,int charCode,BYTE maskAfter);
	void MatchStatesClear(BYTE mask);
	LPCTSTR MatchFast(LPCTSTR begin);
	LPCTSTR m_mEnd;
	LPCTSTR m_mPointer;
	BOOL ParseBREexp(BOOL ordinaryStar);
	void ParseBRE(int stopa=0,int stopb=0);
	void ParseLiteral();
	int CountPluses();
	void FigureMust();
	BOOL IsInSameSets(UCHAR c1,UCHAR c2);
	BOOL IsInSets(UCHAR c);
	void Categorize();
	int StripDuplicate(int from,int to);
	void EmitRepeat(int pos,int from,int to);
	UCHAR ParseBracketSymbol();
	UCHAR ParseBracketCollatingElement(UCHAR term);
	void ParseBracketEClass(CSet& cset);
	void ParseBracketCClass(CSet& cset);
	void ParseBracketTerm(CSet& cset);
	int StoreSet(CSet& cset);
	void ParseBracket();
	int ParseCount();
	void EmitNonNewLineAny();
	void EmitOrdinary(UCHAR c);
	void StripInsert(int pos,CSop& sop);
	void ParseEREexp();
	void ParseERE(int stop=0);
	struct	CParenthesis	{
		long m_Begin;
		long m_End;
		CParenthesis(long b=0,long e=0) : m_Begin(b), m_End(e) {}
	};
	typedef CArray<CParenthesis,CParenthesis&> CParens;
	CParens m_ParseParens;
	int m_ParsePointer;
};
#ifdef	_DEBUG
// Debug-only streaming helper: forwards to CSop::Dump so that a strip
// element can be written with `dc << sop`, and returns the same dump
// context to allow chaining.
inline CDumpContext& operator<<(CDumpContext& dc, CRegEx::CSop& sop)
{
	sop.Dump(dc);
	return dc;
}
#endif

#endif // __REGEX_H