author | Michael Krelin <hacker@klever.net> | 2004-07-05 01:53:09 (UTC) |
---|---|---|
committer | Michael Krelin <hacker@klever.net> | 2004-07-05 01:53:09 (UTC) |
commit | 955d4e00adc9f39ab93bf21f07506eb75b013c70 (patch) (unidiff) | |
tree | 92493a2c9ac206b822e24a9e5a6f6b1589be6afb /shared-code/RegEx.h | |
download | T42-955d4e00adc9f39ab93bf21f07506eb75b013c70.zip T42-955d4e00adc9f39ab93bf21f07506eb75b013c70.tar.gz T42-955d4e00adc9f39ab93bf21f07506eb75b013c70.tar.bz2 |
initial commit into svn repository
git-svn-id: http://svn.klever.net/kin/T42/trunk@1 fe716a7a-6dde-0310-88d9-d003556173a8
-rw-r--r-- | shared-code/RegEx.h | 158 |
1 files changed, 158 insertions, 0 deletions
diff --git a/shared-code/RegEx.h b/shared-code/RegEx.h new file mode 100644 index 0000000..2534768 --- a/dev/null +++ b/shared-code/RegEx.h | |||
@@ -0,0 +1,158 @@ | |||
1 | #ifndef __REGEX_H | ||
2 | #define __REGEX_H | ||
3 | |||
4 | class CRegEx{ | ||
5 | public: | ||
6 | CString GetMatch(int match=0); | ||
7 | CString m_Input; | ||
8 | struct CMatch{ | ||
9 | CMatch() : m_Begin(-1), m_End(-1) {} | ||
10 | int GetLength() { return m_End-m_Begin; } | ||
11 | int m_Begin; | ||
12 | int m_End; | ||
13 | }; | ||
14 | typedefCArray<CMatch,CMatch&> CMatchBox; | ||
15 | enum{ | ||
16 | matchMatch = 0, | ||
17 | matchPreMatch = -1, | ||
18 | matchPostMatch = -2 | ||
19 | }; | ||
20 | CMatchBox m_Matches; | ||
21 | enum{ | ||
22 | charOut = 256, | ||
23 | charBOL, charEOL, charBOLEOL, charNothing, charBOW, charEOW, | ||
24 | charMaxCode = charEOW, | ||
25 | charNNChars = (charMaxCode-255) | ||
26 | }; | ||
27 | int m_mFlags; | ||
28 | enum{ | ||
29 | regeSuccess = 0, | ||
30 | regeNoMatch = 1, regeBadPattern, regeCollate, regeCType, regeEscape, regeSubReg, regeBracket, | ||
31 | regeParen, regeBrace, regeBadBrace, regeRange, regeSpace, regeBadRepeat, regeEmpty, regeAssert, | ||
32 | regeInvArg | ||
33 | }; | ||
34 | int m_Error; | ||
35 | CRegEx(); | ||
36 | BOOL m_bCompiled; | ||
37 | CString m_Pattern; | ||
38 | BOOL m_bBackRefs; | ||
39 | int m_Pluses; | ||
40 | CString m_Must; | ||
41 | BYTE m_Category[CHAR_MAX-CHAR_MIN+1]; | ||
42 | int m_Categories; | ||
43 | int m_EOLs; | ||
44 | int m_BOLs; | ||
45 | int m_iFlags; | ||
46 | int m_Subexps; | ||
47 | struct CSop { | ||
48 | void Dump(CDumpContext& dc); | ||
49 | CSop(){} | ||
50 | CSop(BYTE op,DWORD opnd=0) : m_Operator(op), m_Operand(opnd) {} | ||
51 | BOOL operator==(CSop& other) {return m_Operator==other.m_Operator && m_Operand==other.m_Operand;} | ||
52 | BOOL operator!=(CSop& other) { return !((*this)==other);} | ||
53 | enum{ | ||
54 | opEnd = 1, opChar, opBOL, opEOL, opAny, opAnyOf, opBackRef0, opBackRef1, | ||
55 | opPlus0, opPlus1, opQuest0, opQuest1, opLeftParen, opRightParen, opChoice0, | ||
56 | opOr0, opOr1, opChoice1, opBOW, opEOW | ||
57 | }; | ||
58 | BYTE m_Operator; | ||
59 | DWORD m_Operand; | ||
60 | enum{ | ||
61 | stCurrent = 1, stFresh = 2, stTemp = 4, stEmpty = 8 | ||
62 | }; | ||
63 | BYTE m_MatchData; | ||
64 | }; | ||
65 | typedef CArray<CSop,CSop&> CStrip; | ||
66 | CStrip m_Strip; | ||
67 | int m_Flags; | ||
68 | struct CSet{ | ||
69 | CSet() : m_Hash(0) { memset(m_Set,0,sizeof(m_Set)); } | ||
70 | CSet(CSet& src) { (*this)=src; } | ||
71 | CSet& operator=(CSet& src) { memmove(this,&src,sizeof(*this)); return *this; } | ||
72 | BOOL operator==(CSet& other) { if(m_Hash!=other.m_Hash)return FALSE;return !memcmp(m_Set,other.m_Set,sizeof(m_Set)); } | ||
73 | enum{ | ||
74 | size = (CHAR_MAX-CHAR_MIN+1) | ||
75 | }; | ||
76 | BOOL m_Set[size]; | ||
77 | BYTE m_Hash; | ||
78 | public: | ||
79 | UCHAR GetOnly(); | ||
80 | void Sub(UCHAR c); | ||
81 | BOOL IsIn(UCHAR c); | ||
82 | void Add(UCHAR c); | ||
83 | }; | ||
84 | typedef CArray<CSet,CSet&> CSets; | ||
85 | CSets m_Sets; | ||
86 | enum{ | ||
87 | // Compile flags | ||
88 | regBasic = 0, regExtended = 1, | ||
89 | regIgnoreCase = 2, | ||
90 | regNoSubExpressions = 4,// Also works for matching. | ||
91 | regNewLine = 16, | ||
92 | regLiteral = 32, | ||
93 | // Match Flags | ||
94 | regNotBOL = 1, | ||
95 | regNotEOL = 2, | ||
96 | regOneMatch=64, | ||
97 | regBackRefs=128, | ||
98 | // iFlags | ||
99 | iflagsUseBOL=1, iflagsUseEOL=2, iflagsBad=4 | ||
100 | }; | ||
101 | CString Replace(LPCTSTR src,LPCTSTR rep,int flags=0); | ||
102 | BOOL Match(LPCTSTR src,int flags=0); | ||
103 | BOOL Compile(LPCTSTR regex,int flags=0); | ||
104 | private: | ||
105 | #ifdef_DEBUG | ||
106 | void DumpStrip(CDumpContext& dc); | ||
107 | #endif | ||
108 | LPCTSTR MatchBackRef(LPCTSTR begin,LPCTSTR end,int from,int to,int level); | ||
109 | typedef CArray<LPCTSTR,LPCTSTR> CStrPtrArray; | ||
110 | CStrPtrArray m_mLastPos; | ||
111 | LPCTSTR MatchDissect(LPCTSTR begin,LPCTSTR end,int from,int to); | ||
112 | LPCTSTR MatchSlow(LPCTSTR begin,LPCTSTR end,int from,int to); | ||
113 | LPCTSTR m_cOldP; | ||
114 | BOOL MatchStatesEqual(BYTE m1,BYTE m2); | ||
115 | LPCTSTR m_mBegin; | ||
116 | void MatchStatesCopy(BYTE dst,BYTE src); | ||
117 | void MatchStep(int from,int to,BYTE maskBefore,int charCode,BYTE maskAfter); | ||
118 | void MatchStatesClear(BYTE mask); | ||
119 | LPCTSTR MatchFast(LPCTSTR begin); | ||
120 | LPCTSTR m_mEnd; | ||
121 | LPCTSTR m_mPointer; | ||
122 | BOOL ParseBREexp(BOOL ordinaryStar); | ||
123 | void ParseBRE(int stopa=0,int stopb=0); | ||
124 | void ParseLiteral(); | ||
125 | int CountPluses(); | ||
126 | void FigureMust(); | ||
127 | BOOL IsInSameSets(UCHAR c1,UCHAR c2); | ||
128 | BOOL IsInSets(UCHAR c); | ||
129 | void Categorize(); | ||
130 | int StripDuplicate(int from,int to); | ||
131 | void EmitRepeat(int pos,int from,int to); | ||
132 | UCHAR ParseBracketSymbol(); | ||
133 | UCHAR ParseBracketCollatingElement(UCHAR term); | ||
134 | void ParseBracketEClass(CSet& cset); | ||
135 | void ParseBracketCClass(CSet& cset); | ||
136 | void ParseBracketTerm(CSet& cset); | ||
137 | int StoreSet(CSet& cset); | ||
138 | void ParseBracket(); | ||
139 | int ParseCount(); | ||
140 | void EmitNonNewLineAny(); | ||
141 | void EmitOrdinary(UCHAR c); | ||
142 | void StripInsert(int pos,CSop& sop); | ||
143 | void ParseEREexp(); | ||
144 | void ParseERE(int stop=0); | ||
145 | struct CParenthesis{ | ||
146 | long m_Begin; | ||
147 | long m_End; | ||
148 | CParenthesis(long b=0,long e=0) : m_Begin(b), m_End(e) {} | ||
149 | }; | ||
150 | typedef CArray<CParenthesis,CParenthesis&> CParens; | ||
151 | CParens m_ParseParens; | ||
152 | int m_ParsePointer; | ||
153 | }; | ||
#ifdef _DEBUG
// Debug builds only: lets a CRegEx::CSop be streamed into a CDumpContext.
// Delegates to CSop::Dump() and returns the context so insertions can chain.
inline CDumpContext& operator<<(CDumpContext& dc, CRegEx::CSop& sop) { sop.Dump(dc); return dc; }
#endif
157 | |||
158 | #endif // __REGEX_H \ No newline at end of file | ||