summary refs log tree commit diff about
path: root/CrackURL.cpp
Unidiff
Diffstat (limited to 'CrackURL.cpp') (more/less context) (ignore whitespace changes)
-rw-r--r-- CrackURL.cpp 139
1 files changed, 139 insertions, 0 deletions
diff --git a/CrackURL.cpp b/CrackURL.cpp
new file mode 100644
index 0000000..6537b42
--- a/dev/null
+++ b/CrackURL.cpp
@@ -0,0 +1,139 @@
1#include "stdafx.h"
2#include "CrackURL.h"
3
4CCrackURL::CCrackURL()
5{
6 VERIFY(m_reSchema.Compile("([[:alpha:]]+):",CRegEx::regExtended|CRegEx::regIgnoreCase));
7 VERIFY(m_reUPHP.Compile("(([^:]+)?(:([^@]*))?@)?([^:/]+)(:([[:digit:]]+))?",CRegEx::regExtended|CRegEx::regIgnoreCase));
8 VERIFY(m_reHTTPReq.Compile("(.*/)?([^#\\?]*)(#[^\\?]*)?(\\?.*)?",CRegEx::regExtended|CRegEx::regIgnoreCase));
9}
10
11BOOL CCrackURL::Crack(LPCTSTR url)
12{
13 m_URL = url;
14CString rest;
15 if(m_reSchema.Match(url)){
16 m_Schema = m_reSchema.GetMatch(1);
17 rest = m_reSchema.GetMatch(CRegEx::matchPostMatch);
18 }else{
19 m_Schema.Empty();
20 rest = url;
21 }
22 if(rest.Left(2)=="//"){
23 // URL conforms to common syntax
24 rest=rest.Mid(2);
25 ParseUPHP(rest);
26 }else if(!m_Schema.CompareNoCase("mailto")){
27 ParseUPHP(rest);
28 }else{
29 m_User.Empty(); m_Password.Empty(); m_Host.Empty(); m_Port.Empty();
30 }
31 if((!rest.IsEmpty()) && m_reHTTPReq.Match(rest)){
32 m_pathPath = m_reHTTPReq.GetMatch(1);
33 m_pathFile = m_reHTTPReq.GetMatch(2);
34 m_pathAnchor = m_reHTTPReq.GetMatch(3);
35 m_pathQuery = m_reHTTPReq.GetMatch(4);
36 m_URLPath = m_reHTTPReq.GetMatch(CRegEx::matchMatch);
37 rest = m_reHTTPReq.GetMatch(CRegEx::matchPostMatch);
38 }else{
39 m_URLPath.Empty();
40 m_pathPath.Empty();
41 m_pathFile.Empty();
42 m_pathAnchor.Empty();
43 m_pathQuery.Empty();
44 }
45 m_urlRest = rest;
46 return TRUE;
47}
48
49void CCrackURL::ParseUPHP(CString& rest)
50{
51 if(m_reUPHP.Match(rest)){
52 m_User = m_reUPHP.GetMatch(2);
53 m_Password = m_reUPHP.GetMatch(4);
54 m_Host = m_reUPHP.GetMatch(5);
55 m_Port = m_reUPHP.GetMatch(7);
56 if(!m_Port.IsEmpty())
57 m_nPort = atoi(m_Port);
58 rest = m_reUPHP.GetMatch(CRegEx::matchPostMatch);
59 }else{
60 m_User.Empty();
61 m_Password.Empty();
62 m_Host.Empty();
63 m_Port.Empty();
64 }
65}
66
67BOOL CCrackURL::Adjust(LPCTSTR srcURL)
68{
69CCrackURL cracked;
70 VERIFY(cracked.Crack(srcURL));
71 return Adjust(cracked);
72}
73
74BOOL CCrackURL::Adjust(CCrackURL& srcURL)
75{
76 if(m_Schema.IsEmpty()) m_Schema = srcURL.m_Schema;
77 if(m_Host.IsEmpty()){
78 m_Host = srcURL.m_Host;
79 if(m_User.IsEmpty()) m_User = srcURL.m_User;
80 if(m_Password.IsEmpty()) m_Password = srcURL.m_Password;
81 if(m_Port.IsEmpty()) m_Port = srcURL.m_Port, m_nPort = srcURL.m_nPort;
82 }
83 if(m_pathPath.Find('/')==0)
84 return TRUE;
85 m_pathPath = srcURL.m_pathPath+m_pathPath;
86 m_URLPath.Empty();
87 return TRUE;
88}
89
90CString CCrackURL::Build()
91{
92CString rv;
93 if(!m_Schema.IsEmpty())
94 rv+=m_Schema+':';
95 if(!m_Schema.CompareNoCase("mailto")){
96 rv+=m_User+'@'+m_Host;
97 }else{
98 rv+="//";
99 if(!m_User.IsEmpty()){
100 rv+=m_User;
101 if(!m_Password.IsEmpty())
102 rv+=':'+m_Password;
103 rv+='@';
104 }
105 rv+=m_Host;
106 if(!m_Port.IsEmpty())
107 rv+=':'+m_Port;
108 if(!m_URLPath.IsEmpty()){
109 rv+=m_URLPath;
110 }else{
111 rv+=m_pathPath+m_pathFile+m_pathAnchor+m_pathQuery;
112 }
113 }
114 return rv;
115}
116
117CString CCrackURL::InnermostURL(LPCTSTR url)
118{
119static CRegEx reInnerURL;
120 if(!reInnerURL.m_bCompiled)
121 VERIFY(reInnerURL.Compile(".+[^[:alpha:]]([[:alpha:]]+://[^&]+)",CRegEx::regExtended|CRegEx::regIgnoreCase));
122CString rv = url;
123 while(reInnerURL.Match(rv))
124 rv=reInnerURL.GetMatch(1);
125 return rv;
126}
127
128CString CCrackURL::GetHostName(LPCTSTR url)
129{
130static CRegEx reHost;
131 if(!reHost.m_bCompiled)
132 VERIFY(reHost.Compile("^[[:alpha:]]+://([^/:]+)",CRegEx::regExtended|CRegEx::regIgnoreCase));
133CString rv;
134 if(reHost.Match(url))
135 rv = reHost.GetMatch(1);
136 else
137 rv = url;
138 return rv;
139}