summaryrefslogtreecommitdiffabout
path: root/CrackURL.cpp
Side-by-side diff
Diffstat (limited to 'CrackURL.cpp') (more/less context) (ignore whitespace changes)
-rw-r--r--CrackURL.cpp139
1 files changed, 139 insertions, 0 deletions
diff --git a/CrackURL.cpp b/CrackURL.cpp
new file mode 100644
index 0000000..6537b42
--- a/dev/null
+++ b/CrackURL.cpp
@@ -0,0 +1,139 @@
+#include "stdafx.h"
+#include "CrackURL.h"
+
+// Prepares a cracker: puts the numeric port into a defined state and compiles
+// the three extended, case-insensitive patterns used to take a URL apart.
+//
+//  m_reSchema  - leading "<letters>:" schema; capture 1 is the schema name.
+//  m_reUPHP    - optional "user:password@" followed by host and an optional
+//                ":digits" port (captures 2, 4, 5 and 7 respectively).
+//  m_reHTTPReq - directory part, file name, "#anchor" and "?query"
+//                (captures 1 to 4).
+//
+// A pattern that fails to compile is reported through VERIFY.
+CCrackURL::CCrackURL()
+{
+    // m_nPort is only assigned while parsing when a port is actually present,
+    // yet it is copied around unconditionally by Adjust(); give it a defined
+    // value so that such a copy never reads an indeterminate number.
+    m_nPort = 0;
+
+    // The schema must sit at the very beginning of the URL (leading blanks are
+    // tolerated).  Without the anchor any later "word:" - for instance inside
+    // the query string of a relative URL - would be mistaken for the schema
+    // and everything in front of it would be thrown away.
+    VERIFY(m_reSchema.Compile("^[[:space:]]*([[:alpha:]]+):",CRegEx::regExtended|CRegEx::regIgnoreCase));
+    VERIFY(m_reUPHP.Compile("(([^:]+)?(:([^@]*))?@)?([^:/]+)(:([[:digit:]]+))?",CRegEx::regExtended|CRegEx::regIgnoreCase));
+    VERIFY(m_reHTTPReq.Compile("(.*/)?([^#\\?]*)(#[^\\?]*)?(\\?.*)?",CRegEx::regExtended|CRegEx::regIgnoreCase));
+}
+
+// Splits `url` into its components and stores them in the member fields.
+//
+// Steps, in order:
+//   1. m_URL keeps the unmodified input.
+//   2. If the schema pattern matches, m_Schema receives its first capture and
+//      parsing continues with the text after the match; otherwise m_Schema
+//      is cleared and the whole input is parsed further.
+//   3. When the remaining text starts with "//" (which is then dropped) or
+//      the schema equals "mailto" (case-insensitively), ParseUPHP() extracts
+//      user, password, host and port; otherwise those four strings are
+//      cleared.
+//   4. If something is left and the request pattern matches, the path, file,
+//      anchor and query captures are stored together with the whole match
+//      (m_URLPath); otherwise all five strings are cleared.
+//   5. The text following the request match - or the untouched remainder when
+//      step 4 did not match - ends up in m_urlRest.
+//
+// Always returns TRUE.
+BOOL CCrackURL::Crack(LPCTSTR url)
+{
+    m_URL = url;
+
+    // --- schema ---------------------------------------------------------
+    CString remainder;
+    if(!m_reSchema.Match(url)){
+        m_Schema.Empty();
+        remainder = url;
+    }else{
+        m_Schema = m_reSchema.GetMatch(1);
+        remainder = m_reSchema.GetMatch(CRegEx::matchPostMatch);
+    }
+
+    // --- user, password, host, port -------------------------------------
+    BOOL hasAuthority = FALSE;
+    if(remainder.Left(2)=="//"){
+        // URL conforms to common syntax; skip the two slashes
+        remainder = remainder.Mid(2);
+        hasAuthority = TRUE;
+    }else if(m_Schema.CompareNoCase("mailto")==0){
+        // mailto carries user@host without the leading slashes
+        hasAuthority = TRUE;
+    }
+    if(hasAuthority){
+        ParseUPHP(remainder);
+    }else{
+        m_User.Empty();
+        m_Password.Empty();
+        m_Host.Empty();
+        m_Port.Empty();
+    }
+
+    // --- path, file, anchor, query --------------------------------------
+    if(remainder.IsEmpty() || !m_reHTTPReq.Match(remainder)){
+        m_URLPath.Empty();
+        m_pathPath.Empty();
+        m_pathFile.Empty();
+        m_pathAnchor.Empty();
+        m_pathQuery.Empty();
+    }else{
+        m_pathPath = m_reHTTPReq.GetMatch(1);
+        m_pathFile = m_reHTTPReq.GetMatch(2);
+        m_pathAnchor = m_reHTTPReq.GetMatch(3);
+        m_pathQuery = m_reHTTPReq.GetMatch(4);
+        m_URLPath = m_reHTTPReq.GetMatch(CRegEx::matchMatch);
+        remainder = m_reHTTPReq.GetMatch(CRegEx::matchPostMatch);
+    }
+
+    m_urlRest = remainder;
+    return TRUE;
+}
+
+// Extracts user, password, host and port from the beginning of `rest`.
+//
+// On a match the four string members are filled from captures 2, 4, 5 and 7
+// of m_reUPHP, m_nPort receives the numeric value of a non-empty port, and
+// `rest` is replaced by the text following the match.  Without a match the
+// four strings are cleared and `rest` is left as it was.
+//
+// NOTE(review): m_nPort is not touched when no port is captured or when the
+// pattern does not match, so it keeps whatever value it had before - confirm
+// that callers only rely on it while m_Port is non-empty.
+void CCrackURL::ParseUPHP(CString& rest)
+{
+    if(!m_reUPHP.Match(rest)){
+        m_User.Empty();
+        m_Password.Empty();
+        m_Host.Empty();
+        m_Port.Empty();
+        return;
+    }
+
+    m_User = m_reUPHP.GetMatch(2);
+    m_Password = m_reUPHP.GetMatch(4);
+    m_Host = m_reUPHP.GetMatch(5);
+    m_Port = m_reUPHP.GetMatch(7);
+    if(!m_Port.IsEmpty()){
+        m_nPort = atoi(m_Port);
+    }
+    // hand the unparsed tail back to the caller
+    rest = m_reUPHP.GetMatch(CRegEx::matchPostMatch);
+}
+
+// Convenience overload: cracks `srcURL` into a temporary object and completes
+// this URL from it via Adjust(CCrackURL&).  Returns that overload's result.
+BOOL CCrackURL::Adjust(LPCTSTR srcURL)
+{
+    CCrackURL base;
+    VERIFY(base.Crack(srcURL));
+    return Adjust(base);
+}
+
+// Fills in the parts missing from this URL using `srcURL` as the base.
+//
+//  - An empty schema is taken from the base.
+//  - An empty host is taken from the base; in that case an empty user,
+//    password and port (string and number) are taken over as well.
+//  - Unless m_pathPath begins with '/', the base's m_pathPath is put in front
+//    of it and m_URLPath is cleared, so that Build() reassembles the path
+//    from its individual parts.
+//
+// Always returns TRUE.
+BOOL CCrackURL::Adjust(CCrackURL& srcURL)
+{
+    if(m_Schema.IsEmpty()){
+        m_Schema = srcURL.m_Schema;
+    }
+
+    if(m_Host.IsEmpty()){
+        m_Host = srcURL.m_Host;
+        if(m_User.IsEmpty()){
+            m_User = srcURL.m_User;
+        }
+        if(m_Password.IsEmpty()){
+            m_Password = srcURL.m_Password;
+        }
+        if(m_Port.IsEmpty()){
+            m_Port = srcURL.m_Port;
+            m_nPort = srcURL.m_nPort;
+        }
+    }
+
+    // A path that does not start with '/' is relative to the base's path.
+    if(m_pathPath.Find('/')!=0){
+        m_pathPath = srcURL.m_pathPath+m_pathPath;
+        m_URLPath.Empty();
+    }
+    return TRUE;
+}
+
+// Reassembles a URL string from the stored components.
+//
+// A non-empty schema is emitted first, followed by ':'.  For the "mailto"
+// schema (compared case-insensitively) the result is completed with
+// user@host.  For everything else "//" follows, then an optional
+// user[:password]@ prefix, the host, an optional :port and finally either
+// m_URLPath, when it is set, or the concatenation of path, file, anchor and
+// query.
+CString CCrackURL::Build()
+{
+    CString result;
+    if(!m_Schema.IsEmpty()){
+        result += m_Schema;
+        result += ':';
+    }
+
+    if(m_Schema.CompareNoCase("mailto")==0){
+        result += m_User;
+        result += '@';
+        result += m_Host;
+        return result;
+    }
+
+    result += "//";
+    if(!m_User.IsEmpty()){
+        result += m_User;
+        if(!m_Password.IsEmpty()){
+            result += ':';
+            result += m_Password;
+        }
+        result += '@';
+    }
+    result += m_Host;
+    if(!m_Port.IsEmpty()){
+        result += ':';
+        result += m_Port;
+    }
+
+    if(m_URLPath.IsEmpty()){
+        // no verbatim path available: glue the individual parts together
+        result += m_pathPath;
+        result += m_pathFile;
+        result += m_pathAnchor;
+        result += m_pathQuery;
+    }else{
+        result += m_URLPath;
+    }
+    return result;
+}
+
+// Returns the innermost URL embedded in `url`.
+//
+// As long as the text contains, after at least one character and a
+// non-letter, something of the form letters://... (running up to the next
+// '&'), the text is replaced by that embedded part.  The last text for which
+// no such part is found is returned; an input without an embedded URL is
+// returned unchanged.
+//
+// The pattern lives in a function-local static and is compiled on the first
+// call only.
+CString CCrackURL::InnermostURL(LPCTSTR url)
+{
+    static CRegEx reInnerURL;
+    if(!reInnerURL.m_bCompiled){
+        VERIFY(reInnerURL.Compile(".+[^[:alpha:]]([[:alpha:]]+://[^&]+)",CRegEx::regExtended|CRegEx::regIgnoreCase));
+    }
+
+    CString current = url;
+    for(;;){
+        if(!reInnerURL.Match(current)){
+            break;
+        }
+        // descend into the embedded URL and look again
+        current = reInnerURL.GetMatch(1);
+    }
+    return current;
+}
+
+// Returns the host part of `url`.
+//
+// When the text starts with letters followed by "://", the run of characters
+// after it up to (not including) the first '/' or ':' is returned.  Any other
+// input is returned unchanged.
+//
+// The pattern lives in a function-local static and is compiled on the first
+// call only.
+CString CCrackURL::GetHostName(LPCTSTR url)
+{
+    static CRegEx reHost;
+    if(!reHost.m_bCompiled){
+        VERIFY(reHost.Compile("^[[:alpha:]]+://([^/:]+)",CRegEx::regExtended|CRegEx::regIgnoreCase));
+    }
+
+    // fall back to the input itself when it does not look like schema://host
+    CString host = url;
+    if(reHost.Match(url)){
+        host = reHost.GetMatch(1);
+    }
+    return host;
+}