author | Michael Krelin <hacker@klever.net> | 2008-02-19 23:48:32 (UTC) |
---|---|---|
committer | Michael Krelin <hacker@klever.net> | 2008-02-19 23:48:32 (UTC) |
commit | daf2d4bcb4a31df6b46d3da7a33ee3f98d85e464 (patch) (side-by-side diff) | |
tree | 7d929285bc296777c63d4f482c7bb07f8541bce2 /lib/util.cc | |
parent | 42e4fb613d190508b3e8b8993d233044eeea4d20 (diff) | |
download | libopkele-daf2d4bcb4a31df6b46d3da7a33ee3f98d85e464.zip libopkele-daf2d4bcb4a31df6b46d3da7a33ee3f98d85e464.tar.gz libopkele-daf2d4bcb4a31df6b46d3da7a33ee3f98d85e464.tar.bz2 |
added an identifier normalization utility function
* moved iname leader characters and whitespace characters strings to
opkele::data namespace
* added opkele::util::normalize_identifier() function
Signed-off-by: Michael Krelin <hacker@klever.net>
-rw-r--r-- | lib/util.cc | 41 |
1 files changed, 38 insertions, 3 deletions
diff --git a/lib/util.cc b/lib/util.cc
index bb8a2e8..29e6738 100644
--- a/lib/util.cc
+++ b/lib/util.cc
@@ -15,2 +15,3 @@
 #include <opkele/exception.h>
+#include <opkele/data.h>
 #include <opkele/debug.h>
@@ -213,5 +214,4 @@ namespace opkele {
         string rfc_3986_normalize_uri(const string& uri) {
-            static const char *whitespace = " \t\r\n";
             string rv;
-            string::size_type ns = uri.find_first_not_of(whitespace);
+            string::size_type ns = uri.find_first_not_of(data::_whitespace_chars);
             if(ns==string::npos)
@@ -225,3 +225,3 @@ namespace opkele {
             bool s;
-            string::size_type ul = uri.find_last_not_of(whitespace)+1;
+            string::size_type ul = uri.find_last_not_of(data::_whitespace_chars)+1;
             if(ul <= (colon+3))
@@ -432,2 +432,37 @@ namespace opkele {
 
+        string normalize_identifier(const string& usi,bool strip_fragment) {
+            if(usi.empty())
+                return usi;
+            string rv;
+            string::size_type fsc = usi.find_first_not_of(data::_whitespace_chars);
+            if(fsc==string::npos)
+                return rv;
+            string::size_type lsc = usi.find_last_not_of(data::_whitespace_chars);
+            assert(lsc!=string::npos);
+            if(!strncasecmp(usi.c_str()+fsc,"xri://",sizeof("xri://")-1))
+                fsc += sizeof("xri://")-1;
+            if( (fsc+1) >= lsc )
+                return rv;
+            rv.assign(usi,fsc,lsc-fsc+1);
+            if(strchr(data::_iname_leaders,rv[0])) {
+                /* TODO: further normalize xri identity, fold case or
+                 * whatever... */
+            }else{
+                if(rv.find("://")==string::npos)
+                    rv.insert(0,"http://");
+                if(strip_fragment) {
+                    string::size_type fp = rv.find('#');
+                    if(fp!=string::npos) {
+                        string::size_type qp = rv.find('?');
+                        if(qp==string::npos || qp<fp)
+                            rv.erase(fp);
+                        else if(qp>fp)
+                            rv.erase(fp,qp-fp);
+                    }
+                }
+                rv = rfc_3986_normalize_uri(rv);
+            }
+            return rv;
+        }
+
     }