author | Michael Krelin <hacker@klever.net> | 2008-02-19 23:48:32 (UTC) |
---|---|---|
committer | Michael Krelin <hacker@klever.net> | 2008-02-19 23:48:32 (UTC) |
commit | daf2d4bcb4a31df6b46d3da7a33ee3f98d85e464 (patch) (side-by-side diff) | |
tree | 7d929285bc296777c63d4f482c7bb07f8541bce2 /lib/util.cc | |
parent | 42e4fb613d190508b3e8b8993d233044eeea4d20 (diff) | |
download | libopkele-daf2d4bcb4a31df6b46d3da7a33ee3f98d85e464.zip libopkele-daf2d4bcb4a31df6b46d3da7a33ee3f98d85e464.tar.gz libopkele-daf2d4bcb4a31df6b46d3da7a33ee3f98d85e464.tar.bz2 |
added an identifier normalization utility function
* moved iname leader characters and whitespace characters strings to
opkele::data namespace
* added opkele::util::normalize_identifier() function
Signed-off-by: Michael Krelin <hacker@klever.net>
-rw-r--r-- | lib/util.cc | 41 |
1 files changed, 38 insertions, 3 deletions
```diff
diff --git a/lib/util.cc b/lib/util.cc
index bb8a2e8..29e6738 100644
--- a/lib/util.cc
+++ b/lib/util.cc
@@ -14,4 +14,5 @@
 #include <opkele/util.h>
 #include <opkele/exception.h>
+#include <opkele/data.h>
 #include <opkele/debug.h>
 
@@ -212,7 +213,6 @@ namespace opkele {
      */
     string rfc_3986_normalize_uri(const string& uri) {
-        static const char *whitespace = " \t\r\n";
         string rv;
-        string::size_type ns = uri.find_first_not_of(whitespace);
+        string::size_type ns = uri.find_first_not_of(data::_whitespace_chars);
         if(ns==string::npos)
             throw bad_input(OPKELE_CP_ "Can't normalize empty URI");
@@ -224,5 +224,5 @@ namespace opkele {
                 back_inserter(rv), ::tolower );
         bool s;
-        string::size_type ul = uri.find_last_not_of(whitespace)+1;
+        string::size_type ul = uri.find_last_not_of(data::_whitespace_chars)+1;
         if(ul <= (colon+3))
             throw bad_input(OPKELE_CP_ "Unexpected end of URI being normalized encountered");
@@ -431,4 +431,39 @@ namespace opkele {
     }
 
+    string normalize_identifier(const string& usi,bool strip_fragment) {
+        if(usi.empty())
+            return usi;
+        string rv;
+        string::size_type fsc = usi.find_first_not_of(data::_whitespace_chars);
+        if(fsc==string::npos)
+            return rv;
+        string::size_type lsc = usi.find_last_not_of(data::_whitespace_chars);
+        assert(lsc!=string::npos);
+        if(!strncasecmp(usi.c_str()+fsc,"xri://",sizeof("xri://")-1))
+            fsc += sizeof("xri://")-1;
+        if( (fsc+1) >= lsc )
+            return rv;
+        rv.assign(usi,fsc,lsc-fsc+1);
+        if(strchr(data::_iname_leaders,rv[0])) {
+            /* TODO: further normalize xri identity, fold case or
+             * whatever... */
+        }else{
+            if(rv.find("://")==string::npos)
+                rv.insert(0,"http://");
+            if(strip_fragment) {
+                string::size_type fp = rv.find('#');
+                if(fp!=string::npos) {
+                    string::size_type qp = rv.find('?');
+                    if(qp==string::npos || qp<fp)
+                        rv.erase(fp);
+                    else if(qp>fp)
+                        rv.erase(fp,qp-fp);
+                }
+            }
+            rv = rfc_3986_normalize_uri(rv);
+        }
+        return rv;
+    }
+
 }
 
```