summary | refs | log | tree | commit | diff | about
author Michael Krelin <hacker@klever.net> 2008-02-19 23:48:32 (UTC)
committer Michael Krelin <hacker@klever.net> 2008-02-19 23:48:32 (UTC)
commit daf2d4bcb4a31df6b46d3da7a33ee3f98d85e464 (patch) (side-by-side diff)
tree 7d929285bc296777c63d4f482c7bb07f8541bce2
parent 42e4fb613d190508b3e8b8993d233044eeea4d20 (diff)
download libopkele-daf2d4bcb4a31df6b46d3da7a33ee3f98d85e464.zip
libopkele-daf2d4bcb4a31df6b46d3da7a33ee3f98d85e464.tar.gz
libopkele-daf2d4bcb4a31df6b46d3da7a33ee3f98d85e464.tar.bz2
added an identifier normalization utility function
* moved iname leader characters and whitespace characters strings to opkele::data namespace
* added opkele::util::normalize_identifier() function

Signed-off-by: Michael Krelin <hacker@klever.net>
Diffstat (more/less context) (show whitespace changes)
-rw-r--r-- include/opkele/data.h 2
-rw-r--r-- include/opkele/util.h 2
-rw-r--r-- lib/data.cc 3
-rw-r--r-- lib/discovery.cc 17
-rw-r--r-- lib/util.cc 41
5 files changed, 53 insertions, 12 deletions
diff --git a/include/opkele/data.h b/include/opkele/data.h
index d0b0516..904b5ae 100644
--- a/include/opkele/data.h
+++ b/include/opkele/data.h
@@ -11,8 +11,10 @@ namespace opkele {
*/
namespace data {
extern const char *_default_p;
extern const char *_default_g;
+ extern const char *_iname_leaders; /* set of characters which, when leading an identifier, mark it as an XRI rather than a URL */
+ extern const char *_whitespace_chars; /* set of characters treated as whitespace when trimming and splitting strings */
}
}
#endif /* __OPKELE_DATA_H */
diff --git a/include/opkele/util.h b/include/opkele/util.h
index 60955e1..fd974a1 100644
--- a/include/opkele/util.h
+++ b/include/opkele/util.h
@@ -83,8 +83,10 @@ namespace opkele {
* @throw bad_input in case of malformed URI
*/
string rfc_3986_normalize_uri(const string& uri);
+ string normalize_identifier(const string& usi,bool strip_fragment); /* trim and normalize identifier usi; strip_fragment asks for the '#' fragment of a URL identifier to be removed */
+
/**
* Match URI against realm
* @param uri URI to match
* @param realm realm to match against
diff --git a/lib/data.cc b/lib/data.cc
index c040430..f71788f 100644
--- a/lib/data.cc
+++ b/lib/data.cc
@@ -6,6 +6,9 @@ namespace opkele {
const char *_default_p = "155172898181473697471232257763715539915724801966915404479707795314057629378541917580651227423698188993727816152646631438561595825688188889951272158842675419950341258706556549803580104870537681476726513255747040765857479291291572334510643245094715007229621094194349783925984760375594985848253359305585439638443";
const char *_default_g = "2";
+ const char *_iname_leaders = "=@+$!("; /* an identifier whose first character is one of these is handled as an XRI */
+ const char *_whitespace_chars = " \t\r\n"; /* space, tab, carriage return, line feed */
+
}
}
diff --git a/lib/discovery.cc b/lib/discovery.cc
index b7f2db6..5913ad4 100644
--- a/lib/discovery.cc
+++ b/lib/discovery.cc
@@ -5,8 +5,9 @@
#include <opkele/discovery.h>
#include <opkele/exception.h>
#include <opkele/util.h>
#include <opkele/tidy.h>
+#include <opkele/data.h>
#include <opkele/debug.h>
#include "config.h"
@@ -20,10 +21,8 @@ namespace opkele {
/* TODO: the whole discovery thing needs cleanup and optimization due to
* many changes of concept. */
- static const char *whitespace = " \t\r\n";
- static const char *i_leaders = "=@+$!(";
static const size_t max_html = 16384;
static const struct service_type_t {
const char *uri;
@@ -127,20 +126,20 @@ namespace opkele {
string discover(endpoint_discovery_iterator& oi,const string& identity) {
string rv;
idiscovery_t idis;
- string::size_type fsc = identity.find_first_not_of(whitespace);
+ string::size_type fsc = identity.find_first_not_of(data::_whitespace_chars);
if(fsc==string::npos)
throw bad_input(OPKELE_CP_ "whitespace-only identity");
- string::size_type lsc = identity.find_last_not_of(whitespace);
+ string::size_type lsc = identity.find_last_not_of(data::_whitespace_chars);
assert(lsc!=string::npos);
if(!strncasecmp(identity.c_str()+fsc,"xri://",sizeof("xri://")-1))
fsc += sizeof("xri://")-1;
if((fsc+1)>=lsc)
throw bad_input(OPKELE_CP_ "not a character of importance in identity");
string id(identity,fsc,lsc-fsc+1);
idis.clear();
- if(strchr(i_leaders,id[0])) {
+ if(strchr(data::_iname_leaders,id[0])) {
/* TODO: further normalize xri identity? Like folding case
* or whatever... */
rv = id;
set<string> cids;
@@ -494,15 +493,15 @@ namespace opkele {
}else if( !strcasecmp(a[0],"href") ) {
const char *ns = a[1];
for(;*ns && isspace(*ns);++ns);
href.assign(ns);
- string::size_type lns=href.find_last_not_of(whitespace);
+ string::size_type lns=href.find_last_not_of(data::_whitespace_chars);
href.erase(lns+1);
}
}
- for(string::size_type ns=rels.find_first_not_of(whitespace);
- ns!=string::npos; ns=rels.find_first_not_of(whitespace,ns)) {
- string::size_type s = rels.find_first_of(whitespace,ns);
+ for(string::size_type ns=rels.find_first_not_of(data::_whitespace_chars);
+ ns!=string::npos; ns=rels.find_first_not_of(data::_whitespace_chars,ns)) {
+ string::size_type s = rels.find_first_of(data::_whitespace_chars,ns);
string rel;
if(s==string::npos) {
rel.assign(rels,ns,string::npos);
ns = string::npos;
diff --git a/lib/util.cc b/lib/util.cc
index bb8a2e8..29e6738 100644
--- a/lib/util.cc
+++ b/lib/util.cc
@@ -12,8 +12,9 @@
#include <openssl/hmac.h>
#include <curl/curl.h>
#include <opkele/util.h>
#include <opkele/exception.h>
+#include <opkele/data.h>
#include <opkele/debug.h>
#include <config.h>
#ifdef HAVE_DEMANGLE
@@ -210,11 +211,10 @@ namespace opkele {
* - remove empty and default ports
* - if there's no path component, add '/'
*/
string rfc_3986_normalize_uri(const string& uri) {
- static const char *whitespace = " \t\r\n";
string rv;
- string::size_type ns = uri.find_first_not_of(whitespace);
+ string::size_type ns = uri.find_first_not_of(data::_whitespace_chars);
if(ns==string::npos)
throw bad_input(OPKELE_CP_ "Can't normalize empty URI");
string::size_type colon = uri.find(':',ns);
if(colon==string::npos)
@@ -222,9 +222,9 @@ namespace opkele {
transform(
uri.begin()+ns, uri.begin()+colon+1,
back_inserter(rv), ::tolower );
bool s;
- string::size_type ul = uri.find_last_not_of(whitespace)+1;
+ string::size_type ul = uri.find_last_not_of(data::_whitespace_chars)+1;
if(ul <= (colon+3))
throw bad_input(OPKELE_CP_ "Unexpected end of URI being normalized encountered");
if(uri[colon+1]!='/' || uri[colon+2]!='/')
throw bad_input(OPKELE_CP_ "Unexpected input in URI being normalized after scheme component");
@@ -429,7 +429,42 @@ namespace opkele {
md,&md_len);
return encode_base64(md,md_len);
}
+ string normalize_identifier(const string& usi,bool strip_fragment) { /* normalize identifier usi: XRIs are only trimmed, anything else is completed to a URL and run through rfc_3986_normalize_uri() */
+ if(usi.empty())
+ return usi; /* empty input is handed back unchanged */
+ string rv;
+ string::size_type fsc = usi.find_first_not_of(data::_whitespace_chars); /* index of the first non-whitespace character */
+ if(fsc==string::npos)
+ return rv; /* whitespace-only input yields an empty string */
+ string::size_type lsc = usi.find_last_not_of(data::_whitespace_chars); /* index of the last non-whitespace character */
+ assert(lsc!=string::npos);
+ if(!strncasecmp(usi.c_str()+fsc,"xri://",sizeof("xri://")-1))
+ fsc += sizeof("xri://")-1; /* skip a leading "xri://", matched case-insensitively */
+ if( (fsc+1) >= lsc )
+ return rv; /* fewer than three characters remain: return an empty string */
+ rv.assign(usi,fsc,lsc-fsc+1); /* the trimmed identifier, without any xri:// prefix */
+ if(strchr(data::_iname_leaders,rv[0])) {
+ /* TODO: further normalize xri identity, fold case or
+ * whatever... */
+ }else{
+ if(rv.find("://")==string::npos)
+ rv.insert(0,"http://"); /* no "://" anywhere in the identifier: prepend the http scheme */
+ if(strip_fragment) {
+ string::size_type fp = rv.find('#'); /* position of the first '#' */
+ if(fp!=string::npos) {
+ string::size_type qp = rv.find('?'); /* position of the first '?' */
+ if(qp==string::npos || qp<fp)
+ rv.erase(fp); /* no '?', or '?' precedes '#': cut everything from '#' to the end */
+ else if(qp>fp)
+ rv.erase(fp,qp-fp); /* '?' follows '#': remove only the text from '#' up to the '?', keeping the rest. NOTE(review): RFC 3986 counts a '?' after '#' as part of the fragment -- confirm this is intended */
+ }
+ }
+ rv = rfc_3986_normalize_uri(rv); /* throws bad_input when the URI is malformed */
+ }
+ return rv;
+ }
+
}
}