author | Michael Krelin <hacker@klever.net> | 2008-02-19 23:48:32 (UTC) |
---|---|---|
committer | Michael Krelin <hacker@klever.net> | 2008-02-19 23:48:32 (UTC) |
commit | daf2d4bcb4a31df6b46d3da7a33ee3f98d85e464 (patch) (unidiff) | |
tree | 7d929285bc296777c63d4f482c7bb07f8541bce2 | |
parent | 42e4fb613d190508b3e8b8993d233044eeea4d20 (diff) | |
download | libopkele-daf2d4bcb4a31df6b46d3da7a33ee3f98d85e464.zip libopkele-daf2d4bcb4a31df6b46d3da7a33ee3f98d85e464.tar.gz libopkele-daf2d4bcb4a31df6b46d3da7a33ee3f98d85e464.tar.bz2 |
added an identifier normalization utility function
* moved iname leader character and whitespace character strings to
the opkele::data namespace
* added opkele::util::normalize_identifier() function
Signed-off-by: Michael Krelin <hacker@klever.net>
-rw-r--r-- | include/opkele/data.h | 2 | ||||
-rw-r--r-- | include/opkele/util.h | 2 | ||||
-rw-r--r-- | lib/data.cc | 3 | ||||
-rw-r--r-- | lib/discovery.cc | 17 | ||||
-rw-r--r-- | lib/util.cc | 41 |
5 files changed, 53 insertions, 12 deletions
diff --git a/include/opkele/data.h b/include/opkele/data.h index d0b0516..904b5ae 100644 --- a/include/opkele/data.h +++ b/include/opkele/data.h | |||
@@ -11,8 +11,10 @@ namespace opkele { | |||
11 | */ | 11 | */ |
12 | namespace data { | 12 | namespace data { |
13 | extern const char *_default_p; | 13 | extern const char *_default_p; |
14 | extern const char *_default_g; | 14 | extern const char *_default_g; |
15 | extern const char *_iname_leaders; | ||
16 | extern const char *_whitespace_chars; | ||
15 | } | 17 | } |
16 | } | 18 | } |
17 | 19 | ||
18 | #endif /* __OPKELE_DATA_H */ | 20 | #endif /* __OPKELE_DATA_H */ |
diff --git a/include/opkele/util.h b/include/opkele/util.h index 60955e1..fd974a1 100644 --- a/include/opkele/util.h +++ b/include/opkele/util.h | |||
@@ -83,8 +83,10 @@ namespace opkele { | |||
83 | * @throw bad_input in case of malformed URI | 83 | * @throw bad_input in case of malformed URI |
84 | */ | 84 | */ |
85 | string rfc_3986_normalize_uri(const string& uri); | 85 | string rfc_3986_normalize_uri(const string& uri); |
86 | 86 | ||
87 | string normalize_identifier(const string& usi,bool strip_fragment); | ||
88 | |||
87 | /** | 89 | /** |
88 | * Match URI against realm | 90 | * Match URI against realm |
89 | * @param uri URI to match | 91 | * @param uri URI to match |
90 | * @param realm realm to match against | 92 | * @param realm realm to match against |
diff --git a/lib/data.cc b/lib/data.cc index c040430..f71788f 100644 --- a/lib/data.cc +++ b/lib/data.cc | |||
@@ -6,6 +6,9 @@ namespace opkele { | |||
6 | 6 | ||
7 | const char *_default_p = "155172898181473697471232257763715539915724801966915404479707795314057629378541917580651227423698188993727816152646631438561595825688188889951272158842675419950341258706556549803580104870537681476726513255747040765857479291291572334510643245094715007229621094194349783925984760375594985848253359305585439638443"; | 7 | const char *_default_p = "155172898181473697471232257763715539915724801966915404479707795314057629378541917580651227423698188993727816152646631438561595825688188889951272158842675419950341258706556549803580104870537681476726513255747040765857479291291572334510643245094715007229621094194349783925984760375594985848253359305585439638443"; |
8 | const char *_default_g = "2"; | 8 | const char *_default_g = "2"; |
9 | 9 | ||
10 | const char *_iname_leaders = "=@+$!("; | ||
11 | const char *_whitespace_chars = " \t\r\n"; | ||
12 | |||
10 | } | 13 | } |
11 | } | 14 | } |
diff --git a/lib/discovery.cc b/lib/discovery.cc index b7f2db6..5913ad4 100644 --- a/lib/discovery.cc +++ b/lib/discovery.cc | |||
@@ -5,8 +5,9 @@ | |||
5 | #include <opkele/discovery.h> | 5 | #include <opkele/discovery.h> |
6 | #include <opkele/exception.h> | 6 | #include <opkele/exception.h> |
7 | #include <opkele/util.h> | 7 | #include <opkele/util.h> |
8 | #include <opkele/tidy.h> | 8 | #include <opkele/tidy.h> |
9 | #include <opkele/data.h> | ||
9 | #include <opkele/debug.h> | 10 | #include <opkele/debug.h> |
10 | 11 | ||
11 | #include "config.h" | 12 | #include "config.h" |
12 | 13 | ||
@@ -20,10 +21,8 @@ namespace opkele { | |||
20 | 21 | ||
21 | /* TODO: the whole discovery thing needs cleanup and optimization due to | 22 | /* TODO: the whole discovery thing needs cleanup and optimization due to |
22 | * many changes of concept. */ | 23 | * many changes of concept. */ |
23 | 24 | ||
24 | static const char *whitespace = " \t\r\n"; | ||
25 | static const char *i_leaders = "=@+$!("; | ||
26 | static const size_t max_html = 16384; | 25 | static const size_t max_html = 16384; |
27 | 26 | ||
28 | static const struct service_type_t { | 27 | static const struct service_type_t { |
29 | const char *uri; | 28 | const char *uri; |
@@ -127,20 +126,20 @@ namespace opkele { | |||
127 | 126 | ||
128 | string discover(endpoint_discovery_iterator& oi,const string& identity) { | 127 | string discover(endpoint_discovery_iterator& oi,const string& identity) { |
129 | string rv; | 128 | string rv; |
130 | idiscovery_t idis; | 129 | idiscovery_t idis; |
131 | string::size_type fsc = identity.find_first_not_of(whitespace); | 130 | string::size_type fsc = identity.find_first_not_of(data::_whitespace_chars); |
132 | if(fsc==string::npos) | 131 | if(fsc==string::npos) |
133 | throw bad_input(OPKELE_CP_ "whitespace-only identity"); | 132 | throw bad_input(OPKELE_CP_ "whitespace-only identity"); |
134 | string::size_type lsc = identity.find_last_not_of(whitespace); | 133 | string::size_type lsc = identity.find_last_not_of(data::_whitespace_chars); |
135 | assert(lsc!=string::npos); | 134 | assert(lsc!=string::npos); |
136 | if(!strncasecmp(identity.c_str()+fsc,"xri://",sizeof("xri://")-1)) | 135 | if(!strncasecmp(identity.c_str()+fsc,"xri://",sizeof("xri://")-1)) |
137 | fsc += sizeof("xri://")-1; | 136 | fsc += sizeof("xri://")-1; |
138 | if((fsc+1)>=lsc) | 137 | if((fsc+1)>=lsc) |
139 | throw bad_input(OPKELE_CP_ "not a character of importance in identity"); | 138 | throw bad_input(OPKELE_CP_ "not a character of importance in identity"); |
140 | string id(identity,fsc,lsc-fsc+1); | 139 | string id(identity,fsc,lsc-fsc+1); |
141 | idis.clear(); | 140 | idis.clear(); |
142 | if(strchr(i_leaders,id[0])) { | 141 | if(strchr(data::_iname_leaders,id[0])) { |
143 | /* TODO: further normalize xri identity? Like folding case | 142 | /* TODO: further normalize xri identity? Like folding case |
144 | * or whatever... */ | 143 | * or whatever... */ |
145 | rv = id; | 144 | rv = id; |
146 | set<string> cids; | 145 | set<string> cids; |
@@ -494,15 +493,15 @@ namespace opkele { | |||
494 | }else if( !strcasecmp(a[0],"href") ) { | 493 | }else if( !strcasecmp(a[0],"href") ) { |
495 | const char *ns = a[1]; | 494 | const char *ns = a[1]; |
496 | for(;*ns && isspace(*ns);++ns); | 495 | for(;*ns && isspace(*ns);++ns); |
497 | href.assign(ns); | 496 | href.assign(ns); |
498 | string::size_type lns=href.find_last_not_of(whitespace); | 497 | string::size_type lns=href.find_last_not_of(data::_whitespace_chars); |
499 | href.erase(lns+1); | 498 | href.erase(lns+1); |
500 | } | 499 | } |
501 | } | 500 | } |
502 | for(string::size_type ns=rels.find_first_not_of(whitespace); | 501 | for(string::size_type ns=rels.find_first_not_of(data::_whitespace_chars); |
503 | ns!=string::npos; ns=rels.find_first_not_of(whitespace,ns)) { | 502 | ns!=string::npos; ns=rels.find_first_not_of(data::_whitespace_chars,ns)) { |
504 | string::size_type s = rels.find_first_of(whitespace,ns); | 503 | string::size_type s = rels.find_first_of(data::_whitespace_chars,ns); |
505 | string rel; | 504 | string rel; |
506 | if(s==string::npos) { | 505 | if(s==string::npos) { |
507 | rel.assign(rels,ns,string::npos); | 506 | rel.assign(rels,ns,string::npos); |
508 | ns = string::npos; | 507 | ns = string::npos; |
diff --git a/lib/util.cc b/lib/util.cc index bb8a2e8..29e6738 100644 --- a/lib/util.cc +++ b/lib/util.cc | |||
@@ -12,8 +12,9 @@ | |||
12 | #include <openssl/hmac.h> | 12 | #include <openssl/hmac.h> |
13 | #include <curl/curl.h> | 13 | #include <curl/curl.h> |
14 | #include <opkele/util.h> | 14 | #include <opkele/util.h> |
15 | #include <opkele/exception.h> | 15 | #include <opkele/exception.h> |
16 | #include <opkele/data.h> | ||
16 | #include <opkele/debug.h> | 17 | #include <opkele/debug.h> |
17 | 18 | ||
18 | #include <config.h> | 19 | #include <config.h> |
19 | #ifdef HAVE_DEMANGLE | 20 | #ifdef HAVE_DEMANGLE |
@@ -210,11 +211,10 @@ namespace opkele { | |||
210 | * - remove empty and default ports | 211 | * - remove empty and default ports |
211 | * - if there's no path component, add '/' | 212 | * - if there's no path component, add '/' |
212 | */ | 213 | */ |
213 | string rfc_3986_normalize_uri(const string& uri) { | 214 | string rfc_3986_normalize_uri(const string& uri) { |
214 | static const char *whitespace = " \t\r\n"; | ||
215 | string rv; | 215 | string rv; |
216 | string::size_type ns = uri.find_first_not_of(whitespace); | 216 | string::size_type ns = uri.find_first_not_of(data::_whitespace_chars); |
217 | if(ns==string::npos) | 217 | if(ns==string::npos) |
218 | throw bad_input(OPKELE_CP_ "Can't normalize empty URI"); | 218 | throw bad_input(OPKELE_CP_ "Can't normalize empty URI"); |
219 | string::size_type colon = uri.find(':',ns); | 219 | string::size_type colon = uri.find(':',ns); |
220 | if(colon==string::npos) | 220 | if(colon==string::npos) |
@@ -222,9 +222,9 @@ namespace opkele { | |||
222 | transform( | 222 | transform( |
223 | uri.begin()+ns, uri.begin()+colon+1, | 223 | uri.begin()+ns, uri.begin()+colon+1, |
224 | back_inserter(rv), ::tolower ); | 224 | back_inserter(rv), ::tolower ); |
225 | bool s; | 225 | bool s; |
226 | string::size_type ul = uri.find_last_not_of(whitespace)+1; | 226 | string::size_type ul = uri.find_last_not_of(data::_whitespace_chars)+1; |
227 | if(ul <= (colon+3)) | 227 | if(ul <= (colon+3)) |
228 | throw bad_input(OPKELE_CP_ "Unexpected end of URI being normalized encountered"); | 228 | throw bad_input(OPKELE_CP_ "Unexpected end of URI being normalized encountered"); |
229 | if(uri[colon+1]!='/' || uri[colon+2]!='/') | 229 | if(uri[colon+1]!='/' || uri[colon+2]!='/') |
230 | throw bad_input(OPKELE_CP_ "Unexpected input in URI being normalized after scheme component"); | 230 | throw bad_input(OPKELE_CP_ "Unexpected input in URI being normalized after scheme component"); |
@@ -429,7 +429,42 @@ namespace opkele { | |||
429 | md,&md_len); | 429 | md,&md_len); |
430 | return encode_base64(md,md_len); | 430 | return encode_base64(md,md_len); |
431 | } | 431 | } |
432 | 432 | ||
433 | string normalize_identifier(const string& usi,bool strip_fragment) { | ||
434 | if(usi.empty()) | ||
435 | return usi; | ||
436 | string rv; | ||
437 | string::size_type fsc = usi.find_first_not_of(data::_whitespace_chars); | ||
438 | if(fsc==string::npos) | ||
439 | return rv; | ||
440 | string::size_type lsc = usi.find_last_not_of(data::_whitespace_chars); | ||
441 | assert(lsc!=string::npos); | ||
442 | if(!strncasecmp(usi.c_str()+fsc,"xri://",sizeof("xri://")-1)) | ||
443 | fsc += sizeof("xri://")-1; | ||
444 | if( (fsc+1) >= lsc ) | ||
445 | return rv; | ||
446 | rv.assign(usi,fsc,lsc-fsc+1); | ||
447 | if(strchr(data::_iname_leaders,rv[0])) { | ||
448 | /* TODO: further normalize xri identity, fold case or | ||
449 | * whatever... */ | ||
450 | }else{ | ||
451 | if(rv.find("://")==string::npos) | ||
452 | rv.insert(0,"http://"); | ||
453 | if(strip_fragment) { | ||
454 | string::size_type fp = rv.find('#'); | ||
455 | if(fp!=string::npos) { | ||
456 | string::size_type qp = rv.find('?'); | ||
457 | if(qp==string::npos || qp<fp) | ||
458 | rv.erase(fp); | ||
459 | else if(qp>fp) | ||
460 | rv.erase(fp,qp-fp); | ||
461 | } | ||
462 | } | ||
463 | rv = rfc_3986_normalize_uri(rv); | ||
464 | } | ||
465 | return rv; | ||
466 | } | ||
467 | |||
433 | } | 468 | } |
434 | 469 | ||
435 | } | 470 | } |