summaryrefslogtreecommitdiffabout
authorMichael Krelin <hacker@klever.net>2008-02-19 23:48:32 (UTC)
committer Michael Krelin <hacker@klever.net>2008-02-19 23:48:32 (UTC)
commitdaf2d4bcb4a31df6b46d3da7a33ee3f98d85e464 (patch) (unidiff)
tree7d929285bc296777c63d4f482c7bb07f8541bce2
parent42e4fb613d190508b3e8b8993d233044eeea4d20 (diff)
downloadlibopkele-daf2d4bcb4a31df6b46d3da7a33ee3f98d85e464.zip
libopkele-daf2d4bcb4a31df6b46d3da7a33ee3f98d85e464.tar.gz
libopkele-daf2d4bcb4a31df6b46d3da7a33ee3f98d85e464.tar.bz2
added an identifier normalization utility function
* moved the iname leader characters and whitespace characters strings to the opkele::data namespace
* added the opkele::util::normalize_identifier() function

Signed-off-by: Michael Krelin <hacker@klever.net>
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r--include/opkele/data.h2
-rw-r--r--include/opkele/util.h2
-rw-r--r--lib/data.cc3
-rw-r--r--lib/discovery.cc17
-rw-r--r--lib/util.cc41
5 files changed, 53 insertions, 12 deletions
diff --git a/include/opkele/data.h b/include/opkele/data.h
index d0b0516..904b5ae 100644
--- a/include/opkele/data.h
+++ b/include/opkele/data.h
@@ -14,2 +14,4 @@ namespace opkele {
14 extern const char *_default_g; 14 extern const char *_default_g;
15 extern const char *_iname_leaders;
16 extern const char *_whitespace_chars;
15 } 17 }
diff --git a/include/opkele/util.h b/include/opkele/util.h
index 60955e1..fd974a1 100644
--- a/include/opkele/util.h
+++ b/include/opkele/util.h
@@ -86,2 +86,4 @@ namespace opkele {
86 86
87 string normalize_identifier(const string& usi,bool strip_fragment);
88
87 /** 89 /**
diff --git a/lib/data.cc b/lib/data.cc
index c040430..f71788f 100644
--- a/lib/data.cc
+++ b/lib/data.cc
@@ -9,2 +9,5 @@ namespace opkele {
9 9
10 const char *_iname_leaders = "=@+$!(";
11 const char *_whitespace_chars = " \t\r\n";
12
10 } 13 }
diff --git a/lib/discovery.cc b/lib/discovery.cc
index b7f2db6..5913ad4 100644
--- a/lib/discovery.cc
+++ b/lib/discovery.cc
@@ -8,2 +8,3 @@
8#include <opkele/tidy.h> 8#include <opkele/tidy.h>
9#include <opkele/data.h>
9#include <opkele/debug.h> 10#include <opkele/debug.h>
@@ -23,4 +24,2 @@ namespace opkele {
23 24
24 static const char *whitespace = " \t\r\n";
25 static const char *i_leaders = "=@+$!(";
26 static const size_t max_html = 16384; 25 static const size_t max_html = 16384;
@@ -130,6 +129,6 @@ namespace opkele {
130 idiscovery_t idis; 129 idiscovery_t idis;
131 string::size_type fsc = identity.find_first_not_of(whitespace); 130 string::size_type fsc = identity.find_first_not_of(data::_whitespace_chars);
132 if(fsc==string::npos) 131 if(fsc==string::npos)
133 throw bad_input(OPKELE_CP_ "whitespace-only identity"); 132 throw bad_input(OPKELE_CP_ "whitespace-only identity");
134 string::size_type lsc = identity.find_last_not_of(whitespace); 133 string::size_type lsc = identity.find_last_not_of(data::_whitespace_chars);
135 assert(lsc!=string::npos); 134 assert(lsc!=string::npos);
@@ -141,3 +140,3 @@ namespace opkele {
141 idis.clear(); 140 idis.clear();
142 if(strchr(i_leaders,id[0])) { 141 if(strchr(data::_iname_leaders,id[0])) {
143 /* TODO: further normalize xri identity? Like folding case 142 /* TODO: further normalize xri identity? Like folding case
@@ -497,3 +496,3 @@ namespace opkele {
497 href.assign(ns); 496 href.assign(ns);
498 string::size_type lns=href.find_last_not_of(whitespace); 497 string::size_type lns=href.find_last_not_of(data::_whitespace_chars);
499 href.erase(lns+1); 498 href.erase(lns+1);
@@ -501,5 +500,5 @@ namespace opkele {
501 } 500 }
502 for(string::size_type ns=rels.find_first_not_of(whitespace); 501 for(string::size_type ns=rels.find_first_not_of(data::_whitespace_chars);
503 ns!=string::npos; ns=rels.find_first_not_of(whitespace,ns)) { 502 ns!=string::npos; ns=rels.find_first_not_of(data::_whitespace_chars,ns)) {
504 string::size_type s = rels.find_first_of(whitespace,ns); 503 string::size_type s = rels.find_first_of(data::_whitespace_chars,ns);
505 string rel; 504 string rel;
diff --git a/lib/util.cc b/lib/util.cc
index bb8a2e8..29e6738 100644
--- a/lib/util.cc
+++ b/lib/util.cc
@@ -15,2 +15,3 @@
15#include <opkele/exception.h> 15#include <opkele/exception.h>
16#include <opkele/data.h>
16#include <opkele/debug.h> 17#include <opkele/debug.h>
@@ -213,5 +214,4 @@ namespace opkele {
213 string rfc_3986_normalize_uri(const string& uri) { 214 string rfc_3986_normalize_uri(const string& uri) {
214 static const char *whitespace = " \t\r\n";
215 string rv; 215 string rv;
216 string::size_type ns = uri.find_first_not_of(whitespace); 216 string::size_type ns = uri.find_first_not_of(data::_whitespace_chars);
217 if(ns==string::npos) 217 if(ns==string::npos)
@@ -225,3 +225,3 @@ namespace opkele {
225 bool s; 225 bool s;
226 string::size_type ul = uri.find_last_not_of(whitespace)+1; 226 string::size_type ul = uri.find_last_not_of(data::_whitespace_chars)+1;
227 if(ul <= (colon+3)) 227 if(ul <= (colon+3))
@@ -432,2 +432,37 @@ namespace opkele {
432 432
433 string normalize_identifier(const string& usi,bool strip_fragment) {
434 if(usi.empty())
435 return usi;
436 string rv;
437 string::size_type fsc = usi.find_first_not_of(data::_whitespace_chars);
438 if(fsc==string::npos)
439 return rv;
440 string::size_type lsc = usi.find_last_not_of(data::_whitespace_chars);
441 assert(lsc!=string::npos);
442 if(!strncasecmp(usi.c_str()+fsc,"xri://",sizeof("xri://")-1))
443 fsc += sizeof("xri://")-1;
444 if( (fsc+1) >= lsc )
445 return rv;
446 rv.assign(usi,fsc,lsc-fsc+1);
447 if(strchr(data::_iname_leaders,rv[0])) {
448 /* TODO: further normalize xri identity, fold case or
449 * whatever... */
450 }else{
451 if(rv.find("://")==string::npos)
452 rv.insert(0,"http://");
453 if(strip_fragment) {
454 string::size_type fp = rv.find('#');
455 if(fp!=string::npos) {
456 string::size_type qp = rv.find('?');
457 if(qp==string::npos || qp<fp)
458 rv.erase(fp);
459 else if(qp>fp)
460 rv.erase(fp,qp-fp);
461 }
462 }
463 rv = rfc_3986_normalize_uri(rv);
464 }
465 return rv;
466 }
467
433 } 468 }