-rw-r--r-- | lib/util.cc | 41 |
1 files changed, 38 insertions, 3 deletions
diff --git a/lib/util.cc b/lib/util.cc index bb8a2e8..29e6738 100644 --- a/lib/util.cc +++ b/lib/util.cc | |||
@@ -4,24 +4,25 @@ | |||
4 | #include <cstring> | 4 | #include <cstring> |
5 | #include <vector> | 5 | #include <vector> |
6 | #include <string> | 6 | #include <string> |
7 | #include <stack> | 7 | #include <stack> |
8 | #include <algorithm> | 8 | #include <algorithm> |
9 | #include <openssl/bio.h> | 9 | #include <openssl/bio.h> |
10 | #include <openssl/evp.h> | 10 | #include <openssl/evp.h> |
11 | #include <openssl/sha.h> | 11 | #include <openssl/sha.h> |
12 | #include <openssl/hmac.h> | 12 | #include <openssl/hmac.h> |
13 | #include <curl/curl.h> | 13 | #include <curl/curl.h> |
14 | #include <opkele/util.h> | 14 | #include <opkele/util.h> |
15 | #include <opkele/exception.h> | 15 | #include <opkele/exception.h> |
16 | #include <opkele/data.h> | ||
16 | #include <opkele/debug.h> | 17 | #include <opkele/debug.h> |
17 | 18 | ||
18 | #include <config.h> | 19 | #include <config.h> |
19 | #ifdef HAVE_DEMANGLE | 20 | #ifdef HAVE_DEMANGLE |
20 | # include <cxxabi.h> | 21 | # include <cxxabi.h> |
21 | #endif | 22 | #endif |
22 | 23 | ||
23 | namespace opkele { | 24 | namespace opkele { |
24 | using namespace std; | 25 | using namespace std; |
25 | 26 | ||
26 | namespace util { | 27 | namespace util { |
27 | 28 | ||
@@ -202,37 +203,36 @@ namespace opkele { | |||
202 | /* | 203 | /* |
203 | * Normalize URL according to the rules, described in rfc 3986, section 6 | 204 | * Normalize URL according to the rules, described in rfc 3986, section 6 |
204 | * | 205 | * |
205 | * - uppercase hex triplets (e.g. %ab -> %AB) | 206 | * - uppercase hex triplets (e.g. %ab -> %AB) |
206 | * - lowercase scheme and host | 207 | * - lowercase scheme and host |
207 | * - decode %-encoded characters, specified as unreserved in rfc 3986, section 2.3, | 208 | * - decode %-encoded characters, specified as unreserved in rfc 3986, section 2.3, |
208 | * that is - [:alpha:][:digit:]._~- | 209 | * that is - [:alpha:][:digit:]._~- |
209 | * - remove dot segments | 210 | * - remove dot segments |
210 | * - remove empty and default ports | 211 | * - remove empty and default ports |
211 | * - if there's no path component, add '/' | 212 | * - if there's no path component, add '/' |
212 | */ | 213 | */ |
213 | string rfc_3986_normalize_uri(const string& uri) { | 214 | string rfc_3986_normalize_uri(const string& uri) { |
214 | static const char *whitespace = " \t\r\n"; | ||
215 | string rv; | 215 | string rv; |
216 | string::size_type ns = uri.find_first_not_of(whitespace); | 216 | string::size_type ns = uri.find_first_not_of(data::_whitespace_chars); |
217 | if(ns==string::npos) | 217 | if(ns==string::npos) |
218 | throw bad_input(OPKELE_CP_ "Can't normalize empty URI"); | 218 | throw bad_input(OPKELE_CP_ "Can't normalize empty URI"); |
219 | string::size_type colon = uri.find(':',ns); | 219 | string::size_type colon = uri.find(':',ns); |
220 | if(colon==string::npos) | 220 | if(colon==string::npos) |
221 | throw bad_input(OPKELE_CP_ "No scheme specified in URI"); | 221 | throw bad_input(OPKELE_CP_ "No scheme specified in URI"); |
222 | transform( | 222 | transform( |
223 | uri.begin()+ns, uri.begin()+colon+1, | 223 | uri.begin()+ns, uri.begin()+colon+1, |
224 | back_inserter(rv), ::tolower ); | 224 | back_inserter(rv), ::tolower ); |
225 | bool s; | 225 | bool s; |
226 | string::size_type ul = uri.find_last_not_of(whitespace)+1; | 226 | string::size_type ul = uri.find_last_not_of(data::_whitespace_chars)+1; |
227 | if(ul <= (colon+3)) | 227 | if(ul <= (colon+3)) |
228 | throw bad_input(OPKELE_CP_ "Unexpected end of URI being normalized encountered"); | 228 | throw bad_input(OPKELE_CP_ "Unexpected end of URI being normalized encountered"); |
229 | if(uri[colon+1]!='/' || uri[colon+2]!='/') | 229 | if(uri[colon+1]!='/' || uri[colon+2]!='/') |
230 | throw bad_input(OPKELE_CP_ "Unexpected input in URI being normalized after scheme component"); | 230 | throw bad_input(OPKELE_CP_ "Unexpected input in URI being normalized after scheme component"); |
231 | if(rv=="http:") | 231 | if(rv=="http:") |
232 | s = false; | 232 | s = false; |
233 | else if(rv=="https:") | 233 | else if(rv=="https:") |
234 | s = true; | 234 | s = true; |
235 | else{ | 235 | else{ |
236 | /* TODO: support more schemes. e.g. xri. How do we normalize | 236 | /* TODO: support more schemes. e.g. xri. How do we normalize |
237 | * xri? | 237 | * xri? |
238 | */ | 238 | */ |
@@ -421,15 +421,50 @@ namespace opkele { | |||
421 | evpmd = EVP_sha1(); | 421 | evpmd = EVP_sha1(); |
422 | else | 422 | else |
423 | throw unsupported(OPKELE_CP_ "unknown association type"); | 423 | throw unsupported(OPKELE_CP_ "unknown association type"); |
424 | unsigned int md_len = 0; | 424 | unsigned int md_len = 0; |
425 | unsigned char md[SHA256_DIGEST_LENGTH]; | 425 | unsigned char md[SHA256_DIGEST_LENGTH]; |
426 | HMAC(evpmd, | 426 | HMAC(evpmd, |
427 | &(secret.front()),secret.size(), | 427 | &(secret.front()),secret.size(), |
428 | (const unsigned char*)kv.data(),kv.length(), | 428 | (const unsigned char*)kv.data(),kv.length(), |
429 | md,&md_len); | 429 | md,&md_len); |
430 | return encode_base64(md,md_len); | 430 | return encode_base64(md,md_len); |
431 | } | 431 | } |
432 | 432 | ||
433 | string normalize_identifier(const string& usi,bool strip_fragment) { | ||
434 | if(usi.empty()) | ||
435 | return usi; | ||
436 | string rv; | ||
437 | string::size_type fsc = usi.find_first_not_of(data::_whitespace_chars); | ||
438 | if(fsc==string::npos) | ||
439 | return rv; | ||
440 | string::size_type lsc = usi.find_last_not_of(data::_whitespace_chars); | ||
441 | assert(lsc!=string::npos); | ||
442 | if(!strncasecmp(usi.c_str()+fsc,"xri://",sizeof("xri://")-1)) | ||
443 | fsc += sizeof("xri://")-1; | ||
444 | if( (fsc+1) >= lsc ) | ||
445 | return rv; | ||
446 | rv.assign(usi,fsc,lsc-fsc+1); | ||
447 | if(strchr(data::_iname_leaders,rv[0])) { | ||
448 | /* TODO: further normalize xri identity, fold case or | ||
449 | * whatever... */ | ||
450 | }else{ | ||
451 | if(rv.find("://")==string::npos) | ||
452 | rv.insert(0,"http://"); | ||
453 | if(strip_fragment) { | ||
454 | string::size_type fp = rv.find('#'); | ||
455 | if(fp!=string::npos) { | ||
456 | string::size_type qp = rv.find('?'); | ||
457 | if(qp==string::npos || qp<fp) | ||
458 | rv.erase(fp); | ||
459 | else if(qp>fp) | ||
460 | rv.erase(fp,qp-fp); | ||
461 | } | ||
462 | } | ||
463 | rv = rfc_3986_normalize_uri(rv); | ||
464 | } | ||
465 | return rv; | ||
466 | } | ||
467 | |||
433 | } | 468 | } |
434 | 469 | ||
435 | } | 470 | } |