-rw-r--r-- | lib/util.cc | 55 |
1 files changed, 38 insertions, 17 deletions
diff --git a/lib/util.cc b/lib/util.cc index 416e2cc..a9b9bed 100644 --- a/lib/util.cc +++ b/lib/util.cc @@ -114,22 +114,32 @@ namespace opkele { } time_t w3c_to_time(const string& w) { + int fraction; struct tm tm_t; memset(&tm_t,0,sizeof(tm_t)); - if( + if( ( + sscanf( + w.c_str(), + "%04d-%02d-%02dT%02d:%02d:%02dZ", + &tm_t.tm_year,&tm_t.tm_mon,&tm_t.tm_mday, + &tm_t.tm_hour,&tm_t.tm_min,&tm_t.tm_sec + ) != 6 + ) && ( sscanf( w.c_str(), - "%04d-%02d-%02dT%02d:%02d:%02dZ", + "%04d-%02d-%02dT%02d:%02d:%02d.%03dZ", &tm_t.tm_year,&tm_t.tm_mon,&tm_t.tm_mday, - &tm_t.tm_hour,&tm_t.tm_min,&tm_t.tm_sec - ) != 6 ) + &tm_t.tm_hour,&tm_t.tm_min,&tm_t.tm_sec, + &fraction + ) != 7 + ) ) throw failed_conversion(OPKELE_CP_ "failed to sscanf()"); tm_t.tm_mon--; tm_t.tm_year-=1900; time_t rv = mktime(&tm_t); if(rv==(time_t)-1) throw failed_conversion(OPKELE_CP_ "failed to mktime()"); - return rv; + return rv-timezone; } /* @@ -164,7 +174,7 @@ namespace opkele { /* * Normalize URL according to the rules, described in rfc 3986, section 6 * - * - uppercase hext triplets (e.g. %ab -> %AB) + * - uppercase hex triplets (e.g. %ab -> %AB) * - lowercase scheme and host * - decode %-encoded characters, specified as unreserved in rfc 3986, section 2.3, * that is - [:alpha:][:digit:]._~- @@ -173,30 +183,40 @@ namespace opkele { * - if there's no path component, add '/' */ string rfc_3986_normalize_uri(const string& uri) { + static const char *whitespace = " \t\r\n"; string rv; - string::size_type colon = uri.find(':'); + string::size_type ns = uri.find_first_not_of(whitespace); + if(ns==string::npos) + throw bad_input(OPKELE_CP_ "Can't normalize empty URI"); + string::size_type colon = uri.find(':',ns); if(colon==string::npos) throw bad_input(OPKELE_CP_ "No scheme specified in URI"); transform( - uri.begin(), uri.begin()+colon+1, + uri.begin()+ns, uri.begin()+colon+1, back_inserter(rv), ::tolower ); bool s; - if(rv=="http:") - s = false; - else if(rv=="https:") - s = true; - else - throw not_implemented(OPKELE_CP_ "Only http(s) URIs can be normalized here"); - string::size_type ul = uri.length(); + string::size_type ul = uri.find_last_not_of(whitespace)+1; if(ul <= (colon+3)) throw bad_input(OPKELE_CP_ "Unexpected end of URI being normalized encountered"); if(uri[colon+1]!='/' || uri[colon+2]!='/') throw bad_input(OPKELE_CP_ "Unexpected input in URI being normalized after scheme component"); + if(rv=="http:") + s = false; + else if(rv=="https:") + s = true; + else{ + /* TODO: support more schemes. + * e.g. xri. How do we normalize + * xri? + */ + rv.append(uri,colon+1,ul-colon-1); + return rv; + } rv += "//"; string::size_type interesting = uri.find_first_of(":/#?",colon+3); if(interesting==string::npos) { transform( - uri.begin()+colon+3,uri.end(), + uri.begin()+colon+3,uri.begin()+ul, back_inserter(rv), ::tolower ); rv += '/'; return rv; } @@ -285,7 +305,8 @@ namespace opkele { } } if(!pseg.empty()) { - rv += '/'; rv += pseg; + if(!qf) rv += '/'; + rv += pseg; } return rv; } |