-rw-r--r-- | lib/util.cc | 3 |
1 files changed, 2 insertions, 1 deletions
diff --git a/lib/util.cc b/lib/util.cc index ee75d29..a9b9bed 100644 --- a/lib/util.cc +++ b/lib/util.cc @@ -180,136 +180,137 @@ namespace opkele { * that is - [:alpha:][:digit:]._~- * - remove dot segments * - remove empty and default ports * - if there's no path component, add '/' */ string rfc_3986_normalize_uri(const string& uri) { static const char *whitespace = " \t\r\n"; string rv; string::size_type ns = uri.find_first_not_of(whitespace); if(ns==string::npos) throw bad_input(OPKELE_CP_ "Can't normalize empty URI"); string::size_type colon = uri.find(':',ns); if(colon==string::npos) throw bad_input(OPKELE_CP_ "No scheme specified in URI"); transform( uri.begin()+ns, uri.begin()+colon+1, back_inserter(rv), ::tolower ); bool s; string::size_type ul = uri.find_last_not_of(whitespace)+1; if(ul <= (colon+3)) throw bad_input(OPKELE_CP_ "Unexpected end of URI being normalized encountered"); if(uri[colon+1]!='/' || uri[colon+2]!='/') throw bad_input(OPKELE_CP_ "Unexpected input in URI being normalized after scheme component"); if(rv=="http:") s = false; else if(rv=="https:") s = true; else{ /* TODO: support more schemes. * e.g. xri. How do we normalize * xri? */ rv.append(uri,colon+1,ul-colon-1); return rv; } rv += "//"; string::size_type interesting = uri.find_first_of(":/#?",colon+3); if(interesting==string::npos) { transform( uri.begin()+colon+3,uri.begin()+ul, back_inserter(rv), ::tolower ); rv += '/'; return rv; } transform( uri.begin()+colon+3,uri.begin()+interesting, back_inserter(rv), ::tolower ); bool qf = false; char ic = uri[interesting]; if(ic==':') { string::size_type ni = uri.find_first_of("/#?%",interesting+1); const char *nptr = uri.data()+interesting+1; char *eptr = 0; long port = strtol(nptr,&eptr,10); if( (port>0) && (port<65535) && port!=(s?443:80) ) { char tmp[8]; snprintf(tmp,sizeof(tmp),":%ld",port); rv += tmp; } if(ni==string::npos) { rv += '/'; return rv; } interesting = ni; }else if(ic!='/') { rv += '/'; rv += ic; qf = true; ++interesting; } string::size_type n = interesting; char tmp[3] = { 0,0,0 }; stack<string::size_type> psegs; psegs.push(rv.length()); string pseg; for(;n<ul;) { string::size_type unsafe = uri.find_first_of(qf?"%":"%/?#",n); if(unsafe==string::npos) { pseg.append(uri,n,ul-n-1); n = ul-1; }else{ pseg.append(uri,n,unsafe-n); n = unsafe; } char c = uri[n++]; if(c=='%') { if((n+1)>=ul) throw bad_input(OPKELE_CP_ "Unexpected end of URI encountered while parsing percent-encoded character"); tmp[0] = uri[n++]; tmp[1] = uri[n++]; if(!( isxdigit(tmp[0]) && isxdigit(tmp[1]) )) throw bad_input(OPKELE_CP_ "Invalid percent-encoded character in URI being normalized"); int cc = strtol(tmp,0,16); if( isalpha(cc) || isdigit(cc) || strchr("._~-",cc) ) pseg += cc; else{ pseg += '%'; pseg += toupper(tmp[0]); pseg += toupper(tmp[1]); } }else if(qf) { rv += pseg; rv += c; pseg.clear(); }else if(n>=ul || strchr("?/#",c)) { if(pseg.empty() || pseg==".") { }else if(pseg=="..") { if(psegs.size()>1) { rv.resize(psegs.top()); psegs.pop(); } }else{ psegs.push(rv.length()); if(c!='/') { pseg += c; qf = true; } rv += '/'; rv += pseg; } if(c=='/' && (n>=ul || strchr("?#",uri[n])) ) { rv += '/'; if(n<ul) qf = true; }else if(strchr("?#",c)) { if(psegs.size()==1 && psegs.top()==rv.length()) rv += '/'; if(pseg.empty()) rv += c; qf = true; } pseg.clear(); }else{ pseg += c; } } if(!pseg.empty()) { - rv += '/'; rv += pseg; + if(!qf) rv += '/'; + rv += pseg; } return rv; } } } |