author | Michael Krelin <hacker@klever.net> | 2007-11-23 22:18:54 (UTC) |
---|---|---|
committer | Michael Krelin <hacker@klever.net> | 2007-11-24 07:09:40 (UTC) |
commit | 70f85314fcd188a182aae3a4291c0cd95ba16ee2 (patch) (side-by-side diff) | |
tree | cad4bf188542d4319280a3c30757e72ec2d20bf6 | |
parent | 2589c69c4a909563098365fba141082db4657353 (diff) | |
download | libopkele-70f85314fcd188a182aae3a4291c0cd95ba16ee2.zip libopkele-70f85314fcd188a182aae3a4291c0cd95ba16ee2.tar.gz libopkele-70f85314fcd188a182aae3a4291c0cd95ba16ee2.tar.bz2 |
added URI normalization procedure to opkele::util
as specified in RFC3986, section 6
Signed-off-by: Michael Krelin <hacker@klever.net>
-rw-r--r-- | include/opkele/util.h | 10 | ||||
-rw-r--r-- | lib/util.cc | 131 | ||||
-rw-r--r-- | test/test.cc | 67 |
3 files changed, 208 insertions, 0 deletions
diff --git a/include/opkele/util.h b/include/opkele/util.h index edc1859..085c9e6 100644 --- a/include/opkele/util.h +++ b/include/opkele/util.h @@ -1,133 +1,143 @@ #ifndef __OPKELE_UTIL_H #define __OPKELE_UTIL_H #include <time.h> #include <string> #include <vector> #include <openssl/bn.h> #include <openssl/dh.h> namespace opkele { using std::string; using std::vector; /** * @brief opkele utils namespace */ namespace util { /** * Convenience class encapsulating SSL BIGNUM object for the purpose of * automatical freeing. */ class bignum_t { public: BIGNUM *_bn; bignum_t() : _bn(0) { } bignum_t(BIGNUM *bn) : _bn(bn) { } ~bignum_t() throw() { if(_bn) BN_free(_bn); } bignum_t& operator=(BIGNUM *bn) { if(_bn) BN_free(_bn); _bn = bn; return *this; } operator const BIGNUM*(void) const { return _bn; } operator BIGNUM*(void) { return _bn; } }; /** * Convenience clas encapsulating SSL DH object for the purpose of * automatic freeing. */ class dh_t { public: DH *_dh; dh_t() : _dh(0) { } dh_t(DH *dh) : _dh(dh) { } ~dh_t() throw() { if(_dh) DH_free(_dh); } dh_t& operator=(DH *dh) { if(_dh) DH_free(_dh); _dh = dh; return *this; } operator const DH*(void) const { return _dh; } operator DH*(void) { return _dh; } DH* operator->() { return _dh; } const DH* operator->() const { return _dh; } }; /** * Convert base64-encoded SSL BIGNUM to internal representation. * @param b64 base64-encoded number * @return SSL BIGNUM * @throw failed_conversion in case of error */ BIGNUM *base64_to_bignum(const string& b64); /** * Convert decimal representation to SSL BIGNUM. * @param dec decimal representation * @return resulting BIGNUM * @throw failed_conversion in case of error */ BIGNUM *dec_to_bignum(const string& dec); /** * Convert SSL BIGNUM data to base64 encoded string. 
* @param bn BIGNUM * @return base64encoded string */ string bignum_to_base64(const BIGNUM *bn); /** * Convert internal time representation to w3c format * @param t internal representation * @return w3c time * @throw failed_conversion in case of error */ string time_to_w3c(time_t t); /** * Convert W3C time representation to internal time_t * @param w w3c representation * @return converted time * @throw failed_conversion in case of error */ time_t w3c_to_time(const string& w); /** * Encode string to the representation suitable for using in URL. * @param str string to encode * @return encoded string * @throw failed_conversion in case of failure */ string url_encode(const string& str); /** * Convert number to string * @param l number * @return string representation * @throw failed_conversion in case of failure */ string long_to_string(long l); /** * Convert string to number * @param s string, containing the number * @return the number * @throw failed_conversion in case of failure */ long string_to_long(const string& s); /** * Encode binary data using base64. * @param data pointer to binary data * @param length length of data * @return encoded data */ string encode_base64(const void *data,size_t length); /** * Decode binary data from base64 representation. * @param data base64-encoded data * @param rv container for decoded binary */ void decode_base64(const string& data,vector<unsigned char>& rv); + + /** + * Normalize http(s) URI according to RFC3986, section 6. URI is + * expected to have scheme: in front of it. 
+ * @param uri URI + * @return normalized URI + * @throw not_implemented in case of non-httpi(s) URI + * @throw bad_input in case of malformed URI + */ + string rfc_3986_normalize_uri(const string& uri); } } #endif /* __OPKELE_UTIL_H */ diff --git a/lib/util.cc b/lib/util.cc index 26be66a..eacf6d7 100644 --- a/lib/util.cc +++ b/lib/util.cc @@ -1,164 +1,295 @@ #include <errno.h> #include <cassert> +#include <cctype> #include <cstring> #include <vector> #include <string> +#include <stack> #include <openssl/bio.h> #include <openssl/evp.h> #include <curl/curl.h> #include "opkele/util.h" #include "opkele/exception.h" namespace opkele { using namespace std; namespace util { /* * base64 */ string encode_base64(const void *data,size_t length) { BIO *b64 = 0, *bmem = 0; try { b64 = BIO_new(BIO_f_base64()); if(!b64) throw exception_openssl(OPKELE_CP_ "failed to BIO_new() base64 encoder"); BIO_set_flags(b64,BIO_FLAGS_BASE64_NO_NL); bmem = BIO_new(BIO_s_mem()); BIO_set_flags(b64,BIO_CLOSE); if(!bmem) throw exception_openssl(OPKELE_CP_ "failed to BIO_new() memory buffer"); BIO_push(b64,bmem); if(((size_t)BIO_write(b64,data,length))!=length) throw exception_openssl(OPKELE_CP_ "failed to BIO_write()"); if(BIO_flush(b64)!=1) throw exception_openssl(OPKELE_CP_ "failed to BIO_flush()"); char *rvd; long rvl = BIO_get_mem_data(bmem,&rvd); string rv(rvd,rvl); BIO_free_all(b64); return rv; }catch(...) 
{ if(b64) BIO_free_all(b64); throw; } } void decode_base64(const string& data,vector<unsigned char>& rv) { BIO *b64 = 0, *bmem = 0; rv.clear(); try { bmem = BIO_new_mem_buf((void*)data.data(),data.size()); if(!bmem) throw exception_openssl(OPKELE_CP_ "failed to BIO_new_mem_buf()"); b64 = BIO_new(BIO_f_base64()); if(!b64) throw exception_openssl(OPKELE_CP_ "failed to BIO_new() base64 decoder"); BIO_set_flags(b64,BIO_FLAGS_BASE64_NO_NL); BIO_push(b64,bmem); unsigned char tmp[512]; size_t rb = 0; while((rb=BIO_read(b64,tmp,sizeof(tmp)))>0) rv.insert(rv.end(),tmp,&tmp[rb]); BIO_free_all(b64); }catch(...) { if(b64) BIO_free_all(b64); throw; } } /* * big numerics */ BIGNUM *base64_to_bignum(const string& b64) { vector<unsigned char> bin; decode_base64(b64,bin); BIGNUM *rv = BN_bin2bn(&(bin.front()),bin.size(),0); if(!rv) throw failed_conversion(OPKELE_CP_ "failed to BN_bin2bn()"); return rv; } BIGNUM *dec_to_bignum(const string& dec) { BIGNUM *rv = 0; if(!BN_dec2bn(&rv,dec.c_str())) throw failed_conversion(OPKELE_CP_ "failed to BN_dec2bn()"); return rv; } string bignum_to_base64(const BIGNUM *bn) { vector<unsigned char> bin(BN_num_bytes(bn)+1); unsigned char *binptr = &(bin.front())+1; int l = BN_bn2bin(bn,binptr); if(l && (*binptr)&0x80){ (*(--binptr)) = 0; ++l; } return encode_base64(binptr,l); } /* * w3c times */ string time_to_w3c(time_t t) { struct tm tm_t; if(!gmtime_r(&t,&tm_t)) throw failed_conversion(OPKELE_CP_ "failed to BN_dec2bn()"); char rv[25]; if(!strftime(rv,sizeof(rv)-1,"%Y-%m-%dT%H:%M:%SZ",&tm_t)) throw failed_conversion(OPKELE_CP_ "failed to strftime()"); return rv; } time_t w3c_to_time(const string& w) { struct tm tm_t; memset(&tm_t,0,sizeof(tm_t)); if( sscanf( w.c_str(), "%04d-%02d-%02dT%02d:%02d:%02dZ", &tm_t.tm_year,&tm_t.tm_mon,&tm_t.tm_mday, &tm_t.tm_hour,&tm_t.tm_min,&tm_t.tm_sec ) != 6 ) throw failed_conversion(OPKELE_CP_ "failed to sscanf()"); tm_t.tm_mon--; tm_t.tm_year-=1900; time_t rv = mktime(&tm_t); if(rv==(time_t)-1) throw 
failed_conversion(OPKELE_CP_ "failed to mktime()"); return rv; } /* * */ string url_encode(const string& str) { char * t = curl_escape(str.c_str(),str.length()); if(!t) throw failed_conversion(OPKELE_CP_ "failed to curl_escape()"); string rv(t); curl_free(t); return rv; } string long_to_string(long l) { char rv[32]; int r=snprintf(rv,sizeof(rv),"%ld",l); if(r<0 || r>=(int)sizeof(rv)) throw failed_conversion(OPKELE_CP_ "failed to snprintf()"); return rv; } long string_to_long(const string& s) { char *endptr = 0; long rv = strtol(s.c_str(),&endptr,10); if((!endptr) || endptr==s.c_str()) throw failed_conversion(OPKELE_CP_ "failed to strtol()"); return rv; } + /* + * Normalize URL according to the rules, described in rfc 3986, section 6 + * + * - uppercase hext triplets (e.g. %ab -> %AB) + * - lowercase scheme and host + * - decode %-encoded characters, specified as unreserved in rfc 3986, section 2.3, + * that is - [:alpha:][:digit:]._~- + * - remove dot segments + * - remove empty and default ports + * - if there's no path component, add '/' + */ + string rfc_3986_normalize_uri(const string& uri) { + string rv; + string::size_type colon = uri.find(':'); + if(colon==string::npos) + throw bad_input(OPKELE_CP_ "No scheme specified in URI"); + transform( + uri.begin(), uri.begin()+colon+1, + back_inserter(rv), ::tolower ); + bool s; + if(rv=="http:") + s = false; + else if(rv=="https:") + s = true; + else + throw not_implemented(OPKELE_CP_ "Only http(s) URIs can be normalized here"); + string::size_type ul = uri.length(); + if(ul <= (colon+3)) + throw bad_input(OPKELE_CP_ "Unexpected end of URI being normalized encountered"); + if(uri[colon+1]!='/' || uri[colon+2]!='/') + throw bad_input(OPKELE_CP_ "Unexpected input in URI being normalized after scheme component"); + rv += "//"; + string::size_type interesting = uri.find_first_of(":/#?",colon+3); + if(interesting==string::npos) { + transform( + uri.begin()+colon+3,uri.end(), + back_inserter(rv), ::tolower ); + rv += '/'; 
return rv; + } + transform( + uri.begin()+colon+3,uri.begin()+interesting, + back_inserter(rv), ::tolower ); + bool qf = false; + char ic = uri[interesting]; + if(ic==':') { + string::size_type ni = uri.find_first_of("/#?%",interesting+1); + const char *nptr = uri.data()+interesting+1; + char *eptr = 0; + long port = strtol(nptr,&eptr,10); + if( (port>0) && (port<65535) && port!=(s?443:80) ) { + char tmp[6]; + snprintf(tmp,sizeof(tmp),"%d",port); + rv += ':'; rv += tmp; + } + if(ni==string::npos) { + rv += '/'; return rv; + } + interesting = ni; + }else if(ic!='/') { + rv += '/'; rv += ic; + qf = true; + ++interesting; + } + string::size_type n = interesting; + char tmp[3] = { 0,0,0 }; + stack<string::size_type> psegs; psegs.push(rv.length()); + string pseg; + for(;n<ul;) { + string::size_type unsafe = uri.find_first_of(qf?"%":"%/?#",n); + if(unsafe==string::npos) { + pseg.append(uri,n,ul-n-1); n = ul-1; + }else{ + pseg.append(uri,n,unsafe-n); + n = unsafe; + } + char c = uri[n++]; + if(c=='%') { + if((n+1)>=ul) + throw bad_input(OPKELE_CP_ "Unexpected end of URI encountered while parsing percent-encoded character"); + tmp[0] = uri[n++]; + tmp[1] = uri[n++]; + if(!( isxdigit(tmp[0]) && isxdigit(tmp[1]) )) + throw bad_input(OPKELE_CP_ "Invalid percent-encoded character in URI being normalized"); + int cc = strtol(tmp,0,16); + if( isalpha(cc) || isdigit(cc) || strchr("._~-",cc) ) + pseg += cc; + else{ + pseg += '%'; + pseg += toupper(tmp[0]); pseg += toupper(tmp[1]); + } + }else if(qf) { + rv += pseg; rv += c; + pseg.clear(); + }else if(n>=ul || strchr("?/#",c)) { + if(pseg.empty() || pseg==".") { + }else if(pseg=="..") { + if(psegs.size()>1) { + rv.resize(psegs.top()); psegs.pop(); + } + }else{ + psegs.push(rv.length()); + if(c!='/') { + pseg += c; + qf = true; + } + rv += '/'; rv += pseg; + } + if(c=='/' && (n>=ul || strchr("?#",uri[n])) ) { + rv += '/'; + if(n<ul) + qf = true; + }else if(strchr("?#",c)) { + if(psegs.size()==1 && psegs.top()==rv.length()) + rv += 
'/'; + if(pseg.empty()) + rv += c; + qf = true; + } + pseg.clear(); + }else{ + pseg += c; + } + } + if(!pseg.empty()) { + rv += '/'; rv += pseg; + } + return rv; + } + } } diff --git a/test/test.cc b/test/test.cc index f92284c..1a012b5 100644 --- a/test/test.cc +++ b/test/test.cc @@ -1,72 +1,139 @@ #include <iostream> #include <stdexcept> using namespace std; #include <opkele/exception.h> #include <opkele/consumer.h> +#include <opkele/util.h> #include "config.h" class failed_test : public opkele::exception { public: failed_test(OPKELE_E_PARS) : exception(OPKELE_E_CONS) { } }; class dummy_consumer_t : public opkele::consumer_t { public: virtual opkele::assoc_t store_assoc(const string& /* server */,const string& /* handle */,const opkele::secret_t& /* secret */,int /* expires_in */) { throw opkele::not_implemented(OPKELE_CP_ "Not implemented"); } virtual opkele::assoc_t retrieve_assoc(const string& /* server */ ,const string& /* handle */) { throw opkele::not_implemented(OPKELE_CP_ "Not implemented"); } virtual void invalidate_assoc(const string& /* server */,const string& /* handle */) { throw opkele::not_implemented(OPKELE_CP_ "Not implemented"); } }; void test_retrieve_links(const string& f,bool success,const string& s="",const string& d="") { dummy_consumer_t dc; string server, delegate; try { dc.retrieve_links("file://" OPKELE_SRC_DIR "/test/html/"+f,server,delegate); if(!success) throw failed_test(OPKELE_CP_ "Retrieved links when it shouldn't"); if(server!=s) throw failed_test(OPKELE_CP_ "retrieve_links test failed, expected server '"+s+"', got '"+server+"'"); if(delegate!=d) throw failed_test(OPKELE_CP_ "retrieve_links test failed, expected delegate '"+d+"', got '"+delegate+"'"); }catch(opkele::bad_input& obi) { if(success) throw failed_test(OPKELE_CP_ "Test '"+f+"' failed due to 'bad_input'["+obi.what()+"]"); }catch(opkele::failed_assertion& ofa) { if(success) throw failed_test(OPKELE_CP_ "Test '"+f+"' failed due to 'failed_assertion'["+ofa.what()+"]"); } } 
void test_retrieve_links() { test_retrieve_links("empty.html",false); test_retrieve_links("in-body.html",false); test_retrieve_links("head-in-body.html",false); test_retrieve_links("hkn.html",true,"http://www.klever.net/openid.server","http://hacker.klever.net/"); test_retrieve_links("hkn-server.html",true,"http://www.klever.net/openid.server"); test_retrieve_links("hkn-delegate.html",false); test_retrieve_links("unclosed-head.html",true,"http://www.klever.net/openid.server","http://hacker.klever.net/"); test_retrieve_links("spaced-links.html",true,"http://www.klever.net/openid.server","http://hacker.klever.net/"); test_retrieve_links("spaced-link-attrs.html",true,"http://www.klever.net/openid.server","http://hacker.klever.net/"); test_retrieve_links("2rels.html",true,"http://www.klever.net/openid.server","http://www.klever.net/openid.server"); } +void test_rfc_3986_normalize_uri(const string &ouri,bool success,const string& nuri="") { + try { + string n = opkele::util::rfc_3986_normalize_uri(ouri); + if(!success) + throw failed_test(OPKELE_CP_ "Normalized URI when it shouldn't"); + if(n!=nuri) + throw failed_test(OPKELE_CP_ "rfc_3986_test_failed for '"+ouri+"' failed, expected '"+nuri+"', got '"+n+"'"); + }catch(opkele::bad_input& obi) { + if(success) + throw failed_test(OPKELE_CP_ "Test '"+ouri+"' failed due to 'bad_input'["+obi.what()+"]"); + }catch(opkele::not_implemented& oni) { + if(success) + throw failed_test(OPKELE_CP_ "Test '"+ouri+"' failed due to 'not_implemented'["+oni.what()+"]"); + } +} + +void test_rfc_3986_normalize_uri() { + test_rfc_3986_normalize_uri( + "invalid", false ); + test_rfc_3986_normalize_uri( + "ftp://hacker.klever.net/", false ); + test_rfc_3986_normalize_uri( + "http://", false ); + test_rfc_3986_normalize_uri( + "http:/hacker.klever.net/", false ); + test_rfc_3986_normalize_uri( + "hTTp://hacker.klever.net#uh?oh", true, "http://hacker.klever.net/#uh?oh" ); + test_rfc_3986_normalize_uri( + "http://hacker.klever.net?uh#oh", true, 
"http://hacker.klever.net/?uh#oh" ); + test_rfc_3986_normalize_uri( + "http://hacker.klever.net:80/", true, "http://hacker.klever.net/" ); + test_rfc_3986_normalize_uri( + "http://hacker.klever.net:80?uh", true, "http://hacker.klever.net/?uh" ); + test_rfc_3986_normalize_uri( + "http://hacker.klever.net:80#uh", true, "http://hacker.klever.net/#uh" ); + test_rfc_3986_normalize_uri( + "https://hacker.klever.net:443", true, "https://hacker.klever.net/" ); + test_rfc_3986_normalize_uri( + "http://hacker.klever.net:?oh", true, "http://hacker.klever.net/?oh" ); + test_rfc_3986_normalize_uri( + "http://hacker.klever.net/ah%2E", true, "http://hacker.klever.net/ah." ); + test_rfc_3986_normalize_uri( + "http://hacker.klever.net/ah/%2E/", true, "http://hacker.klever.net/ah/" ); + test_rfc_3986_normalize_uri( + "http://hacker.klever.net/ah/%2b/", true, "http://hacker.klever.net/ah/%2B/" ); + test_rfc_3986_normalize_uri( + "http://hacker.klever.net/ah/./oh?eh", true, "http://hacker.klever.net/ah/oh?eh" ); + test_rfc_3986_normalize_uri( + "http://hacker.klever.net/ah/../oh?", true, "http://hacker.klever.net/oh?" ); + test_rfc_3986_normalize_uri( + "http://hacker.klever.net/ah//oh?", true, "http://hacker.klever.net/ah/oh?" ); + test_rfc_3986_normalize_uri( + "http://hacker.klever.net/ah/?", true, "http://hacker.klever.net/ah/?" ); + test_rfc_3986_normalize_uri( + "http://hacker.klever.net/ah/%", false ); + test_rfc_3986_normalize_uri( + "http://hacker.klever.net/ah/%a", false ); + test_rfc_3986_normalize_uri( + "http://hacker.klever.net/ah/%zx", false ); + test_rfc_3986_normalize_uri( + "http://hacker.klever.net/ah/%5x", false ); + test_rfc_3986_normalize_uri( + "Http://Hacker.Klever.Net:", true, "http://hacker.klever.net/" ); +} + int main() { try { + test_rfc_3986_normalize_uri(); test_retrieve_links(); }catch(failed_test& ft) { cerr << "Test failed: " << ft.what() << endl; }catch(exception& e) { cerr << "oops: " << e.what() << endl; _exit(1); } _exit(0); } |