author | Michael Krelin <hacker@klever.net> | 2008-02-19 23:48:32 (UTC) |
---|---|---|
committer | Michael Krelin <hacker@klever.net> | 2008-02-19 23:48:32 (UTC) |
commit | daf2d4bcb4a31df6b46d3da7a33ee3f98d85e464 (patch) (side-by-side diff) | |
tree | 7d929285bc296777c63d4f482c7bb07f8541bce2 | |
parent | 42e4fb613d190508b3e8b8993d233044eeea4d20 (diff) | |
download | libopkele-daf2d4bcb4a31df6b46d3da7a33ee3f98d85e464.zip libopkele-daf2d4bcb4a31df6b46d3da7a33ee3f98d85e464.tar.gz libopkele-daf2d4bcb4a31df6b46d3da7a33ee3f98d85e464.tar.bz2 |
added an identifier normalization utility function
* moved iname leader characters and whitespace characters strings to
opkele::data namespace
* added opkele::util::normalize_identifier() function
Signed-off-by: Michael Krelin <hacker@klever.net>
-rw-r--r-- | include/opkele/data.h | 2 | ||||
-rw-r--r-- | include/opkele/util.h | 2 | ||||
-rw-r--r-- | lib/data.cc | 3 | ||||
-rw-r--r-- | lib/discovery.cc | 17 | ||||
-rw-r--r-- | lib/util.cc | 41 |
5 files changed, 53 insertions, 12 deletions
diff --git a/include/opkele/data.h b/include/opkele/data.h index d0b0516..904b5ae 100644 --- a/include/opkele/data.h +++ b/include/opkele/data.h @@ -1,18 +1,20 @@ #ifndef __OPKELE_DATA_H #define __OPKELE_DATA_H /** * @brief the main opkele namespace */ namespace opkele { /** * @brief internal data opkele namespace */ namespace data { extern const char *_default_p; extern const char *_default_g; + extern const char *_iname_leaders; + extern const char *_whitespace_chars; } } #endif /* __OPKELE_DATA_H */ diff --git a/include/opkele/util.h b/include/opkele/util.h index 60955e1..fd974a1 100644 --- a/include/opkele/util.h +++ b/include/opkele/util.h @@ -39,76 +39,78 @@ namespace opkele { string url_encode(const string& str); /** * Make string suitable for using as x(ht)ml attribute. * @param str string to escape * @return escaped string */ string attr_escape(const string& str); /** * Convert number to string * @param l number * @return string representation * @throw failed_conversion in case of failure */ string long_to_string(long l); /** * Convert string to number * @param s string, containing the number * @return the number * @throw failed_conversion in case of failure */ long string_to_long(const string& s); /** * Encode binary data using base64. * @param data pointer to binary data * @param length length of data * @return encoded data */ string encode_base64(const void *data,size_t length); /** * Decode binary data from base64 representation. * @param data base64-encoded data * @param rv container for decoded binary */ void decode_base64(const string& data,vector<unsigned char>& rv); /** * Normalize http(s) URI according to RFC3986, section 6. URI is * expected to have scheme: in front of it. * @param uri URI * @return normalized URI * @throw not_implemented in case of non-httpi(s) URI * @throw bad_input in case of malformed URI */ string rfc_3986_normalize_uri(const string& uri); + string normalize_identifier(const string& usi,bool strip_fragment); + /** * Match URI against realm * @param uri URI to match * @param realm realm to match against * @return true if URI matches realm */ bool uri_matches_realm(const string& uri,const string& realm); /** * Strip fragment part from URI * @param uri input/output parameter containing the URI * @return reference to uri */ string& strip_uri_fragment_part(string& uri); /** * Calculate signature and encode it using base64 * @param assoc association being used for signing * @param om openid message * @return base64 representation of the signature */ string base64_signature(const assoc_t& assoc,const basic_openid_message& om); } } #endif /* __OPKELE_UTIL_H */ diff --git a/lib/data.cc b/lib/data.cc index c040430..f71788f 100644 --- a/lib/data.cc +++ b/lib/data.cc @@ -1,11 +1,14 @@ #include <opkele/data.h> namespace opkele { namespace data { const char *_default_p = "155172898181473697471232257763715539915724801966915404479707795314057629378541917580651227423698188993727816152646631438561595825688188889951272158842675419950341258706556549803580104870537681476726513255747040765857479291291572334510643245094715007229621094194349783925984760375594985848253359305585439638443"; const char *_default_g = "2"; + const char *_iname_leaders = "=@+$!("; + const char *_whitespace_chars = " \t\r\n"; + } } diff --git a/lib/discovery.cc b/lib/discovery.cc index b7f2db6..5913ad4 100644 --- a/lib/discovery.cc +++ b/lib/discovery.cc @@ -1,73 +1,72 @@ #include <list> #include <opkele/curl.h> #include <opkele/expat.h> #include <opkele/uris.h> #include <opkele/discovery.h> #include <opkele/exception.h> #include <opkele/util.h> #include <opkele/tidy.h> +#include <opkele/data.h> #include <opkele/debug.h> #include "config.h" #define XRDS_HEADER "X-XRDS-Location" #define CT_HEADER "Content-Type" namespace opkele { using std::list; using xrd::XRD_t; using xrd::service_t; /* TODO: the whole discovery thing needs cleanup and optimization due to * many changes of concept. */ - static const char *whitespace = " \t\r\n"; - static const char *i_leaders = "=@+$!("; static const size_t max_html = 16384; static const struct service_type_t { const char *uri; const char *forceid; } op_service_types[] = { { STURI_OPENID20_OP, IDURI_SELECT20 }, { STURI_OPENID20, 0 }, { STURI_OPENID11, 0 }, { STURI_OPENID10, 0 } }; enum { st_index_1 = 2, st_index_2 = 1 }; static inline bool is_qelement(const XML_Char *n,const char *qen) { return !strcasecmp(n,qen); } static inline bool is_element(const XML_Char *n,const char *en) { if(!strcasecmp(n,en)) return true; int nl = strlen(n), enl = strlen(en); if( (nl>=(enl+1)) && n[nl-enl-1]=='\t' && !strcasecmp(&n[nl-enl],en) ) return true; return false; } static long element_priority(const XML_Char **a) { for(;*a;++a) if(!strcasecmp(*(a++),"priority")) { long rv; return (sscanf(*a,"%ld",&rv)==1)?rv:-1; } return -1; } /* TODO: ideally all attributes should be * retrieved in one run */ static const char *element_attr(const XML_Char **a, const char *at) { for(;*a;++a) if(!strcasecmp(*(a++),at)) { return *a; } return 0; } class idigger_t : public util::curl_t, public util::expat_t { public: @@ -83,108 +82,108 @@ namespace opkele { string http_content_type; service_t html_openid1; service_t html_openid2; string cdata_buf; long status_code; string status_string; typedef list<string> pt_stack_t; pt_stack_t pt_stack; int skipping; bool parser_choked; string save_html; XRD_t *xrd; service_t *xrd_service; string* cdata; idigger_t() : util::curl_t(easy_init()), util::expat_t(0), xri_proxy(XRI_PROXY_URL) { CURLcode r; (r=misc_sets()) || (r=set_write()) || (r=set_header()) ; if(r) throw exception_curl(OPKELE_CP_ "failed to set curly options",r); } ~idigger_t() throw() { } void yadiscover(endpoint_discovery_iterator oi,const string& yurl,const char **types,bool redirs) { idiscovery_t idis; idis.xri_identity = false; discover_at(idis,yurl,xmode_html|xmode_xrd|(redirs?0:xmode_noredirs)); if(!xrds_location.empty()) { idis.clear(); discover_at(idis,xrds_location,xmode_xrd); } idis.normalized_id = idis.canonicalized_id = yurl; service_type_t st; for(st.uri=*types;*types;st.uri=*(++types)) queue_endpoints(oi,idis,&st); } string discover(endpoint_discovery_iterator& oi,const string& identity) { string rv; idiscovery_t idis; - string::size_type fsc = identity.find_first_not_of(whitespace); + string::size_type fsc = identity.find_first_not_of(data::_whitespace_chars); if(fsc==string::npos) throw bad_input(OPKELE_CP_ "whitespace-only identity"); - string::size_type lsc = identity.find_last_not_of(whitespace); + string::size_type lsc = identity.find_last_not_of(data::_whitespace_chars); assert(lsc!=string::npos); if(!strncasecmp(identity.c_str()+fsc,"xri://",sizeof("xri://")-1)) fsc += sizeof("xri://")-1; if((fsc+1)>=lsc) throw bad_input(OPKELE_CP_ "not a character of importance in identity"); string id(identity,fsc,lsc-fsc+1); idis.clear(); - if(strchr(i_leaders,id[0])) { + if(strchr(data::_iname_leaders,id[0])) { /* TODO: further normalize xri identity? Like folding case * or whatever... */ rv = id; set<string> cids; for(const struct service_type_t *st=op_service_types; st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) { idis.clear(); discover_at( idis, xri_proxy + util::url_encode(id)+ "?_xrd_t="+util::url_encode(st->uri)+ "&_xrd_r=application/xrd%2Bxml" ";sep=true;refs=true", xmode_xrd ); if(status_code==241) continue; if(status_code!=100) throw failed_xri_resolution(OPKELE_CP_ "XRI resolution failed with '"+status_string+"' message" ", while looking for SEP with type '"+st->uri+"'", status_code); if(idis.xrd.canonical_ids.empty()) throw opkele::failed_discovery(OPKELE_CP_ "No CanonicalID for XRI identity found"); string cid = idis.xrd.canonical_ids.begin()->second; if(cids.find(cid)==cids.end()) { cids.insert(cid); idis.clear(); discover_at( idis, xri_proxy + util::url_encode(id)+ "?_xrd_t="+util::url_encode(st->uri)+ "&_xrd_r=application/xrd%2Bxml" ";sep=true;refs=true", xmode_xrd ); if(status_code==241) continue; if(status_code!=100) throw failed_xri_resolution(OPKELE_CP_ "XRI resolution failed with '"+status_string+"' message" ", while looking for SEP with type '"+st->uri+"'" " on canonical id", status_code); } idis.canonicalized_id = cid; idis.normalized_id = rv; idis.xri_identity = true; queue_endpoints(oi,idis,st); } }else{ idis.xri_identity = false; if(id.find("://")==string::npos) id.insert(0,"http://"); string::size_type fp = id.find('#'); if(fp!=string::npos) { string::size_type qp = id.find('?'); @@ -450,103 +449,103 @@ namespace opkele { assert(!pt_stack.empty()); assert(pt_stack.back()==(NSURI_XRD "\tService")); pt_stack.pop_back(); xrd_service = 0; }else if(is_qelement(n,NSURI_XRD "\tStatus")) { assert(xrd); if(is_qelement(pt_stack.back().c_str(),n)) { assert(cdata==&status_string); pt_stack.pop_back(); if(status_code!=100) skipping = -1; } }else if(is_qelement(n,NSURI_XRD "\tExpires")) { assert(xrd); xrd->expires = util::w3c_to_time(cdata_buf); }else if((xmode&xmode_html) && is_element(n,"head")) { skipping = -1; } cdata = 0; } void character_data(const XML_Char *s,int l) { if(skipping) return; if(cdata) cdata->append(s,l); } void html_start_element(const XML_Char *n,const XML_Char **a) { if(is_element(n,"meta")) { bool heq = false; string l; for(;*a;a+=2) { if(!( strcasecmp(a[0],"http-equiv") || strcasecmp(a[1],XRDS_HEADER) )) heq = true; else if(!strcasecmp(a[0],"content")) l.assign(a[1]); } if(heq) xrds_location = l; }else if(is_element(n,"link")) { string rels; string href; for(;*a;a+=2) { if( !strcasecmp(a[0],"rel") ) { rels.assign(a[1]); }else if( !strcasecmp(a[0],"href") ) { const char *ns = a[1]; for(;*ns && isspace(*ns);++ns); href.assign(ns); - string::size_type lns=href.find_last_not_of(whitespace); + string::size_type lns=href.find_last_not_of(data::_whitespace_chars); href.erase(lns+1); } } - for(string::size_type ns=rels.find_first_not_of(whitespace); - ns!=string::npos; ns=rels.find_first_not_of(whitespace,ns)) { - string::size_type s = rels.find_first_of(whitespace,ns); + for(string::size_type ns=rels.find_first_not_of(data::_whitespace_chars); + ns!=string::npos; ns=rels.find_first_not_of(data::_whitespace_chars,ns)) { + string::size_type s = rels.find_first_of(data::_whitespace_chars,ns); string rel; if(s==string::npos) { rel.assign(rels,ns,string::npos); ns = string::npos; }else{ rel.assign(rels,ns,s-ns); ns = s; } if(rel=="openid.server") html_openid1.uris.add(-1,xrd::uri_t(href)); else if(rel=="openid.delegate") html_openid1.local_ids.add(-1,href); else if(rel=="openid2.provider") html_openid2.uris.add(-1,xrd::uri_t(href)); else if(rel=="openid2.local_id") html_openid2.local_ids.add(-1,href); } }else if(is_element(n,"body")) { skipping = -1; } } void queue_endpoints(endpoint_discovery_iterator& oi, const idiscovery_t &id, const service_type_t *st) { openid_endpoint_t ep; ep.claimed_id = id.canonicalized_id; for(xrd::services_t::const_iterator isvc=id.xrd.services.begin(); isvc!=id.xrd.services.end(); ++isvc) { const xrd::service_t svc = isvc->second; if(svc.types.find(st->uri)==svc.types.end()) continue; for(xrd::uris_t::const_iterator iu=svc.uris.begin();iu!=svc.uris.end();++iu) { ep.uri = iu->second.uri; if(id.xri_identity) { if(iu->second.append=="qxri") { ep.uri += id.normalized_id; } /* TODO: else handle other append attribute values */ } if(st->forceid) { ep.local_id = ep.claimed_id = st->forceid; *(oi++) = ep; }else{ if(svc.local_ids.empty()) { ep.local_id = ep.claimed_id; *(oi++) = ep; }else{ for(xrd::local_ids_t::const_iterator ilid=svc.local_ids.begin(); ilid!=svc.local_ids.end(); ++ilid) { diff --git a/lib/util.cc b/lib/util.cc index bb8a2e8..29e6738 100644 --- a/lib/util.cc +++ b/lib/util.cc @@ -1,63 +1,64 @@ #include <errno.h> #include <cassert> #include <cctype> #include <cstring> #include <vector> #include <string> #include <stack> #include <algorithm> #include <openssl/bio.h> #include <openssl/evp.h> #include <openssl/sha.h> #include <openssl/hmac.h> #include <curl/curl.h> #include <opkele/util.h> #include <opkele/exception.h> +#include <opkele/data.h> #include <opkele/debug.h> #include <config.h> #ifdef HAVE_DEMANGLE # include <cxxabi.h> #endif namespace opkele { using namespace std; namespace util { /* * base64 */ string encode_base64(const void *data,size_t length) { BIO *b64 = 0, *bmem = 0; try { b64 = BIO_new(BIO_f_base64()); if(!b64) throw exception_openssl(OPKELE_CP_ "failed to BIO_new() base64 encoder"); BIO_set_flags(b64,BIO_FLAGS_BASE64_NO_NL); bmem = BIO_new(BIO_s_mem()); BIO_set_flags(b64,BIO_CLOSE); if(!bmem) throw exception_openssl(OPKELE_CP_ "failed to BIO_new() memory buffer"); BIO_push(b64,bmem); if(((size_t)BIO_write(b64,data,length))!=length) throw exception_openssl(OPKELE_CP_ "failed to BIO_write()"); if(BIO_flush(b64)!=1) throw exception_openssl(OPKELE_CP_ "failed to BIO_flush()"); char *rvd; long rvl = BIO_get_mem_data(bmem,&rvd); string rv(rvd,rvl); BIO_free_all(b64); return rv; }catch(...) { if(b64) BIO_free_all(b64); throw; } } void decode_base64(const string& data,vector<unsigned char>& rv) { BIO *b64 = 0, *bmem = 0; rv.clear(); try { bmem = BIO_new_mem_buf((void*)data.data(),data.size()); if(!bmem) @@ -166,109 +167,108 @@ namespace opkele { string attr_escape(const string& str) { static const char *unsafechars = "<>&\n\"'"; string rv; string::size_type p=0; while(true) { string::size_type us = str.find_first_of(unsafechars,p); if(us==string::npos) { if(p!=str.length()) rv.append(str,p,str.length()-p); return rv; } rv.append(str,p,us-p); rv += "&#"; rv += long_to_string((long)str[us]); rv += ';'; p = us+1; } } string long_to_string(long l) { char rv[32]; int r=snprintf(rv,sizeof(rv),"%ld",l); if(r<0 || r>=(int)sizeof(rv)) throw failed_conversion(OPKELE_CP_ "failed to snprintf()"); return rv; } long string_to_long(const string& s) { char *endptr = 0; long rv = strtol(s.c_str(),&endptr,10); if((!endptr) || endptr==s.c_str()) throw failed_conversion(OPKELE_CP_ "failed to strtol()"); return rv; } /* * Normalize URL according to the rules, described in rfc 3986, section 6 * * - uppercase hex triplets (e.g. %ab -> %AB) * - lowercase scheme and host * - decode %-encoded characters, specified as unreserved in rfc 3986, section 2.3, * that is - [:alpha:][:digit:]._~- * - remove dot segments * - remove empty and default ports * - if there's no path component, add '/' */ string rfc_3986_normalize_uri(const string& uri) { - static const char *whitespace = " \t\r\n"; string rv; - string::size_type ns = uri.find_first_not_of(whitespace); + string::size_type ns = uri.find_first_not_of(data::_whitespace_chars); if(ns==string::npos) throw bad_input(OPKELE_CP_ "Can't normalize empty URI"); string::size_type colon = uri.find(':',ns); if(colon==string::npos) throw bad_input(OPKELE_CP_ "No scheme specified in URI"); transform( uri.begin()+ns, uri.begin()+colon+1, back_inserter(rv), ::tolower ); bool s; - string::size_type ul = uri.find_last_not_of(whitespace)+1; + string::size_type ul = uri.find_last_not_of(data::_whitespace_chars)+1; if(ul <= (colon+3)) throw bad_input(OPKELE_CP_ "Unexpected end of URI being normalized encountered"); if(uri[colon+1]!='/' || uri[colon+2]!='/') throw bad_input(OPKELE_CP_ "Unexpected input in URI being normalized after scheme component"); if(rv=="http:") s = false; else if(rv=="https:") s = true; else{ /* TODO: support more schemes. e.g. xri. How do we normalize * xri? */ rv.append(uri,colon+1,ul-colon-1); return rv; } rv += "//"; string::size_type interesting = uri.find_first_of(":/#?",colon+3); if(interesting==string::npos) { transform( uri.begin()+colon+3,uri.begin()+ul, back_inserter(rv), ::tolower ); rv += '/'; return rv; } transform( uri.begin()+colon+3,uri.begin()+interesting, back_inserter(rv), ::tolower ); bool qf = false; char ic = uri[interesting]; if(ic==':') { string::size_type ni = uri.find_first_of("/#?%",interesting+1); const char *nptr = uri.data()+interesting+1; char *eptr = 0; long port = strtol(nptr,&eptr,10); if( (port>0) && (port<65535) && port!=(s?443:80) ) { char tmp[8]; snprintf(tmp,sizeof(tmp),":%ld",port); rv += tmp; } if(ni==string::npos) { rv += '/'; return rv; } interesting = ni; }else if(ic!='/') { rv += '/'; rv += ic; qf = true; ++interesting; } string::size_type n = interesting; @@ -385,51 +385,86 @@ namespace opkele { string abi_demangle(const char *mn) { #ifndef HAVE_DEMANGLE return mn; #else /* !HAVE_DEMANGLE */ int dstat; char *demangled = abi::__cxa_demangle(mn,0,0,&dstat); if(dstat) return mn; string rv = demangled; free(demangled); return rv; #endif /* !HAVE_DEMANGLE */ } string base64_signature(const assoc_t& assoc,const basic_openid_message& om) { const string& slist = om.get_field("signed"); string kv; string::size_type p=0; while(true) { string::size_type co = slist.find(',',p); string f = (co==string::npos) ?slist.substr(p):slist.substr(p,co-p); kv += f; kv += ':'; kv += om.get_field(f); kv += '\n'; if(co==string::npos) break; p = co+1; } const secret_t& secret = assoc->secret(); const EVP_MD *evpmd; const string& at = assoc->assoc_type(); if(at=="HMAC-SHA256") evpmd = EVP_sha256(); else if(at=="HMAC-SHA1") evpmd = EVP_sha1(); else throw unsupported(OPKELE_CP_ "unknown association type"); unsigned int md_len = 0; unsigned char md[SHA256_DIGEST_LENGTH]; HMAC(evpmd, &(secret.front()),secret.size(), (const unsigned char*)kv.data(),kv.length(), md,&md_len); return encode_base64(md,md_len); } + string normalize_identifier(const string& usi,bool strip_fragment) { + if(usi.empty()) + return usi; + string rv; + string::size_type fsc = usi.find_first_not_of(data::_whitespace_chars); + if(fsc==string::npos) + return rv; + string::size_type lsc = usi.find_last_not_of(data::_whitespace_chars); + assert(lsc!=string::npos); + if(!strncasecmp(usi.c_str()+fsc,"xri://",sizeof("xri://")-1)) + fsc += sizeof("xri://")-1; + if( (fsc+1) >= lsc ) + return rv; + rv.assign(usi,fsc,lsc-fsc+1); + if(strchr(data::_iname_leaders,rv[0])) { + /* TODO: further normalize xri identity, fold case or + * whatever... */ + }else{ + if(rv.find("://")==string::npos) + rv.insert(0,"http://"); + if(strip_fragment) { + string::size_type fp = rv.find('#'); + if(fp!=string::npos) { + string::size_type qp = rv.find('?'); + if(qp==string::npos || qp<fp) + rv.erase(fp); + else if(qp>fp) + rv.erase(fp,qp-fp); + } + } + rv = rfc_3986_normalize_uri(rv); + } + return rv; + } + } } |