author | Michael Krelin <hacker@klever.net> | 2007-12-02 21:48:18 (UTC) |
---|---|---|
committer | Michael Krelin <hacker@klever.net> | 2007-12-02 21:51:08 (UTC) |
commit | 262f1579f0a9138a01f06afea06d00155cefd4b5 (patch) (side-by-side diff) | |
tree | fb4db0ee7b679a1957c63abbe6f6af1d2fa82531 | |
parent | 73d98f3652b498b9a74b183bef395714c7d73fda (diff) | |
download | libopkele-262f1579f0a9138a01f06afea06d00155cefd4b5.zip libopkele-262f1579f0a9138a01f06afea06d00155cefd4b5.tar.gz libopkele-262f1579f0a9138a01f06afea06d00155cefd4b5.tar.bz2 |
first cut on XRI resolver
This commit adds an OpenID service resolver that performs discovery using XRI (proxy
only), the Yadis protocol and HTML-based discovery. It uses expat as the XML parsing
engine, which makes it a bit stricter about the HTML it receives, but I think
failing to discover links in *severely* broken HTML is better than
misdetecting links hidden in comments or the like.
This is highly experimental code and needs more thought and testing.
Thanks to everyone who pushed me towards this development, namely Joseph, John,
Gen.
Signed-off-by: Michael Krelin <hacker@klever.net>
-rw-r--r-- | include/Makefile.am | 8 | ||||
-rw-r--r-- | include/opkele/openid_service_resolver.h | 118 | ||||
-rw-r--r-- | include/opkele/uris.h | 10 | ||||
-rw-r--r-- | lib/Makefile.am | 3 | ||||
-rw-r--r-- | lib/openid_service_resolver.cc | 294 | ||||
-rw-r--r-- | lib/util.cc | 16 | ||||
-rw-r--r-- | test/.gitignore | 1 | ||||
-rw-r--r-- | test/Makefile.am | 5 | ||||
-rw-r--r-- | test/openid_resolve.cc | 36 |
9 files changed, 482 insertions, 9 deletions
diff --git a/include/Makefile.am b/include/Makefile.am index 0385cfb..23c7e0d 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -10,8 +10,10 @@ nobase_include_HEADERS = \ opkele/sreg.h \ opkele/extension_chain.h \ opkele/xconsumer.h \ - opkele/xserver.h + opkele/xserver.h \ + opkele/curl.h opkele/expat.h \ + opkele/openid_service_resolver.h \ + opkele/uris.h EXTRA_DIST = \ opkele/data.h \ - opkele/util.h \ - opkele/curl.h opkele/expat.h + opkele/util.h diff --git a/include/opkele/openid_service_resolver.h b/include/opkele/openid_service_resolver.h new file mode 100644 index 0000000..64edd28 --- a/dev/null +++ b/include/opkele/openid_service_resolver.h @@ -0,0 +1,118 @@ +#ifndef __OPKELE_OPENID_SERVICE_RESOLVER_H +#define __OPKELE_OPENID_SERVICE_RESOLVER_H + +#include <climits> +#include <string> +#include <list> +#include <set> +#include <map> +#include <opkele/curl.h> +#include <opkele/expat.h> + +namespace opkele { + using std::list; + using std::string; + using std::set; + using std::map; + + struct openid_auth_SEP_t { + long priority; + set<string> xrd_Type; + string xrd_URI; + string openid_Delegate; + + openid_auth_SEP_t() : priority(LONG_MAX) { } + }; + + struct openid_auth_info_t { + string canonical_id; + openid_auth_SEP_t auth_SEP; + }; + + + class openid_service_resolver_t : public util::curl_t, public util::expat_t { + public: + string xri_proxy; + + openid_service_resolver_t(const string& xp=""); + ~openid_service_resolver_t() throw() { } + + const openid_auth_info_t& resolve(const string& id); + + enum state_t { + state_parse = 0, + state_stopping_head, state_stopping_body, + state_stopping_size + }; + state_t state; + + struct parser_node_t { + string element; + string content; + typedef map<string,string> attrs_t; + attrs_t attrs; + bool skip_text, skip_tags; + openid_auth_info_t auth_info; + + parser_node_t(const XML_Char *n,const XML_Char **a) + : skip_text(true), skip_tags(true) + { + element = n; + for(;*a;a+=2) + attrs[a[0]] = a[1]; 
+ } + + }; + + class parser_tree_t : public list<parser_node_t> { + public: + const_reference top() const { return back(); } + reference top() { return back(); } + + const_reference parent() const { + const_reverse_iterator rv = rbegin(); + return *(++rv); } + reference parent() { + reverse_iterator rv = rbegin(); + return *(++rv); } + + inline void pop() { pop_back(); } + inline void push(const_reference e) { push_back(e); } + + void push(const XML_Char *n,const XML_Char **a) { + parser_node_t nn(n,a); + if(empty()) + nn.skip_text = nn.skip_tags = true; + else{ + const_reference t = top(); + nn.skip_text = t.skip_text; nn.skip_tags = t.skip_tags; + } + push(nn); + } + }; + parser_tree_t tree; + + void start_element(const XML_Char *n,const XML_Char **a); + void end_element(const XML_Char *n); + void character_data(const XML_Char *s,int l); + + string xrds_location; + openid_auth_SEP_t html_SEP; + openid_auth_info_t auth_info; + + void pop_tag(); + + size_t write(void *p,size_t s,size_t nm); + + string http_content_type; + + size_t header(void *p,size_t s,size_t nm); + + bool xri_mode; + + void discover_service(const string& url,bool xri=false); + }; + +} + +#endif /* __OPKELE_OPENID_SERVICE_RESOLVER_H */ diff --git a/include/opkele/uris.h b/include/opkele/uris.h new file mode 100644 index 0000000..9a6a3cd --- a/dev/null +++ b/include/opkele/uris.h @@ -0,0 +1,10 @@ +#ifndef __OPKELE_URIS_H +#define __OPKELE_URIS_H + +#define NSURI_XRDS "xri://$xrds" +#define NSURI_XRD "xri://$xrd*($v*2.0)" +#define NSURI_OPENID10 "http://openid.net/xmlns/1.0" + +#define STURI_OPENID10 "http://openid.net/signon/1.0" + +#endif /* __OPKELE_URIS_H */ diff --git a/lib/Makefile.am b/lib/Makefile.am index 7309353..b278faf 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -24,6 +24,7 @@ libopkele_la_SOURCES = \ extension.cc \ sreg.cc \ extension_chain.cc \ - curl.cc expat.cc + curl.cc expat.cc \ + openid_service_resolver.cc libopkele_la_LDFLAGS = \ -version-info 2:0:0 diff --git 
a/lib/openid_service_resolver.cc b/lib/openid_service_resolver.cc new file mode 100644 index 0000000..5f82955 --- a/dev/null +++ b/lib/openid_service_resolver.cc @@ -0,0 +1,294 @@ +#include <cctype> +#include <opkele/exception.h> +#include <opkele/util.h> +#include <opkele/openid_service_resolver.h> +#include <opkele/uris.h> + +#define LOCATION_HEADER "X-XRDS-Location" + +namespace opkele { + static const char *whitespace = " \t\r\n"; + + openid_service_resolver_t::openid_service_resolver_t(const string& xp) + : util::curl_t(easy_init()), + util::expat_t(0), + xri_proxy(xp.empty()?"http://beta.xri.net/":xp) + { + CURLcode r; + (r=misc_sets()) + || (r=set_write()) + || (r==set_header()) + ; + if(r) + throw opkele::exception_curl(OPKELE_CP_ "failed to set curly options",r); + } + + static bool is_element(const XML_Char *n,const char *en) { + if(!strcasecmp(n,en)) return true; + int nl = strlen(n), enl = strlen(en); + if( (nl>=(enl+1)) && n[nl-enl-1]=='\t' + && !strcasecmp(&n[nl-enl],en) ) + return true; + return false; + } + static inline bool is_qelement(const XML_Char *n,const char *qen) { + return !strcasecmp(n,qen); + } + static inline bool is_element( + const openid_service_resolver_t::parser_node_t& n, + const char *en) { + return is_element(n.element.c_str(),en); + } + static inline bool is_qelement( + const openid_service_resolver_t::parser_node_t& n, + const char *qen) { + return is_qelement(n.element.c_str(),qen); + } + + void openid_service_resolver_t::start_element(const XML_Char *n,const XML_Char **a) { + if(state!=state_parse) return; + tree.push(n,a); + parser_node_t& t = tree.top(); + if(is_element(n,"html") || is_element(n,"head") + || is_qelement(n,NSURI_XRDS "\tXRDS") + || is_qelement(n,NSURI_XRD "\tXRD") ) + t.skip_tags = false; + else if(is_qelement(n,NSURI_XRD "\tService") + || is_qelement(n,NSURI_XRD "\tType") + || is_qelement(n,NSURI_XRD "\tURI") + || is_qelement(n,NSURI_OPENID10 "\tDelegate") + || is_qelement(n,NSURI_XRD "\tCanonicalID") ) + 
t.skip_tags = t.skip_text = false; + else if(is_element(n,"body")) + state = state_stopping_body; + } + void openid_service_resolver_t::end_element(const XML_Char *n) { + if(state!=state_parse) return; + assert(tree.top().element == n); + pop_tag(); + } + void openid_service_resolver_t::character_data(const XML_Char *s,int l) { + if(state!=state_parse) return; + if( !( tree.empty() || tree.top().skip_text ) ) + tree.top().content.append(s,l); + } + + static void copy_trim_whitespace(string& to,const string& from) { + string::size_type ns0 = from.find_first_not_of(whitespace); + if(ns0==string::npos) { + to.clear(); return; + } + string::size_type ns1 = from.find_last_not_of(whitespace); + assert(ns1!=string::npos); + to.assign(from,ns0,ns1-ns0+1); + } + + void openid_service_resolver_t::pop_tag() { + assert(!tree.empty()); + parser_node_t& t = tree.top(); + if( is_element(t,"meta") + && !strcasecmp(t.attrs["http-equiv"].c_str(),LOCATION_HEADER) ) { + xrds_location = t.attrs["content"]; + }else if( is_element(t,"link") ) { + parser_node_t::attrs_t::const_iterator ir = t.attrs.find("rel"); + if(ir!=t.attrs.end()) { + const string& rels = ir->second; + for(string::size_type ns = rels.find_first_not_of(whitespace); + ns!=string::npos; + ns=rels.find_first_not_of(whitespace,ns)) { + string::size_type s = rels.find_first_of(whitespace,ns); + string rel; + if(s==string::npos) { + rel.assign(rels,ns,string::npos); + ns = string::npos; + }else{ + rel.assign(rels,ns,s-ns); + ns = s; + } + if(rel=="openid.server") + copy_trim_whitespace(html_SEP.xrd_URI,t.attrs["href"]); + else if(rel=="openid.delegate") + copy_trim_whitespace(html_SEP.openid_Delegate,t.attrs["href"]); + } + } + }else if( is_element(t,"head") ) + state = state_stopping_head; + else if( is_qelement(t,NSURI_XRD "\tXRD")) { + if( !( + ( + xri_mode + && t.auth_info.canonical_id.empty() + ) || + t.auth_info.auth_SEP.xrd_Type.empty() + ) ) + auth_info = t.auth_info; + }else if( tree.size()>1 ) { + parser_node_t& p 
= tree.parent(); + if( is_qelement(p,NSURI_XRD "\tService") ) { + if( is_qelement(t,NSURI_XRD "\tType") ) { + if(t.content==STURI_OPENID10) { + string tmp; copy_trim_whitespace(tmp,t.content); + p.auth_info.auth_SEP.xrd_Type.insert(tmp); + } + }else if( is_qelement(t,NSURI_XRD "\tURI") ) + copy_trim_whitespace(p.auth_info.auth_SEP.xrd_URI,t.content); + else if( is_qelement(t,NSURI_OPENID10 "\tDelegate") ) + copy_trim_whitespace(p.auth_info.auth_SEP.openid_Delegate,t.content); + }else if( is_qelement(p,NSURI_XRD "\tXRD") ) { + if(is_qelement(t,NSURI_XRD "\tService") ) { + if( !t.auth_info.auth_SEP.xrd_Type.empty() ) { + parser_node_t::attrs_t::const_iterator ip + = t.attrs.find("priority"); + if(ip!=t.attrs.end()) { + const char *nptr = ip->second.c_str(); + char *eptr = 0; + t.auth_info.auth_SEP.priority = strtol(nptr,&eptr,10); + if(nptr==eptr) + t.auth_info.auth_SEP.priority = LONG_MAX; + } + if( (t.auth_info.auth_SEP.priority < p.auth_info.auth_SEP.priority) + || p.auth_info.auth_SEP.xrd_Type.empty() ) + p.auth_info.auth_SEP = t.auth_info.auth_SEP; + } + }else if( is_qelement(t,NSURI_XRD "\tCanonicalID") ) + copy_trim_whitespace(p.auth_info.canonical_id,t.content); + } + } + + tree.pop(); + } + + size_t openid_service_resolver_t::write(void *p,size_t s,size_t nm) { + if(state != state_parse) + return 0; + /* TODO: limit total size */ + size_t bytes = s*nm; + parse((const char *)p,bytes,false); + return bytes; + } + + size_t openid_service_resolver_t::header(void *p,size_t s,size_t nm) { + size_t bytes = s*nm; + const char *h = (const char *)p; + const char *colon = (const char*)memchr(p,':',bytes); + const char *space = (const char*)memchr(p,' ',bytes); + if(space && ( (!colon) || space<colon ) ) { + xrds_location.clear(); http_content_type.clear(); + }else if(colon) { + const char *hv = ++colon; + int hnl = colon-h; + int rb; + for(rb = bytes-hnl-1; + rb>0 && isspace(*hv); + ++hv,--rb ); + while(rb>0 && isspace(hv[rb-1])) + --rb; + if(rb) { + if( (hnl >= 
sizeof(LOCATION_HEADER)) + && !strncasecmp(h,LOCATION_HEADER ":", + sizeof(LOCATION_HEADER)) ) { + xrds_location.assign(hv,rb); + }else if( (hnl >= sizeof("Content-Type")) + && !strncasecmp(h,"Content-Type:", + sizeof("Content-Type")) ) { + const char *sc = (const char*)memchr( + hv,';',rb); + http_content_type.assign( + hv,sc?(sc-hv):rb ); + } + } + } + return curl_t::header(p,s,nm); + } + + void openid_service_resolver_t::discover_service(const string& url,bool xri) { + CURLcode r = easy_setopt(CURLOPT_URL,url.c_str()); + if(r) + throw opkele::exception_curl(OPKELE_CP_ "failed to set curly urlie",r); + + (*(expat_t*)this) = parser_create_ns(); + set_user_data(); set_element_handler(); + set_character_data_handler(); + tree.clear(); + state = state_parse; + + r = easy_perform(); + if(r && r!=CURLE_WRITE_ERROR) + throw exception_curl(OPKELE_CP_ "failed to perform curly request",r); + + parse(0,0,true); + while(!tree.empty()) pop_tag(); + } + + const openid_auth_info_t& openid_service_resolver_t::resolve(const string& id) { + auth_info = openid_auth_info_t(); + html_SEP = openid_auth_SEP_t(); + + string::size_type fns = id.find_first_not_of(whitespace); + if(fns==string::npos) + throw opkele::bad_input(OPKELE_CP_ "whitespace-only identity"); + string::size_type lns = id.find_last_not_of(whitespace); + assert(lns!=string::npos); + if(!strncasecmp( + id.c_str()+fns,"xri://", + sizeof("xri://")-1)) + fns+=sizeof("xri://")-1; + string nid(id,fns,lns-fns+1); + if(nid.empty()) + throw opkele::bad_input(OPKELE_CP_ "nothing significant in identity"); + if(strchr("=@+$!(",*nid.c_str())) { + discover_service( + xri_proxy + util::url_encode(nid) + + "?_xrd_t=" STURI_OPENID10 "&_xrd_r=application/xrd+xml;sep=true", + true ); + if(auth_info.canonical_id.empty() + || auth_info.auth_SEP.xrd_Type.empty() ) + throw opkele::failed_lookup(OPKELE_CP_ "no OpenID service for XRI found"); + return auth_info; + }else{ + const char *np = nid.c_str(); + if( (strncasecmp(np,"http",4) || 
strncmp( + tolower(*(np+4))=='s'? np+5 : np+4, "://", 3)) +#ifndef NDEBUG + && strncasecmp(np,"file:///",sizeof("file:///")-1) +#endif /* XXX: or how do I let tests work? */ + ) + nid.insert(0,"http://"); + string::size_type fp = nid.find('#'); + if(fp!=string::npos) { + string::size_type qp = nid.find('?'); + if(qp==string::npos || qp<fp) { + nid.erase(fp); + }else if(qp>fp) + nid.erase(fp,qp-fp); + } + discover_service(nid); + const char *eu = 0; + CURLcode r = easy_getinfo(CURLINFO_EFFECTIVE_URL,&eu); + if(r) + throw exception_curl(OPKELE_CP_ "failed to get CURLINFO_EFFECTIVE_URL",r); + string canonicalized_id = util::rfc_3986_normalize_uri(eu); + if(xrds_location.empty()) { + if(auth_info.auth_SEP.xrd_Type.empty()) { + if(html_SEP.xrd_URI.empty()) + throw opkele::failed_lookup(OPKELE_CP_ "no OpenID service discovered"); + auth_info.auth_SEP = html_SEP; + auth_info.auth_SEP.xrd_Type.clear(); auth_info.auth_SEP.xrd_Type.insert( STURI_OPENID10 ); + auth_info.canonical_id = canonicalized_id; + }else{ + if(auth_info.canonical_id.empty()) + auth_info.canonical_id = canonicalized_id; + } + return auth_info; + }else{ + discover_service(xrds_location); + if(auth_info.auth_SEP.xrd_Type.empty()) + throw opkele::failed_lookup(OPKELE_CP_ "no OpenID service found in Yadis document"); + if(auth_info.canonical_id.empty()) + auth_info.canonical_id = canonicalized_id; + return auth_info; + } + } + } + +} diff --git a/lib/util.cc b/lib/util.cc index ac70938..69d37b5 100644 --- a/lib/util.cc +++ b/lib/util.cc @@ -173,21 +173,29 @@ namespace opkele { * - if there's no path component, add '/' */ string rfc_3986_normalize_uri(const string& uri) { + static const char *whitespace = " \t\r\n"; string rv; - string::size_type colon = uri.find(':'); + string::size_type ns = uri.find_first_not_of(whitespace); + if(ns==string::npos) + throw bad_input(OPKELE_CP_ "Can't normalize empty URI"); + string::size_type colon = uri.find(':',ns); if(colon==string::npos) throw bad_input(OPKELE_CP_ "No 
scheme specified in URI"); transform( - uri.begin(), uri.begin()+colon+1, + uri.begin()+ns, uri.begin()+colon+1, back_inserter(rv), ::tolower ); bool s; if(rv=="http:") s = false; else if(rv=="https:") s = true; +#ifndef NDEBUG + else if(rv=="file:") + s = false; +#endif /* XXX: or try to make tests work some other way */ else throw not_implemented(OPKELE_CP_ "Only http(s) URIs can be normalized here"); - string::size_type ul = uri.length(); + string::size_type ul = uri.find_last_not_of(whitespace)+1; if(ul <= (colon+3)) throw bad_input(OPKELE_CP_ "Unexpected end of URI being normalized encountered"); if(uri[colon+1]!='/' || uri[colon+2]!='/') @@ -196,7 +204,7 @@ namespace opkele { string::size_type interesting = uri.find_first_of(":/#?",colon+3); if(interesting==string::npos) { transform( - uri.begin()+colon+3,uri.end(), + uri.begin()+colon+3,uri.begin()+ul, back_inserter(rv), ::tolower ); rv += '/'; return rv; } diff --git a/test/.gitignore b/test/.gitignore index 918b3c9..5ce4dc9 100644 --- a/test/.gitignore +++ b/test/.gitignore @@ -2,3 +2,4 @@ /.libs /test *.o +/openid_resolve diff --git a/test/Makefile.am b/test/Makefile.am index 4b78087..5aa87b3 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -1,4 +1,4 @@ -noinst_PROGRAMS = test +noinst_PROGRAMS = test openid_resolve DEFAULT_INCLUDES = -I${top_builddir} INCLUDES = -I${top_srcdir}/include/ ${KONFORKA_CFLAGS} @@ -10,3 +10,6 @@ EXTRA_DIST=$(addsuffix .html,$(addprefix html/, \ empty head-in-body hkn-delegate hkn-server hkn in-body \ unclosed-head spaced-links spaced-link-attrs 2rels \ )) + +openid_resolve_SOURCES = openid_resolve.cc +openid_resolve_LDADD = ${top_builddir}/lib/libopkele.la diff --git a/test/openid_resolve.cc b/test/openid_resolve.cc new file mode 100644 index 0000000..31c7a6a --- a/dev/null +++ b/test/openid_resolve.cc @@ -0,0 +1,36 @@ +#include <iostream> +#include <stdexcept> +#include <iterator> +#include <algorithm> +using namespace std; +#include <opkele/exception.h> +#include 
<opkele/openid_service_resolver.h> + +int main(int argc,char **argv) { + try { + if(argc<2) + throw opkele::exception(OPKELE_CP_ "Please, give me something to resolve"); + opkele::openid_service_resolver_t resolver; + for(int a=1;a<argc;++a) { + const opkele::openid_auth_info_t& iai = resolver.resolve(argv[a]); + clog + << "====================" << endl + << "canonical id is " << iai.canonical_id << endl + << endl + << "service priority is " << iai.auth_SEP.priority << endl + << "service types are " ; + copy( + iai.auth_SEP.xrd_Type.begin(), iai.auth_SEP.xrd_Type.end(), + ostream_iterator<string>(clog," ") ); + clog << endl + << "service URI is " << iai.auth_SEP.xrd_URI << endl; + if(!iai.auth_SEP.openid_Delegate.empty()) + clog << "openid:Delegate is " << iai.auth_SEP.openid_Delegate << endl; + clog << endl; + } + }catch(exception& e) { + cerr << "oops: " << e.what() << endl; + _exit(1); + } + _exit(0); +} |