-rw-r--r-- | include/opkele/expat.h | 6 | ||||
-rw-r--r-- | lib/discovery.cc | 11 |
2 files changed, 7 insertions, 10 deletions
diff --git a/include/opkele/expat.h b/include/opkele/expat.h index 97ed61a..60c41ac 100644 --- a/include/opkele/expat.h +++ b/include/opkele/expat.h @@ -1,93 +1,91 @@ #ifndef __OPKELE_EXPAT_H #define __OPKELE_EXPAT_H #include <cassert> #include <expat.h> namespace opkele { namespace util { class expat_t { public: XML_Parser _x; expat_t() : _x(0) { } expat_t(XML_Parser x) : _x(x) { } virtual ~expat_t() throw(); expat_t& operator=(XML_Parser x); operator const XML_Parser(void) const { return _x; } operator XML_Parser(void) { return _x; } inline bool parse(const char *s,int len,bool final=false) { assert(_x); - return XML_Parse(_x,s,len,final); } - enum XML_Status stop_parser(bool resumable=false) { - assert(_x); - return XML_StopParser(_x,resumable); } + return XML_Parse(_x,s,len,final); + } virtual void start_element(const XML_Char *n,const XML_Char **a) { } virtual void end_element(const XML_Char *n) { } void set_element_handler(); virtual void character_data(const XML_Char *s,int l) { } void set_character_data_handler(); virtual void processing_instruction(const XML_Char *t,const XML_Char *d) { } void set_processing_instruction_handler(); virtual void comment(const XML_Char *d) { } void set_comment_handler(); virtual void start_cdata_section() { } virtual void end_cdata_section() { } void set_cdata_section_handler(); virtual void default_handler(const XML_Char *s,int l) { } void set_default_handler(); void set_default_handler_expand(); virtual void start_namespace_decl(const XML_Char *p,const XML_Char *u) { } virtual void end_namespace_decl(const XML_Char *p) { } void set_namespace_decl_handler(); inline enum XML_Error get_error_code() { assert(_x); return XML_GetErrorCode(_x); } static inline const XML_LChar *error_string(XML_Error c) { return XML_ErrorString(c); } inline long get_current_byte_index() { assert(_x); return XML_GetCurrentByteIndex(_x); } inline int get_current_line_number() { assert(_x); return XML_GetCurrentLineNumber(_x); } inline int get_current_column_number() { assert(_x); return XML_GetCurrentColumnNumber(_x); } inline void set_user_data() { assert(_x); XML_SetUserData(_x,this); } inline bool set_base(const XML_Char *b) { assert(_x); return XML_SetBase(_x,b); } inline const XML_Char *get_base() { assert(_x); return XML_GetBase(_x); } inline int get_specified_attribute_count() { assert(_x); return XML_GetSpecifiedAttributeCount(_x); } inline bool set_param_entity_parsing(enum XML_ParamEntityParsing c) { assert(_x); return XML_SetParamEntityParsing(_x,c); } inline static XML_Parser parser_create(const XML_Char *e=0) { return XML_ParserCreate(e); } inline static XML_Parser parser_create_ns(const XML_Char *e=0,XML_Char s='\t') { return XML_ParserCreateNS(e,s); } }; } } #endif /* __OPKELE_EXPAT_H */ diff --git a/lib/discovery.cc b/lib/discovery.cc index af9686a..d868308 100644 --- a/lib/discovery.cc +++ b/lib/discovery.cc @@ -158,290 +158,289 @@ namespace opkele { if(r && r!=CURLE_WRITE_ERROR) throw exception_curl(OPKELE_CP_ "failed to perform curly request",r); if(!parser_choked) { parse(0,0,true); }else{ /* TODO: do not bother if we've seen xml */ try { util::tidy_doc_t td = util::tidy_doc_t::create(); if(!td) throw exception_tidy(OPKELE_CP_ "failed to create htmltidy document"); #ifndef NDEBUG td.opt_set(TidyQuiet,false); td.opt_set(TidyShowWarnings,false); #endif /* NDEBUG */ td.opt_set(TidyForceOutput,true); td.opt_set(TidyXhtmlOut,true); td.opt_set(TidyDoctypeMode,TidyDoctypeOmit); td.opt_set(TidyMark,false); if(td.parse_string(save_html)<=0) throw exception_tidy(OPKELE_CP_ "tidy failed to parse document"); if(td.clean_and_repair()<=0) throw exception_tidy(OPKELE_CP_ "tidy failed to clean and repair"); util::tidy_buf_t tide; if(td.save_buffer(tide)<=0) throw exception_tidy(OPKELE_CP_ "tidy failed to save buffer"); prepare_to_parse(); parse(tide.c_str(),tide.size(),true); }catch(exception_tidy& et) { } } save_html.clear(); } void prepare_to_parse() { (*(expat_t*)this) = parser_create_ns(); set_user_data(); set_element_handler(); set_character_data_handler(); if(xmode&xmode_html) { html_openid1.clear(); html_openid2.clear(); parser_choked = false; } cdata = 0; xrd_service = 0; skipping = 0; status_code = 100; status_string.clear(); } void html2xrd(XRD_t& x) { if(!html_openid1.uris.empty()) { html_openid1.types.insert(STURI_OPENID11); x.services.add(-1,html_openid1); } if(!html_openid2.uris.empty()) { html_openid2.types.insert(STURI_OPENID20); x.services.add(-1,html_openid2); } } size_t write(void *p,size_t s,size_t nm) { /* TODO: limit total size */ size_t bytes = s*nm; const char *inbuf = (const char*)p; if(xmode&xmode_html) { size_t mbts = save_html.capacity()-save_html.size(); size_t bts = 0; if(mbts>0) { bts = (bytes>mbts)?mbts:bytes; save_html.append(inbuf,bts); } if(skipping<0) return bts; } if(skipping<0) return 0; bool rp = parse(inbuf,bytes,false); if(!rp) { parser_choked = true; skipping = -1; if(!(xmode&xmode_html)) bytes = 0; } return bytes; } size_t header(void *p,size_t s,size_t nm) { size_t bytes = s*nm; const char *h = (const char*)p; const char *colon = (const char*)memchr(p,':',bytes); const char *space = (const char*)memchr(p,' ',bytes); if(space && ( (!colon) || space<colon ) ) { xrds_location.clear(); http_content_type.clear(); }else if(colon) { const char *hv = ++colon; int hnl = colon-h; int rb; for(rb = bytes-hnl-1;rb>0 && isspace(*hv);++hv,--rb); while(rb>0 && isspace(hv[rb-1])) --rb; if(rb) { if( (hnl>=sizeof(XRDS_HEADER)) && !strncasecmp(h,XRDS_HEADER":", sizeof(XRDS_HEADER)) ) { xrds_location.assign(hv,rb); }else if( (hnl>=sizeof(CT_HEADER)) && !strncasecmp(h,CT_HEADER":", sizeof(CT_HEADER)) ) { const char *sc = (const char*)memchr( hv,';',rb); http_content_type.assign(hv,sc?(sc-hv):rb); } } } return curl_t::header(p,s,nm); } void start_element(const XML_Char *n,const XML_Char **a) { if(skipping<0) return; if(skipping) { if(xmode&xmode_html) html_start_element(n,a); ++skipping; return; } if(pt_stack.empty()) { if(is_qelement(n,NSURI_XRDS "\tXRDS")) return; if(is_qelement(n,NSURI_XRD "\tXRD")) { assert(xrd); xrd->clear(); pt_stack.push_back(n); }else if(xmode&xmode_html) { html_start_element(n,a); }else{ - skipping = -1; stop_parser(); + skipping = -1; } }else{ int pt_s = pt_stack.size(); if(pt_s==1) { if(is_qelement(n,NSURI_XRD "\tCanonicalID")) { assert(xrd); cdata = &(xrd->canonical_ids.add(element_priority(a),string())); }else if(is_qelement(n,NSURI_XRD "\tLocalID")) { assert(xrd); cdata = &(xrd->local_ids.add(element_priority(a),string())); }else if(is_qelement(n,NSURI_XRD "\tProviderID")) { assert(xrd); cdata = &(xrd->provider_id); }else if(is_qelement(n,NSURI_XRD "\tService")) { assert(xrd); xrd_service = &(xrd->services.add(element_priority(a), service_t())); pt_stack.push_back(n); }else if(is_qelement(n,NSURI_XRD "\tStatus")) { for(;*a;) { if(!strcasecmp(*(a++),"code")) { if(sscanf(*(a++),"%ld",&status_code)==1 && status_code!=100) { cdata = &status_string; pt_stack.push_back(n); break; } } } }else if(is_qelement(n,NSURI_XRD "\tExpires")) { assert(xrd); cdata_buf.clear(); cdata = &cdata_buf; }else if(xmode&xmode_html) { html_start_element(n,a); }else{ skipping = 1; } }else if(pt_s==2) { if(is_qelement(pt_stack.back().c_str(), NSURI_XRD "\tService")) { if(is_qelement(n,NSURI_XRD "\tType")) { assert(xrd); assert(xrd_service); cdata_buf.clear(); cdata = &cdata_buf; }else if(is_qelement(n,NSURI_XRD "\tURI")) { assert(xrd); assert(xrd_service); cdata = &(xrd_service->uris.add(element_priority(a),string())); }else if(is_qelement(n,NSURI_XRD "\tLocalID") || is_qelement(n,NSURI_OPENID10 "\tDelegate") ) { assert(xrd); assert(xrd_service); cdata = &(xrd_service->local_ids.add(element_priority(a),string())); }else if(is_qelement(n,NSURI_XRD "\tProviderID")) { assert(xrd); assert(xrd_service); cdata = &(xrd_service->provider_id); }else{ skipping = 1; } }else skipping = 1; }else if(xmode&xmode_html) { html_start_element(n,a); }else{ skipping = 1; } } } void end_element(const XML_Char *n) { if(skipping<0) return; if(skipping) { --skipping; return; } if(is_qelement(n,NSURI_XRD "\tType")) { assert(xrd); assert(xrd_service); assert(cdata==&cdata_buf); xrd_service->types.insert(cdata_buf); }else if(is_qelement(n,NSURI_XRD "\tService")) { assert(xrd); assert(xrd_service); assert(!pt_stack.empty()); assert(pt_stack.back()==(NSURI_XRD "\tService")); pt_stack.pop_back(); xrd_service = 0; }else if(is_qelement(n,NSURI_XRD "\tStatus")) { assert(xrd); if(is_qelement(pt_stack.back().c_str(),n)) { assert(cdata==&status_string); pt_stack.pop_back(); - if(status_code!=100) { - skipping = -1; stop_parser(); - } + if(status_code!=100) + skipping = -1; } }else if(is_qelement(n,NSURI_XRD "\tExpires")) { assert(xrd); xrd->expires = util::w3c_to_time(cdata_buf); }else if((xmode&xmode_html) && is_element(n,"head")) { - skipping = -1; stop_parser(); + skipping = -1; } cdata = 0; } void character_data(const XML_Char *s,int l) { if(skipping) return; if(cdata) cdata->append(s,l); } void html_start_element(const XML_Char *n,const XML_Char **a) { if(is_element(n,"meta")) { bool heq = false; string l; for(;*a;a+=2) { if(!( strcasecmp(a[0],"http-equiv") || strcasecmp(a[1],XRDS_HEADER) )) heq = true; else if(!strcasecmp(a[0],"content")) l.assign(a[1]); } if(heq) xrds_location = l; }else if(is_element(n,"link")) { string rels; string href; for(;*a;a+=2) { if( !strcasecmp(a[0],"rel") ) { rels.assign(a[1]); }else if( !strcasecmp(a[0],"href") ) { const char *ns = a[1]; for(;*ns && isspace(*ns);++ns); href.assign(ns); string::size_type lns=href.find_last_not_of(whitespace); href.erase(lns+1); } } for(string::size_type ns=rels.find_first_not_of(whitespace); ns!=string::npos; ns=rels.find_first_not_of(whitespace,ns)) { string::size_type s = rels.find_first_of(whitespace,ns); string rel; if(s==string::npos) { rel.assign(rels,ns,string::npos); ns = string::npos; }else{ rel.assign(rels,ns,s-ns); ns = s; } if(rel=="openid.server") html_openid1.uris.add(-1,href); else if(rel=="openid.delegate") html_openid1.local_ids.add(-1,href); else if(rel=="openid2.provider") html_openid2.uris.add(-1,href); else if(rel=="openid2.local_id") html_openid2.local_ids.add(-1,href); } }else if(is_element(n,"body")) { - skipping = -1; stop_parser(); + skipping = -1; } } }; void idiscover(idiscovery_t& result,const string& identity) { idigger_t idigger; idigger.discover(result,identity); } } |