-rw-r--r-- | lib/discovery.cc | 1 |
1 files changed, 1 insertions, 0 deletions
diff --git a/lib/discovery.cc b/lib/discovery.cc index c118c80..3b90977 100644 --- a/lib/discovery.cc +++ b/lib/discovery.cc @@ -199,128 +199,129 @@ namespace opkele { if(r) throw exception_curl(OPKELE_CP_ "failed to get CURLINFO_EFFECTIVE_URL",r); string cid = util::strip_uri_fragment_part( idis.canonicalized_id = util::rfc_3986_normalize_uri(eu) ); if(xrds_location.empty()) { if(idis.xrd.empty()) html2xrd(oi,idis); else{ for(const service_type_t *st=op_service_types; st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) queue_endpoints(oi,idis,st); } }else{ idis.clear(); idis.canonicalized_id = cid; discover_at(idis,xrds_location,xmode_xrd); if(idis.xrd.empty()) html2xrd(oi,idis); else{ for(const service_type_t *st=op_service_types; st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) queue_endpoints(oi,idis,st); } } } return rv; } void discover_at(idiscovery_t& idis,const string& url,int xm) { CURLcode r = easy_setopt(CURLOPT_MAXREDIRS, (xm&xmode_noredirs)?0:5); if(r) throw exception_curl(OPKELE_CP_ "failed to set curly maxredirs option"); if( (r=easy_setopt(CURLOPT_URL,url.c_str())) ) throw exception_curl(OPKELE_CP_ "failed to set curly urlie",r); http_content_type.clear(); xmode = xm; prepare_to_parse(); if(xmode&xmode_html) { xrds_location.clear(); save_html.clear(); save_html.reserve(max_html); } xrd = &idis.xrd; r = easy_perform(); if(r && r!=CURLE_WRITE_ERROR) throw exception_curl(OPKELE_CP_ "failed to perform curly request",r); if(!parser_choked) { parse(0,0,true); }else if(xmode&xmode_html){ /* TODO: do not bother if we've seen xml */ try { util::tidy_doc_t td = util::tidy_doc_t::create(); if(!td) throw exception_tidy(OPKELE_CP_ "failed to create htmltidy document"); #ifndef NDEBUG td.opt_set(TidyQuiet,false); td.opt_set(TidyShowWarnings,false); #endif /* NDEBUG */ td.opt_set(TidyForceOutput,true); td.opt_set(TidyXhtmlOut,true); td.opt_set(TidyDoctypeMode,TidyDoctypeOmit); td.opt_set(TidyMark,false); + td.opt_set(TidyNumEntities,true); if(td.parse_string(save_html)<=0) throw exception_tidy(OPKELE_CP_ "tidy failed to parse document"); if(td.clean_and_repair()<=0) throw exception_tidy(OPKELE_CP_ "tidy failed to clean and repair"); util::tidy_buf_t tide; if(td.save_buffer(tide)<=0) throw exception_tidy(OPKELE_CP_ "tidy failed to save buffer"); prepare_to_parse(); parse(tide.c_str(),tide.size(),true); }catch(exception_tidy& et) { } } save_html.clear(); } void prepare_to_parse() { (*(expat_t*)this) = parser_create_ns(); set_user_data(); set_element_handler(); set_character_data_handler(); if(xmode&xmode_html) { html_openid1.clear(); html_openid2.clear(); parser_choked = false; } cdata = 0; xrd_service = 0; skipping = 0; pt_stack.clear(); status_code = 100; status_string.clear(); } void html2xrd(endpoint_discovery_iterator& oi,idiscovery_t& id) { XRD_t& x = id.xrd; if(!html_openid2.uris.empty()) { html_openid2.types.insert(STURI_OPENID20); x.services.add(-1,html_openid2); queue_endpoints(oi,id,&op_service_types[st_index_2]); } if(!html_openid1.uris.empty()) { html_openid1.types.insert(STURI_OPENID11); x.services.add(-1,html_openid1); queue_endpoints(oi,id,&op_service_types[st_index_1]); } } size_t write(void *p,size_t s,size_t nm) { /* TODO: limit total size */ size_t bytes = s*nm; const char *inbuf = (const char*)p; if(xmode&xmode_html) { size_t mbts = save_html.capacity()-save_html.size(); size_t bts = 0; if(mbts>0) { bts = (bytes>mbts)?mbts:bytes; save_html.append(inbuf,bts); } if(skipping<0) return bts; } if(skipping<0) return 0; bool rp = parse(inbuf,bytes,false); if(!rp) { parser_choked = true; skipping = -1; if(!(xmode&xmode_html)) bytes = 0; } |