summary | refs | log | tree | commit | diff | about
author: Michael Krelin <hacker@klever.net> 2008-04-06 19:26:50 (UTC)
committer: Michael Krelin <hacker@klever.net> 2008-04-06 19:26:50 (UTC)
commit: aa3a6821f6aaaf4ae63bafbbc76da5f414a75fd4 (patch) (unidiff)
tree: c64231565e123d678850a7b0560b348ee82b2dcc
parent: 752e484cd2fc239bc582a88fe7d62a225880ee3b (diff)
download: libopkele-aa3a6821f6aaaf4ae63bafbbc76da5f414a75fd4.zip
download: libopkele-aa3a6821f6aaaf4ae63bafbbc76da5f414a75fd4.tar.gz
download: libopkele-aa3a6821f6aaaf4ae63bafbbc76da5f414a75fd4.tar.bz2
fix for discovery failing on some entities coming before the tag of interest
One of the numerous opportunities to express my gratitude by means of a commit message to Joseph Smarr of Plaxo for spotting it. Thanks, Joseph!

Signed-off-by: Michael Krelin <hacker@klever.net>
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r-- lib/discovery.cc | 1
1 file changed, 1 insertion, 0 deletions
diff --git a/lib/discovery.cc b/lib/discovery.cc
index c118c80..3b90977 100644
--- a/lib/discovery.cc
+++ b/lib/discovery.cc
@@ -215,96 +215,97 @@ namespace opkele {
215 html2xrd(oi,idis); 215 html2xrd(oi,idis);
216 else{ 216 else{
217 for(const service_type_t *st=op_service_types; 217 for(const service_type_t *st=op_service_types;
218 st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) 218 st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st)
219 queue_endpoints(oi,idis,st); 219 queue_endpoints(oi,idis,st);
220 } 220 }
221 } 221 }
222 } 222 }
223 return rv; 223 return rv;
224 } 224 }
225 225
226 void discover_at(idiscovery_t& idis,const string& url,int xm) { 226 void discover_at(idiscovery_t& idis,const string& url,int xm) {
227 CURLcode r = easy_setopt(CURLOPT_MAXREDIRS, (xm&xmode_noredirs)?0:5); 227 CURLcode r = easy_setopt(CURLOPT_MAXREDIRS, (xm&xmode_noredirs)?0:5);
228 if(r) 228 if(r)
229 throw exception_curl(OPKELE_CP_ "failed to set curly maxredirs option"); 229 throw exception_curl(OPKELE_CP_ "failed to set curly maxredirs option");
230 if( (r=easy_setopt(CURLOPT_URL,url.c_str())) ) 230 if( (r=easy_setopt(CURLOPT_URL,url.c_str())) )
231 throw exception_curl(OPKELE_CP_ "failed to set curly urlie",r); 231 throw exception_curl(OPKELE_CP_ "failed to set curly urlie",r);
232 232
233 http_content_type.clear(); 233 http_content_type.clear();
234 xmode = xm; 234 xmode = xm;
235 prepare_to_parse(); 235 prepare_to_parse();
236 if(xmode&xmode_html) { 236 if(xmode&xmode_html) {
237 xrds_location.clear(); 237 xrds_location.clear();
238 save_html.clear(); 238 save_html.clear();
239 save_html.reserve(max_html); 239 save_html.reserve(max_html);
240 } 240 }
241 xrd = &idis.xrd; 241 xrd = &idis.xrd;
242 242
243 r = easy_perform(); 243 r = easy_perform();
244 if(r && r!=CURLE_WRITE_ERROR) 244 if(r && r!=CURLE_WRITE_ERROR)
245 throw exception_curl(OPKELE_CP_ "failed to perform curly request",r); 245 throw exception_curl(OPKELE_CP_ "failed to perform curly request",r);
246 246
247 if(!parser_choked) { 247 if(!parser_choked) {
248 parse(0,0,true); 248 parse(0,0,true);
249 }else if(xmode&xmode_html){ 249 }else if(xmode&xmode_html){
250 /* TODO: do not bother if we've seen xml */ 250 /* TODO: do not bother if we've seen xml */
251 try { 251 try {
252 util::tidy_doc_t td = util::tidy_doc_t::create(); 252 util::tidy_doc_t td = util::tidy_doc_t::create();
253 if(!td) 253 if(!td)
254 throw exception_tidy(OPKELE_CP_ "failed to create htmltidy document"); 254 throw exception_tidy(OPKELE_CP_ "failed to create htmltidy document");
255#ifndef NDEBUG 255#ifndef NDEBUG
256 td.opt_set(TidyQuiet,false); 256 td.opt_set(TidyQuiet,false);
257 td.opt_set(TidyShowWarnings,false); 257 td.opt_set(TidyShowWarnings,false);
258#endif /* NDEBUG */ 258#endif /* NDEBUG */
259 td.opt_set(TidyForceOutput,true); 259 td.opt_set(TidyForceOutput,true);
260 td.opt_set(TidyXhtmlOut,true); 260 td.opt_set(TidyXhtmlOut,true);
261 td.opt_set(TidyDoctypeMode,TidyDoctypeOmit); 261 td.opt_set(TidyDoctypeMode,TidyDoctypeOmit);
262 td.opt_set(TidyMark,false); 262 td.opt_set(TidyMark,false);
263 td.opt_set(TidyNumEntities,true);
263 if(td.parse_string(save_html)<=0) 264 if(td.parse_string(save_html)<=0)
264 throw exception_tidy(OPKELE_CP_ "tidy failed to parse document"); 265 throw exception_tidy(OPKELE_CP_ "tidy failed to parse document");
265 if(td.clean_and_repair()<=0) 266 if(td.clean_and_repair()<=0)
266 throw exception_tidy(OPKELE_CP_ "tidy failed to clean and repair"); 267 throw exception_tidy(OPKELE_CP_ "tidy failed to clean and repair");
267 util::tidy_buf_t tide; 268 util::tidy_buf_t tide;
268 if(td.save_buffer(tide)<=0) 269 if(td.save_buffer(tide)<=0)
269 throw exception_tidy(OPKELE_CP_ "tidy failed to save buffer"); 270 throw exception_tidy(OPKELE_CP_ "tidy failed to save buffer");
270 prepare_to_parse(); 271 prepare_to_parse();
271 parse(tide.c_str(),tide.size(),true); 272 parse(tide.c_str(),tide.size(),true);
272 }catch(exception_tidy& et) { } 273 }catch(exception_tidy& et) { }
273 } 274 }
274 save_html.clear(); 275 save_html.clear();
275 } 276 }
276 277
277 void prepare_to_parse() { 278 void prepare_to_parse() {
278 (*(expat_t*)this) = parser_create_ns(); 279 (*(expat_t*)this) = parser_create_ns();
279 set_user_data(); set_element_handler(); 280 set_user_data(); set_element_handler();
280 set_character_data_handler(); 281 set_character_data_handler();
281 282
282 if(xmode&xmode_html) { 283 if(xmode&xmode_html) {
283 html_openid1.clear(); html_openid2.clear(); 284 html_openid1.clear(); html_openid2.clear();
284 parser_choked = false; 285 parser_choked = false;
285 } 286 }
286 287
287 cdata = 0; xrd_service = 0; skipping = 0; 288 cdata = 0; xrd_service = 0; skipping = 0;
288 pt_stack.clear(); 289 pt_stack.clear();
289 status_code = 100; status_string.clear(); 290 status_code = 100; status_string.clear();
290 } 291 }
291 292
292 void html2xrd(endpoint_discovery_iterator& oi,idiscovery_t& id) { 293 void html2xrd(endpoint_discovery_iterator& oi,idiscovery_t& id) {
293 XRD_t& x = id.xrd; 294 XRD_t& x = id.xrd;
294 if(!html_openid2.uris.empty()) { 295 if(!html_openid2.uris.empty()) {
295 html_openid2.types.insert(STURI_OPENID20); 296 html_openid2.types.insert(STURI_OPENID20);
296 x.services.add(-1,html_openid2); 297 x.services.add(-1,html_openid2);
297 queue_endpoints(oi,id,&op_service_types[st_index_2]); 298 queue_endpoints(oi,id,&op_service_types[st_index_2]);
298 } 299 }
299 if(!html_openid1.uris.empty()) { 300 if(!html_openid1.uris.empty()) {
300 html_openid1.types.insert(STURI_OPENID11); 301 html_openid1.types.insert(STURI_OPENID11);
301 x.services.add(-1,html_openid1); 302 x.services.add(-1,html_openid1);
302 queue_endpoints(oi,id,&op_service_types[st_index_1]); 303 queue_endpoints(oi,id,&op_service_types[st_index_1]);
303 } 304 }
304 } 305 }
305 306
306 size_t write(void *p,size_t s,size_t nm) { 307 size_t write(void *p,size_t s,size_t nm) {
307 /* TODO: limit total size */ 308 /* TODO: limit total size */
308 size_t bytes = s*nm; 309 size_t bytes = s*nm;
309 const char *inbuf = (const char*)p; 310 const char *inbuf = (const char*)p;
310 if(xmode&xmode_html) { 311 if(xmode&xmode_html) {