summary | refs | log | tree | commit | diff | about
author Michael Krelin <hacker@klever.net> 2008-04-06 19:26:50 (UTC)
committer Michael Krelin <hacker@klever.net> 2008-04-06 19:26:50 (UTC)
commit aa3a6821f6aaaf4ae63bafbbc76da5f414a75fd4 (patch) (unidiff)
tree c64231565e123d678850a7b0560b348ee82b2dcc
parent 752e484cd2fc239bc582a88fe7d62a225880ee3b (diff)
download libopkele-aa3a6821f6aaaf4ae63bafbbc76da5f414a75fd4.zip
libopkele-aa3a6821f6aaaf4ae63bafbbc76da5f414a75fd4.tar.gz
libopkele-aa3a6821f6aaaf4ae63bafbbc76da5f414a75fd4.tar.bz2
fix for discovery failing on some entities coming before the tag of interest
One of the numerous opportunities to express my gratitude by means of commit message to Joseph Smarr of Plaxo for spotting it. Thanks, Joseph!
Signed-off-by: Michael Krelin <hacker@klever.net>
Diffstat (more/less context) (show whitespace changes)
-rw-r--r-- lib/discovery.cc 1
1 files changed, 1 insertions, 0 deletions
diff --git a/lib/discovery.cc b/lib/discovery.cc
index c118c80..3b90977 100644
--- a/lib/discovery.cc
+++ b/lib/discovery.cc
@@ -199,128 +199,129 @@ namespace opkele {
199 if(r) 199 if(r)
200 throw exception_curl(OPKELE_CP_ "failed to get CURLINFO_EFFECTIVE_URL",r); 200 throw exception_curl(OPKELE_CP_ "failed to get CURLINFO_EFFECTIVE_URL",r);
201 string cid = util::strip_uri_fragment_part( idis.canonicalized_id = util::rfc_3986_normalize_uri(eu) ); 201 string cid = util::strip_uri_fragment_part( idis.canonicalized_id = util::rfc_3986_normalize_uri(eu) );
202 if(xrds_location.empty()) { 202 if(xrds_location.empty()) {
203 if(idis.xrd.empty()) 203 if(idis.xrd.empty())
204 html2xrd(oi,idis); 204 html2xrd(oi,idis);
205 else{ 205 else{
206 for(const service_type_t *st=op_service_types; 206 for(const service_type_t *st=op_service_types;
207 st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) 207 st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st)
208 queue_endpoints(oi,idis,st); 208 queue_endpoints(oi,idis,st);
209 } 209 }
210 }else{ 210 }else{
211 idis.clear(); 211 idis.clear();
212 idis.canonicalized_id = cid; 212 idis.canonicalized_id = cid;
213 discover_at(idis,xrds_location,xmode_xrd); 213 discover_at(idis,xrds_location,xmode_xrd);
214 if(idis.xrd.empty()) 214 if(idis.xrd.empty())
215 html2xrd(oi,idis); 215 html2xrd(oi,idis);
216 else{ 216 else{
217 for(const service_type_t *st=op_service_types; 217 for(const service_type_t *st=op_service_types;
218 st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) 218 st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st)
219 queue_endpoints(oi,idis,st); 219 queue_endpoints(oi,idis,st);
220 } 220 }
221 } 221 }
222 } 222 }
223 return rv; 223 return rv;
224 } 224 }
225 225
226 void discover_at(idiscovery_t& idis,const string& url,int xm) { 226 void discover_at(idiscovery_t& idis,const string& url,int xm) {
227 CURLcode r = easy_setopt(CURLOPT_MAXREDIRS, (xm&xmode_noredirs)?0:5); 227 CURLcode r = easy_setopt(CURLOPT_MAXREDIRS, (xm&xmode_noredirs)?0:5);
228 if(r) 228 if(r)
229 throw exception_curl(OPKELE_CP_ "failed to set curly maxredirs option"); 229 throw exception_curl(OPKELE_CP_ "failed to set curly maxredirs option");
230 if( (r=easy_setopt(CURLOPT_URL,url.c_str())) ) 230 if( (r=easy_setopt(CURLOPT_URL,url.c_str())) )
231 throw exception_curl(OPKELE_CP_ "failed to set curly urlie",r); 231 throw exception_curl(OPKELE_CP_ "failed to set curly urlie",r);
232 232
233 http_content_type.clear(); 233 http_content_type.clear();
234 xmode = xm; 234 xmode = xm;
235 prepare_to_parse(); 235 prepare_to_parse();
236 if(xmode&xmode_html) { 236 if(xmode&xmode_html) {
237 xrds_location.clear(); 237 xrds_location.clear();
238 save_html.clear(); 238 save_html.clear();
239 save_html.reserve(max_html); 239 save_html.reserve(max_html);
240 } 240 }
241 xrd = &idis.xrd; 241 xrd = &idis.xrd;
242 242
243 r = easy_perform(); 243 r = easy_perform();
244 if(r && r!=CURLE_WRITE_ERROR) 244 if(r && r!=CURLE_WRITE_ERROR)
245 throw exception_curl(OPKELE_CP_ "failed to perform curly request",r); 245 throw exception_curl(OPKELE_CP_ "failed to perform curly request",r);
246 246
247 if(!parser_choked) { 247 if(!parser_choked) {
248 parse(0,0,true); 248 parse(0,0,true);
249 }else if(xmode&xmode_html){ 249 }else if(xmode&xmode_html){
250 /* TODO: do not bother if we've seen xml */ 250 /* TODO: do not bother if we've seen xml */
251 try { 251 try {
252 util::tidy_doc_t td = util::tidy_doc_t::create(); 252 util::tidy_doc_t td = util::tidy_doc_t::create();
253 if(!td) 253 if(!td)
254 throw exception_tidy(OPKELE_CP_ "failed to create htmltidy document"); 254 throw exception_tidy(OPKELE_CP_ "failed to create htmltidy document");
255#ifndef NDEBUG 255#ifndef NDEBUG
256 td.opt_set(TidyQuiet,false); 256 td.opt_set(TidyQuiet,false);
257 td.opt_set(TidyShowWarnings,false); 257 td.opt_set(TidyShowWarnings,false);
258#endif /* NDEBUG */ 258#endif /* NDEBUG */
259 td.opt_set(TidyForceOutput,true); 259 td.opt_set(TidyForceOutput,true);
260 td.opt_set(TidyXhtmlOut,true); 260 td.opt_set(TidyXhtmlOut,true);
261 td.opt_set(TidyDoctypeMode,TidyDoctypeOmit); 261 td.opt_set(TidyDoctypeMode,TidyDoctypeOmit);
262 td.opt_set(TidyMark,false); 262 td.opt_set(TidyMark,false);
263 td.opt_set(TidyNumEntities,true);
263 if(td.parse_string(save_html)<=0) 264 if(td.parse_string(save_html)<=0)
264 throw exception_tidy(OPKELE_CP_ "tidy failed to parse document"); 265 throw exception_tidy(OPKELE_CP_ "tidy failed to parse document");
265 if(td.clean_and_repair()<=0) 266 if(td.clean_and_repair()<=0)
266 throw exception_tidy(OPKELE_CP_ "tidy failed to clean and repair"); 267 throw exception_tidy(OPKELE_CP_ "tidy failed to clean and repair");
267 util::tidy_buf_t tide; 268 util::tidy_buf_t tide;
268 if(td.save_buffer(tide)<=0) 269 if(td.save_buffer(tide)<=0)
269 throw exception_tidy(OPKELE_CP_ "tidy failed to save buffer"); 270 throw exception_tidy(OPKELE_CP_ "tidy failed to save buffer");
270 prepare_to_parse(); 271 prepare_to_parse();
271 parse(tide.c_str(),tide.size(),true); 272 parse(tide.c_str(),tide.size(),true);
272 }catch(exception_tidy& et) { } 273 }catch(exception_tidy& et) { }
273 } 274 }
274 save_html.clear(); 275 save_html.clear();
275 } 276 }
276 277
277 void prepare_to_parse() { 278 void prepare_to_parse() {
278 (*(expat_t*)this) = parser_create_ns(); 279 (*(expat_t*)this) = parser_create_ns();
279 set_user_data(); set_element_handler(); 280 set_user_data(); set_element_handler();
280 set_character_data_handler(); 281 set_character_data_handler();
281 282
282 if(xmode&xmode_html) { 283 if(xmode&xmode_html) {
283 html_openid1.clear(); html_openid2.clear(); 284 html_openid1.clear(); html_openid2.clear();
284 parser_choked = false; 285 parser_choked = false;
285 } 286 }
286 287
287 cdata = 0; xrd_service = 0; skipping = 0; 288 cdata = 0; xrd_service = 0; skipping = 0;
288 pt_stack.clear(); 289 pt_stack.clear();
289 status_code = 100; status_string.clear(); 290 status_code = 100; status_string.clear();
290 } 291 }
291 292
292 void html2xrd(endpoint_discovery_iterator& oi,idiscovery_t& id) { 293 void html2xrd(endpoint_discovery_iterator& oi,idiscovery_t& id) {
293 XRD_t& x = id.xrd; 294 XRD_t& x = id.xrd;
294 if(!html_openid2.uris.empty()) { 295 if(!html_openid2.uris.empty()) {
295 html_openid2.types.insert(STURI_OPENID20); 296 html_openid2.types.insert(STURI_OPENID20);
296 x.services.add(-1,html_openid2); 297 x.services.add(-1,html_openid2);
297 queue_endpoints(oi,id,&op_service_types[st_index_2]); 298 queue_endpoints(oi,id,&op_service_types[st_index_2]);
298 } 299 }
299 if(!html_openid1.uris.empty()) { 300 if(!html_openid1.uris.empty()) {
300 html_openid1.types.insert(STURI_OPENID11); 301 html_openid1.types.insert(STURI_OPENID11);
301 x.services.add(-1,html_openid1); 302 x.services.add(-1,html_openid1);
302 queue_endpoints(oi,id,&op_service_types[st_index_1]); 303 queue_endpoints(oi,id,&op_service_types[st_index_1]);
303 } 304 }
304 } 305 }
305 306
306 size_t write(void *p,size_t s,size_t nm) { 307 size_t write(void *p,size_t s,size_t nm) {
307 /* TODO: limit total size */ 308 /* TODO: limit total size */
308 size_t bytes = s*nm; 309 size_t bytes = s*nm;
309 const char *inbuf = (const char*)p; 310 const char *inbuf = (const char*)p;
310 if(xmode&xmode_html) { 311 if(xmode&xmode_html) {
311 size_t mbts = save_html.capacity()-save_html.size(); 312 size_t mbts = save_html.capacity()-save_html.size();
312 size_t bts = 0; 313 size_t bts = 0;
313 if(mbts>0) { 314 if(mbts>0) {
314 bts = (bytes>mbts)?mbts:bytes; 315 bts = (bytes>mbts)?mbts:bytes;
315 save_html.append(inbuf,bts); 316 save_html.append(inbuf,bts);
316 } 317 }
317 if(skipping<0) return bts; 318 if(skipping<0) return bts;
318 } 319 }
319 if(skipping<0) return 0; 320 if(skipping<0) return 0;
320 bool rp = parse(inbuf,bytes,false); 321 bool rp = parse(inbuf,bytes,false);
321 if(!rp) { 322 if(!rp) {
322 parser_choked = true; 323 parser_choked = true;
323 skipping = -1; 324 skipping = -1;
324 if(!(xmode&xmode_html)) 325 if(!(xmode&xmode_html))
325 bytes = 0; 326 bytes = 0;
326 } 327 }