author | Michael Krelin <hacker@klever.net> | 2008-04-06 19:26:50 (UTC) |
---|---|---|
committer | Michael Krelin <hacker@klever.net> | 2008-04-06 19:26:50 (UTC) |
commit | aa3a6821f6aaaf4ae63bafbbc76da5f414a75fd4 (patch) (unidiff) | |
tree | c64231565e123d678850a7b0560b348ee82b2dcc /lib/discovery.cc | |
parent | 752e484cd2fc239bc582a88fe7d62a225880ee3b (diff) | |
download | libopkele-aa3a6821f6aaaf4ae63bafbbc76da5f414a75fd4.zip libopkele-aa3a6821f6aaaf4ae63bafbbc76da5f414a75fd4.tar.gz libopkele-aa3a6821f6aaaf4ae63bafbbc76da5f414a75fd4.tar.bz2 |
Fix discovery failing when some HTML character entities appear before the tag of interest.
One of the numerous opportunities to express my gratitude, by means of a commit
message, to Joseph Smarr of Plaxo for spotting it. Thanks, Joseph!
Signed-off-by: Michael Krelin <hacker@klever.net>
-rw-r--r-- | lib/discovery.cc | 1 |
1 files changed, 1 insertions, 0 deletions
diff --git a/lib/discovery.cc b/lib/discovery.cc index c118c80..3b90977 100644 --- a/lib/discovery.cc +++ b/lib/discovery.cc | |||
@@ -199,128 +199,129 @@ namespace opkele { | |||
199 | if(r) | 199 | if(r) |
200 | throw exception_curl(OPKELE_CP_ "failed to get CURLINFO_EFFECTIVE_URL",r); | 200 | throw exception_curl(OPKELE_CP_ "failed to get CURLINFO_EFFECTIVE_URL",r); |
201 | string cid = util::strip_uri_fragment_part( idis.canonicalized_id = util::rfc_3986_normalize_uri(eu) ); | 201 | string cid = util::strip_uri_fragment_part( idis.canonicalized_id = util::rfc_3986_normalize_uri(eu) ); |
202 | if(xrds_location.empty()) { | 202 | if(xrds_location.empty()) { |
203 | if(idis.xrd.empty()) | 203 | if(idis.xrd.empty()) |
204 | html2xrd(oi,idis); | 204 | html2xrd(oi,idis); |
205 | else{ | 205 | else{ |
206 | for(const service_type_t *st=op_service_types; | 206 | for(const service_type_t *st=op_service_types; |
207 | st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) | 207 | st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) |
208 | queue_endpoints(oi,idis,st); | 208 | queue_endpoints(oi,idis,st); |
209 | } | 209 | } |
210 | }else{ | 210 | }else{ |
211 | idis.clear(); | 211 | idis.clear(); |
212 | idis.canonicalized_id = cid; | 212 | idis.canonicalized_id = cid; |
213 | discover_at(idis,xrds_location,xmode_xrd); | 213 | discover_at(idis,xrds_location,xmode_xrd); |
214 | if(idis.xrd.empty()) | 214 | if(idis.xrd.empty()) |
215 | html2xrd(oi,idis); | 215 | html2xrd(oi,idis); |
216 | else{ | 216 | else{ |
217 | for(const service_type_t *st=op_service_types; | 217 | for(const service_type_t *st=op_service_types; |
218 | st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) | 218 | st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) |
219 | queue_endpoints(oi,idis,st); | 219 | queue_endpoints(oi,idis,st); |
220 | } | 220 | } |
221 | } | 221 | } |
222 | } | 222 | } |
223 | return rv; | 223 | return rv; |
224 | } | 224 | } |
225 | 225 | ||
226 | void discover_at(idiscovery_t& idis,const string& url,int xm) { | 226 | void discover_at(idiscovery_t& idis,const string& url,int xm) { |
227 | CURLcode r = easy_setopt(CURLOPT_MAXREDIRS, (xm&xmode_noredirs)?0:5); | 227 | CURLcode r = easy_setopt(CURLOPT_MAXREDIRS, (xm&xmode_noredirs)?0:5); |
228 | if(r) | 228 | if(r) |
229 | throw exception_curl(OPKELE_CP_ "failed to set curly maxredirs option"); | 229 | throw exception_curl(OPKELE_CP_ "failed to set curly maxredirs option"); |
230 | if( (r=easy_setopt(CURLOPT_URL,url.c_str())) ) | 230 | if( (r=easy_setopt(CURLOPT_URL,url.c_str())) ) |
231 | throw exception_curl(OPKELE_CP_ "failed to set curly urlie",r); | 231 | throw exception_curl(OPKELE_CP_ "failed to set curly urlie",r); |
232 | 232 | ||
233 | http_content_type.clear(); | 233 | http_content_type.clear(); |
234 | xmode = xm; | 234 | xmode = xm; |
235 | prepare_to_parse(); | 235 | prepare_to_parse(); |
236 | if(xmode&xmode_html) { | 236 | if(xmode&xmode_html) { |
237 | xrds_location.clear(); | 237 | xrds_location.clear(); |
238 | save_html.clear(); | 238 | save_html.clear(); |
239 | save_html.reserve(max_html); | 239 | save_html.reserve(max_html); |
240 | } | 240 | } |
241 | xrd = &idis.xrd; | 241 | xrd = &idis.xrd; |
242 | 242 | ||
243 | r = easy_perform(); | 243 | r = easy_perform(); |
244 | if(r && r!=CURLE_WRITE_ERROR) | 244 | if(r && r!=CURLE_WRITE_ERROR) |
245 | throw exception_curl(OPKELE_CP_ "failed to perform curly request",r); | 245 | throw exception_curl(OPKELE_CP_ "failed to perform curly request",r); |
246 | 246 | ||
247 | if(!parser_choked) { | 247 | if(!parser_choked) { |
248 | parse(0,0,true); | 248 | parse(0,0,true); |
249 | }else if(xmode&xmode_html){ | 249 | }else if(xmode&xmode_html){ |
250 | /* TODO: do not bother if we've seen xml */ | 250 | /* TODO: do not bother if we've seen xml */ |
251 | try { | 251 | try { |
252 | util::tidy_doc_t td = util::tidy_doc_t::create(); | 252 | util::tidy_doc_t td = util::tidy_doc_t::create(); |
253 | if(!td) | 253 | if(!td) |
254 | throw exception_tidy(OPKELE_CP_ "failed to create htmltidy document"); | 254 | throw exception_tidy(OPKELE_CP_ "failed to create htmltidy document"); |
255 | #ifndef NDEBUG | 255 | #ifndef NDEBUG |
256 | td.opt_set(TidyQuiet,false); | 256 | td.opt_set(TidyQuiet,false); |
257 | td.opt_set(TidyShowWarnings,false); | 257 | td.opt_set(TidyShowWarnings,false); |
258 | #endif /* NDEBUG */ | 258 | #endif /* NDEBUG */ |
259 | td.opt_set(TidyForceOutput,true); | 259 | td.opt_set(TidyForceOutput,true); |
260 | td.opt_set(TidyXhtmlOut,true); | 260 | td.opt_set(TidyXhtmlOut,true); |
261 | td.opt_set(TidyDoctypeMode,TidyDoctypeOmit); | 261 | td.opt_set(TidyDoctypeMode,TidyDoctypeOmit); |
262 | td.opt_set(TidyMark,false); | 262 | td.opt_set(TidyMark,false); |
263 | td.opt_set(TidyNumEntities,true); | ||
263 | if(td.parse_string(save_html)<=0) | 264 | if(td.parse_string(save_html)<=0) |
264 | throw exception_tidy(OPKELE_CP_ "tidy failed to parse document"); | 265 | throw exception_tidy(OPKELE_CP_ "tidy failed to parse document"); |
265 | if(td.clean_and_repair()<=0) | 266 | if(td.clean_and_repair()<=0) |
266 | throw exception_tidy(OPKELE_CP_ "tidy failed to clean and repair"); | 267 | throw exception_tidy(OPKELE_CP_ "tidy failed to clean and repair"); |
267 | util::tidy_buf_t tide; | 268 | util::tidy_buf_t tide; |
268 | if(td.save_buffer(tide)<=0) | 269 | if(td.save_buffer(tide)<=0) |
269 | throw exception_tidy(OPKELE_CP_ "tidy failed to save buffer"); | 270 | throw exception_tidy(OPKELE_CP_ "tidy failed to save buffer"); |
270 | prepare_to_parse(); | 271 | prepare_to_parse(); |
271 | parse(tide.c_str(),tide.size(),true); | 272 | parse(tide.c_str(),tide.size(),true); |
272 | }catch(exception_tidy& et) { } | 273 | }catch(exception_tidy& et) { } |
273 | } | 274 | } |
274 | save_html.clear(); | 275 | save_html.clear(); |
275 | } | 276 | } |
276 | 277 | ||
277 | void prepare_to_parse() { | 278 | void prepare_to_parse() { |
278 | (*(expat_t*)this) = parser_create_ns(); | 279 | (*(expat_t*)this) = parser_create_ns(); |
279 | set_user_data(); set_element_handler(); | 280 | set_user_data(); set_element_handler(); |
280 | set_character_data_handler(); | 281 | set_character_data_handler(); |
281 | 282 | ||
282 | if(xmode&xmode_html) { | 283 | if(xmode&xmode_html) { |
283 | html_openid1.clear(); html_openid2.clear(); | 284 | html_openid1.clear(); html_openid2.clear(); |
284 | parser_choked = false; | 285 | parser_choked = false; |
285 | } | 286 | } |
286 | 287 | ||
287 | cdata = 0; xrd_service = 0; skipping = 0; | 288 | cdata = 0; xrd_service = 0; skipping = 0; |
288 | pt_stack.clear(); | 289 | pt_stack.clear(); |
289 | status_code = 100; status_string.clear(); | 290 | status_code = 100; status_string.clear(); |
290 | } | 291 | } |
291 | 292 | ||
292 | void html2xrd(endpoint_discovery_iterator& oi,idiscovery_t& id) { | 293 | void html2xrd(endpoint_discovery_iterator& oi,idiscovery_t& id) { |
293 | XRD_t& x = id.xrd; | 294 | XRD_t& x = id.xrd; |
294 | if(!html_openid2.uris.empty()) { | 295 | if(!html_openid2.uris.empty()) { |
295 | html_openid2.types.insert(STURI_OPENID20); | 296 | html_openid2.types.insert(STURI_OPENID20); |
296 | x.services.add(-1,html_openid2); | 297 | x.services.add(-1,html_openid2); |
297 | queue_endpoints(oi,id,&op_service_types[st_index_2]); | 298 | queue_endpoints(oi,id,&op_service_types[st_index_2]); |
298 | } | 299 | } |
299 | if(!html_openid1.uris.empty()) { | 300 | if(!html_openid1.uris.empty()) { |
300 | html_openid1.types.insert(STURI_OPENID11); | 301 | html_openid1.types.insert(STURI_OPENID11); |
301 | x.services.add(-1,html_openid1); | 302 | x.services.add(-1,html_openid1); |
302 | queue_endpoints(oi,id,&op_service_types[st_index_1]); | 303 | queue_endpoints(oi,id,&op_service_types[st_index_1]); |
303 | } | 304 | } |
304 | } | 305 | } |
305 | 306 | ||
306 | size_t write(void *p,size_t s,size_t nm) { | 307 | size_t write(void *p,size_t s,size_t nm) { |
307 | /* TODO: limit total size */ | 308 | /* TODO: limit total size */ |
308 | size_t bytes = s*nm; | 309 | size_t bytes = s*nm; |
309 | const char *inbuf = (const char*)p; | 310 | const char *inbuf = (const char*)p; |
310 | if(xmode&xmode_html) { | 311 | if(xmode&xmode_html) { |
311 | size_t mbts = save_html.capacity()-save_html.size(); | 312 | size_t mbts = save_html.capacity()-save_html.size(); |
312 | size_t bts = 0; | 313 | size_t bts = 0; |
313 | if(mbts>0) { | 314 | if(mbts>0) { |
314 | bts = (bytes>mbts)?mbts:bytes; | 315 | bts = (bytes>mbts)?mbts:bytes; |
315 | save_html.append(inbuf,bts); | 316 | save_html.append(inbuf,bts); |
316 | } | 317 | } |
317 | if(skipping<0) return bts; | 318 | if(skipping<0) return bts; |
318 | } | 319 | } |
319 | if(skipping<0) return 0; | 320 | if(skipping<0) return 0; |
320 | bool rp = parse(inbuf,bytes,false); | 321 | bool rp = parse(inbuf,bytes,false); |
321 | if(!rp) { | 322 | if(!rp) { |
322 | parser_choked = true; | 323 | parser_choked = true; |
323 | skipping = -1; | 324 | skipping = -1; |
324 | if(!(xmode&xmode_html)) | 325 | if(!(xmode&xmode_html)) |
325 | bytes = 0; | 326 | bytes = 0; |
326 | } | 327 | } |