author | Michael Krelin <hacker@klever.net> | 2008-12-02 21:36:54 (UTC) |
---|---|---|
committer | Michael Krelin <hacker@klever.net> | 2008-12-02 21:36:54 (UTC) |
commit | a2a524f744849e81ec91a7afbaf641d1b57e14b1 (patch) (unidiff) | |
tree | d023154d9485179fdd3001d5816a0ebec05e5d5c | |
parent | 47bf5ab634dc5cc3bd5d71894b2d6681f7a83024 (diff) | |
download | libopkele-a2a524f744849e81ec91a7afbaf641d1b57e14b1.zip libopkele-a2a524f744849e81ec91a7afbaf641d1b57e14b1.tar.gz libopkele-a2a524f744849e81ec91a7afbaf641d1b57e14b1.tar.bz2 |
added passthrough unknown encoding handler to discovery parser
-rw-r--r-- | lib/discovery.cc | 8 |
1 file changed, 8 insertions, 0 deletions
diff --git a/lib/discovery.cc b/lib/discovery.cc index bd1f917..cbdceb3 100644 --- a/lib/discovery.cc +++ b/lib/discovery.cc | |||
@@ -238,96 +238,97 @@ namespace opkele { | |||
238 | xrds_location.clear(); | 238 | xrds_location.clear(); |
239 | save_html.clear(); | 239 | save_html.clear(); |
240 | save_html.reserve(max_html); | 240 | save_html.reserve(max_html); |
241 | } | 241 | } |
242 | xrd = &idis.xrd; | 242 | xrd = &idis.xrd; |
243 | 243 | ||
244 | r = easy_perform(); | 244 | r = easy_perform(); |
245 | if(r && r!=CURLE_WRITE_ERROR) | 245 | if(r && r!=CURLE_WRITE_ERROR) |
246 | throw exception_curl(OPKELE_CP_ "failed to perform curly request",r); | 246 | throw exception_curl(OPKELE_CP_ "failed to perform curly request",r); |
247 | 247 | ||
248 | if(!parser_choked) { | 248 | if(!parser_choked) { |
249 | parse(0,0,true); | 249 | parse(0,0,true); |
250 | }else if(xmode&xmode_html){ | 250 | }else if(xmode&xmode_html){ |
251 | /* TODO: do not bother if we've seen xml */ | 251 | /* TODO: do not bother if we've seen xml */ |
252 | try { | 252 | try { |
253 | util::tidy_doc_t td = util::tidy_doc_t::create(); | 253 | util::tidy_doc_t td = util::tidy_doc_t::create(); |
254 | if(!td) | 254 | if(!td) |
255 | throw exception_tidy(OPKELE_CP_ "failed to create htmltidy document"); | 255 | throw exception_tidy(OPKELE_CP_ "failed to create htmltidy document"); |
256 | #ifdef NDEBUG | 256 | #ifdef NDEBUG |
257 | td.opt_set(TidyQuiet,true); | 257 | td.opt_set(TidyQuiet,true); |
258 | td.opt_set(TidyShowWarnings,false); | 258 | td.opt_set(TidyShowWarnings,false); |
259 | #else /* NDEBUG */ | 259 | #else /* NDEBUG */ |
260 | td.opt_set(TidyQuiet,false); | 260 | td.opt_set(TidyQuiet,false); |
261 | td.opt_set(TidyShowWarnings,true); | 261 | td.opt_set(TidyShowWarnings,true); |
262 | #endif /* NDEBUG */ | 262 | #endif /* NDEBUG */ |
263 | td.opt_set(TidyForceOutput,true); | 263 | td.opt_set(TidyForceOutput,true); |
264 | td.opt_set(TidyXhtmlOut,true); | 264 | td.opt_set(TidyXhtmlOut,true); |
265 | td.opt_set(TidyDoctypeMode,TidyDoctypeOmit); | 265 | td.opt_set(TidyDoctypeMode,TidyDoctypeOmit); |
266 | td.opt_set(TidyMark,false); | 266 | td.opt_set(TidyMark,false); |
267 | td.opt_set(TidyNumEntities,true); | 267 | td.opt_set(TidyNumEntities,true); |
268 | if(td.parse_string(save_html)<=0) | 268 | if(td.parse_string(save_html)<=0) |
269 | throw exception_tidy(OPKELE_CP_ "tidy failed to parse document"); | 269 | throw exception_tidy(OPKELE_CP_ "tidy failed to parse document"); |
270 | if(td.clean_and_repair()<=0) | 270 | if(td.clean_and_repair()<=0) |
271 | throw exception_tidy(OPKELE_CP_ "tidy failed to clean and repair"); | 271 | throw exception_tidy(OPKELE_CP_ "tidy failed to clean and repair"); |
272 | util::tidy_buf_t tide; | 272 | util::tidy_buf_t tide; |
273 | if(td.save_buffer(tide)<=0) | 273 | if(td.save_buffer(tide)<=0) |
274 | throw exception_tidy(OPKELE_CP_ "tidy failed to save buffer"); | 274 | throw exception_tidy(OPKELE_CP_ "tidy failed to save buffer"); |
275 | prepare_to_parse(); | 275 | prepare_to_parse(); |
276 | parse(tide.c_str(),tide.size(),true); | 276 | parse(tide.c_str(),tide.size(),true); |
277 | }catch(exception_tidy& et) { } | 277 | }catch(exception_tidy& et) { } |
278 | } | 278 | } |
279 | save_html.clear(); | 279 | save_html.clear(); |
280 | } | 280 | } |
281 | 281 | ||
282 | void prepare_to_parse() { | 282 | void prepare_to_parse() { |
283 | (*(expat_t*)this) = parser_create_ns(); | 283 | (*(expat_t*)this) = parser_create_ns(); |
284 | set_user_data(); set_element_handler(); | 284 | set_user_data(); set_element_handler(); |
285 | set_character_data_handler(); | 285 | set_character_data_handler(); |
286 | set_unknown_encoding_handler(); | ||
286 | 287 | ||
287 | if(xmode&xmode_html) { | 288 | if(xmode&xmode_html) { |
288 | html_openid1.clear(); html_openid2.clear(); | 289 | html_openid1.clear(); html_openid2.clear(); |
289 | parser_choked = false; | 290 | parser_choked = false; |
290 | } | 291 | } |
291 | 292 | ||
292 | cdata = 0; xrd_service = 0; skipping = 0; | 293 | cdata = 0; xrd_service = 0; skipping = 0; |
293 | pt_stack.clear(); | 294 | pt_stack.clear(); |
294 | status_code = 100; status_string.clear(); | 295 | status_code = 100; status_string.clear(); |
295 | } | 296 | } |
296 | 297 | ||
297 | void html2xrd(endpoint_discovery_iterator& oi,idiscovery_t& id) { | 298 | void html2xrd(endpoint_discovery_iterator& oi,idiscovery_t& id) { |
298 | XRD_t& x = id.xrd; | 299 | XRD_t& x = id.xrd; |
299 | if(!html_openid2.uris.empty()) { | 300 | if(!html_openid2.uris.empty()) { |
300 | html_openid2.types.insert(STURI_OPENID20); | 301 | html_openid2.types.insert(STURI_OPENID20); |
301 | x.services.add(-1,html_openid2); | 302 | x.services.add(-1,html_openid2); |
302 | queue_endpoints(oi,id,&op_service_types[st_index_2]); | 303 | queue_endpoints(oi,id,&op_service_types[st_index_2]); |
303 | } | 304 | } |
304 | if(!html_openid1.uris.empty()) { | 305 | if(!html_openid1.uris.empty()) { |
305 | html_openid1.types.insert(STURI_OPENID11); | 306 | html_openid1.types.insert(STURI_OPENID11); |
306 | x.services.add(-1,html_openid1); | 307 | x.services.add(-1,html_openid1); |
307 | queue_endpoints(oi,id,&op_service_types[st_index_1]); | 308 | queue_endpoints(oi,id,&op_service_types[st_index_1]); |
308 | } | 309 | } |
309 | } | 310 | } |
310 | 311 | ||
311 | size_t write(void *p,size_t s,size_t nm) { | 312 | size_t write(void *p,size_t s,size_t nm) { |
312 | /* TODO: limit total size */ | 313 | /* TODO: limit total size */ |
313 | size_t bytes = s*nm; | 314 | size_t bytes = s*nm; |
314 | const char *inbuf = (const char*)p; | 315 | const char *inbuf = (const char*)p; |
315 | if(xmode&xmode_html) { | 316 | if(xmode&xmode_html) { |
316 | size_t mbts = save_html.capacity()-save_html.size(); | 317 | size_t mbts = save_html.capacity()-save_html.size(); |
317 | size_t bts = 0; | 318 | size_t bts = 0; |
318 | if(mbts>0) { | 319 | if(mbts>0) { |
319 | bts = (bytes>mbts)?mbts:bytes; | 320 | bts = (bytes>mbts)?mbts:bytes; |
320 | save_html.append(inbuf,bts); | 321 | save_html.append(inbuf,bts); |
321 | } | 322 | } |
322 | if(skipping<0) return bts; | 323 | if(skipping<0) return bts; |
323 | } | 324 | } |
324 | if(skipping<0) return 0; | 325 | if(skipping<0) return 0; |
325 | bool rp = parse(inbuf,bytes,false); | 326 | bool rp = parse(inbuf,bytes,false); |
326 | if(!rp) { | 327 | if(!rp) { |
327 | parser_choked = true; | 328 | parser_choked = true; |
328 | skipping = -1; | 329 | skipping = -1; |
329 | if(!(xmode&xmode_html)) | 330 | if(!(xmode&xmode_html)) |
330 | bytes = 0; | 331 | bytes = 0; |
331 | } | 332 | } |
332 | return bytes; | 333 | return bytes; |
333 | } | 334 | } |
@@ -524,64 +525,71 @@ namespace opkele { | |||
524 | html_openid1.uris.add(-1,xrd::uri_t(href)); | 525 | html_openid1.uris.add(-1,xrd::uri_t(href)); |
525 | else if(rel=="openid.delegate") | 526 | else if(rel=="openid.delegate") |
526 | html_openid1.local_ids.add(-1,href); | 527 | html_openid1.local_ids.add(-1,href); |
527 | else if(rel=="openid2.provider") | 528 | else if(rel=="openid2.provider") |
528 | html_openid2.uris.add(-1,xrd::uri_t(href)); | 529 | html_openid2.uris.add(-1,xrd::uri_t(href)); |
529 | else if(rel=="openid2.local_id") | 530 | else if(rel=="openid2.local_id") |
530 | html_openid2.local_ids.add(-1,href); | 531 | html_openid2.local_ids.add(-1,href); |
531 | } | 532 | } |
532 | }else if(is_element(n,"body")) { | 533 | }else if(is_element(n,"body")) { |
533 | skipping = -1; | 534 | skipping = -1; |
534 | } | 535 | } |
535 | } | 536 | } |
536 | 537 | ||
537 | void queue_endpoints(endpoint_discovery_iterator& oi, | 538 | void queue_endpoints(endpoint_discovery_iterator& oi, |
538 | const idiscovery_t &id, | 539 | const idiscovery_t &id, |
539 | const service_type_t *st) { | 540 | const service_type_t *st) { |
540 | openid_endpoint_t ep; | 541 | openid_endpoint_t ep; |
541 | ep.claimed_id = id.canonicalized_id; | 542 | ep.claimed_id = id.canonicalized_id; |
542 | for(xrd::services_t::const_iterator isvc=id.xrd.services.begin(); | 543 | for(xrd::services_t::const_iterator isvc=id.xrd.services.begin(); |
543 | isvc!=id.xrd.services.end(); ++isvc) { | 544 | isvc!=id.xrd.services.end(); ++isvc) { |
544 | const xrd::service_t svc = isvc->second; | 545 | const xrd::service_t svc = isvc->second; |
545 | if(svc.types.find(st->uri)==svc.types.end()) continue; | 546 | if(svc.types.find(st->uri)==svc.types.end()) continue; |
546 | for(xrd::uris_t::const_iterator iu=svc.uris.begin();iu!=svc.uris.end();++iu) { | 547 | for(xrd::uris_t::const_iterator iu=svc.uris.begin();iu!=svc.uris.end();++iu) { |
547 | ep.uri = iu->second.uri; | 548 | ep.uri = iu->second.uri; |
548 | if(id.xri_identity) { | 549 | if(id.xri_identity) { |
549 | if(iu->second.append=="qxri") { | 550 | if(iu->second.append=="qxri") { |
550 | ep.uri += id.normalized_id; | 551 | ep.uri += id.normalized_id; |
551 | } /* TODO: else handle other append attribute values */ | 552 | } /* TODO: else handle other append attribute values */ |
552 | } | 553 | } |
553 | if(st->forceid) { | 554 | if(st->forceid) { |
554 | ep.local_id = ep.claimed_id = st->forceid; | 555 | ep.local_id = ep.claimed_id = st->forceid; |
555 | *(oi++) = ep; | 556 | *(oi++) = ep; |
556 | }else{ | 557 | }else{ |
557 | if(svc.local_ids.empty()) { | 558 | if(svc.local_ids.empty()) { |
558 | ep.local_id = ep.claimed_id; | 559 | ep.local_id = ep.claimed_id; |
559 | *(oi++) = ep; | 560 | *(oi++) = ep; |
560 | }else{ | 561 | }else{ |
561 | for(xrd::local_ids_t::const_iterator ilid=svc.local_ids.begin(); | 562 | for(xrd::local_ids_t::const_iterator ilid=svc.local_ids.begin(); |
562 | ilid!=svc.local_ids.end(); ++ilid) { | 563 | ilid!=svc.local_ids.end(); ++ilid) { |
563 | ep.local_id = ilid->second; | 564 | ep.local_id = ilid->second; |
564 | *(oi++) = ep; | 565 | *(oi++) = ep; |
565 | } | 566 | } |
566 | } | 567 | } |
567 | } | 568 | } |
568 | } | 569 | } |
569 | } | 570 | } |
570 | } | 571 | } |
571 | 572 | ||
573 | int unknown_encoding(const XML_Char *n,XML_Encoding *i) { | ||
574 | for(int ii=0;ii < sizeof(i->map)/sizeof(i->map[0]);++ii) | ||
575 | i->map[ii] = ii; | ||
576 | i->convert = 0; i->release = 0; | ||
577 | return XML_STATUS_OK; | ||
578 | } | ||
579 | |||
572 | }; | 580 | }; |
573 | 581 | ||
574 | string idiscover(endpoint_discovery_iterator oi,const string& identity) { | 582 | string idiscover(endpoint_discovery_iterator oi,const string& identity) { |
575 | idigger_t idigger; | 583 | idigger_t idigger; |
576 | return idigger.discover(oi,identity); | 584 | return idigger.discover(oi,identity); |
577 | } | 585 | } |
578 | 586 | ||
579 | void yadiscover(endpoint_discovery_iterator oi,const string& yurl,const char **types,bool redirs) try { | 587 | void yadiscover(endpoint_discovery_iterator oi,const string& yurl,const char **types,bool redirs) try { |
580 | idigger_t idigger; | 588 | idigger_t idigger; |
581 | idigger.yadiscover(oi,yurl,types,redirs); | 589 | idigger.yadiscover(oi,yurl,types,redirs); |
582 | }catch(exception_curl& ec) { | 590 | }catch(exception_curl& ec) { |
583 | if(redirs || ec._error!=CURLE_TOO_MANY_REDIRECTS) | 591 | if(redirs || ec._error!=CURLE_TOO_MANY_REDIRECTS) |
584 | throw; | 592 | throw; |
585 | } | 593 | } |
586 | 594 | ||
587 | } | 595 | } |