summaryrefslogtreecommitdiffabout
authorMichael Krelin <hacker@klever.net>2008-01-06 23:00:18 (UTC)
committer Michael Krelin <hacker@klever.net>2008-01-06 23:05:57 (UTC)
commit7b5a82f255a85fe2ef466b68e40d9eb1829c633a (patch) (side-by-side diff)
treee466813db17bd974de0b0d277f35219c393c5796
parent6a843bb112988976892058c56d5fe2f24e24300a (diff)
downloadlibopkele-7b5a82f255a85fe2ef466b68e40d9eb1829c633a.zip
libopkele-7b5a82f255a85fe2ef466b68e40d9eb1829c633a.tar.gz
libopkele-7b5a82f255a85fe2ef466b68e40d9eb1829c633a.tar.bz2
Revert "discovery: stop parser with XML_StopParser()" for now
This reverts commit 7bde7f66284b47a75bbceadc360e7f03550ace21. because some older expat version do not support this function I don't want to make it conditional for now. Conflicts: lib/discovery.cc Signed-off-by: Michael Krelin <hacker@klever.net>
Diffstat (more/less context) (show whitespace changes)
-rw-r--r--include/opkele/expat.h6
-rw-r--r--lib/discovery.cc11
2 files changed, 7 insertions, 10 deletions
diff --git a/include/opkele/expat.h b/include/opkele/expat.h
index 97ed61a..60c41ac 100644
--- a/include/opkele/expat.h
+++ b/include/opkele/expat.h
@@ -1,93 +1,91 @@
#ifndef __OPKELE_EXPAT_H
#define __OPKELE_EXPAT_H
#include <cassert>
#include <expat.h>
namespace opkele {
namespace util {
class expat_t {
public:
XML_Parser _x;
expat_t() : _x(0) { }
expat_t(XML_Parser x) : _x(x) { }
virtual ~expat_t() throw();
expat_t& operator=(XML_Parser x);
operator const XML_Parser(void) const { return _x; }
operator XML_Parser(void) { return _x; }
inline bool parse(const char *s,int len,bool final=false) {
assert(_x);
- return XML_Parse(_x,s,len,final); }
- enum XML_Status stop_parser(bool resumable=false) {
- assert(_x);
- return XML_StopParser(_x,resumable); }
+ return XML_Parse(_x,s,len,final);
+ }
virtual void start_element(const XML_Char *n,const XML_Char **a) { }
virtual void end_element(const XML_Char *n) { }
void set_element_handler();
virtual void character_data(const XML_Char *s,int l) { }
void set_character_data_handler();
virtual void processing_instruction(const XML_Char *t,const XML_Char *d) { }
void set_processing_instruction_handler();
virtual void comment(const XML_Char *d) { }
void set_comment_handler();
virtual void start_cdata_section() { }
virtual void end_cdata_section() { }
void set_cdata_section_handler();
virtual void default_handler(const XML_Char *s,int l) { }
void set_default_handler();
void set_default_handler_expand();
virtual void start_namespace_decl(const XML_Char *p,const XML_Char *u) { }
virtual void end_namespace_decl(const XML_Char *p) { }
void set_namespace_decl_handler();
inline enum XML_Error get_error_code() {
assert(_x); return XML_GetErrorCode(_x); }
static inline const XML_LChar *error_string(XML_Error c) {
return XML_ErrorString(c); }
inline long get_current_byte_index() {
assert(_x); return XML_GetCurrentByteIndex(_x); }
inline int get_current_line_number() {
assert(_x); return XML_GetCurrentLineNumber(_x); }
inline int get_current_column_number() {
assert(_x); return XML_GetCurrentColumnNumber(_x); }
inline void set_user_data() {
assert(_x); XML_SetUserData(_x,this); }
inline bool set_base(const XML_Char *b) {
assert(_x); return XML_SetBase(_x,b); }
inline const XML_Char *get_base() {
assert(_x); return XML_GetBase(_x); }
inline int get_specified_attribute_count() {
assert(_x); return XML_GetSpecifiedAttributeCount(_x); }
inline bool set_param_entity_parsing(enum XML_ParamEntityParsing c) {
assert(_x); return XML_SetParamEntityParsing(_x,c); }
inline static XML_Parser parser_create(const XML_Char *e=0) {
return XML_ParserCreate(e); }
inline static XML_Parser parser_create_ns(const XML_Char *e=0,XML_Char s='\t') {
return XML_ParserCreateNS(e,s); }
};
}
}
#endif /* __OPKELE_EXPAT_H */
diff --git a/lib/discovery.cc b/lib/discovery.cc
index af9686a..d868308 100644
--- a/lib/discovery.cc
+++ b/lib/discovery.cc
@@ -30,418 +30,417 @@ namespace opkele {
int nl = strlen(n), enl = strlen(en);
if( (nl>=(enl+1)) && n[nl-enl-1]=='\t'
&& !strcasecmp(&n[nl-enl],en) )
return true;
return false;
}
static long element_priority(const XML_Char **a) {
for(;*a;++a)
if(!strcasecmp(*(a++),"priority")) {
long rv;
return (sscanf(*a,"%ld",&rv)==1)?rv:-1;
}
return -1;
}
class idigger_t : public util::curl_t, public util::expat_t {
public:
string xri_proxy;
enum {
xmode_html = 1, xmode_xrd = 2
};
int xmode;
string xrds_location;
string http_content_type;
service_t html_openid1;
service_t html_openid2;
string cdata_buf;
long status_code;
string status_string;
typedef list<string> pt_stack_t;
pt_stack_t pt_stack;
int skipping;
bool parser_choked;
string save_html;
XRD_t *xrd;
service_t *xrd_service;
string* cdata;
idigger_t()
: util::curl_t(easy_init()),
util::expat_t(0),
xri_proxy(XRI_PROXY_URL) {
CURLcode r;
(r=misc_sets())
|| (r=set_write())
|| (r=set_header())
;
if(r)
throw exception_curl(OPKELE_CP_ "failed to set curly options",r);
}
~idigger_t() throw() { }
void discover(idiscovery_t& result,const string& identity) {
result.clear();
string::size_type fsc = identity.find_first_not_of(whitespace);
if(fsc==string::npos)
throw bad_input(OPKELE_CP_ "whtiespace-only identity");
string::size_type lsc = identity.find_last_not_of(whitespace);
assert(lsc!=string::npos);
if(!strncasecmp(identity.c_str()+fsc,"xri://",sizeof("xri://")-1))
fsc += sizeof("xri://")-1;
if((fsc+1)>=lsc)
throw bad_input(OPKELE_CP_ "not a character of importance in identity");
string id(identity,fsc,lsc-fsc+1);
if(strchr(i_leaders,id[0])) {
result.normalized_id = id;
result.xri_identity = true;
/* TODO: further canonicalize xri identity? Like folding case or whatever... */
discover_at(
result,
xri_proxy + util::url_encode(id)+
"?_xrd_r=application/xrd+xml;sep=false", xmode_xrd);
if(status_code!=100)
throw failed_xri_resolution(OPKELE_CP_
"XRI resolution failed with '"+status_string+"' message",status_code);
if(result.xrd.canonical_ids.empty())
throw opkele::failed_discovery(OPKELE_CP_ "No CanonicalID for XRI identity found");
result.canonicalized_id = result.xrd.canonical_ids.begin()->second;
}else{
result.xri_identity = false;
if(id.find("://")==string::npos)
id.insert(0,"http://");
string::size_type fp = id.find('#');
if(fp!=string::npos) {
string::size_type qp = id.find('?');
if(qp==string::npos || qp<fp)
id.erase(fp);
else if(qp>fp)
id.erase(fp,qp-fp);
}
result.normalized_id = util::rfc_3986_normalize_uri(id);
discover_at(result,id,xmode_html|xmode_xrd);
const char * eu = 0;
CURLcode r = easy_getinfo(CURLINFO_EFFECTIVE_URL,&eu);
if(r)
throw exception_curl(OPKELE_CP_ "failed to get CURLINFO_EFFECTIVE_URL",r);
result.canonicalized_id = util::rfc_3986_normalize_uri(eu); /* XXX: strip fragment part? */
if(xrds_location.empty()) {
html2xrd(result.xrd);
}else{
discover_at(result,xrds_location,xmode_xrd);
if(result.xrd.empty())
html2xrd(result.xrd);
}
}
}
void discover_at(idiscovery_t& result,const string& url,int xm) {
CURLcode r = easy_setopt(CURLOPT_URL,url.c_str());
if(r)
throw exception_curl(OPKELE_CP_ "failed to set culry urlie",r);
http_content_type.clear();
xmode = xm;
prepare_to_parse();
if(xmode&xmode_html) {
xrds_location.clear();
save_html.clear();
save_html.reserve(max_html);
}
xrd = &result.xrd;
r = easy_perform();
if(r && r!=CURLE_WRITE_ERROR)
throw exception_curl(OPKELE_CP_ "failed to perform curly request",r);
if(!parser_choked) {
parse(0,0,true);
}else{
/* TODO: do not bother if we've seen xml */
try {
util::tidy_doc_t td = util::tidy_doc_t::create();
if(!td)
throw exception_tidy(OPKELE_CP_ "failed to create htmltidy document");
#ifndef NDEBUG
td.opt_set(TidyQuiet,false);
td.opt_set(TidyShowWarnings,false);
#endif /* NDEBUG */
td.opt_set(TidyForceOutput,true);
td.opt_set(TidyXhtmlOut,true);
td.opt_set(TidyDoctypeMode,TidyDoctypeOmit);
td.opt_set(TidyMark,false);
if(td.parse_string(save_html)<=0)
throw exception_tidy(OPKELE_CP_ "tidy failed to parse document");
if(td.clean_and_repair()<=0)
throw exception_tidy(OPKELE_CP_ "tidy failed to clean and repair");
util::tidy_buf_t tide;
if(td.save_buffer(tide)<=0)
throw exception_tidy(OPKELE_CP_ "tidy failed to save buffer");
prepare_to_parse();
parse(tide.c_str(),tide.size(),true);
}catch(exception_tidy& et) { }
}
save_html.clear();
}
void prepare_to_parse() {
(*(expat_t*)this) = parser_create_ns();
set_user_data(); set_element_handler();
set_character_data_handler();
if(xmode&xmode_html) {
html_openid1.clear(); html_openid2.clear();
parser_choked = false;
}
cdata = 0; xrd_service = 0; skipping = 0;
status_code = 100; status_string.clear();
}
void html2xrd(XRD_t& x) {
if(!html_openid1.uris.empty()) {
html_openid1.types.insert(STURI_OPENID11);
x.services.add(-1,html_openid1);
}
if(!html_openid2.uris.empty()) {
html_openid2.types.insert(STURI_OPENID20);
x.services.add(-1,html_openid2);
}
}
size_t write(void *p,size_t s,size_t nm) {
/* TODO: limit total size */
size_t bytes = s*nm;
const char *inbuf = (const char*)p;
if(xmode&xmode_html) {
size_t mbts = save_html.capacity()-save_html.size();
size_t bts = 0;
if(mbts>0) {
bts = (bytes>mbts)?mbts:bytes;
save_html.append(inbuf,bts);
}
if(skipping<0) return bts;
}
if(skipping<0) return 0;
bool rp = parse(inbuf,bytes,false);
if(!rp) {
parser_choked = true;
skipping = -1;
if(!(xmode&xmode_html))
bytes = 0;
}
return bytes;
}
size_t header(void *p,size_t s,size_t nm) {
size_t bytes = s*nm;
const char *h = (const char*)p;
const char *colon = (const char*)memchr(p,':',bytes);
const char *space = (const char*)memchr(p,' ',bytes);
if(space && ( (!colon) || space<colon ) ) {
xrds_location.clear(); http_content_type.clear();
}else if(colon) {
const char *hv = ++colon;
int hnl = colon-h;
int rb;
for(rb = bytes-hnl-1;rb>0 && isspace(*hv);++hv,--rb);
while(rb>0 && isspace(hv[rb-1])) --rb;
if(rb) {
if( (hnl>=sizeof(XRDS_HEADER))
&& !strncasecmp(h,XRDS_HEADER":",
sizeof(XRDS_HEADER)) ) {
xrds_location.assign(hv,rb);
}else if( (hnl>=sizeof(CT_HEADER))
&& !strncasecmp(h,CT_HEADER":",
sizeof(CT_HEADER)) ) {
const char *sc = (const char*)memchr(
hv,';',rb);
http_content_type.assign(hv,sc?(sc-hv):rb);
}
}
}
return curl_t::header(p,s,nm);
}
void start_element(const XML_Char *n,const XML_Char **a) {
if(skipping<0) return;
if(skipping) {
if(xmode&xmode_html)
html_start_element(n,a);
++skipping; return;
}
if(pt_stack.empty()) {
if(is_qelement(n,NSURI_XRDS "\tXRDS"))
return;
if(is_qelement(n,NSURI_XRD "\tXRD")) {
assert(xrd);
xrd->clear();
pt_stack.push_back(n);
}else if(xmode&xmode_html) {
html_start_element(n,a);
}else{
- skipping = -1; stop_parser();
+ skipping = -1;
}
}else{
int pt_s = pt_stack.size();
if(pt_s==1) {
if(is_qelement(n,NSURI_XRD "\tCanonicalID")) {
assert(xrd);
cdata = &(xrd->canonical_ids.add(element_priority(a),string()));
}else if(is_qelement(n,NSURI_XRD "\tLocalID")) {
assert(xrd);
cdata = &(xrd->local_ids.add(element_priority(a),string()));
}else if(is_qelement(n,NSURI_XRD "\tProviderID")) {
assert(xrd);
cdata = &(xrd->provider_id);
}else if(is_qelement(n,NSURI_XRD "\tService")) {
assert(xrd);
xrd_service = &(xrd->services.add(element_priority(a),
service_t()));
pt_stack.push_back(n);
}else if(is_qelement(n,NSURI_XRD "\tStatus")) {
for(;*a;) {
if(!strcasecmp(*(a++),"code")) {
if(sscanf(*(a++),"%ld",&status_code)==1 && status_code!=100) {
cdata = &status_string;
pt_stack.push_back(n);
break;
}
}
}
}else if(is_qelement(n,NSURI_XRD "\tExpires")) {
assert(xrd);
cdata_buf.clear();
cdata = &cdata_buf;
}else if(xmode&xmode_html) {
html_start_element(n,a);
}else{
skipping = 1;
}
}else if(pt_s==2) {
if(is_qelement(pt_stack.back().c_str(), NSURI_XRD "\tService")) {
if(is_qelement(n,NSURI_XRD "\tType")) {
assert(xrd); assert(xrd_service);
cdata_buf.clear();
cdata = &cdata_buf;
}else if(is_qelement(n,NSURI_XRD "\tURI")) {
assert(xrd); assert(xrd_service);
cdata = &(xrd_service->uris.add(element_priority(a),string()));
}else if(is_qelement(n,NSURI_XRD "\tLocalID")
|| is_qelement(n,NSURI_OPENID10 "\tDelegate") ) {
assert(xrd); assert(xrd_service);
cdata = &(xrd_service->local_ids.add(element_priority(a),string()));
}else if(is_qelement(n,NSURI_XRD "\tProviderID")) {
assert(xrd); assert(xrd_service);
cdata = &(xrd_service->provider_id);
}else{
skipping = 1;
}
}else
skipping = 1;
}else if(xmode&xmode_html) {
html_start_element(n,a);
}else{
skipping = 1;
}
}
}
void end_element(const XML_Char *n) {
if(skipping<0) return;
if(skipping) {
--skipping; return;
}
if(is_qelement(n,NSURI_XRD "\tType")) {
assert(xrd); assert(xrd_service); assert(cdata==&cdata_buf);
xrd_service->types.insert(cdata_buf);
}else if(is_qelement(n,NSURI_XRD "\tService")) {
assert(xrd); assert(xrd_service);
assert(!pt_stack.empty());
assert(pt_stack.back()==(NSURI_XRD "\tService"));
pt_stack.pop_back();
xrd_service = 0;
}else if(is_qelement(n,NSURI_XRD "\tStatus")) {
assert(xrd);
if(is_qelement(pt_stack.back().c_str(),n)) {
assert(cdata==&status_string);
pt_stack.pop_back();
- if(status_code!=100) {
- skipping = -1; stop_parser();
- }
+ if(status_code!=100)
+ skipping = -1;
}
}else if(is_qelement(n,NSURI_XRD "\tExpires")) {
assert(xrd);
xrd->expires = util::w3c_to_time(cdata_buf);
}else if((xmode&xmode_html) && is_element(n,"head")) {
- skipping = -1; stop_parser();
+ skipping = -1;
}
cdata = 0;
}
void character_data(const XML_Char *s,int l) {
if(skipping) return;
if(cdata) cdata->append(s,l);
}
void html_start_element(const XML_Char *n,const XML_Char **a) {
if(is_element(n,"meta")) {
bool heq = false;
string l;
for(;*a;a+=2) {
if(!( strcasecmp(a[0],"http-equiv")
|| strcasecmp(a[1],XRDS_HEADER) ))
heq = true;
else if(!strcasecmp(a[0],"content"))
l.assign(a[1]);
}
if(heq)
xrds_location = l;
}else if(is_element(n,"link")) {
string rels;
string href;
for(;*a;a+=2) {
if( !strcasecmp(a[0],"rel") ) {
rels.assign(a[1]);
}else if( !strcasecmp(a[0],"href") ) {
const char *ns = a[1];
for(;*ns && isspace(*ns);++ns);
href.assign(ns);
string::size_type lns=href.find_last_not_of(whitespace);
href.erase(lns+1);
}
}
for(string::size_type ns=rels.find_first_not_of(whitespace);
ns!=string::npos; ns=rels.find_first_not_of(whitespace,ns)) {
string::size_type s = rels.find_first_of(whitespace,ns);
string rel;
if(s==string::npos) {
rel.assign(rels,ns,string::npos);
ns = string::npos;
}else{
rel.assign(rels,ns,s-ns);
ns = s;
}
if(rel=="openid.server")
html_openid1.uris.add(-1,href);
else if(rel=="openid.delegate")
html_openid1.local_ids.add(-1,href);
else if(rel=="openid2.provider")
html_openid2.uris.add(-1,href);
else if(rel=="openid2.local_id")
html_openid2.local_ids.add(-1,href);
}
}else if(is_element(n,"body")) {
- skipping = -1; stop_parser();
+ skipping = -1;
}
}
};
void idiscover(idiscovery_t& result,const string& identity) {
idigger_t idigger;
idigger.discover(result,identity);
}
}