summaryrefslogtreecommitdiffabout
authorMichael Krelin <hacker@klever.net>2008-01-05 21:47:04 (UTC)
committer Michael Krelin <hacker@klever.net>2008-01-05 22:03:59 (UTC)
commit76f52a8fd79dd12680752c017d67d4be01f0afbc (patch) (unidiff)
tree42d640112a0381707d36ac3d72e48937978d911e
parenta0719fb611507d8b9962b87c600855d8837fc266 (diff)
downloadlibopkele-76f52a8fd79dd12680752c017d67d4be01f0afbc.zip
libopkele-76f52a8fd79dd12680752c017d67d4be01f0afbc.tar.gz
libopkele-76f52a8fd79dd12680752c017d67d4be01f0afbc.tar.bz2
Made HTML discovery more robust by using htmltidy
Now, when parsing a document that we expect might be HTML, we also save the first 16K of the document to a buffer; if the parser chokes, we run the saved data through htmltidy and feed the output to the parser again. Signed-off-by: Michael Krelin <hacker@klever.net>
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r--lib/Makefile.am4
-rw-r--r--lib/discovery.cc75
2 files changed, 66 insertions, 13 deletions
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 9c73e96..989de28 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -1,31 +1,31 @@
1lib_LTLIBRARIES = libopkele.la 1lib_LTLIBRARIES = libopkele.la
2 2
3AM_CPPFLAGS = ${CPPFLAGS_DEBUG} 3AM_CPPFLAGS = ${CPPFLAGS_DEBUG}
4DEFAULT_INCLUDES = -I${top_builddir} 4DEFAULT_INCLUDES = -I${top_builddir}
5INCLUDES = \ 5INCLUDES = \
6 -I${top_srcdir}/include/ \ 6 -I${top_srcdir}/include/ \
7 ${KONFORKA_CFLAGS} \ 7 ${KONFORKA_CFLAGS} \
8 ${OPENSSL_CFLAGS} \ 8 ${OPENSSL_CFLAGS} \
9 ${LIBCURL_CPPFLAGS} \ 9 ${LIBCURL_CPPFLAGS} \
10 ${PCRE_CFLAGS} ${EXPAT_CFLAGS} 10 ${PCRE_CFLAGS} ${EXPAT_CFLAGS} ${TIDY_CFLAGS}
11libopkele_la_LIBADD = \ 11libopkele_la_LIBADD = \
12 ${LIBCURL} \ 12 ${LIBCURL} \
13 ${PCRE_LIBS} ${EXPAT_LIBS} \ 13 ${PCRE_LIBS} ${EXPAT_LIBS} \
14 ${OPENSSL_LIBS} \ 14 ${OPENSSL_LIBS} \
15 ${KONFORKA_LIBS} 15 ${KONFORKA_LIBS} ${TIDY_LIBS}
16 16
17libopkele_la_SOURCES = \ 17libopkele_la_SOURCES = \
18 params.cc \ 18 params.cc \
19 util.cc \ 19 util.cc \
20 server.cc \ 20 server.cc \
21 secret.cc \ 21 secret.cc \
22 data.cc \ 22 data.cc \
23 consumer.cc \ 23 consumer.cc \
24 exception.cc \ 24 exception.cc \
25 extension.cc \ 25 extension.cc \
26 sreg.cc \ 26 sreg.cc \
27 extension_chain.cc \ 27 extension_chain.cc \
28 curl.cc expat.cc \ 28 curl.cc expat.cc \
29 discovery.cc 29 discovery.cc
30libopkele_la_LDFLAGS = \ 30libopkele_la_LDFLAGS = \
31 -version-info 2:0:0 31 -version-info 2:0:0
diff --git a/lib/discovery.cc b/lib/discovery.cc
index 8729cfb..a308b56 100644
--- a/lib/discovery.cc
+++ b/lib/discovery.cc
@@ -1,394 +1,447 @@
1#include <list> 1#include <list>
2#include <opkele/curl.h> 2#include <opkele/curl.h>
3#include <opkele/expat.h> 3#include <opkele/expat.h>
4#include <opkele/uris.h> 4#include <opkele/uris.h>
5#include <opkele/discovery.h> 5#include <opkele/discovery.h>
6#include <opkele/exception.h> 6#include <opkele/exception.h>
7#include <opkele/util.h> 7#include <opkele/util.h>
8#include <opkele/tidy.h>
8#include <opkele/debug.h> 9#include <opkele/debug.h>
9 10
10#include "config.h" 11#include "config.h"
11 12
12#define XRDS_HEADER "X-XRDS-Location" 13#define XRDS_HEADER "X-XRDS-Location"
13#define CT_HEADER "Content-Type" 14#define CT_HEADER "Content-Type"
14 15
15namespace opkele { 16namespace opkele {
16 using std::list; 17 using std::list;
17 using xrd::XRD_t; 18 using xrd::XRD_t;
18 using xrd::service_t; 19 using xrd::service_t;
19 20
20 static const char *whitespace = " \t\r\n"; 21 static const char *whitespace = " \t\r\n";
21 static const char *i_leaders = "=@+$!("; 22 static const char *i_leaders = "=@+$!(";
23 static const size_t max_html = 16384;
22 24
23 static inline bool is_qelement(const XML_Char *n,const char *qen) { 25 static inline bool is_qelement(const XML_Char *n,const char *qen) {
24 return !strcasecmp(n,qen); 26 return !strcasecmp(n,qen);
25 } 27 }
26 static inline bool is_element(const XML_Char *n,const char *en) { 28 static inline bool is_element(const XML_Char *n,const char *en) {
27 if(!strcasecmp(n,en)) return true; 29 if(!strcasecmp(n,en)) return true;
28 int nl = strlen(n), enl = strlen(en); 30 int nl = strlen(n), enl = strlen(en);
29 if( (nl>=(enl+1)) && n[nl-enl-1]=='\t' 31 if( (nl>=(enl+1)) && n[nl-enl-1]=='\t'
30 && !strcasecmp(&n[nl-enl],en) ) 32 && !strcasecmp(&n[nl-enl],en) )
31 return true; 33 return true;
32 return false; 34 return false;
33 } 35 }
34 36
35 static long element_priority(const XML_Char **a) { 37 static long element_priority(const XML_Char **a) {
36 for(;*a;++a) 38 for(;*a;++a)
37 if(!strcasecmp(*(a++),"priority")) { 39 if(!strcasecmp(*(a++),"priority")) {
38 long rv; 40 long rv;
39 return (sscanf(*a,"%ld",&rv)==1)?rv:-1; 41 return (sscanf(*a,"%ld",&rv)==1)?rv:-1;
40 } 42 }
41 return -1; 43 return -1;
42 } 44 }
43 45
44 class idigger_t : public util::curl_t, public util::expat_t { 46 class idigger_t : public util::curl_t, public util::expat_t {
45 public: 47 public:
46 string xri_proxy; 48 string xri_proxy;
47 49
48 enum { 50 enum {
49 xmode_html = 1, xmode_xrd = 2 51 xmode_html = 1, xmode_xrd = 2
50 }; 52 };
51 int xmode; 53 int xmode;
52 54
53 string xrds_location; 55 string xrds_location;
54 string http_content_type; 56 string http_content_type;
55 service_t html_openid1; 57 service_t html_openid1;
56 service_t html_openid2; 58 service_t html_openid2;
57 string cdata_buf; 59 string cdata_buf;
58 long status_code; 60 long status_code;
59 string status_string; 61 string status_string;
60 62
61 typedef list<string> pt_stack_t; 63 typedef list<string> pt_stack_t;
62 pt_stack_t pt_stack; 64 pt_stack_t pt_stack;
63 int skipping; 65 int skipping;
66 bool parser_choked;
67 string save_html;
64 68
65 XRD_t *xrd; 69 XRD_t *xrd;
66 service_t *xrd_service; 70 service_t *xrd_service;
67 string* cdata; 71 string* cdata;
68 72
69 idigger_t() 73 idigger_t()
70 : util::curl_t(easy_init()), 74 : util::curl_t(easy_init()),
71 util::expat_t(0), 75 util::expat_t(0),
72 xri_proxy(XRI_PROXY_URL) { 76 xri_proxy(XRI_PROXY_URL) {
73 CURLcode r; 77 CURLcode r;
74 (r=misc_sets()) 78 (r=misc_sets())
75 || (r=set_write()) 79 || (r=set_write())
76 || (r=set_header()) 80 || (r=set_header())
77 ; 81 ;
78 if(r) 82 if(r)
79 throw exception_curl(OPKELE_CP_ "failed to set curly options",r); 83 throw exception_curl(OPKELE_CP_ "failed to set curly options",r);
80 } 84 }
81 ~idigger_t() throw() { } 85 ~idigger_t() throw() { }
82 86
83 void discover(idiscovery_t& result,const string& identity) { 87 void discover(idiscovery_t& result,const string& identity) {
84 result.clear(); 88 result.clear();
85 string::size_type fsc = identity.find_first_not_of(whitespace); 89 string::size_type fsc = identity.find_first_not_of(whitespace);
86 if(fsc==string::npos) 90 if(fsc==string::npos)
87 throw bad_input(OPKELE_CP_ "whtiespace-only identity"); 91 throw bad_input(OPKELE_CP_ "whtiespace-only identity");
88 string::size_type lsc = identity.find_last_not_of(whitespace); 92 string::size_type lsc = identity.find_last_not_of(whitespace);
89 assert(lsc!=string::npos); 93 assert(lsc!=string::npos);
90 if(!strncasecmp(identity.c_str()+fsc,"xri://",sizeof("xri://")-1)) 94 if(!strncasecmp(identity.c_str()+fsc,"xri://",sizeof("xri://")-1))
91 fsc += sizeof("xri://")-1; 95 fsc += sizeof("xri://")-1;
92 if((fsc+1)>=lsc) 96 if((fsc+1)>=lsc)
93 throw bad_input(OPKELE_CP_ "not a character of importance in identity"); 97 throw bad_input(OPKELE_CP_ "not a character of importance in identity");
94 string id(identity,fsc,lsc-fsc+1); 98 string id(identity,fsc,lsc-fsc+1);
95 if(strchr(i_leaders,id[0])) { 99 if(strchr(i_leaders,id[0])) {
96 result.normalized_id = id; 100 result.normalized_id = id;
97 result.xri_identity = true; 101 result.xri_identity = true;
98 /* TODO: further canonicalize xri identity? Like folding case or whatever... */ 102 /* TODO: further canonicalize xri identity? Like folding case or whatever... */
99 discover_at( 103 discover_at(
100 result, 104 result,
101 xri_proxy + util::url_encode(id)+ 105 xri_proxy + util::url_encode(id)+
102 "?_xrd_r=application/xrd+xml;sep=false", xmode_xrd); 106 "?_xrd_r=application/xrd+xml;sep=false", xmode_xrd);
103 if(status_code!=100) 107 if(status_code!=100)
104 throw failed_xri_resolution(OPKELE_CP_ 108 throw failed_xri_resolution(OPKELE_CP_
105 "XRI resolution failed with '"+status_string+"' message",status_code); 109 "XRI resolution failed with '"+status_string+"' message",status_code);
106 if(result.xrd.canonical_ids.empty()) 110 if(result.xrd.canonical_ids.empty())
107 throw opkele::failed_discovery(OPKELE_CP_ "No CanonicalID for XRI identity found"); 111 throw opkele::failed_discovery(OPKELE_CP_ "No CanonicalID for XRI identity found");
108 result.canonicalized_id = result.xrd.canonical_ids.begin()->second; 112 result.canonicalized_id = result.xrd.canonical_ids.begin()->second;
109 }else{ 113 }else{
110 result.xri_identity = false; 114 result.xri_identity = false;
111 if(id.find("://")==string::npos) 115 if(id.find("://")==string::npos)
112 id.insert(0,"http://"); 116 id.insert(0,"http://");
113 string::size_type fp = id.find('#'); 117 string::size_type fp = id.find('#');
114 if(fp!=string::npos) { 118 if(fp!=string::npos) {
115 string::size_type qp = id.find('?'); 119 string::size_type qp = id.find('?');
116 if(qp==string::npos || qp<fp) 120 if(qp==string::npos || qp<fp)
117 id.erase(fp); 121 id.erase(fp);
118 else if(qp>fp) 122 else if(qp>fp)
119 id.erase(fp,qp-fp); 123 id.erase(fp,qp-fp);
120 } 124 }
121 result.normalized_id = util::rfc_3986_normalize_uri(id); 125 result.normalized_id = util::rfc_3986_normalize_uri(id);
122 discover_at(result,id,xmode_html|xmode_xrd); 126 discover_at(result,id,xmode_html|xmode_xrd);
123 const char * eu = 0; 127 const char * eu = 0;
124 CURLcode r = easy_getinfo(CURLINFO_EFFECTIVE_URL,&eu); 128 CURLcode r = easy_getinfo(CURLINFO_EFFECTIVE_URL,&eu);
125 if(r) 129 if(r)
126 throw exception_curl(OPKELE_CP_ "failed to get CURLINFO_EFFECTIVE_URL",r); 130 throw exception_curl(OPKELE_CP_ "failed to get CURLINFO_EFFECTIVE_URL",r);
127 result.canonicalized_id = util::rfc_3986_normalize_uri(eu); /* XXX: strip fragment part? */ 131 result.canonicalized_id = util::rfc_3986_normalize_uri(eu); /* XXX: strip fragment part? */
128 if(xrds_location.empty()) { 132 if(xrds_location.empty()) {
129 html2xrd(result.xrd); 133 html2xrd(result.xrd);
130 }else{ 134 }else{
131 discover_at(result,xrds_location,xmode_xrd); 135 discover_at(result,xrds_location,xmode_xrd);
132 if(result.xrd.empty()) 136 if(result.xrd.empty())
133 html2xrd(result.xrd); 137 html2xrd(result.xrd);
134 } 138 }
135 } 139 }
136 } 140 }
137 141
138 void discover_at(idiscovery_t& result,const string& url,int xm) { 142 void discover_at(idiscovery_t& result,const string& url,int xm) {
139 CURLcode r = easy_setopt(CURLOPT_URL,url.c_str()); 143 CURLcode r = easy_setopt(CURLOPT_URL,url.c_str());
140 if(r) 144 if(r)
141 throw exception_curl(OPKELE_CP_ "failed to set culry urlie",r); 145 throw exception_curl(OPKELE_CP_ "failed to set culry urlie",r);
142 146
143 (*(expat_t*)this) = parser_create_ns();
144 set_user_data(); set_element_handler();
145 set_character_data_handler();
146
147 http_content_type.clear(); 147 http_content_type.clear();
148 xmode = xm; 148 xmode = xm;
149 prepare_to_parse();
149 if(xmode&xmode_html) { 150 if(xmode&xmode_html) {
150 xrds_location.clear(); 151 xrds_location.clear();
151 html_openid1.clear(); html_openid2.clear(); 152 save_html.clear();
153 save_html.reserve(max_html);
152 } 154 }
153 xrd = &result.xrd; 155 xrd = &result.xrd;
154 cdata = 0; xrd_service = 0; skipping = 0;
155 status_code = 100; status_string.clear();
156 156
157 r = easy_perform(); 157 r = easy_perform();
158 if(r && r!=CURLE_WRITE_ERROR) 158 if(r && r!=CURLE_WRITE_ERROR)
159 throw exception_curl(OPKELE_CP_ "failed to perform curly request",r); 159 throw exception_curl(OPKELE_CP_ "failed to perform curly request",r);
160 160
161 parse(0,0,true); 161 if(!parser_choked) {
162 parse(0,0,true);
163 }else{
164 /* TODO: do not bother if we've seen xml */
165 try {
166 util::tidy_doc_t td = util::tidy_doc_t::create();
167 if(!td)
168 throw exception_tidy(OPKELE_CP_ "failed to create htmltidy document");
169#ifndef NDEBUG
170 td.opt_set(TidyQuiet,false);
171 td.opt_set(TidyShowWarnings,false);
172 td.opt_set(TidyForceOutput,true);
173 td.opt_set(TidyXhtmlOut,true);
174 td.opt_set(TidyDoctypeMode,TidyDoctypeOmit);
175 td.opt_set(TidyMark,false);
176#endif /* NDEBUG */
177 if(td.parse_string(save_html)<=0)
178 throw exception_tidy(OPKELE_CP_ "tidy failed to parse document");
179 if(td.clean_and_repair()<=0)
180 throw exception_tidy(OPKELE_CP_ "tidy failed to clean and repair");
181 util::tidy_buf_t tide;
182 if(td.save_buffer(tide)<=0)
183 throw exception_tidy(OPKELE_CP_ "tidy failed to save buffer");
184 prepare_to_parse();
185 parse(tide.c_str(),tide.size(),true);
186 }catch(exception_tidy& et) { }
187 }
188 save_html.clear();
189 }
190
191 void prepare_to_parse() {
192 (*(expat_t*)this) = parser_create_ns();
193 set_user_data(); set_element_handler();
194 set_character_data_handler();
195
196 if(xmode&xmode_html) {
197 html_openid1.clear(); html_openid2.clear();
198 parser_choked = false;
199 }
200
201 cdata = 0; xrd_service = 0; skipping = 0;
202 status_code = 100; status_string.clear();
162 } 203 }
163 204
164 void html2xrd(XRD_t& x) { 205 void html2xrd(XRD_t& x) {
165 if(!html_openid1.uris.empty()) { 206 if(!html_openid1.uris.empty()) {
166 html_openid1.types.insert(STURI_OPENID11); 207 html_openid1.types.insert(STURI_OPENID11);
167 x.services.add(-1,html_openid1); 208 x.services.add(-1,html_openid1);
168 } 209 }
169 if(!html_openid2.uris.empty()) { 210 if(!html_openid2.uris.empty()) {
170 html_openid2.types.insert(STURI_OPENID20); 211 html_openid2.types.insert(STURI_OPENID20);
171 x.services.add(-1,html_openid2); 212 x.services.add(-1,html_openid2);
172 } 213 }
173 } 214 }
174 215
175 size_t write(void *p,size_t s,size_t nm) { 216 size_t write(void *p,size_t s,size_t nm) {
176 if(skipping<0) return 0;
177 /* TODO: limit total size */ 217 /* TODO: limit total size */
178 size_t bytes = s*nm; 218 size_t bytes = s*nm;
179 bool rp = parse((const char *)p,bytes,false); 219 const char *inbuf = (const char*)p;
220 if(xmode&xmode_html) {
221 size_t mbts = save_html.capacity()-save_html.size();
222 size_t bts = 0;
223 if(mbts>0) {
224 bts = (bytes>mbts)?mbts:bytes;
225 save_html.append(inbuf,bts);
226 }
227 if(skipping<0) return bts;
228 }
229 if(skipping<0) return 0;
230 bool rp = parse(inbuf,bytes,false);
180 if(!rp) { 231 if(!rp) {
232 parser_choked = true;
181 skipping = -1; 233 skipping = -1;
182 bytes = 0; 234 if(!(xmode&xmode_html))
235 bytes = 0;
183 } 236 }
184 return bytes; 237 return bytes;
185 } 238 }
186 size_t header(void *p,size_t s,size_t nm) { 239 size_t header(void *p,size_t s,size_t nm) {
187 size_t bytes = s*nm; 240 size_t bytes = s*nm;
188 const char *h = (const char*)p; 241 const char *h = (const char*)p;
189 const char *colon = (const char*)memchr(p,':',bytes); 242 const char *colon = (const char*)memchr(p,':',bytes);
190 const char *space = (const char*)memchr(p,' ',bytes); 243 const char *space = (const char*)memchr(p,' ',bytes);
191 if(space && ( (!colon) || space<colon ) ) { 244 if(space && ( (!colon) || space<colon ) ) {
192 xrds_location.clear(); http_content_type.clear(); 245 xrds_location.clear(); http_content_type.clear();
193 }else if(colon) { 246 }else if(colon) {
194 const char *hv = ++colon; 247 const char *hv = ++colon;
195 int hnl = colon-h; 248 int hnl = colon-h;
196 int rb; 249 int rb;
197 for(rb = bytes-hnl-1;rb>0 && isspace(*hv);++hv,--rb); 250 for(rb = bytes-hnl-1;rb>0 && isspace(*hv);++hv,--rb);
198 while(rb>0 && isspace(hv[rb-1])) --rb; 251 while(rb>0 && isspace(hv[rb-1])) --rb;
199 if(rb) { 252 if(rb) {
200 if( (hnl>=sizeof(XRDS_HEADER)) 253 if( (hnl>=sizeof(XRDS_HEADER))
201 && !strncasecmp(h,XRDS_HEADER":", 254 && !strncasecmp(h,XRDS_HEADER":",
202 sizeof(XRDS_HEADER)) ) { 255 sizeof(XRDS_HEADER)) ) {
203 xrds_location.assign(hv,rb); 256 xrds_location.assign(hv,rb);
204 }else if( (hnl>=sizeof(CT_HEADER)) 257 }else if( (hnl>=sizeof(CT_HEADER))
205 && !strncasecmp(h,CT_HEADER":", 258 && !strncasecmp(h,CT_HEADER":",
206 sizeof(CT_HEADER)) ) { 259 sizeof(CT_HEADER)) ) {
207 const char *sc = (const char*)memchr( 260 const char *sc = (const char*)memchr(
208 hv,';',rb); 261 hv,';',rb);
209 http_content_type.assign(hv,sc?(sc-hv):rb); 262 http_content_type.assign(hv,sc?(sc-hv):rb);
210 } 263 }
211 } 264 }
212 } 265 }
213 return curl_t::header(p,s,nm); 266 return curl_t::header(p,s,nm);
214 } 267 }
215 268
216 void start_element(const XML_Char *n,const XML_Char **a) { 269 void start_element(const XML_Char *n,const XML_Char **a) {
217 if(skipping<0) return; 270 if(skipping<0) return;
218 if(skipping) { 271 if(skipping) {
219 if(xmode&xmode_html) 272 if(xmode&xmode_html)
220 html_start_element(n,a); 273 html_start_element(n,a);
221 ++skipping; return; 274 ++skipping; return;
222 } 275 }
223 if(pt_stack.empty()) { 276 if(pt_stack.empty()) {
224 if(is_qelement(n,NSURI_XRDS "\tXRDS")) 277 if(is_qelement(n,NSURI_XRDS "\tXRDS"))
225 return; 278 return;
226 if(is_qelement(n,NSURI_XRD "\tXRD")) { 279 if(is_qelement(n,NSURI_XRD "\tXRD")) {
227 assert(xrd); 280 assert(xrd);
228 xrd->clear(); 281 xrd->clear();
229 pt_stack.push_back(n); 282 pt_stack.push_back(n);
230 }else if(xmode&xmode_html) { 283 }else if(xmode&xmode_html) {
231 html_start_element(n,a); 284 html_start_element(n,a);
232 }else{ 285 }else{
233 skipping = -1; stop_parser(); 286 skipping = -1; stop_parser();
234 } 287 }
235 }else{ 288 }else{
236 int pt_s = pt_stack.size(); 289 int pt_s = pt_stack.size();
237 if(pt_s==1) { 290 if(pt_s==1) {
238 if(is_qelement(n,NSURI_XRD "\tCanonicalID")) { 291 if(is_qelement(n,NSURI_XRD "\tCanonicalID")) {
239 assert(xrd); 292 assert(xrd);
240 cdata = &(xrd->canonical_ids.add(element_priority(a),string())); 293 cdata = &(xrd->canonical_ids.add(element_priority(a),string()));
241 }else if(is_qelement(n,NSURI_XRD "\tLocalID")) { 294 }else if(is_qelement(n,NSURI_XRD "\tLocalID")) {
242 assert(xrd); 295 assert(xrd);
243 cdata = &(xrd->local_ids.add(element_priority(a),string())); 296 cdata = &(xrd->local_ids.add(element_priority(a),string()));
244 }else if(is_qelement(n,NSURI_XRD "\tProviderID")) { 297 }else if(is_qelement(n,NSURI_XRD "\tProviderID")) {
245 assert(xrd); 298 assert(xrd);
246 cdata = &(xrd->provider_id); 299 cdata = &(xrd->provider_id);
247 }else if(is_qelement(n,NSURI_XRD "\tService")) { 300 }else if(is_qelement(n,NSURI_XRD "\tService")) {
248 assert(xrd); 301 assert(xrd);
249 xrd_service = &(xrd->services.add(element_priority(a), 302 xrd_service = &(xrd->services.add(element_priority(a),
250 service_t())); 303 service_t()));
251 pt_stack.push_back(n); 304 pt_stack.push_back(n);
252 }else if(is_qelement(n,NSURI_XRD "\tStatus")) { 305 }else if(is_qelement(n,NSURI_XRD "\tStatus")) {
253 for(;*a;) { 306 for(;*a;) {
254 if(!strcasecmp(*(a++),"code")) { 307 if(!strcasecmp(*(a++),"code")) {
255 if(sscanf(*(a++),"%ld",&status_code)==1 && status_code!=100) { 308 if(sscanf(*(a++),"%ld",&status_code)==1 && status_code!=100) {
256 cdata = &status_string; 309 cdata = &status_string;
257 pt_stack.push_back(n); 310 pt_stack.push_back(n);
258 break; 311 break;
259 } 312 }
260 } 313 }
261 } 314 }
262 }else if(is_qelement(n,NSURI_XRD "\tExpires")) { 315 }else if(is_qelement(n,NSURI_XRD "\tExpires")) {
263 assert(xrd); 316 assert(xrd);
264 cdata_buf.clear(); 317 cdata_buf.clear();
265 cdata = &cdata_buf; 318 cdata = &cdata_buf;
266 }else if(xmode&xmode_html) { 319 }else if(xmode&xmode_html) {
267 html_start_element(n,a); 320 html_start_element(n,a);
268 }else{ 321 }else{
269 skipping = 1; 322 skipping = 1;
270 } 323 }
271 }else if(pt_s==2) { 324 }else if(pt_s==2) {
272 if(is_qelement(pt_stack.back().c_str(), NSURI_XRD "\tService")) { 325 if(is_qelement(pt_stack.back().c_str(), NSURI_XRD "\tService")) {
273 if(is_qelement(n,NSURI_XRD "\tType")) { 326 if(is_qelement(n,NSURI_XRD "\tType")) {
274 assert(xrd); assert(xrd_service); 327 assert(xrd); assert(xrd_service);
275 cdata_buf.clear(); 328 cdata_buf.clear();
276 cdata = &cdata_buf; 329 cdata = &cdata_buf;
277 }else if(is_qelement(n,NSURI_XRD "\tURI")) { 330 }else if(is_qelement(n,NSURI_XRD "\tURI")) {
278 assert(xrd); assert(xrd_service); 331 assert(xrd); assert(xrd_service);
279 cdata = &(xrd_service->uris.add(element_priority(a),string())); 332 cdata = &(xrd_service->uris.add(element_priority(a),string()));
280 }else if(is_qelement(n,NSURI_XRD "\tLocalID") 333 }else if(is_qelement(n,NSURI_XRD "\tLocalID")
281 || is_qelement(n,NSURI_OPENID10 "\tDelegate") ) { 334 || is_qelement(n,NSURI_OPENID10 "\tDelegate") ) {
282 assert(xrd); assert(xrd_service); 335 assert(xrd); assert(xrd_service);
283 cdata = &(xrd_service->local_ids.add(element_priority(a),string())); 336 cdata = &(xrd_service->local_ids.add(element_priority(a),string()));
284 }else if(is_qelement(n,NSURI_XRD "\tProviderID")) { 337 }else if(is_qelement(n,NSURI_XRD "\tProviderID")) {
285 assert(xrd); assert(xrd_service); 338 assert(xrd); assert(xrd_service);
286 cdata = &(xrd_service->provider_id); 339 cdata = &(xrd_service->provider_id);
287 }else{ 340 }else{
288 skipping = 1; 341 skipping = 1;
289 } 342 }
290 }else 343 }else
291 skipping = 1; 344 skipping = 1;
292 }else if(xmode&xmode_html) { 345 }else if(xmode&xmode_html) {
293 html_start_element(n,a); 346 html_start_element(n,a);
294 }else{ 347 }else{
295 skipping = 1; 348 skipping = 1;
296 } 349 }
297 } 350 }
298 } 351 }
299 void end_element(const XML_Char *n) { 352 void end_element(const XML_Char *n) {
300 if(skipping<0) return; 353 if(skipping<0) return;
301 if(skipping) { 354 if(skipping) {
302 --skipping; return; 355 --skipping; return;
303 } 356 }
304 if(is_qelement(n,NSURI_XRD "\tType")) { 357 if(is_qelement(n,NSURI_XRD "\tType")) {
305 assert(xrd); assert(xrd_service); assert(cdata==&cdata_buf); 358 assert(xrd); assert(xrd_service); assert(cdata==&cdata_buf);
306 xrd_service->types.insert(cdata_buf); 359 xrd_service->types.insert(cdata_buf);
307 }else if(is_qelement(n,NSURI_XRD "\tService")) { 360 }else if(is_qelement(n,NSURI_XRD "\tService")) {
308 assert(xrd); assert(xrd_service); 361 assert(xrd); assert(xrd_service);
309 assert(!pt_stack.empty()); 362 assert(!pt_stack.empty());
310 assert(pt_stack.back()==(NSURI_XRD "\tService")); 363 assert(pt_stack.back()==(NSURI_XRD "\tService"));
311 pt_stack.pop_back(); 364 pt_stack.pop_back();
312 xrd_service = 0; 365 xrd_service = 0;
313 }else if(is_qelement(n,NSURI_XRD "\tStatus")) { 366 }else if(is_qelement(n,NSURI_XRD "\tStatus")) {
314 assert(xrd); 367 assert(xrd);
315 if(is_qelement(pt_stack.back().c_str(),n)) { 368 if(is_qelement(pt_stack.back().c_str(),n)) {
316 assert(cdata==&status_string); 369 assert(cdata==&status_string);
317 pt_stack.pop_back(); 370 pt_stack.pop_back();
318 if(status_code!=100) { 371 if(status_code!=100) {
319 skipping = -1; stop_parser(); 372 skipping = -1; stop_parser();
320 } 373 }
321 } 374 }
322 }else if(is_qelement(n,NSURI_XRD "\tExpires")) { 375 }else if(is_qelement(n,NSURI_XRD "\tExpires")) {
323 assert(xrd); 376 assert(xrd);
324 xrd->expires = util::w3c_to_time(cdata_buf); 377 xrd->expires = util::w3c_to_time(cdata_buf);
325 }else if((xmode&xmode_html) && is_element(n,"head")) { 378 }else if((xmode&xmode_html) && is_element(n,"head")) {
326 skipping = -1; stop_parser(); 379 skipping = -1; stop_parser();
327 } 380 }
328 cdata = 0; 381 cdata = 0;
329 } 382 }
330 void character_data(const XML_Char *s,int l) { 383 void character_data(const XML_Char *s,int l) {
331 if(skipping) return; 384 if(skipping) return;
332 if(cdata) cdata->append(s,l); 385 if(cdata) cdata->append(s,l);
333 } 386 }
334 387
335 void html_start_element(const XML_Char *n,const XML_Char **a) { 388 void html_start_element(const XML_Char *n,const XML_Char **a) {
336 if(is_element(n,"meta")) { 389 if(is_element(n,"meta")) {
337 bool heq = false; 390 bool heq = false;
338 string l; 391 string l;
339 for(;*a;a+=2) { 392 for(;*a;a+=2) {
340 if(!( strcasecmp(a[0],"http-equiv") 393 if(!( strcasecmp(a[0],"http-equiv")
341 || strcasecmp(a[1],XRDS_HEADER) )) 394 || strcasecmp(a[1],XRDS_HEADER) ))
342 heq = true; 395 heq = true;
343 else if(!strcasecmp(a[0],"content")) 396 else if(!strcasecmp(a[0],"content"))
344 l.assign(a[1]); 397 l.assign(a[1]);
345 } 398 }
346 if(heq) 399 if(heq)
347 xrds_location = l; 400 xrds_location = l;
348 }else if(is_element(n,"link")) { 401 }else if(is_element(n,"link")) {
349 string rels; 402 string rels;
350 string href; 403 string href;
351 for(;*a;a+=2) { 404 for(;*a;a+=2) {
352 if( !strcasecmp(a[0],"rel") ) { 405 if( !strcasecmp(a[0],"rel") ) {
353 rels.assign(a[1]); 406 rels.assign(a[1]);
354 }else if( !strcasecmp(a[0],"href") ) { 407 }else if( !strcasecmp(a[0],"href") ) {
355 const char *ns = a[1]; 408 const char *ns = a[1];
356 for(;*ns && isspace(*ns);++ns); 409 for(;*ns && isspace(*ns);++ns);
357 href.assign(ns); 410 href.assign(ns);
358 string::size_type lns=href.find_last_not_of(whitespace); 411 string::size_type lns=href.find_last_not_of(whitespace);
359 href.erase(lns+1); 412 href.erase(lns+1);
360 } 413 }
361 } 414 }
362 for(string::size_type ns=rels.find_first_not_of(whitespace); 415 for(string::size_type ns=rels.find_first_not_of(whitespace);
363 ns!=string::npos; ns=rels.find_first_not_of(whitespace,ns)) { 416 ns!=string::npos; ns=rels.find_first_not_of(whitespace,ns)) {
364 string::size_type s = rels.find_first_of(whitespace,ns); 417 string::size_type s = rels.find_first_of(whitespace,ns);
365 string rel; 418 string rel;
366 if(s==string::npos) { 419 if(s==string::npos) {
367 rel.assign(rels,ns,string::npos); 420 rel.assign(rels,ns,string::npos);
368 ns = string::npos; 421 ns = string::npos;
369 }else{ 422 }else{
370 rel.assign(rels,ns,s-ns); 423 rel.assign(rels,ns,s-ns);
371 ns = s; 424 ns = s;
372 } 425 }
373 if(rel=="openid.server") 426 if(rel=="openid.server")
374 html_openid1.uris.add(-1,href); 427 html_openid1.uris.add(-1,href);
375 else if(rel=="openid.delegate") 428 else if(rel=="openid.delegate")
376 html_openid1.local_ids.add(-1,href); 429 html_openid1.local_ids.add(-1,href);
377 else if(rel=="openid2.provider") 430 else if(rel=="openid2.provider")
378 html_openid2.uris.add(-1,href); 431 html_openid2.uris.add(-1,href);
379 else if(rel=="openid2.local_id") 432 else if(rel=="openid2.local_id")
380 html_openid2.local_ids.add(-1,href); 433 html_openid2.local_ids.add(-1,href);
381 } 434 }
382 }else if(is_element(n,"body")) { 435 }else if(is_element(n,"body")) {
383 skipping = -1; stop_parser(); 436 skipping = -1; stop_parser();
384 } 437 }
385 } 438 }
386 439
387 }; 440 };
388 441
389 void idiscover(idiscovery_t& result,const string& identity) { 442 void idiscover(idiscovery_t& result,const string& identity) {
390 idigger_t idigger; 443 idigger_t idigger;
391 idigger.discover(result,identity); 444 idigger.discover(result,identity);
392 } 445 }
393 446
394} 447}