summary refs log tree commit diff about
path: root/lib
author Michael Krelin <hacker@klever.net> 2008-06-25 21:24:53 (UTC)
committer Michael Krelin <hacker@klever.net> 2008-06-25 21:24:53 (UTC)
commit 2744ea9808c3a6c50f8be78a9a71f6d3b2ba7e00 (patch) (unidiff)
tree a9c124efee2b4edbeaeb646f1b94928bdc4e8c4c /lib
parent 04aac0e097992df01e17fbbb62a5a7d9682bcea9 (diff)
download libopkele-2744ea9808c3a6c50f8be78a9a71f6d3b2ba7e00.zip
libopkele-2744ea9808c3a6c50f8be78a9a71f6d3b2ba7e00.tar.gz
libopkele-2744ea9808c3a6c50f8be78a9a71f6d3b2ba7e00.tar.bz2
improved detection of htmltidy for debian and co.
Signed-off-by: Michael Krelin <hacker@klever.net>
Diffstat (limited to 'lib') (more/less context) (ignore whitespace changes)
-rw-r--r-- lib/discovery.cc 3
1 files changed, 2 insertions, 1 deletions
diff --git a/lib/discovery.cc b/lib/discovery.cc
index 3b90977..d1989ec 100644
--- a/lib/discovery.cc
+++ b/lib/discovery.cc
@@ -1,583 +1,584 @@
1#include <list> 1#include <list>
2#include <opkele/curl.h> 2#include <opkele/curl.h>
3#include <opkele/expat.h> 3#include <opkele/expat.h>
4#include <opkele/uris.h> 4#include <opkele/uris.h>
5#include <opkele/discovery.h> 5#include <opkele/discovery.h>
6#include <opkele/exception.h> 6#include <opkele/exception.h>
7#include <opkele/util.h> 7#include <opkele/util.h>
8#include <opkele/tidy.h>
9#include <opkele/data.h> 8#include <opkele/data.h>
10#include <opkele/debug.h> 9#include <opkele/debug.h>
11 10
12#include "config.h" 11#include "config.h"
13 12
13#include <opkele/tidy.h>
14
14#define XRDS_HEADER "X-XRDS-Location" 15#define XRDS_HEADER "X-XRDS-Location"
15#define CT_HEADER "Content-Type" 16#define CT_HEADER "Content-Type"
16 17
17namespace opkele { 18namespace opkele {
18 using std::list; 19 using std::list;
19 using xrd::XRD_t; 20 using xrd::XRD_t;
20 using xrd::service_t; 21 using xrd::service_t;
21 22
22 /* TODO: the whole discovery thing needs cleanup and optimization due to 23 /* TODO: the whole discovery thing needs cleanup and optimization due to
23 * many changes of concept. */ 24 * many changes of concept. */
24 25
25 static const size_t max_html = 16384; 26 static const size_t max_html = 16384;
26 27
27 static const struct service_type_t { 28 static const struct service_type_t {
28 const char *uri; 29 const char *uri;
29 const char *forceid; 30 const char *forceid;
30 } op_service_types[] = { 31 } op_service_types[] = {
31 { STURI_OPENID20_OP, IDURI_SELECT20 }, 32 { STURI_OPENID20_OP, IDURI_SELECT20 },
32 { STURI_OPENID20, 0 }, 33 { STURI_OPENID20, 0 },
33 { STURI_OPENID11, 0 }, 34 { STURI_OPENID11, 0 },
34 { STURI_OPENID10, 0 } 35 { STURI_OPENID10, 0 }
35 }; 36 };
36 enum { 37 enum {
37 st_index_1 = 2, st_index_2 = 1 38 st_index_1 = 2, st_index_2 = 1
38 }; 39 };
39 40
40 41
41 static inline bool is_qelement(const XML_Char *n,const char *qen) { 42 static inline bool is_qelement(const XML_Char *n,const char *qen) {
42 return !strcasecmp(n,qen); 43 return !strcasecmp(n,qen);
43 } 44 }
44 static inline bool is_element(const XML_Char *n,const char *en) { 45 static inline bool is_element(const XML_Char *n,const char *en) {
45 if(!strcasecmp(n,en)) return true; 46 if(!strcasecmp(n,en)) return true;
46 int nl = strlen(n), enl = strlen(en); 47 int nl = strlen(n), enl = strlen(en);
47 if( (nl>=(enl+1)) && n[nl-enl-1]=='\t' 48 if( (nl>=(enl+1)) && n[nl-enl-1]=='\t'
48 && !strcasecmp(&n[nl-enl],en) ) 49 && !strcasecmp(&n[nl-enl],en) )
49 return true; 50 return true;
50 return false; 51 return false;
51 } 52 }
52 53
53 static long element_priority(const XML_Char **a) { 54 static long element_priority(const XML_Char **a) {
54 for(;*a;++a) 55 for(;*a;++a)
55 if(!strcasecmp(*(a++),"priority")) { 56 if(!strcasecmp(*(a++),"priority")) {
56 long rv; 57 long rv;
57 return (sscanf(*a,"%ld",&rv)==1)?rv:-1; 58 return (sscanf(*a,"%ld",&rv)==1)?rv:-1;
58 } 59 }
59 return -1; 60 return -1;
60 } 61 }
61 /* TODO: ideally all attributes should be 62 /* TODO: ideally all attributes should be
62 * retrieved in one run */ 63 * retrieved in one run */
63 static const char *element_attr(const XML_Char **a, const char *at) { 64 static const char *element_attr(const XML_Char **a, const char *at) {
64 for(;*a;++a) 65 for(;*a;++a)
65 if(!strcasecmp(*(a++),at)) { 66 if(!strcasecmp(*(a++),at)) {
66 return *a; 67 return *a;
67 } 68 }
68 return 0; 69 return 0;
69 } 70 }
70 71
71 class idigger_t : public util::curl_t, public util::expat_t { 72 class idigger_t : public util::curl_t, public util::expat_t {
72 public: 73 public:
73 string xri_proxy; 74 string xri_proxy;
74 75
75 enum { 76 enum {
76 xmode_html = 1, xmode_xrd = 2, xmode_cid = 4, 77 xmode_html = 1, xmode_xrd = 2, xmode_cid = 4,
77 xmode_noredirs = 8 78 xmode_noredirs = 8
78 }; 79 };
79 int xmode; 80 int xmode;
80 81
81 string xrds_location; 82 string xrds_location;
82 string http_content_type; 83 string http_content_type;
83 service_t html_openid1; 84 service_t html_openid1;
84 service_t html_openid2; 85 service_t html_openid2;
85 string cdata_buf; 86 string cdata_buf;
86 long status_code; 87 long status_code;
87 string status_string; 88 string status_string;
88 89
89 typedef list<string> pt_stack_t; 90 typedef list<string> pt_stack_t;
90 pt_stack_t pt_stack; 91 pt_stack_t pt_stack;
91 int skipping; 92 int skipping;
92 bool parser_choked; 93 bool parser_choked;
93 string save_html; 94 string save_html;
94 95
95 XRD_t *xrd; 96 XRD_t *xrd;
96 service_t *xrd_service; 97 service_t *xrd_service;
97 string* cdata; 98 string* cdata;
98 99
99 idigger_t() 100 idigger_t()
100 : util::curl_t(easy_init()), 101 : util::curl_t(easy_init()),
101 util::expat_t(0), 102 util::expat_t(0),
102 xri_proxy(XRI_PROXY_URL) { 103 xri_proxy(XRI_PROXY_URL) {
103 CURLcode r; 104 CURLcode r;
104 (r=misc_sets()) 105 (r=misc_sets())
105 || (r=set_write()) 106 || (r=set_write())
106 || (r=set_header()) 107 || (r=set_header())
107 ; 108 ;
108 if(r) 109 if(r)
109 throw exception_curl(OPKELE_CP_ "failed to set curly options",r); 110 throw exception_curl(OPKELE_CP_ "failed to set curly options",r);
110 } 111 }
111 ~idigger_t() throw() { } 112 ~idigger_t() throw() { }
112 113
113 void yadiscover(endpoint_discovery_iterator oi,const string& yurl,const char **types,bool redirs) { 114 void yadiscover(endpoint_discovery_iterator oi,const string& yurl,const char **types,bool redirs) {
114 idiscovery_t idis; 115 idiscovery_t idis;
115 idis.xri_identity = false; 116 idis.xri_identity = false;
116 discover_at(idis,yurl,xmode_html|xmode_xrd|(redirs?0:xmode_noredirs)); 117 discover_at(idis,yurl,xmode_html|xmode_xrd|(redirs?0:xmode_noredirs));
117 if(!xrds_location.empty()) { 118 if(!xrds_location.empty()) {
118 idis.clear(); 119 idis.clear();
119 discover_at(idis,xrds_location,xmode_xrd); 120 discover_at(idis,xrds_location,xmode_xrd);
120 } 121 }
121 idis.normalized_id = idis.canonicalized_id = yurl; 122 idis.normalized_id = idis.canonicalized_id = yurl;
122 service_type_t st; 123 service_type_t st;
123 for(st.uri=*types;*types;st.uri=*(++types)) 124 for(st.uri=*types;*types;st.uri=*(++types))
124 queue_endpoints(oi,idis,&st); 125 queue_endpoints(oi,idis,&st);
125 } 126 }
126 127
127 string discover(endpoint_discovery_iterator& oi,const string& identity) { 128 string discover(endpoint_discovery_iterator& oi,const string& identity) {
128 string rv; 129 string rv;
129 idiscovery_t idis; 130 idiscovery_t idis;
130 string::size_type fsc = identity.find_first_not_of(data::_whitespace_chars); 131 string::size_type fsc = identity.find_first_not_of(data::_whitespace_chars);
131 if(fsc==string::npos) 132 if(fsc==string::npos)
132 throw bad_input(OPKELE_CP_ "whitespace-only identity"); 133 throw bad_input(OPKELE_CP_ "whitespace-only identity");
133 string::size_type lsc = identity.find_last_not_of(data::_whitespace_chars); 134 string::size_type lsc = identity.find_last_not_of(data::_whitespace_chars);
134 assert(lsc!=string::npos); 135 assert(lsc!=string::npos);
135 if(!strncasecmp(identity.c_str()+fsc,"xri://",sizeof("xri://")-1)) 136 if(!strncasecmp(identity.c_str()+fsc,"xri://",sizeof("xri://")-1))
136 fsc += sizeof("xri://")-1; 137 fsc += sizeof("xri://")-1;
137 if((fsc+1)>=lsc) 138 if((fsc+1)>=lsc)
138 throw bad_input(OPKELE_CP_ "not a character of importance in identity"); 139 throw bad_input(OPKELE_CP_ "not a character of importance in identity");
139 string id(identity,fsc,lsc-fsc+1); 140 string id(identity,fsc,lsc-fsc+1);
140 idis.clear(); 141 idis.clear();
141 if(strchr(data::_iname_leaders,id[0])) { 142 if(strchr(data::_iname_leaders,id[0])) {
142 /* TODO: further normalize xri identity? Like folding case 143 /* TODO: further normalize xri identity? Like folding case
143 * or whatever... */ 144 * or whatever... */
144 rv = id; 145 rv = id;
145 set<string> cids; 146 set<string> cids;
146 for(const struct service_type_t *st=op_service_types; 147 for(const struct service_type_t *st=op_service_types;
147 st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) { 148 st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) {
148 idis.clear(); 149 idis.clear();
149 discover_at( idis, 150 discover_at( idis,
150 xri_proxy + util::url_encode(id)+ 151 xri_proxy + util::url_encode(id)+
151 "?_xrd_t="+util::url_encode(st->uri)+ 152 "?_xrd_t="+util::url_encode(st->uri)+
152 "&_xrd_r=application/xrd%2Bxml" 153 "&_xrd_r=application/xrd%2Bxml"
153 ";sep=true;refs=true", 154 ";sep=true;refs=true",
154 xmode_xrd ); 155 xmode_xrd );
155 if(status_code==241) continue; 156 if(status_code==241) continue;
156 if(status_code!=100) 157 if(status_code!=100)
157 throw failed_xri_resolution(OPKELE_CP_ 158 throw failed_xri_resolution(OPKELE_CP_
158 "XRI resolution failed with '"+status_string+"' message" 159 "XRI resolution failed with '"+status_string+"' message"
159 ", while looking for SEP with type '"+st->uri+"'", status_code); 160 ", while looking for SEP with type '"+st->uri+"'", status_code);
160 if(idis.xrd.canonical_ids.empty()) 161 if(idis.xrd.canonical_ids.empty())
161 throw opkele::failed_discovery(OPKELE_CP_ "No CanonicalID for XRI identity found"); 162 throw opkele::failed_discovery(OPKELE_CP_ "No CanonicalID for XRI identity found");
162 string cid = idis.xrd.canonical_ids.begin()->second; 163 string cid = idis.xrd.canonical_ids.begin()->second;
163 if(cids.find(cid)==cids.end()) { 164 if(cids.find(cid)==cids.end()) {
164 cids.insert(cid); 165 cids.insert(cid);
165 idis.clear(); 166 idis.clear();
166 discover_at( idis, 167 discover_at( idis,
167 xri_proxy + util::url_encode(id)+ 168 xri_proxy + util::url_encode(id)+
168 "?_xrd_t="+util::url_encode(st->uri)+ 169 "?_xrd_t="+util::url_encode(st->uri)+
169 "&_xrd_r=application/xrd%2Bxml" 170 "&_xrd_r=application/xrd%2Bxml"
170 ";sep=true;refs=true", 171 ";sep=true;refs=true",
171 xmode_xrd ); 172 xmode_xrd );
172 if(status_code==241) continue; 173 if(status_code==241) continue;
173 if(status_code!=100) 174 if(status_code!=100)
174 throw failed_xri_resolution(OPKELE_CP_ 175 throw failed_xri_resolution(OPKELE_CP_
175 "XRI resolution failed with '"+status_string+"' message" 176 "XRI resolution failed with '"+status_string+"' message"
176 ", while looking for SEP with type '"+st->uri+"'" 177 ", while looking for SEP with type '"+st->uri+"'"
177 " on canonical id", status_code); 178 " on canonical id", status_code);
178 } 179 }
179 idis.canonicalized_id = cid; 180 idis.canonicalized_id = cid;
180 idis.normalized_id = rv; idis.xri_identity = true; 181 idis.normalized_id = rv; idis.xri_identity = true;
181 queue_endpoints(oi,idis,st); 182 queue_endpoints(oi,idis,st);
182 } 183 }
183 }else{ 184 }else{
184 idis.xri_identity = false; 185 idis.xri_identity = false;
185 if(id.find("://")==string::npos) 186 if(id.find("://")==string::npos)
186 id.insert(0,"http://"); 187 id.insert(0,"http://");
187 string::size_type fp = id.find('#'); 188 string::size_type fp = id.find('#');
188 if(fp!=string::npos) { 189 if(fp!=string::npos) {
189 string::size_type qp = id.find('?'); 190 string::size_type qp = id.find('?');
190 if(qp==string::npos || qp<fp) 191 if(qp==string::npos || qp<fp)
191 id.erase(fp); 192 id.erase(fp);
192 else if(qp>fp) 193 else if(qp>fp)
193 id.erase(fp,qp-fp); 194 id.erase(fp,qp-fp);
194 } 195 }
195 rv = idis.normalized_id = util::rfc_3986_normalize_uri(id); 196 rv = idis.normalized_id = util::rfc_3986_normalize_uri(id);
196 discover_at(idis,id,xmode_html|xmode_xrd); 197 discover_at(idis,id,xmode_html|xmode_xrd);
197 const char * eu = 0; 198 const char * eu = 0;
198 CURLcode r = easy_getinfo(CURLINFO_EFFECTIVE_URL,&eu); 199 CURLcode r = easy_getinfo(CURLINFO_EFFECTIVE_URL,&eu);
199 if(r) 200 if(r)
200 throw exception_curl(OPKELE_CP_ "failed to get CURLINFO_EFFECTIVE_URL",r); 201 throw exception_curl(OPKELE_CP_ "failed to get CURLINFO_EFFECTIVE_URL",r);
201 string cid = util::strip_uri_fragment_part( idis.canonicalized_id = util::rfc_3986_normalize_uri(eu) ); 202 string cid = util::strip_uri_fragment_part( idis.canonicalized_id = util::rfc_3986_normalize_uri(eu) );
202 if(xrds_location.empty()) { 203 if(xrds_location.empty()) {
203 if(idis.xrd.empty()) 204 if(idis.xrd.empty())
204 html2xrd(oi,idis); 205 html2xrd(oi,idis);
205 else{ 206 else{
206 for(const service_type_t *st=op_service_types; 207 for(const service_type_t *st=op_service_types;
207 st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) 208 st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st)
208 queue_endpoints(oi,idis,st); 209 queue_endpoints(oi,idis,st);
209 } 210 }
210 }else{ 211 }else{
211 idis.clear(); 212 idis.clear();
212 idis.canonicalized_id = cid; 213 idis.canonicalized_id = cid;
213 discover_at(idis,xrds_location,xmode_xrd); 214 discover_at(idis,xrds_location,xmode_xrd);
214 if(idis.xrd.empty()) 215 if(idis.xrd.empty())
215 html2xrd(oi,idis); 216 html2xrd(oi,idis);
216 else{ 217 else{
217 for(const service_type_t *st=op_service_types; 218 for(const service_type_t *st=op_service_types;
218 st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) 219 st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st)
219 queue_endpoints(oi,idis,st); 220 queue_endpoints(oi,idis,st);
220 } 221 }
221 } 222 }
222 } 223 }
223 return rv; 224 return rv;
224 } 225 }
225 226
226 void discover_at(idiscovery_t& idis,const string& url,int xm) { 227 void discover_at(idiscovery_t& idis,const string& url,int xm) {
227 CURLcode r = easy_setopt(CURLOPT_MAXREDIRS, (xm&xmode_noredirs)?0:5); 228 CURLcode r = easy_setopt(CURLOPT_MAXREDIRS, (xm&xmode_noredirs)?0:5);
228 if(r) 229 if(r)
229 throw exception_curl(OPKELE_CP_ "failed to set curly maxredirs option"); 230 throw exception_curl(OPKELE_CP_ "failed to set curly maxredirs option");
230 if( (r=easy_setopt(CURLOPT_URL,url.c_str())) ) 231 if( (r=easy_setopt(CURLOPT_URL,url.c_str())) )
231 throw exception_curl(OPKELE_CP_ "failed to set curly urlie",r); 232 throw exception_curl(OPKELE_CP_ "failed to set curly urlie",r);
232 233
233 http_content_type.clear(); 234 http_content_type.clear();
234 xmode = xm; 235 xmode = xm;
235 prepare_to_parse(); 236 prepare_to_parse();
236 if(xmode&xmode_html) { 237 if(xmode&xmode_html) {
237 xrds_location.clear(); 238 xrds_location.clear();
238 save_html.clear(); 239 save_html.clear();
239 save_html.reserve(max_html); 240 save_html.reserve(max_html);
240 } 241 }
241 xrd = &idis.xrd; 242 xrd = &idis.xrd;
242 243
243 r = easy_perform(); 244 r = easy_perform();
244 if(r && r!=CURLE_WRITE_ERROR) 245 if(r && r!=CURLE_WRITE_ERROR)
245 throw exception_curl(OPKELE_CP_ "failed to perform curly request",r); 246 throw exception_curl(OPKELE_CP_ "failed to perform curly request",r);
246 247
247 if(!parser_choked) { 248 if(!parser_choked) {
248 parse(0,0,true); 249 parse(0,0,true);
249 }else if(xmode&xmode_html){ 250 }else if(xmode&xmode_html){
250 /* TODO: do not bother if we've seen xml */ 251 /* TODO: do not bother if we've seen xml */
251 try { 252 try {
252 util::tidy_doc_t td = util::tidy_doc_t::create(); 253 util::tidy_doc_t td = util::tidy_doc_t::create();
253 if(!td) 254 if(!td)
254 throw exception_tidy(OPKELE_CP_ "failed to create htmltidy document"); 255 throw exception_tidy(OPKELE_CP_ "failed to create htmltidy document");
255#ifndef NDEBUG 256#ifndef NDEBUG
256 td.opt_set(TidyQuiet,false); 257 td.opt_set(TidyQuiet,false);
257 td.opt_set(TidyShowWarnings,false); 258 td.opt_set(TidyShowWarnings,false);
258#endif /* NDEBUG */ 259#endif /* NDEBUG */
259 td.opt_set(TidyForceOutput,true); 260 td.opt_set(TidyForceOutput,true);
260 td.opt_set(TidyXhtmlOut,true); 261 td.opt_set(TidyXhtmlOut,true);
261 td.opt_set(TidyDoctypeMode,TidyDoctypeOmit); 262 td.opt_set(TidyDoctypeMode,TidyDoctypeOmit);
262 td.opt_set(TidyMark,false); 263 td.opt_set(TidyMark,false);
263 td.opt_set(TidyNumEntities,true); 264 td.opt_set(TidyNumEntities,true);
264 if(td.parse_string(save_html)<=0) 265 if(td.parse_string(save_html)<=0)
265 throw exception_tidy(OPKELE_CP_ "tidy failed to parse document"); 266 throw exception_tidy(OPKELE_CP_ "tidy failed to parse document");
266 if(td.clean_and_repair()<=0) 267 if(td.clean_and_repair()<=0)
267 throw exception_tidy(OPKELE_CP_ "tidy failed to clean and repair"); 268 throw exception_tidy(OPKELE_CP_ "tidy failed to clean and repair");
268 util::tidy_buf_t tide; 269 util::tidy_buf_t tide;
269 if(td.save_buffer(tide)<=0) 270 if(td.save_buffer(tide)<=0)
270 throw exception_tidy(OPKELE_CP_ "tidy failed to save buffer"); 271 throw exception_tidy(OPKELE_CP_ "tidy failed to save buffer");
271 prepare_to_parse(); 272 prepare_to_parse();
272 parse(tide.c_str(),tide.size(),true); 273 parse(tide.c_str(),tide.size(),true);
273 }catch(exception_tidy& et) { } 274 }catch(exception_tidy& et) { }
274 } 275 }
275 save_html.clear(); 276 save_html.clear();
276 } 277 }
277 278
278 void prepare_to_parse() { 279 void prepare_to_parse() {
279 (*(expat_t*)this) = parser_create_ns(); 280 (*(expat_t*)this) = parser_create_ns();
280 set_user_data(); set_element_handler(); 281 set_user_data(); set_element_handler();
281 set_character_data_handler(); 282 set_character_data_handler();
282 283
283 if(xmode&xmode_html) { 284 if(xmode&xmode_html) {
284 html_openid1.clear(); html_openid2.clear(); 285 html_openid1.clear(); html_openid2.clear();
285 parser_choked = false; 286 parser_choked = false;
286 } 287 }
287 288
288 cdata = 0; xrd_service = 0; skipping = 0; 289 cdata = 0; xrd_service = 0; skipping = 0;
289 pt_stack.clear(); 290 pt_stack.clear();
290 status_code = 100; status_string.clear(); 291 status_code = 100; status_string.clear();
291 } 292 }
292 293
293 void html2xrd(endpoint_discovery_iterator& oi,idiscovery_t& id) { 294 void html2xrd(endpoint_discovery_iterator& oi,idiscovery_t& id) {
294 XRD_t& x = id.xrd; 295 XRD_t& x = id.xrd;
295 if(!html_openid2.uris.empty()) { 296 if(!html_openid2.uris.empty()) {
296 html_openid2.types.insert(STURI_OPENID20); 297 html_openid2.types.insert(STURI_OPENID20);
297 x.services.add(-1,html_openid2); 298 x.services.add(-1,html_openid2);
298 queue_endpoints(oi,id,&op_service_types[st_index_2]); 299 queue_endpoints(oi,id,&op_service_types[st_index_2]);
299 } 300 }
300 if(!html_openid1.uris.empty()) { 301 if(!html_openid1.uris.empty()) {
301 html_openid1.types.insert(STURI_OPENID11); 302 html_openid1.types.insert(STURI_OPENID11);
302 x.services.add(-1,html_openid1); 303 x.services.add(-1,html_openid1);
303 queue_endpoints(oi,id,&op_service_types[st_index_1]); 304 queue_endpoints(oi,id,&op_service_types[st_index_1]);
304 } 305 }
305 } 306 }
306 307
307 size_t write(void *p,size_t s,size_t nm) { 308 size_t write(void *p,size_t s,size_t nm) {
308 /* TODO: limit total size */ 309 /* TODO: limit total size */
309 size_t bytes = s*nm; 310 size_t bytes = s*nm;
310 const char *inbuf = (const char*)p; 311 const char *inbuf = (const char*)p;
311 if(xmode&xmode_html) { 312 if(xmode&xmode_html) {
312 size_t mbts = save_html.capacity()-save_html.size(); 313 size_t mbts = save_html.capacity()-save_html.size();
313 size_t bts = 0; 314 size_t bts = 0;
314 if(mbts>0) { 315 if(mbts>0) {
315 bts = (bytes>mbts)?mbts:bytes; 316 bts = (bytes>mbts)?mbts:bytes;
316 save_html.append(inbuf,bts); 317 save_html.append(inbuf,bts);
317 } 318 }
318 if(skipping<0) return bts; 319 if(skipping<0) return bts;
319 } 320 }
320 if(skipping<0) return 0; 321 if(skipping<0) return 0;
321 bool rp = parse(inbuf,bytes,false); 322 bool rp = parse(inbuf,bytes,false);
322 if(!rp) { 323 if(!rp) {
323 parser_choked = true; 324 parser_choked = true;
324 skipping = -1; 325 skipping = -1;
325 if(!(xmode&xmode_html)) 326 if(!(xmode&xmode_html))
326 bytes = 0; 327 bytes = 0;
327 } 328 }
328 return bytes; 329 return bytes;
329 } 330 }
330 size_t header(void *p,size_t s,size_t nm) { 331 size_t header(void *p,size_t s,size_t nm) {
331 size_t bytes = s*nm; 332 size_t bytes = s*nm;
332 const char *h = (const char*)p; 333 const char *h = (const char*)p;
333 const char *colon = (const char*)memchr(p,':',bytes); 334 const char *colon = (const char*)memchr(p,':',bytes);
334 const char *space = (const char*)memchr(p,' ',bytes); 335 const char *space = (const char*)memchr(p,' ',bytes);
335 if(space && ( (!colon) || space<colon ) ) { 336 if(space && ( (!colon) || space<colon ) ) {
336 xrds_location.clear(); http_content_type.clear(); 337 xrds_location.clear(); http_content_type.clear();
337 }else if(colon) { 338 }else if(colon) {
338 const char *hv = ++colon; 339 const char *hv = ++colon;
339 size_t hnl = colon-h; 340 size_t hnl = colon-h;
340 int rb; 341 int rb;
341 for(rb = bytes-hnl-1;rb>0 && isspace(*hv);++hv,--rb); 342 for(rb = bytes-hnl-1;rb>0 && isspace(*hv);++hv,--rb);
342 while(rb>0 && isspace(hv[rb-1])) --rb; 343 while(rb>0 && isspace(hv[rb-1])) --rb;
343 if(rb) { 344 if(rb) {
344 if( (hnl>=sizeof(XRDS_HEADER)) 345 if( (hnl>=sizeof(XRDS_HEADER))
345 && !strncasecmp(h,XRDS_HEADER":", 346 && !strncasecmp(h,XRDS_HEADER":",
346 sizeof(XRDS_HEADER)) ) { 347 sizeof(XRDS_HEADER)) ) {
347 xrds_location.assign(hv,rb); 348 xrds_location.assign(hv,rb);
348 }else if( (hnl>=sizeof(CT_HEADER)) 349 }else if( (hnl>=sizeof(CT_HEADER))
349 && !strncasecmp(h,CT_HEADER":", 350 && !strncasecmp(h,CT_HEADER":",
350 sizeof(CT_HEADER)) ) { 351 sizeof(CT_HEADER)) ) {
351 const char *sc = (const char*)memchr( 352 const char *sc = (const char*)memchr(
352 hv,';',rb); 353 hv,';',rb);
353 http_content_type.assign(hv,sc?(sc-hv):rb); 354 http_content_type.assign(hv,sc?(sc-hv):rb);
354 } 355 }
355 } 356 }
356 } 357 }
357 return curl_t::header(p,s,nm); 358 return curl_t::header(p,s,nm);
358 } 359 }
359 360
360 void start_element(const XML_Char *n,const XML_Char **a) { 361 void start_element(const XML_Char *n,const XML_Char **a) {
361 if(skipping<0) return; 362 if(skipping<0) return;
362 if(skipping) { 363 if(skipping) {
363 if(xmode&xmode_html) 364 if(xmode&xmode_html)
364 html_start_element(n,a); 365 html_start_element(n,a);
365 ++skipping; return; 366 ++skipping; return;
366 } 367 }
367 if(pt_stack.empty()) { 368 if(pt_stack.empty()) {
368 if(is_qelement(n,NSURI_XRDS "\tXRDS")) 369 if(is_qelement(n,NSURI_XRDS "\tXRDS"))
369 return; 370 return;
370 if(is_qelement(n,NSURI_XRD "\tXRD")) { 371 if(is_qelement(n,NSURI_XRD "\tXRD")) {
371 assert(xrd); 372 assert(xrd);
372 xrd->clear(); 373 xrd->clear();
373 pt_stack.push_back(n); 374 pt_stack.push_back(n);
374 }else if(xmode&xmode_html) { 375 }else if(xmode&xmode_html) {
375 html_start_element(n,a); 376 html_start_element(n,a);
376 }else{ 377 }else{
377 skipping = -1; 378 skipping = -1;
378 } 379 }
379 }else{ 380 }else{
380 int pt_s = pt_stack.size(); 381 int pt_s = pt_stack.size();
381 if(pt_s==1) { 382 if(pt_s==1) {
382 if(is_qelement(n,NSURI_XRD "\tCanonicalID")) { 383 if(is_qelement(n,NSURI_XRD "\tCanonicalID")) {
383 assert(xrd); 384 assert(xrd);
384 cdata = &(xrd->canonical_ids.add(element_priority(a),string())); 385 cdata = &(xrd->canonical_ids.add(element_priority(a),string()));
385 }else if(is_qelement(n,NSURI_XRD "\tLocalID")) { 386 }else if(is_qelement(n,NSURI_XRD "\tLocalID")) {
386 assert(xrd); 387 assert(xrd);
387 cdata = &(xrd->local_ids.add(element_priority(a),string())); 388 cdata = &(xrd->local_ids.add(element_priority(a),string()));
388 }else if(is_qelement(n,NSURI_XRD "\tProviderID")) { 389 }else if(is_qelement(n,NSURI_XRD "\tProviderID")) {
389 assert(xrd); 390 assert(xrd);
390 cdata = &(xrd->provider_id); 391 cdata = &(xrd->provider_id);
391 }else if(is_qelement(n,NSURI_XRD "\tService")) { 392 }else if(is_qelement(n,NSURI_XRD "\tService")) {
392 assert(xrd); 393 assert(xrd);
393 xrd_service = &(xrd->services.add(element_priority(a), 394 xrd_service = &(xrd->services.add(element_priority(a),
394 service_t())); 395 service_t()));
395 pt_stack.push_back(n); 396 pt_stack.push_back(n);
396 }else if(is_qelement(n,NSURI_XRD "\tStatus")) { 397 }else if(is_qelement(n,NSURI_XRD "\tStatus")) {
397 for(;*a;) { 398 for(;*a;) {
398 if(!strcasecmp(*(a++),"code")) { 399 if(!strcasecmp(*(a++),"code")) {
399 if(sscanf(*(a++),"%ld",&status_code)==1 && status_code!=100) { 400 if(sscanf(*(a++),"%ld",&status_code)==1 && status_code!=100) {
400 cdata = &status_string; 401 cdata = &status_string;
401 pt_stack.push_back(n); 402 pt_stack.push_back(n);
402 break; 403 break;
403 } 404 }
404 }else 405 }else
405 ++a; 406 ++a;
406 } 407 }
407 }else if(is_qelement(n,NSURI_XRD "\tExpires")) { 408 }else if(is_qelement(n,NSURI_XRD "\tExpires")) {
408 assert(xrd); 409 assert(xrd);
409 cdata_buf.clear(); 410 cdata_buf.clear();
410 cdata = &cdata_buf; 411 cdata = &cdata_buf;
411 }else if(xmode&xmode_html) { 412 }else if(xmode&xmode_html) {
412 html_start_element(n,a); 413 html_start_element(n,a);
413 }else{ 414 }else{
414 skipping = 1; 415 skipping = 1;
415 } 416 }
416 }else if(pt_s==2) { 417 }else if(pt_s==2) {
417 if(is_qelement(pt_stack.back().c_str(), NSURI_XRD "\tService")) { 418 if(is_qelement(pt_stack.back().c_str(), NSURI_XRD "\tService")) {
418 if(is_qelement(n,NSURI_XRD "\tType")) { 419 if(is_qelement(n,NSURI_XRD "\tType")) {
419 assert(xrd); assert(xrd_service); 420 assert(xrd); assert(xrd_service);
420 cdata_buf.clear(); 421 cdata_buf.clear();
421 cdata = &cdata_buf; 422 cdata = &cdata_buf;
422 }else if(is_qelement(n,NSURI_XRD "\tURI")) { 423 }else if(is_qelement(n,NSURI_XRD "\tURI")) {
423 assert(xrd); assert(xrd_service); 424 assert(xrd); assert(xrd_service);
424 const char *append = element_attr(a,"append"); 425 const char *append = element_attr(a,"append");
425 xrd::uri_t& uri = xrd_service->uris.add(element_priority(a),xrd::uri_t("",append?append:"")); 426 xrd::uri_t& uri = xrd_service->uris.add(element_priority(a),xrd::uri_t("",append?append:""));
426 cdata = &uri.uri; 427 cdata = &uri.uri;
427 }else if(is_qelement(n,NSURI_XRD "\tLocalID") 428 }else if(is_qelement(n,NSURI_XRD "\tLocalID")
428 || is_qelement(n,NSURI_OPENID10 "\tDelegate") ) { 429 || is_qelement(n,NSURI_OPENID10 "\tDelegate") ) {
429 assert(xrd); assert(xrd_service); 430 assert(xrd); assert(xrd_service);
430 cdata = &(xrd_service->local_ids.add(element_priority(a),string())); 431 cdata = &(xrd_service->local_ids.add(element_priority(a),string()));
431 }else if(is_qelement(n,NSURI_XRD "\tProviderID")) { 432 }else if(is_qelement(n,NSURI_XRD "\tProviderID")) {
432 assert(xrd); assert(xrd_service); 433 assert(xrd); assert(xrd_service);
433 cdata = &(xrd_service->provider_id); 434 cdata = &(xrd_service->provider_id);
434 }else{ 435 }else{
435 skipping = 1; 436 skipping = 1;
436 } 437 }
437 }else 438 }else
438 skipping = 1; 439 skipping = 1;
439 }else if(xmode&xmode_html) { 440 }else if(xmode&xmode_html) {
440 html_start_element(n,a); 441 html_start_element(n,a);
441 }else{ 442 }else{
442 skipping = 1; 443 skipping = 1;
443 } 444 }
444 } 445 }
445 } 446 }
446 void end_element(const XML_Char *n) { 447 void end_element(const XML_Char *n) {
447 if(skipping<0) return; 448 if(skipping<0) return;
448 if(skipping) { 449 if(skipping) {
449 --skipping; return; 450 --skipping; return;
450 } 451 }
451 if(is_qelement(n,NSURI_XRD "\tType")) { 452 if(is_qelement(n,NSURI_XRD "\tType")) {
452 assert(xrd); assert(xrd_service); assert(cdata==&cdata_buf); 453 assert(xrd); assert(xrd_service); assert(cdata==&cdata_buf);
453 xrd_service->types.insert(cdata_buf); 454 xrd_service->types.insert(cdata_buf);
454 }else if(is_qelement(n,NSURI_XRD "\tService")) { 455 }else if(is_qelement(n,NSURI_XRD "\tService")) {
455 assert(xrd); assert(xrd_service); 456 assert(xrd); assert(xrd_service);
456 assert(!pt_stack.empty()); 457 assert(!pt_stack.empty());
457 assert(pt_stack.back()==(NSURI_XRD "\tService")); 458 assert(pt_stack.back()==(NSURI_XRD "\tService"));
458 pt_stack.pop_back(); 459 pt_stack.pop_back();
459 xrd_service = 0; 460 xrd_service = 0;
460 }else if(is_qelement(n,NSURI_XRD "\tStatus")) { 461 }else if(is_qelement(n,NSURI_XRD "\tStatus")) {
461 assert(xrd); 462 assert(xrd);
462 if(is_qelement(pt_stack.back().c_str(),n)) { 463 if(is_qelement(pt_stack.back().c_str(),n)) {
463 assert(cdata==&status_string); 464 assert(cdata==&status_string);
464 pt_stack.pop_back(); 465 pt_stack.pop_back();
465 if(status_code!=100) 466 if(status_code!=100)
466 skipping = -1; 467 skipping = -1;
467 } 468 }
468 }else if(is_qelement(n,NSURI_XRD "\tExpires")) { 469 }else if(is_qelement(n,NSURI_XRD "\tExpires")) {
469 assert(xrd); 470 assert(xrd);
470 xrd->expires = util::w3c_to_time(cdata_buf); 471 xrd->expires = util::w3c_to_time(cdata_buf);
471 }else if((xmode&xmode_html) && is_element(n,"head")) { 472 }else if((xmode&xmode_html) && is_element(n,"head")) {
472 skipping = -1; 473 skipping = -1;
473 } 474 }
474 cdata = 0; 475 cdata = 0;
475 } 476 }
476 void character_data(const XML_Char *s,int l) { 477 void character_data(const XML_Char *s,int l) {
477 if(skipping) return; 478 if(skipping) return;
478 if(cdata) cdata->append(s,l); 479 if(cdata) cdata->append(s,l);
479 } 480 }
480 481
481 void html_start_element(const XML_Char *n,const XML_Char **a) { 482 void html_start_element(const XML_Char *n,const XML_Char **a) {
482 if(is_element(n,"meta")) { 483 if(is_element(n,"meta")) {
483 bool heq = false; 484 bool heq = false;
484 string l; 485 string l;
485 for(;*a;a+=2) { 486 for(;*a;a+=2) {
486 if(!( strcasecmp(a[0],"http-equiv") 487 if(!( strcasecmp(a[0],"http-equiv")
487 || strcasecmp(a[1],XRDS_HEADER) )) 488 || strcasecmp(a[1],XRDS_HEADER) ))
488 heq = true; 489 heq = true;
489 else if(!strcasecmp(a[0],"content")) 490 else if(!strcasecmp(a[0],"content"))
490 l.assign(a[1]); 491 l.assign(a[1]);
491 } 492 }
492 if(heq) 493 if(heq)
493 xrds_location = l; 494 xrds_location = l;
494 }else if(is_element(n,"link")) { 495 }else if(is_element(n,"link")) {
495 string rels; 496 string rels;
496 string href; 497 string href;
497 for(;*a;a+=2) { 498 for(;*a;a+=2) {
498 if( !strcasecmp(a[0],"rel") ) { 499 if( !strcasecmp(a[0],"rel") ) {
499 rels.assign(a[1]); 500 rels.assign(a[1]);
500 }else if( !strcasecmp(a[0],"href") ) { 501 }else if( !strcasecmp(a[0],"href") ) {
501 const char *ns = a[1]; 502 const char *ns = a[1];
502 for(;*ns && isspace(*ns);++ns); 503 for(;*ns && isspace(*ns);++ns);
503 href.assign(ns); 504 href.assign(ns);
504 string::size_type lns=href.find_last_not_of(data::_whitespace_chars); 505 string::size_type lns=href.find_last_not_of(data::_whitespace_chars);
505 href.erase(lns+1); 506 href.erase(lns+1);
506 } 507 }
507 } 508 }
508 for(string::size_type ns=rels.find_first_not_of(data::_whitespace_chars); 509 for(string::size_type ns=rels.find_first_not_of(data::_whitespace_chars);
509 ns!=string::npos; ns=rels.find_first_not_of(data::_whitespace_chars,ns)) { 510 ns!=string::npos; ns=rels.find_first_not_of(data::_whitespace_chars,ns)) {
510 string::size_type s = rels.find_first_of(data::_whitespace_chars,ns); 511 string::size_type s = rels.find_first_of(data::_whitespace_chars,ns);
511 string rel; 512 string rel;
512 if(s==string::npos) { 513 if(s==string::npos) {
513 rel.assign(rels,ns,string::npos); 514 rel.assign(rels,ns,string::npos);
514 ns = string::npos; 515 ns = string::npos;
515 }else{ 516 }else{
516 rel.assign(rels,ns,s-ns); 517 rel.assign(rels,ns,s-ns);
517 ns = s; 518 ns = s;
518 } 519 }
519 if(rel=="openid.server") 520 if(rel=="openid.server")
520 html_openid1.uris.add(-1,xrd::uri_t(href)); 521 html_openid1.uris.add(-1,xrd::uri_t(href));
521 else if(rel=="openid.delegate") 522 else if(rel=="openid.delegate")
522 html_openid1.local_ids.add(-1,href); 523 html_openid1.local_ids.add(-1,href);
523 else if(rel=="openid2.provider") 524 else if(rel=="openid2.provider")
524 html_openid2.uris.add(-1,xrd::uri_t(href)); 525 html_openid2.uris.add(-1,xrd::uri_t(href));
525 else if(rel=="openid2.local_id") 526 else if(rel=="openid2.local_id")
526 html_openid2.local_ids.add(-1,href); 527 html_openid2.local_ids.add(-1,href);
527 } 528 }
528 }else if(is_element(n,"body")) { 529 }else if(is_element(n,"body")) {
529 skipping = -1; 530 skipping = -1;
530 } 531 }
531 } 532 }
532 533
533 void queue_endpoints(endpoint_discovery_iterator& oi, 534 void queue_endpoints(endpoint_discovery_iterator& oi,
534 const idiscovery_t &id, 535 const idiscovery_t &id,
535 const service_type_t *st) { 536 const service_type_t *st) {
536 openid_endpoint_t ep; 537 openid_endpoint_t ep;
537 ep.claimed_id = id.canonicalized_id; 538 ep.claimed_id = id.canonicalized_id;
538 for(xrd::services_t::const_iterator isvc=id.xrd.services.begin(); 539 for(xrd::services_t::const_iterator isvc=id.xrd.services.begin();
539 isvc!=id.xrd.services.end(); ++isvc) { 540 isvc!=id.xrd.services.end(); ++isvc) {
540 const xrd::service_t svc = isvc->second; 541 const xrd::service_t svc = isvc->second;
541 if(svc.types.find(st->uri)==svc.types.end()) continue; 542 if(svc.types.find(st->uri)==svc.types.end()) continue;
542 for(xrd::uris_t::const_iterator iu=svc.uris.begin();iu!=svc.uris.end();++iu) { 543 for(xrd::uris_t::const_iterator iu=svc.uris.begin();iu!=svc.uris.end();++iu) {
543 ep.uri = iu->second.uri; 544 ep.uri = iu->second.uri;
544 if(id.xri_identity) { 545 if(id.xri_identity) {
545 if(iu->second.append=="qxri") { 546 if(iu->second.append=="qxri") {
546 ep.uri += id.normalized_id; 547 ep.uri += id.normalized_id;
547 } /* TODO: else handle other append attribute values */ 548 } /* TODO: else handle other append attribute values */
548 } 549 }
549 if(st->forceid) { 550 if(st->forceid) {
550 ep.local_id = ep.claimed_id = st->forceid; 551 ep.local_id = ep.claimed_id = st->forceid;
551 *(oi++) = ep; 552 *(oi++) = ep;
552 }else{ 553 }else{
553 if(svc.local_ids.empty()) { 554 if(svc.local_ids.empty()) {
554 ep.local_id = ep.claimed_id; 555 ep.local_id = ep.claimed_id;
555 *(oi++) = ep; 556 *(oi++) = ep;
556 }else{ 557 }else{
557 for(xrd::local_ids_t::const_iterator ilid=svc.local_ids.begin(); 558 for(xrd::local_ids_t::const_iterator ilid=svc.local_ids.begin();
558 ilid!=svc.local_ids.end(); ++ilid) { 559 ilid!=svc.local_ids.end(); ++ilid) {
559 ep.local_id = ilid->second; 560 ep.local_id = ilid->second;
560 *(oi++) = ep; 561 *(oi++) = ep;
561 } 562 }
562 } 563 }
563 } 564 }
564 } 565 }
565 } 566 }
566 } 567 }
567 568
568 }; 569 };
569 570
570 string idiscover(endpoint_discovery_iterator oi,const string& identity) { 571 string idiscover(endpoint_discovery_iterator oi,const string& identity) {
571 idigger_t idigger; 572 idigger_t idigger;
572 return idigger.discover(oi,identity); 573 return idigger.discover(oi,identity);
573 } 574 }
574 575
575 void yadiscover(endpoint_discovery_iterator oi,const string& yurl,const char **types,bool redirs) try { 576 void yadiscover(endpoint_discovery_iterator oi,const string& yurl,const char **types,bool redirs) try {
576 idigger_t idigger; 577 idigger_t idigger;
577 idigger.yadiscover(oi,yurl,types,redirs); 578 idigger.yadiscover(oi,yurl,types,redirs);
578 }catch(exception_curl& ec) { 579 }catch(exception_curl& ec) {
579 if(redirs || ec._error!=CURLE_TOO_MANY_REDIRECTS) 580 if(redirs || ec._error!=CURLE_TOO_MANY_REDIRECTS)
580 throw; 581 throw;
581 } 582 }
582 583
583} 584}