summary | refs | log | tree | commit | diff | about
path: root/lib
author Michael Krelin <hacker@klever.net> 2008-02-22 12:33:42 (UTC)
committer Michael Krelin <hacker@klever.net> 2008-02-22 12:33:42 (UTC)
commit 4c9564dce14fdc46ef6739426ee1cad87ed58ad1 (patch) (unidiff)
tree 6f1f520d56b5e346abb6bf9514b697c7e2240819 /lib
parent daf2d4bcb4a31df6b46d3da7a33ee3f98d85e464 (diff)
download libopkele-4c9564dce14fdc46ef6739426ee1cad87ed58ad1.zip
libopkele-4c9564dce14fdc46ef6739426ee1cad87ed58ad1.tar.gz
libopkele-4c9564dce14fdc46ef6739426ee1cad87ed58ad1.tar.bz2
Fix the discovery for the case when the identity URL points to an XRDS document.
Signed-off-by: Michael Krelin <hacker@klever.net>
Diffstat (limited to 'lib') (more/less context) (ignore whitespace changes)
-rw-r--r-- lib/discovery.cc 8
1 file changed, 7 insertions, 1 deletion
diff --git a/lib/discovery.cc b/lib/discovery.cc
index 5913ad4..c118c80 100644
--- a/lib/discovery.cc
+++ b/lib/discovery.cc
@@ -75,257 +75,263 @@ namespace opkele {
75 enum { 75 enum {
76 xmode_html = 1, xmode_xrd = 2, xmode_cid = 4, 76 xmode_html = 1, xmode_xrd = 2, xmode_cid = 4,
77 xmode_noredirs = 8 77 xmode_noredirs = 8
78 }; 78 };
79 int xmode; 79 int xmode;
80 80
81 string xrds_location; 81 string xrds_location;
82 string http_content_type; 82 string http_content_type;
83 service_t html_openid1; 83 service_t html_openid1;
84 service_t html_openid2; 84 service_t html_openid2;
85 string cdata_buf; 85 string cdata_buf;
86 long status_code; 86 long status_code;
87 string status_string; 87 string status_string;
88 88
89 typedef list<string> pt_stack_t; 89 typedef list<string> pt_stack_t;
90 pt_stack_t pt_stack; 90 pt_stack_t pt_stack;
91 int skipping; 91 int skipping;
92 bool parser_choked; 92 bool parser_choked;
93 string save_html; 93 string save_html;
94 94
95 XRD_t *xrd; 95 XRD_t *xrd;
96 service_t *xrd_service; 96 service_t *xrd_service;
97 string* cdata; 97 string* cdata;
98 98
99 idigger_t() 99 idigger_t()
100 : util::curl_t(easy_init()), 100 : util::curl_t(easy_init()),
101 util::expat_t(0), 101 util::expat_t(0),
102 xri_proxy(XRI_PROXY_URL) { 102 xri_proxy(XRI_PROXY_URL) {
103 CURLcode r; 103 CURLcode r;
104 (r=misc_sets()) 104 (r=misc_sets())
105 || (r=set_write()) 105 || (r=set_write())
106 || (r=set_header()) 106 || (r=set_header())
107 ; 107 ;
108 if(r) 108 if(r)
109 throw exception_curl(OPKELE_CP_ "failed to set curly options",r); 109 throw exception_curl(OPKELE_CP_ "failed to set curly options",r);
110 } 110 }
111 ~idigger_t() throw() { } 111 ~idigger_t() throw() { }
112 112
113 void yadiscover(endpoint_discovery_iterator oi,const string& yurl,const char **types,bool redirs) { 113 void yadiscover(endpoint_discovery_iterator oi,const string& yurl,const char **types,bool redirs) {
114 idiscovery_t idis; 114 idiscovery_t idis;
115 idis.xri_identity = false; 115 idis.xri_identity = false;
116 discover_at(idis,yurl,xmode_html|xmode_xrd|(redirs?0:xmode_noredirs)); 116 discover_at(idis,yurl,xmode_html|xmode_xrd|(redirs?0:xmode_noredirs));
117 if(!xrds_location.empty()) { 117 if(!xrds_location.empty()) {
118 idis.clear(); 118 idis.clear();
119 discover_at(idis,xrds_location,xmode_xrd); 119 discover_at(idis,xrds_location,xmode_xrd);
120 } 120 }
121 idis.normalized_id = idis.canonicalized_id = yurl; 121 idis.normalized_id = idis.canonicalized_id = yurl;
122 service_type_t st; 122 service_type_t st;
123 for(st.uri=*types;*types;st.uri=*(++types)) 123 for(st.uri=*types;*types;st.uri=*(++types))
124 queue_endpoints(oi,idis,&st); 124 queue_endpoints(oi,idis,&st);
125 } 125 }
126 126
127 string discover(endpoint_discovery_iterator& oi,const string& identity) { 127 string discover(endpoint_discovery_iterator& oi,const string& identity) {
128 string rv; 128 string rv;
129 idiscovery_t idis; 129 idiscovery_t idis;
130 string::size_type fsc = identity.find_first_not_of(data::_whitespace_chars); 130 string::size_type fsc = identity.find_first_not_of(data::_whitespace_chars);
131 if(fsc==string::npos) 131 if(fsc==string::npos)
132 throw bad_input(OPKELE_CP_ "whitespace-only identity"); 132 throw bad_input(OPKELE_CP_ "whitespace-only identity");
133 string::size_type lsc = identity.find_last_not_of(data::_whitespace_chars); 133 string::size_type lsc = identity.find_last_not_of(data::_whitespace_chars);
134 assert(lsc!=string::npos); 134 assert(lsc!=string::npos);
135 if(!strncasecmp(identity.c_str()+fsc,"xri://",sizeof("xri://")-1)) 135 if(!strncasecmp(identity.c_str()+fsc,"xri://",sizeof("xri://")-1))
136 fsc += sizeof("xri://")-1; 136 fsc += sizeof("xri://")-1;
137 if((fsc+1)>=lsc) 137 if((fsc+1)>=lsc)
138 throw bad_input(OPKELE_CP_ "not a character of importance in identity"); 138 throw bad_input(OPKELE_CP_ "not a character of importance in identity");
139 string id(identity,fsc,lsc-fsc+1); 139 string id(identity,fsc,lsc-fsc+1);
140 idis.clear(); 140 idis.clear();
141 if(strchr(data::_iname_leaders,id[0])) { 141 if(strchr(data::_iname_leaders,id[0])) {
142 /* TODO: further normalize xri identity? Like folding case 142 /* TODO: further normalize xri identity? Like folding case
143 * or whatever... */ 143 * or whatever... */
144 rv = id; 144 rv = id;
145 set<string> cids; 145 set<string> cids;
146 for(const struct service_type_t *st=op_service_types; 146 for(const struct service_type_t *st=op_service_types;
147 st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) { 147 st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) {
148 idis.clear(); 148 idis.clear();
149 discover_at( idis, 149 discover_at( idis,
150 xri_proxy + util::url_encode(id)+ 150 xri_proxy + util::url_encode(id)+
151 "?_xrd_t="+util::url_encode(st->uri)+ 151 "?_xrd_t="+util::url_encode(st->uri)+
152 "&_xrd_r=application/xrd%2Bxml" 152 "&_xrd_r=application/xrd%2Bxml"
153 ";sep=true;refs=true", 153 ";sep=true;refs=true",
154 xmode_xrd ); 154 xmode_xrd );
155 if(status_code==241) continue; 155 if(status_code==241) continue;
156 if(status_code!=100) 156 if(status_code!=100)
157 throw failed_xri_resolution(OPKELE_CP_ 157 throw failed_xri_resolution(OPKELE_CP_
158 "XRI resolution failed with '"+status_string+"' message" 158 "XRI resolution failed with '"+status_string+"' message"
159 ", while looking for SEP with type '"+st->uri+"'", status_code); 159 ", while looking for SEP with type '"+st->uri+"'", status_code);
160 if(idis.xrd.canonical_ids.empty()) 160 if(idis.xrd.canonical_ids.empty())
161 throw opkele::failed_discovery(OPKELE_CP_ "No CanonicalID for XRI identity found"); 161 throw opkele::failed_discovery(OPKELE_CP_ "No CanonicalID for XRI identity found");
162 string cid = idis.xrd.canonical_ids.begin()->second; 162 string cid = idis.xrd.canonical_ids.begin()->second;
163 if(cids.find(cid)==cids.end()) { 163 if(cids.find(cid)==cids.end()) {
164 cids.insert(cid); 164 cids.insert(cid);
165 idis.clear(); 165 idis.clear();
166 discover_at( idis, 166 discover_at( idis,
167 xri_proxy + util::url_encode(id)+ 167 xri_proxy + util::url_encode(id)+
168 "?_xrd_t="+util::url_encode(st->uri)+ 168 "?_xrd_t="+util::url_encode(st->uri)+
169 "&_xrd_r=application/xrd%2Bxml" 169 "&_xrd_r=application/xrd%2Bxml"
170 ";sep=true;refs=true", 170 ";sep=true;refs=true",
171 xmode_xrd ); 171 xmode_xrd );
172 if(status_code==241) continue; 172 if(status_code==241) continue;
173 if(status_code!=100) 173 if(status_code!=100)
174 throw failed_xri_resolution(OPKELE_CP_ 174 throw failed_xri_resolution(OPKELE_CP_
175 "XRI resolution failed with '"+status_string+"' message" 175 "XRI resolution failed with '"+status_string+"' message"
176 ", while looking for SEP with type '"+st->uri+"'" 176 ", while looking for SEP with type '"+st->uri+"'"
177 " on canonical id", status_code); 177 " on canonical id", status_code);
178 } 178 }
179 idis.canonicalized_id = cid; 179 idis.canonicalized_id = cid;
180 idis.normalized_id = rv; idis.xri_identity = true; 180 idis.normalized_id = rv; idis.xri_identity = true;
181 queue_endpoints(oi,idis,st); 181 queue_endpoints(oi,idis,st);
182 } 182 }
183 }else{ 183 }else{
184 idis.xri_identity = false; 184 idis.xri_identity = false;
185 if(id.find("://")==string::npos) 185 if(id.find("://")==string::npos)
186 id.insert(0,"http://"); 186 id.insert(0,"http://");
187 string::size_type fp = id.find('#'); 187 string::size_type fp = id.find('#');
188 if(fp!=string::npos) { 188 if(fp!=string::npos) {
189 string::size_type qp = id.find('?'); 189 string::size_type qp = id.find('?');
190 if(qp==string::npos || qp<fp) 190 if(qp==string::npos || qp<fp)
191 id.erase(fp); 191 id.erase(fp);
192 else if(qp>fp) 192 else if(qp>fp)
193 id.erase(fp,qp-fp); 193 id.erase(fp,qp-fp);
194 } 194 }
195 rv = idis.normalized_id = util::rfc_3986_normalize_uri(id); 195 rv = idis.normalized_id = util::rfc_3986_normalize_uri(id);
196 discover_at(idis,id,xmode_html|xmode_xrd); 196 discover_at(idis,id,xmode_html|xmode_xrd);
197 const char * eu = 0; 197 const char * eu = 0;
198 CURLcode r = easy_getinfo(CURLINFO_EFFECTIVE_URL,&eu); 198 CURLcode r = easy_getinfo(CURLINFO_EFFECTIVE_URL,&eu);
199 if(r) 199 if(r)
200 throw exception_curl(OPKELE_CP_ "failed to get CURLINFO_EFFECTIVE_URL",r); 200 throw exception_curl(OPKELE_CP_ "failed to get CURLINFO_EFFECTIVE_URL",r);
201 string cid = util::strip_uri_fragment_part( idis.canonicalized_id = util::rfc_3986_normalize_uri(eu) ); 201 string cid = util::strip_uri_fragment_part( idis.canonicalized_id = util::rfc_3986_normalize_uri(eu) );
202 if(xrds_location.empty()) { 202 if(xrds_location.empty()) {
203 html2xrd(oi,idis); 203 if(idis.xrd.empty())
204 html2xrd(oi,idis);
205 else{
206 for(const service_type_t *st=op_service_types;
207 st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st)
208 queue_endpoints(oi,idis,st);
209 }
204 }else{ 210 }else{
205 idis.clear(); 211 idis.clear();
206 idis.canonicalized_id = cid; 212 idis.canonicalized_id = cid;
207 discover_at(idis,xrds_location,xmode_xrd); 213 discover_at(idis,xrds_location,xmode_xrd);
208 if(idis.xrd.empty()) 214 if(idis.xrd.empty())
209 html2xrd(oi,idis); 215 html2xrd(oi,idis);
210 else{ 216 else{
211 for(const service_type_t *st=op_service_types; 217 for(const service_type_t *st=op_service_types;
212 st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) 218 st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st)
213 queue_endpoints(oi,idis,st); 219 queue_endpoints(oi,idis,st);
214 } 220 }
215 } 221 }
216 } 222 }
217 return rv; 223 return rv;
218 } 224 }
219 225
220 void discover_at(idiscovery_t& idis,const string& url,int xm) { 226 void discover_at(idiscovery_t& idis,const string& url,int xm) {
221 CURLcode r = easy_setopt(CURLOPT_MAXREDIRS, (xm&xmode_noredirs)?0:5); 227 CURLcode r = easy_setopt(CURLOPT_MAXREDIRS, (xm&xmode_noredirs)?0:5);
222 if(r) 228 if(r)
223 throw exception_curl(OPKELE_CP_ "failed to set curly maxredirs option"); 229 throw exception_curl(OPKELE_CP_ "failed to set curly maxredirs option");
224 if( (r=easy_setopt(CURLOPT_URL,url.c_str())) ) 230 if( (r=easy_setopt(CURLOPT_URL,url.c_str())) )
225 throw exception_curl(OPKELE_CP_ "failed to set curly urlie",r); 231 throw exception_curl(OPKELE_CP_ "failed to set curly urlie",r);
226 232
227 http_content_type.clear(); 233 http_content_type.clear();
228 xmode = xm; 234 xmode = xm;
229 prepare_to_parse(); 235 prepare_to_parse();
230 if(xmode&xmode_html) { 236 if(xmode&xmode_html) {
231 xrds_location.clear(); 237 xrds_location.clear();
232 save_html.clear(); 238 save_html.clear();
233 save_html.reserve(max_html); 239 save_html.reserve(max_html);
234 } 240 }
235 xrd = &idis.xrd; 241 xrd = &idis.xrd;
236 242
237 r = easy_perform(); 243 r = easy_perform();
238 if(r && r!=CURLE_WRITE_ERROR) 244 if(r && r!=CURLE_WRITE_ERROR)
239 throw exception_curl(OPKELE_CP_ "failed to perform curly request",r); 245 throw exception_curl(OPKELE_CP_ "failed to perform curly request",r);
240 246
241 if(!parser_choked) { 247 if(!parser_choked) {
242 parse(0,0,true); 248 parse(0,0,true);
243 }else if(xmode&xmode_html){ 249 }else if(xmode&xmode_html){
244 /* TODO: do not bother if we've seen xml */ 250 /* TODO: do not bother if we've seen xml */
245 try { 251 try {
246 util::tidy_doc_t td = util::tidy_doc_t::create(); 252 util::tidy_doc_t td = util::tidy_doc_t::create();
247 if(!td) 253 if(!td)
248 throw exception_tidy(OPKELE_CP_ "failed to create htmltidy document"); 254 throw exception_tidy(OPKELE_CP_ "failed to create htmltidy document");
249#ifndef NDEBUG 255#ifndef NDEBUG
250 td.opt_set(TidyQuiet,false); 256 td.opt_set(TidyQuiet,false);
251 td.opt_set(TidyShowWarnings,false); 257 td.opt_set(TidyShowWarnings,false);
252#endif /* NDEBUG */ 258#endif /* NDEBUG */
253 td.opt_set(TidyForceOutput,true); 259 td.opt_set(TidyForceOutput,true);
254 td.opt_set(TidyXhtmlOut,true); 260 td.opt_set(TidyXhtmlOut,true);
255 td.opt_set(TidyDoctypeMode,TidyDoctypeOmit); 261 td.opt_set(TidyDoctypeMode,TidyDoctypeOmit);
256 td.opt_set(TidyMark,false); 262 td.opt_set(TidyMark,false);
257 if(td.parse_string(save_html)<=0) 263 if(td.parse_string(save_html)<=0)
258 throw exception_tidy(OPKELE_CP_ "tidy failed to parse document"); 264 throw exception_tidy(OPKELE_CP_ "tidy failed to parse document");
259 if(td.clean_and_repair()<=0) 265 if(td.clean_and_repair()<=0)
260 throw exception_tidy(OPKELE_CP_ "tidy failed to clean and repair"); 266 throw exception_tidy(OPKELE_CP_ "tidy failed to clean and repair");
261 util::tidy_buf_t tide; 267 util::tidy_buf_t tide;
262 if(td.save_buffer(tide)<=0) 268 if(td.save_buffer(tide)<=0)
263 throw exception_tidy(OPKELE_CP_ "tidy failed to save buffer"); 269 throw exception_tidy(OPKELE_CP_ "tidy failed to save buffer");
264 prepare_to_parse(); 270 prepare_to_parse();
265 parse(tide.c_str(),tide.size(),true); 271 parse(tide.c_str(),tide.size(),true);
266 }catch(exception_tidy& et) { } 272 }catch(exception_tidy& et) { }
267 } 273 }
268 save_html.clear(); 274 save_html.clear();
269 } 275 }
270 276
271 void prepare_to_parse() { 277 void prepare_to_parse() {
272 (*(expat_t*)this) = parser_create_ns(); 278 (*(expat_t*)this) = parser_create_ns();
273 set_user_data(); set_element_handler(); 279 set_user_data(); set_element_handler();
274 set_character_data_handler(); 280 set_character_data_handler();
275 281
276 if(xmode&xmode_html) { 282 if(xmode&xmode_html) {
277 html_openid1.clear(); html_openid2.clear(); 283 html_openid1.clear(); html_openid2.clear();
278 parser_choked = false; 284 parser_choked = false;
279 } 285 }
280 286
281 cdata = 0; xrd_service = 0; skipping = 0; 287 cdata = 0; xrd_service = 0; skipping = 0;
282 pt_stack.clear(); 288 pt_stack.clear();
283 status_code = 100; status_string.clear(); 289 status_code = 100; status_string.clear();
284 } 290 }
285 291
286 void html2xrd(endpoint_discovery_iterator& oi,idiscovery_t& id) { 292 void html2xrd(endpoint_discovery_iterator& oi,idiscovery_t& id) {
287 XRD_t& x = id.xrd; 293 XRD_t& x = id.xrd;
288 if(!html_openid2.uris.empty()) { 294 if(!html_openid2.uris.empty()) {
289 html_openid2.types.insert(STURI_OPENID20); 295 html_openid2.types.insert(STURI_OPENID20);
290 x.services.add(-1,html_openid2); 296 x.services.add(-1,html_openid2);
291 queue_endpoints(oi,id,&op_service_types[st_index_2]); 297 queue_endpoints(oi,id,&op_service_types[st_index_2]);
292 } 298 }
293 if(!html_openid1.uris.empty()) { 299 if(!html_openid1.uris.empty()) {
294 html_openid1.types.insert(STURI_OPENID11); 300 html_openid1.types.insert(STURI_OPENID11);
295 x.services.add(-1,html_openid1); 301 x.services.add(-1,html_openid1);
296 queue_endpoints(oi,id,&op_service_types[st_index_1]); 302 queue_endpoints(oi,id,&op_service_types[st_index_1]);
297 } 303 }
298 } 304 }
299 305
300 size_t write(void *p,size_t s,size_t nm) { 306 size_t write(void *p,size_t s,size_t nm) {
301 /* TODO: limit total size */ 307 /* TODO: limit total size */
302 size_t bytes = s*nm; 308 size_t bytes = s*nm;
303 const char *inbuf = (const char*)p; 309 const char *inbuf = (const char*)p;
304 if(xmode&xmode_html) { 310 if(xmode&xmode_html) {
305 size_t mbts = save_html.capacity()-save_html.size(); 311 size_t mbts = save_html.capacity()-save_html.size();
306 size_t bts = 0; 312 size_t bts = 0;
307 if(mbts>0) { 313 if(mbts>0) {
308 bts = (bytes>mbts)?mbts:bytes; 314 bts = (bytes>mbts)?mbts:bytes;
309 save_html.append(inbuf,bts); 315 save_html.append(inbuf,bts);
310 } 316 }
311 if(skipping<0) return bts; 317 if(skipping<0) return bts;
312 } 318 }
313 if(skipping<0) return 0; 319 if(skipping<0) return 0;
314 bool rp = parse(inbuf,bytes,false); 320 bool rp = parse(inbuf,bytes,false);
315 if(!rp) { 321 if(!rp) {
316 parser_choked = true; 322 parser_choked = true;
317 skipping = -1; 323 skipping = -1;
318 if(!(xmode&xmode_html)) 324 if(!(xmode&xmode_html))
319 bytes = 0; 325 bytes = 0;
320 } 326 }
321 return bytes; 327 return bytes;
322 } 328 }
323 size_t header(void *p,size_t s,size_t nm) { 329 size_t header(void *p,size_t s,size_t nm) {
324 size_t bytes = s*nm; 330 size_t bytes = s*nm;
325 const char *h = (const char*)p; 331 const char *h = (const char*)p;
326 const char *colon = (const char*)memchr(p,':',bytes); 332 const char *colon = (const char*)memchr(p,':',bytes);
327 const char *space = (const char*)memchr(p,' ',bytes); 333 const char *space = (const char*)memchr(p,' ',bytes);
328 if(space && ( (!colon) || space<colon ) ) { 334 if(space && ( (!colon) || space<colon ) ) {
329 xrds_location.clear(); http_content_type.clear(); 335 xrds_location.clear(); http_content_type.clear();
330 }else if(colon) { 336 }else if(colon) {
331 const char *hv = ++colon; 337 const char *hv = ++colon;