summary | refs | log | tree | commit | diff | about
path: root/lib/discovery.cc
author Michael Krelin <hacker@klever.net> 2008-04-06 19:26:50 (UTC)
committer Michael Krelin <hacker@klever.net> 2008-04-06 19:26:50 (UTC)
commit aa3a6821f6aaaf4ae63bafbbc76da5f414a75fd4 (patch) (unidiff)
tree c64231565e123d678850a7b0560b348ee82b2dcc /lib/discovery.cc
parent 752e484cd2fc239bc582a88fe7d62a225880ee3b (diff)
download libopkele-aa3a6821f6aaaf4ae63bafbbc76da5f414a75fd4.zip
libopkele-aa3a6821f6aaaf4ae63bafbbc76da5f414a75fd4.tar.gz
libopkele-aa3a6821f6aaaf4ae63bafbbc76da5f414a75fd4.tar.bz2
fix for discovery failing on some entities coming before the tag of interest
One of the numerous opportunities to express my gratitude by means of commit message to Joseph Smarr of Plaxo for spotting it. Thanks, Joseph! Signed-off-by: Michael Krelin <hacker@klever.net>
Diffstat (limited to 'lib/discovery.cc') (more/less context) (ignore whitespace changes)
-rw-r--r-- lib/discovery.cc 1
1 files changed, 1 insertions, 0 deletions
diff --git a/lib/discovery.cc b/lib/discovery.cc
index c118c80..3b90977 100644
--- a/lib/discovery.cc
+++ b/lib/discovery.cc
@@ -135,256 +135,257 @@ namespace opkele {
135 if(!strncasecmp(identity.c_str()+fsc,"xri://",sizeof("xri://")-1)) 135 if(!strncasecmp(identity.c_str()+fsc,"xri://",sizeof("xri://")-1))
136 fsc += sizeof("xri://")-1; 136 fsc += sizeof("xri://")-1;
137 if((fsc+1)>=lsc) 137 if((fsc+1)>=lsc)
138 throw bad_input(OPKELE_CP_ "not a character of importance in identity"); 138 throw bad_input(OPKELE_CP_ "not a character of importance in identity");
139 string id(identity,fsc,lsc-fsc+1); 139 string id(identity,fsc,lsc-fsc+1);
140 idis.clear(); 140 idis.clear();
141 if(strchr(data::_iname_leaders,id[0])) { 141 if(strchr(data::_iname_leaders,id[0])) {
142 /* TODO: further normalize xri identity? Like folding case 142 /* TODO: further normalize xri identity? Like folding case
143 * or whatever... */ 143 * or whatever... */
144 rv = id; 144 rv = id;
145 set<string> cids; 145 set<string> cids;
146 for(const struct service_type_t *st=op_service_types; 146 for(const struct service_type_t *st=op_service_types;
147 st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) { 147 st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) {
148 idis.clear(); 148 idis.clear();
149 discover_at( idis, 149 discover_at( idis,
150 xri_proxy + util::url_encode(id)+ 150 xri_proxy + util::url_encode(id)+
151 "?_xrd_t="+util::url_encode(st->uri)+ 151 "?_xrd_t="+util::url_encode(st->uri)+
152 "&_xrd_r=application/xrd%2Bxml" 152 "&_xrd_r=application/xrd%2Bxml"
153 ";sep=true;refs=true", 153 ";sep=true;refs=true",
154 xmode_xrd ); 154 xmode_xrd );
155 if(status_code==241) continue; 155 if(status_code==241) continue;
156 if(status_code!=100) 156 if(status_code!=100)
157 throw failed_xri_resolution(OPKELE_CP_ 157 throw failed_xri_resolution(OPKELE_CP_
158 "XRI resolution failed with '"+status_string+"' message" 158 "XRI resolution failed with '"+status_string+"' message"
159 ", while looking for SEP with type '"+st->uri+"'", status_code); 159 ", while looking for SEP with type '"+st->uri+"'", status_code);
160 if(idis.xrd.canonical_ids.empty()) 160 if(idis.xrd.canonical_ids.empty())
161 throw opkele::failed_discovery(OPKELE_CP_ "No CanonicalID for XRI identity found"); 161 throw opkele::failed_discovery(OPKELE_CP_ "No CanonicalID for XRI identity found");
162 string cid = idis.xrd.canonical_ids.begin()->second; 162 string cid = idis.xrd.canonical_ids.begin()->second;
163 if(cids.find(cid)==cids.end()) { 163 if(cids.find(cid)==cids.end()) {
164 cids.insert(cid); 164 cids.insert(cid);
165 idis.clear(); 165 idis.clear();
166 discover_at( idis, 166 discover_at( idis,
167 xri_proxy + util::url_encode(id)+ 167 xri_proxy + util::url_encode(id)+
168 "?_xrd_t="+util::url_encode(st->uri)+ 168 "?_xrd_t="+util::url_encode(st->uri)+
169 "&_xrd_r=application/xrd%2Bxml" 169 "&_xrd_r=application/xrd%2Bxml"
170 ";sep=true;refs=true", 170 ";sep=true;refs=true",
171 xmode_xrd ); 171 xmode_xrd );
172 if(status_code==241) continue; 172 if(status_code==241) continue;
173 if(status_code!=100) 173 if(status_code!=100)
174 throw failed_xri_resolution(OPKELE_CP_ 174 throw failed_xri_resolution(OPKELE_CP_
175 "XRI resolution failed with '"+status_string+"' message" 175 "XRI resolution failed with '"+status_string+"' message"
176 ", while looking for SEP with type '"+st->uri+"'" 176 ", while looking for SEP with type '"+st->uri+"'"
177 " on canonical id", status_code); 177 " on canonical id", status_code);
178 } 178 }
179 idis.canonicalized_id = cid; 179 idis.canonicalized_id = cid;
180 idis.normalized_id = rv; idis.xri_identity = true; 180 idis.normalized_id = rv; idis.xri_identity = true;
181 queue_endpoints(oi,idis,st); 181 queue_endpoints(oi,idis,st);
182 } 182 }
183 }else{ 183 }else{
184 idis.xri_identity = false; 184 idis.xri_identity = false;
185 if(id.find("://")==string::npos) 185 if(id.find("://")==string::npos)
186 id.insert(0,"http://"); 186 id.insert(0,"http://");
187 string::size_type fp = id.find('#'); 187 string::size_type fp = id.find('#');
188 if(fp!=string::npos) { 188 if(fp!=string::npos) {
189 string::size_type qp = id.find('?'); 189 string::size_type qp = id.find('?');
190 if(qp==string::npos || qp<fp) 190 if(qp==string::npos || qp<fp)
191 id.erase(fp); 191 id.erase(fp);
192 else if(qp>fp) 192 else if(qp>fp)
193 id.erase(fp,qp-fp); 193 id.erase(fp,qp-fp);
194 } 194 }
195 rv = idis.normalized_id = util::rfc_3986_normalize_uri(id); 195 rv = idis.normalized_id = util::rfc_3986_normalize_uri(id);
196 discover_at(idis,id,xmode_html|xmode_xrd); 196 discover_at(idis,id,xmode_html|xmode_xrd);
197 const char * eu = 0; 197 const char * eu = 0;
198 CURLcode r = easy_getinfo(CURLINFO_EFFECTIVE_URL,&eu); 198 CURLcode r = easy_getinfo(CURLINFO_EFFECTIVE_URL,&eu);
199 if(r) 199 if(r)
200 throw exception_curl(OPKELE_CP_ "failed to get CURLINFO_EFFECTIVE_URL",r); 200 throw exception_curl(OPKELE_CP_ "failed to get CURLINFO_EFFECTIVE_URL",r);
201 string cid = util::strip_uri_fragment_part( idis.canonicalized_id = util::rfc_3986_normalize_uri(eu) ); 201 string cid = util::strip_uri_fragment_part( idis.canonicalized_id = util::rfc_3986_normalize_uri(eu) );
202 if(xrds_location.empty()) { 202 if(xrds_location.empty()) {
203 if(idis.xrd.empty()) 203 if(idis.xrd.empty())
204 html2xrd(oi,idis); 204 html2xrd(oi,idis);
205 else{ 205 else{
206 for(const service_type_t *st=op_service_types; 206 for(const service_type_t *st=op_service_types;
207 st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) 207 st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st)
208 queue_endpoints(oi,idis,st); 208 queue_endpoints(oi,idis,st);
209 } 209 }
210 }else{ 210 }else{
211 idis.clear(); 211 idis.clear();
212 idis.canonicalized_id = cid; 212 idis.canonicalized_id = cid;
213 discover_at(idis,xrds_location,xmode_xrd); 213 discover_at(idis,xrds_location,xmode_xrd);
214 if(idis.xrd.empty()) 214 if(idis.xrd.empty())
215 html2xrd(oi,idis); 215 html2xrd(oi,idis);
216 else{ 216 else{
217 for(const service_type_t *st=op_service_types; 217 for(const service_type_t *st=op_service_types;
218 st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) 218 st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st)
219 queue_endpoints(oi,idis,st); 219 queue_endpoints(oi,idis,st);
220 } 220 }
221 } 221 }
222 } 222 }
223 return rv; 223 return rv;
224 } 224 }
225 225
226 void discover_at(idiscovery_t& idis,const string& url,int xm) { 226 void discover_at(idiscovery_t& idis,const string& url,int xm) {
227 CURLcode r = easy_setopt(CURLOPT_MAXREDIRS, (xm&xmode_noredirs)?0:5); 227 CURLcode r = easy_setopt(CURLOPT_MAXREDIRS, (xm&xmode_noredirs)?0:5);
228 if(r) 228 if(r)
229 throw exception_curl(OPKELE_CP_ "failed to set curly maxredirs option"); 229 throw exception_curl(OPKELE_CP_ "failed to set curly maxredirs option");
230 if( (r=easy_setopt(CURLOPT_URL,url.c_str())) ) 230 if( (r=easy_setopt(CURLOPT_URL,url.c_str())) )
231 throw exception_curl(OPKELE_CP_ "failed to set curly urlie",r); 231 throw exception_curl(OPKELE_CP_ "failed to set curly urlie",r);
232 232
233 http_content_type.clear(); 233 http_content_type.clear();
234 xmode = xm; 234 xmode = xm;
235 prepare_to_parse(); 235 prepare_to_parse();
236 if(xmode&xmode_html) { 236 if(xmode&xmode_html) {
237 xrds_location.clear(); 237 xrds_location.clear();
238 save_html.clear(); 238 save_html.clear();
239 save_html.reserve(max_html); 239 save_html.reserve(max_html);
240 } 240 }
241 xrd = &idis.xrd; 241 xrd = &idis.xrd;
242 242
243 r = easy_perform(); 243 r = easy_perform();
244 if(r && r!=CURLE_WRITE_ERROR) 244 if(r && r!=CURLE_WRITE_ERROR)
245 throw exception_curl(OPKELE_CP_ "failed to perform curly request",r); 245 throw exception_curl(OPKELE_CP_ "failed to perform curly request",r);
246 246
247 if(!parser_choked) { 247 if(!parser_choked) {
248 parse(0,0,true); 248 parse(0,0,true);
249 }else if(xmode&xmode_html){ 249 }else if(xmode&xmode_html){
250 /* TODO: do not bother if we've seen xml */ 250 /* TODO: do not bother if we've seen xml */
251 try { 251 try {
252 util::tidy_doc_t td = util::tidy_doc_t::create(); 252 util::tidy_doc_t td = util::tidy_doc_t::create();
253 if(!td) 253 if(!td)
254 throw exception_tidy(OPKELE_CP_ "failed to create htmltidy document"); 254 throw exception_tidy(OPKELE_CP_ "failed to create htmltidy document");
255#ifndef NDEBUG 255#ifndef NDEBUG
256 td.opt_set(TidyQuiet,false); 256 td.opt_set(TidyQuiet,false);
257 td.opt_set(TidyShowWarnings,false); 257 td.opt_set(TidyShowWarnings,false);
258#endif /* NDEBUG */ 258#endif /* NDEBUG */
259 td.opt_set(TidyForceOutput,true); 259 td.opt_set(TidyForceOutput,true);
260 td.opt_set(TidyXhtmlOut,true); 260 td.opt_set(TidyXhtmlOut,true);
261 td.opt_set(TidyDoctypeMode,TidyDoctypeOmit); 261 td.opt_set(TidyDoctypeMode,TidyDoctypeOmit);
262 td.opt_set(TidyMark,false); 262 td.opt_set(TidyMark,false);
263 td.opt_set(TidyNumEntities,true);
263 if(td.parse_string(save_html)<=0) 264 if(td.parse_string(save_html)<=0)
264 throw exception_tidy(OPKELE_CP_ "tidy failed to parse document"); 265 throw exception_tidy(OPKELE_CP_ "tidy failed to parse document");
265 if(td.clean_and_repair()<=0) 266 if(td.clean_and_repair()<=0)
266 throw exception_tidy(OPKELE_CP_ "tidy failed to clean and repair"); 267 throw exception_tidy(OPKELE_CP_ "tidy failed to clean and repair");
267 util::tidy_buf_t tide; 268 util::tidy_buf_t tide;
268 if(td.save_buffer(tide)<=0) 269 if(td.save_buffer(tide)<=0)
269 throw exception_tidy(OPKELE_CP_ "tidy failed to save buffer"); 270 throw exception_tidy(OPKELE_CP_ "tidy failed to save buffer");
270 prepare_to_parse(); 271 prepare_to_parse();
271 parse(tide.c_str(),tide.size(),true); 272 parse(tide.c_str(),tide.size(),true);
272 }catch(exception_tidy& et) { } 273 }catch(exception_tidy& et) { }
273 } 274 }
274 save_html.clear(); 275 save_html.clear();
275 } 276 }
276 277
277 void prepare_to_parse() { 278 void prepare_to_parse() {
278 (*(expat_t*)this) = parser_create_ns(); 279 (*(expat_t*)this) = parser_create_ns();
279 set_user_data(); set_element_handler(); 280 set_user_data(); set_element_handler();
280 set_character_data_handler(); 281 set_character_data_handler();
281 282
282 if(xmode&xmode_html) { 283 if(xmode&xmode_html) {
283 html_openid1.clear(); html_openid2.clear(); 284 html_openid1.clear(); html_openid2.clear();
284 parser_choked = false; 285 parser_choked = false;
285 } 286 }
286 287
287 cdata = 0; xrd_service = 0; skipping = 0; 288 cdata = 0; xrd_service = 0; skipping = 0;
288 pt_stack.clear(); 289 pt_stack.clear();
289 status_code = 100; status_string.clear(); 290 status_code = 100; status_string.clear();
290 } 291 }
291 292
292 void html2xrd(endpoint_discovery_iterator& oi,idiscovery_t& id) { 293 void html2xrd(endpoint_discovery_iterator& oi,idiscovery_t& id) {
293 XRD_t& x = id.xrd; 294 XRD_t& x = id.xrd;
294 if(!html_openid2.uris.empty()) { 295 if(!html_openid2.uris.empty()) {
295 html_openid2.types.insert(STURI_OPENID20); 296 html_openid2.types.insert(STURI_OPENID20);
296 x.services.add(-1,html_openid2); 297 x.services.add(-1,html_openid2);
297 queue_endpoints(oi,id,&op_service_types[st_index_2]); 298 queue_endpoints(oi,id,&op_service_types[st_index_2]);
298 } 299 }
299 if(!html_openid1.uris.empty()) { 300 if(!html_openid1.uris.empty()) {
300 html_openid1.types.insert(STURI_OPENID11); 301 html_openid1.types.insert(STURI_OPENID11);
301 x.services.add(-1,html_openid1); 302 x.services.add(-1,html_openid1);
302 queue_endpoints(oi,id,&op_service_types[st_index_1]); 303 queue_endpoints(oi,id,&op_service_types[st_index_1]);
303 } 304 }
304 } 305 }
305 306
306 size_t write(void *p,size_t s,size_t nm) { 307 size_t write(void *p,size_t s,size_t nm) {
307 /* TODO: limit total size */ 308 /* TODO: limit total size */
308 size_t bytes = s*nm; 309 size_t bytes = s*nm;
309 const char *inbuf = (const char*)p; 310 const char *inbuf = (const char*)p;
310 if(xmode&xmode_html) { 311 if(xmode&xmode_html) {
311 size_t mbts = save_html.capacity()-save_html.size(); 312 size_t mbts = save_html.capacity()-save_html.size();
312 size_t bts = 0; 313 size_t bts = 0;
313 if(mbts>0) { 314 if(mbts>0) {
314 bts = (bytes>mbts)?mbts:bytes; 315 bts = (bytes>mbts)?mbts:bytes;
315 save_html.append(inbuf,bts); 316 save_html.append(inbuf,bts);
316 } 317 }
317 if(skipping<0) return bts; 318 if(skipping<0) return bts;
318 } 319 }
319 if(skipping<0) return 0; 320 if(skipping<0) return 0;
320 bool rp = parse(inbuf,bytes,false); 321 bool rp = parse(inbuf,bytes,false);
321 if(!rp) { 322 if(!rp) {
322 parser_choked = true; 323 parser_choked = true;
323 skipping = -1; 324 skipping = -1;
324 if(!(xmode&xmode_html)) 325 if(!(xmode&xmode_html))
325 bytes = 0; 326 bytes = 0;
326 } 327 }
327 return bytes; 328 return bytes;
328 } 329 }
329 size_t header(void *p,size_t s,size_t nm) { 330 size_t header(void *p,size_t s,size_t nm) {
330 size_t bytes = s*nm; 331 size_t bytes = s*nm;
331 const char *h = (const char*)p; 332 const char *h = (const char*)p;
332 const char *colon = (const char*)memchr(p,':',bytes); 333 const char *colon = (const char*)memchr(p,':',bytes);
333 const char *space = (const char*)memchr(p,' ',bytes); 334 const char *space = (const char*)memchr(p,' ',bytes);
334 if(space && ( (!colon) || space<colon ) ) { 335 if(space && ( (!colon) || space<colon ) ) {
335 xrds_location.clear(); http_content_type.clear(); 336 xrds_location.clear(); http_content_type.clear();
336 }else if(colon) { 337 }else if(colon) {
337 const char *hv = ++colon; 338 const char *hv = ++colon;
338 size_t hnl = colon-h; 339 size_t hnl = colon-h;
339 int rb; 340 int rb;
340 for(rb = bytes-hnl-1;rb>0 && isspace(*hv);++hv,--rb); 341 for(rb = bytes-hnl-1;rb>0 && isspace(*hv);++hv,--rb);
341 while(rb>0 && isspace(hv[rb-1])) --rb; 342 while(rb>0 && isspace(hv[rb-1])) --rb;
342 if(rb) { 343 if(rb) {
343 if( (hnl>=sizeof(XRDS_HEADER)) 344 if( (hnl>=sizeof(XRDS_HEADER))
344 && !strncasecmp(h,XRDS_HEADER":", 345 && !strncasecmp(h,XRDS_HEADER":",
345 sizeof(XRDS_HEADER)) ) { 346 sizeof(XRDS_HEADER)) ) {
346 xrds_location.assign(hv,rb); 347 xrds_location.assign(hv,rb);
347 }else if( (hnl>=sizeof(CT_HEADER)) 348 }else if( (hnl>=sizeof(CT_HEADER))
348 && !strncasecmp(h,CT_HEADER":", 349 && !strncasecmp(h,CT_HEADER":",
349 sizeof(CT_HEADER)) ) { 350 sizeof(CT_HEADER)) ) {
350 const char *sc = (const char*)memchr( 351 const char *sc = (const char*)memchr(
351 hv,';',rb); 352 hv,';',rb);
352 http_content_type.assign(hv,sc?(sc-hv):rb); 353 http_content_type.assign(hv,sc?(sc-hv):rb);
353 } 354 }
354 } 355 }
355 } 356 }
356 return curl_t::header(p,s,nm); 357 return curl_t::header(p,s,nm);
357 } 358 }
358 359
359 void start_element(const XML_Char *n,const XML_Char **a) { 360 void start_element(const XML_Char *n,const XML_Char **a) {
360 if(skipping<0) return; 361 if(skipping<0) return;
361 if(skipping) { 362 if(skipping) {
362 if(xmode&xmode_html) 363 if(xmode&xmode_html)
363 html_start_element(n,a); 364 html_start_element(n,a);
364 ++skipping; return; 365 ++skipping; return;
365 } 366 }
366 if(pt_stack.empty()) { 367 if(pt_stack.empty()) {
367 if(is_qelement(n,NSURI_XRDS "\tXRDS")) 368 if(is_qelement(n,NSURI_XRDS "\tXRDS"))
368 return; 369 return;
369 if(is_qelement(n,NSURI_XRD "\tXRD")) { 370 if(is_qelement(n,NSURI_XRD "\tXRD")) {
370 assert(xrd); 371 assert(xrd);
371 xrd->clear(); 372 xrd->clear();
372 pt_stack.push_back(n); 373 pt_stack.push_back(n);
373 }else if(xmode&xmode_html) { 374 }else if(xmode&xmode_html) {
374 html_start_element(n,a); 375 html_start_element(n,a);
375 }else{ 376 }else{
376 skipping = -1; 377 skipping = -1;
377 } 378 }
378 }else{ 379 }else{
379 int pt_s = pt_stack.size(); 380 int pt_s = pt_stack.size();
380 if(pt_s==1) { 381 if(pt_s==1) {
381 if(is_qelement(n,NSURI_XRD "\tCanonicalID")) { 382 if(is_qelement(n,NSURI_XRD "\tCanonicalID")) {
382 assert(xrd); 383 assert(xrd);
383 cdata = &(xrd->canonical_ids.add(element_priority(a),string())); 384 cdata = &(xrd->canonical_ids.add(element_priority(a),string()));
384 }else if(is_qelement(n,NSURI_XRD "\tLocalID")) { 385 }else if(is_qelement(n,NSURI_XRD "\tLocalID")) {
385 assert(xrd); 386 assert(xrd);
386 cdata = &(xrd->local_ids.add(element_priority(a),string())); 387 cdata = &(xrd->local_ids.add(element_priority(a),string()));
387 }else if(is_qelement(n,NSURI_XRD "\tProviderID")) { 388 }else if(is_qelement(n,NSURI_XRD "\tProviderID")) {
388 assert(xrd); 389 assert(xrd);
389 cdata = &(xrd->provider_id); 390 cdata = &(xrd->provider_id);
390 }else if(is_qelement(n,NSURI_XRD "\tService")) { 391 }else if(is_qelement(n,NSURI_XRD "\tService")) {