-rw-r--r-- | lib/discovery.cc | 7 |
1 files changed, 5 insertions, 2 deletions
diff --git a/lib/discovery.cc b/lib/discovery.cc index 984e308..bd1f917 100644 --- a/lib/discovery.cc +++ b/lib/discovery.cc | |||
@@ -160,195 +160,198 @@ namespace opkele { | |||
160 | ", while looking for SEP with type '"+st->uri+"'", status_code); | 160 | ", while looking for SEP with type '"+st->uri+"'", status_code); |
161 | if(idis.xrd.canonical_ids.empty()) | 161 | if(idis.xrd.canonical_ids.empty()) |
162 | throw opkele::failed_discovery(OPKELE_CP_ "No CanonicalID for XRI identity found"); | 162 | throw opkele::failed_discovery(OPKELE_CP_ "No CanonicalID for XRI identity found"); |
163 | string cid = idis.xrd.canonical_ids.begin()->second; | 163 | string cid = idis.xrd.canonical_ids.begin()->second; |
164 | if(cids.find(cid)==cids.end()) { | 164 | if(cids.find(cid)==cids.end()) { |
165 | cids.insert(cid); | 165 | cids.insert(cid); |
166 | idis.clear(); | 166 | idis.clear(); |
167 | discover_at( idis, | 167 | discover_at( idis, |
168 | xri_proxy + util::url_encode(id)+ | 168 | xri_proxy + util::url_encode(id)+ |
169 | "?_xrd_t="+util::url_encode(st->uri)+ | 169 | "?_xrd_t="+util::url_encode(st->uri)+ |
170 | "&_xrd_r=application/xrd%2Bxml" | 170 | "&_xrd_r=application/xrd%2Bxml" |
171 | ";sep=true;refs=true", | 171 | ";sep=true;refs=true", |
172 | xmode_xrd ); | 172 | xmode_xrd ); |
173 | if(status_code==241) continue; | 173 | if(status_code==241) continue; |
174 | if(status_code!=100) | 174 | if(status_code!=100) |
175 | throw failed_xri_resolution(OPKELE_CP_ | 175 | throw failed_xri_resolution(OPKELE_CP_ |
176 | "XRI resolution failed with '"+status_string+"' message" | 176 | "XRI resolution failed with '"+status_string+"' message" |
177 | ", while looking for SEP with type '"+st->uri+"'" | 177 | ", while looking for SEP with type '"+st->uri+"'" |
178 | " on canonical id", status_code); | 178 | " on canonical id", status_code); |
179 | } | 179 | } |
180 | idis.canonicalized_id = cid; | 180 | idis.canonicalized_id = cid; |
181 | idis.normalized_id = rv; idis.xri_identity = true; | 181 | idis.normalized_id = rv; idis.xri_identity = true; |
182 | queue_endpoints(oi,idis,st); | 182 | queue_endpoints(oi,idis,st); |
183 | } | 183 | } |
184 | }else{ | 184 | }else{ |
185 | idis.xri_identity = false; | 185 | idis.xri_identity = false; |
186 | if(id.find("://")==string::npos) | 186 | if(id.find("://")==string::npos) |
187 | id.insert(0,"http://"); | 187 | id.insert(0,"http://"); |
188 | string::size_type fp = id.find('#'); | 188 | string::size_type fp = id.find('#'); |
189 | if(fp!=string::npos) { | 189 | if(fp!=string::npos) { |
190 | string::size_type qp = id.find('?'); | 190 | string::size_type qp = id.find('?'); |
191 | if(qp==string::npos || qp<fp) | 191 | if(qp==string::npos || qp<fp) |
192 | id.erase(fp); | 192 | id.erase(fp); |
193 | else if(qp>fp) | 193 | else if(qp>fp) |
194 | id.erase(fp,qp-fp); | 194 | id.erase(fp,qp-fp); |
195 | } | 195 | } |
196 | rv = idis.normalized_id = util::rfc_3986_normalize_uri(id); | 196 | rv = idis.normalized_id = util::rfc_3986_normalize_uri(id); |
197 | discover_at(idis,id,xmode_html|xmode_xrd); | 197 | discover_at(idis,id,xmode_html|xmode_xrd); |
198 | const char * eu = 0; | 198 | const char * eu = 0; |
199 | CURLcode r = easy_getinfo(CURLINFO_EFFECTIVE_URL,&eu); | 199 | CURLcode r = easy_getinfo(CURLINFO_EFFECTIVE_URL,&eu); |
200 | if(r) | 200 | if(r) |
201 | throw exception_curl(OPKELE_CP_ "failed to get CURLINFO_EFFECTIVE_URL",r); | 201 | throw exception_curl(OPKELE_CP_ "failed to get CURLINFO_EFFECTIVE_URL",r); |
202 | string cid = util::strip_uri_fragment_part( idis.canonicalized_id = util::rfc_3986_normalize_uri(eu) ); | 202 | string cid = util::strip_uri_fragment_part( idis.canonicalized_id = util::rfc_3986_normalize_uri(eu) ); |
203 | if(xrds_location.empty()) { | 203 | if(xrds_location.empty()) { |
204 | if(idis.xrd.empty()) | 204 | if(idis.xrd.empty()) |
205 | html2xrd(oi,idis); | 205 | html2xrd(oi,idis); |
206 | else{ | 206 | else{ |
207 | for(const service_type_t *st=op_service_types; | 207 | for(const service_type_t *st=op_service_types; |
208 | st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) | 208 | st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) |
209 | queue_endpoints(oi,idis,st); | 209 | queue_endpoints(oi,idis,st); |
210 | } | 210 | } |
211 | }else{ | 211 | }else{ |
212 | idis.clear(); | 212 | idis.clear(); |
213 | idis.canonicalized_id = cid; | 213 | idis.canonicalized_id = cid; |
214 | discover_at(idis,xrds_location,xmode_xrd); | 214 | discover_at(idis,xrds_location,xmode_xrd); |
215 | if(idis.xrd.empty()) | 215 | if(idis.xrd.empty()) |
216 | html2xrd(oi,idis); | 216 | html2xrd(oi,idis); |
217 | else{ | 217 | else{ |
218 | for(const service_type_t *st=op_service_types; | 218 | for(const service_type_t *st=op_service_types; |
219 | st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) | 219 | st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) |
220 | queue_endpoints(oi,idis,st); | 220 | queue_endpoints(oi,idis,st); |
221 | } | 221 | } |
222 | } | 222 | } |
223 | } | 223 | } |
224 | return rv; | 224 | return rv; |
225 | } | 225 | } |
226 | 226 | ||
227 | void discover_at(idiscovery_t& idis,const string& url,int xm) { | 227 | void discover_at(idiscovery_t& idis,const string& url,int xm) { |
228 | CURLcode r = easy_setopt(CURLOPT_MAXREDIRS, (xm&xmode_noredirs)?0:5); | 228 | CURLcode r = easy_setopt(CURLOPT_MAXREDIRS, (xm&xmode_noredirs)?0:5); |
229 | if(r) | 229 | if(r) |
230 | throw exception_curl(OPKELE_CP_ "failed to set curly maxredirs option"); | 230 | throw exception_curl(OPKELE_CP_ "failed to set curly maxredirs option"); |
231 | if( (r=easy_setopt(CURLOPT_URL,url.c_str())) ) | 231 | if( (r=easy_setopt(CURLOPT_URL,url.c_str())) ) |
232 | throw exception_curl(OPKELE_CP_ "failed to set curly urlie",r); | 232 | throw exception_curl(OPKELE_CP_ "failed to set curly urlie",r); |
233 | 233 | ||
234 | http_content_type.clear(); | 234 | http_content_type.clear(); |
235 | xmode = xm; | 235 | xmode = xm; |
236 | prepare_to_parse(); | 236 | prepare_to_parse(); |
237 | if(xmode&xmode_html) { | 237 | if(xmode&xmode_html) { |
238 | xrds_location.clear(); | 238 | xrds_location.clear(); |
239 | save_html.clear(); | 239 | save_html.clear(); |
240 | save_html.reserve(max_html); | 240 | save_html.reserve(max_html); |
241 | } | 241 | } |
242 | xrd = &idis.xrd; | 242 | xrd = &idis.xrd; |
243 | 243 | ||
244 | r = easy_perform(); | 244 | r = easy_perform(); |
245 | if(r && r!=CURLE_WRITE_ERROR) | 245 | if(r && r!=CURLE_WRITE_ERROR) |
246 | throw exception_curl(OPKELE_CP_ "failed to perform curly request",r); | 246 | throw exception_curl(OPKELE_CP_ "failed to perform curly request",r); |
247 | 247 | ||
248 | if(!parser_choked) { | 248 | if(!parser_choked) { |
249 | parse(0,0,true); | 249 | parse(0,0,true); |
250 | }else if(xmode&xmode_html){ | 250 | }else if(xmode&xmode_html){ |
251 | /* TODO: do not bother if we've seen xml */ | 251 | /* TODO: do not bother if we've seen xml */ |
252 | try { | 252 | try { |
253 | util::tidy_doc_t td = util::tidy_doc_t::create(); | 253 | util::tidy_doc_t td = util::tidy_doc_t::create(); |
254 | if(!td) | 254 | if(!td) |
255 | throw exception_tidy(OPKELE_CP_ "failed to create htmltidy document"); | 255 | throw exception_tidy(OPKELE_CP_ "failed to create htmltidy document"); |
256 | #ifndef NDEBUG | 256 | #ifdef NDEBUG |
257 | td.opt_set(TidyQuiet,false); | 257 | td.opt_set(TidyQuiet,true); |
258 | td.opt_set(TidyShowWarnings,false); | 258 | td.opt_set(TidyShowWarnings,false); |
259 | #else /* NDEBUG */ | ||
260 | td.opt_set(TidyQuiet,false); | ||
261 | td.opt_set(TidyShowWarnings,true); | ||
259 | #endif /* NDEBUG */ | 262 | #endif /* NDEBUG */ |
260 | td.opt_set(TidyForceOutput,true); | 263 | td.opt_set(TidyForceOutput,true); |
261 | td.opt_set(TidyXhtmlOut,true); | 264 | td.opt_set(TidyXhtmlOut,true); |
262 | td.opt_set(TidyDoctypeMode,TidyDoctypeOmit); | 265 | td.opt_set(TidyDoctypeMode,TidyDoctypeOmit); |
263 | td.opt_set(TidyMark,false); | 266 | td.opt_set(TidyMark,false); |
264 | td.opt_set(TidyNumEntities,true); | 267 | td.opt_set(TidyNumEntities,true); |
265 | if(td.parse_string(save_html)<=0) | 268 | if(td.parse_string(save_html)<=0) |
266 | throw exception_tidy(OPKELE_CP_ "tidy failed to parse document"); | 269 | throw exception_tidy(OPKELE_CP_ "tidy failed to parse document"); |
267 | if(td.clean_and_repair()<=0) | 270 | if(td.clean_and_repair()<=0) |
268 | throw exception_tidy(OPKELE_CP_ "tidy failed to clean and repair"); | 271 | throw exception_tidy(OPKELE_CP_ "tidy failed to clean and repair"); |
269 | util::tidy_buf_t tide; | 272 | util::tidy_buf_t tide; |
270 | if(td.save_buffer(tide)<=0) | 273 | if(td.save_buffer(tide)<=0) |
271 | throw exception_tidy(OPKELE_CP_ "tidy failed to save buffer"); | 274 | throw exception_tidy(OPKELE_CP_ "tidy failed to save buffer"); |
272 | prepare_to_parse(); | 275 | prepare_to_parse(); |
273 | parse(tide.c_str(),tide.size(),true); | 276 | parse(tide.c_str(),tide.size(),true); |
274 | }catch(exception_tidy& et) { } | 277 | }catch(exception_tidy& et) { } |
275 | } | 278 | } |
276 | save_html.clear(); | 279 | save_html.clear(); |
277 | } | 280 | } |
278 | 281 | ||
279 | void prepare_to_parse() { | 282 | void prepare_to_parse() { |
280 | (*(expat_t*)this) = parser_create_ns(); | 283 | (*(expat_t*)this) = parser_create_ns(); |
281 | set_user_data(); set_element_handler(); | 284 | set_user_data(); set_element_handler(); |
282 | set_character_data_handler(); | 285 | set_character_data_handler(); |
283 | 286 | ||
284 | if(xmode&xmode_html) { | 287 | if(xmode&xmode_html) { |
285 | html_openid1.clear(); html_openid2.clear(); | 288 | html_openid1.clear(); html_openid2.clear(); |
286 | parser_choked = false; | 289 | parser_choked = false; |
287 | } | 290 | } |
288 | 291 | ||
289 | cdata = 0; xrd_service = 0; skipping = 0; | 292 | cdata = 0; xrd_service = 0; skipping = 0; |
290 | pt_stack.clear(); | 293 | pt_stack.clear(); |
291 | status_code = 100; status_string.clear(); | 294 | status_code = 100; status_string.clear(); |
292 | } | 295 | } |
293 | 296 | ||
294 | void html2xrd(endpoint_discovery_iterator& oi,idiscovery_t& id) { | 297 | void html2xrd(endpoint_discovery_iterator& oi,idiscovery_t& id) { |
295 | XRD_t& x = id.xrd; | 298 | XRD_t& x = id.xrd; |
296 | if(!html_openid2.uris.empty()) { | 299 | if(!html_openid2.uris.empty()) { |
297 | html_openid2.types.insert(STURI_OPENID20); | 300 | html_openid2.types.insert(STURI_OPENID20); |
298 | x.services.add(-1,html_openid2); | 301 | x.services.add(-1,html_openid2); |
299 | queue_endpoints(oi,id,&op_service_types[st_index_2]); | 302 | queue_endpoints(oi,id,&op_service_types[st_index_2]); |
300 | } | 303 | } |
301 | if(!html_openid1.uris.empty()) { | 304 | if(!html_openid1.uris.empty()) { |
302 | html_openid1.types.insert(STURI_OPENID11); | 305 | html_openid1.types.insert(STURI_OPENID11); |
303 | x.services.add(-1,html_openid1); | 306 | x.services.add(-1,html_openid1); |
304 | queue_endpoints(oi,id,&op_service_types[st_index_1]); | 307 | queue_endpoints(oi,id,&op_service_types[st_index_1]); |
305 | } | 308 | } |
306 | } | 309 | } |
307 | 310 | ||
308 | size_t write(void *p,size_t s,size_t nm) { | 311 | size_t write(void *p,size_t s,size_t nm) { |
309 | /* TODO: limit total size */ | 312 | /* TODO: limit total size */ |
310 | size_t bytes = s*nm; | 313 | size_t bytes = s*nm; |
311 | const char *inbuf = (const char*)p; | 314 | const char *inbuf = (const char*)p; |
312 | if(xmode&xmode_html) { | 315 | if(xmode&xmode_html) { |
313 | size_t mbts = save_html.capacity()-save_html.size(); | 316 | size_t mbts = save_html.capacity()-save_html.size(); |
314 | size_t bts = 0; | 317 | size_t bts = 0; |
315 | if(mbts>0) { | 318 | if(mbts>0) { |
316 | bts = (bytes>mbts)?mbts:bytes; | 319 | bts = (bytes>mbts)?mbts:bytes; |
317 | save_html.append(inbuf,bts); | 320 | save_html.append(inbuf,bts); |
318 | } | 321 | } |
319 | if(skipping<0) return bts; | 322 | if(skipping<0) return bts; |
320 | } | 323 | } |
321 | if(skipping<0) return 0; | 324 | if(skipping<0) return 0; |
322 | bool rp = parse(inbuf,bytes,false); | 325 | bool rp = parse(inbuf,bytes,false); |
323 | if(!rp) { | 326 | if(!rp) { |
324 | parser_choked = true; | 327 | parser_choked = true; |
325 | skipping = -1; | 328 | skipping = -1; |
326 | if(!(xmode&xmode_html)) | 329 | if(!(xmode&xmode_html)) |
327 | bytes = 0; | 330 | bytes = 0; |
328 | } | 331 | } |
329 | return bytes; | 332 | return bytes; |
330 | } | 333 | } |
331 | size_t header(void *p,size_t s,size_t nm) { | 334 | size_t header(void *p,size_t s,size_t nm) { |
332 | size_t bytes = s*nm; | 335 | size_t bytes = s*nm; |
333 | const char *h = (const char*)p; | 336 | const char *h = (const char*)p; |
334 | const char *colon = (const char*)memchr(p,':',bytes); | 337 | const char *colon = (const char*)memchr(p,':',bytes); |
335 | const char *space = (const char*)memchr(p,' ',bytes); | 338 | const char *space = (const char*)memchr(p,' ',bytes); |
336 | if(space && ( (!colon) || space<colon ) ) { | 339 | if(space && ( (!colon) || space<colon ) ) { |
337 | xrds_location.clear(); http_content_type.clear(); | 340 | xrds_location.clear(); http_content_type.clear(); |
338 | }else if(colon) { | 341 | }else if(colon) { |
339 | const char *hv = ++colon; | 342 | const char *hv = ++colon; |
340 | size_t hnl = colon-h; | 343 | size_t hnl = colon-h; |
341 | int rb; | 344 | int rb; |
342 | for(rb = bytes-hnl-1;rb>0 && isspace(*hv);++hv,--rb) ; | 345 | for(rb = bytes-hnl-1;rb>0 && isspace(*hv);++hv,--rb) ; |
343 | while(rb>0 && isspace(hv[rb-1])) --rb; | 346 | while(rb>0 && isspace(hv[rb-1])) --rb; |
344 | if(rb) { | 347 | if(rb) { |
345 | if( (hnl>=sizeof(XRDS_HEADER)) | 348 | if( (hnl>=sizeof(XRDS_HEADER)) |
346 | && !strncasecmp(h,XRDS_HEADER":", | 349 | && !strncasecmp(h,XRDS_HEADER":", |
347 | sizeof(XRDS_HEADER)) ) { | 350 | sizeof(XRDS_HEADER)) ) { |
348 | xrds_location.assign(hv,rb); | 351 | xrds_location.assign(hv,rb); |
349 | }else if( (hnl>=sizeof(CT_HEADER)) | 352 | }else if( (hnl>=sizeof(CT_HEADER)) |
350 | && !strncasecmp(h,CT_HEADER":", | 353 | && !strncasecmp(h,CT_HEADER":", |
351 | sizeof(CT_HEADER)) ) { | 354 | sizeof(CT_HEADER)) ) { |
352 | const char *sc = (const char*)memchr( | 355 | const char *sc = (const char*)memchr( |
353 | hv,';',rb); | 356 | hv,';',rb); |
354 | http_content_type.assign(hv,sc?(sc-hv):rb); | 357 | http_content_type.assign(hv,sc?(sc-hv):rb); |