author | Michael Krelin <hacker@klever.net> | 2007-09-14 22:52:21 (UTC) |
---|---|---|
committer | Michael Krelin <hacker@klever.net> | 2007-09-14 22:52:21 (UTC) |
commit | 5f1d69ac7753243b93761944e9444f01d8a7e5dd (patch) (unidiff) | |
tree | 43ded5d2da27a54f3fa806ddc6f054970d3cb6fc | |
parent | 429c48d3d08e6c2f6c385d8975f7b5bf5e67acf3 (diff) | |
download | libopkele-5f1d69ac7753243b93761944e9444f01d8a7e5dd.zip libopkele-5f1d69ac7753243b93761944e9444f01d8a7e5dd.tar.gz libopkele-5f1d69ac7753243b93761944e9444f01d8a7e5dd.tar.bz2 |
robustness improvements in link discovery mechanisms
accept stray spaces in link tags and inside relevant attribute values.
Thanks, Gen, for pointing it out.
Signed-off-by: Michael Krelin <hacker@klever.net>
-rw-r--r-- | lib/consumer.cc | 8 |
1 files changed, 4 insertions, 4 deletions
diff --git a/lib/consumer.cc b/lib/consumer.cc index 299b3bc..ff5da91 100644 --- a/lib/consumer.cc +++ b/lib/consumer.cc | |||
@@ -266,52 +266,52 @@ namespace opkele { | |||
266 | } | 266 | } |
267 | throw failed_check_authentication(OPKELE_CP_ "failed to verify response"); | 267 | throw failed_check_authentication(OPKELE_CP_ "failed to verify response"); |
268 | } | 268 | } |
269 | 269 | ||
270 | void consumer_t::retrieve_links(const string& url,string& server,string& delegate) { | 270 | void consumer_t::retrieve_links(const string& url,string& server,string& delegate) { |
271 | #if defined(USE_LIBPCRECPP) || defined(USE_PCREPP) | 271 | #if defined(USE_LIBPCRECPP) || defined(USE_PCREPP) |
272 | server.erase(); | 272 | server.erase(); |
273 | delegate.erase(); | 273 | delegate.erase(); |
274 | curl_t curl = curl_easy_init(); | 274 | curl_t curl = curl_easy_init(); |
275 | if(!curl) | 275 | if(!curl) |
276 | throw exception_curl(OPKELE_CP_ "failed to curl_easy_init()"); | 276 | throw exception_curl(OPKELE_CP_ "failed to curl_easy_init()"); |
277 | string html; | 277 | string html; |
278 | CURLcode r; | 278 | CURLcode r; |
279 | (r=curl_misc_sets(curl)) | 279 | (r=curl_misc_sets(curl)) |
280 | || (r=curl_easy_setopt(curl,CURLOPT_URL,url.c_str())) | 280 | || (r=curl_easy_setopt(curl,CURLOPT_URL,url.c_str())) |
281 | || (r=curl_easy_setopt(curl,CURLOPT_WRITEFUNCTION,_curl_tostring)) | 281 | || (r=curl_easy_setopt(curl,CURLOPT_WRITEFUNCTION,_curl_tostring)) |
282 | || (r=curl_easy_setopt(curl,CURLOPT_WRITEDATA,&html)) | 282 | || (r=curl_easy_setopt(curl,CURLOPT_WRITEDATA,&html)) |
283 | ; | 283 | ; |
284 | if(r) | 284 | if(r) |
285 | throw exception_curl(OPKELE_CP_ "failed to curl_easy_setopt()",r); | 285 | throw exception_curl(OPKELE_CP_ "failed to curl_easy_setopt()",r); |
286 | r = curl_easy_perform(curl); | 286 | r = curl_easy_perform(curl); |
287 | if(r && r!=CURLE_WRITE_ERROR) | 287 | if(r && r!=CURLE_WRITE_ERROR) |
288 | throw exception_curl(OPKELE_CP_ "failed to curl_easy_perform()",r); | 288 | throw exception_curl(OPKELE_CP_ "failed to curl_easy_perform()",r); |
289 | // strip out everything past body | 289 | // strip out everything past body |
290 | static const char *re_hdre = "<head[^>]*>", | 290 | static const char *re_hdre = "<\\s*head[^>]*>", |
291 | *re_lre = "<link\\b([^>]+)>", | 291 | *re_lre = "<\\s*link\\b([^>]+)>", |
292 | *re_rre = "\\brel=['\"]([^'\"]+)['\"]", | 292 | *re_rre = "\\brel\\s*=\\s*['\"]\\s*([^'\"\\s]+)\\s*['\"]", |
293 | *re_hre = "\\bhref=['\"]([^'\"]+)['\"]"; | 293 | *re_hre = "\\bhref\\s*=\\s*['\"]\\s*([^'\"\\s]+)\\s*['\"]"; |
294 | #if defined(USE_LIBPCRECPP) | 294 | #if defined(USE_LIBPCRECPP) |
295 | static pcrecpp::RE_Options ro(PCRE_CASELESS|PCRE_DOTALL); | 295 | static pcrecpp::RE_Options ro(PCRE_CASELESS|PCRE_DOTALL); |
296 | static pcrecpp::RE | 296 | static pcrecpp::RE |
297 | bre("<body\\b.*",ro), hdre(re_hdre,ro), | 297 | bre("<body\\b.*",ro), hdre(re_hdre,ro), |
298 | lre(re_lre,ro), rre(re_rre), hre(re_hre,ro); | 298 | lre(re_lre,ro), rre(re_rre), hre(re_hre,ro); |
299 | bre.Replace("",&html); | 299 | bre.Replace("",&html); |
300 | pcrecpp::StringPiece hpiece(html); | 300 | pcrecpp::StringPiece hpiece(html); |
301 | if(!hdre.FindAndConsume(&hpiece)) | 301 | if(!hdre.FindAndConsume(&hpiece)) |
302 | throw bad_input(OPKELE_CP_ "failed to find head"); | 302 | throw bad_input(OPKELE_CP_ "failed to find head"); |
303 | string attrs; | 303 | string attrs; |
304 | while(lre.FindAndConsume(&hpiece,&attrs)) { | 304 | while(lre.FindAndConsume(&hpiece,&attrs)) { |
305 | pcrecpp::StringPiece rel, href; | 305 | pcrecpp::StringPiece rel, href; |
306 | if(!(rre.PartialMatch(attrs,&rel) && hre.PartialMatch(attrs,&href))) | 306 | if(!(rre.PartialMatch(attrs,&rel) && hre.PartialMatch(attrs,&href))) |
307 | continue; | 307 | continue; |
308 | if(rel=="openid.server") { | 308 | if(rel=="openid.server") { |
309 | href.CopyToString(&server); | 309 | href.CopyToString(&server); |
310 | if(!delegate.empty()) | 310 | if(!delegate.empty()) |
311 | break; | 311 | break; |
312 | }else if(rel=="openid.delegate") { | 312 | }else if(rel=="openid.delegate") { |
313 | href.CopyToString(&delegate); | 313 | href.CopyToString(&delegate); |
314 | if(!server.empty()) | 314 | if(!server.empty()) |
315 | break; | 315 | break; |
316 | } | 316 | } |
317 | } | 317 | } |