summaryrefslogtreecommitdiffabout
path: root/lib
authorMichael Krelin <hacker@klever.net>2007-09-14 22:52:21 (UTC)
committer Michael Krelin <hacker@klever.net>2007-09-14 22:52:21 (UTC)
commit5f1d69ac7753243b93761944e9444f01d8a7e5dd (patch) (unidiff)
tree43ded5d2da27a54f3fa806ddc6f054970d3cb6fc /lib
parent429c48d3d08e6c2f6c385d8975f7b5bf5e67acf3 (diff)
downloadlibopkele-5f1d69ac7753243b93761944e9444f01d8a7e5dd.zip
libopkele-5f1d69ac7753243b93761944e9444f01d8a7e5dd.tar.gz
libopkele-5f1d69ac7753243b93761944e9444f01d8a7e5dd.tar.bz2
robustness improvements in link discovery mechanisms
accept stray spaces in link tags and inside relevant attribute values. Thanks, Gen, for pointing it out. Signed-off-by: Michael Krelin <hacker@klever.net>
Diffstat (limited to 'lib') (more/less context) (ignore whitespace changes)
-rw-r--r--lib/consumer.cc8
1 files changed, 4 insertions, 4 deletions
diff --git a/lib/consumer.cc b/lib/consumer.cc
index 299b3bc..ff5da91 100644
--- a/lib/consumer.cc
+++ b/lib/consumer.cc
@@ -266,52 +266,52 @@ namespace opkele {
266 } 266 }
267 throw failed_check_authentication(OPKELE_CP_ "failed to verify response"); 267 throw failed_check_authentication(OPKELE_CP_ "failed to verify response");
268 } 268 }
269 269
270 void consumer_t::retrieve_links(const string& url,string& server,string& delegate) { 270 void consumer_t::retrieve_links(const string& url,string& server,string& delegate) {
271#if defined(USE_LIBPCRECPP) || defined(USE_PCREPP) 271#if defined(USE_LIBPCRECPP) || defined(USE_PCREPP)
272 server.erase(); 272 server.erase();
273 delegate.erase(); 273 delegate.erase();
274 curl_t curl = curl_easy_init(); 274 curl_t curl = curl_easy_init();
275 if(!curl) 275 if(!curl)
276 throw exception_curl(OPKELE_CP_ "failed to curl_easy_init()"); 276 throw exception_curl(OPKELE_CP_ "failed to curl_easy_init()");
277 string html; 277 string html;
278 CURLcode r; 278 CURLcode r;
279 (r=curl_misc_sets(curl)) 279 (r=curl_misc_sets(curl))
280 || (r=curl_easy_setopt(curl,CURLOPT_URL,url.c_str())) 280 || (r=curl_easy_setopt(curl,CURLOPT_URL,url.c_str()))
281 || (r=curl_easy_setopt(curl,CURLOPT_WRITEFUNCTION,_curl_tostring)) 281 || (r=curl_easy_setopt(curl,CURLOPT_WRITEFUNCTION,_curl_tostring))
282 || (r=curl_easy_setopt(curl,CURLOPT_WRITEDATA,&html)) 282 || (r=curl_easy_setopt(curl,CURLOPT_WRITEDATA,&html))
283 ; 283 ;
284 if(r) 284 if(r)
285 throw exception_curl(OPKELE_CP_ "failed to curl_easy_setopt()",r); 285 throw exception_curl(OPKELE_CP_ "failed to curl_easy_setopt()",r);
286 r = curl_easy_perform(curl); 286 r = curl_easy_perform(curl);
287 if(r && r!=CURLE_WRITE_ERROR) 287 if(r && r!=CURLE_WRITE_ERROR)
288 throw exception_curl(OPKELE_CP_ "failed to curl_easy_perform()",r); 288 throw exception_curl(OPKELE_CP_ "failed to curl_easy_perform()",r);
289 // strip out everything past body 289 // strip out everything past body
290 static const char *re_hdre = "<head[^>]*>", 290 static const char *re_hdre = "<\\s*head[^>]*>",
291 *re_lre = "<link\\b([^>]+)>", 291 *re_lre = "<\\s*link\\b([^>]+)>",
292 *re_rre = "\\brel=['\"]([^'\"]+)['\"]", 292 *re_rre = "\\brel\\s*=\\s*['\"]\\s*([^'\"\\s]+)\\s*['\"]",
293 *re_hre = "\\bhref=['\"]([^'\"]+)['\"]"; 293 *re_hre = "\\bhref\\s*=\\s*['\"]\\s*([^'\"\\s]+)\\s*['\"]";
294#if defined(USE_LIBPCRECPP) 294#if defined(USE_LIBPCRECPP)
295 static pcrecpp::RE_Options ro(PCRE_CASELESS|PCRE_DOTALL); 295 static pcrecpp::RE_Options ro(PCRE_CASELESS|PCRE_DOTALL);
296 static pcrecpp::RE 296 static pcrecpp::RE
297 bre("<body\\b.*",ro), hdre(re_hdre,ro), 297 bre("<body\\b.*",ro), hdre(re_hdre,ro),
298 lre(re_lre,ro), rre(re_rre), hre(re_hre,ro); 298 lre(re_lre,ro), rre(re_rre), hre(re_hre,ro);
299 bre.Replace("",&html); 299 bre.Replace("",&html);
300 pcrecpp::StringPiece hpiece(html); 300 pcrecpp::StringPiece hpiece(html);
301 if(!hdre.FindAndConsume(&hpiece)) 301 if(!hdre.FindAndConsume(&hpiece))
302 throw bad_input(OPKELE_CP_ "failed to find head"); 302 throw bad_input(OPKELE_CP_ "failed to find head");
303 string attrs; 303 string attrs;
304 while(lre.FindAndConsume(&hpiece,&attrs)) { 304 while(lre.FindAndConsume(&hpiece,&attrs)) {
305 pcrecpp::StringPiece rel, href; 305 pcrecpp::StringPiece rel, href;
306 if(!(rre.PartialMatch(attrs,&rel) && hre.PartialMatch(attrs,&href))) 306 if(!(rre.PartialMatch(attrs,&rel) && hre.PartialMatch(attrs,&href)))
307 continue; 307 continue;
308 if(rel=="openid.server") { 308 if(rel=="openid.server") {
309 href.CopyToString(&server); 309 href.CopyToString(&server);
310 if(!delegate.empty()) 310 if(!delegate.empty())
311 break; 311 break;
312 }else if(rel=="openid.delegate") { 312 }else if(rel=="openid.delegate") {
313 href.CopyToString(&delegate); 313 href.CopyToString(&delegate);
314 if(!server.empty()) 314 if(!server.empty())
315 break; 315 break;
316 } 316 }
317 } 317 }