summaryrefslogtreecommitdiffabout
authorMichael Krelin <hacker@klever.net>2007-09-14 22:52:21 (UTC)
committer Michael Krelin <hacker@klever.net>2007-09-14 22:52:21 (UTC)
commit5f1d69ac7753243b93761944e9444f01d8a7e5dd (patch) (unidiff)
tree43ded5d2da27a54f3fa806ddc6f054970d3cb6fc
parent429c48d3d08e6c2f6c385d8975f7b5bf5e67acf3 (diff)
downloadlibopkele-5f1d69ac7753243b93761944e9444f01d8a7e5dd.zip
libopkele-5f1d69ac7753243b93761944e9444f01d8a7e5dd.tar.gz
libopkele-5f1d69ac7753243b93761944e9444f01d8a7e5dd.tar.bz2
robustness improvements in link descovery mechanisms
accept stray spaces in link tags and inside relevant attribute values. Thanks, Gen, for pointing it out. Signed-off-by: Michael Krelin <hacker@klever.net>
Diffstat (more/less context) (show whitespace changes)
-rw-r--r--lib/consumer.cc8
1 files changed, 4 insertions, 4 deletions
diff --git a/lib/consumer.cc b/lib/consumer.cc
index 299b3bc..ff5da91 100644
--- a/lib/consumer.cc
+++ b/lib/consumer.cc
@@ -284,16 +284,16 @@ namespace opkele {
284 if(r) 284 if(r)
285 throw exception_curl(OPKELE_CP_ "failed to curl_easy_setopt()",r); 285 throw exception_curl(OPKELE_CP_ "failed to curl_easy_setopt()",r);
286 r = curl_easy_perform(curl); 286 r = curl_easy_perform(curl);
287 if(r && r!=CURLE_WRITE_ERROR) 287 if(r && r!=CURLE_WRITE_ERROR)
288 throw exception_curl(OPKELE_CP_ "failed to curl_easy_perform()",r); 288 throw exception_curl(OPKELE_CP_ "failed to curl_easy_perform()",r);
289 // strip out everything past body 289 // strip out everything past body
290 static const char *re_hdre = "<head[^>]*>", 290 static const char *re_hdre = "<\\s*head[^>]*>",
291 *re_lre = "<link\\b([^>]+)>", 291 *re_lre = "<\\s*link\\b([^>]+)>",
292 *re_rre = "\\brel=['\"]([^'\"]+)['\"]", 292 *re_rre = "\\brel\\s*=\\s*['\"]\\s*([^'\"\\s]+)\\s*['\"]",
293 *re_hre = "\\bhref=['\"]([^'\"]+)['\"]"; 293 *re_hre = "\\bhref\\s*=\\s*['\"]\\s*([^'\"\\s]+)\\s*['\"]";
294#if defined(USE_LIBPCRECPP) 294#if defined(USE_LIBPCRECPP)
295 static pcrecpp::RE_Options ro(PCRE_CASELESS|PCRE_DOTALL); 295 static pcrecpp::RE_Options ro(PCRE_CASELESS|PCRE_DOTALL);
296 static pcrecpp::RE 296 static pcrecpp::RE
297 bre("<body\\b.*",ro), hdre(re_hdre,ro), 297 bre("<body\\b.*",ro), hdre(re_hdre,ro),
298 lre(re_lre,ro), rre(re_rre), hre(re_hre,ro); 298 lre(re_lre,ro), rre(re_rre), hre(re_hre,ro);
299 bre.Replace("",&html); 299 bre.Replace("",&html);