summary | refs | log | tree | commit | diff | about
path: root/lib/consumer.cc
author Michael Krelin <hacker@klever.net>2007-09-14 22:52:21 (UTC)
committer Michael Krelin <hacker@klever.net>2007-09-14 22:52:21 (UTC)
commit 5f1d69ac7753243b93761944e9444f01d8a7e5dd (patch) (side-by-side diff)
tree 43ded5d2da27a54f3fa806ddc6f054970d3cb6fc /lib/consumer.cc
parent 429c48d3d08e6c2f6c385d8975f7b5bf5e67acf3 (diff)
download libopkele-5f1d69ac7753243b93761944e9444f01d8a7e5dd.zip
libopkele-5f1d69ac7753243b93761944e9444f01d8a7e5dd.tar.gz
libopkele-5f1d69ac7753243b93761944e9444f01d8a7e5dd.tar.bz2
robustness improvements in link discovery mechanisms
accept stray spaces in link tags and inside relevant attribute values. Thanks, Gen, for pointing it out. Signed-off-by: Michael Krelin <hacker@klever.net>
Diffstat (limited to 'lib/consumer.cc') (more/less context) (ignore whitespace changes)
-rw-r--r-- lib/consumer.cc 8
1 files changed, 4 insertions, 4 deletions
diff --git a/lib/consumer.cc b/lib/consumer.cc
index 299b3bc..ff5da91 100644
--- a/lib/consumer.cc
+++ b/lib/consumer.cc
@@ -287,10 +287,10 @@ namespace opkele {
if(r && r!=CURLE_WRITE_ERROR)
throw exception_curl(OPKELE_CP_ "failed to curl_easy_perform()",r);
// strip out everything past body
- static const char *re_hdre = "<head[^>]*>",
- *re_lre = "<link\\b([^>]+)>",
- *re_rre = "\\brel=['\"]([^'\"]+)['\"]",
- *re_hre = "\\bhref=['\"]([^'\"]+)['\"]";
+ static const char *re_hdre = "<\\s*head[^>]*>",
+ *re_lre = "<\\s*link\\b([^>]+)>",
+ *re_rre = "\\brel\\s*=\\s*['\"]\\s*([^'\"\\s]+)\\s*['\"]",
+ *re_hre = "\\bhref\\s*=\\s*['\"]\\s*([^'\"\\s]+)\\s*['\"]";
#if defined(USE_LIBPCRECPP)
static pcrecpp::RE_Options ro(PCRE_CASELESS|PCRE_DOTALL);
static pcrecpp::RE