summary refs log tree commit diff about
author Michael Krelin <hacker@klever.net> 2007-08-09 00:01:12 (UTC)
committer Michael Krelin <hacker@klever.net> 2007-08-09 00:01:12 (UTC)
commit 94e566c912f97b4c9e47353e10e86649ee8060a4 (patch) (side-by-side diff)
tree a11910bf677723a7eabccad161ec0b85426293ff
parent 4696aae9da3500f600cedd482501a89d41fd27ec (diff)
download libopkele-94e566c912f97b4c9e47353e10e86649ee8060a4.zip
libopkele-94e566c912f97b4c9e47353e10e86649ee8060a4.tar.gz
libopkele-94e566c912f97b4c9e47353e10e86649ee8060a4.tar.bz2
make canonicalize follow redirects
- renamed old consumer_t::canonicalize to consumer_t::normalize
- made the real canonicalize virtual to allow caching layer
Diffstat (more/less context) (show whitespace changes)
-rw-r--r-- include/opkele/consumer.h 11
-rw-r--r-- lib/consumer.cc 26
2 files changed, 34 insertions, 3 deletions
diff --git a/include/opkele/consumer.h b/include/opkele/consumer.h
index fdb6119..042e2d1 100644
--- a/include/opkele/consumer.h
+++ b/include/opkele/consumer.h
@@ -130,9 +130,16 @@ namespace opkele {
/**
- * make URL canonical, by adding http:// and trailing slash, if needed.
+ * normalize URL by adding http:// and trailing slash if needed.
+ * @param url
+ * @return normalized url
+ */
+ static string normalize(const string& url);
+
+ /**
+ * Canonicalize URL, by normalizing its appearance and following redirects.
* @param url
* @return canonicalized url
*/
- static string canonicalize(const string& url);
+ virtual string canonicalize(const string& url);
};
diff --git a/lib/consumer.cc b/lib/consumer.cc
index 282f0cc..dd8e150 100644
--- a/lib/consumer.cc
+++ b/lib/consumer.cc
@@ -311,5 +311,5 @@ namespace opkele {
}
- string consumer_t::canonicalize(const string& url) {
+ string consumer_t::normalize(const string& url) {
string rv = url;
// strip leading and trailing spaces
@@ -343,3 +343,27 @@ namespace opkele {
}
+ /*
+  * Canonicalize a URL: normalize() it, issue a body-less (CURLOPT_NOBODY)
+  * request for it through libcurl, and return the normalize()d effective
+  * URL that libcurl reports once the transfer has finished.
+  *
+  * NOTE(review): the header documents this as "following redirects";
+  * presumably curl_misc_sets() is what enables that on the handle --
+  * confirm against its definition.
+  *
+  * @param url the URL to canonicalize
+  * @return the canonicalized URL
+  * @throw exception_curl if the handle cannot be created or any libcurl
+  *        call reports an error
+  */
+ string consumer_t::canonicalize(const string& url) {
+     string rv = normalize(url);
+     curl_t curl = curl_easy_init();
+     if(!curl)
+         throw exception_curl(OPKELE_CP_ "failed to curl_easy_init()");
+     CURLcode r;
+     // Short-circuit chain: stop at the first option that fails so that r
+     // holds the failing code (CURLE_OK is zero).
+     (r=curl_misc_sets(curl))
+     || (r=curl_easy_setopt(curl,CURLOPT_URL,rv.c_str()))
+     // curl_easy_setopt() is variadic and this option is documented as a
+     // long, so pass 1L rather than an int.
+     || (r=curl_easy_setopt(curl,CURLOPT_NOBODY,1L))
+     ;
+     if(r)
+         throw exception_curl(OPKELE_CP_ "failed to curl_easy_setopt()",r);
+     r = curl_easy_perform(curl);
+     if(r)
+         throw exception_curl(OPKELE_CP_ "failed to curl_easy_perform()",r);
+     const char *eu = 0;
+     r = curl_easy_getinfo(curl,CURLINFO_EFFECTIVE_URL,&eu);
+     if(r)
+         throw exception_curl(OPKELE_CP_ "failed to curl_easy_getinfo(..CURLINFO_EFFECTIVE_URL..)",r);
+     // Constructing a std::string from a null pointer is undefined
+     // behaviour; if libcurl reports no effective URL, keep the
+     // normalized input instead.
+     if(eu)
+         rv = eu;
+     return normalize(rv);
+ }
+
}