summary | refs | log | tree | commit | diff | about
Side-by-side diff
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r-- include/opkele/consumer.h 11
-rw-r--r-- lib/consumer.cc 26
2 files changed, 34 insertions, 3 deletions
diff --git a/include/opkele/consumer.h b/include/opkele/consumer.h
index fdb6119..042e2d1 100644
--- a/include/opkele/consumer.h
+++ b/include/opkele/consumer.h
@@ -130,9 +130,16 @@ namespace opkele {
/**
- * make URL canonical, by adding http:// and trailing slash, if needed.
+ * normalize URL by adding http:// and trailing slash if needed.
+ * @param url
+ * @return normalized url
+ */
+ static string normalize(const string& url);
+
+ /**
+ * Canonicalize URL, by normalizing its appearance and following redirects.
* @param url
* @return canonicalized url
*/
- static string canonicalize(const string& url);
+ virtual string canonicalize(const string& url);
};
diff --git a/lib/consumer.cc b/lib/consumer.cc
index 282f0cc..dd8e150 100644
--- a/lib/consumer.cc
+++ b/lib/consumer.cc
@@ -311,5 +311,5 @@ namespace opkele {
}
- string consumer_t::canonicalize(const string& url) {
+ string consumer_t::normalize(const string& url) {
string rv = url;
// strip leading and trailing spaces
@@ -343,3 +343,27 @@ namespace opkele {
}
+ /**
+  * Canonicalize URL: normalize it, perform a body-less request to it and
+  * return the normalized effective URL reported by libcurl.
+  * NOTE(review): redirect following is presumably enabled inside
+  * curl_misc_sets() -- confirm against its definition.
+  * @param url the URL to canonicalize
+  * @return canonicalized url
+  * @throw exception_curl if a libcurl call fails or no effective URL is
+  * reported
+  */
+ string consumer_t::canonicalize(const string& url) {
+     string rv = normalize(url);
+     curl_t curl = curl_easy_init();
+     if(!curl)
+         throw exception_curl(OPKELE_CP_ "failed to curl_easy_init()");
+     CURLcode r;
+     // Short-circuit chain: stops at the first option that fails to set and
+     // leaves its error code in r. curl_easy_setopt() is variadic, so the
+     // CURLOPT_NOBODY flag has to be passed as a long (1L), not an int.
+     (r=curl_misc_sets(curl))
+     || (r=curl_easy_setopt(curl,CURLOPT_URL,rv.c_str()))
+     || (r=curl_easy_setopt(curl,CURLOPT_NOBODY,1L))
+     ;
+     if(r)
+         throw exception_curl(OPKELE_CP_ "failed to curl_easy_setopt()",r);
+     r = curl_easy_perform(curl);
+     if(r)
+         throw exception_curl(OPKELE_CP_ "failed to curl_easy_perform()",r);
+     const char *eu = 0;
+     r = curl_easy_getinfo(curl,CURLINFO_EFFECTIVE_URL,&eu);
+     if(r)
+         throw exception_curl(OPKELE_CP_ "failed to curl_easy_getinfo(..CURLINFO_EFFECTIVE_URL..)",r);
+     // Constructing a std::string from a null pointer is undefined behaviour,
+     // so refuse to continue when libcurl handed back no URL.
+     if(!eu)
+         throw exception_curl(OPKELE_CP_ "curl_easy_getinfo(..CURLINFO_EFFECTIVE_URL..) returned no URL");
+     // eu points into storage owned by the curl handle; copy it while the
+     // handle is still alive.
+     rv = eu;
+     return normalize(rv);
+ }
+
}