-rw-r--r-- | include/opkele/consumer.h | 11 | ||||
-rw-r--r-- | lib/consumer.cc | 26 |
2 files changed, 34 insertions, 3 deletions
diff --git a/include/opkele/consumer.h b/include/opkele/consumer.h
index fdb6119..042e2d1 100644
--- a/include/opkele/consumer.h
+++ b/include/opkele/consumer.h
@@ -130,9 +130,16 @@ namespace opkele {
 
         /**
-         * make URL canonical, by adding http:// and trailing slash, if needed.
+         * normalize URL by adding http:// and trailing slash if needed.
+         * @param url
+         * @return normalized url
+         */
+        static string normalize(const string& url);
+
+        /**
+         * Canonicalize URL, by normalizing its appearance and following redirects.
          * @param url
          * @return canonicalized url
          */
-        static string canonicalize(const string& url);
+        virtual string canonicalize(const string& url);
     };
 
diff --git a/lib/consumer.cc b/lib/consumer.cc
index 282f0cc..dd8e150 100644
--- a/lib/consumer.cc
+++ b/lib/consumer.cc
@@ -311,5 +311,5 @@ namespace opkele {
     }
 
-    string consumer_t::canonicalize(const string& url) {
+    string consumer_t::normalize(const string& url) {
         string rv = url;
         // strip leading and trailing spaces
@@ -343,3 +343,27 @@ namespace opkele {
     }
 
+    string consumer_t::canonicalize(const string& url) {
+        string rv = normalize(url);
+        curl_t curl = curl_easy_init();
+        if(!curl)
+            throw exception_curl(OPKELE_CP_ "failed to curl_easy_init()");
+        // body-less request: only the effective URL is read back below
+        CURLcode r;
+        (r=curl_misc_sets(curl))
+        || (r=curl_easy_setopt(curl,CURLOPT_URL,rv.c_str()))
+        || (r=curl_easy_setopt(curl,CURLOPT_NOBODY,1L)); // libcurl expects a long here
+        if(r)
+            throw exception_curl(OPKELE_CP_ "failed to curl_easy_setopt()",r);
+        r = curl_easy_perform(curl);
+        if(r)
+            throw exception_curl(OPKELE_CP_ "failed to curl_easy_perform()",r);
+        const char *eu = 0;
+        r = curl_easy_getinfo(curl,CURLINFO_EFFECTIVE_URL,&eu);
+        if(r)
+            throw exception_curl(OPKELE_CP_ "failed to curl_easy_getinfo(..CURLINFO_EFFECTIVE_URL..)",r);
+        if(!eu) // never build a std::string from a null pointer
+            throw exception_curl(OPKELE_CP_ "curl_easy_getinfo(..CURLINFO_EFFECTIVE_URL..) returned no URL");
+        return normalize(eu); // eu is copied into a string before the handle goes out of scope
+    }
+
 }