summaryrefslogtreecommitdiffabout
path: root/lib
authorMichael Krelin <hacker@klever.net>2007-12-02 21:48:18 (UTC)
committer Michael Krelin <hacker@klever.net>2007-12-02 21:51:08 (UTC)
commit262f1579f0a9138a01f06afea06d00155cefd4b5 (patch) (unidiff)
treefb4db0ee7b679a1957c63abbe6f6af1d2fa82531 /lib
parent73d98f3652b498b9a74b183bef395714c7d73fda (diff)
downloadlibopkele-262f1579f0a9138a01f06afea06d00155cefd4b5.zip
libopkele-262f1579f0a9138a01f06afea06d00155cefd4b5.tar.gz
libopkele-262f1579f0a9138a01f06afea06d00155cefd4b5.tar.bz2
first cut on XRI resolver
This commit adds an OpenID service resolver that performs discovery using XRI (proxy only), the Yadis protocol and HTML-based discovery. It uses expat as the XML parsing engine, which makes it somewhat stricter about the HTML it receives, but I think failing to discover links in *severely* broken HTML is better than misdetecting links hidden in comments or the like. This is highly experimental code and needs more thought and testing. Thanks to everyone who pushed me towards this development, namely Joseph, John, Gen. Signed-off-by: Michael Krelin <hacker@klever.net>
Diffstat (limited to 'lib') (more/less context) (ignore whitespace changes)
-rw-r--r--lib/Makefile.am3
-rw-r--r--lib/openid_service_resolver.cc294
-rw-r--r--lib/util.cc16
3 files changed, 308 insertions, 5 deletions
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 7309353..b278faf 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -1,29 +1,30 @@
1lib_LTLIBRARIES = libopkele.la 1lib_LTLIBRARIES = libopkele.la
2 2
3DEFAULT_INCLUDES = -I${top_builddir} 3DEFAULT_INCLUDES = -I${top_builddir}
4INCLUDES = \ 4INCLUDES = \
5 -I${top_srcdir}/include/ \ 5 -I${top_srcdir}/include/ \
6 ${KONFORKA_CFLAGS} \ 6 ${KONFORKA_CFLAGS} \
7 ${OPENSSL_CFLAGS} \ 7 ${OPENSSL_CFLAGS} \
8 ${LIBCURL_CPPFLAGS} \ 8 ${LIBCURL_CPPFLAGS} \
9 ${PCRE_CFLAGS} ${EXPAT_CFLAGS} 9 ${PCRE_CFLAGS} ${EXPAT_CFLAGS}
10libopkele_la_LIBADD = \ 10libopkele_la_LIBADD = \
11 ${LIBCURL} \ 11 ${LIBCURL} \
12 ${PCRE_LIBS} ${EXPAT_LIBS} \ 12 ${PCRE_LIBS} ${EXPAT_LIBS} \
13 ${OPENSSL_LIBS} \ 13 ${OPENSSL_LIBS} \
14 ${KONFORKA_LIBS} 14 ${KONFORKA_LIBS}
15 15
16libopkele_la_SOURCES = \ 16libopkele_la_SOURCES = \
17 params.cc \ 17 params.cc \
18 util.cc \ 18 util.cc \
19 server.cc \ 19 server.cc \
20 secret.cc \ 20 secret.cc \
21 data.cc \ 21 data.cc \
22 consumer.cc \ 22 consumer.cc \
23 exception.cc \ 23 exception.cc \
24 extension.cc \ 24 extension.cc \
25 sreg.cc \ 25 sreg.cc \
26 extension_chain.cc \ 26 extension_chain.cc \
27 curl.cc expat.cc 27 curl.cc expat.cc \
28 openid_service_resolver.cc
28libopkele_la_LDFLAGS = \ 29libopkele_la_LDFLAGS = \
29 -version-info 2:0:0 30 -version-info 2:0:0
diff --git a/lib/openid_service_resolver.cc b/lib/openid_service_resolver.cc
new file mode 100644
index 0000000..5f82955
--- a/dev/null
+++ b/lib/openid_service_resolver.cc
@@ -0,0 +1,294 @@
1#include <cctype>
2#include <opkele/exception.h>
3#include <opkele/util.h>
4#include <opkele/openid_service_resolver.h>
5#include <opkele/uris.h>
6
7#define LOCATION_HEADER "X-XRDS-Location"
8
9namespace opkele {
10 static const char *whitespace = " \t\r\n";
11
12 openid_service_resolver_t::openid_service_resolver_t(const string& xp)
13 : util::curl_t(easy_init()),
14 util::expat_t(0),
15 xri_proxy(xp.empty()?"http://beta.xri.net/":xp)
16 {
17 CURLcode r;
18 (r=misc_sets())
19 || (r=set_write())
20 || (r==set_header())
21 ;
22 if(r)
23 throw opkele::exception_curl(OPKELE_CP_ "failed to set curly options",r);
24 }
25
26 static bool is_element(const XML_Char *n,const char *en) {
27 if(!strcasecmp(n,en)) return true;
28 int nl = strlen(n), enl = strlen(en);
29 if( (nl>=(enl+1)) && n[nl-enl-1]=='\t'
30 && !strcasecmp(&n[nl-enl],en) )
31 return true;
32 return false;
33 }
34 static inline bool is_qelement(const XML_Char *n,const char *qen) {
35 return !strcasecmp(n,qen);
36 }
37 static inline bool is_element(
38 const openid_service_resolver_t::parser_node_t& n,
39 const char *en) {
40 return is_element(n.element.c_str(),en);
41 }
42 static inline bool is_qelement(
43 const openid_service_resolver_t::parser_node_t& n,
44 const char *qen) {
45 return is_qelement(n.element.c_str(),qen);
46 }
47
48 void openid_service_resolver_t::start_element(const XML_Char *n,const XML_Char **a) {
49 if(state!=state_parse) return;
50 tree.push(n,a);
51 parser_node_t& t = tree.top();
52 if(is_element(n,"html") || is_element(n,"head")
53 || is_qelement(n,NSURI_XRDS "\tXRDS")
54 || is_qelement(n,NSURI_XRD "\tXRD") )
55 t.skip_tags = false;
56 else if(is_qelement(n,NSURI_XRD "\tService")
57 || is_qelement(n,NSURI_XRD "\tType")
58 || is_qelement(n,NSURI_XRD "\tURI")
59 || is_qelement(n,NSURI_OPENID10 "\tDelegate")
60 || is_qelement(n,NSURI_XRD "\tCanonicalID") )
61 t.skip_tags = t.skip_text = false;
62 else if(is_element(n,"body"))
63 state = state_stopping_body;
64 }
65 void openid_service_resolver_t::end_element(const XML_Char *n) {
66 if(state!=state_parse) return;
67 assert(tree.top().element == n);
68 pop_tag();
69 }
70 void openid_service_resolver_t::character_data(const XML_Char *s,int l) {
71 if(state!=state_parse) return;
72 if( !( tree.empty() || tree.top().skip_text ) )
73 tree.top().content.append(s,l);
74 }
75
76 static void copy_trim_whitespace(string& to,const string& from) {
77 string::size_type ns0 = from.find_first_not_of(whitespace);
78 if(ns0==string::npos) {
79 to.clear(); return;
80 }
81 string::size_type ns1 = from.find_last_not_of(whitespace);
82 assert(ns1!=string::npos);
83 to.assign(from,ns0,ns1-ns0+1);
84 }
85
86 void openid_service_resolver_t::pop_tag() {
87 assert(!tree.empty());
88 parser_node_t& t = tree.top();
89 if( is_element(t,"meta")
90 && !strcasecmp(t.attrs["http-equiv"].c_str(),LOCATION_HEADER) ) {
91 xrds_location = t.attrs["content"];
92 }else if( is_element(t,"link") ) {
93 parser_node_t::attrs_t::const_iterator ir = t.attrs.find("rel");
94 if(ir!=t.attrs.end()) {
95 const string& rels = ir->second;
96 for(string::size_type ns = rels.find_first_not_of(whitespace);
97 ns!=string::npos;
98 ns=rels.find_first_not_of(whitespace,ns)) {
99 string::size_type s = rels.find_first_of(whitespace,ns);
100 string rel;
101 if(s==string::npos) {
102 rel.assign(rels,ns,string::npos);
103 ns = string::npos;
104 }else{
105 rel.assign(rels,ns,s-ns);
106 ns = s;
107 }
108 if(rel=="openid.server")
109 copy_trim_whitespace(html_SEP.xrd_URI,t.attrs["href"]);
110 else if(rel=="openid.delegate")
111 copy_trim_whitespace(html_SEP.openid_Delegate,t.attrs["href"]);
112 }
113 }
114 }else if( is_element(t,"head") )
115 state = state_stopping_head;
116 else if( is_qelement(t,NSURI_XRD "\tXRD")) {
117 if( !(
118 (
119 xri_mode
120 && t.auth_info.canonical_id.empty()
121 ) ||
122 t.auth_info.auth_SEP.xrd_Type.empty()
123 ) )
124 auth_info = t.auth_info;
125 }else if( tree.size()>1 ) {
126 parser_node_t& p = tree.parent();
127 if( is_qelement(p,NSURI_XRD "\tService") ) {
128 if( is_qelement(t,NSURI_XRD "\tType") ) {
129 if(t.content==STURI_OPENID10) {
130 string tmp; copy_trim_whitespace(tmp,t.content);
131 p.auth_info.auth_SEP.xrd_Type.insert(tmp);
132 }
133 }else if( is_qelement(t,NSURI_XRD "\tURI") )
134 copy_trim_whitespace(p.auth_info.auth_SEP.xrd_URI,t.content);
135 else if( is_qelement(t,NSURI_OPENID10 "\tDelegate") )
136 copy_trim_whitespace(p.auth_info.auth_SEP.openid_Delegate,t.content);
137 }else if( is_qelement(p,NSURI_XRD "\tXRD") ) {
138 if(is_qelement(t,NSURI_XRD "\tService") ) {
139 if( !t.auth_info.auth_SEP.xrd_Type.empty() ) {
140 parser_node_t::attrs_t::const_iterator ip
141 = t.attrs.find("priority");
142 if(ip!=t.attrs.end()) {
143 const char *nptr = ip->second.c_str();
144 char *eptr = 0;
145 t.auth_info.auth_SEP.priority = strtol(nptr,&eptr,10);
146 if(nptr==eptr)
147 t.auth_info.auth_SEP.priority = LONG_MAX;
148 }
149 if( (t.auth_info.auth_SEP.priority < p.auth_info.auth_SEP.priority)
150 || p.auth_info.auth_SEP.xrd_Type.empty() )
151 p.auth_info.auth_SEP = t.auth_info.auth_SEP;
152 }
153 }else if( is_qelement(t,NSURI_XRD "\tCanonicalID") )
154 copy_trim_whitespace(p.auth_info.canonical_id,t.content);
155 }
156 }
157
158 tree.pop();
159 }
160
161 size_t openid_service_resolver_t::write(void *p,size_t s,size_t nm) {
162 if(state != state_parse)
163 return 0;
164 /* TODO: limit total size */
165 size_t bytes = s*nm;
166 parse((const char *)p,bytes,false);
167 return bytes;
168 }
169
170 size_t openid_service_resolver_t::header(void *p,size_t s,size_t nm) {
171 size_t bytes = s*nm;
172 const char *h = (const char *)p;
173 const char *colon = (const char*)memchr(p,':',bytes);
174 const char *space = (const char*)memchr(p,' ',bytes);
175 if(space && ( (!colon) || space<colon ) ) {
176 xrds_location.clear(); http_content_type.clear();
177 }else if(colon) {
178 const char *hv = ++colon;
179 int hnl = colon-h;
180 int rb;
181 for(rb = bytes-hnl-1;
182 rb>0 && isspace(*hv);
183 ++hv,--rb );
184 while(rb>0 && isspace(hv[rb-1]))
185 --rb;
186 if(rb) {
187 if( (hnl >= sizeof(LOCATION_HEADER))
188 && !strncasecmp(h,LOCATION_HEADER ":",
189 sizeof(LOCATION_HEADER)) ) {
190 xrds_location.assign(hv,rb);
191 }else if( (hnl >= sizeof("Content-Type"))
192 && !strncasecmp(h,"Content-Type:",
193 sizeof("Content-Type")) ) {
194 const char *sc = (const char*)memchr(
195 hv,';',rb);
196 http_content_type.assign(
197 hv,sc?(sc-hv):rb );
198 }
199 }
200 }
201 return curl_t::header(p,s,nm);
202 }
203
204 void openid_service_resolver_t::discover_service(const string& url,bool xri) {
205 CURLcode r = easy_setopt(CURLOPT_URL,url.c_str());
206 if(r)
207 throw opkele::exception_curl(OPKELE_CP_ "failed to set curly urlie",r);
208
209 (*(expat_t*)this) = parser_create_ns();
210 set_user_data(); set_element_handler();
211 set_character_data_handler();
212 tree.clear();
213 state = state_parse;
214
215 r = easy_perform();
216 if(r && r!=CURLE_WRITE_ERROR)
217 throw exception_curl(OPKELE_CP_ "failed to perform curly request",r);
218
219 parse(0,0,true);
220 while(!tree.empty()) pop_tag();
221 }
222
223 const openid_auth_info_t& openid_service_resolver_t::resolve(const string& id) {
224 auth_info = openid_auth_info_t();
225 html_SEP = openid_auth_SEP_t();
226
227 string::size_type fns = id.find_first_not_of(whitespace);
228 if(fns==string::npos)
229 throw opkele::bad_input(OPKELE_CP_ "whitespace-only identity");
230 string::size_type lns = id.find_last_not_of(whitespace);
231 assert(lns!=string::npos);
232 if(!strncasecmp(
233 id.c_str()+fns,"xri://",
234 sizeof("xri://")-1))
235 fns+=sizeof("xri://")-1;
236 string nid(id,fns,lns-fns+1);
237 if(nid.empty())
238 throw opkele::bad_input(OPKELE_CP_ "nothing significant in identity");
239 if(strchr("=@+$!(",*nid.c_str())) {
240 discover_service(
241 xri_proxy + util::url_encode(nid) +
242 "?_xrd_t=" STURI_OPENID10 "&_xrd_r=application/xrd+xml;sep=true",
243 true );
244 if(auth_info.canonical_id.empty()
245 || auth_info.auth_SEP.xrd_Type.empty() )
246 throw opkele::failed_lookup(OPKELE_CP_ "no OpenID service for XRI found");
247 return auth_info;
248 }else{
249 const char *np = nid.c_str();
250 if( (strncasecmp(np,"http",4) || strncmp(
251 tolower(*(np+4))=='s'? np+5 : np+4, "://", 3))
252#ifndef NDEBUG
253 && strncasecmp(np,"file:///",sizeof("file:///")-1)
254 #endif /* XXX: or how do I let tests work? */
255 )
256 nid.insert(0,"http://");
257 string::size_type fp = nid.find('#');
258 if(fp!=string::npos) {
259 string::size_type qp = nid.find('?');
260 if(qp==string::npos || qp<fp) {
261 nid.erase(fp);
262 }else if(qp>fp)
263 nid.erase(fp,qp-fp);
264 }
265 discover_service(nid);
266 const char *eu = 0;
267 CURLcode r = easy_getinfo(CURLINFO_EFFECTIVE_URL,&eu);
268 if(r)
269 throw exception_curl(OPKELE_CP_ "failed to get CURLINFO_EFFECTIVE_URL",r);
270 string canonicalized_id = util::rfc_3986_normalize_uri(eu);
271 if(xrds_location.empty()) {
272 if(auth_info.auth_SEP.xrd_Type.empty()) {
273 if(html_SEP.xrd_URI.empty())
274 throw opkele::failed_lookup(OPKELE_CP_ "no OpenID service discovered");
275 auth_info.auth_SEP = html_SEP;
276 auth_info.auth_SEP.xrd_Type.clear(); auth_info.auth_SEP.xrd_Type.insert( STURI_OPENID10 );
277 auth_info.canonical_id = canonicalized_id;
278 }else{
279 if(auth_info.canonical_id.empty())
280 auth_info.canonical_id = canonicalized_id;
281 }
282 return auth_info;
283 }else{
284 discover_service(xrds_location);
285 if(auth_info.auth_SEP.xrd_Type.empty())
286 throw opkele::failed_lookup(OPKELE_CP_ "no OpenID service found in Yadis document");
287 if(auth_info.canonical_id.empty())
288 auth_info.canonical_id = canonicalized_id;
289 return auth_info;
290 }
291 }
292 }
293
294}
diff --git a/lib/util.cc b/lib/util.cc
index ac70938..69d37b5 100644
--- a/lib/util.cc
+++ b/lib/util.cc
@@ -1,295 +1,303 @@
1#include <errno.h> 1#include <errno.h>
2#include <cassert> 2#include <cassert>
3#include <cctype> 3#include <cctype>
4#include <cstring> 4#include <cstring>
5#include <vector> 5#include <vector>
6#include <string> 6#include <string>
7#include <stack> 7#include <stack>
8#include <openssl/bio.h> 8#include <openssl/bio.h>
9#include <openssl/evp.h> 9#include <openssl/evp.h>
10#include <curl/curl.h> 10#include <curl/curl.h>
11#include "opkele/util.h" 11#include "opkele/util.h"
12#include "opkele/exception.h" 12#include "opkele/exception.h"
13 13
14namespace opkele { 14namespace opkele {
15 using namespace std; 15 using namespace std;
16 16
17 namespace util { 17 namespace util {
18 18
19 /* 19 /*
20 * base64 20 * base64
21 */ 21 */
22 string encode_base64(const void *data,size_t length) { 22 string encode_base64(const void *data,size_t length) {
23 BIO *b64 = 0, *bmem = 0; 23 BIO *b64 = 0, *bmem = 0;
24 try { 24 try {
25 b64 = BIO_new(BIO_f_base64()); 25 b64 = BIO_new(BIO_f_base64());
26 if(!b64) 26 if(!b64)
27 throw exception_openssl(OPKELE_CP_ "failed to BIO_new() base64 encoder"); 27 throw exception_openssl(OPKELE_CP_ "failed to BIO_new() base64 encoder");
28 BIO_set_flags(b64,BIO_FLAGS_BASE64_NO_NL); 28 BIO_set_flags(b64,BIO_FLAGS_BASE64_NO_NL);
29 bmem = BIO_new(BIO_s_mem()); 29 bmem = BIO_new(BIO_s_mem());
30 BIO_set_flags(b64,BIO_CLOSE); 30 BIO_set_flags(b64,BIO_CLOSE);
31 if(!bmem) 31 if(!bmem)
32 throw exception_openssl(OPKELE_CP_ "failed to BIO_new() memory buffer"); 32 throw exception_openssl(OPKELE_CP_ "failed to BIO_new() memory buffer");
33 BIO_push(b64,bmem); 33 BIO_push(b64,bmem);
34 if(((size_t)BIO_write(b64,data,length))!=length) 34 if(((size_t)BIO_write(b64,data,length))!=length)
35 throw exception_openssl(OPKELE_CP_ "failed to BIO_write()"); 35 throw exception_openssl(OPKELE_CP_ "failed to BIO_write()");
36 if(BIO_flush(b64)!=1) 36 if(BIO_flush(b64)!=1)
37 throw exception_openssl(OPKELE_CP_ "failed to BIO_flush()"); 37 throw exception_openssl(OPKELE_CP_ "failed to BIO_flush()");
38 char *rvd; 38 char *rvd;
39 long rvl = BIO_get_mem_data(bmem,&rvd); 39 long rvl = BIO_get_mem_data(bmem,&rvd);
40 string rv(rvd,rvl); 40 string rv(rvd,rvl);
41 BIO_free_all(b64); 41 BIO_free_all(b64);
42 return rv; 42 return rv;
43 }catch(...) { 43 }catch(...) {
44 if(b64) BIO_free_all(b64); 44 if(b64) BIO_free_all(b64);
45 throw; 45 throw;
46 } 46 }
47 } 47 }
48 48
49 void decode_base64(const string& data,vector<unsigned char>& rv) { 49 void decode_base64(const string& data,vector<unsigned char>& rv) {
50 BIO *b64 = 0, *bmem = 0; 50 BIO *b64 = 0, *bmem = 0;
51 rv.clear(); 51 rv.clear();
52 try { 52 try {
53 bmem = BIO_new_mem_buf((void*)data.data(),data.size()); 53 bmem = BIO_new_mem_buf((void*)data.data(),data.size());
54 if(!bmem) 54 if(!bmem)
55 throw exception_openssl(OPKELE_CP_ "failed to BIO_new_mem_buf()"); 55 throw exception_openssl(OPKELE_CP_ "failed to BIO_new_mem_buf()");
56 b64 = BIO_new(BIO_f_base64()); 56 b64 = BIO_new(BIO_f_base64());
57 if(!b64) 57 if(!b64)
58 throw exception_openssl(OPKELE_CP_ "failed to BIO_new() base64 decoder"); 58 throw exception_openssl(OPKELE_CP_ "failed to BIO_new() base64 decoder");
59 BIO_set_flags(b64,BIO_FLAGS_BASE64_NO_NL); 59 BIO_set_flags(b64,BIO_FLAGS_BASE64_NO_NL);
60 BIO_push(b64,bmem); 60 BIO_push(b64,bmem);
61 unsigned char tmp[512]; 61 unsigned char tmp[512];
62 size_t rb = 0; 62 size_t rb = 0;
63 while((rb=BIO_read(b64,tmp,sizeof(tmp)))>0) 63 while((rb=BIO_read(b64,tmp,sizeof(tmp)))>0)
64 rv.insert(rv.end(),tmp,&tmp[rb]); 64 rv.insert(rv.end(),tmp,&tmp[rb]);
65 BIO_free_all(b64); 65 BIO_free_all(b64);
66 }catch(...) { 66 }catch(...) {
67 if(b64) BIO_free_all(b64); 67 if(b64) BIO_free_all(b64);
68 throw; 68 throw;
69 } 69 }
70 } 70 }
71 71
72 /* 72 /*
73 * big numerics 73 * big numerics
74 */ 74 */
75 75
76 BIGNUM *base64_to_bignum(const string& b64) { 76 BIGNUM *base64_to_bignum(const string& b64) {
77 vector<unsigned char> bin; 77 vector<unsigned char> bin;
78 decode_base64(b64,bin); 78 decode_base64(b64,bin);
79 BIGNUM *rv = BN_bin2bn(&(bin.front()),bin.size(),0); 79 BIGNUM *rv = BN_bin2bn(&(bin.front()),bin.size(),0);
80 if(!rv) 80 if(!rv)
81 throw failed_conversion(OPKELE_CP_ "failed to BN_bin2bn()"); 81 throw failed_conversion(OPKELE_CP_ "failed to BN_bin2bn()");
82 return rv; 82 return rv;
83 } 83 }
84 84
85 BIGNUM *dec_to_bignum(const string& dec) { 85 BIGNUM *dec_to_bignum(const string& dec) {
86 BIGNUM *rv = 0; 86 BIGNUM *rv = 0;
87 if(!BN_dec2bn(&rv,dec.c_str())) 87 if(!BN_dec2bn(&rv,dec.c_str()))
88 throw failed_conversion(OPKELE_CP_ "failed to BN_dec2bn()"); 88 throw failed_conversion(OPKELE_CP_ "failed to BN_dec2bn()");
89 return rv; 89 return rv;
90 } 90 }
91 91
92 string bignum_to_base64(const BIGNUM *bn) { 92 string bignum_to_base64(const BIGNUM *bn) {
93 vector<unsigned char> bin(BN_num_bytes(bn)+1); 93 vector<unsigned char> bin(BN_num_bytes(bn)+1);
94 unsigned char *binptr = &(bin.front())+1; 94 unsigned char *binptr = &(bin.front())+1;
95 int l = BN_bn2bin(bn,binptr); 95 int l = BN_bn2bin(bn,binptr);
96 if(l && (*binptr)&0x80){ 96 if(l && (*binptr)&0x80){
97 (*(--binptr)) = 0; ++l; 97 (*(--binptr)) = 0; ++l;
98 } 98 }
99 return encode_base64(binptr,l); 99 return encode_base64(binptr,l);
100 } 100 }
101 101
102 /* 102 /*
103 * w3c times 103 * w3c times
104 */ 104 */
105 105
106 string time_to_w3c(time_t t) { 106 string time_to_w3c(time_t t) {
107 struct tm tm_t; 107 struct tm tm_t;
108 if(!gmtime_r(&t,&tm_t)) 108 if(!gmtime_r(&t,&tm_t))
109 throw failed_conversion(OPKELE_CP_ "failed to BN_dec2bn()"); 109 throw failed_conversion(OPKELE_CP_ "failed to BN_dec2bn()");
110 char rv[25]; 110 char rv[25];
111 if(!strftime(rv,sizeof(rv)-1,"%Y-%m-%dT%H:%M:%SZ",&tm_t)) 111 if(!strftime(rv,sizeof(rv)-1,"%Y-%m-%dT%H:%M:%SZ",&tm_t))
112 throw failed_conversion(OPKELE_CP_ "failed to strftime()"); 112 throw failed_conversion(OPKELE_CP_ "failed to strftime()");
113 return rv; 113 return rv;
114 } 114 }
115 115
116 time_t w3c_to_time(const string& w) { 116 time_t w3c_to_time(const string& w) {
117 struct tm tm_t; 117 struct tm tm_t;
118 memset(&tm_t,0,sizeof(tm_t)); 118 memset(&tm_t,0,sizeof(tm_t));
119 if( 119 if(
120 sscanf( 120 sscanf(
121 w.c_str(), 121 w.c_str(),
122 "%04d-%02d-%02dT%02d:%02d:%02dZ", 122 "%04d-%02d-%02dT%02d:%02d:%02dZ",
123 &tm_t.tm_year,&tm_t.tm_mon,&tm_t.tm_mday, 123 &tm_t.tm_year,&tm_t.tm_mon,&tm_t.tm_mday,
124 &tm_t.tm_hour,&tm_t.tm_min,&tm_t.tm_sec 124 &tm_t.tm_hour,&tm_t.tm_min,&tm_t.tm_sec
125 ) != 6 ) 125 ) != 6 )
126 throw failed_conversion(OPKELE_CP_ "failed to sscanf()"); 126 throw failed_conversion(OPKELE_CP_ "failed to sscanf()");
127 tm_t.tm_mon--; 127 tm_t.tm_mon--;
128 tm_t.tm_year-=1900; 128 tm_t.tm_year-=1900;
129 time_t rv = mktime(&tm_t); 129 time_t rv = mktime(&tm_t);
130 if(rv==(time_t)-1) 130 if(rv==(time_t)-1)
131 throw failed_conversion(OPKELE_CP_ "failed to mktime()"); 131 throw failed_conversion(OPKELE_CP_ "failed to mktime()");
132 return rv; 132 return rv;
133 } 133 }
134 134
135 /* 135 /*
136 * 136 *
137 */ 137 */
138 138
139 string url_encode(const string& str) { 139 string url_encode(const string& str) {
140 char * t = curl_escape(str.c_str(),str.length()); 140 char * t = curl_escape(str.c_str(),str.length());
141 if(!t) 141 if(!t)
142 throw failed_conversion(OPKELE_CP_ "failed to curl_escape()"); 142 throw failed_conversion(OPKELE_CP_ "failed to curl_escape()");
143 string rv(t); 143 string rv(t);
144 curl_free(t); 144 curl_free(t);
145 return rv; 145 return rv;
146 } 146 }
147 147
148 string long_to_string(long l) { 148 string long_to_string(long l) {
149 char rv[32]; 149 char rv[32];
150 int r=snprintf(rv,sizeof(rv),"%ld",l); 150 int r=snprintf(rv,sizeof(rv),"%ld",l);
151 if(r<0 || r>=(int)sizeof(rv)) 151 if(r<0 || r>=(int)sizeof(rv))
152 throw failed_conversion(OPKELE_CP_ "failed to snprintf()"); 152 throw failed_conversion(OPKELE_CP_ "failed to snprintf()");
153 return rv; 153 return rv;
154 } 154 }
155 155
156 long string_to_long(const string& s) { 156 long string_to_long(const string& s) {
157 char *endptr = 0; 157 char *endptr = 0;
158 long rv = strtol(s.c_str(),&endptr,10); 158 long rv = strtol(s.c_str(),&endptr,10);
159 if((!endptr) || endptr==s.c_str()) 159 if((!endptr) || endptr==s.c_str())
160 throw failed_conversion(OPKELE_CP_ "failed to strtol()"); 160 throw failed_conversion(OPKELE_CP_ "failed to strtol()");
161 return rv; 161 return rv;
162 } 162 }
163 163
164 /* 164 /*
165 * Normalize URL according to the rules, described in rfc 3986, section 6 165 * Normalize URL according to the rules, described in rfc 3986, section 6
166 * 166 *
167 * - uppercase hext triplets (e.g. %ab -> %AB) 167 * - uppercase hext triplets (e.g. %ab -> %AB)
168 * - lowercase scheme and host 168 * - lowercase scheme and host
169 * - decode %-encoded characters, specified as unreserved in rfc 3986, section 2.3, 169 * - decode %-encoded characters, specified as unreserved in rfc 3986, section 2.3,
170 * that is - [:alpha:][:digit:]._~- 170 * that is - [:alpha:][:digit:]._~-
171 * - remove dot segments 171 * - remove dot segments
172 * - remove empty and default ports 172 * - remove empty and default ports
173 * - if there's no path component, add '/' 173 * - if there's no path component, add '/'
174 */ 174 */
175 string rfc_3986_normalize_uri(const string& uri) { 175 string rfc_3986_normalize_uri(const string& uri) {
176 static const char *whitespace = " \t\r\n";
176 string rv; 177 string rv;
177 string::size_type colon = uri.find(':'); 178 string::size_type ns = uri.find_first_not_of(whitespace);
179 if(ns==string::npos)
180 throw bad_input(OPKELE_CP_ "Can't normalize empty URI");
181 string::size_type colon = uri.find(':',ns);
178 if(colon==string::npos) 182 if(colon==string::npos)
179 throw bad_input(OPKELE_CP_ "No scheme specified in URI"); 183 throw bad_input(OPKELE_CP_ "No scheme specified in URI");
180 transform( 184 transform(
181 uri.begin(), uri.begin()+colon+1, 185 uri.begin()+ns, uri.begin()+colon+1,
182 back_inserter(rv), ::tolower ); 186 back_inserter(rv), ::tolower );
183 bool s; 187 bool s;
184 if(rv=="http:") 188 if(rv=="http:")
185 s = false; 189 s = false;
186 else if(rv=="https:") 190 else if(rv=="https:")
187 s = true; 191 s = true;
192#ifndef NDEBUG
193 else if(rv=="file:")
194 s = false;
195#endif /* XXX: or try to make tests work some other way */
188 else 196 else
189 throw not_implemented(OPKELE_CP_ "Only http(s) URIs can be normalized here"); 197 throw not_implemented(OPKELE_CP_ "Only http(s) URIs can be normalized here");
190 string::size_type ul = uri.length(); 198 string::size_type ul = uri.find_last_not_of(whitespace)+1;
191 if(ul <= (colon+3)) 199 if(ul <= (colon+3))
192 throw bad_input(OPKELE_CP_ "Unexpected end of URI being normalized encountered"); 200 throw bad_input(OPKELE_CP_ "Unexpected end of URI being normalized encountered");
193 if(uri[colon+1]!='/' || uri[colon+2]!='/') 201 if(uri[colon+1]!='/' || uri[colon+2]!='/')
194 throw bad_input(OPKELE_CP_ "Unexpected input in URI being normalized after scheme component"); 202 throw bad_input(OPKELE_CP_ "Unexpected input in URI being normalized after scheme component");
195 rv += "//"; 203 rv += "//";
196 string::size_type interesting = uri.find_first_of(":/#?",colon+3); 204 string::size_type interesting = uri.find_first_of(":/#?",colon+3);
197 if(interesting==string::npos) { 205 if(interesting==string::npos) {
198 transform( 206 transform(
199 uri.begin()+colon+3,uri.end(), 207 uri.begin()+colon+3,uri.begin()+ul,
200 back_inserter(rv), ::tolower ); 208 back_inserter(rv), ::tolower );
201 rv += '/'; return rv; 209 rv += '/'; return rv;
202 } 210 }
203 transform( 211 transform(
204 uri.begin()+colon+3,uri.begin()+interesting, 212 uri.begin()+colon+3,uri.begin()+interesting,
205 back_inserter(rv), ::tolower ); 213 back_inserter(rv), ::tolower );
206 bool qf = false; 214 bool qf = false;
207 char ic = uri[interesting]; 215 char ic = uri[interesting];
208 if(ic==':') { 216 if(ic==':') {
209 string::size_type ni = uri.find_first_of("/#?%",interesting+1); 217 string::size_type ni = uri.find_first_of("/#?%",interesting+1);
210 const char *nptr = uri.data()+interesting+1; 218 const char *nptr = uri.data()+interesting+1;
211 char *eptr = 0; 219 char *eptr = 0;
212 long port = strtol(nptr,&eptr,10); 220 long port = strtol(nptr,&eptr,10);
213 if( (port>0) && (port<65535) && port!=(s?443:80) ) { 221 if( (port>0) && (port<65535) && port!=(s?443:80) ) {
214 char tmp[6]; 222 char tmp[6];
215 snprintf(tmp,sizeof(tmp),"%ld",port); 223 snprintf(tmp,sizeof(tmp),"%ld",port);
216 rv += ':'; rv += tmp; 224 rv += ':'; rv += tmp;
217 } 225 }
218 if(ni==string::npos) { 226 if(ni==string::npos) {
219 rv += '/'; return rv; 227 rv += '/'; return rv;
220 } 228 }
221 interesting = ni; 229 interesting = ni;
222 }else if(ic!='/') { 230 }else if(ic!='/') {
223 rv += '/'; rv += ic; 231 rv += '/'; rv += ic;
224 qf = true; 232 qf = true;
225 ++interesting; 233 ++interesting;
226 } 234 }
227 string::size_type n = interesting; 235 string::size_type n = interesting;
228 char tmp[3] = { 0,0,0 }; 236 char tmp[3] = { 0,0,0 };
229 stack<string::size_type> psegs; psegs.push(rv.length()); 237 stack<string::size_type> psegs; psegs.push(rv.length());
230 string pseg; 238 string pseg;
231 for(;n<ul;) { 239 for(;n<ul;) {
232 string::size_type unsafe = uri.find_first_of(qf?"%":"%/?#",n); 240 string::size_type unsafe = uri.find_first_of(qf?"%":"%/?#",n);
233 if(unsafe==string::npos) { 241 if(unsafe==string::npos) {
234 pseg.append(uri,n,ul-n-1); n = ul-1; 242 pseg.append(uri,n,ul-n-1); n = ul-1;
235 }else{ 243 }else{
236 pseg.append(uri,n,unsafe-n); 244 pseg.append(uri,n,unsafe-n);
237 n = unsafe; 245 n = unsafe;
238 } 246 }
239 char c = uri[n++]; 247 char c = uri[n++];
240 if(c=='%') { 248 if(c=='%') {
241 if((n+1)>=ul) 249 if((n+1)>=ul)
242 throw bad_input(OPKELE_CP_ "Unexpected end of URI encountered while parsing percent-encoded character"); 250 throw bad_input(OPKELE_CP_ "Unexpected end of URI encountered while parsing percent-encoded character");
243 tmp[0] = uri[n++]; 251 tmp[0] = uri[n++];
244 tmp[1] = uri[n++]; 252 tmp[1] = uri[n++];
245 if(!( isxdigit(tmp[0]) && isxdigit(tmp[1]) )) 253 if(!( isxdigit(tmp[0]) && isxdigit(tmp[1]) ))
246 throw bad_input(OPKELE_CP_ "Invalid percent-encoded character in URI being normalized"); 254 throw bad_input(OPKELE_CP_ "Invalid percent-encoded character in URI being normalized");
247 int cc = strtol(tmp,0,16); 255 int cc = strtol(tmp,0,16);
248 if( isalpha(cc) || isdigit(cc) || strchr("._~-",cc) ) 256 if( isalpha(cc) || isdigit(cc) || strchr("._~-",cc) )
249 pseg += cc; 257 pseg += cc;
250 else{ 258 else{
251 pseg += '%'; 259 pseg += '%';
252 pseg += toupper(tmp[0]); pseg += toupper(tmp[1]); 260 pseg += toupper(tmp[0]); pseg += toupper(tmp[1]);
253 } 261 }
254 }else if(qf) { 262 }else if(qf) {
255 rv += pseg; rv += c; 263 rv += pseg; rv += c;
256 pseg.clear(); 264 pseg.clear();
257 }else if(n>=ul || strchr("?/#",c)) { 265 }else if(n>=ul || strchr("?/#",c)) {
258 if(pseg.empty() || pseg==".") { 266 if(pseg.empty() || pseg==".") {
259 }else if(pseg=="..") { 267 }else if(pseg=="..") {
260 if(psegs.size()>1) { 268 if(psegs.size()>1) {
261 rv.resize(psegs.top()); psegs.pop(); 269 rv.resize(psegs.top()); psegs.pop();
262 } 270 }
263 }else{ 271 }else{
264 psegs.push(rv.length()); 272 psegs.push(rv.length());
265 if(c!='/') { 273 if(c!='/') {
266 pseg += c; 274 pseg += c;
267 qf = true; 275 qf = true;
268 } 276 }
269 rv += '/'; rv += pseg; 277 rv += '/'; rv += pseg;
270 } 278 }
271 if(c=='/' && (n>=ul || strchr("?#",uri[n])) ) { 279 if(c=='/' && (n>=ul || strchr("?#",uri[n])) ) {
272 rv += '/'; 280 rv += '/';
273 if(n<ul) 281 if(n<ul)
274 qf = true; 282 qf = true;
275 }else if(strchr("?#",c)) { 283 }else if(strchr("?#",c)) {
276 if(psegs.size()==1 && psegs.top()==rv.length()) 284 if(psegs.size()==1 && psegs.top()==rv.length())
277 rv += '/'; 285 rv += '/';
278 if(pseg.empty()) 286 if(pseg.empty())
279 rv += c; 287 rv += c;
280 qf = true; 288 qf = true;
281 } 289 }
282 pseg.clear(); 290 pseg.clear();
283 }else{ 291 }else{
284 pseg += c; 292 pseg += c;
285 } 293 }
286 } 294 }
287 if(!pseg.empty()) { 295 if(!pseg.empty()) {
288 rv += '/'; rv += pseg; 296 rv += '/'; rv += pseg;
289 } 297 }
290 return rv; 298 return rv;
291 } 299 }
292 300
293 } 301 }
294 302
295} 303}