-rw-r--r-- | lib/Makefile.am | 2 | ||||
-rw-r--r-- | lib/discovery.cc | 375 | ||||
-rw-r--r-- | lib/openid_service_resolver.cc | 298 | ||||
-rw-r--r-- | lib/util.cc | 22 |
4 files changed, 388 insertions, 309 deletions
diff --git a/lib/Makefile.am b/lib/Makefile.am index b278faf..185411f 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am | |||
@@ -1,30 +1,30 @@ | |||
1 | lib_LTLIBRARIES = libopkele.la | 1 | lib_LTLIBRARIES = libopkele.la |
2 | 2 | ||
3 | DEFAULT_INCLUDES = -I${top_builddir} | 3 | DEFAULT_INCLUDES = -I${top_builddir} |
4 | INCLUDES = \ | 4 | INCLUDES = \ |
5 | -I${top_srcdir}/include/ \ | 5 | -I${top_srcdir}/include/ \ |
6 | ${KONFORKA_CFLAGS} \ | 6 | ${KONFORKA_CFLAGS} \ |
7 | ${OPENSSL_CFLAGS} \ | 7 | ${OPENSSL_CFLAGS} \ |
8 | ${LIBCURL_CPPFLAGS} \ | 8 | ${LIBCURL_CPPFLAGS} \ |
9 | ${PCRE_CFLAGS} ${EXPAT_CFLAGS} | 9 | ${PCRE_CFLAGS} ${EXPAT_CFLAGS} |
10 | libopkele_la_LIBADD = \ | 10 | libopkele_la_LIBADD = \ |
11 | ${LIBCURL} \ | 11 | ${LIBCURL} \ |
12 | ${PCRE_LIBS} ${EXPAT_LIBS} \ | 12 | ${PCRE_LIBS} ${EXPAT_LIBS} \ |
13 | ${OPENSSL_LIBS} \ | 13 | ${OPENSSL_LIBS} \ |
14 | ${KONFORKA_LIBS} | 14 | ${KONFORKA_LIBS} |
15 | 15 | ||
16 | libopkele_la_SOURCES = \ | 16 | libopkele_la_SOURCES = \ |
17 | params.cc \ | 17 | params.cc \ |
18 | util.cc \ | 18 | util.cc \ |
19 | server.cc \ | 19 | server.cc \ |
20 | secret.cc \ | 20 | secret.cc \ |
21 | data.cc \ | 21 | data.cc \ |
22 | consumer.cc \ | 22 | consumer.cc \ |
23 | exception.cc \ | 23 | exception.cc \ |
24 | extension.cc \ | 24 | extension.cc \ |
25 | sreg.cc \ | 25 | sreg.cc \ |
26 | extension_chain.cc \ | 26 | extension_chain.cc \ |
27 | curl.cc expat.cc \ | 27 | curl.cc expat.cc \ |
28 | openid_service_resolver.cc | 28 | discovery.cc |
29 | libopkele_la_LDFLAGS = \ | 29 | libopkele_la_LDFLAGS = \ |
30 | -version-info 2:0:0 | 30 | -version-info 2:0:0 |
diff --git a/lib/discovery.cc b/lib/discovery.cc new file mode 100644 index 0000000..a35ce32 --- a/dev/null +++ b/lib/discovery.cc | |||
@@ -0,0 +1,375 @@ | |||
1 | #include <iostream> | ||
2 | using namespace std; | ||
3 | #include <list> | ||
4 | #include <opkele/curl.h> | ||
5 | #include <opkele/expat.h> | ||
6 | #include <opkele/uris.h> | ||
7 | #include <opkele/discovery.h> | ||
8 | #include <opkele/exception.h> | ||
9 | #include <opkele/util.h> | ||
10 | |||
11 | #include "config.h" | ||
12 | |||
13 | #define XRDS_HEADER "X-XRDS-Location" | ||
14 | #define CT_HEADER "Content-Type" | ||
15 | |||
16 | namespace opkele { | ||
17 | using std::list; | ||
18 | using xrd::XRD_t; | ||
19 | using xrd::service_t; | ||
20 | |||
21 | static const char *whitespace = " \t\r\n"; | ||
22 | static const char *i_leaders = "=@+$!("; | ||
23 | |||
24 | static inline bool is_qelement(const XML_Char *n,const char *qen) { | ||
25 | return !strcasecmp(n,qen); | ||
26 | } | ||
27 | static inline bool is_element(const XML_Char *n,const char *en) { | ||
28 | if(!strcasecmp(n,en)) return true; | ||
29 | int nl = strlen(n), enl = strlen(en); | ||
30 | if( (nl>=(enl+1)) && n[nl-enl-1]=='\t' | ||
31 | && !strcasecmp(&n[nl-enl],en) ) | ||
32 | return true; | ||
33 | return false; | ||
34 | } | ||
35 | |||
36 | static long element_priority(const XML_Char **a) { | ||
37 | for(;*a;++a) | ||
38 | if(!strcasecmp(*(a++),"priority")) { | ||
39 | long rv; | ||
40 | return (sscanf(*a,"%ld",&rv)==1)?rv:-1; | ||
41 | } | ||
42 | return -1; | ||
43 | } | ||
44 | |||
45 | class idigger_t : public util::curl_t, public util::expat_t { | ||
46 | public: | ||
47 | string xri_proxy; | ||
48 | |||
49 | enum { | ||
50 | xmode_html = 1, xmode_xrd = 2 | ||
51 | }; | ||
52 | int xmode; | ||
53 | |||
54 | string xrds_location; | ||
55 | string http_content_type; | ||
56 | service_t html_openid1; | ||
57 | service_t html_openid2; | ||
58 | string cdata_buf; | ||
59 | long status_code; | ||
60 | string status_string; | ||
61 | |||
62 | typedef list<string> pt_stack_t; | ||
63 | pt_stack_t pt_stack; | ||
64 | int skipping; | ||
65 | |||
66 | XRD_t *xrd; | ||
67 | service_t *xrd_service; | ||
68 | string* cdata; | ||
69 | |||
70 | idigger_t() | ||
71 | : util::curl_t(easy_init()), | ||
72 | util::expat_t(0), | ||
73 | xri_proxy(XRI_PROXY_URL) { | ||
74 | CURLcode r; | ||
75 | (r=misc_sets()) | ||
76 | || (r=set_write()) | ||
77 | || (r=set_header()) | ||
78 | ; | ||
79 | if(r) | ||
80 | throw exception_curl(OPKELE_CP_ "failed to set curly options",r); | ||
81 | } | ||
82 | ~idigger_t() throw() { } | ||
83 | |||
84 | void discover(idiscovery_t& result,const string& identity) { | ||
85 | result.clear(); | ||
86 | string::size_type fsc = identity.find_first_not_of(whitespace); | ||
87 | if(fsc==string::npos) | ||
88 | throw bad_input(OPKELE_CP_ "whtiespace-only identity"); | ||
89 | string::size_type lsc = identity.find_last_not_of(whitespace); | ||
90 | assert(lsc!=string::npos); | ||
91 | if(!strncasecmp(identity.c_str()+fsc,"xri://",sizeof("xri://")-1)) | ||
92 | fsc += sizeof("xri://")-1; | ||
93 | if((fsc+1)>=lsc) | ||
94 | throw bad_input(OPKELE_CP_ "not a character of importance in identity"); | ||
95 | string id(identity,fsc,lsc-fsc+1); | ||
96 | if(strchr(i_leaders,id[0])) { | ||
97 | result.normalized_id = id; | ||
98 | /* TODO: further canonicalize xri identity? Like folding case or whatever... */ | ||
99 | discover_at( | ||
100 | result, | ||
101 | xri_proxy + util::url_encode(id)+ | ||
102 | "?_xrd_r=application/xrd+xml;sep=false", xmode_xrd); | ||
103 | if(status_code!=100) | ||
104 | throw failed_xri_resolution(OPKELE_CP_ | ||
105 | "XRI resolution failed with '"+status_string+"' message",status_code); | ||
106 | if(result.xrd.canonical_ids.empty()) | ||
107 | throw opkele::failed_discovery(OPKELE_CP_ "No CanonicalID for XRI identity found"); | ||
108 | }else{ | ||
109 | if(id.find("://")==string::npos) | ||
110 | id.insert(0,"http://"); | ||
111 | string::size_type fp = id.find('#'); | ||
112 | if(fp!=string::npos) { | ||
113 | string::size_type qp = id.find('?'); | ||
114 | if(qp==string::npos || qp<fp) | ||
115 | id.erase(fp); | ||
116 | else if(qp>fp) | ||
117 | id.erase(fp,qp-fp); | ||
118 | } | ||
119 | result.normalized_id = util::rfc_3986_normalize_uri(id); | ||
120 | discover_at(result,id,xmode_html|xmode_xrd); | ||
121 | const char * eu = 0; | ||
122 | CURLcode r = easy_getinfo(CURLINFO_EFFECTIVE_URL,&eu); | ||
123 | if(r) | ||
124 | throw exception_curl(OPKELE_CP_ "failed to get CURLINFO_EFFECTIVE_URL",r); | ||
125 | result.canonicalized_id = util::rfc_3986_normalize_uri(eu); /* XXX: strip fragment part? */ | ||
126 | if(xrds_location.empty()) { | ||
127 | html2xrd(result.xrd); | ||
128 | }else{ | ||
129 | discover_at(result,xrds_location,xmode_xrd); | ||
130 | if(result.xrd.empty()) | ||
131 | html2xrd(result.xrd); | ||
132 | } | ||
133 | } | ||
134 | } | ||
135 | |||
136 | void discover_at(idiscovery_t& result,const string& url,int xm) { | ||
137 | CURLcode r = easy_setopt(CURLOPT_URL,url.c_str()); | ||
138 | if(r) | ||
139 | throw exception_curl(OPKELE_CP_ "failed to set culry urlie",r); | ||
140 | |||
141 | (*(expat_t*)this) = parser_create_ns(); | ||
142 | set_user_data(); set_element_handler(); | ||
143 | set_character_data_handler(); | ||
144 | |||
145 | xrds_location.clear(); http_content_type.clear(); | ||
146 | xmode = xm; | ||
147 | if(xmode&xmode_html) { | ||
148 | xrds_location.clear(); | ||
149 | html_openid1.clear(); html_openid2.clear(); | ||
150 | } | ||
151 | xrd = &result.xrd; | ||
152 | cdata = 0; xrd_service = 0; skipping = 0; | ||
153 | status_code = 100; status_string.clear(); | ||
154 | |||
155 | r = easy_perform(); | ||
156 | if(r && r!=CURLE_WRITE_ERROR) | ||
157 | throw exception_curl(OPKELE_CP_ "failed to perform curly request",r); | ||
158 | |||
159 | parse(0,0,true); | ||
160 | } | ||
161 | |||
162 | void html2xrd(XRD_t& x) { | ||
163 | if(!html_openid1.uris.empty()) { | ||
164 | html_openid1.types.insert(STURI_OPENID11); | ||
165 | x.services.add(-1,html_openid1); | ||
166 | } | ||
167 | if(!html_openid2.uris.empty()) { | ||
168 | html_openid2.types.insert(STURI_OPENID20); | ||
169 | x.services.add(-1,html_openid2); | ||
170 | } | ||
171 | } | ||
172 | |||
173 | size_t write(void *p,size_t s,size_t nm) { | ||
174 | if(skipping<0) return 0; | ||
175 | /* TODO: limit total size */ | ||
176 | size_t bytes = s*nm; | ||
177 | parse((const char *)p,bytes,false); | ||
178 | return bytes; | ||
179 | } | ||
180 | size_t header(void *p,size_t s,size_t nm) { | ||
181 | size_t bytes = s*nm; | ||
182 | const char *h = (const char*)p; | ||
183 | const char *colon = (const char*)memchr(p,':',bytes); | ||
184 | const char *space = (const char*)memchr(p,' ',bytes); | ||
185 | if(space && ( (!colon) || space<colon ) ) { | ||
186 | xrds_location.clear(); http_content_type.clear(); | ||
187 | }else if(colon) { | ||
188 | const char *hv = ++colon; | ||
189 | int hnl = colon-h; | ||
190 | int rb; | ||
191 | for(rb = bytes-hnl-1;rb>0 && isspace(*hv);++hv,--rb); | ||
192 | while(rb>0 && isspace(hv[rb-1])) --rb; | ||
193 | if(rb) { | ||
194 | if( (hnl>=sizeof(XRDS_HEADER)) | ||
195 | && !strncasecmp(h,XRDS_HEADER":", | ||
196 | sizeof(XRDS_HEADER)) ) { | ||
197 | xrds_location.assign(hv,rb); | ||
198 | }else if( (hnl>=sizeof(CT_HEADER)) | ||
199 | && !strncasecmp(h,CT_HEADER":", | ||
200 | sizeof(CT_HEADER)) ) { | ||
201 | const char *sc = (const char*)memchr( | ||
202 | hv,';',rb); | ||
203 | http_content_type.assign(hv,sc?(sc-hv):rb); | ||
204 | } | ||
205 | } | ||
206 | } | ||
207 | return curl_t::header(p,s,nm); | ||
208 | } | ||
209 | |||
210 | void start_element(const XML_Char *n,const XML_Char **a) { | ||
211 | if(skipping<0) return; | ||
212 | if(skipping) { | ||
213 | if(xmode&xmode_html) | ||
214 | html_start_element(n,a); | ||
215 | ++skipping; return; | ||
216 | } | ||
217 | if(pt_stack.empty()) { | ||
218 | if(is_qelement(n,NSURI_XRDS "\tXRDS")) | ||
219 | return; | ||
220 | if(is_qelement(n,NSURI_XRD "\tXRD")) { | ||
221 | assert(xrd); | ||
222 | xrd->clear(); | ||
223 | pt_stack.push_back(n); | ||
224 | }else if(xmode&xmode_html) { | ||
225 | html_start_element(n,a); | ||
226 | }else{ | ||
227 | skipping = -1; | ||
228 | } | ||
229 | }else{ | ||
230 | int pt_s = pt_stack.size(); | ||
231 | if(pt_s==1) { | ||
232 | /* TODO: xrd:XRD/xrd:Expires */ | ||
233 | if(is_qelement(n,NSURI_XRD "\tCanonicalID")) { | ||
234 | assert(xrd); | ||
235 | cdata = &(xrd->canonical_ids.add(element_priority(a),string())); | ||
236 | }else if(is_qelement(n,NSURI_XRD "\tLocalID")) { | ||
237 | assert(xrd); | ||
238 | cdata = &(xrd->local_ids.add(element_priority(a),string())); | ||
239 | }else if(is_qelement(n,NSURI_XRD "\tService")) { | ||
240 | assert(xrd); | ||
241 | xrd_service = &(xrd->services.add(element_priority(a), | ||
242 | service_t())); | ||
243 | pt_stack.push_back(n); | ||
244 | }else if(is_qelement(n,NSURI_XRD "\tStatus")) { | ||
245 | for(;*a;) { | ||
246 | if(!strcasecmp(*(a++),"code")) { | ||
247 | if(sscanf(*(a++),"%ld",&status_code)==1 && status_code!=100) { | ||
248 | cdata = &status_string; | ||
249 | pt_stack.push_back(n); | ||
250 | break; | ||
251 | } | ||
252 | } | ||
253 | } | ||
254 | }else if(xmode&xmode_html) { | ||
255 | html_start_element(n,a); | ||
256 | }else{ | ||
257 | skipping = 1; | ||
258 | } | ||
259 | }else if(pt_s==2) { | ||
260 | if(is_qelement(pt_stack.back().c_str(), NSURI_XRD "\tService")) { | ||
261 | if(is_qelement(n,NSURI_XRD "\tType")) { | ||
262 | assert(xrd); assert(xrd_service); | ||
263 | cdata_buf.clear(); | ||
264 | cdata = &cdata_buf; | ||
265 | }else if(is_qelement(n,NSURI_XRD "\tURI")) { | ||
266 | assert(xrd); assert(xrd_service); | ||
267 | cdata = &(xrd_service->uris.add(element_priority(a),string())); | ||
268 | }else if(is_qelement(n,NSURI_XRD "\tLocalID") | ||
269 | || is_qelement(n,NSURI_OPENID10 "\tDelegate") ) { | ||
270 | assert(xrd); assert(xrd_service); | ||
271 | cdata = &(xrd_service->uris.add(element_priority(a),string())); | ||
272 | }else{ | ||
273 | skipping = 1; | ||
274 | } | ||
275 | }else | ||
276 | skipping = 1; | ||
277 | }else if(xmode&xmode_html) { | ||
278 | html_start_element(n,a); | ||
279 | }else{ | ||
280 | skipping = 1; | ||
281 | } | ||
282 | } | ||
283 | } | ||
284 | void end_element(const XML_Char *n) { | ||
285 | if(skipping<0) return; | ||
286 | if(skipping) { | ||
287 | --skipping; return; | ||
288 | } | ||
289 | if(is_qelement(n,NSURI_XRD "\tType")) { | ||
290 | assert(xrd); assert(xrd_service); assert(cdata==&cdata_buf); | ||
291 | xrd_service->types.insert(cdata_buf); | ||
292 | }else if(is_qelement(n,NSURI_XRD "\tService")) { | ||
293 | assert(xrd); assert(xrd_service); | ||
294 | assert(!pt_stack.empty()); | ||
295 | assert(pt_stack.back()==(NSURI_XRD "\tService")); | ||
296 | pt_stack.pop_back(); | ||
297 | xrd_service = 0; | ||
298 | }else if(is_qelement(n,NSURI_XRD "\tStatus")) { | ||
299 | assert(xrd); | ||
300 | if(is_qelement(pt_stack.back().c_str(),n)) { | ||
301 | assert(cdata==&status_string); | ||
302 | pt_stack.pop_back(); | ||
303 | if(status_code!=100) | ||
304 | skipping = -1; | ||
305 | } | ||
306 | }else if((xmode&xmode_html) && is_element(n,"head")) { | ||
307 | skipping = -1; | ||
308 | } | ||
309 | cdata = 0; | ||
310 | } | ||
311 | void character_data(const XML_Char *s,int l) { | ||
312 | if(skipping) return; | ||
313 | if(cdata) cdata->append(s,l); | ||
314 | } | ||
315 | |||
316 | void html_start_element(const XML_Char *n,const XML_Char **a) { | ||
317 | if(is_element(n,"meta")) { | ||
318 | bool heq = false; | ||
319 | string l; | ||
320 | for(;*a;a+=2) { | ||
321 | if(!( strcasecmp(a[0],"http-equiv") | ||
322 | || strcasecmp(a[1],XRDS_HEADER) )) | ||
323 | heq = true; | ||
324 | else if(!strcasecmp(a[0],"content")) | ||
325 | l.assign(a[1]); | ||
326 | } | ||
327 | if(heq) | ||
328 | xrds_location = l; | ||
329 | }else if(is_element(n,"link")) { | ||
330 | string rels; | ||
331 | string href; | ||
332 | for(;*a;a+=2) { | ||
333 | if( !strcasecmp(a[0],"rel") ) { | ||
334 | rels.assign(a[1]); | ||
335 | }else if( !strcasecmp(a[0],"href") ) { | ||
336 | const char *ns = a[1]; | ||
337 | for(;*ns && isspace(*ns);++ns); | ||
338 | href.assign(ns); | ||
339 | string::size_type lns=href.find_last_not_of(whitespace); | ||
340 | href.erase(lns+1); | ||
341 | } | ||
342 | } | ||
343 | for(string::size_type ns=rels.find_first_not_of(whitespace); | ||
344 | ns!=string::npos; ns=rels.find_first_not_of(whitespace,ns)) { | ||
345 | string::size_type s = rels.find_first_of(whitespace,ns); | ||
346 | string rel; | ||
347 | if(s==string::npos) { | ||
348 | rel.assign(rels,ns,string::npos); | ||
349 | ns = string::npos; | ||
350 | }else{ | ||
351 | rel.assign(rels,ns,s-ns); | ||
352 | ns = s; | ||
353 | } | ||
354 | if(rel=="openid.server") | ||
355 | html_openid1.uris.add(-1,href); | ||
356 | else if(rel=="openid.delegate") | ||
357 | html_openid1.local_ids.add(-1,href); | ||
358 | else if(rel=="openid2.provider") | ||
359 | html_openid2.uris.add(-1,href); | ||
360 | else if(rel=="openid2.local_id") | ||
361 | html_openid2.local_ids.add(-1,href); | ||
362 | } | ||
363 | }else if(is_element(n,"body")) { | ||
364 | skipping = -1; | ||
365 | } | ||
366 | } | ||
367 | |||
368 | }; | ||
369 | |||
370 | void idiscover(idiscovery_t& result,const string& identity) { | ||
371 | idigger_t idigger; | ||
372 | idigger.discover(result,identity); | ||
373 | } | ||
374 | |||
375 | } | ||
diff --git a/lib/openid_service_resolver.cc b/lib/openid_service_resolver.cc deleted file mode 100644 index 505e5b2..0000000 --- a/lib/openid_service_resolver.cc +++ b/dev/null | |||
@@ -1,298 +0,0 @@ | |||
1 | #include <cctype> | ||
2 | #include <opkele/exception.h> | ||
3 | #include <opkele/util.h> | ||
4 | #include <opkele/openid_service_resolver.h> | ||
5 | #include <opkele/uris.h> | ||
6 | |||
7 | #define LOCATION_HEADER "X-XRDS-Location" | ||
8 | |||
9 | namespace opkele { | ||
10 | static const char *whitespace = " \t\r\n"; | ||
11 | |||
12 | openid_service_resolver_t::openid_service_resolver_t(const string& xp) | ||
13 | : util::curl_t(easy_init()), | ||
14 | util::expat_t(0), | ||
15 | xri_proxy(xp.empty()?"http://beta.xri.net/":xp) | ||
16 | { | ||
17 | CURLcode r; | ||
18 | (r=misc_sets()) | ||
19 | || (r=set_write()) | ||
20 | || (r==set_header()) | ||
21 | ; | ||
22 | if(r) | ||
23 | throw opkele::exception_curl(OPKELE_CP_ "failed to set curly options",r); | ||
24 | } | ||
25 | |||
26 | static bool is_element(const XML_Char *n,const char *en) { | ||
27 | if(!strcasecmp(n,en)) return true; | ||
28 | int nl = strlen(n), enl = strlen(en); | ||
29 | if( (nl>=(enl+1)) && n[nl-enl-1]=='\t' | ||
30 | && !strcasecmp(&n[nl-enl],en) ) | ||
31 | return true; | ||
32 | return false; | ||
33 | } | ||
34 | static inline bool is_qelement(const XML_Char *n,const char *qen) { | ||
35 | return !strcasecmp(n,qen); | ||
36 | } | ||
37 | static inline bool is_element( | ||
38 | const openid_service_resolver_t::parser_node_t& n, | ||
39 | const char *en) { | ||
40 | return is_element(n.element.c_str(),en); | ||
41 | } | ||
42 | static inline bool is_qelement( | ||
43 | const openid_service_resolver_t::parser_node_t& n, | ||
44 | const char *qen) { | ||
45 | return is_qelement(n.element.c_str(),qen); | ||
46 | } | ||
47 | |||
48 | void openid_service_resolver_t::start_element(const XML_Char *n,const XML_Char **a) { | ||
49 | if(state!=state_parse) return; | ||
50 | tree.push(n,a); | ||
51 | parser_node_t& t = tree.top(); | ||
52 | if(is_element(n,"html") || is_element(n,"head") | ||
53 | || is_qelement(n,NSURI_XRDS "\tXRDS") | ||
54 | || is_qelement(n,NSURI_XRD "\tXRD") ) | ||
55 | t.skip_tags = false; | ||
56 | else if(is_qelement(n,NSURI_XRD "\tService") | ||
57 | || is_qelement(n,NSURI_XRD "\tType") | ||
58 | || is_qelement(n,NSURI_XRD "\tURI") | ||
59 | || is_qelement(n,NSURI_OPENID10 "\tDelegate") | ||
60 | || is_qelement(n,NSURI_XRD "\tCanonicalID") ) | ||
61 | t.skip_tags = t.skip_text = false; | ||
62 | else if(is_element(n,"body")) | ||
63 | state = state_stopping_body; | ||
64 | } | ||
65 | void openid_service_resolver_t::end_element(const XML_Char *n) { | ||
66 | if(state!=state_parse) return; | ||
67 | assert(tree.top().element == n); | ||
68 | pop_tag(); | ||
69 | } | ||
70 | void openid_service_resolver_t::character_data(const XML_Char *s,int l) { | ||
71 | if(state!=state_parse) return; | ||
72 | if( !( tree.empty() || tree.top().skip_text ) ) | ||
73 | tree.top().content.append(s,l); | ||
74 | } | ||
75 | |||
76 | static void copy_trim_whitespace(string& to,const string& from) { | ||
77 | string::size_type ns0 = from.find_first_not_of(whitespace); | ||
78 | if(ns0==string::npos) { | ||
79 | to.clear(); return; | ||
80 | } | ||
81 | string::size_type ns1 = from.find_last_not_of(whitespace); | ||
82 | assert(ns1!=string::npos); | ||
83 | to.assign(from,ns0,ns1-ns0+1); | ||
84 | } | ||
85 | |||
86 | void openid_service_resolver_t::pop_tag() { | ||
87 | assert(!tree.empty()); | ||
88 | parser_node_t& t = tree.top(); | ||
89 | if( is_element(t,"meta") | ||
90 | && !strcasecmp(t.attrs["http-equiv"].c_str(),LOCATION_HEADER) ) { | ||
91 | xrds_location = t.attrs["content"]; | ||
92 | }else if( is_element(t,"link") ) { | ||
93 | parser_node_t::attrs_t::const_iterator ir = t.attrs.find("rel"); | ||
94 | if(ir!=t.attrs.end()) { | ||
95 | const string& rels = ir->second; | ||
96 | for(string::size_type ns = rels.find_first_not_of(whitespace); | ||
97 | ns!=string::npos; | ||
98 | ns=rels.find_first_not_of(whitespace,ns)) { | ||
99 | string::size_type s = rels.find_first_of(whitespace,ns); | ||
100 | string rel; | ||
101 | if(s==string::npos) { | ||
102 | rel.assign(rels,ns,string::npos); | ||
103 | ns = string::npos; | ||
104 | }else{ | ||
105 | rel.assign(rels,ns,s-ns); | ||
106 | ns = s; | ||
107 | } | ||
108 | if(rel=="openid.server") | ||
109 | copy_trim_whitespace(html_SEP.xrd_URI,t.attrs["href"]); | ||
110 | else if(rel=="openid.delegate") | ||
111 | copy_trim_whitespace(html_SEP.openid_Delegate,t.attrs["href"]); | ||
112 | } | ||
113 | } | ||
114 | }else if( is_element(t,"head") ) | ||
115 | state = state_stopping_head; | ||
116 | else if( is_qelement(t,NSURI_XRD "\tXRD")) { | ||
117 | if( !( | ||
118 | ( | ||
119 | xri_mode | ||
120 | && t.auth_info.canonical_id.empty() | ||
121 | ) || | ||
122 | t.auth_info.auth_SEP.xrd_Type.empty() | ||
123 | ) ) | ||
124 | auth_info = t.auth_info; | ||
125 | }else if( tree.size()>1 ) { | ||
126 | parser_node_t& p = tree.parent(); | ||
127 | if( is_qelement(p,NSURI_XRD "\tService") ) { | ||
128 | if( is_qelement(t,NSURI_XRD "\tType") ) { | ||
129 | if(t.content==STURI_OPENID10) { | ||
130 | string tmp; copy_trim_whitespace(tmp,t.content); | ||
131 | p.auth_info.auth_SEP.xrd_Type.insert(tmp); | ||
132 | } | ||
133 | }else if( is_qelement(t,NSURI_XRD "\tURI") ) | ||
134 | copy_trim_whitespace(p.auth_info.auth_SEP.xrd_URI,t.content); | ||
135 | else if( is_qelement(t,NSURI_OPENID10 "\tDelegate") ) | ||
136 | copy_trim_whitespace(p.auth_info.auth_SEP.openid_Delegate,t.content); | ||
137 | }else if( is_qelement(p,NSURI_XRD "\tXRD") ) { | ||
138 | if(is_qelement(t,NSURI_XRD "\tService") ) { | ||
139 | if( !t.auth_info.auth_SEP.xrd_Type.empty() ) { | ||
140 | parser_node_t::attrs_t::const_iterator ip | ||
141 | = t.attrs.find("priority"); | ||
142 | if(ip!=t.attrs.end()) { | ||
143 | const char *nptr = ip->second.c_str(); | ||
144 | char *eptr = 0; | ||
145 | t.auth_info.auth_SEP.priority = strtol(nptr,&eptr,10); | ||
146 | if(nptr==eptr) | ||
147 | t.auth_info.auth_SEP.priority = LONG_MAX; | ||
148 | } | ||
149 | if( (t.auth_info.auth_SEP.priority < p.auth_info.auth_SEP.priority) | ||
150 | || p.auth_info.auth_SEP.xrd_Type.empty() ) | ||
151 | p.auth_info.auth_SEP = t.auth_info.auth_SEP; | ||
152 | } | ||
153 | }else if( is_qelement(t,NSURI_XRD "\tCanonicalID") ) | ||
154 | copy_trim_whitespace(p.auth_info.canonical_id,t.content); | ||
155 | } | ||
156 | } | ||
157 | |||
158 | tree.pop(); | ||
159 | } | ||
160 | |||
161 | size_t openid_service_resolver_t::write(void *p,size_t s,size_t nm) { | ||
162 | if(state != state_parse) | ||
163 | return 0; | ||
164 | /* TODO: limit total size */ | ||
165 | size_t bytes = s*nm; | ||
166 | parse((const char *)p,bytes,false); | ||
167 | return bytes; | ||
168 | } | ||
169 | |||
170 | size_t openid_service_resolver_t::header(void *p,size_t s,size_t nm) { | ||
171 | size_t bytes = s*nm; | ||
172 | const char *h = (const char *)p; | ||
173 | const char *colon = (const char*)memchr(p,':',bytes); | ||
174 | const char *space = (const char*)memchr(p,' ',bytes); | ||
175 | if(space && ( (!colon) || space<colon ) ) { | ||
176 | xrds_location.clear(); http_content_type.clear(); | ||
177 | }else if(colon) { | ||
178 | const char *hv = ++colon; | ||
179 | int hnl = colon-h; | ||
180 | int rb; | ||
181 | for(rb = bytes-hnl-1; | ||
182 | rb>0 && isspace(*hv); | ||
183 | ++hv,--rb ); | ||
184 | while(rb>0 && isspace(hv[rb-1])) | ||
185 | --rb; | ||
186 | if(rb) { | ||
187 | if( (hnl >= sizeof(LOCATION_HEADER)) | ||
188 | && !strncasecmp(h,LOCATION_HEADER ":", | ||
189 | sizeof(LOCATION_HEADER)) ) { | ||
190 | xrds_location.assign(hv,rb); | ||
191 | }else if( (hnl >= sizeof("Content-Type")) | ||
192 | && !strncasecmp(h,"Content-Type:", | ||
193 | sizeof("Content-Type")) ) { | ||
194 | const char *sc = (const char*)memchr( | ||
195 | hv,';',rb); | ||
196 | http_content_type.assign( | ||
197 | hv,sc?(sc-hv):rb ); | ||
198 | } | ||
199 | } | ||
200 | } | ||
201 | return curl_t::header(p,s,nm); | ||
202 | } | ||
203 | |||
204 | void openid_service_resolver_t::discover_service(const string& url,bool xri) { | ||
205 | CURLcode r = easy_setopt(CURLOPT_URL,url.c_str()); | ||
206 | if(r) | ||
207 | throw opkele::exception_curl(OPKELE_CP_ "failed to set curly urlie",r); | ||
208 | |||
209 | (*(expat_t*)this) = parser_create_ns(); | ||
210 | set_user_data(); set_element_handler(); | ||
211 | set_character_data_handler(); | ||
212 | tree.clear(); | ||
213 | state = state_parse; | ||
214 | |||
215 | r = easy_perform(); | ||
216 | if(r && r!=CURLE_WRITE_ERROR) | ||
217 | throw exception_curl(OPKELE_CP_ "failed to perform curly request",r); | ||
218 | |||
219 | parse(0,0,true); | ||
220 | while(!tree.empty()) pop_tag(); | ||
221 | } | ||
222 | |||
223 | const openid_auth_info_t& openid_service_resolver_t::resolve(const string& id) { | ||
224 | auth_info = openid_auth_info_t(); | ||
225 | html_SEP = openid_auth_SEP_t(); | ||
226 | |||
227 | string::size_type fns = id.find_first_not_of(whitespace); | ||
228 | if(fns==string::npos) | ||
229 | throw opkele::bad_input(OPKELE_CP_ "whitespace-only identity"); | ||
230 | string::size_type lns = id.find_last_not_of(whitespace); | ||
231 | assert(lns!=string::npos); | ||
232 | if(!strncasecmp( | ||
233 | id.c_str()+fns,"xri://", | ||
234 | sizeof("xri://")-1)) | ||
235 | fns+=sizeof("xri://")-1; | ||
236 | string nid(id,fns,lns-fns+1); | ||
237 | if(nid.empty()) | ||
238 | throw opkele::bad_input(OPKELE_CP_ "nothing significant in identity"); | ||
239 | if(strchr("=@+$!(",*nid.c_str())) { | ||
240 | discover_service( | ||
241 | xri_proxy + util::url_encode(nid) + | ||
242 | "?_xrd_t=" STURI_OPENID10 "&_xrd_r=application/xrd+xml;sep=true", | ||
243 | true ); | ||
244 | if(auth_info.canonical_id.empty() | ||
245 | || auth_info.auth_SEP.xrd_Type.empty() ) | ||
246 | throw opkele::failed_lookup(OPKELE_CP_ "no OpenID service for XRI found"); | ||
247 | }else{ | ||
248 | const char *np = nid.c_str(); | ||
249 | if( (strncasecmp(np,"http",4) || strncmp( | ||
250 | tolower(*(np+4))=='s'? np+5 : np+4, "://", 3)) | ||
251 | #ifndef NDEBUG | ||
252 | && strncasecmp(np,"file:///",sizeof("file:///")-1) | ||
253 | #endif /* XXX: or how do I let tests work? */ | ||
254 | ) | ||
255 | nid.insert(0,"http://"); | ||
256 | string::size_type fp = nid.find('#'); | ||
257 | if(fp!=string::npos) { | ||
258 | string::size_type qp = nid.find('?'); | ||
259 | if(qp==string::npos || qp<fp) { | ||
260 | nid.erase(fp); | ||
261 | }else if(qp>fp) | ||
262 | nid.erase(fp,qp-fp); | ||
263 | } | ||
264 | discover_service(nid); | ||
265 | const char *eu = 0; | ||
266 | CURLcode r = easy_getinfo(CURLINFO_EFFECTIVE_URL,&eu); | ||
267 | if(r) | ||
268 | throw exception_curl(OPKELE_CP_ "failed to get CURLINFO_EFFECTIVE_URL",r); | ||
269 | string canonicalized_id = util::rfc_3986_normalize_uri(eu); | ||
270 | if(xrds_location.empty()) { | ||
271 | if(auth_info.auth_SEP.xrd_Type.empty()) { | ||
272 | if(html_SEP.xrd_URI.empty()) | ||
273 | throw opkele::failed_lookup(OPKELE_CP_ "no OpenID 1.0 service discovered"); | ||
274 | auth_info.auth_SEP = html_SEP; | ||
275 | auth_info.auth_SEP.xrd_Type.clear(); auth_info.auth_SEP.xrd_Type.insert( STURI_OPENID10 ); | ||
276 | auth_info.canonical_id = canonicalized_id; | ||
277 | }else{ | ||
278 | if(auth_info.canonical_id.empty()) | ||
279 | auth_info.canonical_id = canonicalized_id; | ||
280 | } | ||
281 | }else{ | ||
282 | discover_service(xrds_location); | ||
283 | if(auth_info.auth_SEP.xrd_Type.empty()) { | ||
284 | if(html_SEP.xrd_URI.empty()) | ||
285 | throw opkele::failed_lookup(OPKELE_CP_ "no OpenID 1.0 service discovered"); | ||
286 | auth_info.auth_SEP = html_SEP; | ||
287 | auth_info.auth_SEP.xrd_Type.clear(); auth_info.auth_SEP.xrd_Type.insert( STURI_OPENID10 ); | ||
288 | auth_info.canonical_id = canonicalized_id; | ||
289 | }else{ | ||
290 | if(auth_info.canonical_id.empty()) | ||
291 | auth_info.canonical_id = canonicalized_id; | ||
292 | } | ||
293 | } | ||
294 | } | ||
295 | return auth_info; | ||
296 | } | ||
297 | |||
298 | } | ||
diff --git a/lib/util.cc b/lib/util.cc index 69d37b5..83f0eef 100644 --- a/lib/util.cc +++ b/lib/util.cc | |||
@@ -1,303 +1,305 @@ | |||
1 | #include <errno.h> | 1 | #include <errno.h> |
2 | #include <cassert> | 2 | #include <cassert> |
3 | #include <cctype> | 3 | #include <cctype> |
4 | #include <cstring> | 4 | #include <cstring> |
5 | #include <vector> | 5 | #include <vector> |
6 | #include <string> | 6 | #include <string> |
7 | #include <stack> | 7 | #include <stack> |
8 | #include <openssl/bio.h> | 8 | #include <openssl/bio.h> |
9 | #include <openssl/evp.h> | 9 | #include <openssl/evp.h> |
10 | #include <curl/curl.h> | 10 | #include <curl/curl.h> |
11 | #include "opkele/util.h" | 11 | #include "opkele/util.h" |
12 | #include "opkele/exception.h" | 12 | #include "opkele/exception.h" |
13 | 13 | ||
14 | namespace opkele { | 14 | namespace opkele { |
15 | using namespace std; | 15 | using namespace std; |
16 | 16 | ||
17 | namespace util { | 17 | namespace util { |
18 | 18 | ||
19 | /* | 19 | /* |
20 | * base64 | 20 | * base64 |
21 | */ | 21 | */ |
22 | string encode_base64(const void *data,size_t length) { | 22 | string encode_base64(const void *data,size_t length) { |
23 | BIO *b64 = 0, *bmem = 0; | 23 | BIO *b64 = 0, *bmem = 0; |
24 | try { | 24 | try { |
25 | b64 = BIO_new(BIO_f_base64()); | 25 | b64 = BIO_new(BIO_f_base64()); |
26 | if(!b64) | 26 | if(!b64) |
27 | throw exception_openssl(OPKELE_CP_ "failed to BIO_new() base64 encoder"); | 27 | throw exception_openssl(OPKELE_CP_ "failed to BIO_new() base64 encoder"); |
28 | BIO_set_flags(b64,BIO_FLAGS_BASE64_NO_NL); | 28 | BIO_set_flags(b64,BIO_FLAGS_BASE64_NO_NL); |
29 | bmem = BIO_new(BIO_s_mem()); | 29 | bmem = BIO_new(BIO_s_mem()); |
30 | BIO_set_flags(b64,BIO_CLOSE); | 30 | BIO_set_flags(b64,BIO_CLOSE); |
31 | if(!bmem) | 31 | if(!bmem) |
32 | throw exception_openssl(OPKELE_CP_ "failed to BIO_new() memory buffer"); | 32 | throw exception_openssl(OPKELE_CP_ "failed to BIO_new() memory buffer"); |
33 | BIO_push(b64,bmem); | 33 | BIO_push(b64,bmem); |
34 | if(((size_t)BIO_write(b64,data,length))!=length) | 34 | if(((size_t)BIO_write(b64,data,length))!=length) |
35 | throw exception_openssl(OPKELE_CP_ "failed to BIO_write()"); | 35 | throw exception_openssl(OPKELE_CP_ "failed to BIO_write()"); |
36 | if(BIO_flush(b64)!=1) | 36 | if(BIO_flush(b64)!=1) |
37 | throw exception_openssl(OPKELE_CP_ "failed to BIO_flush()"); | 37 | throw exception_openssl(OPKELE_CP_ "failed to BIO_flush()"); |
38 | char *rvd; | 38 | char *rvd; |
39 | long rvl = BIO_get_mem_data(bmem,&rvd); | 39 | long rvl = BIO_get_mem_data(bmem,&rvd); |
40 | string rv(rvd,rvl); | 40 | string rv(rvd,rvl); |
41 | BIO_free_all(b64); | 41 | BIO_free_all(b64); |
42 | return rv; | 42 | return rv; |
43 | }catch(...) { | 43 | }catch(...) { |
44 | if(b64) BIO_free_all(b64); | 44 | if(b64) BIO_free_all(b64); |
45 | throw; | 45 | throw; |
46 | } | 46 | } |
47 | } | 47 | } |
48 | 48 | ||
49 | void decode_base64(const string& data,vector<unsigned char>& rv) { | 49 | void decode_base64(const string& data,vector<unsigned char>& rv) { |
50 | BIO *b64 = 0, *bmem = 0; | 50 | BIO *b64 = 0, *bmem = 0; |
51 | rv.clear(); | 51 | rv.clear(); |
52 | try { | 52 | try { |
53 | bmem = BIO_new_mem_buf((void*)data.data(),data.size()); | 53 | bmem = BIO_new_mem_buf((void*)data.data(),data.size()); |
54 | if(!bmem) | 54 | if(!bmem) |
55 | throw exception_openssl(OPKELE_CP_ "failed to BIO_new_mem_buf()"); | 55 | throw exception_openssl(OPKELE_CP_ "failed to BIO_new_mem_buf()"); |
56 | b64 = BIO_new(BIO_f_base64()); | 56 | b64 = BIO_new(BIO_f_base64()); |
57 | if(!b64) | 57 | if(!b64) |
58 | throw exception_openssl(OPKELE_CP_ "failed to BIO_new() base64 decoder"); | 58 | throw exception_openssl(OPKELE_CP_ "failed to BIO_new() base64 decoder"); |
59 | BIO_set_flags(b64,BIO_FLAGS_BASE64_NO_NL); | 59 | BIO_set_flags(b64,BIO_FLAGS_BASE64_NO_NL); |
60 | BIO_push(b64,bmem); | 60 | BIO_push(b64,bmem); |
61 | unsigned char tmp[512]; | 61 | unsigned char tmp[512]; |
62 | size_t rb = 0; | 62 | size_t rb = 0; |
63 | while((rb=BIO_read(b64,tmp,sizeof(tmp)))>0) | 63 | while((rb=BIO_read(b64,tmp,sizeof(tmp)))>0) |
64 | rv.insert(rv.end(),tmp,&tmp[rb]); | 64 | rv.insert(rv.end(),tmp,&tmp[rb]); |
65 | BIO_free_all(b64); | 65 | BIO_free_all(b64); |
66 | }catch(...) { | 66 | }catch(...) { |
67 | if(b64) BIO_free_all(b64); | 67 | if(b64) BIO_free_all(b64); |
68 | throw; | 68 | throw; |
69 | } | 69 | } |
70 | } | 70 | } |
71 | 71 | ||
72 | /* | 72 | /* |
73 | * big numerics | 73 | * big numerics |
74 | */ | 74 | */ |
75 | 75 | ||
76 | BIGNUM *base64_to_bignum(const string& b64) { | 76 | BIGNUM *base64_to_bignum(const string& b64) { |
77 | vector<unsigned char> bin; | 77 | vector<unsigned char> bin; |
78 | decode_base64(b64,bin); | 78 | decode_base64(b64,bin); |
79 | BIGNUM *rv = BN_bin2bn(&(bin.front()),bin.size(),0); | 79 | BIGNUM *rv = BN_bin2bn(&(bin.front()),bin.size(),0); |
80 | if(!rv) | 80 | if(!rv) |
81 | throw failed_conversion(OPKELE_CP_ "failed to BN_bin2bn()"); | 81 | throw failed_conversion(OPKELE_CP_ "failed to BN_bin2bn()"); |
82 | return rv; | 82 | return rv; |
83 | } | 83 | } |
84 | 84 | ||
85 | BIGNUM *dec_to_bignum(const string& dec) { | 85 | BIGNUM *dec_to_bignum(const string& dec) { |
86 | BIGNUM *rv = 0; | 86 | BIGNUM *rv = 0; |
87 | if(!BN_dec2bn(&rv,dec.c_str())) | 87 | if(!BN_dec2bn(&rv,dec.c_str())) |
88 | throw failed_conversion(OPKELE_CP_ "failed to BN_dec2bn()"); | 88 | throw failed_conversion(OPKELE_CP_ "failed to BN_dec2bn()"); |
89 | return rv; | 89 | return rv; |
90 | } | 90 | } |
91 | 91 | ||
92 | string bignum_to_base64(const BIGNUM *bn) { | 92 | string bignum_to_base64(const BIGNUM *bn) { |
93 | vector<unsigned char> bin(BN_num_bytes(bn)+1); | 93 | vector<unsigned char> bin(BN_num_bytes(bn)+1); |
94 | unsigned char *binptr = &(bin.front())+1; | 94 | unsigned char *binptr = &(bin.front())+1; |
95 | int l = BN_bn2bin(bn,binptr); | 95 | int l = BN_bn2bin(bn,binptr); |
96 | if(l && (*binptr)&0x80){ | 96 | if(l && (*binptr)&0x80){ |
97 | (*(--binptr)) = 0; ++l; | 97 | (*(--binptr)) = 0; ++l; |
98 | } | 98 | } |
99 | return encode_base64(binptr,l); | 99 | return encode_base64(binptr,l); |
100 | } | 100 | } |
101 | 101 | ||
102 | /* | 102 | /* |
103 | * w3c times | 103 | * w3c times |
104 | */ | 104 | */ |
105 | 105 | ||
106 | string time_to_w3c(time_t t) { | 106 | string time_to_w3c(time_t t) { |
107 | struct tm tm_t; | 107 | struct tm tm_t; |
108 | if(!gmtime_r(&t,&tm_t)) | 108 | if(!gmtime_r(&t,&tm_t)) |
109 | throw failed_conversion(OPKELE_CP_ "failed to BN_dec2bn()"); | 109 | throw failed_conversion(OPKELE_CP_ "failed to BN_dec2bn()"); |
110 | char rv[25]; | 110 | char rv[25]; |
111 | if(!strftime(rv,sizeof(rv)-1,"%Y-%m-%dT%H:%M:%SZ",&tm_t)) | 111 | if(!strftime(rv,sizeof(rv)-1,"%Y-%m-%dT%H:%M:%SZ",&tm_t)) |
112 | throw failed_conversion(OPKELE_CP_ "failed to strftime()"); | 112 | throw failed_conversion(OPKELE_CP_ "failed to strftime()"); |
113 | return rv; | 113 | return rv; |
114 | } | 114 | } |
115 | 115 | ||
116 | time_t w3c_to_time(const string& w) { | 116 | time_t w3c_to_time(const string& w) { |
117 | struct tm tm_t; | 117 | struct tm tm_t; |
118 | memset(&tm_t,0,sizeof(tm_t)); | 118 | memset(&tm_t,0,sizeof(tm_t)); |
119 | if( | 119 | if( |
120 | sscanf( | 120 | sscanf( |
121 | w.c_str(), | 121 | w.c_str(), |
122 | "%04d-%02d-%02dT%02d:%02d:%02dZ", | 122 | "%04d-%02d-%02dT%02d:%02d:%02dZ", |
123 | &tm_t.tm_year,&tm_t.tm_mon,&tm_t.tm_mday, | 123 | &tm_t.tm_year,&tm_t.tm_mon,&tm_t.tm_mday, |
124 | &tm_t.tm_hour,&tm_t.tm_min,&tm_t.tm_sec | 124 | &tm_t.tm_hour,&tm_t.tm_min,&tm_t.tm_sec |
125 | ) != 6 ) | 125 | ) != 6 ) |
126 | throw failed_conversion(OPKELE_CP_ "failed to sscanf()"); | 126 | throw failed_conversion(OPKELE_CP_ "failed to sscanf()"); |
127 | tm_t.tm_mon--; | 127 | tm_t.tm_mon--; |
128 | tm_t.tm_year-=1900; | 128 | tm_t.tm_year-=1900; |
129 | time_t rv = mktime(&tm_t); | 129 | time_t rv = mktime(&tm_t); |
130 | if(rv==(time_t)-1) | 130 | if(rv==(time_t)-1) |
131 | throw failed_conversion(OPKELE_CP_ "failed to mktime()"); | 131 | throw failed_conversion(OPKELE_CP_ "failed to mktime()"); |
132 | return rv; | 132 | return rv; |
133 | } | 133 | } |
134 | 134 | ||
135 | /* | 135 | /* |
136 | * | 136 | * |
137 | */ | 137 | */ |
138 | 138 | ||
139 | string url_encode(const string& str) { | 139 | string url_encode(const string& str) { |
140 | char * t = curl_escape(str.c_str(),str.length()); | 140 | char * t = curl_escape(str.c_str(),str.length()); |
141 | if(!t) | 141 | if(!t) |
142 | throw failed_conversion(OPKELE_CP_ "failed to curl_escape()"); | 142 | throw failed_conversion(OPKELE_CP_ "failed to curl_escape()"); |
143 | string rv(t); | 143 | string rv(t); |
144 | curl_free(t); | 144 | curl_free(t); |
145 | return rv; | 145 | return rv; |
146 | } | 146 | } |
147 | 147 | ||
148 | string long_to_string(long l) { | 148 | string long_to_string(long l) { |
149 | char rv[32]; | 149 | char rv[32]; |
150 | int r=snprintf(rv,sizeof(rv),"%ld",l); | 150 | int r=snprintf(rv,sizeof(rv),"%ld",l); |
151 | if(r<0 || r>=(int)sizeof(rv)) | 151 | if(r<0 || r>=(int)sizeof(rv)) |
152 | throw failed_conversion(OPKELE_CP_ "failed to snprintf()"); | 152 | throw failed_conversion(OPKELE_CP_ "failed to snprintf()"); |
153 | return rv; | 153 | return rv; |
154 | } | 154 | } |
155 | 155 | ||
156 | long string_to_long(const string& s) { | 156 | long string_to_long(const string& s) { |
157 | char *endptr = 0; | 157 | char *endptr = 0; |
158 | long rv = strtol(s.c_str(),&endptr,10); | 158 | long rv = strtol(s.c_str(),&endptr,10); |
159 | if((!endptr) || endptr==s.c_str()) | 159 | if((!endptr) || endptr==s.c_str()) |
160 | throw failed_conversion(OPKELE_CP_ "failed to strtol()"); | 160 | throw failed_conversion(OPKELE_CP_ "failed to strtol()"); |
161 | return rv; | 161 | return rv; |
162 | } | 162 | } |
163 | 163 | ||
164 | /* | 164 | /* |
165 | * Normalize URL according to the rules, described in rfc 3986, section 6 | 165 | * Normalize URL according to the rules, described in rfc 3986, section 6 |
166 | * | 166 | * |
167 | * - uppercase hext triplets (e.g. %ab -> %AB) | 167 | * - uppercase hext triplets (e.g. %ab -> %AB) |
168 | * - lowercase scheme and host | 168 | * - lowercase scheme and host |
169 | * - decode %-encoded characters, specified as unreserved in rfc 3986, section 2.3, | 169 | * - decode %-encoded characters, specified as unreserved in rfc 3986, section 2.3, |
170 | * that is - [:alpha:][:digit:]._~- | 170 | * that is - [:alpha:][:digit:]._~- |
171 | * - remove dot segments | 171 | * - remove dot segments |
172 | * - remove empty and default ports | 172 | * - remove empty and default ports |
173 | * - if there's no path component, add '/' | 173 | * - if there's no path component, add '/' |
174 | */ | 174 | */ |
175 | string rfc_3986_normalize_uri(const string& uri) { | 175 | string rfc_3986_normalize_uri(const string& uri) { |
176 | static const char *whitespace = " \t\r\n"; | 176 | static const char *whitespace = " \t\r\n"; |
177 | string rv; | 177 | string rv; |
178 | string::size_type ns = uri.find_first_not_of(whitespace); | 178 | string::size_type ns = uri.find_first_not_of(whitespace); |
179 | if(ns==string::npos) | 179 | if(ns==string::npos) |
180 | throw bad_input(OPKELE_CP_ "Can't normalize empty URI"); | 180 | throw bad_input(OPKELE_CP_ "Can't normalize empty URI"); |
181 | string::size_type colon = uri.find(':',ns); | 181 | string::size_type colon = uri.find(':',ns); |
182 | if(colon==string::npos) | 182 | if(colon==string::npos) |
183 | throw bad_input(OPKELE_CP_ "No scheme specified in URI"); | 183 | throw bad_input(OPKELE_CP_ "No scheme specified in URI"); |
184 | transform( | 184 | transform( |
185 | uri.begin()+ns, uri.begin()+colon+1, | 185 | uri.begin()+ns, uri.begin()+colon+1, |
186 | back_inserter(rv), ::tolower ); | 186 | back_inserter(rv), ::tolower ); |
187 | bool s; | 187 | bool s; |
188 | if(rv=="http:") | ||
189 | s = false; | ||
190 | else if(rv=="https:") | ||
191 | s = true; | ||
192 | #ifndef NDEBUG | ||
193 | else if(rv=="file:") | ||
194 | s = false; | ||
195 | #endif /* XXX: or try to make tests work some other way */ | ||
196 | else | ||
197 | throw not_implemented(OPKELE_CP_ "Only http(s) URIs can be normalized here"); | ||
198 | string::size_type ul = uri.find_last_not_of(whitespace)+1; | 188 | string::size_type ul = uri.find_last_not_of(whitespace)+1; |
199 | if(ul <= (colon+3)) | 189 | if(ul <= (colon+3)) |
200 | throw bad_input(OPKELE_CP_ "Unexpected end of URI being normalized encountered"); | 190 | throw bad_input(OPKELE_CP_ "Unexpected end of URI being normalized encountered"); |
201 | if(uri[colon+1]!='/' || uri[colon+2]!='/') | 191 | if(uri[colon+1]!='/' || uri[colon+2]!='/') |
202 | throw bad_input(OPKELE_CP_ "Unexpected input in URI being normalized after scheme component"); | 192 | throw bad_input(OPKELE_CP_ "Unexpected input in URI being normalized after scheme component"); |
193 | if(rv=="http:") | ||
194 | s = false; | ||
195 | else if(rv=="https:") | ||
196 | s = true; | ||
197 | else{ | ||
198 | /* TODO: support more schemes. | ||
199 | * e.g. xri. How do we normalize | ||
200 | * xri? | ||
201 | */ | ||
202 | rv.append(uri,colon+1,ul-colon-1); | ||
203 | return rv; | ||
204 | } | ||
203 | rv += "//"; | 205 | rv += "//"; |
204 | string::size_type interesting = uri.find_first_of(":/#?",colon+3); | 206 | string::size_type interesting = uri.find_first_of(":/#?",colon+3); |
205 | if(interesting==string::npos) { | 207 | if(interesting==string::npos) { |
206 | transform( | 208 | transform( |
207 | uri.begin()+colon+3,uri.begin()+ul, | 209 | uri.begin()+colon+3,uri.begin()+ul, |
208 | back_inserter(rv), ::tolower ); | 210 | back_inserter(rv), ::tolower ); |
209 | rv += '/'; return rv; | 211 | rv += '/'; return rv; |
210 | } | 212 | } |
211 | transform( | 213 | transform( |
212 | uri.begin()+colon+3,uri.begin()+interesting, | 214 | uri.begin()+colon+3,uri.begin()+interesting, |
213 | back_inserter(rv), ::tolower ); | 215 | back_inserter(rv), ::tolower ); |
214 | bool qf = false; | 216 | bool qf = false; |
215 | char ic = uri[interesting]; | 217 | char ic = uri[interesting]; |
216 | if(ic==':') { | 218 | if(ic==':') { |
217 | string::size_type ni = uri.find_first_of("/#?%",interesting+1); | 219 | string::size_type ni = uri.find_first_of("/#?%",interesting+1); |
218 | const char *nptr = uri.data()+interesting+1; | 220 | const char *nptr = uri.data()+interesting+1; |
219 | char *eptr = 0; | 221 | char *eptr = 0; |
220 | long port = strtol(nptr,&eptr,10); | 222 | long port = strtol(nptr,&eptr,10); |
221 | if( (port>0) && (port<65535) && port!=(s?443:80) ) { | 223 | if( (port>0) && (port<65535) && port!=(s?443:80) ) { |
222 | char tmp[6]; | 224 | char tmp[6]; |
223 | snprintf(tmp,sizeof(tmp),"%ld",port); | 225 | snprintf(tmp,sizeof(tmp),"%ld",port); |
224 | rv += ':'; rv += tmp; | 226 | rv += ':'; rv += tmp; |
225 | } | 227 | } |
226 | if(ni==string::npos) { | 228 | if(ni==string::npos) { |
227 | rv += '/'; return rv; | 229 | rv += '/'; return rv; |
228 | } | 230 | } |
229 | interesting = ni; | 231 | interesting = ni; |
230 | }else if(ic!='/') { | 232 | }else if(ic!='/') { |
231 | rv += '/'; rv += ic; | 233 | rv += '/'; rv += ic; |
232 | qf = true; | 234 | qf = true; |
233 | ++interesting; | 235 | ++interesting; |
234 | } | 236 | } |
235 | string::size_type n = interesting; | 237 | string::size_type n = interesting; |
236 | char tmp[3] = { 0,0,0 }; | 238 | char tmp[3] = { 0,0,0 }; |
237 | stack<string::size_type> psegs; psegs.push(rv.length()); | 239 | stack<string::size_type> psegs; psegs.push(rv.length()); |
238 | string pseg; | 240 | string pseg; |
239 | for(;n<ul;) { | 241 | for(;n<ul;) { |
240 | string::size_type unsafe = uri.find_first_of(qf?"%":"%/?#",n); | 242 | string::size_type unsafe = uri.find_first_of(qf?"%":"%/?#",n); |
241 | if(unsafe==string::npos) { | 243 | if(unsafe==string::npos) { |
242 | pseg.append(uri,n,ul-n-1); n = ul-1; | 244 | pseg.append(uri,n,ul-n-1); n = ul-1; |
243 | }else{ | 245 | }else{ |
244 | pseg.append(uri,n,unsafe-n); | 246 | pseg.append(uri,n,unsafe-n); |
245 | n = unsafe; | 247 | n = unsafe; |
246 | } | 248 | } |
247 | char c = uri[n++]; | 249 | char c = uri[n++]; |
248 | if(c=='%') { | 250 | if(c=='%') { |
249 | if((n+1)>=ul) | 251 | if((n+1)>=ul) |
250 | throw bad_input(OPKELE_CP_ "Unexpected end of URI encountered while parsing percent-encoded character"); | 252 | throw bad_input(OPKELE_CP_ "Unexpected end of URI encountered while parsing percent-encoded character"); |
251 | tmp[0] = uri[n++]; | 253 | tmp[0] = uri[n++]; |
252 | tmp[1] = uri[n++]; | 254 | tmp[1] = uri[n++]; |
253 | if(!( isxdigit(tmp[0]) && isxdigit(tmp[1]) )) | 255 | if(!( isxdigit(tmp[0]) && isxdigit(tmp[1]) )) |
254 | throw bad_input(OPKELE_CP_ "Invalid percent-encoded character in URI being normalized"); | 256 | throw bad_input(OPKELE_CP_ "Invalid percent-encoded character in URI being normalized"); |
255 | int cc = strtol(tmp,0,16); | 257 | int cc = strtol(tmp,0,16); |
256 | if( isalpha(cc) || isdigit(cc) || strchr("._~-",cc) ) | 258 | if( isalpha(cc) || isdigit(cc) || strchr("._~-",cc) ) |
257 | pseg += cc; | 259 | pseg += cc; |
258 | else{ | 260 | else{ |
259 | pseg += '%'; | 261 | pseg += '%'; |
260 | pseg += toupper(tmp[0]); pseg += toupper(tmp[1]); | 262 | pseg += toupper(tmp[0]); pseg += toupper(tmp[1]); |
261 | } | 263 | } |
262 | }else if(qf) { | 264 | }else if(qf) { |
263 | rv += pseg; rv += c; | 265 | rv += pseg; rv += c; |
264 | pseg.clear(); | 266 | pseg.clear(); |
265 | }else if(n>=ul || strchr("?/#",c)) { | 267 | }else if(n>=ul || strchr("?/#",c)) { |
266 | if(pseg.empty() || pseg==".") { | 268 | if(pseg.empty() || pseg==".") { |
267 | }else if(pseg=="..") { | 269 | }else if(pseg=="..") { |
268 | if(psegs.size()>1) { | 270 | if(psegs.size()>1) { |
269 | rv.resize(psegs.top()); psegs.pop(); | 271 | rv.resize(psegs.top()); psegs.pop(); |
270 | } | 272 | } |
271 | }else{ | 273 | }else{ |
272 | psegs.push(rv.length()); | 274 | psegs.push(rv.length()); |
273 | if(c!='/') { | 275 | if(c!='/') { |
274 | pseg += c; | 276 | pseg += c; |
275 | qf = true; | 277 | qf = true; |
276 | } | 278 | } |
277 | rv += '/'; rv += pseg; | 279 | rv += '/'; rv += pseg; |
278 | } | 280 | } |
279 | if(c=='/' && (n>=ul || strchr("?#",uri[n])) ) { | 281 | if(c=='/' && (n>=ul || strchr("?#",uri[n])) ) { |
280 | rv += '/'; | 282 | rv += '/'; |
281 | if(n<ul) | 283 | if(n<ul) |
282 | qf = true; | 284 | qf = true; |
283 | }else if(strchr("?#",c)) { | 285 | }else if(strchr("?#",c)) { |
284 | if(psegs.size()==1 && psegs.top()==rv.length()) | 286 | if(psegs.size()==1 && psegs.top()==rv.length()) |
285 | rv += '/'; | 287 | rv += '/'; |
286 | if(pseg.empty()) | 288 | if(pseg.empty()) |
287 | rv += c; | 289 | rv += c; |
288 | qf = true; | 290 | qf = true; |
289 | } | 291 | } |
290 | pseg.clear(); | 292 | pseg.clear(); |
291 | }else{ | 293 | }else{ |
292 | pseg += c; | 294 | pseg += c; |
293 | } | 295 | } |
294 | } | 296 | } |
295 | if(!pseg.empty()) { | 297 | if(!pseg.empty()) { |
296 | rv += '/'; rv += pseg; | 298 | rv += '/'; rv += pseg; |
297 | } | 299 | } |
298 | return rv; | 300 | return rv; |
299 | } | 301 | } |
300 | 302 | ||
301 | } | 303 | } |
302 | 304 | ||
303 | } | 305 | } |