summaryrefslogtreecommitdiffabout
path: root/lib/discovery.cc
authorMichael Krelin <hacker@klever.net>2007-12-09 17:22:06 (UTC)
committer Michael Krelin <hacker@klever.net>2007-12-09 22:08:24 (UTC)
commitc34adc6e274c3dbb63af99ca566000e7d218244c (patch) (unidiff)
tree705624c208deb4eaf8d07c119a883e6f4f35236e /lib/discovery.cc
parent60fdaff7888b455b4d07eadc905cefd20f1ddd3c (diff)
downloadlibopkele-c34adc6e274c3dbb63af99ca566000e7d218244c.zip
libopkele-c34adc6e274c3dbb63af99ca566000e7d218244c.tar.gz
libopkele-c34adc6e274c3dbb63af99ca566000e7d218244c.tar.bz2
reworked identity resolution and service discovery
The discovery, which performs both XRDS-based (Yadis and XRI; XRIs are resolved through a proxy) and HTML-based search, now returns its results in an opkele::idiscovery_t structure. It uses the expat-based parser idigger_t, which is not exposed via any header file but is hidden in lib/discovery.cc. The discovery testing program is renamed from openid_resolve to idiscover. Signed-off-by: Michael Krelin <hacker@klever.net>
Diffstat (limited to 'lib/discovery.cc') (more/less context) (ignore whitespace changes)
-rw-r--r--lib/discovery.cc375
1 files changed, 375 insertions, 0 deletions
diff --git a/lib/discovery.cc b/lib/discovery.cc
new file mode 100644
index 0000000..a35ce32
--- a/dev/null
+++ b/lib/discovery.cc
@@ -0,0 +1,375 @@
1#include <iostream>
2using namespace std;
3#include <list>
4#include <opkele/curl.h>
5#include <opkele/expat.h>
6#include <opkele/uris.h>
7#include <opkele/discovery.h>
8#include <opkele/exception.h>
9#include <opkele/util.h>
10
11#include "config.h"
12
13#define XRDS_HEADER "X-XRDS-Location"
14#define CT_HEADER "Content-Type"
15
16namespace opkele {
17 using std::list;
18 using xrd::XRD_t;
19 using xrd::service_t;
20
21 static const char *whitespace = " \t\r\n";
22 static const char *i_leaders = "=@+$!(";
23
/* True when the element name n equals the qualified name qen,
 * ignoring case. */
static inline bool is_qelement(const XML_Char *n,const char *qen) {
    return strcasecmp(n,qen)==0;
}
/* True when n names the element en, ignoring case: either n is exactly
 * en, or n ends with en and the character right before that suffix is
 * the '\t' separator. */
static inline bool is_element(const XML_Char *n,const char *en) {
    if(strcasecmp(n,en)==0)
        return true;
    const int name_len = strlen(n);
    const int elem_len = strlen(en);
    /* a separator plus the local name needs at least one extra character */
    if(name_len<=elem_len)
        return false;
    const XML_Char *tail = n+(name_len-elem_len);
    return tail[-1]=='\t' && strcasecmp(tail,en)==0;
}
35
/* Scans the null-terminated name/value attribute array a for an attribute
 * named "priority" (case-insensitive) and returns its value parsed as a
 * long. Returns -1 when there is no such attribute or the first one found
 * does not parse as a number. */
static long element_priority(const XML_Char **a) {
    for(const XML_Char **attr=a; attr[0]; attr+=2) {
        if(strcasecmp(attr[0],"priority")!=0)
            continue;
        long parsed;
        if(sscanf(attr[1],"%ld",&parsed)!=1)
            return -1;
        return parsed;
    }
    return -1;
}
44
45 class idigger_t : public util::curl_t, public util::expat_t {
46 public:
47 string xri_proxy;
48
49 enum {
50 xmode_html = 1, xmode_xrd = 2
51 };
52 int xmode;
53
54 string xrds_location;
55 string http_content_type;
56 service_t html_openid1;
57 service_t html_openid2;
58 string cdata_buf;
59 long status_code;
60 string status_string;
61
62 typedef list<string> pt_stack_t;
63 pt_stack_t pt_stack;
64 int skipping;
65
66 XRD_t *xrd;
67 service_t *xrd_service;
68 string* cdata;
69
70 idigger_t()
71 : util::curl_t(easy_init()),
72 util::expat_t(0),
73 xri_proxy(XRI_PROXY_URL) {
74 CURLcode r;
75 (r=misc_sets())
76 || (r=set_write())
77 || (r=set_header())
78 ;
79 if(r)
80 throw exception_curl(OPKELE_CP_ "failed to set curly options",r);
81 }
82 ~idigger_t() throw() { }
83
84 void discover(idiscovery_t& result,const string& identity) {
85 result.clear();
86 string::size_type fsc = identity.find_first_not_of(whitespace);
87 if(fsc==string::npos)
88 throw bad_input(OPKELE_CP_ "whtiespace-only identity");
89 string::size_type lsc = identity.find_last_not_of(whitespace);
90 assert(lsc!=string::npos);
91 if(!strncasecmp(identity.c_str()+fsc,"xri://",sizeof("xri://")-1))
92 fsc += sizeof("xri://")-1;
93 if((fsc+1)>=lsc)
94 throw bad_input(OPKELE_CP_ "not a character of importance in identity");
95 string id(identity,fsc,lsc-fsc+1);
96 if(strchr(i_leaders,id[0])) {
97 result.normalized_id = id;
98 /* TODO: further canonicalize xri identity? Like folding case or whatever... */
99 discover_at(
100 result,
101 xri_proxy + util::url_encode(id)+
102 "?_xrd_r=application/xrd+xml;sep=false", xmode_xrd);
103 if(status_code!=100)
104 throw failed_xri_resolution(OPKELE_CP_
105 "XRI resolution failed with '"+status_string+"' message",status_code);
106 if(result.xrd.canonical_ids.empty())
107 throw opkele::failed_discovery(OPKELE_CP_ "No CanonicalID for XRI identity found");
108 }else{
109 if(id.find("://")==string::npos)
110 id.insert(0,"http://");
111 string::size_type fp = id.find('#');
112 if(fp!=string::npos) {
113 string::size_type qp = id.find('?');
114 if(qp==string::npos || qp<fp)
115 id.erase(fp);
116 else if(qp>fp)
117 id.erase(fp,qp-fp);
118 }
119 result.normalized_id = util::rfc_3986_normalize_uri(id);
120 discover_at(result,id,xmode_html|xmode_xrd);
121 const char * eu = 0;
122 CURLcode r = easy_getinfo(CURLINFO_EFFECTIVE_URL,&eu);
123 if(r)
124 throw exception_curl(OPKELE_CP_ "failed to get CURLINFO_EFFECTIVE_URL",r);
125 result.canonicalized_id = util::rfc_3986_normalize_uri(eu); /* XXX: strip fragment part? */
126 if(xrds_location.empty()) {
127 html2xrd(result.xrd);
128 }else{
129 discover_at(result,xrds_location,xmode_xrd);
130 if(result.xrd.empty())
131 html2xrd(result.xrd);
132 }
133 }
134 }
135
136 void discover_at(idiscovery_t& result,const string& url,int xm) {
137 CURLcode r = easy_setopt(CURLOPT_URL,url.c_str());
138 if(r)
139 throw exception_curl(OPKELE_CP_ "failed to set culry urlie",r);
140
141 (*(expat_t*)this) = parser_create_ns();
142 set_user_data(); set_element_handler();
143 set_character_data_handler();
144
145 xrds_location.clear(); http_content_type.clear();
146 xmode = xm;
147 if(xmode&xmode_html) {
148 xrds_location.clear();
149 html_openid1.clear(); html_openid2.clear();
150 }
151 xrd = &result.xrd;
152 cdata = 0; xrd_service = 0; skipping = 0;
153 status_code = 100; status_string.clear();
154
155 r = easy_perform();
156 if(r && r!=CURLE_WRITE_ERROR)
157 throw exception_curl(OPKELE_CP_ "failed to perform curly request",r);
158
159 parse(0,0,true);
160 }
161
162 void html2xrd(XRD_t& x) {
163 if(!html_openid1.uris.empty()) {
164 html_openid1.types.insert(STURI_OPENID11);
165 x.services.add(-1,html_openid1);
166 }
167 if(!html_openid2.uris.empty()) {
168 html_openid2.types.insert(STURI_OPENID20);
169 x.services.add(-1,html_openid2);
170 }
171 }
172
173 size_t write(void *p,size_t s,size_t nm) {
174 if(skipping<0) return 0;
175 /* TODO: limit total size */
176 size_t bytes = s*nm;
177 parse((const char *)p,bytes,false);
178 return bytes;
179 }
180 size_t header(void *p,size_t s,size_t nm) {
181 size_t bytes = s*nm;
182 const char *h = (const char*)p;
183 const char *colon = (const char*)memchr(p,':',bytes);
184 const char *space = (const char*)memchr(p,' ',bytes);
185 if(space && ( (!colon) || space<colon ) ) {
186 xrds_location.clear(); http_content_type.clear();
187 }else if(colon) {
188 const char *hv = ++colon;
189 int hnl = colon-h;
190 int rb;
191 for(rb = bytes-hnl-1;rb>0 && isspace(*hv);++hv,--rb);
192 while(rb>0 && isspace(hv[rb-1])) --rb;
193 if(rb) {
194 if( (hnl>=sizeof(XRDS_HEADER))
195 && !strncasecmp(h,XRDS_HEADER":",
196 sizeof(XRDS_HEADER)) ) {
197 xrds_location.assign(hv,rb);
198 }else if( (hnl>=sizeof(CT_HEADER))
199 && !strncasecmp(h,CT_HEADER":",
200 sizeof(CT_HEADER)) ) {
201 const char *sc = (const char*)memchr(
202 hv,';',rb);
203 http_content_type.assign(hv,sc?(sc-hv):rb);
204 }
205 }
206 }
207 return curl_t::header(p,s,nm);
208 }
209
210 void start_element(const XML_Char *n,const XML_Char **a) {
211 if(skipping<0) return;
212 if(skipping) {
213 if(xmode&xmode_html)
214 html_start_element(n,a);
215 ++skipping; return;
216 }
217 if(pt_stack.empty()) {
218 if(is_qelement(n,NSURI_XRDS "\tXRDS"))
219 return;
220 if(is_qelement(n,NSURI_XRD "\tXRD")) {
221 assert(xrd);
222 xrd->clear();
223 pt_stack.push_back(n);
224 }else if(xmode&xmode_html) {
225 html_start_element(n,a);
226 }else{
227 skipping = -1;
228 }
229 }else{
230 int pt_s = pt_stack.size();
231 if(pt_s==1) {
232 /* TODO: xrd:XRD/xrd:Expires */
233 if(is_qelement(n,NSURI_XRD "\tCanonicalID")) {
234 assert(xrd);
235 cdata = &(xrd->canonical_ids.add(element_priority(a),string()));
236 }else if(is_qelement(n,NSURI_XRD "\tLocalID")) {
237 assert(xrd);
238 cdata = &(xrd->local_ids.add(element_priority(a),string()));
239 }else if(is_qelement(n,NSURI_XRD "\tService")) {
240 assert(xrd);
241 xrd_service = &(xrd->services.add(element_priority(a),
242 service_t()));
243 pt_stack.push_back(n);
244 }else if(is_qelement(n,NSURI_XRD "\tStatus")) {
245 for(;*a;) {
246 if(!strcasecmp(*(a++),"code")) {
247 if(sscanf(*(a++),"%ld",&status_code)==1 && status_code!=100) {
248 cdata = &status_string;
249 pt_stack.push_back(n);
250 break;
251 }
252 }
253 }
254 }else if(xmode&xmode_html) {
255 html_start_element(n,a);
256 }else{
257 skipping = 1;
258 }
259 }else if(pt_s==2) {
260 if(is_qelement(pt_stack.back().c_str(), NSURI_XRD "\tService")) {
261 if(is_qelement(n,NSURI_XRD "\tType")) {
262 assert(xrd); assert(xrd_service);
263 cdata_buf.clear();
264 cdata = &cdata_buf;
265 }else if(is_qelement(n,NSURI_XRD "\tURI")) {
266 assert(xrd); assert(xrd_service);
267 cdata = &(xrd_service->uris.add(element_priority(a),string()));
268 }else if(is_qelement(n,NSURI_XRD "\tLocalID")
269 || is_qelement(n,NSURI_OPENID10 "\tDelegate") ) {
270 assert(xrd); assert(xrd_service);
271 cdata = &(xrd_service->uris.add(element_priority(a),string()));
272 }else{
273 skipping = 1;
274 }
275 }else
276 skipping = 1;
277 }else if(xmode&xmode_html) {
278 html_start_element(n,a);
279 }else{
280 skipping = 1;
281 }
282 }
283 }
284 void end_element(const XML_Char *n) {
285 if(skipping<0) return;
286 if(skipping) {
287 --skipping; return;
288 }
289 if(is_qelement(n,NSURI_XRD "\tType")) {
290 assert(xrd); assert(xrd_service); assert(cdata==&cdata_buf);
291 xrd_service->types.insert(cdata_buf);
292 }else if(is_qelement(n,NSURI_XRD "\tService")) {
293 assert(xrd); assert(xrd_service);
294 assert(!pt_stack.empty());
295 assert(pt_stack.back()==(NSURI_XRD "\tService"));
296 pt_stack.pop_back();
297 xrd_service = 0;
298 }else if(is_qelement(n,NSURI_XRD "\tStatus")) {
299 assert(xrd);
300 if(is_qelement(pt_stack.back().c_str(),n)) {
301 assert(cdata==&status_string);
302 pt_stack.pop_back();
303 if(status_code!=100)
304 skipping = -1;
305 }
306 }else if((xmode&xmode_html) && is_element(n,"head")) {
307 skipping = -1;
308 }
309 cdata = 0;
310 }
311 void character_data(const XML_Char *s,int l) {
312 if(skipping) return;
313 if(cdata) cdata->append(s,l);
314 }
315
316 void html_start_element(const XML_Char *n,const XML_Char **a) {
317 if(is_element(n,"meta")) {
318 bool heq = false;
319 string l;
320 for(;*a;a+=2) {
321 if(!( strcasecmp(a[0],"http-equiv")
322 || strcasecmp(a[1],XRDS_HEADER) ))
323 heq = true;
324 else if(!strcasecmp(a[0],"content"))
325 l.assign(a[1]);
326 }
327 if(heq)
328 xrds_location = l;
329 }else if(is_element(n,"link")) {
330 string rels;
331 string href;
332 for(;*a;a+=2) {
333 if( !strcasecmp(a[0],"rel") ) {
334 rels.assign(a[1]);
335 }else if( !strcasecmp(a[0],"href") ) {
336 const char *ns = a[1];
337 for(;*ns && isspace(*ns);++ns);
338 href.assign(ns);
339 string::size_type lns=href.find_last_not_of(whitespace);
340 href.erase(lns+1);
341 }
342 }
343 for(string::size_type ns=rels.find_first_not_of(whitespace);
344 ns!=string::npos; ns=rels.find_first_not_of(whitespace,ns)) {
345 string::size_type s = rels.find_first_of(whitespace,ns);
346 string rel;
347 if(s==string::npos) {
348 rel.assign(rels,ns,string::npos);
349 ns = string::npos;
350 }else{
351 rel.assign(rels,ns,s-ns);
352 ns = s;
353 }
354 if(rel=="openid.server")
355 html_openid1.uris.add(-1,href);
356 else if(rel=="openid.delegate")
357 html_openid1.local_ids.add(-1,href);
358 else if(rel=="openid2.provider")
359 html_openid2.uris.add(-1,href);
360 else if(rel=="openid2.local_id")
361 html_openid2.local_ids.add(-1,href);
362 }
363 }else if(is_element(n,"body")) {
364 skipping = -1;
365 }
366 }
367
368 };
369
370 void idiscover(idiscovery_t& result,const string& identity) {
371 idigger_t idigger;
372 idigger.discover(result,identity);
373 }
374
375}