summaryrefslogtreecommitdiffabout
Unidiff
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r--lib/Makefile.am4
-rw-r--r--lib/discovery.cc75
2 files changed, 66 insertions, 13 deletions
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 9c73e96..989de28 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -1,31 +1,31 @@
1lib_LTLIBRARIES = libopkele.la 1lib_LTLIBRARIES = libopkele.la
2 2
3AM_CPPFLAGS = ${CPPFLAGS_DEBUG} 3AM_CPPFLAGS = ${CPPFLAGS_DEBUG}
4DEFAULT_INCLUDES = -I${top_builddir} 4DEFAULT_INCLUDES = -I${top_builddir}
5INCLUDES = \ 5INCLUDES = \
6 -I${top_srcdir}/include/ \ 6 -I${top_srcdir}/include/ \
7 ${KONFORKA_CFLAGS} \ 7 ${KONFORKA_CFLAGS} \
8 ${OPENSSL_CFLAGS} \ 8 ${OPENSSL_CFLAGS} \
9 ${LIBCURL_CPPFLAGS} \ 9 ${LIBCURL_CPPFLAGS} \
10 ${PCRE_CFLAGS} ${EXPAT_CFLAGS} 10 ${PCRE_CFLAGS} ${EXPAT_CFLAGS} ${TIDY_CFLAGS}
11libopkele_la_LIBADD = \ 11libopkele_la_LIBADD = \
12 ${LIBCURL} \ 12 ${LIBCURL} \
13 ${PCRE_LIBS} ${EXPAT_LIBS} \ 13 ${PCRE_LIBS} ${EXPAT_LIBS} \
14 ${OPENSSL_LIBS} \ 14 ${OPENSSL_LIBS} \
15 ${KONFORKA_LIBS} 15 ${KONFORKA_LIBS} ${TIDY_LIBS}
16 16
17libopkele_la_SOURCES = \ 17libopkele_la_SOURCES = \
18 params.cc \ 18 params.cc \
19 util.cc \ 19 util.cc \
20 server.cc \ 20 server.cc \
21 secret.cc \ 21 secret.cc \
22 data.cc \ 22 data.cc \
23 consumer.cc \ 23 consumer.cc \
24 exception.cc \ 24 exception.cc \
25 extension.cc \ 25 extension.cc \
26 sreg.cc \ 26 sreg.cc \
27 extension_chain.cc \ 27 extension_chain.cc \
28 curl.cc expat.cc \ 28 curl.cc expat.cc \
29 discovery.cc 29 discovery.cc
30libopkele_la_LDFLAGS = \ 30libopkele_la_LDFLAGS = \
31 -version-info 2:0:0 31 -version-info 2:0:0
diff --git a/lib/discovery.cc b/lib/discovery.cc
index 8729cfb..a308b56 100644
--- a/lib/discovery.cc
+++ b/lib/discovery.cc
@@ -1,230 +1,283 @@
1#include <list> 1#include <list>
2#include <opkele/curl.h> 2#include <opkele/curl.h>
3#include <opkele/expat.h> 3#include <opkele/expat.h>
4#include <opkele/uris.h> 4#include <opkele/uris.h>
5#include <opkele/discovery.h> 5#include <opkele/discovery.h>
6#include <opkele/exception.h> 6#include <opkele/exception.h>
7#include <opkele/util.h> 7#include <opkele/util.h>
8#include <opkele/tidy.h>
8#include <opkele/debug.h> 9#include <opkele/debug.h>
9 10
10#include "config.h" 11#include "config.h"
11 12
12#define XRDS_HEADER "X-XRDS-Location" 13#define XRDS_HEADER "X-XRDS-Location"
13#define CT_HEADER "Content-Type" 14#define CT_HEADER "Content-Type"
14 15
15namespace opkele { 16namespace opkele {
16 using std::list; 17 using std::list;
17 using xrd::XRD_t; 18 using xrd::XRD_t;
18 using xrd::service_t; 19 using xrd::service_t;
19 20
20 static const char *whitespace = " \t\r\n"; 21 static const char *whitespace = " \t\r\n";
21 static const char *i_leaders = "=@+$!("; 22 static const char *i_leaders = "=@+$!(";
23 static const size_t max_html = 16384;
22 24
23 static inline bool is_qelement(const XML_Char *n,const char *qen) { 25 static inline bool is_qelement(const XML_Char *n,const char *qen) {
24 return !strcasecmp(n,qen); 26 return !strcasecmp(n,qen);
25 } 27 }
26 static inline bool is_element(const XML_Char *n,const char *en) { 28 static inline bool is_element(const XML_Char *n,const char *en) {
27 if(!strcasecmp(n,en)) return true; 29 if(!strcasecmp(n,en)) return true;
28 int nl = strlen(n), enl = strlen(en); 30 int nl = strlen(n), enl = strlen(en);
29 if( (nl>=(enl+1)) && n[nl-enl-1]=='\t' 31 if( (nl>=(enl+1)) && n[nl-enl-1]=='\t'
30 && !strcasecmp(&n[nl-enl],en) ) 32 && !strcasecmp(&n[nl-enl],en) )
31 return true; 33 return true;
32 return false; 34 return false;
33 } 35 }
34 36
35 static long element_priority(const XML_Char **a) { 37 static long element_priority(const XML_Char **a) {
36 for(;*a;++a) 38 for(;*a;++a)
37 if(!strcasecmp(*(a++),"priority")) { 39 if(!strcasecmp(*(a++),"priority")) {
38 long rv; 40 long rv;
39 return (sscanf(*a,"%ld",&rv)==1)?rv:-1; 41 return (sscanf(*a,"%ld",&rv)==1)?rv:-1;
40 } 42 }
41 return -1; 43 return -1;
42 } 44 }
43 45
44 class idigger_t : public util::curl_t, public util::expat_t { 46 class idigger_t : public util::curl_t, public util::expat_t {
45 public: 47 public:
46 string xri_proxy; 48 string xri_proxy;
47 49
48 enum { 50 enum {
49 xmode_html = 1, xmode_xrd = 2 51 xmode_html = 1, xmode_xrd = 2
50 }; 52 };
51 int xmode; 53 int xmode;
52 54
53 string xrds_location; 55 string xrds_location;
54 string http_content_type; 56 string http_content_type;
55 service_t html_openid1; 57 service_t html_openid1;
56 service_t html_openid2; 58 service_t html_openid2;
57 string cdata_buf; 59 string cdata_buf;
58 long status_code; 60 long status_code;
59 string status_string; 61 string status_string;
60 62
61 typedef list<string> pt_stack_t; 63 typedef list<string> pt_stack_t;
62 pt_stack_t pt_stack; 64 pt_stack_t pt_stack;
63 int skipping; 65 int skipping;
66 bool parser_choked;
67 string save_html;
64 68
65 XRD_t *xrd; 69 XRD_t *xrd;
66 service_t *xrd_service; 70 service_t *xrd_service;
67 string* cdata; 71 string* cdata;
68 72
69 idigger_t() 73 idigger_t()
70 : util::curl_t(easy_init()), 74 : util::curl_t(easy_init()),
71 util::expat_t(0), 75 util::expat_t(0),
72 xri_proxy(XRI_PROXY_URL) { 76 xri_proxy(XRI_PROXY_URL) {
73 CURLcode r; 77 CURLcode r;
74 (r=misc_sets()) 78 (r=misc_sets())
75 || (r=set_write()) 79 || (r=set_write())
76 || (r=set_header()) 80 || (r=set_header())
77 ; 81 ;
78 if(r) 82 if(r)
79 throw exception_curl(OPKELE_CP_ "failed to set curly options",r); 83 throw exception_curl(OPKELE_CP_ "failed to set curly options",r);
80 } 84 }
81 ~idigger_t() throw() { } 85 ~idigger_t() throw() { }
82 86
83 void discover(idiscovery_t& result,const string& identity) { 87 void discover(idiscovery_t& result,const string& identity) {
84 result.clear(); 88 result.clear();
85 string::size_type fsc = identity.find_first_not_of(whitespace); 89 string::size_type fsc = identity.find_first_not_of(whitespace);
86 if(fsc==string::npos) 90 if(fsc==string::npos)
87 throw bad_input(OPKELE_CP_ "whtiespace-only identity"); 91 throw bad_input(OPKELE_CP_ "whtiespace-only identity");
88 string::size_type lsc = identity.find_last_not_of(whitespace); 92 string::size_type lsc = identity.find_last_not_of(whitespace);
89 assert(lsc!=string::npos); 93 assert(lsc!=string::npos);
90 if(!strncasecmp(identity.c_str()+fsc,"xri://",sizeof("xri://")-1)) 94 if(!strncasecmp(identity.c_str()+fsc,"xri://",sizeof("xri://")-1))
91 fsc += sizeof("xri://")-1; 95 fsc += sizeof("xri://")-1;
92 if((fsc+1)>=lsc) 96 if((fsc+1)>=lsc)
93 throw bad_input(OPKELE_CP_ "not a character of importance in identity"); 97 throw bad_input(OPKELE_CP_ "not a character of importance in identity");
94 string id(identity,fsc,lsc-fsc+1); 98 string id(identity,fsc,lsc-fsc+1);
95 if(strchr(i_leaders,id[0])) { 99 if(strchr(i_leaders,id[0])) {
96 result.normalized_id = id; 100 result.normalized_id = id;
97 result.xri_identity = true; 101 result.xri_identity = true;
98 /* TODO: further canonicalize xri identity? Like folding case or whatever... */ 102 /* TODO: further canonicalize xri identity? Like folding case or whatever... */
99 discover_at( 103 discover_at(
100 result, 104 result,
101 xri_proxy + util::url_encode(id)+ 105 xri_proxy + util::url_encode(id)+
102 "?_xrd_r=application/xrd+xml;sep=false", xmode_xrd); 106 "?_xrd_r=application/xrd+xml;sep=false", xmode_xrd);
103 if(status_code!=100) 107 if(status_code!=100)
104 throw failed_xri_resolution(OPKELE_CP_ 108 throw failed_xri_resolution(OPKELE_CP_
105 "XRI resolution failed with '"+status_string+"' message",status_code); 109 "XRI resolution failed with '"+status_string+"' message",status_code);
106 if(result.xrd.canonical_ids.empty()) 110 if(result.xrd.canonical_ids.empty())
107 throw opkele::failed_discovery(OPKELE_CP_ "No CanonicalID for XRI identity found"); 111 throw opkele::failed_discovery(OPKELE_CP_ "No CanonicalID for XRI identity found");
108 result.canonicalized_id = result.xrd.canonical_ids.begin()->second; 112 result.canonicalized_id = result.xrd.canonical_ids.begin()->second;
109 }else{ 113 }else{
110 result.xri_identity = false; 114 result.xri_identity = false;
111 if(id.find("://")==string::npos) 115 if(id.find("://")==string::npos)
112 id.insert(0,"http://"); 116 id.insert(0,"http://");
113 string::size_type fp = id.find('#'); 117 string::size_type fp = id.find('#');
114 if(fp!=string::npos) { 118 if(fp!=string::npos) {
115 string::size_type qp = id.find('?'); 119 string::size_type qp = id.find('?');
116 if(qp==string::npos || qp<fp) 120 if(qp==string::npos || qp<fp)
117 id.erase(fp); 121 id.erase(fp);
118 else if(qp>fp) 122 else if(qp>fp)
119 id.erase(fp,qp-fp); 123 id.erase(fp,qp-fp);
120 } 124 }
121 result.normalized_id = util::rfc_3986_normalize_uri(id); 125 result.normalized_id = util::rfc_3986_normalize_uri(id);
122 discover_at(result,id,xmode_html|xmode_xrd); 126 discover_at(result,id,xmode_html|xmode_xrd);
123 const char * eu = 0; 127 const char * eu = 0;
124 CURLcode r = easy_getinfo(CURLINFO_EFFECTIVE_URL,&eu); 128 CURLcode r = easy_getinfo(CURLINFO_EFFECTIVE_URL,&eu);
125 if(r) 129 if(r)
126 throw exception_curl(OPKELE_CP_ "failed to get CURLINFO_EFFECTIVE_URL",r); 130 throw exception_curl(OPKELE_CP_ "failed to get CURLINFO_EFFECTIVE_URL",r);
127 result.canonicalized_id = util::rfc_3986_normalize_uri(eu); /* XXX: strip fragment part? */ 131 result.canonicalized_id = util::rfc_3986_normalize_uri(eu); /* XXX: strip fragment part? */
128 if(xrds_location.empty()) { 132 if(xrds_location.empty()) {
129 html2xrd(result.xrd); 133 html2xrd(result.xrd);
130 }else{ 134 }else{
131 discover_at(result,xrds_location,xmode_xrd); 135 discover_at(result,xrds_location,xmode_xrd);
132 if(result.xrd.empty()) 136 if(result.xrd.empty())
133 html2xrd(result.xrd); 137 html2xrd(result.xrd);
134 } 138 }
135 } 139 }
136 } 140 }
137 141
138 void discover_at(idiscovery_t& result,const string& url,int xm) { 142 void discover_at(idiscovery_t& result,const string& url,int xm) {
139 CURLcode r = easy_setopt(CURLOPT_URL,url.c_str()); 143 CURLcode r = easy_setopt(CURLOPT_URL,url.c_str());
140 if(r) 144 if(r)
141 throw exception_curl(OPKELE_CP_ "failed to set culry urlie",r); 145 throw exception_curl(OPKELE_CP_ "failed to set culry urlie",r);
142 146
143 (*(expat_t*)this) = parser_create_ns();
144 set_user_data(); set_element_handler();
145 set_character_data_handler();
146
147 http_content_type.clear(); 147 http_content_type.clear();
148 xmode = xm; 148 xmode = xm;
149 prepare_to_parse();
149 if(xmode&xmode_html) { 150 if(xmode&xmode_html) {
150 xrds_location.clear(); 151 xrds_location.clear();
151 html_openid1.clear(); html_openid2.clear(); 152 save_html.clear();
153 save_html.reserve(max_html);
152 } 154 }
153 xrd = &result.xrd; 155 xrd = &result.xrd;
154 cdata = 0; xrd_service = 0; skipping = 0;
155 status_code = 100; status_string.clear();
156 156
157 r = easy_perform(); 157 r = easy_perform();
158 if(r && r!=CURLE_WRITE_ERROR) 158 if(r && r!=CURLE_WRITE_ERROR)
159 throw exception_curl(OPKELE_CP_ "failed to perform curly request",r); 159 throw exception_curl(OPKELE_CP_ "failed to perform curly request",r);
160 160
161 parse(0,0,true); 161 if(!parser_choked) {
162 parse(0,0,true);
163 }else{
164 /* TODO: do not bother if we've seen xml */
165 try {
166 util::tidy_doc_t td = util::tidy_doc_t::create();
167 if(!td)
168 throw exception_tidy(OPKELE_CP_ "failed to create htmltidy document");
169#ifndef NDEBUG
170 td.opt_set(TidyQuiet,false);
171 td.opt_set(TidyShowWarnings,false);
172 td.opt_set(TidyForceOutput,true);
173 td.opt_set(TidyXhtmlOut,true);
174 td.opt_set(TidyDoctypeMode,TidyDoctypeOmit);
175 td.opt_set(TidyMark,false);
176#endif /* NDEBUG */
177 if(td.parse_string(save_html)<=0)
178 throw exception_tidy(OPKELE_CP_ "tidy failed to parse document");
179 if(td.clean_and_repair()<=0)
180 throw exception_tidy(OPKELE_CP_ "tidy failed to clean and repair");
181 util::tidy_buf_t tide;
182 if(td.save_buffer(tide)<=0)
183 throw exception_tidy(OPKELE_CP_ "tidy failed to save buffer");
184 prepare_to_parse();
185 parse(tide.c_str(),tide.size(),true);
186 }catch(exception_tidy& et) { }
187 }
188 save_html.clear();
189 }
190
191 void prepare_to_parse() {
192 (*(expat_t*)this) = parser_create_ns();
193 set_user_data(); set_element_handler();
194 set_character_data_handler();
195
196 if(xmode&xmode_html) {
197 html_openid1.clear(); html_openid2.clear();
198 parser_choked = false;
199 }
200
201 cdata = 0; xrd_service = 0; skipping = 0;
202 status_code = 100; status_string.clear();
162 } 203 }
163 204
164 void html2xrd(XRD_t& x) { 205 void html2xrd(XRD_t& x) {
165 if(!html_openid1.uris.empty()) { 206 if(!html_openid1.uris.empty()) {
166 html_openid1.types.insert(STURI_OPENID11); 207 html_openid1.types.insert(STURI_OPENID11);
167 x.services.add(-1,html_openid1); 208 x.services.add(-1,html_openid1);
168 } 209 }
169 if(!html_openid2.uris.empty()) { 210 if(!html_openid2.uris.empty()) {
170 html_openid2.types.insert(STURI_OPENID20); 211 html_openid2.types.insert(STURI_OPENID20);
171 x.services.add(-1,html_openid2); 212 x.services.add(-1,html_openid2);
172 } 213 }
173 } 214 }
174 215
175 size_t write(void *p,size_t s,size_t nm) { 216 size_t write(void *p,size_t s,size_t nm) {
176 if(skipping<0) return 0;
177 /* TODO: limit total size */ 217 /* TODO: limit total size */
178 size_t bytes = s*nm; 218 size_t bytes = s*nm;
179 bool rp = parse((const char *)p,bytes,false); 219 const char *inbuf = (const char*)p;
220 if(xmode&xmode_html) {
221 size_t mbts = save_html.capacity()-save_html.size();
222 size_t bts = 0;
223 if(mbts>0) {
224 bts = (bytes>mbts)?mbts:bytes;
225 save_html.append(inbuf,bts);
226 }
227 if(skipping<0) return bts;
228 }
229 if(skipping<0) return 0;
230 bool rp = parse(inbuf,bytes,false);
180 if(!rp) { 231 if(!rp) {
232 parser_choked = true;
181 skipping = -1; 233 skipping = -1;
182 bytes = 0; 234 if(!(xmode&xmode_html))
235 bytes = 0;
183 } 236 }
184 return bytes; 237 return bytes;
185 } 238 }
186 size_t header(void *p,size_t s,size_t nm) { 239 size_t header(void *p,size_t s,size_t nm) {
187 size_t bytes = s*nm; 240 size_t bytes = s*nm;
188 const char *h = (const char*)p; 241 const char *h = (const char*)p;
189 const char *colon = (const char*)memchr(p,':',bytes); 242 const char *colon = (const char*)memchr(p,':',bytes);
190 const char *space = (const char*)memchr(p,' ',bytes); 243 const char *space = (const char*)memchr(p,' ',bytes);
191 if(space && ( (!colon) || space<colon ) ) { 244 if(space && ( (!colon) || space<colon ) ) {
192 xrds_location.clear(); http_content_type.clear(); 245 xrds_location.clear(); http_content_type.clear();
193 }else if(colon) { 246 }else if(colon) {
194 const char *hv = ++colon; 247 const char *hv = ++colon;
195 int hnl = colon-h; 248 int hnl = colon-h;
196 int rb; 249 int rb;
197 for(rb = bytes-hnl-1;rb>0 && isspace(*hv);++hv,--rb); 250 for(rb = bytes-hnl-1;rb>0 && isspace(*hv);++hv,--rb);
198 while(rb>0 && isspace(hv[rb-1])) --rb; 251 while(rb>0 && isspace(hv[rb-1])) --rb;
199 if(rb) { 252 if(rb) {
200 if( (hnl>=sizeof(XRDS_HEADER)) 253 if( (hnl>=sizeof(XRDS_HEADER))
201 && !strncasecmp(h,XRDS_HEADER":", 254 && !strncasecmp(h,XRDS_HEADER":",
202 sizeof(XRDS_HEADER)) ) { 255 sizeof(XRDS_HEADER)) ) {
203 xrds_location.assign(hv,rb); 256 xrds_location.assign(hv,rb);
204 }else if( (hnl>=sizeof(CT_HEADER)) 257 }else if( (hnl>=sizeof(CT_HEADER))
205 && !strncasecmp(h,CT_HEADER":", 258 && !strncasecmp(h,CT_HEADER":",
206 sizeof(CT_HEADER)) ) { 259 sizeof(CT_HEADER)) ) {
207 const char *sc = (const char*)memchr( 260 const char *sc = (const char*)memchr(
208 hv,';',rb); 261 hv,';',rb);
209 http_content_type.assign(hv,sc?(sc-hv):rb); 262 http_content_type.assign(hv,sc?(sc-hv):rb);
210 } 263 }
211 } 264 }
212 } 265 }
213 return curl_t::header(p,s,nm); 266 return curl_t::header(p,s,nm);
214 } 267 }
215 268
216 void start_element(const XML_Char *n,const XML_Char **a) { 269 void start_element(const XML_Char *n,const XML_Char **a) {
217 if(skipping<0) return; 270 if(skipping<0) return;
218 if(skipping) { 271 if(skipping) {
219 if(xmode&xmode_html) 272 if(xmode&xmode_html)
220 html_start_element(n,a); 273 html_start_element(n,a);
221 ++skipping; return; 274 ++skipping; return;
222 } 275 }
223 if(pt_stack.empty()) { 276 if(pt_stack.empty()) {
224 if(is_qelement(n,NSURI_XRDS "\tXRDS")) 277 if(is_qelement(n,NSURI_XRDS "\tXRDS"))
225 return; 278 return;
226 if(is_qelement(n,NSURI_XRD "\tXRD")) { 279 if(is_qelement(n,NSURI_XRD "\tXRD")) {
227 assert(xrd); 280 assert(xrd);
228 xrd->clear(); 281 xrd->clear();
229 pt_stack.push_back(n); 282 pt_stack.push_back(n);
230 }else if(xmode&xmode_html) { 283 }else if(xmode&xmode_html) {