summary refs log tree commit diff about
author Michael Krelin <hacker@klever.net>2008-02-16 22:45:49 (UTC)
committer Michael Krelin <hacker@klever.net>2008-02-16 22:45:49 (UTC)
commit c18b77c610d0f963a274420a6558629d198818ee (patch) (unidiff)
tree 953a2ab1e3b436f00d383e4c9c6fe87e3b876b80
parent 21bddce2d98394865cf2ed0b144f92bbb6993bc9 (diff)
download libopkele-c18b77c610d0f963a274420a6558629d198818ee.zip
libopkele-c18b77c610d0f963a274420a6558629d198818ee.tar.gz
libopkele-c18b77c610d0f963a274420a6558629d198818ee.tar.bz2
do not tidy XRD documents
Signed-off-by: Michael Krelin <hacker@klever.net>
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r-- lib/discovery.cc 4
1 files changed, 2 insertions, 2 deletions
diff --git a/lib/discovery.cc b/lib/discovery.cc
index 6f9926c..b7f2db6 100644
--- a/lib/discovery.cc
+++ b/lib/discovery.cc
@@ -98,211 +98,211 @@ namespace opkele {
98 string* cdata; 98 string* cdata;
99 99
100 idigger_t() 100 idigger_t()
101 : util::curl_t(easy_init()), 101 : util::curl_t(easy_init()),
102 util::expat_t(0), 102 util::expat_t(0),
103 xri_proxy(XRI_PROXY_URL) { 103 xri_proxy(XRI_PROXY_URL) {
104 CURLcode r; 104 CURLcode r;
105 (r=misc_sets()) 105 (r=misc_sets())
106 || (r=set_write()) 106 || (r=set_write())
107 || (r=set_header()) 107 || (r=set_header())
108 ; 108 ;
109 if(r) 109 if(r)
110 throw exception_curl(OPKELE_CP_ "failed to set curly options",r); 110 throw exception_curl(OPKELE_CP_ "failed to set curly options",r);
111 } 111 }
112 ~idigger_t() throw() { } 112 ~idigger_t() throw() { }
113 113
114 void yadiscover(endpoint_discovery_iterator oi,const string& yurl,const char **types,bool redirs) { 114 void yadiscover(endpoint_discovery_iterator oi,const string& yurl,const char **types,bool redirs) {
115 idiscovery_t idis; 115 idiscovery_t idis;
116 idis.xri_identity = false; 116 idis.xri_identity = false;
117 discover_at(idis,yurl,xmode_html|xmode_xrd|(redirs?0:xmode_noredirs)); 117 discover_at(idis,yurl,xmode_html|xmode_xrd|(redirs?0:xmode_noredirs));
118 if(!xrds_location.empty()) { 118 if(!xrds_location.empty()) {
119 idis.clear(); 119 idis.clear();
120 discover_at(idis,xrds_location,xmode_xrd); 120 discover_at(idis,xrds_location,xmode_xrd);
121 } 121 }
122 idis.normalized_id = idis.canonicalized_id = yurl; 122 idis.normalized_id = idis.canonicalized_id = yurl;
123 service_type_t st; 123 service_type_t st;
124 for(st.uri=*types;*types;st.uri=*(++types)) 124 for(st.uri=*types;*types;st.uri=*(++types))
125 queue_endpoints(oi,idis,&st); 125 queue_endpoints(oi,idis,&st);
126 } 126 }
127 127
128 string discover(endpoint_discovery_iterator& oi,const string& identity) { 128 string discover(endpoint_discovery_iterator& oi,const string& identity) {
129 string rv; 129 string rv;
130 idiscovery_t idis; 130 idiscovery_t idis;
131 string::size_type fsc = identity.find_first_not_of(whitespace); 131 string::size_type fsc = identity.find_first_not_of(whitespace);
132 if(fsc==string::npos) 132 if(fsc==string::npos)
133 throw bad_input(OPKELE_CP_ "whitespace-only identity"); 133 throw bad_input(OPKELE_CP_ "whitespace-only identity");
134 string::size_type lsc = identity.find_last_not_of(whitespace); 134 string::size_type lsc = identity.find_last_not_of(whitespace);
135 assert(lsc!=string::npos); 135 assert(lsc!=string::npos);
136 if(!strncasecmp(identity.c_str()+fsc,"xri://",sizeof("xri://")-1)) 136 if(!strncasecmp(identity.c_str()+fsc,"xri://",sizeof("xri://")-1))
137 fsc += sizeof("xri://")-1; 137 fsc += sizeof("xri://")-1;
138 if((fsc+1)>=lsc) 138 if((fsc+1)>=lsc)
139 throw bad_input(OPKELE_CP_ "not a character of importance in identity"); 139 throw bad_input(OPKELE_CP_ "not a character of importance in identity");
140 string id(identity,fsc,lsc-fsc+1); 140 string id(identity,fsc,lsc-fsc+1);
141 idis.clear(); 141 idis.clear();
142 if(strchr(i_leaders,id[0])) { 142 if(strchr(i_leaders,id[0])) {
143 /* TODO: further normalize xri identity? Like folding case 143 /* TODO: further normalize xri identity? Like folding case
144 * or whatever... */ 144 * or whatever... */
145 rv = id; 145 rv = id;
146 set<string> cids; 146 set<string> cids;
147 for(const struct service_type_t *st=op_service_types; 147 for(const struct service_type_t *st=op_service_types;
148 st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) { 148 st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) {
149 idis.clear(); 149 idis.clear();
150 discover_at( idis, 150 discover_at( idis,
151 xri_proxy + util::url_encode(id)+ 151 xri_proxy + util::url_encode(id)+
152 "?_xrd_t="+util::url_encode(st->uri)+ 152 "?_xrd_t="+util::url_encode(st->uri)+
153 "&_xrd_r=application/xrd%2Bxml" 153 "&_xrd_r=application/xrd%2Bxml"
154 ";sep=true;refs=true", 154 ";sep=true;refs=true",
155 xmode_xrd ); 155 xmode_xrd );
156 if(status_code==241) continue; 156 if(status_code==241) continue;
157 if(status_code!=100) 157 if(status_code!=100)
158 throw failed_xri_resolution(OPKELE_CP_ 158 throw failed_xri_resolution(OPKELE_CP_
159 "XRI resolution failed with '"+status_string+"' message" 159 "XRI resolution failed with '"+status_string+"' message"
160 ", while looking for SEP with type '"+st->uri+"'", status_code); 160 ", while looking for SEP with type '"+st->uri+"'", status_code);
161 if(idis.xrd.canonical_ids.empty()) 161 if(idis.xrd.canonical_ids.empty())
162 throw opkele::failed_discovery(OPKELE_CP_ "No CanonicalID found for XRI identity found"); 162 throw opkele::failed_discovery(OPKELE_CP_ "No CanonicalID for XRI identity found");
163 string cid = idis.xrd.canonical_ids.begin()->second; 163 string cid = idis.xrd.canonical_ids.begin()->second;
164 if(cids.find(cid)==cids.end()) { 164 if(cids.find(cid)==cids.end()) {
165 cids.insert(cid); 165 cids.insert(cid);
166 idis.clear(); 166 idis.clear();
167 discover_at( idis, 167 discover_at( idis,
168 xri_proxy + util::url_encode(id)+ 168 xri_proxy + util::url_encode(id)+
169 "?_xrd_t="+util::url_encode(st->uri)+ 169 "?_xrd_t="+util::url_encode(st->uri)+
170 "&_xrd_r=application/xrd%2Bxml" 170 "&_xrd_r=application/xrd%2Bxml"
171 ";sep=true;refs=true", 171 ";sep=true;refs=true",
172 xmode_xrd ); 172 xmode_xrd );
173 if(status_code==241) continue; 173 if(status_code==241) continue;
174 if(status_code!=100) 174 if(status_code!=100)
175 throw failed_xri_resolution(OPKELE_CP_ 175 throw failed_xri_resolution(OPKELE_CP_
176 "XRI resolution failed with '"+status_string+"' message" 176 "XRI resolution failed with '"+status_string+"' message"
177 ", while looking for SEP with type '"+st->uri+"'" 177 ", while looking for SEP with type '"+st->uri+"'"
178 " on canonical id", status_code); 178 " on canonical id", status_code);
179 } 179 }
180 idis.canonicalized_id = cid; 180 idis.canonicalized_id = cid;
181 idis.normalized_id = rv; idis.xri_identity = true; 181 idis.normalized_id = rv; idis.xri_identity = true;
182 queue_endpoints(oi,idis,st); 182 queue_endpoints(oi,idis,st);
183 } 183 }
184 }else{ 184 }else{
185 idis.xri_identity = false; 185 idis.xri_identity = false;
186 if(id.find("://")==string::npos) 186 if(id.find("://")==string::npos)
187 id.insert(0,"http://"); 187 id.insert(0,"http://");
188 string::size_type fp = id.find('#'); 188 string::size_type fp = id.find('#');
189 if(fp!=string::npos) { 189 if(fp!=string::npos) {
190 string::size_type qp = id.find('?'); 190 string::size_type qp = id.find('?');
191 if(qp==string::npos || qp<fp) 191 if(qp==string::npos || qp<fp)
192 id.erase(fp); 192 id.erase(fp);
193 else if(qp>fp) 193 else if(qp>fp)
194 id.erase(fp,qp-fp); 194 id.erase(fp,qp-fp);
195 } 195 }
196 rv = idis.normalized_id = util::rfc_3986_normalize_uri(id); 196 rv = idis.normalized_id = util::rfc_3986_normalize_uri(id);
197 discover_at(idis,id,xmode_html|xmode_xrd); 197 discover_at(idis,id,xmode_html|xmode_xrd);
198 const char * eu = 0; 198 const char * eu = 0;
199 CURLcode r = easy_getinfo(CURLINFO_EFFECTIVE_URL,&eu); 199 CURLcode r = easy_getinfo(CURLINFO_EFFECTIVE_URL,&eu);
200 if(r) 200 if(r)
201 throw exception_curl(OPKELE_CP_ "failed to get CURLINFO_EFFECTIVE_URL",r); 201 throw exception_curl(OPKELE_CP_ "failed to get CURLINFO_EFFECTIVE_URL",r);
202 string cid = util::strip_uri_fragment_part( idis.canonicalized_id = util::rfc_3986_normalize_uri(eu) ); 202 string cid = util::strip_uri_fragment_part( idis.canonicalized_id = util::rfc_3986_normalize_uri(eu) );
203 if(xrds_location.empty()) { 203 if(xrds_location.empty()) {
204 html2xrd(oi,idis); 204 html2xrd(oi,idis);
205 }else{ 205 }else{
206 idis.clear(); 206 idis.clear();
207 idis.canonicalized_id = cid; 207 idis.canonicalized_id = cid;
208 discover_at(idis,xrds_location,xmode_xrd); 208 discover_at(idis,xrds_location,xmode_xrd);
209 if(idis.xrd.empty()) 209 if(idis.xrd.empty())
210 html2xrd(oi,idis); 210 html2xrd(oi,idis);
211 else{ 211 else{
212 for(const service_type_t *st=op_service_types; 212 for(const service_type_t *st=op_service_types;
213 st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st) 213 st<&op_service_types[sizeof(op_service_types)/sizeof(*op_service_types)];++st)
214 queue_endpoints(oi,idis,st); 214 queue_endpoints(oi,idis,st);
215 } 215 }
216 } 216 }
217 } 217 }
218 return rv; 218 return rv;
219 } 219 }
220 220
221 void discover_at(idiscovery_t& idis,const string& url,int xm) { 221 void discover_at(idiscovery_t& idis,const string& url,int xm) {
222 CURLcode r = easy_setopt(CURLOPT_MAXREDIRS, (xm&xmode_noredirs)?0:5); 222 CURLcode r = easy_setopt(CURLOPT_MAXREDIRS, (xm&xmode_noredirs)?0:5);
223 if(r) 223 if(r)
224 throw exception_curl(OPKELE_CP_ "failed to set curly maxredirs option"); 224 throw exception_curl(OPKELE_CP_ "failed to set curly maxredirs option");
225 if( (r=easy_setopt(CURLOPT_URL,url.c_str())) ) 225 if( (r=easy_setopt(CURLOPT_URL,url.c_str())) )
226 throw exception_curl(OPKELE_CP_ "failed to set curly urlie",r); 226 throw exception_curl(OPKELE_CP_ "failed to set curly urlie",r);
227 227
228 http_content_type.clear(); 228 http_content_type.clear();
229 xmode = xm; 229 xmode = xm;
230 prepare_to_parse(); 230 prepare_to_parse();
231 if(xmode&xmode_html) { 231 if(xmode&xmode_html) {
232 xrds_location.clear(); 232 xrds_location.clear();
233 save_html.clear(); 233 save_html.clear();
234 save_html.reserve(max_html); 234 save_html.reserve(max_html);
235 } 235 }
236 xrd = &idis.xrd; 236 xrd = &idis.xrd;
237 237
238 r = easy_perform(); 238 r = easy_perform();
239 if(r && r!=CURLE_WRITE_ERROR) 239 if(r && r!=CURLE_WRITE_ERROR)
240 throw exception_curl(OPKELE_CP_ "failed to perform curly request",r); 240 throw exception_curl(OPKELE_CP_ "failed to perform curly request",r);
241 241
242 if(!parser_choked) { 242 if(!parser_choked) {
243 parse(0,0,true); 243 parse(0,0,true);
244 }else{ 244 }else if(xmode&xmode_html){
245 /* TODO: do not bother if we've seen xml */ 245 /* TODO: do not bother if we've seen xml */
246 try { 246 try {
247 util::tidy_doc_t td = util::tidy_doc_t::create(); 247 util::tidy_doc_t td = util::tidy_doc_t::create();
248 if(!td) 248 if(!td)
249 throw exception_tidy(OPKELE_CP_ "failed to create htmltidy document"); 249 throw exception_tidy(OPKELE_CP_ "failed to create htmltidy document");
250#ifndef NDEBUG 250#ifndef NDEBUG
251 td.opt_set(TidyQuiet,false); 251 td.opt_set(TidyQuiet,false);
252 td.opt_set(TidyShowWarnings,false); 252 td.opt_set(TidyShowWarnings,false);
253#endif /* NDEBUG */ 253#endif /* NDEBUG */
254 td.opt_set(TidyForceOutput,true); 254 td.opt_set(TidyForceOutput,true);
255 td.opt_set(TidyXhtmlOut,true); 255 td.opt_set(TidyXhtmlOut,true);
256 td.opt_set(TidyDoctypeMode,TidyDoctypeOmit); 256 td.opt_set(TidyDoctypeMode,TidyDoctypeOmit);
257 td.opt_set(TidyMark,false); 257 td.opt_set(TidyMark,false);
258 if(td.parse_string(save_html)<=0) 258 if(td.parse_string(save_html)<=0)
259 throw exception_tidy(OPKELE_CP_ "tidy failed to parse document"); 259 throw exception_tidy(OPKELE_CP_ "tidy failed to parse document");
260 if(td.clean_and_repair()<=0) 260 if(td.clean_and_repair()<=0)
261 throw exception_tidy(OPKELE_CP_ "tidy failed to clean and repair"); 261 throw exception_tidy(OPKELE_CP_ "tidy failed to clean and repair");
262 util::tidy_buf_t tide; 262 util::tidy_buf_t tide;
263 if(td.save_buffer(tide)<=0) 263 if(td.save_buffer(tide)<=0)
264 throw exception_tidy(OPKELE_CP_ "tidy failed to save buffer"); 264 throw exception_tidy(OPKELE_CP_ "tidy failed to save buffer");
265 prepare_to_parse(); 265 prepare_to_parse();
266 parse(tide.c_str(),tide.size(),true); 266 parse(tide.c_str(),tide.size(),true);
267 }catch(exception_tidy& et) { } 267 }catch(exception_tidy& et) { }
268 } 268 }
269 save_html.clear(); 269 save_html.clear();
270 } 270 }
271 271
272 void prepare_to_parse() { 272 void prepare_to_parse() {
273 (*(expat_t*)this) = parser_create_ns(); 273 (*(expat_t*)this) = parser_create_ns();
274 set_user_data(); set_element_handler(); 274 set_user_data(); set_element_handler();
275 set_character_data_handler(); 275 set_character_data_handler();
276 276
277 if(xmode&xmode_html) { 277 if(xmode&xmode_html) {
278 html_openid1.clear(); html_openid2.clear(); 278 html_openid1.clear(); html_openid2.clear();
279 parser_choked = false; 279 parser_choked = false;
280 } 280 }
281 281
282 cdata = 0; xrd_service = 0; skipping = 0; 282 cdata = 0; xrd_service = 0; skipping = 0;
283 pt_stack.clear(); 283 pt_stack.clear();
284 status_code = 100; status_string.clear(); 284 status_code = 100; status_string.clear();
285 } 285 }
286 286
287 void html2xrd(endpoint_discovery_iterator& oi,idiscovery_t& id) { 287 void html2xrd(endpoint_discovery_iterator& oi,idiscovery_t& id) {
288 XRD_t& x = id.xrd; 288 XRD_t& x = id.xrd;
289 if(!html_openid2.uris.empty()) { 289 if(!html_openid2.uris.empty()) {
290 html_openid2.types.insert(STURI_OPENID20); 290 html_openid2.types.insert(STURI_OPENID20);
291 x.services.add(-1,html_openid2); 291 x.services.add(-1,html_openid2);
292 queue_endpoints(oi,id,&op_service_types[st_index_2]); 292 queue_endpoints(oi,id,&op_service_types[st_index_2]);
293 } 293 }
294 if(!html_openid1.uris.empty()) { 294 if(!html_openid1.uris.empty()) {
295 html_openid1.types.insert(STURI_OPENID11); 295 html_openid1.types.insert(STURI_OPENID11);
296 x.services.add(-1,html_openid1); 296 x.services.add(-1,html_openid1);
297 queue_endpoints(oi,id,&op_service_types[st_index_1]); 297 queue_endpoints(oi,id,&op_service_types[st_index_1]);
298 } 298 }
299 } 299 }
300 300
301 size_t write(void *p,size_t s,size_t nm) { 301 size_t write(void *p,size_t s,size_t nm) {
302 /* TODO: limit total size */ 302 /* TODO: limit total size */
303 size_t bytes = s*nm; 303 size_t bytes = s*nm;
304 const char *inbuf = (const char*)p; 304 const char *inbuf = (const char*)p;
305 if(xmode&xmode_html) { 305 if(xmode&xmode_html) {
306 size_t mbts = save_html.capacity()-save_html.size(); 306 size_t mbts = save_html.capacity()-save_html.size();
307 size_t bts = 0; 307 size_t bts = 0;
308 if(mbts>0) { 308 if(mbts>0) {