-rw-r--r-- | lib/consumer.cc | 124 |
1 files changed, 65 insertions, 59 deletions
diff --git a/lib/consumer.cc b/lib/consumer.cc index ff5da91..df95b64 100644 --- a/lib/consumer.cc +++ b/lib/consumer.cc | |||
@@ -12,17 +12,55 @@ | |||
12 | 12 | ||
13 | #include "config.h" | 13 | #include "config.h" |
14 | 14 | ||
15 | #if defined(USE_LIBPCRECPP) | 15 | #include <pcre.h> |
16 | # include <pcrecpp.h> | ||
17 | #elif defined(USE_PCREPP) | ||
18 | # include <pcre++.h> | ||
19 | #else | ||
20 | /* internal implementation won't be built */ | ||
21 | #endif | ||
22 | 16 | ||
23 | namespace opkele { | 17 | namespace opkele { |
24 | using namespace std; | 18 | using namespace std; |
25 | 19 | ||
/**
 * Owning wrapper around the integer offsets vector handed to pcre_exec().
 *
 * Offsets are read in pairs: element 2*i is the start and element 2*i+1 is
 * the end of the i-th pair. The object allocates the vector with new[] and
 * releases it in the destructor; copies are deep, so every instance frees
 * only its own buffer.
 */
class pcre_matches_t {
    public:
    int *_ov;   // offsets vector, 0 when nothing has been allocated
    int _s;     // number of ints in _ov

    /** Creates an empty object with no offsets vector. */
    pcre_matches_t() : _ov(0), _s(0) { }

    /**
     * Allocates a zero-filled offsets vector.
     *
     * @param s requested number of offset slots; it is rounded up to an even
     *          number (offsets come in pairs) and then enlarged by half of
     *          that, so the final size is a multiple of three.
     *          NOTE(review): the extra third presumably is the workspace
     *          pcre_exec() asks for in its ovector -- confirm against the
     *          PCRE API documentation.
     */
    pcre_matches_t(int s) : _ov(0), _s(s) {
        if(_s&1) ++_s;
        _s += _s>>1;
        // value-initialised so that reading a pair before any match was
        // stored does not touch indeterminate memory
        _ov = new int[_s]();
    }

    /** Deep copy: the new object gets its own buffer with the same contents. */
    pcre_matches_t(const pcre_matches_t& x) : _ov(0), _s(x._s) {
        if(x._ov) {
            _ov = new int[_s];
            for(int i=0;i<_s;++i)
                _ov[i] = x._ov[i];
        }
    }

    // delete[] on a null pointer is a no-op, no check needed
    ~pcre_matches_t() throw() { delete[] _ov; }

    /**
     * Deep-copying assignment. The copy is made first and then swapped in,
     * so *this is left untouched if the allocation throws; the previous
     * buffer is released when the temporary goes out of scope.
     */
    pcre_matches_t& operator=(const pcre_matches_t& x) {
        if(this!=&x) {
            pcre_matches_t tmp(x);
            int *ov = _ov; _ov = tmp._ov; tmp._ov = ov;
            int s = _s; _s = tmp._s; tmp._s = s;
        }
        return *this;
    }

    /** @return start offset stored for pair i (no bounds checking). */
    int begin(int i) const { return _ov[i<<1]; }
    /** @return end offset stored for pair i (no bounds checking). */
    int end(int i) const { return _ov[(i<<1)+1]; }
    /** @return end offset minus start offset of pair i (no bounds checking). */
    int length(int i) const { int t=i<<1; return _ov[t+1]-_ov[t]; }
};
37 | |||
38 | class pcre_t { | ||
39 | public: | ||
40 | pcre *_p; | ||
41 | |||
42 | pcre_t() : _p(0) { } | ||
43 | pcre_t(pcre *p) : _p(p) { } | ||
44 | pcre_t(const char *re,int opts) : _p(0) { | ||
45 | static const char *errptr; static int erroffset; | ||
46 | _p = pcre_compile(re,opts,&errptr,&erroffset,NULL); | ||
47 | if(!_p) | ||
48 | throw internal_error(OPKELE_CP_ string("Failed to compile regexp: ")+errptr); | ||
49 | } | ||
50 | ~pcre_t() throw() { if(_p) (*pcre_free)(_p); } | ||
51 | |||
52 | pcre_t& operator=(pcre *p) { if(_p) (*pcre_free)(_p); _p=p; return *this; } | ||
53 | |||
54 | operator const pcre*(void) const { return _p; } | ||
55 | operator pcre*(void) { return _p; } | ||
56 | |||
57 | int exec(const string& s,pcre_matches_t& m) { | ||
58 | if(!_p) | ||
59 | throw internal_error(OPKELE_CP_ "Trying to execute absent regexp"); | ||
60 | return pcre_exec(_p,NULL,s.c_str(),s.length(),0,0,m._ov,m._s); | ||
61 | } | ||
62 | }; | ||
63 | |||
26 | class curl_t { | 64 | class curl_t { |
27 | public: | 65 | public: |
28 | CURL *_c; | 66 | CURL *_c; |
@@ -268,7 +306,6 @@ namespace opkele { | |||
268 | } | 306 | } |
269 | 307 | ||
270 | void consumer_t::retrieve_links(const string& url,string& server,string& delegate) { | 308 | void consumer_t::retrieve_links(const string& url,string& server,string& delegate) { |
271 | #if defined(USE_LIBPCRECPP) || defined(USE_PCREPP) | ||
272 | server.erase(); | 309 | server.erase(); |
273 | delegate.erase(); | 310 | delegate.erase(); |
274 | curl_t curl = curl_easy_init(); | 311 | curl_t curl = curl_easy_init(); |
@@ -286,67 +323,36 @@ namespace opkele { | |||
286 | r = curl_easy_perform(curl); | 323 | r = curl_easy_perform(curl); |
287 | if(r && r!=CURLE_WRITE_ERROR) | 324 | if(r && r!=CURLE_WRITE_ERROR) |
288 | throw exception_curl(OPKELE_CP_ "failed to curl_easy_perform()",r); | 325 | throw exception_curl(OPKELE_CP_ "failed to curl_easy_perform()",r); |
289 | // strip out everything past body | 326 | static const char *re_bre = "<\\s*body\\b", *re_hdre = "<\\s*head[^>]*>", |
290 | static const char *re_hdre = "<\\s*head[^>]*>", | ||
291 | *re_lre = "<\\s*link\\b([^>]+)>", | 327 | *re_lre = "<\\s*link\\b([^>]+)>", |
292 | *re_rre = "\\brel\\s*=\\s*['\"]\\s*([^'\"\\s]+)\\s*['\"]", | 328 | *re_rre = "\\brel\\s*=\\s*['\"]\\s*([^'\"\\s]+)\\s*['\"]", |
293 | *re_hre = "\\bhref\\s*=\\s*['\"]\\s*([^'\"\\s]+)\\s*['\"]"; | 329 | *re_hre = "\\bhref\\s*=\\s*['\"]\\s*([^'\"\\s]+)\\s*['\"]"; |
294 | #if defined(USE_LIBPCRECPP) | 330 | pcre_matches_t m1(3), m2(3); |
295 | static pcrecpp::RE_Options ro(PCRE_CASELESS|PCRE_DOTALL); | 331 | pcre_t bre(re_bre,PCRE_CASELESS); |
296 | static pcrecpp::RE | 332 | if(bre.exec(html,m1)>0) |
297 | bre("<body\\b.*",ro), hdre(re_hdre,ro), | 333 | html.erase(m1.begin(0)); |
298 | lre(re_lre,ro), rre(re_rre), hre(re_hre,ro); | 334 | pcre_t hdre(re_hdre,PCRE_CASELESS); |
299 | bre.Replace("",&html); | 335 | if(hdre.exec(html,m1)<=0) |
300 | pcrecpp::StringPiece hpiece(html); | 336 | throw bad_input(OPKELE_CP_ "failed to find <head>"); |
301 | if(!hdre.FindAndConsume(&hpiece)) | 337 | html.erase(0,m1.end(0)+1); |
302 | throw bad_input(OPKELE_CP_ "failed to find head"); | 338 | pcre_t lre(re_lre,PCRE_CASELESS), rre(re_rre,PCRE_CASELESS), hre(re_hre,PCRE_CASELESS); |
303 | string attrs; | 339 | while(lre.exec(html,m1)>=2) { |
304 | while(lre.FindAndConsume(&hpiece,&attrs)) { | 340 | string attrs(html,m1.begin(1),m1.length(1)); |
305 | pcrecpp::StringPiece rel, href; | 341 | html.erase(0,m1.end(0)+1); |
306 | if(!(rre.PartialMatch(attrs,&rel) && hre.PartialMatch(attrs,&href))) | 342 | if(!( rre.exec(attrs,m1)>=2 && hre.exec(attrs,m2)>=2 )) |
307 | continue; | 343 | continue; |
344 | string rel(attrs,m1.begin(1),m1.length(1)); | ||
345 | string href(attrs,m2.begin(1),m2.length(1)); | ||
308 | if(rel=="openid.server") { | 346 | if(rel=="openid.server") { |
309 | href.CopyToString(&server); | 347 | server = href; |
310 | if(!delegate.empty()) | 348 | if(!delegate.empty()) break; |
311 | break; | ||
312 | }else if(rel=="openid.delegate") { | 349 | }else if(rel=="openid.delegate") { |
313 | href.CopyToString(&delegate); | 350 | delegate = href; |
314 | if(!server.empty()) | 351 | if(!server.empty()) break; |
315 | break; | ||
316 | } | ||
317 | } | 352 | } |
318 | #elif defined(USE_PCREPP) | ||
319 | pcrepp::Pcre bre("<body\\b",PCRE_CASELESS); | ||
320 | if(bre.search(html)) | ||
321 | html.erase(bre.get_match_start()); | ||
322 | pcrepp::Pcre hdre(re_hdre,PCRE_CASELESS); | ||
323 | if(!hdre.search(html)) | ||
324 | throw bad_input(OPKELE_CP_ "failed to find head"); | ||
325 | html.erase(0,hdre.get_match_end()+1); | ||
326 | pcrepp::Pcre lre(re_lre,PCRE_CASELESS), rre(re_rre,PCRE_CASELESS), hre(re_hre,PCRE_CASELESS); | ||
327 | while(lre.search(html)) { | ||
328 | string attrs = lre[0]; | ||
329 | html.erase(0,lre.get_match_end()+1); | ||
330 | if(!(rre.search(attrs)&&hre.search(attrs))) | ||
331 | continue; | ||
332 | if(rre[0]=="openid.server") { | ||
333 | server = hre[0]; | ||
334 | if(!delegate.empty()) | ||
335 | break; | ||
336 | }else if(rre[0]=="openid.delegate") { | ||
337 | delegate = hre[0]; | ||
338 | if(!server.empty()) | ||
339 | break; | ||
340 | } | 353 | } |
341 | } | ||
342 | #else | ||
343 | #error "I must have gone crazy" | ||
344 | #endif | ||
345 | if(server.empty()) | 354 | if(server.empty()) |
346 | throw failed_assertion(OPKELE_CP_ "The location has no openid.server declaration"); | 355 | throw failed_assertion(OPKELE_CP_ "The location has no openid.server declaration"); |
347 | #else /* none of the RE bindings enabled */ | ||
348 | throw not_implemented(OPKELE_CP_ "No internal implementation of retrieve_links were provided at compile-time"); | ||
349 | #endif | ||
350 | } | 356 | } |
351 | 357 | ||
352 | assoc_t consumer_t::find_assoc(const string& server) { | 358 | assoc_t consumer_t::find_assoc(const string& server) { |