summary | refs | log | tree | commit | diff | about
Unidiff
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r-- lib/Makefile.am 4
-rw-r--r-- lib/discovery.cc 75
2 files changed, 66 insertions, 13 deletions
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 9c73e96..989de28 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -7,12 +7,12 @@ INCLUDES = \
7 ${KONFORKA_CFLAGS} \ 7 ${KONFORKA_CFLAGS} \
8 ${OPENSSL_CFLAGS} \ 8 ${OPENSSL_CFLAGS} \
9 ${LIBCURL_CPPFLAGS} \ 9 ${LIBCURL_CPPFLAGS} \
10 ${PCRE_CFLAGS} ${EXPAT_CFLAGS} 10 ${PCRE_CFLAGS} ${EXPAT_CFLAGS} ${TIDY_CFLAGS}
11libopkele_la_LIBADD = \ 11libopkele_la_LIBADD = \
12 ${LIBCURL} \ 12 ${LIBCURL} \
13 ${PCRE_LIBS} ${EXPAT_LIBS} \ 13 ${PCRE_LIBS} ${EXPAT_LIBS} \
14 ${OPENSSL_LIBS} \ 14 ${OPENSSL_LIBS} \
15 ${KONFORKA_LIBS} 15 ${KONFORKA_LIBS} ${TIDY_LIBS}
16 16
17libopkele_la_SOURCES = \ 17libopkele_la_SOURCES = \
18 params.cc \ 18 params.cc \
diff --git a/lib/discovery.cc b/lib/discovery.cc
index 8729cfb..a308b56 100644
--- a/lib/discovery.cc
+++ b/lib/discovery.cc
@@ -5,6 +5,7 @@
5#include <opkele/discovery.h> 5#include <opkele/discovery.h>
6#include <opkele/exception.h> 6#include <opkele/exception.h>
7#include <opkele/util.h> 7#include <opkele/util.h>
8#include <opkele/tidy.h>
8#include <opkele/debug.h> 9#include <opkele/debug.h>
9 10
10#include "config.h" 11#include "config.h"
@@ -19,6 +20,7 @@ namespace opkele {
19 20
20 static const char *whitespace = " \t\r\n"; 21 static const char *whitespace = " \t\r\n";
21 static const char *i_leaders = "=@+$!("; 22 static const char *i_leaders = "=@+$!(";
23 static const size_t max_html = 16384;
22 24
23 static inline bool is_qelement(const XML_Char *n,const char *qen) { 25 static inline bool is_qelement(const XML_Char *n,const char *qen) {
24 return !strcasecmp(n,qen); 26 return !strcasecmp(n,qen);
@@ -61,6 +63,8 @@ namespace opkele {
61 typedef list<string> pt_stack_t; 63 typedef list<string> pt_stack_t;
62 pt_stack_t pt_stack; 64 pt_stack_t pt_stack;
63 int skipping; 65 int skipping;
66 bool parser_choked;
67 string save_html;
64 68
65 XRD_t *xrd; 69 XRD_t *xrd;
66 service_t *xrd_service; 70 service_t *xrd_service;
@@ -140,25 +144,62 @@ namespace opkele {
140 if(r) 144 if(r)
141 throw exception_curl(OPKELE_CP_ "failed to set culry urlie",r); 145 throw exception_curl(OPKELE_CP_ "failed to set culry urlie",r);
142 146
143 (*(expat_t*)this) = parser_create_ns();
144 set_user_data(); set_element_handler();
145 set_character_data_handler();
146
147 http_content_type.clear(); 147 http_content_type.clear();
148 xmode = xm; 148 xmode = xm;
149 prepare_to_parse();
149 if(xmode&xmode_html) { 150 if(xmode&xmode_html) {
150 xrds_location.clear(); 151 xrds_location.clear();
151 html_openid1.clear(); html_openid2.clear(); 152 save_html.clear();
153 save_html.reserve(max_html);
152 } 154 }
153 xrd = &result.xrd; 155 xrd = &result.xrd;
154 cdata = 0; xrd_service = 0; skipping = 0;
155 status_code = 100; status_string.clear();
156 156
157 r = easy_perform(); 157 r = easy_perform();
158 if(r && r!=CURLE_WRITE_ERROR) 158 if(r && r!=CURLE_WRITE_ERROR)
159 throw exception_curl(OPKELE_CP_ "failed to perform curly request",r); 159 throw exception_curl(OPKELE_CP_ "failed to perform curly request",r);
160 160
161 parse(0,0,true); 161 if(!parser_choked) {
162 parse(0,0,true);
163 }else{
164 /* TODO: do not bother if we've seen xml */
165 try {
166 util::tidy_doc_t td = util::tidy_doc_t::create();
167 if(!td)
168 throw exception_tidy(OPKELE_CP_ "failed to create htmltidy document");
169#ifndef NDEBUG
170 td.opt_set(TidyQuiet,false);
171 td.opt_set(TidyShowWarnings,false);
172 td.opt_set(TidyForceOutput,true);
173 td.opt_set(TidyXhtmlOut,true);
174 td.opt_set(TidyDoctypeMode,TidyDoctypeOmit);
175 td.opt_set(TidyMark,false);
176#endif /* NDEBUG */
177 if(td.parse_string(save_html)<=0)
178 throw exception_tidy(OPKELE_CP_ "tidy failed to parse document");
179 if(td.clean_and_repair()<=0)
180 throw exception_tidy(OPKELE_CP_ "tidy failed to clean and repair");
181 util::tidy_buf_t tide;
182 if(td.save_buffer(tide)<=0)
183 throw exception_tidy(OPKELE_CP_ "tidy failed to save buffer");
184 prepare_to_parse();
185 parse(tide.c_str(),tide.size(),true);
186 }catch(exception_tidy& et) { }
187 }
188 save_html.clear();
189 }
190
191 void prepare_to_parse() {
192 (*(expat_t*)this) = parser_create_ns();
193 set_user_data(); set_element_handler();
194 set_character_data_handler();
195
196 if(xmode&xmode_html) {
197 html_openid1.clear(); html_openid2.clear();
198 parser_choked = false;
199 }
200
201 cdata = 0; xrd_service = 0; skipping = 0;
202 status_code = 100; status_string.clear();
162 } 203 }
163 204
164 void html2xrd(XRD_t& x) { 205 void html2xrd(XRD_t& x) {
@@ -173,13 +214,25 @@ namespace opkele {
173 } 214 }
174 215
175 size_t write(void *p,size_t s,size_t nm) { 216 size_t write(void *p,size_t s,size_t nm) {
176 if(skipping<0) return 0;
177 /* TODO: limit total size */ 217 /* TODO: limit total size */
178 size_t bytes = s*nm; 218 size_t bytes = s*nm;
179 bool rp = parse((const char *)p,bytes,false); 219 const char *inbuf = (const char*)p;
220 if(xmode&xmode_html) {
221 size_t mbts = save_html.capacity()-save_html.size();
222 size_t bts = 0;
223 if(mbts>0) {
224 bts = (bytes>mbts)?mbts:bytes;
225 save_html.append(inbuf,bts);
226 }
227 if(skipping<0) return bts;
228 }
229 if(skipping<0) return 0;
230 bool rp = parse(inbuf,bytes,false);
180 if(!rp) { 231 if(!rp) {
232 parser_choked = true;
181 skipping = -1; 233 skipping = -1;
182 bytes = 0; 234 if(!(xmode&xmode_html))
235 bytes = 0;
183 } 236 }
184 return bytes; 237 return bytes;
185 } 238 }