summaryrefslogtreecommitdiffabout
path: root/lib/util.cc
authorMichael Krelin <hacker@klever.net>2007-12-02 21:48:18 (UTC)
committer Michael Krelin <hacker@klever.net>2007-12-02 21:51:08 (UTC)
commit262f1579f0a9138a01f06afea06d00155cefd4b5 (patch) (unidiff)
treefb4db0ee7b679a1957c63abbe6f6af1d2fa82531 /lib/util.cc
parent73d98f3652b498b9a74b183bef395714c7d73fda (diff)
downloadlibopkele-262f1579f0a9138a01f06afea06d00155cefd4b5.zip
libopkele-262f1579f0a9138a01f06afea06d00155cefd4b5.tar.gz
libopkele-262f1579f0a9138a01f06afea06d00155cefd4b5.tar.bz2
first cut on XRI resolver
This commit adds an OpenID service resolver that performs discovery using XRI (proxy only), the Yadis protocol, and HTML-based discovery. It uses expat as the XML parsing engine, which makes it a bit more strict about the HTML it receives, but I think failing to discover links in *severely* broken HTML is better than misdetecting links hidden in comments or the like. This is highly experimental code and needs more thought and testing. Thanks to everyone who pushed me towards this development, namely Joseph, John, and Gen. Signed-off-by: Michael Krelin <hacker@klever.net>
Diffstat (limited to 'lib/util.cc') (more/less context) (ignore whitespace changes)
-rw-r--r--lib/util.cc16
1 files changed, 12 insertions, 4 deletions
diff --git a/lib/util.cc b/lib/util.cc
index ac70938..69d37b5 100644
--- a/lib/util.cc
+++ b/lib/util.cc
@@ -152,72 +152,80 @@ namespace opkele {
152 throw failed_conversion(OPKELE_CP_ "failed to snprintf()"); 152 throw failed_conversion(OPKELE_CP_ "failed to snprintf()");
153 return rv; 153 return rv;
154 } 154 }
155 155
156 long string_to_long(const string& s) { 156 long string_to_long(const string& s) {
157 char *endptr = 0; 157 char *endptr = 0;
158 long rv = strtol(s.c_str(),&endptr,10); 158 long rv = strtol(s.c_str(),&endptr,10);
159 if((!endptr) || endptr==s.c_str()) 159 if((!endptr) || endptr==s.c_str())
160 throw failed_conversion(OPKELE_CP_ "failed to strtol()"); 160 throw failed_conversion(OPKELE_CP_ "failed to strtol()");
161 return rv; 161 return rv;
162 } 162 }
163 163
164 /* 164 /*
165 * Normalize URL according to the rules, described in rfc 3986, section 6 165 * Normalize URL according to the rules, described in rfc 3986, section 6
166 * 166 *
167 * - uppercase hex triplets (e.g. %ab -> %AB) 167 * - uppercase hex triplets (e.g. %ab -> %AB)
168 * - lowercase scheme and host 168 * - lowercase scheme and host
169 * - decode %-encoded characters, specified as unreserved in rfc 3986, section 2.3, 169 * - decode %-encoded characters, specified as unreserved in rfc 3986, section 2.3,
170 * that is - [:alpha:][:digit:]._~- 170 * that is - [:alpha:][:digit:]._~-
171 * - remove dot segments 171 * - remove dot segments
172 * - remove empty and default ports 172 * - remove empty and default ports
173 * - if there's no path component, add '/' 173 * - if there's no path component, add '/'
174 */ 174 */
175 string rfc_3986_normalize_uri(const string& uri) { 175 string rfc_3986_normalize_uri(const string& uri) {
176 static const char *whitespace = " \t\r\n";
176 string rv; 177 string rv;
177 string::size_type colon = uri.find(':'); 178 string::size_type ns = uri.find_first_not_of(whitespace);
179 if(ns==string::npos)
180 throw bad_input(OPKELE_CP_ "Can't normalize empty URI");
181 string::size_type colon = uri.find(':',ns);
178 if(colon==string::npos) 182 if(colon==string::npos)
179 throw bad_input(OPKELE_CP_ "No scheme specified in URI"); 183 throw bad_input(OPKELE_CP_ "No scheme specified in URI");
180 transform( 184 transform(
181 uri.begin(), uri.begin()+colon+1, 185 uri.begin()+ns, uri.begin()+colon+1,
182 back_inserter(rv), ::tolower ); 186 back_inserter(rv), ::tolower );
183 bool s; 187 bool s;
184 if(rv=="http:") 188 if(rv=="http:")
185 s = false; 189 s = false;
186 else if(rv=="https:") 190 else if(rv=="https:")
187 s = true; 191 s = true;
192#ifndef NDEBUG
193 else if(rv=="file:")
194 s = false;
195#endif /* XXX: or try to make tests work some other way */
188 else 196 else
189 throw not_implemented(OPKELE_CP_ "Only http(s) URIs can be normalized here"); 197 throw not_implemented(OPKELE_CP_ "Only http(s) URIs can be normalized here");
190 string::size_type ul = uri.length(); 198 string::size_type ul = uri.find_last_not_of(whitespace)+1;
191 if(ul <= (colon+3)) 199 if(ul <= (colon+3))
192 throw bad_input(OPKELE_CP_ "Unexpected end of URI being normalized encountered"); 200 throw bad_input(OPKELE_CP_ "Unexpected end of URI being normalized encountered");
193 if(uri[colon+1]!='/' || uri[colon+2]!='/') 201 if(uri[colon+1]!='/' || uri[colon+2]!='/')
194 throw bad_input(OPKELE_CP_ "Unexpected input in URI being normalized after scheme component"); 202 throw bad_input(OPKELE_CP_ "Unexpected input in URI being normalized after scheme component");
195 rv += "//"; 203 rv += "//";
196 string::size_type interesting = uri.find_first_of(":/#?",colon+3); 204 string::size_type interesting = uri.find_first_of(":/#?",colon+3);
197 if(interesting==string::npos) { 205 if(interesting==string::npos) {
198 transform( 206 transform(
199 uri.begin()+colon+3,uri.end(), 207 uri.begin()+colon+3,uri.begin()+ul,
200 back_inserter(rv), ::tolower ); 208 back_inserter(rv), ::tolower );
201 rv += '/'; return rv; 209 rv += '/'; return rv;
202 } 210 }
203 transform( 211 transform(
204 uri.begin()+colon+3,uri.begin()+interesting, 212 uri.begin()+colon+3,uri.begin()+interesting,
205 back_inserter(rv), ::tolower ); 213 back_inserter(rv), ::tolower );
206 bool qf = false; 214 bool qf = false;
207 char ic = uri[interesting]; 215 char ic = uri[interesting];
208 if(ic==':') { 216 if(ic==':') {
209 string::size_type ni = uri.find_first_of("/#?%",interesting+1); 217 string::size_type ni = uri.find_first_of("/#?%",interesting+1);
210 const char *nptr = uri.data()+interesting+1; 218 const char *nptr = uri.data()+interesting+1;
211 char *eptr = 0; 219 char *eptr = 0;
212 long port = strtol(nptr,&eptr,10); 220 long port = strtol(nptr,&eptr,10);
213 if( (port>0) && (port<65535) && port!=(s?443:80) ) { 221 if( (port>0) && (port<65535) && port!=(s?443:80) ) {
214 char tmp[6]; 222 char tmp[6];
215 snprintf(tmp,sizeof(tmp),"%ld",port); 223 snprintf(tmp,sizeof(tmp),"%ld",port);
216 rv += ':'; rv += tmp; 224 rv += ':'; rv += tmp;
217 } 225 }
218 if(ni==string::npos) { 226 if(ni==string::npos) {
219 rv += '/'; return rv; 227 rv += '/'; return rv;
220 } 228 }
221 interesting = ni; 229 interesting = ni;
222 }else if(ic!='/') { 230 }else if(ic!='/') {
223 rv += '/'; rv += ic; 231 rv += '/'; rv += ic;