author | Michael Krelin <hacker@klever.net> | 2007-12-02 21:48:18 (UTC) |
---|---|---|
committer | Michael Krelin <hacker@klever.net> | 2007-12-02 21:51:08 (UTC) |
commit | 262f1579f0a9138a01f06afea06d00155cefd4b5 (patch) (unidiff) | |
tree | fb4db0ee7b679a1957c63abbe6f6af1d2fa82531 /lib/util.cc | |
parent | 73d98f3652b498b9a74b183bef395714c7d73fda (diff) | |
download | libopkele-262f1579f0a9138a01f06afea06d00155cefd4b5.zip libopkele-262f1579f0a9138a01f06afea06d00155cefd4b5.tar.gz libopkele-262f1579f0a9138a01f06afea06d00155cefd4b5.tar.bz2 |
first cut on XRI resolver
This commit adds an OpenID service resolver that performs discovery using XRI
(proxy only), the Yadis protocol, and HTML-based discovery. It uses expat as the
XML parsing engine, which makes it a bit stricter about the HTML it receives, but
I think failing to discover links in *severely* broken HTML is better than
misdetecting links hidden in comments or the like.
This is highly experimental code and needs more thought and testing.
Thanks to everyone who pushed me towards this development, namely Joseph, John,
and Gen.
Signed-off-by: Michael Krelin <hacker@klever.net>
-rw-r--r-- | lib/util.cc | 16 |
1 files changed, 12 insertions, 4 deletions
diff --git a/lib/util.cc b/lib/util.cc index ac70938..69d37b5 100644 --- a/lib/util.cc +++ b/lib/util.cc | |||
@@ -152,72 +152,80 @@ namespace opkele { | |||
152 | throw failed_conversion(OPKELE_CP_ "failed to snprintf()"); | 152 | throw failed_conversion(OPKELE_CP_ "failed to snprintf()"); |
153 | return rv; | 153 | return rv; |
154 | } | 154 | } |
155 | 155 | ||
156 | long string_to_long(const string& s) { | 156 | long string_to_long(const string& s) { |
157 | char *endptr = 0; | 157 | char *endptr = 0; |
158 | long rv = strtol(s.c_str(),&endptr,10); | 158 | long rv = strtol(s.c_str(),&endptr,10); |
159 | if((!endptr) || endptr==s.c_str()) | 159 | if((!endptr) || endptr==s.c_str()) |
160 | throw failed_conversion(OPKELE_CP_ "failed to strtol()"); | 160 | throw failed_conversion(OPKELE_CP_ "failed to strtol()"); |
161 | return rv; | 161 | return rv; |
162 | } | 162 | } |
163 | 163 | ||
164 | /* | 164 | /* |
165 | * Normalize URL according to the rules, described in rfc 3986, section 6 | 165 | * Normalize URL according to the rules, described in rfc 3986, section 6 |
166 | * | 166 | * |
167 | * - uppercase hex triplets (e.g. %ab -> %AB) | 167 | * - uppercase hex triplets (e.g. %ab -> %AB) |
168 | * - lowercase scheme and host | 168 | * - lowercase scheme and host |
169 | * - decode %-encoded characters, specified as unreserved in rfc 3986, section 2.3, | 169 | * - decode %-encoded characters, specified as unreserved in rfc 3986, section 2.3, |
170 | * that is - [:alpha:][:digit:]._~- | 170 | * that is - [:alpha:][:digit:]._~- |
171 | * - remove dot segments | 171 | * - remove dot segments |
172 | * - remove empty and default ports | 172 | * - remove empty and default ports |
173 | * - if there's no path component, add '/' | 173 | * - if there's no path component, add '/' |
174 | */ | 174 | */ |
175 | string rfc_3986_normalize_uri(const string& uri) { | 175 | string rfc_3986_normalize_uri(const string& uri) { |
176 | static const char *whitespace = " \t\r\n"; | ||
176 | string rv; | 177 | string rv; |
177 | string::size_type colon = uri.find(':'); | 178 | string::size_type ns = uri.find_first_not_of(whitespace); |
179 | if(ns==string::npos) | ||
180 | throw bad_input(OPKELE_CP_ "Can't normalize empty URI"); | ||
181 | string::size_type colon = uri.find(':',ns); | ||
178 | if(colon==string::npos) | 182 | if(colon==string::npos) |
179 | throw bad_input(OPKELE_CP_ "No scheme specified in URI"); | 183 | throw bad_input(OPKELE_CP_ "No scheme specified in URI"); |
180 | transform( | 184 | transform( |
181 | uri.begin(), uri.begin()+colon+1, | 185 | uri.begin()+ns, uri.begin()+colon+1, |
182 | back_inserter(rv), ::tolower ); | 186 | back_inserter(rv), ::tolower ); |
183 | bool s; | 187 | bool s; |
184 | if(rv=="http:") | 188 | if(rv=="http:") |
185 | s = false; | 189 | s = false; |
186 | else if(rv=="https:") | 190 | else if(rv=="https:") |
187 | s = true; | 191 | s = true; |
192 | #ifndef NDEBUG | ||
193 | else if(rv=="file:") | ||
194 | s = false; | ||
195 | #endif /* XXX: or try to make tests work some other way */ | ||
188 | else | 196 | else |
189 | throw not_implemented(OPKELE_CP_ "Only http(s) URIs can be normalized here"); | 197 | throw not_implemented(OPKELE_CP_ "Only http(s) URIs can be normalized here"); |
190 | string::size_type ul = uri.length(); | 198 | string::size_type ul = uri.find_last_not_of(whitespace)+1; |
191 | if(ul <= (colon+3)) | 199 | if(ul <= (colon+3)) |
192 | throw bad_input(OPKELE_CP_ "Unexpected end of URI being normalized encountered"); | 200 | throw bad_input(OPKELE_CP_ "Unexpected end of URI being normalized encountered"); |
193 | if(uri[colon+1]!='/' || uri[colon+2]!='/') | 201 | if(uri[colon+1]!='/' || uri[colon+2]!='/') |
194 | throw bad_input(OPKELE_CP_ "Unexpected input in URI being normalized after scheme component"); | 202 | throw bad_input(OPKELE_CP_ "Unexpected input in URI being normalized after scheme component"); |
195 | rv += "//"; | 203 | rv += "//"; |
196 | string::size_type interesting = uri.find_first_of(":/#?",colon+3); | 204 | string::size_type interesting = uri.find_first_of(":/#?",colon+3); |
197 | if(interesting==string::npos) { | 205 | if(interesting==string::npos) { |
198 | transform( | 206 | transform( |
199 | uri.begin()+colon+3,uri.end(), | 207 | uri.begin()+colon+3,uri.begin()+ul, |
200 | back_inserter(rv), ::tolower ); | 208 | back_inserter(rv), ::tolower ); |
201 | rv += '/'; return rv; | 209 | rv += '/'; return rv; |
202 | } | 210 | } |
203 | transform( | 211 | transform( |
204 | uri.begin()+colon+3,uri.begin()+interesting, | 212 | uri.begin()+colon+3,uri.begin()+interesting, |
205 | back_inserter(rv), ::tolower ); | 213 | back_inserter(rv), ::tolower ); |
206 | bool qf = false; | 214 | bool qf = false; |
207 | char ic = uri[interesting]; | 215 | char ic = uri[interesting]; |
208 | if(ic==':') { | 216 | if(ic==':') { |
209 | string::size_type ni = uri.find_first_of("/#?%",interesting+1); | 217 | string::size_type ni = uri.find_first_of("/#?%",interesting+1); |
210 | const char *nptr = uri.data()+interesting+1; | 218 | const char *nptr = uri.data()+interesting+1; |
211 | char *eptr = 0; | 219 | char *eptr = 0; |
212 | long port = strtol(nptr,&eptr,10); | 220 | long port = strtol(nptr,&eptr,10); |
213 | if( (port>0) && (port<65535) && port!=(s?443:80) ) { | 221 | if( (port>0) && (port<65535) && port!=(s?443:80) ) { |
214 | char tmp[6]; | 222 | char tmp[6]; |
215 | snprintf(tmp,sizeof(tmp),"%ld",port); | 223 | snprintf(tmp,sizeof(tmp),"%ld",port); |
216 | rv += ':'; rv += tmp; | 224 | rv += ':'; rv += tmp; |
217 | } | 225 | } |
218 | if(ni==string::npos) { | 226 | if(ni==string::npos) { |
219 | rv += '/'; return rv; | 227 | rv += '/'; return rv; |
220 | } | 228 | } |
221 | interesting = ni; | 229 | interesting = ni; |
222 | }else if(ic!='/') { | 230 | }else if(ic!='/') { |
223 | rv += '/'; rv += ic; | 231 | rv += '/'; rv += ic; |