summaryrefslogtreecommitdiffabout
path: root/lib/util.cc
Unidiff
Diffstat (limited to 'lib/util.cc') (more/less context) (ignore whitespace changes)
-rw-r--r--lib/util.cc55
1 files changed, 38 insertions, 17 deletions
diff --git a/lib/util.cc b/lib/util.cc
index 416e2cc..a9b9bed 100644
--- a/lib/util.cc
+++ b/lib/util.cc
@@ -114,22 +114,32 @@ namespace opkele {
114 } 114 }
115 115
116 time_t w3c_to_time(const string& w) { 116 time_t w3c_to_time(const string& w) {
117 int fraction;
117 struct tm tm_t; 118 struct tm tm_t;
118 memset(&tm_t,0,sizeof(tm_t)); 119 memset(&tm_t,0,sizeof(tm_t));
119 if( 120 if( (
121 sscanf(
122 w.c_str(),
123 "%04d-%02d-%02dT%02d:%02d:%02dZ",
124 &tm_t.tm_year,&tm_t.tm_mon,&tm_t.tm_mday,
125 &tm_t.tm_hour,&tm_t.tm_min,&tm_t.tm_sec
126 ) != 6
127 ) && (
120 sscanf( 128 sscanf(
121 w.c_str(), 129 w.c_str(),
122 "%04d-%02d-%02dT%02d:%02d:%02dZ", 130 "%04d-%02d-%02dT%02d:%02d:%02d.%03dZ",
123 &tm_t.tm_year,&tm_t.tm_mon,&tm_t.tm_mday, 131 &tm_t.tm_year,&tm_t.tm_mon,&tm_t.tm_mday,
124 &tm_t.tm_hour,&tm_t.tm_min,&tm_t.tm_sec 132 &tm_t.tm_hour,&tm_t.tm_min,&tm_t.tm_sec,
125 ) != 6 ) 133 &fraction
134 ) != 7
135 ) )
126 throw failed_conversion(OPKELE_CP_ "failed to sscanf()"); 136 throw failed_conversion(OPKELE_CP_ "failed to sscanf()");
127 tm_t.tm_mon--; 137 tm_t.tm_mon--;
128 tm_t.tm_year-=1900; 138 tm_t.tm_year-=1900;
129 time_t rv = mktime(&tm_t); 139 time_t rv = mktime(&tm_t);
130 if(rv==(time_t)-1) 140 if(rv==(time_t)-1)
131 throw failed_conversion(OPKELE_CP_ "failed to mktime()"); 141 throw failed_conversion(OPKELE_CP_ "failed to mktime()");
132 return rv; 142 return rv-timezone;
133 } 143 }
134 144
135 /* 145 /*
@@ -164,7 +174,7 @@ namespace opkele {
164 /* 174 /*
165 * Normalize URL according to the rules, described in rfc 3986, section 6 175 * Normalize URL according to the rules, described in rfc 3986, section 6
166 * 176 *
167 * - uppercase hext triplets (e.g. %ab -> %AB) 177 * - uppercase hex triplets (e.g. %ab -> %AB)
168 * - lowercase scheme and host 178 * - lowercase scheme and host
169 * - decode %-encoded characters, specified as unreserved in rfc 3986, section 2.3, 179 * - decode %-encoded characters, specified as unreserved in rfc 3986, section 2.3,
170 * that is - [:alpha:][:digit:]._~- 180 * that is - [:alpha:][:digit:]._~-
@@ -173,30 +183,40 @@ namespace opkele {
173 * - if there's no path component, add '/' 183 * - if there's no path component, add '/'
174 */ 184 */
175 string rfc_3986_normalize_uri(const string& uri) { 185 string rfc_3986_normalize_uri(const string& uri) {
186 static const char *whitespace = " \t\r\n";
176 string rv; 187 string rv;
177 string::size_type colon = uri.find(':'); 188 string::size_type ns = uri.find_first_not_of(whitespace);
189 if(ns==string::npos)
190 throw bad_input(OPKELE_CP_ "Can't normalize empty URI");
191 string::size_type colon = uri.find(':',ns);
178 if(colon==string::npos) 192 if(colon==string::npos)
179 throw bad_input(OPKELE_CP_ "No scheme specified in URI"); 193 throw bad_input(OPKELE_CP_ "No scheme specified in URI");
180 transform( 194 transform(
181 uri.begin(), uri.begin()+colon+1, 195 uri.begin()+ns, uri.begin()+colon+1,
182 back_inserter(rv), ::tolower ); 196 back_inserter(rv), ::tolower );
183 bool s; 197 bool s;
184 if(rv=="http:") 198 string::size_type ul = uri.find_last_not_of(whitespace)+1;
185 s = false;
186 else if(rv=="https:")
187 s = true;
188 else
189 throw not_implemented(OPKELE_CP_ "Only http(s) URIs can be normalized here");
190 string::size_type ul = uri.length();
191 if(ul <= (colon+3)) 199 if(ul <= (colon+3))
192 throw bad_input(OPKELE_CP_ "Unexpected end of URI being normalized encountered"); 200 throw bad_input(OPKELE_CP_ "Unexpected end of URI being normalized encountered");
193 if(uri[colon+1]!='/' || uri[colon+2]!='/') 201 if(uri[colon+1]!='/' || uri[colon+2]!='/')
194 throw bad_input(OPKELE_CP_ "Unexpected input in URI being normalized after scheme component"); 202 throw bad_input(OPKELE_CP_ "Unexpected input in URI being normalized after scheme component");
203 if(rv=="http:")
204 s = false;
205 else if(rv=="https:")
206 s = true;
207 else{
208 /* TODO: support more schemes.
209 * e.g. xri. How do we normalize
210 * xri?
211 */
212 rv.append(uri,colon+1,ul-colon-1);
213 return rv;
214 }
195 rv += "//"; 215 rv += "//";
196 string::size_type interesting = uri.find_first_of(":/#?",colon+3); 216 string::size_type interesting = uri.find_first_of(":/#?",colon+3);
197 if(interesting==string::npos) { 217 if(interesting==string::npos) {
198 transform( 218 transform(
199 uri.begin()+colon+3,uri.end(), 219 uri.begin()+colon+3,uri.begin()+ul,
200 back_inserter(rv), ::tolower ); 220 back_inserter(rv), ::tolower );
201 rv += '/'; return rv; 221 rv += '/'; return rv;
202 } 222 }
@@ -285,7 +305,8 @@ namespace opkele {
285 } 305 }
286 } 306 }
287 if(!pseg.empty()) { 307 if(!pseg.empty()) {
288 rv += '/'; rv += pseg; 308 if(!qf) rv += '/';
309 rv += pseg;
289 } 310 }
290 return rv; 311 return rv;
291 } 312 }