summaryrefslogtreecommitdiffabout
path: root/lib/util.cc
authorMichael Krelin <hacker@klever.net>2008-03-02 20:23:40 (UTC)
committer Michael Krelin <hacker@klever.net>2008-03-02 20:23:40 (UTC)
commitda3f84153be2a93da7ffc49af33b29b9725fac38 (patch) (unidiff)
treebcacc4c38a53b70c679fa69fbf2577da2802a197 /lib/util.cc
parentf47e336b569739bdde8e9add96ff2c46f97257fb (diff)
downloadlibopkele-da3f84153be2a93da7ffc49af33b29b9725fac38.zip
libopkele-da3f84153be2a93da7ffc49af33b29b9725fac38.tar.gz
libopkele-da3f84153be2a93da7ffc49af33b29b9725fac38.tar.bz2
made util::url_encode refrain from encoding unreserved chars
as per rfc 3986 Signed-off-by: Michael Krelin <hacker@klever.net>
Diffstat (limited to 'lib/util.cc') (more/less context) (ignore whitespace changes)
-rw-r--r--lib/util.cc39
1 files changed, 33 insertions, 6 deletions
diff --git a/lib/util.cc b/lib/util.cc
index 29e6738..a6e08e2 100644
--- a/lib/util.cc
+++ b/lib/util.cc
@@ -1,61 +1,60 @@
1#include <errno.h> 1#include <errno.h>
2#include <cassert> 2#include <cassert>
3#include <cctype> 3#include <cctype>
4#include <cstring> 4#include <cstring>
5#include <vector> 5#include <vector>
6#include <string> 6#include <string>
7#include <stack> 7#include <stack>
8#include <algorithm> 8#include <algorithm>
9#include <openssl/bio.h> 9#include <openssl/bio.h>
10#include <openssl/evp.h> 10#include <openssl/evp.h>
11#include <openssl/sha.h> 11#include <openssl/sha.h>
12#include <openssl/hmac.h> 12#include <openssl/hmac.h>
13#include <curl/curl.h>
14#include <opkele/util.h> 13#include <opkele/util.h>
15#include <opkele/exception.h> 14#include <opkele/exception.h>
16#include <opkele/data.h> 15#include <opkele/data.h>
17#include <opkele/debug.h> 16#include <opkele/debug.h>
18 17
19#include <config.h> 18#include <config.h>
20#ifdef HAVE_DEMANGLE 19#ifdef HAVE_DEMANGLE
21# include <cxxabi.h> 20# include <cxxabi.h>
22#endif 21#endif
23 22
24namespace opkele { 23namespace opkele {
25 using namespace std; 24 using namespace std;
26 25
27 namespace util { 26 namespace util {
28 27
29 /* 28 /*
30 * base64 29 * base64
31 */ 30 */
32 string encode_base64(const void *data,size_t length) { 31 string encode_base64(const void *data,size_t length) {
33 BIO *b64 = 0, *bmem = 0; 32 BIO *b64 = 0, *bmem = 0;
34 try { 33 try {
35 b64 = BIO_new(BIO_f_base64()); 34 b64 = BIO_new(BIO_f_base64());
36 if(!b64) 35 if(!b64)
37 throw exception_openssl(OPKELE_CP_ "failed to BIO_new() base64 encoder"); 36 throw exception_openssl(OPKELE_CP_ "failed to BIO_new() base64 encoder");
38 BIO_set_flags(b64,BIO_FLAGS_BASE64_NO_NL); 37 BIO_set_flags(b64,BIO_FLAGS_BASE64_NO_NL);
39 bmem = BIO_new(BIO_s_mem()); 38 bmem = BIO_new(BIO_s_mem());
40 BIO_set_flags(b64,BIO_CLOSE); 39 BIO_set_flags(b64,BIO_CLOSE);
41 if(!bmem) 40 if(!bmem)
42 throw exception_openssl(OPKELE_CP_ "failed to BIO_new() memory buffer"); 41 throw exception_openssl(OPKELE_CP_ "failed to BIO_new() memory buffer");
43 BIO_push(b64,bmem); 42 BIO_push(b64,bmem);
44 if(((size_t)BIO_write(b64,data,length))!=length) 43 if(((size_t)BIO_write(b64,data,length))!=length)
45 throw exception_openssl(OPKELE_CP_ "failed to BIO_write()"); 44 throw exception_openssl(OPKELE_CP_ "failed to BIO_write()");
46 if(BIO_flush(b64)!=1) 45 if(BIO_flush(b64)!=1)
47 throw exception_openssl(OPKELE_CP_ "failed to BIO_flush()"); 46 throw exception_openssl(OPKELE_CP_ "failed to BIO_flush()");
48 char *rvd; 47 char *rvd;
49 long rvl = BIO_get_mem_data(bmem,&rvd); 48 long rvl = BIO_get_mem_data(bmem,&rvd);
50 string rv(rvd,rvl); 49 string rv(rvd,rvl);
51 BIO_free_all(b64); 50 BIO_free_all(b64);
52 return rv; 51 return rv;
53 }catch(...) { 52 }catch(...) {
54 if(b64) BIO_free_all(b64); 53 if(b64) BIO_free_all(b64);
55 throw; 54 throw;
56 } 55 }
57 } 56 }
58 57
59 void decode_base64(const string& data,vector<unsigned char>& rv) { 58 void decode_base64(const string& data,vector<unsigned char>& rv) {
60 BIO *b64 = 0, *bmem = 0; 59 BIO *b64 = 0, *bmem = 0;
61 rv.clear(); 60 rv.clear();
@@ -111,102 +110,130 @@ namespace opkele {
111 110
112 /* 111 /*
113 * w3c times 112 * w3c times
114 */ 113 */
115 114
116 string time_to_w3c(time_t t) { 115 string time_to_w3c(time_t t) {
117 struct tm tm_t; 116 struct tm tm_t;
118 if(!gmtime_r(&t,&tm_t)) 117 if(!gmtime_r(&t,&tm_t))
119 throw failed_conversion(OPKELE_CP_ "failed to BN_dec2bn()"); 118 throw failed_conversion(OPKELE_CP_ "failed to BN_dec2bn()");
120 char rv[25]; 119 char rv[25];
121 if(!strftime(rv,sizeof(rv)-1,"%Y-%m-%dT%H:%M:%SZ",&tm_t)) 120 if(!strftime(rv,sizeof(rv)-1,"%Y-%m-%dT%H:%M:%SZ",&tm_t))
122 throw failed_conversion(OPKELE_CP_ "failed to strftime()"); 121 throw failed_conversion(OPKELE_CP_ "failed to strftime()");
123 return rv; 122 return rv;
124 } 123 }
125 124
126 time_t w3c_to_time(const string& w) { 125 time_t w3c_to_time(const string& w) {
127 int fraction; 126 int fraction;
128 struct tm tm_t; 127 struct tm tm_t;
129 memset(&tm_t,0,sizeof(tm_t)); 128 memset(&tm_t,0,sizeof(tm_t));
130 if( ( 129 if( (
131 sscanf( 130 sscanf(
132 w.c_str(), 131 w.c_str(),
133 "%04d-%02d-%02dT%02d:%02d:%02dZ", 132 "%04d-%02d-%02dT%02d:%02d:%02dZ",
134 &tm_t.tm_year,&tm_t.tm_mon,&tm_t.tm_mday, 133 &tm_t.tm_year,&tm_t.tm_mon,&tm_t.tm_mday,
135 &tm_t.tm_hour,&tm_t.tm_min,&tm_t.tm_sec 134 &tm_t.tm_hour,&tm_t.tm_min,&tm_t.tm_sec
136 ) != 6 135 ) != 6
137 ) && ( 136 ) && (
138 sscanf( 137 sscanf(
139 w.c_str(), 138 w.c_str(),
140 "%04d-%02d-%02dT%02d:%02d:%02d.%03dZ", 139 "%04d-%02d-%02dT%02d:%02d:%02d.%03dZ",
141 &tm_t.tm_year,&tm_t.tm_mon,&tm_t.tm_mday, 140 &tm_t.tm_year,&tm_t.tm_mon,&tm_t.tm_mday,
142 &tm_t.tm_hour,&tm_t.tm_min,&tm_t.tm_sec, 141 &tm_t.tm_hour,&tm_t.tm_min,&tm_t.tm_sec,
143 &fraction 142 &fraction
144 ) != 7 143 ) != 7
145 ) ) 144 ) )
146 throw failed_conversion(OPKELE_CP_ "failed to sscanf()"); 145 throw failed_conversion(OPKELE_CP_ "failed to sscanf()");
147 tm_t.tm_mon--; 146 tm_t.tm_mon--;
148 tm_t.tm_year-=1900; 147 tm_t.tm_year-=1900;
149 time_t rv = mktime(&tm_t); 148 time_t rv = mktime(&tm_t);
150 if(rv==(time_t)-1) 149 if(rv==(time_t)-1)
151 throw failed_conversion(OPKELE_CP_ "failed to mktime()"); 150 throw failed_conversion(OPKELE_CP_ "failed to mktime()");
152 return rv-timezone; 151 return rv-timezone;
153 } 152 }
154 153
155 /* 154 /*
156 * 155 *
157 */ 156 */
158 157
158 static inline bool isrfc3986unreserved(int c) {
159 if(c<'-') return false;
160 if(c<='.') return true;
161 if(c<'0') return false; if(c<='9') return true;
162 if(c<'A') return false; if(c<='Z') return true;
163 if(c<'_') return false;
164 if(c=='_') return true;
165 if(c<'a') return false; if(c<='z') return true;
166 if(c=='~') return true;
167 return false;
168 }
169
170 struct __url_encoder : public unary_function<char,void> {
171 public:
172 string& rv;
173
174 __url_encoder(string& r) : rv(r) { }
175
176 result_type operator()(argument_type c) {
177 if(isrfc3986unreserved(c))
178 rv += c;
179 else{
180 char tmp[4];
181 snprintf(tmp,sizeof(tmp),"%%%02X",
182 (c&0xff));
183 rv += tmp;
184 }
185 }
186 };
187
159 string url_encode(const string& str) { 188 string url_encode(const string& str) {
160 char * t = curl_escape(str.c_str(),str.length()); 189 string rv;
161 if(!t) 190 for_each(str.begin(),str.end(),
162 throw failed_conversion(OPKELE_CP_ "failed to curl_escape()"); 191 __url_encoder(rv));
163 string rv(t);
164 curl_free(t);
165 return rv; 192 return rv;
166 } 193 }
167 194
168 string attr_escape(const string& str) { 195 string attr_escape(const string& str) {
169 static const char *unsafechars = "<>&\n\"'"; 196 static const char *unsafechars = "<>&\n\"'";
170 string rv; 197 string rv;
171 string::size_type p=0; 198 string::size_type p=0;
172 while(true) { 199 while(true) {
173 string::size_type us = str.find_first_of(unsafechars,p); 200 string::size_type us = str.find_first_of(unsafechars,p);
174 if(us==string::npos) { 201 if(us==string::npos) {
175 if(p!=str.length()) 202 if(p!=str.length())
176 rv.append(str,p,str.length()-p); 203 rv.append(str,p,str.length()-p);
177 return rv; 204 return rv;
178 } 205 }
179 rv.append(str,p,us-p); 206 rv.append(str,p,us-p);
180 rv += "&#"; 207 rv += "&#";
181 rv += long_to_string((long)str[us]); 208 rv += long_to_string((long)str[us]);
182 rv += ';'; 209 rv += ';';
183 p = us+1; 210 p = us+1;
184 } 211 }
185 } 212 }
186 213
187 string long_to_string(long l) { 214 string long_to_string(long l) {
188 char rv[32]; 215 char rv[32];
189 int r=snprintf(rv,sizeof(rv),"%ld",l); 216 int r=snprintf(rv,sizeof(rv),"%ld",l);
190 if(r<0 || r>=(int)sizeof(rv)) 217 if(r<0 || r>=(int)sizeof(rv))
191 throw failed_conversion(OPKELE_CP_ "failed to snprintf()"); 218 throw failed_conversion(OPKELE_CP_ "failed to snprintf()");
192 return rv; 219 return rv;
193 } 220 }
194 221
195 long string_to_long(const string& s) { 222 long string_to_long(const string& s) {
196 char *endptr = 0; 223 char *endptr = 0;
197 long rv = strtol(s.c_str(),&endptr,10); 224 long rv = strtol(s.c_str(),&endptr,10);
198 if((!endptr) || endptr==s.c_str()) 225 if((!endptr) || endptr==s.c_str())
199 throw failed_conversion(OPKELE_CP_ "failed to strtol()"); 226 throw failed_conversion(OPKELE_CP_ "failed to strtol()");
200 return rv; 227 return rv;
201 } 228 }
202 229
203 /* 230 /*
204 * Normalize URL according to the rules, described in rfc 3986, section 6 231 * Normalize URL according to the rules, described in rfc 3986, section 6
205 * 232 *
206 * - uppercase hex triplets (e.g. %ab -> %AB) 233 * - uppercase hex triplets (e.g. %ab -> %AB)
207 * - lowercase scheme and host 234 * - lowercase scheme and host
208 * - decode %-encoded characters, specified as unreserved in rfc 3986, section 2.3, 235 * - decode %-encoded characters, specified as unreserved in rfc 3986, section 2.3,
209 * that is - [:alpha:][:digit:]._~- 236 * that is - [:alpha:][:digit:]._~-
210 * - remove dot segments 237 * - remove dot segments
211 * - remove empty and default ports 238 * - remove empty and default ports
212 * - if there's no path component, add '/' 239 * - if there's no path component, add '/'