author | Mark Lodato <lodatom@gmail.com> | 2010-02-09 15:12:43 (UTC) |
---|---|---|
committer | Mark Lodato <lodatom@gmail.com> | 2010-02-09 15:12:43 (UTC) |
commit | a2c6355f9fdede78ce46aeee39ef649637aaadf9 (patch) (unidiff) | |
tree | 4ed595f688691e7a35c5684ca59164bcc777b74c | |
parent | 8aab27f24de70acfbdcee31c634a4b1facf23b92 (diff) | |
download | cgit-a2c6355f9fdede78ce46aeee39ef649637aaadf9.zip cgit-a2c6355f9fdede78ce46aeee39ef649637aaadf9.tar.gz cgit-a2c6355f9fdede78ce46aeee39ef649637aaadf9.tar.bz2 |
html: properly percent-escape URLs
The only valid characters for a URL are the unreserved characters
a-zA-Z0-9_-.~ and the reserved characters !*'();:@&=+$,/?%#[], as per
RFC 3986. Everything else must be escaped. Additionally, # and ?
always have special meaning, and &, =, and + have special meaning
in a query string, so they too must be escaped. To make this easier,
a table of escapes is now used so that we do not have to call fmt() for
each character; if the entry is 0, no escaping is needed.
Signed-off-by: Mark Lodato <lodatom@gmail.com>
-rw-r--r-- | html.c | 36 |
1 files changed, 32 insertions, 4 deletions
@@ -1,291 +1,319 @@ | |||
1 | /* html.c: helper functions for html output | 1 | /* html.c: helper functions for html output |
2 | * | 2 | * |
3 | * Copyright (C) 2006 Lars Hjemli | 3 | * Copyright (C) 2006 Lars Hjemli |
4 | * | 4 | * |
5 | * Licensed under GNU General Public License v2 | 5 | * Licensed under GNU General Public License v2 |
6 | * (see COPYING for full license text) | 6 | * (see COPYING for full license text) |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <unistd.h> | 9 | #include <unistd.h> |
10 | #include <stdio.h> | 10 | #include <stdio.h> |
11 | #include <stdlib.h> | 11 | #include <stdlib.h> |
12 | #include <stdarg.h> | 12 | #include <stdarg.h> |
13 | #include <string.h> | 13 | #include <string.h> |
14 | #include <errno.h> | 14 | #include <errno.h> |
15 | 15 | ||
16 | /* Percent-encoding of each character, except: a-zA-Z0-9!$()*,./:;@[]_~- (entry is 0: no escaping needed) */ | ||
17 | static const char* url_escape_table[256] = { | ||
18 | "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", "%08", "%09", | ||
19 | "%0a", "%0b", "%0c", "%0d", "%0e", "%0f", "%10", "%11", "%12", "%13", | ||
20 | "%14", "%15", "%16", "%17", "%18", "%19", "%1a", "%1b", "%1c", "%1d", | ||
21 | "%1e", "%1f", "%20", 0, "%22", "%23", 0, "%25", "%26", "%27", 0, 0, 0, | ||
22 | "%2b", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "%3c", "%3d", | ||
23 | "%3e", "%3f", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
24 | 0, 0, 0, 0, 0, 0, 0, 0, 0, "%5c", 0, "%5e", 0, "%60", 0, 0, 0, 0, 0, | ||
25 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "%7b", | ||
26 | "%7c", "%7d", 0, "%7f", "%80", "%81", "%82", "%83", "%84", "%85", | ||
27 | "%86", "%87", "%88", "%89", "%8a", "%8b", "%8c", "%8d", "%8e", "%8f", | ||
28 | "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", "%98", "%99", | ||
29 | "%9a", "%9b", "%9c", "%9d", "%9e", "%9f", "%a0", "%a1", "%a2", "%a3", | ||
30 | "%a4", "%a5", "%a6", "%a7", "%a8", "%a9", "%aa", "%ab", "%ac", "%ad", | ||
31 | "%ae", "%af", "%b0", "%b1", "%b2", "%b3", "%b4", "%b5", "%b6", "%b7", | ||
32 | "%b8", "%b9", "%ba", "%bb", "%bc", "%bd", "%be", "%bf", "%c0", "%c1", | ||
33 | "%c2", "%c3", "%c4", "%c5", "%c6", "%c7", "%c8", "%c9", "%ca", "%cb", | ||
34 | "%cc", "%cd", "%ce", "%cf", "%d0", "%d1", "%d2", "%d3", "%d4", "%d5", | ||
35 | "%d6", "%d7", "%d8", "%d9", "%da", "%db", "%dc", "%dd", "%de", "%df", | ||
36 | "%e0", "%e1", "%e2", "%e3", "%e4", "%e5", "%e6", "%e7", "%e8", "%e9", | ||
37 | "%ea", "%eb", "%ec", "%ed", "%ee", "%ef", "%f0", "%f1", "%f2", "%f3", | ||
38 | "%f4", "%f5", "%f6", "%f7", "%f8", "%f9", "%fa", "%fb", "%fc", "%fd", | ||
39 | "%fe", "%ff" | ||
40 | }; | ||
41 | |||
16 | int htmlfd = STDOUT_FILENO; | 42 | int htmlfd = STDOUT_FILENO; |
17 | 43 | ||
18 | char *fmt(const char *format, ...) | 44 | char *fmt(const char *format, ...) |
19 | { | 45 | { |
20 | static char buf[8][1024]; | 46 | static char buf[8][1024]; |
21 | static int bufidx; | 47 | static int bufidx; |
22 | int len; | 48 | int len; |
23 | va_list args; | 49 | va_list args; |
24 | 50 | ||
25 | bufidx++; | 51 | bufidx++; |
26 | bufidx &= 7; | 52 | bufidx &= 7; |
27 | 53 | ||
28 | va_start(args, format); | 54 | va_start(args, format); |
29 | len = vsnprintf(buf[bufidx], sizeof(buf[bufidx]), format, args); | 55 | len = vsnprintf(buf[bufidx], sizeof(buf[bufidx]), format, args); |
30 | va_end(args); | 56 | va_end(args); |
31 | if (len>sizeof(buf[bufidx])) { | 57 | if (len>sizeof(buf[bufidx])) { |
32 | fprintf(stderr, "[html.c] string truncated: %s\n", format); | 58 | fprintf(stderr, "[html.c] string truncated: %s\n", format); |
33 | exit(1); | 59 | exit(1); |
34 | } | 60 | } |
35 | return buf[bufidx]; | 61 | return buf[bufidx]; |
36 | } | 62 | } |
37 | 63 | ||
38 | void html_raw(const char *data, size_t size) | 64 | void html_raw(const char *data, size_t size) |
39 | { | 65 | { |
40 | write(htmlfd, data, size); | 66 | write(htmlfd, data, size); |
41 | } | 67 | } |
42 | 68 | ||
43 | void html(const char *txt) | 69 | void html(const char *txt) |
44 | { | 70 | { |
45 | write(htmlfd, txt, strlen(txt)); | 71 | write(htmlfd, txt, strlen(txt)); |
46 | } | 72 | } |
47 | 73 | ||
48 | void htmlf(const char *format, ...) | 74 | void htmlf(const char *format, ...) |
49 | { | 75 | { |
50 | static char buf[65536]; | 76 | static char buf[65536]; |
51 | va_list args; | 77 | va_list args; |
52 | 78 | ||
53 | va_start(args, format); | 79 | va_start(args, format); |
54 | vsnprintf(buf, sizeof(buf), format, args); | 80 | vsnprintf(buf, sizeof(buf), format, args); |
55 | va_end(args); | 81 | va_end(args); |
56 | html(buf); | 82 | html(buf); |
57 | } | 83 | } |
58 | 84 | ||
59 | void html_status(int code, const char *msg, int more_headers) | 85 | void html_status(int code, const char *msg, int more_headers) |
60 | { | 86 | { |
61 | htmlf("Status: %d %s\n", code, msg); | 87 | htmlf("Status: %d %s\n", code, msg); |
62 | if (!more_headers) | 88 | if (!more_headers) |
63 | html("\n"); | 89 | html("\n"); |
64 | } | 90 | } |
65 | 91 | ||
66 | void html_txt(const char *txt) | 92 | void html_txt(const char *txt) |
67 | { | 93 | { |
68 | const char *t = txt; | 94 | const char *t = txt; |
69 | while(t && *t){ | 95 | while(t && *t){ |
70 | int c = *t; | 96 | int c = *t; |
71 | if (c=='<' || c=='>' || c=='&') { | 97 | if (c=='<' || c=='>' || c=='&') { |
72 | write(htmlfd, txt, t - txt); | 98 | write(htmlfd, txt, t - txt); |
73 | if (c=='>') | 99 | if (c=='>') |
74 | html("&gt;"); | 100 | html("&gt;"); |
75 | else if (c=='<') | 101 | else if (c=='<') |
76 | html("&lt;"); | 102 | html("&lt;"); |
77 | else if (c=='&') | 103 | else if (c=='&') |
78 | html("&amp;"); | 104 | html("&amp;"); |
79 | txt = t+1; | 105 | txt = t+1; |
80 | } | 106 | } |
81 | t++; | 107 | t++; |
82 | } | 108 | } |
83 | if (t!=txt) | 109 | if (t!=txt) |
84 | html(txt); | 110 | html(txt); |
85 | } | 111 | } |
86 | 112 | ||
87 | void html_ntxt(int len, const char *txt) | 113 | void html_ntxt(int len, const char *txt) |
88 | { | 114 | { |
89 | const char *t = txt; | 115 | const char *t = txt; |
90 | while(t && *t && len--){ | 116 | while(t && *t && len--){ |
91 | int c = *t; | 117 | int c = *t; |
92 | if (c=='<' || c=='>' || c=='&') { | 118 | if (c=='<' || c=='>' || c=='&') { |
93 | write(htmlfd, txt, t - txt); | 119 | write(htmlfd, txt, t - txt); |
94 | if (c=='>') | 120 | if (c=='>') |
95 | html("&gt;"); | 121 | html("&gt;"); |
96 | else if (c=='<') | 122 | else if (c=='<') |
97 | html("&lt;"); | 123 | html("&lt;"); |
98 | else if (c=='&') | 124 | else if (c=='&') |
99 | html("&amp;"); | 125 | html("&amp;"); |
100 | txt = t+1; | 126 | txt = t+1; |
101 | } | 127 | } |
102 | t++; | 128 | t++; |
103 | } | 129 | } |
104 | if (t!=txt) | 130 | if (t!=txt) |
105 | write(htmlfd, txt, t - txt); | 131 | write(htmlfd, txt, t - txt); |
106 | if (len<0) | 132 | if (len<0) |
107 | html("..."); | 133 | html("..."); |
108 | } | 134 | } |
109 | 135 | ||
110 | void html_attr(const char *txt) | 136 | void html_attr(const char *txt) |
111 | { | 137 | { |
112 | const char *t = txt; | 138 | const char *t = txt; |
113 | while(t && *t){ | 139 | while(t && *t){ |
114 | int c = *t; | 140 | int c = *t; |
115 | if (c=='<' || c=='>' || c=='\'' || c=='\"') { | 141 | if (c=='<' || c=='>' || c=='\'' || c=='\"') { |
116 | write(htmlfd, txt, t - txt); | 142 | write(htmlfd, txt, t - txt); |
117 | if (c=='>') | 143 | if (c=='>') |
118 | html("&gt;"); | 144 | html("&gt;"); |
119 | else if (c=='<') | 145 | else if (c=='<') |
120 | html("&lt;"); | 146 | html("&lt;"); |
121 | else if (c=='\'') | 147 | else if (c=='\'') |
122 | html("&#x27;"); | 148 | html("&#x27;"); |
123 | else if (c=='"') | 149 | else if (c=='"') |
124 | html("&quot;"); | 150 | html("&quot;"); |
125 | txt = t+1; | 151 | txt = t+1; |
126 | } | 152 | } |
127 | t++; | 153 | t++; |
128 | } | 154 | } |
129 | if (t!=txt) | 155 | if (t!=txt) |
130 | html(txt); | 156 | html(txt); |
131 | } | 157 | } |
132 | 158 | ||
133 | void html_url_path(const char *txt) | 159 | void html_url_path(const char *txt) |
134 | { | 160 | { |
135 | const char *t = txt; | 161 | const char *t = txt; |
136 | while(t && *t){ | 162 | while(t && *t){ |
137 | int c = *t; | 163 | int c = *t; |
138 | if (c=='"' || c=='#' || c=='\'' || c=='?') { | 164 | const char *e = url_escape_table[c]; |
165 | if (e && c!='+' && c!='&' && c!='+') { | ||
139 | write(htmlfd, txt, t - txt); | 166 | write(htmlfd, txt, t - txt); |
140 | write(htmlfd, fmt("%%%2x", c), 3); | 167 | write(htmlfd, e, 3); |
141 | txt = t+1; | 168 | txt = t+1; |
142 | } | 169 | } |
143 | t++; | 170 | t++; |
144 | } | 171 | } |
145 | if (t!=txt) | 172 | if (t!=txt) |
146 | html(txt); | 173 | html(txt); |
147 | } | 174 | } |
148 | 175 | ||
149 | void html_url_arg(const char *txt) | 176 | void html_url_arg(const char *txt) |
150 | { | 177 | { |
151 | const char *t = txt; | 178 | const char *t = txt; |
152 | while(t && *t){ | 179 | while(t && *t){ |
153 | int c = *t; | 180 | int c = *t; |
154 | if (c=='"' || c=='#' || c=='%' || c=='&' || c=='\'' || c=='+' || c=='?') { | 181 | const char *e = url_escape_table[c]; |
182 | if (e) { | ||
155 | write(htmlfd, txt, t - txt); | 183 | write(htmlfd, txt, t - txt); |
156 | write(htmlfd, fmt("%%%2x", c), 3); | 184 | write(htmlfd, e, 3); |
157 | txt = t+1; | 185 | txt = t+1; |
158 | } | 186 | } |
159 | t++; | 187 | t++; |
160 | } | 188 | } |
161 | if (t!=txt) | 189 | if (t!=txt) |
162 | html(txt); | 190 | html(txt); |
163 | } | 191 | } |
164 | 192 | ||
165 | void html_hidden(const char *name, const char *value) | 193 | void html_hidden(const char *name, const char *value) |
166 | { | 194 | { |
167 | html("<input type='hidden' name='"); | 195 | html("<input type='hidden' name='"); |
168 | html_attr(name); | 196 | html_attr(name); |
169 | html("' value='"); | 197 | html("' value='"); |
170 | html_attr(value); | 198 | html_attr(value); |
171 | html("'/>"); | 199 | html("'/>"); |
172 | } | 200 | } |
173 | 201 | ||
174 | void html_option(const char *value, const char *text, const char *selected_value) | 202 | void html_option(const char *value, const char *text, const char *selected_value) |
175 | { | 203 | { |
176 | html("<option value='"); | 204 | html("<option value='"); |
177 | html_attr(value); | 205 | html_attr(value); |
178 | html("'"); | 206 | html("'"); |
179 | if (selected_value && !strcmp(selected_value, value)) | 207 | if (selected_value && !strcmp(selected_value, value)) |
180 | html(" selected='selected'"); | 208 | html(" selected='selected'"); |
181 | html(">"); | 209 | html(">"); |
182 | html_txt(text); | 210 | html_txt(text); |
183 | html("</option>\n"); | 211 | html("</option>\n"); |
184 | } | 212 | } |
185 | 213 | ||
186 | void html_link_open(const char *url, const char *title, const char *class) | 214 | void html_link_open(const char *url, const char *title, const char *class) |
187 | { | 215 | { |
188 | html("<a href='"); | 216 | html("<a href='"); |
189 | html_attr(url); | 217 | html_attr(url); |
190 | if (title) { | 218 | if (title) { |
191 | html("' title='"); | 219 | html("' title='"); |
192 | html_attr(title); | 220 | html_attr(title); |
193 | } | 221 | } |
194 | if (class) { | 222 | if (class) { |
195 | html("' class='"); | 223 | html("' class='"); |
196 | html_attr(class); | 224 | html_attr(class); |
197 | } | 225 | } |
198 | html("'>"); | 226 | html("'>"); |
199 | } | 227 | } |
200 | 228 | ||
201 | void html_link_close(void) | 229 | void html_link_close(void) |
202 | { | 230 | { |
203 | html("</a>"); | 231 | html("</a>"); |
204 | } | 232 | } |
205 | 233 | ||
206 | void html_fileperm(unsigned short mode) | 234 | void html_fileperm(unsigned short mode) |
207 | { | 235 | { |
208 | htmlf("%c%c%c", (mode & 4 ? 'r' : '-'), | 236 | htmlf("%c%c%c", (mode & 4 ? 'r' : '-'), |
209 | (mode & 2 ? 'w' : '-'), (mode & 1 ? 'x' : '-')); | 237 | (mode & 2 ? 'w' : '-'), (mode & 1 ? 'x' : '-')); |
210 | } | 238 | } |
211 | 239 | ||
212 | int html_include(const char *filename) | 240 | int html_include(const char *filename) |
213 | { | 241 | { |
214 | FILE *f; | 242 | FILE *f; |
215 | char buf[4096]; | 243 | char buf[4096]; |
216 | size_t len; | 244 | size_t len; |
217 | 245 | ||
218 | if (!(f = fopen(filename, "r"))) { | 246 | if (!(f = fopen(filename, "r"))) { |
219 | fprintf(stderr, "[cgit] Failed to include file %s: %s (%d).\n", | 247 | fprintf(stderr, "[cgit] Failed to include file %s: %s (%d).\n", |
220 | filename, strerror(errno), errno); | 248 | filename, strerror(errno), errno); |
221 | return -1; | 249 | return -1; |
222 | } | 250 | } |
223 | while((len = fread(buf, 1, 4096, f)) > 0) | 251 | while((len = fread(buf, 1, 4096, f)) > 0) |
224 | write(htmlfd, buf, len); | 252 | write(htmlfd, buf, len); |
225 | fclose(f); | 253 | fclose(f); |
226 | return 0; | 254 | return 0; |
227 | } | 255 | } |
228 | 256 | ||
229 | int hextoint(char c) | 257 | int hextoint(char c) |
230 | { | 258 | { |
231 | if (c >= 'a' && c <= 'f') | 259 | if (c >= 'a' && c <= 'f') |
232 | return 10 + c - 'a'; | 260 | return 10 + c - 'a'; |
233 | else if (c >= 'A' && c <= 'F') | 261 | else if (c >= 'A' && c <= 'F') |
234 | return 10 + c - 'A'; | 262 | return 10 + c - 'A'; |
235 | else if (c >= '0' && c <= '9') | 263 | else if (c >= '0' && c <= '9') |
236 | return c - '0'; | 264 | return c - '0'; |
237 | else | 265 | else |
238 | return -1; | 266 | return -1; |
239 | } | 267 | } |
240 | 268 | ||
241 | char *convert_query_hexchar(char *txt) | 269 | char *convert_query_hexchar(char *txt) |
242 | { | 270 | { |
243 | int d1, d2; | 271 | int d1, d2; |
244 | if (strlen(txt) < 3) { | 272 | if (strlen(txt) < 3) { |
245 | *txt = '\0'; | 273 | *txt = '\0'; |
246 | return txt-1; | 274 | return txt-1; |
247 | } | 275 | } |
248 | d1 = hextoint(*(txt+1)); | 276 | d1 = hextoint(*(txt+1)); |
249 | d2 = hextoint(*(txt+2)); | 277 | d2 = hextoint(*(txt+2)); |
250 | if (d1<0 || d2<0) { | 278 | if (d1<0 || d2<0) { |
251 | strcpy(txt, txt+3); | 279 | strcpy(txt, txt+3); |
252 | return txt-1; | 280 | return txt-1; |
253 | } else { | 281 | } else { |
254 | *txt = d1 * 16 + d2; | 282 | *txt = d1 * 16 + d2; |
255 | strcpy(txt+1, txt+3); | 283 | strcpy(txt+1, txt+3); |
256 | return txt; | 284 | return txt; |
257 | } | 285 | } |
258 | } | 286 | } |
259 | 287 | ||
260 | int http_parse_querystring(const char *txt_, void (*fn)(const char *name, const char *value)) | 288 | int http_parse_querystring(const char *txt_, void (*fn)(const char *name, const char *value)) |
261 | { | 289 | { |
262 | char *t, *txt, *value = NULL, c; | 290 | char *t, *txt, *value = NULL, c; |
263 | 291 | ||
264 | if (!txt_) | 292 | if (!txt_) |
265 | return 0; | 293 | return 0; |
266 | 294 | ||
267 | t = txt = strdup(txt_); | 295 | t = txt = strdup(txt_); |
268 | if (t == NULL) { | 296 | if (t == NULL) { |
269 | printf("Out of memory\n"); | 297 | printf("Out of memory\n"); |
270 | exit(1); | 298 | exit(1); |
271 | } | 299 | } |
272 | while((c=*t) != '\0') { | 300 | while((c=*t) != '\0') { |
273 | if (c=='=') { | 301 | if (c=='=') { |
274 | *t = '\0'; | 302 | *t = '\0'; |
275 | value = t+1; | 303 | value = t+1; |
276 | } else if (c=='+') { | 304 | } else if (c=='+') { |
277 | *t = ' '; | 305 | *t = ' '; |
278 | } else if (c=='%') { | 306 | } else if (c=='%') { |
279 | t = convert_query_hexchar(t); | 307 | t = convert_query_hexchar(t); |
280 | } else if (c=='&') { | 308 | } else if (c=='&') { |
281 | *t = '\0'; | 309 | *t = '\0'; |
282 | (*fn)(txt, value); | 310 | (*fn)(txt, value); |
283 | txt = t+1; | 311 | txt = t+1; |
284 | value = NULL; | 312 | value = NULL; |
285 | } | 313 | } |
286 | t++; | 314 | t++; |
287 | } | 315 | } |
288 | if (t!=txt) | 316 | if (t!=txt) |
289 | (*fn)(txt, value); | 317 | (*fn)(txt, value); |
290 | return 0; | 318 | return 0; |
291 | } | 319 | } |