author | Lars Hjemli <hjemli@gmail.com> | 2007-11-05 21:27:43 (UTC) |
---|---|---|
committer | Jonathan Bastien-Filiatrault <joe@x2a.org> | 2007-11-05 23:13:31 (UTC) |
commit | a2ebbd6948da96172108db5e9c02c141923ad05c (patch) (unidiff) | |
tree | 9c40233ffcd35f1b20bb07a04bdb8c23d649e1ee /parsing.c | |
parent | 7858a309d7671109950ec940f893c2d112d36b99 (diff) | |
download | cgit-a2ebbd6948da96172108db5e9c02c141923ad05c.zip cgit-a2ebbd6948da96172108db5e9c02c141923ad05c.tar.gz cgit-a2ebbd6948da96172108db5e9c02c141923ad05c.tar.bz2 |
Use utf8::reencode_string from git
This replaces the iconv-support in cgit with similar functions already
existing in git.
Signed-off-by: Lars Hjemli <hjemli@gmail.com>
-rw-r--r-- | parsing.c | 64 |
1 files changed, 4 insertions, 60 deletions
@@ -1,388 +1,332 @@ | |||
1 | /* config.c: parsing of config files | 1 | /* config.c: parsing of config files |
2 | * | 2 | * |
3 | * Copyright (C) 2006 Lars Hjemli | 3 | * Copyright (C) 2006 Lars Hjemli |
4 | * | 4 | * |
5 | * Licensed under GNU General Public License v2 | 5 | * Licensed under GNU General Public License v2 |
6 | * (see COPYING for full license text) | 6 | * (see COPYING for full license text) |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <iconv.h> | ||
10 | |||
11 | #include "cgit.h" | 9 | #include "cgit.h" |
12 | 10 | ||
13 | int next_char(FILE *f) | 11 | int next_char(FILE *f) |
14 | { | 12 | { |
15 | int c = fgetc(f); | 13 | int c = fgetc(f); |
16 | if (c=='\r') { | 14 | if (c=='\r') { |
17 | c = fgetc(f); | 15 | c = fgetc(f); |
18 | if (c!='\n') { | 16 | if (c!='\n') { |
19 | ungetc(c, f); | 17 | ungetc(c, f); |
20 | c = '\r'; | 18 | c = '\r'; |
21 | } | 19 | } |
22 | } | 20 | } |
23 | return c; | 21 | return c; |
24 | } | 22 | } |
25 | 23 | ||
26 | void skip_line(FILE *f) | 24 | void skip_line(FILE *f) |
27 | { | 25 | { |
28 | int c; | 26 | int c; |
29 | 27 | ||
30 | while((c=next_char(f)) && c!='\n' && c!=EOF) | 28 | while((c=next_char(f)) && c!='\n' && c!=EOF) |
31 | ; | 29 | ; |
32 | } | 30 | } |
33 | 31 | ||
34 | int read_config_line(FILE *f, char *line, const char **value, int bufsize) | 32 | int read_config_line(FILE *f, char *line, const char **value, int bufsize) |
35 | { | 33 | { |
36 | int i = 0, isname = 0; | 34 | int i = 0, isname = 0; |
37 | 35 | ||
38 | *value = NULL; | 36 | *value = NULL; |
39 | while(i<bufsize-1) { | 37 | while(i<bufsize-1) { |
40 | int c = next_char(f); | 38 | int c = next_char(f); |
41 | if (!isname && (c=='#' || c==';')) { | 39 | if (!isname && (c=='#' || c==';')) { |
42 | skip_line(f); | 40 | skip_line(f); |
43 | continue; | 41 | continue; |
44 | } | 42 | } |
45 | if (!isname && isspace(c)) | 43 | if (!isname && isspace(c)) |
46 | continue; | 44 | continue; |
47 | 45 | ||
48 | if (c=='=' && !*value) { | 46 | if (c=='=' && !*value) { |
49 | line[i] = 0; | 47 | line[i] = 0; |
50 | *value = &line[i+1]; | 48 | *value = &line[i+1]; |
51 | } else if (c=='\n' && !isname) { | 49 | } else if (c=='\n' && !isname) { |
52 | i = 0; | 50 | i = 0; |
53 | continue; | 51 | continue; |
54 | } else if (c=='\n' || c==EOF) { | 52 | } else if (c=='\n' || c==EOF) { |
55 | line[i] = 0; | 53 | line[i] = 0; |
56 | break; | 54 | break; |
57 | } else { | 55 | } else { |
58 | line[i]=c; | 56 | line[i]=c; |
59 | } | 57 | } |
60 | isname = 1; | 58 | isname = 1; |
61 | i++; | 59 | i++; |
62 | } | 60 | } |
63 | line[i+1] = 0; | 61 | line[i+1] = 0; |
64 | return i; | 62 | return i; |
65 | } | 63 | } |
66 | 64 | ||
67 | int cgit_read_config(const char *filename, configfn fn) | 65 | int cgit_read_config(const char *filename, configfn fn) |
68 | { | 66 | { |
69 | static int nesting; | 67 | static int nesting; |
70 | int len; | 68 | int len; |
71 | char line[256]; | 69 | char line[256]; |
72 | const char *value; | 70 | const char *value; |
73 | FILE *f; | 71 | FILE *f; |
74 | 72 | ||
75 | /* cancel deeply nested include-commands */ | 73 | /* cancel deeply nested include-commands */ |
76 | if (nesting > 8) | 74 | if (nesting > 8) |
77 | return -1; | 75 | return -1; |
78 | if (!(f = fopen(filename, "r"))) | 76 | if (!(f = fopen(filename, "r"))) |
79 | return -1; | 77 | return -1; |
80 | nesting++; | 78 | nesting++; |
81 | while((len = read_config_line(f, line, &value, sizeof(line))) > 0) | 79 | while((len = read_config_line(f, line, &value, sizeof(line))) > 0) |
82 | (*fn)(line, value); | 80 | (*fn)(line, value); |
83 | nesting--; | 81 | nesting--; |
84 | fclose(f); | 82 | fclose(f); |
85 | return 0; | 83 | return 0; |
86 | } | 84 | } |
87 | 85 | ||
88 | char *convert_query_hexchar(char *txt) | 86 | char *convert_query_hexchar(char *txt) |
89 | { | 87 | { |
90 | int d1, d2; | 88 | int d1, d2; |
91 | if (strlen(txt) < 3) { | 89 | if (strlen(txt) < 3) { |
92 | *txt = '\0'; | 90 | *txt = '\0'; |
93 | return txt-1; | 91 | return txt-1; |
94 | } | 92 | } |
95 | d1 = hextoint(*(txt+1)); | 93 | d1 = hextoint(*(txt+1)); |
96 | d2 = hextoint(*(txt+2)); | 94 | d2 = hextoint(*(txt+2)); |
97 | if (d1<0 || d2<0) { | 95 | if (d1<0 || d2<0) { |
98 | strcpy(txt, txt+3); | 96 | strcpy(txt, txt+3); |
99 | return txt-1; | 97 | return txt-1; |
100 | } else { | 98 | } else { |
101 | *txt = d1 * 16 + d2; | 99 | *txt = d1 * 16 + d2; |
102 | strcpy(txt+1, txt+3); | 100 | strcpy(txt+1, txt+3); |
103 | return txt; | 101 | return txt; |
104 | } | 102 | } |
105 | } | 103 | } |
106 | 104 | ||
107 | int cgit_parse_query(char *txt, configfn fn) | 105 | int cgit_parse_query(char *txt, configfn fn) |
108 | { | 106 | { |
109 | char *t, *value = NULL, c; | 107 | char *t, *value = NULL, c; |
110 | 108 | ||
111 | if (!txt) | 109 | if (!txt) |
112 | return 0; | 110 | return 0; |
113 | 111 | ||
114 | t = txt = xstrdup(txt); | 112 | t = txt = xstrdup(txt); |
115 | 113 | ||
116 | while((c=*t) != '\0') { | 114 | while((c=*t) != '\0') { |
117 | if (c=='=') { | 115 | if (c=='=') { |
118 | *t = '\0'; | 116 | *t = '\0'; |
119 | value = t+1; | 117 | value = t+1; |
120 | } else if (c=='+') { | 118 | } else if (c=='+') { |
121 | *t = ' '; | 119 | *t = ' '; |
122 | } else if (c=='%') { | 120 | } else if (c=='%') { |
123 | t = convert_query_hexchar(t); | 121 | t = convert_query_hexchar(t); |
124 | } else if (c=='&') { | 122 | } else if (c=='&') { |
125 | *t = '\0'; | 123 | *t = '\0'; |
126 | (*fn)(txt, value); | 124 | (*fn)(txt, value); |
127 | txt = t+1; | 125 | txt = t+1; |
128 | value = NULL; | 126 | value = NULL; |
129 | } | 127 | } |
130 | t++; | 128 | t++; |
131 | } | 129 | } |
132 | if (t!=txt) | 130 | if (t!=txt) |
133 | (*fn)(txt, value); | 131 | (*fn)(txt, value); |
134 | return 0; | 132 | return 0; |
135 | } | 133 | } |
136 | 134 | ||
137 | /* | 135 | /* |
138 | * url syntax: [repo ['/' cmd [ '/' path]]] | 136 | * url syntax: [repo ['/' cmd [ '/' path]]] |
139 | * repo: any valid repo url, may contain '/' | 137 | * repo: any valid repo url, may contain '/' |
140 | * cmd: log | commit | diff | tree | view | blob | snapshot | 138 | * cmd: log | commit | diff | tree | view | blob | snapshot |
141 | * path: any valid path, may contain '/' | 139 | * path: any valid path, may contain '/' |
142 | * | 140 | * |
143 | */ | 141 | */ |
144 | void cgit_parse_url(const char *url) | 142 | void cgit_parse_url(const char *url) |
145 | { | 143 | { |
146 | char *cmd, *p; | 144 | char *cmd, *p; |
147 | 145 | ||
148 | cgit_repo = NULL; | 146 | cgit_repo = NULL; |
149 | if (!url || url[0] == '\0') | 147 | if (!url || url[0] == '\0') |
150 | return; | 148 | return; |
151 | 149 | ||
152 | cgit_repo = cgit_get_repoinfo(url); | 150 | cgit_repo = cgit_get_repoinfo(url); |
153 | if (cgit_repo) { | 151 | if (cgit_repo) { |
154 | cgit_query_repo = cgit_repo->url; | 152 | cgit_query_repo = cgit_repo->url; |
155 | return; | 153 | return; |
156 | } | 154 | } |
157 | 155 | ||
158 | cmd = strchr(url, '/'); | 156 | cmd = strchr(url, '/'); |
159 | while (!cgit_repo && cmd) { | 157 | while (!cgit_repo && cmd) { |
160 | cmd[0] = '\0'; | 158 | cmd[0] = '\0'; |
161 | cgit_repo = cgit_get_repoinfo(url); | 159 | cgit_repo = cgit_get_repoinfo(url); |
162 | if (cgit_repo == NULL) { | 160 | if (cgit_repo == NULL) { |
163 | cmd[0] = '/'; | 161 | cmd[0] = '/'; |
164 | cmd = strchr(cmd + 1, '/'); | 162 | cmd = strchr(cmd + 1, '/'); |
165 | continue; | 163 | continue; |
166 | } | 164 | } |
167 | 165 | ||
168 | cgit_query_repo = cgit_repo->url; | 166 | cgit_query_repo = cgit_repo->url; |
169 | p = strchr(cmd + 1, '/'); | 167 | p = strchr(cmd + 1, '/'); |
170 | if (p) { | 168 | if (p) { |
171 | p[0] = '\0'; | 169 | p[0] = '\0'; |
172 | if (p[1]) | 170 | if (p[1]) |
173 | cgit_query_path = trim_end(p + 1, '/'); | 171 | cgit_query_path = trim_end(p + 1, '/'); |
174 | } | 172 | } |
175 | cgit_cmd = cgit_get_cmd_index(cmd + 1); | 173 | cgit_cmd = cgit_get_cmd_index(cmd + 1); |
176 | cgit_query_page = xstrdup(cmd + 1); | 174 | cgit_query_page = xstrdup(cmd + 1); |
177 | return; | 175 | return; |
178 | } | 176 | } |
179 | } | 177 | } |
180 | 178 | ||
181 | static char *iconv_msg(char *msg, const char *encoding) | ||
182 | { | ||
183 | iconv_t msg_conv = iconv_open(PAGE_ENCODING, encoding); | ||
184 | size_t inlen = strlen(msg); | ||
185 | char *in; | ||
186 | char *out; | ||
187 | size_t inleft; | ||
188 | size_t outleft; | ||
189 | char *buf; | ||
190 | char *ret; | ||
191 | size_t buf_sz; | ||
192 | int again, fail; | ||
193 | |||
194 | if(msg_conv == (iconv_t)-1) | ||
195 | return NULL; | ||
196 | |||
197 | buf_sz = inlen * 2; | ||
198 | buf = xmalloc(buf_sz+1); | ||
199 | do { | ||
200 | in = msg; | ||
201 | inleft = inlen; | ||
202 | |||
203 | out = buf; | ||
204 | outleft = buf_sz; | ||
205 | iconv(msg_conv, &in, &inleft, &out, &outleft); | ||
206 | |||
207 | if(inleft == 0) { | ||
208 | fail = 0; | ||
209 | again = 0; | ||
210 | } else if(inleft != 0 && errno == E2BIG) { | ||
211 | fail = 0; | ||
212 | again = 1; | ||
213 | |||
214 | buf_sz *= 2; | ||
215 | free(buf); | ||
216 | buf = xmalloc(buf_sz+1); | ||
217 | } else { | ||
218 | fail = 1; | ||
219 | again = 0; | ||
220 | } | ||
221 | } while(again && !fail); | ||
222 | |||
223 | if(fail) { | ||
224 | free(buf); | ||
225 | ret = NULL; | ||
226 | } else { | ||
227 | buf = xrealloc(buf, out - buf); | ||
228 | *out = 0; | ||
229 | ret = buf; | ||
230 | } | ||
231 | |||
232 | iconv_close(msg_conv); | ||
233 | |||
234 | return ret; | ||
235 | } | ||
236 | |||
237 | char *substr(const char *head, const char *tail) | 179 | char *substr(const char *head, const char *tail) |
238 | { | 180 | { |
239 | char *buf; | 181 | char *buf; |
240 | 182 | ||
241 | buf = xmalloc(tail - head + 1); | 183 | buf = xmalloc(tail - head + 1); |
242 | strncpy(buf, head, tail - head); | 184 | strncpy(buf, head, tail - head); |
243 | buf[tail - head] = '\0'; | 185 | buf[tail - head] = '\0'; |
244 | return buf; | 186 | return buf; |
245 | } | 187 | } |
246 | 188 | ||
247 | struct commitinfo *cgit_parse_commit(struct commit *commit) | 189 | struct commitinfo *cgit_parse_commit(struct commit *commit) |
248 | { | 190 | { |
249 | struct commitinfo *ret; | 191 | struct commitinfo *ret; |
250 | char *p = commit->buffer, *t = commit->buffer; | 192 | char *p = commit->buffer, *t = commit->buffer; |
251 | 193 | ||
252 | ret = xmalloc(sizeof(*ret)); | 194 | ret = xmalloc(sizeof(*ret)); |
253 | ret->commit = commit; | 195 | ret->commit = commit; |
254 | ret->author = NULL; | 196 | ret->author = NULL; |
255 | ret->author_email = NULL; | 197 | ret->author_email = NULL; |
256 | ret->committer = NULL; | 198 | ret->committer = NULL; |
257 | ret->committer_email = NULL; | 199 | ret->committer_email = NULL; |
258 | ret->subject = NULL; | 200 | ret->subject = NULL; |
259 | ret->msg = NULL; | 201 | ret->msg = NULL; |
260 | ret->msg_encoding = NULL; | 202 | ret->msg_encoding = NULL; |
261 | 203 | ||
262 | if (p == NULL) | 204 | if (p == NULL) |
263 | return ret; | 205 | return ret; |
264 | 206 | ||
265 | if (strncmp(p, "tree ", 5)) | 207 | if (strncmp(p, "tree ", 5)) |
266 | die("Bad commit: %s", sha1_to_hex(commit->object.sha1)); | 208 | die("Bad commit: %s", sha1_to_hex(commit->object.sha1)); |
267 | else | 209 | else |
268 | p += 46; // "tree " + hex[40] + "\n" | 210 | p += 46; // "tree " + hex[40] + "\n" |
269 | 211 | ||
270 | while (!strncmp(p, "parent ", 7)) | 212 | while (!strncmp(p, "parent ", 7)) |
271 | p += 48; // "parent " + hex[40] + "\n" | 213 | p += 48; // "parent " + hex[40] + "\n" |
272 | 214 | ||
273 | if (!strncmp(p, "author ", 7)) { | 215 | if (!strncmp(p, "author ", 7)) { |
274 | p += 7; | 216 | p += 7; |
275 | t = strchr(p, '<') - 1; | 217 | t = strchr(p, '<') - 1; |
276 | ret->author = substr(p, t); | 218 | ret->author = substr(p, t); |
277 | p = t; | 219 | p = t; |
278 | t = strchr(t, '>') + 1; | 220 | t = strchr(t, '>') + 1; |
279 | ret->author_email = substr(p, t); | 221 | ret->author_email = substr(p, t); |
280 | ret->author_date = atol(++t); | 222 | ret->author_date = atol(++t); |
281 | p = strchr(t, '\n') + 1; | 223 | p = strchr(t, '\n') + 1; |
282 | } | 224 | } |
283 | 225 | ||
284 | if (!strncmp(p, "committer ", 9)) { | 226 | if (!strncmp(p, "committer ", 9)) { |
285 | p += 9; | 227 | p += 9; |
286 | t = strchr(p, '<') - 1; | 228 | t = strchr(p, '<') - 1; |
287 | ret->committer = substr(p, t); | 229 | ret->committer = substr(p, t); |
288 | p = t; | 230 | p = t; |
289 | t = strchr(t, '>') + 1; | 231 | t = strchr(t, '>') + 1; |
290 | ret->committer_email = substr(p, t); | 232 | ret->committer_email = substr(p, t); |
291 | ret->committer_date = atol(++t); | 233 | ret->committer_date = atol(++t); |
292 | p = strchr(t, '\n') + 1; | 234 | p = strchr(t, '\n') + 1; |
293 | } | 235 | } |
294 | 236 | ||
295 | if (!strncmp(p, "encoding ", 9)) { | 237 | if (!strncmp(p, "encoding ", 9)) { |
296 | p += 9; | 238 | p += 9; |
297 | t = strchr(p, '\n') + 1; | 239 | t = strchr(p, '\n') + 1; |
298 | ret->msg_encoding = substr(p, t); | 240 | ret->msg_encoding = substr(p, t); |
299 | p = t; | 241 | p = t; |
300 | } else | 242 | } else |
301 | ret->msg_encoding = xstrdup(PAGE_ENCODING); | 243 | ret->msg_encoding = xstrdup(PAGE_ENCODING); |
302 | 244 | ||
303 | while (*p && (*p != '\n')) | 245 | while (*p && (*p != '\n')) |
304 | p = strchr(p, '\n') + 1; // skip unknown header fields | 246 | p = strchr(p, '\n') + 1; // skip unknown header fields |
305 | 247 | ||
306 | while (*p == '\n') | 248 | while (*p == '\n') |
307 | p = strchr(p, '\n') + 1; | 249 | p = strchr(p, '\n') + 1; |
308 | 250 | ||
309 | t = strchr(p, '\n'); | 251 | t = strchr(p, '\n'); |
310 | if (t) { | 252 | if (t) { |
311 | if (*t == '\0') | 253 | if (*t == '\0') |
312 | ret->subject = "** empty **"; | 254 | ret->subject = "** empty **"; |
313 | else | 255 | else |
314 | ret->subject = substr(p, t); | 256 | ret->subject = substr(p, t); |
315 | p = t + 1; | 257 | p = t + 1; |
316 | 258 | ||
317 | while (*p == '\n') | 259 | while (*p == '\n') |
318 | p = strchr(p, '\n') + 1; | 260 | p = strchr(p, '\n') + 1; |
319 | ret->msg = xstrdup(p); | 261 | ret->msg = xstrdup(p); |
320 | } else | 262 | } else |
321 | ret->subject = substr(p, p+strlen(p)); | 263 | ret->subject = substr(p, p+strlen(p)); |
322 | 264 | ||
323 | if(strcmp(ret->msg_encoding, PAGE_ENCODING)) { | 265 | if(strcmp(ret->msg_encoding, PAGE_ENCODING)) { |
324 | t = iconv_msg(ret->subject, ret->msg_encoding); | 266 | t = reencode_string(ret->subject, PAGE_ENCODING, |
267 | ret->msg_encoding); | ||
325 | if(t) { | 268 | if(t) { |
326 | free(ret->subject); | 269 | free(ret->subject); |
327 | ret->subject = t; | 270 | ret->subject = t; |
328 | } | 271 | } |
329 | 272 | ||
330 | t = iconv_msg(ret->msg, ret->msg_encoding); | 273 | t = reencode_string(ret->msg, PAGE_ENCODING, |
274 | ret->msg_encoding); | ||
331 | if(t) { | 275 | if(t) { |
332 | free(ret->msg); | 276 | free(ret->msg); |
333 | ret->msg = t; | 277 | ret->msg = t; |
334 | } | 278 | } |
335 | } | 279 | } |
336 | 280 | ||
337 | return ret; | 281 | return ret; |
338 | } | 282 | } |
339 | 283 | ||
340 | 284 | ||
341 | struct taginfo *cgit_parse_tag(struct tag *tag) | 285 | struct taginfo *cgit_parse_tag(struct tag *tag) |
342 | { | 286 | { |
343 | void *data; | 287 | void *data; |
344 | enum object_type type; | 288 | enum object_type type; |
345 | unsigned long size; | 289 | unsigned long size; |
346 | char *p, *t; | 290 | char *p, *t; |
347 | struct taginfo *ret; | 291 | struct taginfo *ret; |
348 | 292 | ||
349 | data = read_sha1_file(tag->object.sha1, &type, &size); | 293 | data = read_sha1_file(tag->object.sha1, &type, &size); |
350 | if (!data || type != OBJ_TAG) { | 294 | if (!data || type != OBJ_TAG) { |
351 | free(data); | 295 | free(data); |
352 | return 0; | 296 | return 0; |
353 | } | 297 | } |
354 | 298 | ||
355 | ret = xmalloc(sizeof(*ret)); | 299 | ret = xmalloc(sizeof(*ret)); |
356 | ret->tagger = NULL; | 300 | ret->tagger = NULL; |
357 | ret->tagger_email = NULL; | 301 | ret->tagger_email = NULL; |
358 | ret->tagger_date = 0; | 302 | ret->tagger_date = 0; |
359 | ret->msg = NULL; | 303 | ret->msg = NULL; |
360 | 304 | ||
361 | p = data; | 305 | p = data; |
362 | 306 | ||
363 | while (p && *p) { | 307 | while (p && *p) { |
364 | if (*p == '\n') | 308 | if (*p == '\n') |
365 | break; | 309 | break; |
366 | 310 | ||
367 | if (!strncmp(p, "tagger ", 7)) { | 311 | if (!strncmp(p, "tagger ", 7)) { |
368 | p += 7; | 312 | p += 7; |
369 | t = strchr(p, '<') - 1; | 313 | t = strchr(p, '<') - 1; |
370 | ret->tagger = substr(p, t); | 314 | ret->tagger = substr(p, t); |
371 | p = t; | 315 | p = t; |
372 | t = strchr(t, '>') + 1; | 316 | t = strchr(t, '>') + 1; |
373 | ret->tagger_email = substr(p, t); | 317 | ret->tagger_email = substr(p, t); |
374 | ret->tagger_date = atol(++t); | 318 | ret->tagger_date = atol(++t); |
375 | } | 319 | } |
376 | p = strchr(p, '\n') + 1; | 320 | p = strchr(p, '\n') + 1; |
377 | } | 321 | } |
378 | 322 | ||
379 | while (p && *p && (*p != '\n')) | 323 | while (p && *p && (*p != '\n')) |
380 | p = strchr(p, '\n') + 1; // skip unknown tag fields | 324 | p = strchr(p, '\n') + 1; // skip unknown tag fields |
381 | 325 | ||
382 | while (p && (*p == '\n')) | 326 | while (p && (*p == '\n')) |
383 | p = strchr(p, '\n') + 1; | 327 | p = strchr(p, '\n') + 1; |
384 | if (p && *p) | 328 | if (p && *p) |
385 | ret->msg = xstrdup(p); | 329 | ret->msg = xstrdup(p); |
386 | free(data); | 330 | free(data); |
387 | return ret; | 331 | return ret; |
388 | } | 332 | } |