author | Lars Hjemli <hjemli@gmail.com> | 2008-09-14 07:45:37 (UTC) |
---|---|---|
committer | Lars Hjemli <hjemli@gmail.com> | 2008-09-15 20:33:11 (UTC) |
commit | a8305a9543969206aa7cec03948c5a19950eedb9 (patch) (unidiff) | |
tree | 8e2cf67ec72bc296b76a4fcb6db5ec8250f0a502 /parsing.c | |
parent | b28765135dd6f52635977454eaf95d0e6c7e7271 (diff) | |
download | cgit-a8305a9543969206aa7cec03948c5a19950eedb9.zip cgit-a8305a9543969206aa7cec03948c5a19950eedb9.tar.gz cgit-a8305a9543969206aa7cec03948c5a19950eedb9.tar.bz2 |
parsing.c: be prepared for unexpected content in commit/tag objects
When parsing commits and tags, cgit made too many assumptions about the
formatting of said objects. This patch tries to make the code better
prepared to handle 'malformed' objects.
Signed-off-by: Lars Hjemli <hjemli@gmail.com>
-rw-r--r-- | parsing.c | 159 |
1 files changed, 96 insertions, 63 deletions
@@ -1,208 +1,241 @@ | |||
1 | /* config.c: parsing of config files | 1 | /* config.c: parsing of config files |
2 | * | 2 | * |
3 | * Copyright (C) 2006 Lars Hjemli | 3 | * Copyright (C) 2006 Lars Hjemli |
4 | * | 4 | * |
5 | * Licensed under GNU General Public License v2 | 5 | * Licensed under GNU General Public License v2 |
6 | * (see COPYING for full license text) | 6 | * (see COPYING for full license text) |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include "cgit.h" | 9 | #include "cgit.h" |
10 | 10 | ||
11 | /* | 11 | /* |
12 | * url syntax: [repo ['/' cmd [ '/' path]]] | 12 | * url syntax: [repo ['/' cmd [ '/' path]]] |
13 | * repo: any valid repo url, may contain '/' | 13 | * repo: any valid repo url, may contain '/' |
14 | * cmd: log | commit | diff | tree | view | blob | snapshot | 14 | * cmd: log | commit | diff | tree | view | blob | snapshot |
15 | * path: any valid path, may contain '/' | 15 | * path: any valid path, may contain '/' |
16 | * | 16 | * |
17 | */ | 17 | */ |
18 | void cgit_parse_url(const char *url) | 18 | void cgit_parse_url(const char *url) |
19 | { | 19 | { |
20 | char *cmd, *p; | 20 | char *cmd, *p; |
21 | 21 | ||
22 | ctx.repo = NULL; | 22 | ctx.repo = NULL; |
23 | if (!url || url[0] == '\0') | 23 | if (!url || url[0] == '\0') |
24 | return; | 24 | return; |
25 | 25 | ||
26 | ctx.repo = cgit_get_repoinfo(url); | 26 | ctx.repo = cgit_get_repoinfo(url); |
27 | if (ctx.repo) { | 27 | if (ctx.repo) { |
28 | ctx.qry.repo = ctx.repo->url; | 28 | ctx.qry.repo = ctx.repo->url; |
29 | return; | 29 | return; |
30 | } | 30 | } |
31 | 31 | ||
32 | cmd = strchr(url, '/'); | 32 | cmd = strchr(url, '/'); |
33 | while (!ctx.repo && cmd) { | 33 | while (!ctx.repo && cmd) { |
34 | cmd[0] = '\0'; | 34 | cmd[0] = '\0'; |
35 | ctx.repo = cgit_get_repoinfo(url); | 35 | ctx.repo = cgit_get_repoinfo(url); |
36 | if (ctx.repo == NULL) { | 36 | if (ctx.repo == NULL) { |
37 | cmd[0] = '/'; | 37 | cmd[0] = '/'; |
38 | cmd = strchr(cmd + 1, '/'); | 38 | cmd = strchr(cmd + 1, '/'); |
39 | continue; | 39 | continue; |
40 | } | 40 | } |
41 | 41 | ||
42 | ctx.qry.repo = ctx.repo->url; | 42 | ctx.qry.repo = ctx.repo->url; |
43 | p = strchr(cmd + 1, '/'); | 43 | p = strchr(cmd + 1, '/'); |
44 | if (p) { | 44 | if (p) { |
45 | p[0] = '\0'; | 45 | p[0] = '\0'; |
46 | if (p[1]) | 46 | if (p[1]) |
47 | ctx.qry.path = trim_end(p + 1, '/'); | 47 | ctx.qry.path = trim_end(p + 1, '/'); |
48 | } | 48 | } |
49 | if (cmd[1]) | 49 | if (cmd[1]) |
50 | ctx.qry.page = xstrdup(cmd + 1); | 50 | ctx.qry.page = xstrdup(cmd + 1); |
51 | return; | 51 | return; |
52 | } | 52 | } |
53 | } | 53 | } |
54 | 54 | ||
55 | char *substr(const char *head, const char *tail) | 55 | char *substr(const char *head, const char *tail) |
56 | { | 56 | { |
57 | char *buf; | 57 | char *buf; |
58 | 58 | ||
59 | buf = xmalloc(tail - head + 1); | 59 | buf = xmalloc(tail - head + 1); |
60 | strncpy(buf, head, tail - head); | 60 | strncpy(buf, head, tail - head); |
61 | buf[tail - head] = '\0'; | 61 | buf[tail - head] = '\0'; |
62 | return buf; | 62 | return buf; |
63 | } | 63 | } |
64 | 64 | ||
65 | char *parse_user(char *t, char **name, char **email, unsigned long *date) | ||
66 | { | ||
67 | char *p = t; | ||
68 | int mode = 1; | ||
69 | |||
70 | while (p && *p) { | ||
71 | if (mode == 1 && *p == '<') { | ||
72 | *name = substr(t, p - 1); | ||
73 | t = p; | ||
74 | mode++; | ||
75 | } else if (mode == 1 && *p == '\n') { | ||
76 | *name = substr(t, p); | ||
77 | p++; | ||
78 | break; | ||
79 | } else if (mode == 2 && *p == '>') { | ||
80 | *email = substr(t, p + 1); | ||
81 | t = p; | ||
82 | mode++; | ||
83 | } else if (mode == 2 && *p == '\n') { | ||
84 | *email = substr(t, p); | ||
85 | p++; | ||
86 | break; | ||
87 | } else if (mode == 3 && isdigit(*p)) { | ||
88 | *date = atol(p); | ||
89 | mode++; | ||
90 | } else if (*p == '\n') { | ||
91 | p++; | ||
92 | break; | ||
93 | } | ||
94 | p++; | ||
95 | } | ||
96 | return p; | ||
97 | } | ||
98 | |||
99 | const char *reencode(char **txt, const char *src_enc, const char *dst_enc) | ||
100 | { | ||
101 | char *tmp; | ||
102 | |||
103 | if (!txt || !*txt || !src_enc || !dst_enc) | ||
104 | return *txt; | ||
105 | |||
106 | tmp = reencode_string(*txt, src_enc, dst_enc); | ||
107 | if (tmp) { | ||
108 | free(*txt); | ||
109 | *txt = tmp; | ||
110 | } | ||
111 | return *txt; | ||
112 | } | ||
113 | |||
65 | struct commitinfo *cgit_parse_commit(struct commit *commit) | 114 | struct commitinfo *cgit_parse_commit(struct commit *commit) |
66 | { | 115 | { |
67 | struct commitinfo *ret; | 116 | struct commitinfo *ret; |
68 | char *p = commit->buffer, *t = commit->buffer; | 117 | char *p = commit->buffer, *t = commit->buffer; |
69 | 118 | ||
70 | ret = xmalloc(sizeof(*ret)); | 119 | ret = xmalloc(sizeof(*ret)); |
71 | ret->commit = commit; | 120 | ret->commit = commit; |
72 | ret->author = NULL; | 121 | ret->author = NULL; |
73 | ret->author_email = NULL; | 122 | ret->author_email = NULL; |
74 | ret->committer = NULL; | 123 | ret->committer = NULL; |
75 | ret->committer_email = NULL; | 124 | ret->committer_email = NULL; |
76 | ret->subject = NULL; | 125 | ret->subject = NULL; |
77 | ret->msg = NULL; | 126 | ret->msg = NULL; |
78 | ret->msg_encoding = NULL; | 127 | ret->msg_encoding = NULL; |
79 | 128 | ||
80 | if (p == NULL) | 129 | if (p == NULL) |
81 | return ret; | 130 | return ret; |
82 | 131 | ||
83 | if (strncmp(p, "tree ", 5)) | 132 | if (strncmp(p, "tree ", 5)) |
84 | die("Bad commit: %s", sha1_to_hex(commit->object.sha1)); | 133 | die("Bad commit: %s", sha1_to_hex(commit->object.sha1)); |
85 | else | 134 | else |
86 | p += 46; // "tree " + hex[40] + "\n" | 135 | p += 46; // "tree " + hex[40] + "\n" |
87 | 136 | ||
88 | while (!strncmp(p, "parent ", 7)) | 137 | while (!strncmp(p, "parent ", 7)) |
89 | p += 48; // "parent " + hex[40] + "\n" | 138 | p += 48; // "parent " + hex[40] + "\n" |
90 | 139 | ||
91 | if (!strncmp(p, "author ", 7)) { | 140 | if (p && !strncmp(p, "author ", 7)) { |
92 | p += 7; | 141 | p = parse_user(p + 7, &ret->author, &ret->author_email, |
93 | t = strchr(p, '<') - 1; | 142 | &ret->author_date); |
94 | ret->author = substr(p, t); | ||
95 | p = t; | ||
96 | t = strchr(t, '>') + 1; | ||
97 | ret->author_email = substr(p, t); | ||
98 | ret->author_date = atol(t+1); | ||
99 | p = strchr(t, '\n') + 1; | ||
100 | } | 143 | } |
101 | 144 | ||
102 | if (!strncmp(p, "committer ", 9)) { | 145 | if (p && !strncmp(p, "committer ", 9)) { |
103 | p += 9; | 146 | p = parse_user(p + 9, &ret->committer, &ret->committer_email, |
104 | t = strchr(p, '<') - 1; | 147 | &ret->committer_date); |
105 | ret->committer = substr(p, t); | ||
106 | p = t; | ||
107 | t = strchr(t, '>') + 1; | ||
108 | ret->committer_email = substr(p, t); | ||
109 | ret->committer_date = atol(t+1); | ||
110 | p = strchr(t, '\n') + 1; | ||
111 | } | 148 | } |
112 | 149 | ||
113 | if (!strncmp(p, "encoding ", 9)) { | 150 | if (p && !strncmp(p, "encoding ", 9)) { |
114 | p += 9; | 151 | p += 9; |
115 | t = strchr(p, '\n') + 1; | 152 | t = strchr(p, '\n'); |
116 | ret->msg_encoding = substr(p, t); | 153 | if (t) { |
117 | p = t; | 154 | ret->msg_encoding = substr(p, t + 1); |
118 | } else | 155 | p = t + 1; |
119 | ret->msg_encoding = xstrdup(PAGE_ENCODING); | 156 | } |
157 | } | ||
120 | 158 | ||
121 | while (*p && (*p != '\n')) | 159 | // skip unknown header fields |
122 | p = strchr(p, '\n') + 1; // skip unknown header fields | 160 | while (p && *p && (*p != '\n')) { |
161 | p = strchr(p, '\n'); | ||
162 | if (p) | ||
163 | p++; | ||
164 | } | ||
123 | 165 | ||
124 | while (*p == '\n') | 166 | // skip empty lines between headers and message |
125 | p = strchr(p, '\n') + 1; | 167 | while (p && *p == '\n') |
168 | p++; | ||
169 | |||
170 | if (!p) | ||
171 | return ret; | ||
126 | 172 | ||
127 | t = strchr(p, '\n'); | 173 | t = strchr(p, '\n'); |
128 | if (t) { | 174 | if (t) { |
129 | if (*t == '\0') | 175 | ret->subject = substr(p, t); |
130 | ret->subject = "** empty **"; | ||
131 | else | ||
132 | ret->subject = substr(p, t); | ||
133 | p = t + 1; | 176 | p = t + 1; |
134 | 177 | ||
135 | while (*p == '\n') | 178 | while (p && *p == '\n') { |
136 | p = strchr(p, '\n') + 1; | 179 | p = strchr(p, '\n'); |
137 | ret->msg = xstrdup(p); | 180 | if (p) |
138 | } else | 181 | p++; |
139 | ret->subject = substr(p, p+strlen(p)); | ||
140 | |||
141 | if(strcmp(ret->msg_encoding, PAGE_ENCODING)) { | ||
142 | t = reencode_string(ret->subject, PAGE_ENCODING, | ||
143 | ret->msg_encoding); | ||
144 | if(t) { | ||
145 | free(ret->subject); | ||
146 | ret->subject = t; | ||
147 | } | 182 | } |
183 | if (p) | ||
184 | ret->msg = xstrdup(p); | ||
185 | } else | ||
186 | ret->subject = xstrdup(p); | ||
148 | 187 | ||
149 | t = reencode_string(ret->msg, PAGE_ENCODING, | 188 | if (ret->msg_encoding) { |
150 | ret->msg_encoding); | 189 | reencode(&ret->subject, PAGE_ENCODING, ret->msg_encoding); |
151 | if(t) { | 190 | reencode(&ret->msg, PAGE_ENCODING, ret->msg_encoding); |
152 | free(ret->msg); | ||
153 | ret->msg = t; | ||
154 | } | ||
155 | } | 191 | } |
156 | 192 | ||
157 | return ret; | 193 | return ret; |
158 | } | 194 | } |
159 | 195 | ||
160 | 196 | ||
161 | struct taginfo *cgit_parse_tag(struct tag *tag) | 197 | struct taginfo *cgit_parse_tag(struct tag *tag) |
162 | { | 198 | { |
163 | void *data; | 199 | void *data; |
164 | enum object_type type; | 200 | enum object_type type; |
165 | unsigned long size; | 201 | unsigned long size; |
166 | char *p, *t; | 202 | char *p; |
167 | struct taginfo *ret; | 203 | struct taginfo *ret; |
168 | 204 | ||
169 | data = read_sha1_file(tag->object.sha1, &type, &size); | 205 | data = read_sha1_file(tag->object.sha1, &type, &size); |
170 | if (!data || type != OBJ_TAG) { | 206 | if (!data || type != OBJ_TAG) { |
171 | free(data); | 207 | free(data); |
172 | return 0; | 208 | return 0; |
173 | } | 209 | } |
174 | 210 | ||
175 | ret = xmalloc(sizeof(*ret)); | 211 | ret = xmalloc(sizeof(*ret)); |
176 | ret->tagger = NULL; | 212 | ret->tagger = NULL; |
177 | ret->tagger_email = NULL; | 213 | ret->tagger_email = NULL; |
178 | ret->tagger_date = 0; | 214 | ret->tagger_date = 0; |
179 | ret->msg = NULL; | 215 | ret->msg = NULL; |
180 | 216 | ||
181 | p = data; | 217 | p = data; |
182 | 218 | ||
183 | while (p && *p) { | 219 | while (p && *p) { |
184 | if (*p == '\n') | 220 | if (*p == '\n') |
185 | break; | 221 | break; |
186 | 222 | ||
187 | if (!strncmp(p, "tagger ", 7)) { | 223 | if (!strncmp(p, "tagger ", 7)) { |
188 | p += 7; | 224 | p = parse_user(p + 7, &ret->tagger, &ret->tagger_email, |
189 | t = strchr(p, '<') - 1; | 225 | &ret->tagger_date); |
190 | ret->tagger = substr(p, t); | 226 | } else { |
191 | p = t; | 227 | p = strchr(p, '\n'); |
192 | t = strchr(t, '>') + 1; | 228 | if (p) |
193 | ret->tagger_email = substr(p, t); | 229 | p++; |
194 | ret->tagger_date = atol(t+1); | ||
195 | } | 230 | } |
196 | p = strchr(p, '\n') + 1; | ||
197 | } | 231 | } |
198 | 232 | ||
199 | while (p && *p && (*p != '\n')) | 233 | // skip empty lines between headers and message |
200 | p = strchr(p, '\n') + 1; // skip unknown tag fields | 234 | while (p && *p == '\n') |
235 | p++; | ||
201 | 236 | ||
202 | while (p && (*p == '\n')) | ||
203 | p = strchr(p, '\n') + 1; | ||
204 | if (p && *p) | 237 | if (p && *p) |
205 | ret->msg = xstrdup(p); | 238 | ret->msg = xstrdup(p); |
206 | free(data); | 239 | free(data); |
207 | return ret; | 240 | return ret; |
208 | } | 241 | } |