summary | refs | log | tree | commit | diff | about
author Lars Hjemli <hjemli@gmail.com> 2008-09-14 07:45:37 (UTC)
committer Lars Hjemli <hjemli@gmail.com> 2008-09-15 20:33:11 (UTC)
commit a8305a9543969206aa7cec03948c5a19950eedb9 (patch) (unidiff)
tree 8e2cf67ec72bc296b76a4fcb6db5ec8250f0a502
parent b28765135dd6f52635977454eaf95d0e6c7e7271 (diff)
download cgit-a8305a9543969206aa7cec03948c5a19950eedb9.zip
cgit-a8305a9543969206aa7cec03948c5a19950eedb9.tar.gz
cgit-a8305a9543969206aa7cec03948c5a19950eedb9.tar.bz2
parsing.c: be prepared for unexpected content in commit/tag objects
When parsing commits and tags, cgit made too many assumptions about the formatting of said objects. This patch tries to make the code better prepared to handle 'malformed' objects.

Signed-off-by: Lars Hjemli <hjemli@gmail.com>
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r-- cgit.h 2
-rw-r--r-- parsing.c 159
2 files changed, 97 insertions, 64 deletions
diff --git a/cgit.h b/cgit.h
index 1615616..08fd95a 100644
--- a/cgit.h
+++ b/cgit.h
@@ -85,7 +85,7 @@ struct commitinfo {
85struct taginfo { 85struct taginfo {
86 char *tagger; 86 char *tagger;
87 char *tagger_email; 87 char *tagger_email;
88 int tagger_date; 88 unsigned long tagger_date;
89 char *msg; 89 char *msg;
90}; 90};
91 91
diff --git a/parsing.c b/parsing.c
index 66e8b3d..c8f3048 100644
--- a/parsing.c
+++ b/parsing.c
@@ -62,6 +62,55 @@ char *substr(const char *head, const char *tail)
62 return buf; 62 return buf;
63} 63}
64 64
65char *parse_user(char *t, char **name, char **email, unsigned long *date)
66{
67 char *p = t;
68 int mode = 1;
69
70 while (p && *p) {
71 if (mode == 1 && *p == '<') {
72 *name = substr(t, p - 1);
73 t = p;
74 mode++;
75 } else if (mode == 1 && *p == '\n') {
76 *name = substr(t, p);
77 p++;
78 break;
79 } else if (mode == 2 && *p == '>') {
80 *email = substr(t, p + 1);
81 t = p;
82 mode++;
83 } else if (mode == 2 && *p == '\n') {
84 *email = substr(t, p);
85 p++;
86 break;
87 } else if (mode == 3 && isdigit(*p)) {
88 *date = atol(p);
89 mode++;
90 } else if (*p == '\n') {
91 p++;
92 break;
93 }
94 p++;
95 }
96 return p;
97}
98
99const char *reencode(char **txt, const char *src_enc, const char *dst_enc)
100{
101 char *tmp;
102
103 if (!txt || !*txt || !src_enc || !dst_enc)
104 return *txt;
105
106 tmp = reencode_string(*txt, src_enc, dst_enc);
107 if (tmp) {
108 free(*txt);
109 *txt = tmp;
110 }
111 return *txt;
112}
113
65struct commitinfo *cgit_parse_commit(struct commit *commit) 114struct commitinfo *cgit_parse_commit(struct commit *commit)
66{ 115{
67 struct commitinfo *ret; 116 struct commitinfo *ret;
@@ -88,70 +137,57 @@ struct commitinfo *cgit_parse_commit(struct commit *commit)
88 while (!strncmp(p, "parent ", 7)) 137 while (!strncmp(p, "parent ", 7))
89 p += 48; // "parent " + hex[40] + "\n" 138 p += 48; // "parent " + hex[40] + "\n"
90 139
91 if (!strncmp(p, "author ", 7)) { 140 if (p && !strncmp(p, "author ", 7)) {
92 p += 7; 141 p = parse_user(p + 7, &ret->author, &ret->author_email,
93 t = strchr(p, '<') - 1; 142 &ret->author_date);
94 ret->author = substr(p, t);
95 p = t;
96 t = strchr(t, '>') + 1;
97 ret->author_email = substr(p, t);
98 ret->author_date = atol(t+1);
99 p = strchr(t, '\n') + 1;
100 } 143 }
101 144
102 if (!strncmp(p, "committer ", 9)) { 145 if (p && !strncmp(p, "committer ", 9)) {
103 p += 9; 146 p = parse_user(p + 9, &ret->committer, &ret->committer_email,
104 t = strchr(p, '<') - 1; 147 &ret->committer_date);
105 ret->committer = substr(p, t);
106 p = t;
107 t = strchr(t, '>') + 1;
108 ret->committer_email = substr(p, t);
109 ret->committer_date = atol(t+1);
110 p = strchr(t, '\n') + 1;
111 } 148 }
112 149
113 if (!strncmp(p, "encoding ", 9)) { 150 if (p && !strncmp(p, "encoding ", 9)) {
114 p += 9; 151 p += 9;
115 t = strchr(p, '\n') + 1; 152 t = strchr(p, '\n');
116 ret->msg_encoding = substr(p, t); 153 if (t) {
117 p = t; 154 ret->msg_encoding = substr(p, t + 1);
118 } else 155 p = t + 1;
119 ret->msg_encoding = xstrdup(PAGE_ENCODING); 156 }
157 }
120 158
121 while (*p && (*p != '\n')) 159 // skip unknown header fields
122 p = strchr(p, '\n') + 1; // skip unknown header fields 160 while (p && *p && (*p != '\n')) {
161 p = strchr(p, '\n');
162 if (p)
163 p++;
164 }
123 165
124 while (*p == '\n') 166 // skip empty lines between headers and message
125 p = strchr(p, '\n') + 1; 167 while (p && *p == '\n')
168 p++;
169
170 if (!p)
171 return ret;
126 172
127 t = strchr(p, '\n'); 173 t = strchr(p, '\n');
128 if (t) { 174 if (t) {
129 if (*t == '\0') 175 ret->subject = substr(p, t);
130 ret->subject = "** empty **";
131 else
132 ret->subject = substr(p, t);
133 p = t + 1; 176 p = t + 1;
134 177
135 while (*p == '\n') 178 while (p && *p == '\n') {
136 p = strchr(p, '\n') + 1; 179 p = strchr(p, '\n');
137 ret->msg = xstrdup(p); 180 if (p)
138 } else 181 p++;
139 ret->subject = substr(p, p+strlen(p));
140
141 if(strcmp(ret->msg_encoding, PAGE_ENCODING)) {
142 t = reencode_string(ret->subject, PAGE_ENCODING,
143 ret->msg_encoding);
144 if(t) {
145 free(ret->subject);
146 ret->subject = t;
147 } 182 }
183 if (p)
184 ret->msg = xstrdup(p);
185 } else
186 ret->subject = xstrdup(p);
148 187
149 t = reencode_string(ret->msg, PAGE_ENCODING, 188 if (ret->msg_encoding) {
150 ret->msg_encoding); 189 reencode(&ret->subject, PAGE_ENCODING, ret->msg_encoding);
151 if(t) { 190 reencode(&ret->msg, PAGE_ENCODING, ret->msg_encoding);
152 free(ret->msg);
153 ret->msg = t;
154 }
155 } 191 }
156 192
157 return ret; 193 return ret;
@@ -163,7 +199,7 @@ struct taginfo *cgit_parse_tag(struct tag *tag)
163 void *data; 199 void *data;
164 enum object_type type; 200 enum object_type type;
165 unsigned long size; 201 unsigned long size;
166 char *p, *t; 202 char *p;
167 struct taginfo *ret; 203 struct taginfo *ret;
168 204
169 data = read_sha1_file(tag->object.sha1, &type, &size); 205 data = read_sha1_file(tag->object.sha1, &type, &size);
@@ -185,22 +221,19 @@ struct taginfo *cgit_parse_tag(struct tag *tag)
185 break; 221 break;
186 222
187 if (!strncmp(p, "tagger ", 7)) { 223 if (!strncmp(p, "tagger ", 7)) {
188 p += 7; 224 p = parse_user(p + 7, &ret->tagger, &ret->tagger_email,
189 t = strchr(p, '<') - 1; 225 &ret->tagger_date);
190 ret->tagger = substr(p, t); 226 } else {
191 p = t; 227 p = strchr(p, '\n');
192 t = strchr(t, '>') + 1; 228 if (p)
193 ret->tagger_email = substr(p, t); 229 p++;
194 ret->tagger_date = atol(t+1);
195 } 230 }
196 p = strchr(p, '\n') + 1;
197 } 231 }
198 232
199 while (p && *p && (*p != '\n')) 233 // skip empty lines between headers and message
200 p = strchr(p, '\n') + 1; // skip unknown tag fields 234 while (p && *p == '\n')
235 p++;
201 236
202 while (p && (*p == '\n'))
203 p = strchr(p, '\n') + 1;
204 if (p && *p) 237 if (p && *p)
205 ret->msg = xstrdup(p); 238 ret->msg = xstrdup(p);
206 free(data); 239 free(data);