summary | refs | log | tree | commit | diff | about
author Lars Hjemli <hjemli@gmail.com>2008-09-14 07:45:37 (UTC)
committer Lars Hjemli <hjemli@gmail.com>2008-09-15 20:33:11 (UTC)
commit a8305a9543969206aa7cec03948c5a19950eedb9 (patch) (side-by-side diff)
tree 8e2cf67ec72bc296b76a4fcb6db5ec8250f0a502
parent b28765135dd6f52635977454eaf95d0e6c7e7271 (diff)
download cgit-a8305a9543969206aa7cec03948c5a19950eedb9.zip
cgit-a8305a9543969206aa7cec03948c5a19950eedb9.tar.gz
cgit-a8305a9543969206aa7cec03948c5a19950eedb9.tar.bz2
parsing.c: be prepared for unexpected content in commit/tag objects
When parsing commits and tags cgit made too many assumptions about the formatting of said objects. This patch tries to make the code better prepared to handle 'malformed' objects.

Signed-off-by: Lars Hjemli <hjemli@gmail.com>
Diffstat (more/less context) (show whitespace changes)
-rw-r--r-- cgit.h 2
-rw-r--r-- parsing.c 155
2 files changed, 95 insertions, 62 deletions
diff --git a/cgit.h b/cgit.h
index 1615616..08fd95a 100644
--- a/cgit.h
+++ b/cgit.h
@@ -85,7 +85,7 @@ struct commitinfo {
struct taginfo {
char *tagger;
char *tagger_email;
- int tagger_date;
+ unsigned long tagger_date;
char *msg;
};
diff --git a/parsing.c b/parsing.c
index 66e8b3d..c8f3048 100644
--- a/parsing.c
+++ b/parsing.c
@@ -62,6 +62,55 @@ char *substr(const char *head, const char *tail)
return buf;
}
+char *parse_user(char *t, char **name, char **email, unsigned long *date)
+{
+ char *p = t;
+ int mode = 1;
+
+ while (p && *p) {
+ if (mode == 1 && *p == '<') {
+ *name = substr(t, p - 1);
+ t = p;
+ mode++;
+ } else if (mode == 1 && *p == '\n') {
+ *name = substr(t, p);
+ p++;
+ break;
+ } else if (mode == 2 && *p == '>') {
+ *email = substr(t, p + 1);
+ t = p;
+ mode++;
+ } else if (mode == 2 && *p == '\n') {
+ *email = substr(t, p);
+ p++;
+ break;
+ } else if (mode == 3 && isdigit(*p)) {
+ *date = atol(p);
+ mode++;
+ } else if (*p == '\n') {
+ p++;
+ break;
+ }
+ p++;
+ }
+ return p;
+}
+
+const char *reencode(char **txt, const char *src_enc, const char *dst_enc)
+{
+ char *tmp;
+
+ if (!txt || !*txt || !src_enc || !dst_enc)
+ return *txt;
+
+ tmp = reencode_string(*txt, src_enc, dst_enc);
+ if (tmp) {
+ free(*txt);
+ *txt = tmp;
+ }
+ return *txt;
+}
+
struct commitinfo *cgit_parse_commit(struct commit *commit)
{
struct commitinfo *ret;
@@ -88,70 +137,57 @@ struct commitinfo *cgit_parse_commit(struct commit *commit)
while (!strncmp(p, "parent ", 7))
p += 48; // "parent " + hex[40] + "\n"
- if (!strncmp(p, "author ", 7)) {
- p += 7;
- t = strchr(p, '<') - 1;
- ret->author = substr(p, t);
- p = t;
- t = strchr(t, '>') + 1;
- ret->author_email = substr(p, t);
- ret->author_date = atol(t+1);
- p = strchr(t, '\n') + 1;
+ if (p && !strncmp(p, "author ", 7)) {
+ p = parse_user(p + 7, &ret->author, &ret->author_email,
+ &ret->author_date);
}
- if (!strncmp(p, "committer ", 9)) {
- p += 9;
- t = strchr(p, '<') - 1;
- ret->committer = substr(p, t);
- p = t;
- t = strchr(t, '>') + 1;
- ret->committer_email = substr(p, t);
- ret->committer_date = atol(t+1);
- p = strchr(t, '\n') + 1;
+ if (p && !strncmp(p, "committer ", 9)) {
+ p = parse_user(p + 9, &ret->committer, &ret->committer_email,
+ &ret->committer_date);
}
- if (!strncmp(p, "encoding ", 9)) {
+ if (p && !strncmp(p, "encoding ", 9)) {
p += 9;
- t = strchr(p, '\n') + 1;
- ret->msg_encoding = substr(p, t);
- p = t;
- } else
- ret->msg_encoding = xstrdup(PAGE_ENCODING);
+ t = strchr(p, '\n');
+ if (t) {
+ ret->msg_encoding = substr(p, t + 1);
+ p = t + 1;
+ }
+ }
+
+ // skip unknown header fields
+ while (p && *p && (*p != '\n')) {
+ p = strchr(p, '\n');
+ if (p)
+ p++;
+ }
- while (*p && (*p != '\n'))
- p = strchr(p, '\n') + 1; // skip unknown header fields
+ // skip empty lines between headers and message
+ while (p && *p == '\n')
+ p++;
- while (*p == '\n')
- p = strchr(p, '\n') + 1;
+ if (!p)
+ return ret;
t = strchr(p, '\n');
if (t) {
- if (*t == '\0')
- ret->subject = "** empty **";
- else
ret->subject = substr(p, t);
p = t + 1;
- while (*p == '\n')
- p = strchr(p, '\n') + 1;
+ while (p && *p == '\n') {
+ p = strchr(p, '\n');
+ if (p)
+ p++;
+ }
+ if (p)
ret->msg = xstrdup(p);
} else
- ret->subject = substr(p, p+strlen(p));
-
- if(strcmp(ret->msg_encoding, PAGE_ENCODING)) {
- t = reencode_string(ret->subject, PAGE_ENCODING,
- ret->msg_encoding);
- if(t) {
- free(ret->subject);
- ret->subject = t;
- }
+ ret->subject = xstrdup(p);
- t = reencode_string(ret->msg, PAGE_ENCODING,
- ret->msg_encoding);
- if(t) {
- free(ret->msg);
- ret->msg = t;
- }
+ if (ret->msg_encoding) {
+ reencode(&ret->subject, PAGE_ENCODING, ret->msg_encoding);
+ reencode(&ret->msg, PAGE_ENCODING, ret->msg_encoding);
}
return ret;
@@ -163,7 +199,7 @@ struct taginfo *cgit_parse_tag(struct tag *tag)
void *data;
enum object_type type;
unsigned long size;
- char *p, *t;
+ char *p;
struct taginfo *ret;
data = read_sha1_file(tag->object.sha1, &type, &size);
@@ -185,22 +221,19 @@ struct taginfo *cgit_parse_tag(struct tag *tag)
break;
if (!strncmp(p, "tagger ", 7)) {
- p += 7;
- t = strchr(p, '<') - 1;
- ret->tagger = substr(p, t);
- p = t;
- t = strchr(t, '>') + 1;
- ret->tagger_email = substr(p, t);
- ret->tagger_date = atol(t+1);
+ p = parse_user(p + 7, &ret->tagger, &ret->tagger_email,
+ &ret->tagger_date);
+ } else {
+ p = strchr(p, '\n');
+ if (p)
+ p++;
}
- p = strchr(p, '\n') + 1;
}
- while (p && *p && (*p != '\n'))
- p = strchr(p, '\n') + 1; // skip unknown tag fields
+ // skip empty lines between headers and message
+ while (p && *p == '\n')
+ p++;
- while (p && (*p == '\n'))
- p = strchr(p, '\n') + 1;
if (p && *p)
ret->msg = xstrdup(p);
free(data);