author | Lars Hjemli <hjemli@gmail.com> | 2008-09-14 07:45:37 (UTC) |
---|---|---|
committer | Lars Hjemli <hjemli@gmail.com> | 2008-09-15 20:33:11 (UTC) |
commit | a8305a9543969206aa7cec03948c5a19950eedb9 (patch) (side-by-side diff) | |
tree | 8e2cf67ec72bc296b76a4fcb6db5ec8250f0a502 | |
parent | b28765135dd6f52635977454eaf95d0e6c7e7271 (diff) | |
download | cgit-a8305a9543969206aa7cec03948c5a19950eedb9.zip cgit-a8305a9543969206aa7cec03948c5a19950eedb9.tar.gz cgit-a8305a9543969206aa7cec03948c5a19950eedb9.tar.bz2 |
parsing.c: be prepared for unexpected content in commit/tag objects
When parsing commits and tags, cgit made too many assumptions about the
formatting of those objects. This patch tries to make the code better
prepared to handle 'malformed' objects.
Signed-off-by: Lars Hjemli <hjemli@gmail.com>
-rw-r--r-- | cgit.h | 2 | ||||
-rw-r--r-- | parsing.c | 155 |
2 files changed, 95 insertions, 62 deletions
@@ -87,3 +87,3 @@ struct taginfo { char *tagger_email; - int tagger_date; + unsigned long tagger_date; char *msg; @@ -64,2 +64,51 @@ char *substr(const char *head, const char *tail) +char *parse_user(char *t, char **name, char **email, unsigned long *date) +{ + char *p = t; + int mode = 1; + + while (p && *p) { + if (mode == 1 && *p == '<') { + *name = substr(t, p - 1); + t = p; + mode++; + } else if (mode == 1 && *p == '\n') { + *name = substr(t, p); + p++; + break; + } else if (mode == 2 && *p == '>') { + *email = substr(t, p + 1); + t = p; + mode++; + } else if (mode == 2 && *p == '\n') { + *email = substr(t, p); + p++; + break; + } else if (mode == 3 && isdigit(*p)) { + *date = atol(p); + mode++; + } else if (*p == '\n') { + p++; + break; + } + p++; + } + return p; +} + +const char *reencode(char **txt, const char *src_enc, const char *dst_enc) +{ + char *tmp; + + if (!txt || !*txt || !src_enc || !dst_enc) + return *txt; + + tmp = reencode_string(*txt, src_enc, dst_enc); + if (tmp) { + free(*txt); + *txt = tmp; + } + return *txt; +} + struct commitinfo *cgit_parse_commit(struct commit *commit) @@ -90,37 +139,34 @@ struct commitinfo *cgit_parse_commit(struct commit *commit) - if (!strncmp(p, "author ", 7)) { - p += 7; - t = strchr(p, '<') - 1; - ret->author = substr(p, t); - p = t; - t = strchr(t, '>') + 1; - ret->author_email = substr(p, t); - ret->author_date = atol(t+1); - p = strchr(t, '\n') + 1; + if (p && !strncmp(p, "author ", 7)) { + p = parse_user(p + 7, &ret->author, &ret->author_email, + &ret->author_date); } - if (!strncmp(p, "committer ", 9)) { - p += 9; - t = strchr(p, '<') - 1; - ret->committer = substr(p, t); - p = t; - t = strchr(t, '>') + 1; - ret->committer_email = substr(p, t); - ret->committer_date = atol(t+1); - p = strchr(t, '\n') + 1; + if (p && !strncmp(p, "committer ", 9)) { + p = parse_user(p + 9, &ret->committer, &ret->committer_email, + &ret->committer_date); } - if (!strncmp(p, "encoding ", 9)) { + if (p && !strncmp(p, "encoding ", 
9)) { p += 9; - t = strchr(p, '\n') + 1; - ret->msg_encoding = substr(p, t); - p = t; - } else - ret->msg_encoding = xstrdup(PAGE_ENCODING); + t = strchr(p, '\n'); + if (t) { + ret->msg_encoding = substr(p, t + 1); + p = t + 1; + } + } + + // skip unknown header fields + while (p && *p && (*p != '\n')) { + p = strchr(p, '\n'); + if (p) + p++; + } - while (*p && (*p != '\n')) - p = strchr(p, '\n') + 1; // skip unknown header fields + // skip empty lines between headers and message + while (p && *p == '\n') + p++; - while (*p == '\n') - p = strchr(p, '\n') + 1; + if (!p) + return ret; @@ -128,5 +174,2 @@ struct commitinfo *cgit_parse_commit(struct commit *commit) if (t) { - if (*t == '\0') - ret->subject = "** empty **"; - else ret->subject = substr(p, t); @@ -134,22 +177,15 @@ struct commitinfo *cgit_parse_commit(struct commit *commit) - while (*p == '\n') - p = strchr(p, '\n') + 1; + while (p && *p == '\n') { + p = strchr(p, '\n'); + if (p) + p++; + } + if (p) ret->msg = xstrdup(p); } else - ret->subject = substr(p, p+strlen(p)); - - if(strcmp(ret->msg_encoding, PAGE_ENCODING)) { - t = reencode_string(ret->subject, PAGE_ENCODING, - ret->msg_encoding); - if(t) { - free(ret->subject); - ret->subject = t; - } + ret->subject = xstrdup(p); - t = reencode_string(ret->msg, PAGE_ENCODING, - ret->msg_encoding); - if(t) { - free(ret->msg); - ret->msg = t; - } + if (ret->msg_encoding) { + reencode(&ret->subject, PAGE_ENCODING, ret->msg_encoding); + reencode(&ret->msg, PAGE_ENCODING, ret->msg_encoding); } @@ -165,3 +201,3 @@ struct taginfo *cgit_parse_tag(struct tag *tag) unsigned long size; - char *p, *t; + char *p; struct taginfo *ret; @@ -187,18 +223,15 @@ struct taginfo *cgit_parse_tag(struct tag *tag) if (!strncmp(p, "tagger ", 7)) { - p += 7; - t = strchr(p, '<') - 1; - ret->tagger = substr(p, t); - p = t; - t = strchr(t, '>') + 1; - ret->tagger_email = substr(p, t); - ret->tagger_date = atol(t+1); + p = parse_user(p + 7, &ret->tagger, &ret->tagger_email, + 
&ret->tagger_date); + } else { + p = strchr(p, '\n'); + if (p) + p++; } - p = strchr(p, '\n') + 1; } - while (p && *p && (*p != '\n')) - p = strchr(p, '\n') + 1; // skip unknown tag fields + // skip empty lines between headers and message + while (p && *p == '\n') + p++; - while (p && (*p == '\n')) - p = strchr(p, '\n') + 1; if (p && *p) |