summary | refs | log | tree | commit | diff | about
path: root/parsing.c
author Julius Plenz <plenz@cis.fu-berlin.de>2011-03-10 16:03:23 (UTC)
committer Lars Hjemli <hjemli@gmail.com>2011-03-26 14:21:07 (UTC)
commit 0a799424f682071da9f5b632d1394308e9255bb5 (patch) (side-by-side diff)
tree cbc64c3d05d27cffb6c997b7ace6924838a829ff /parsing.c
parent 5db02854e64fa41aa459ea7d13fc827063deda41 (diff)
download cgit-0a799424f682071da9f5b632d1394308e9255bb5.zip
cgit-0a799424f682071da9f5b632d1394308e9255bb5.tar.gz
cgit-0a799424f682071da9f5b632d1394308e9255bb5.tar.bz2
fix two encoding bugs
reencode() takes three arguments in the order (txt, from, to), as opposed to reencode_string(), which, like iconv, expects the from and to arguments in the opposite order. Fix that (this makes reencode more intuitive). If src and dst encoding are equivalent, don't do any encoding. If no special encoding parameter is found within the commit, assume UTF-8 and explicitly convert to PAGE_ENCODING. The change to reencode() mentioned above avoids re-encoding a UTF-8 string to UTF-8, for example. Signed-off-by: Julius Plenz <plenz@cis.fu-berlin.de> Signed-off-by: Lars Hjemli <hjemli@gmail.com>
Diffstat (limited to 'parsing.c') (more/less context) (ignore whitespace changes)
-rw-r--r-- parsing.c 24
1 files changed, 15 insertions, 9 deletions
diff --git a/parsing.c b/parsing.c
index f37c49d..c9e4350 100644
--- a/parsing.c
+++ b/parsing.c
@@ -97,25 +97,29 @@ char *parse_user(char *t, char **name, char **email, unsigned long *date)
}
#ifdef NO_ICONV
#define reencode(a, b, c)
#else
const char *reencode(char **txt, const char *src_enc, const char *dst_enc)
{
char *tmp;
if (!txt || !*txt || !src_enc || !dst_enc)
return *txt;
- tmp = reencode_string(*txt, src_enc, dst_enc);
+ /* no encoding needed if src_enc equals dst_enc */
+ if(!strcasecmp(src_enc, dst_enc))
+ return *txt;
+
+ tmp = reencode_string(*txt, dst_enc, src_enc);
if (tmp) {
free(*txt);
*txt = tmp;
}
return *txt;
}
#endif
struct commitinfo *cgit_parse_commit(struct commit *commit)
{
struct commitinfo *ret;
char *p = commit->buffer, *t = commit->buffer;
@@ -151,24 +155,28 @@ struct commitinfo *cgit_parse_commit(struct commit *commit)
&ret->committer_date);
}
if (p && !strncmp(p, "encoding ", 9)) {
p += 9;
t = strchr(p, '\n');
if (t) {
ret->msg_encoding = substr(p, t + 1);
p = t + 1;
}
}
+ /* if no special encoding is found, assume UTF-8 */
+ if(!ret->msg_encoding)
+ ret->msg_encoding = xstrdup("UTF-8");
+
// skip unknown header fields
while (p && *p && (*p != '\n')) {
p = strchr(p, '\n');
if (p)
p++;
}
// skip empty lines between headers and message
while (p && *p == '\n')
p++;
if (!p)
@@ -180,32 +188,30 @@ struct commitinfo *cgit_parse_commit(struct commit *commit)
p = t + 1;
while (p && *p == '\n') {
p = strchr(p, '\n');
if (p)
p++;
}
if (p)
ret->msg = xstrdup(p);
} else
ret->subject = xstrdup(p);
- if (ret->msg_encoding) {
- reencode(&ret->author, PAGE_ENCODING, ret->msg_encoding);
- reencode(&ret->author_email, PAGE_ENCODING, ret->msg_encoding);
- reencode(&ret->committer, PAGE_ENCODING, ret->msg_encoding);
- reencode(&ret->committer_email, PAGE_ENCODING, ret->msg_encoding);
- reencode(&ret->subject, PAGE_ENCODING, ret->msg_encoding);
- reencode(&ret->msg, PAGE_ENCODING, ret->msg_encoding);
- }
+ reencode(&ret->author, ret->msg_encoding, PAGE_ENCODING);
+ reencode(&ret->author_email, ret->msg_encoding, PAGE_ENCODING);
+ reencode(&ret->committer, ret->msg_encoding, PAGE_ENCODING);
+ reencode(&ret->committer_email, ret->msg_encoding, PAGE_ENCODING);
+ reencode(&ret->subject, ret->msg_encoding, PAGE_ENCODING);
+ reencode(&ret->msg, ret->msg_encoding, PAGE_ENCODING);
return ret;
}
struct taginfo *cgit_parse_tag(struct tag *tag)
{
void *data;
enum object_type type;
unsigned long size;
char *p;
struct taginfo *ret;