summaryrefslogtreecommitdiffabout
path: root/parsing.c
author Julius Plenz <plenz@cis.fu-berlin.de>2011-03-10 16:03:23 (UTC)
committer Lars Hjemli <hjemli@gmail.com>2011-03-26 14:21:07 (UTC)
commit 0a799424f682071da9f5b632d1394308e9255bb5 (patch) (unidiff)
tree cbc64c3d05d27cffb6c997b7ace6924838a829ff /parsing.c
parent 5db02854e64fa41aa459ea7d13fc827063deda41 (diff)
downloadcgit-0a799424f682071da9f5b632d1394308e9255bb5.zip
cgit-0a799424f682071da9f5b632d1394308e9255bb5.tar.gz
cgit-0a799424f682071da9f5b632d1394308e9255bb5.tar.bz2
fix two encoding bugs
reencode() takes three arguments in the order (txt, from, to), as opposed to reencode_string(), which, like iconv, expects the two encodings in the swapped order (to, from). Fix the call inside reencode() so that the encodings are passed to reencode_string() in the order it expects (this makes reencode() more intuitive). If the source and destination encodings are equivalent, don't do any re-encoding. If no special encoding parameter is found within the commit, assume UTF-8 and explicitly convert to PAGE_ENCODING. The change to reencode() mentioned above avoids, for example, re-encoding a UTF-8 string to UTF-8. Signed-off-by: Julius Plenz <plenz@cis.fu-berlin.de> Signed-off-by: Lars Hjemli <hjemli@gmail.com>
Diffstat (limited to 'parsing.c') (more/less context) (ignore whitespace changes)
-rw-r--r-- parsing.c 24
1 files changed, 15 insertions, 9 deletions
diff --git a/parsing.c b/parsing.c
index f37c49d..c9e4350 100644
--- a/parsing.c
+++ b/parsing.c
@@ -106,7 +106,11 @@ const char *reencode(char **txt, const char *src_enc, const char *dst_enc)
106 if (!txt || !*txt || !src_enc || !dst_enc) 106 if (!txt || !*txt || !src_enc || !dst_enc)
107 return *txt; 107 return *txt;
108 108
109 tmp = reencode_string(*txt, src_enc, dst_enc); 109 /* no encoding needed if src_enc equals dst_enc */
110 if(!strcasecmp(src_enc, dst_enc))
111 return *txt;
112
113 tmp = reencode_string(*txt, dst_enc, src_enc);
110 if (tmp) { 114 if (tmp) {
111 free(*txt); 115 free(*txt);
112 *txt = tmp; 116 *txt = tmp;
@@ -160,6 +164,10 @@ struct commitinfo *cgit_parse_commit(struct commit *commit)
160 } 164 }
161 } 165 }
162 166
167 /* if no special encoding is found, assume UTF-8 */
168 if(!ret->msg_encoding)
169 ret->msg_encoding = xstrdup("UTF-8");
170
163 // skip unknown header fields 171 // skip unknown header fields
164 while (p && *p && (*p != '\n')) { 172 while (p && *p && (*p != '\n')) {
165 p = strchr(p, '\n'); 173 p = strchr(p, '\n');
@@ -189,14 +197,12 @@ struct commitinfo *cgit_parse_commit(struct commit *commit)
189 } else 197 } else
190 ret->subject = xstrdup(p); 198 ret->subject = xstrdup(p);
191 199
192 if (ret->msg_encoding) { 200 reencode(&ret->author, ret->msg_encoding, PAGE_ENCODING);
193 reencode(&ret->author, PAGE_ENCODING, ret->msg_encoding); 201 reencode(&ret->author_email, ret->msg_encoding, PAGE_ENCODING);
194 reencode(&ret->author_email, PAGE_ENCODING, ret->msg_encoding); 202 reencode(&ret->committer, ret->msg_encoding, PAGE_ENCODING);
195 reencode(&ret->committer, PAGE_ENCODING, ret->msg_encoding); 203 reencode(&ret->committer_email, ret->msg_encoding, PAGE_ENCODING);
196 reencode(&ret->committer_email, PAGE_ENCODING, ret->msg_encoding); 204 reencode(&ret->subject, ret->msg_encoding, PAGE_ENCODING);
197 reencode(&ret->subject, PAGE_ENCODING, ret->msg_encoding); 205 reencode(&ret->msg, ret->msg_encoding, PAGE_ENCODING);
198 reencode(&ret->msg, PAGE_ENCODING, ret->msg_encoding);
199 }
200 206
201 return ret; 207 return ret;
202} 208}