summary | refs | log | tree | commit | diff | about
author Julius Plenz <plenz@cis.fu-berlin.de>2011-03-10 16:03:23 (UTC)
committer Lars Hjemli <hjemli@gmail.com>2011-03-26 14:21:07 (UTC)
commit 0a799424f682071da9f5b632d1394308e9255bb5 (patch) (unidiff)
tree cbc64c3d05d27cffb6c997b7ace6924838a829ff
parent 5db02854e64fa41aa459ea7d13fc827063deda41 (diff)
download cgit-0a799424f682071da9f5b632d1394308e9255bb5.zip
cgit-0a799424f682071da9f5b632d1394308e9255bb5.tar.gz
cgit-0a799424f682071da9f5b632d1394308e9255bb5.tar.bz2
fix two encoding bugs
reencode() takes three arguments in the order (txt, from, to), as opposed to reencode_string(), which, like iconv, takes the from and to arguments in swapped order. Fix the call to reencode_string() accordingly (this makes reencode() more intuitive). If the src and dst encodings are equivalent, don't do any re-encoding. If no special encoding parameter is found within the commit, assume UTF-8 and explicitly convert to PAGE_ENCODING. The change to reencode() mentioned above avoids re-encoding a UTF-8 string to UTF-8, for example. Signed-off-by: Julius Plenz <plenz@cis.fu-berlin.de> Signed-off-by: Lars Hjemli <hjemli@gmail.com>
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r-- parsing.c 24
1 files changed, 15 insertions, 9 deletions
diff --git a/parsing.c b/parsing.c
index f37c49d..c9e4350 100644
--- a/parsing.c
+++ b/parsing.c
@@ -103,13 +103,17 @@ const char *reencode(char **txt, const char *src_enc, const char *dst_enc)
103{ 103{
104 char *tmp; 104 char *tmp;
105 105
106 if (!txt || !*txt || !src_enc || !dst_enc) 106 if (!txt || !*txt || !src_enc || !dst_enc)
107 return *txt; 107 return *txt;
108 108
109 tmp = reencode_string(*txt, src_enc, dst_enc); 109 /* no encoding needed if src_enc equals dst_enc */
110 if(!strcasecmp(src_enc, dst_enc))
111 return *txt;
112
113 tmp = reencode_string(*txt, dst_enc, src_enc);
110 if (tmp) { 114 if (tmp) {
111 free(*txt); 115 free(*txt);
112 *txt = tmp; 116 *txt = tmp;
113 } 117 }
114 return *txt; 118 return *txt;
115} 119}
@@ -157,12 +161,16 @@ struct commitinfo *cgit_parse_commit(struct commit *commit)
157 if (t) { 161 if (t) {
158 ret->msg_encoding = substr(p, t + 1); 162 ret->msg_encoding = substr(p, t + 1);
159 p = t + 1; 163 p = t + 1;
160 } 164 }
161 } 165 }
162 166
167 /* if no special encoding is found, assume UTF-8 */
168 if(!ret->msg_encoding)
169 ret->msg_encoding = xstrdup("UTF-8");
170
163 // skip unknown header fields 171 // skip unknown header fields
164 while (p && *p && (*p != '\n')) { 172 while (p && *p && (*p != '\n')) {
165 p = strchr(p, '\n'); 173 p = strchr(p, '\n');
166 if (p) 174 if (p)
167 p++; 175 p++;
168 } 176 }
@@ -186,20 +194,18 @@ struct commitinfo *cgit_parse_commit(struct commit *commit)
186 } 194 }
187 if (p) 195 if (p)
188 ret->msg = xstrdup(p); 196 ret->msg = xstrdup(p);
189 } else 197 } else
190 ret->subject = xstrdup(p); 198 ret->subject = xstrdup(p);
191 199
192 if (ret->msg_encoding) { 200 reencode(&ret->author, ret->msg_encoding, PAGE_ENCODING);
193 reencode(&ret->author, PAGE_ENCODING, ret->msg_encoding); 201 reencode(&ret->author_email, ret->msg_encoding, PAGE_ENCODING);
194 reencode(&ret->author_email, PAGE_ENCODING, ret->msg_encoding); 202 reencode(&ret->committer, ret->msg_encoding, PAGE_ENCODING);
195 reencode(&ret->committer, PAGE_ENCODING, ret->msg_encoding); 203 reencode(&ret->committer_email, ret->msg_encoding, PAGE_ENCODING);
196 reencode(&ret->committer_email, PAGE_ENCODING, ret->msg_encoding); 204 reencode(&ret->subject, ret->msg_encoding, PAGE_ENCODING);
197 reencode(&ret->subject, PAGE_ENCODING, ret->msg_encoding); 205 reencode(&ret->msg, ret->msg_encoding, PAGE_ENCODING);
198 reencode(&ret->msg, PAGE_ENCODING, ret->msg_encoding);
199 }
200 206
201 return ret; 207 return ret;
202} 208}
203 209
204 210
205struct taginfo *cgit_parse_tag(struct tag *tag) 211struct taginfo *cgit_parse_tag(struct tag *tag)