summary refs log tree commit diff about
author Julius Plenz <plenz@cis.fu-berlin.de>2011-03-10 16:03:23 (UTC)
committer Lars Hjemli <hjemli@gmail.com>2011-03-26 14:21:07 (UTC)
commit 0a799424f682071da9f5b632d1394308e9255bb5 (patch) (unidiff)
tree cbc64c3d05d27cffb6c997b7ace6924838a829ff
parent 5db02854e64fa41aa459ea7d13fc827063deda41 (diff)
download cgit-0a799424f682071da9f5b632d1394308e9255bb5.zip
cgit-0a799424f682071da9f5b632d1394308e9255bb5.tar.gz
cgit-0a799424f682071da9f5b632d1394308e9255bb5.tar.bz2
fix two encoding bugs
reencode() takes three arguments in the order (txt, from, to), as opposed to reencode_string(), which, like iconv, takes the from and to arguments swapped. Fix the call accordingly (this makes reencode() more intuitive). If the src and dst encodings are equivalent, don't do any re-encoding. If no special encoding parameter is found within the commit, assume UTF-8 and explicitly convert to PAGE_ENCODING. The change to reencode() mentioned above avoids re-encoding a UTF-8 string to UTF-8, for example. Signed-off-by: Julius Plenz <plenz@cis.fu-berlin.de> Signed-off-by: Lars Hjemli <hjemli@gmail.com>
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r-- parsing.c 24
1 files changed, 15 insertions, 9 deletions
diff --git a/parsing.c b/parsing.c
index f37c49d..c9e4350 100644
--- a/parsing.c
+++ b/parsing.c
@@ -97,25 +97,29 @@ char *parse_user(char *t, char **name, char **email, unsigned long *date)
97} 97}
98 98
99#ifdef NO_ICONV 99#ifdef NO_ICONV
100#define reencode(a, b, c) 100#define reencode(a, b, c)
101#else 101#else
102const char *reencode(char **txt, const char *src_enc, const char *dst_enc) 102const char *reencode(char **txt, const char *src_enc, const char *dst_enc)
103{ 103{
104 char *tmp; 104 char *tmp;
105 105
106 if (!txt || !*txt || !src_enc || !dst_enc) 106 if (!txt || !*txt || !src_enc || !dst_enc)
107 return *txt; 107 return *txt;
108 108
109 tmp = reencode_string(*txt, src_enc, dst_enc); 109 /* no encoding needed if src_enc equals dst_enc */
110 if(!strcasecmp(src_enc, dst_enc))
111 return *txt;
112
113 tmp = reencode_string(*txt, dst_enc, src_enc);
110 if (tmp) { 114 if (tmp) {
111 free(*txt); 115 free(*txt);
112 *txt = tmp; 116 *txt = tmp;
113 } 117 }
114 return *txt; 118 return *txt;
115} 119}
116#endif 120#endif
117 121
118struct commitinfo *cgit_parse_commit(struct commit *commit) 122struct commitinfo *cgit_parse_commit(struct commit *commit)
119{ 123{
120 struct commitinfo *ret; 124 struct commitinfo *ret;
121 char *p = commit->buffer, *t = commit->buffer; 125 char *p = commit->buffer, *t = commit->buffer;
@@ -151,24 +155,28 @@ struct commitinfo *cgit_parse_commit(struct commit *commit)
151 &ret->committer_date); 155 &ret->committer_date);
152 } 156 }
153 157
154 if (p && !strncmp(p, "encoding ", 9)) { 158 if (p && !strncmp(p, "encoding ", 9)) {
155 p += 9; 159 p += 9;
156 t = strchr(p, '\n'); 160 t = strchr(p, '\n');
157 if (t) { 161 if (t) {
158 ret->msg_encoding = substr(p, t + 1); 162 ret->msg_encoding = substr(p, t + 1);
159 p = t + 1; 163 p = t + 1;
160 } 164 }
161 } 165 }
162 166
167 /* if no special encoding is found, assume UTF-8 */
168 if(!ret->msg_encoding)
169 ret->msg_encoding = xstrdup("UTF-8");
170
163 // skip unknown header fields 171 // skip unknown header fields
164 while (p && *p && (*p != '\n')) { 172 while (p && *p && (*p != '\n')) {
165 p = strchr(p, '\n'); 173 p = strchr(p, '\n');
166 if (p) 174 if (p)
167 p++; 175 p++;
168 } 176 }
169 177
170 // skip empty lines between headers and message 178 // skip empty lines between headers and message
171 while (p && *p == '\n') 179 while (p && *p == '\n')
172 p++; 180 p++;
173 181
174 if (!p) 182 if (!p)
@@ -180,32 +188,30 @@ struct commitinfo *cgit_parse_commit(struct commit *commit)
180 p = t + 1; 188 p = t + 1;
181 189
182 while (p && *p == '\n') { 190 while (p && *p == '\n') {
183 p = strchr(p, '\n'); 191 p = strchr(p, '\n');
184 if (p) 192 if (p)
185 p++; 193 p++;
186 } 194 }
187 if (p) 195 if (p)
188 ret->msg = xstrdup(p); 196 ret->msg = xstrdup(p);
189 } else 197 } else
190 ret->subject = xstrdup(p); 198 ret->subject = xstrdup(p);
191 199
192 if (ret->msg_encoding) { 200 reencode(&ret->author, ret->msg_encoding, PAGE_ENCODING);
193 reencode(&ret->author, PAGE_ENCODING, ret->msg_encoding); 201 reencode(&ret->author_email, ret->msg_encoding, PAGE_ENCODING);
194 reencode(&ret->author_email, PAGE_ENCODING, ret->msg_encoding); 202 reencode(&ret->committer, ret->msg_encoding, PAGE_ENCODING);
195 reencode(&ret->committer, PAGE_ENCODING, ret->msg_encoding); 203 reencode(&ret->committer_email, ret->msg_encoding, PAGE_ENCODING);
196 reencode(&ret->committer_email, PAGE_ENCODING, ret->msg_encoding); 204 reencode(&ret->subject, ret->msg_encoding, PAGE_ENCODING);
197 reencode(&ret->subject, PAGE_ENCODING, ret->msg_encoding); 205 reencode(&ret->msg, ret->msg_encoding, PAGE_ENCODING);
198 reencode(&ret->msg, PAGE_ENCODING, ret->msg_encoding);
199 }
200 206
201 return ret; 207 return ret;
202} 208}
203 209
204 210
205struct taginfo *cgit_parse_tag(struct tag *tag) 211struct taginfo *cgit_parse_tag(struct tag *tag)
206{ 212{
207 void *data; 213 void *data;
208 enum object_type type; 214 enum object_type type;
209 unsigned long size; 215 unsigned long size;
210 char *p; 216 char *p;
211 struct taginfo *ret; 217 struct taginfo *ret;