Diffstat (limited to 'libetpan/src/low-level/mime/mailmime_decode.c') (more/less context) (ignore whitespace changes)
-rw-r--r-- | libetpan/src/low-level/mime/mailmime_decode.c | 544 |
1 files changed, 544 insertions, 0 deletions
diff --git a/libetpan/src/low-level/mime/mailmime_decode.c b/libetpan/src/low-level/mime/mailmime_decode.c new file mode 100644 index 0000000..715ddad --- a/dev/null +++ b/libetpan/src/low-level/mime/mailmime_decode.c | |||
@@ -0,0 +1,544 @@ | |||
1 | /* | ||
2 | * libEtPan! -- a mail stuff library | ||
3 | * | ||
4 | * Copyright (C) 2001, 2005 - DINH Viet Hoa | ||
5 | * All rights reserved. | ||
6 | * | ||
7 | * Redistribution and use in source and binary forms, with or without | ||
8 | * modification, are permitted provided that the following conditions | ||
9 | * are met: | ||
10 | * 1. Redistributions of source code must retain the above copyright | ||
11 | * notice, this list of conditions and the following disclaimer. | ||
12 | * 2. Redistributions in binary form must reproduce the above copyright | ||
13 | * notice, this list of conditions and the following disclaimer in the | ||
14 | * documentation and/or other materials provided with the distribution. | ||
15 | * 3. Neither the name of the libEtPan! project nor the names of its | ||
16 | * contributors may be used to endorse or promote products derived | ||
17 | * from this software without specific prior written permission. | ||
18 | * | ||
19 | * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND | ||
20 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE | ||
23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
25 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
26 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
27 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
28 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
29 | * SUCH DAMAGE. | ||
30 | */ | ||
31 | |||
32 | /* | ||
33 | * $Id$ | ||
34 | */ | ||
35 | |||
36 | /* | ||
37 | RFC 2047 : MIME (Multipurpose Internet Mail Extensions) Part Three: | ||
38 | Message Header Extensions for Non-ASCII Text | ||
39 | */ | ||
40 | |||
41 | #include "mailmime_decode.h" | ||
42 | |||
43 | #include <ctype.h> | ||
44 | #include <unistd.h> | ||
45 | #include <sys/mman.h> | ||
46 | #include <string.h> | ||
47 | #include <stdlib.h> | ||
48 | |||
49 | #include "mailmime_content.h" | ||
50 | |||
51 | #include "charconv.h" | ||
52 | #include "mmapstring.h" | ||
53 | #include "mailimf.h" | ||
54 | |||
55 | #ifndef TRUE | ||
56 | #define TRUE 1 | ||
57 | #endif | ||
58 | |||
59 | #ifndef FALSE | ||
60 | #define FALSE 0 | ||
61 | #endif | ||
62 | |||
/*
 * Forward declarations of the file-local parsers.  Each one reads from
 * message (of the given length) starting at * index.
 */

/* parses the charset name of an encoded-word */
static int mailmime_charset_parse(const char * message, size_t length,
    size_t * index, char ** charset);

/*
 * Encoding selected by the letter of an encoded-word:
 * 'B' selects the base64 body parser, 'Q' the quoted-printable one.
 */
enum {
  MAILMIME_ENCODING_B,
  MAILMIME_ENCODING_Q
};

/* parses the encoding letter and stores one of the values above */
static int mailmime_encoding_parse(const char * message, size_t length,
    size_t * index, int * result);

/* parses a token made of characters accepted by is_etoken_char() */
static int mailmime_etoken_parse(const char * message, size_t length,
    size_t * index, char ** result);

/* parses a plain word delimited by white space */
static int
mailmime_non_encoded_word_parse(const char * message, size_t length,
    size_t * index,
    char ** result);

/* parses and decodes one "=?charset?encoding?text?=" word */
static int
mailmime_encoded_word_parse(const char * message, size_t length,
    size_t * index,
    struct mailmime_encoded_word ** result);


/* kind of the word last appended by mailmime_encoded_phrase_parse() */
enum {
  TYPE_ERROR,
  TYPE_WORD,
  TYPE_ENCODED_WORD,
};
93 | |||
94 | int mailmime_encoded_phrase_parse(const char * default_fromcode, | ||
95 | const char * message, size_t length, | ||
96 | size_t * index, const char * tocode, | ||
97 | char ** result) | ||
98 | { | ||
99 | MMAPString * gphrase; | ||
100 | struct mailmime_encoded_word * word; | ||
101 | int first; | ||
102 | size_t cur_token; | ||
103 | int r; | ||
104 | int res; | ||
105 | char * str; | ||
106 | char * wordutf8; | ||
107 | int type; | ||
108 | |||
109 | cur_token = * index; | ||
110 | |||
111 | gphrase = mmap_string_new(""); | ||
112 | if (gphrase == NULL) { | ||
113 | res = MAILIMF_ERROR_MEMORY; | ||
114 | goto err; | ||
115 | } | ||
116 | |||
117 | first = TRUE; | ||
118 | |||
119 | type = TYPE_ERROR; /* XXX - removes a gcc warning */ | ||
120 | |||
121 | while (1) { | ||
122 | |||
123 | r = mailmime_encoded_word_parse(message, length, &cur_token, &word); | ||
124 | if (r == MAILIMF_NO_ERROR) { | ||
125 | if (!first) { | ||
126 | if (type != TYPE_ENCODED_WORD) { | ||
127 | if (mmap_string_append_c(gphrase, ' ') == NULL) { | ||
128 | mailmime_encoded_word_free(word); | ||
129 | res = MAILIMF_ERROR_MEMORY; | ||
130 | goto free; | ||
131 | } | ||
132 | } | ||
133 | } | ||
134 | type = TYPE_ENCODED_WORD; | ||
135 | wordutf8 = NULL; | ||
136 | r = charconv(tocode, word->wd_charset, word->wd_text, | ||
137 | strlen(word->wd_text), &wordutf8); | ||
138 | switch (r) { | ||
139 | case MAIL_CHARCONV_ERROR_MEMORY: | ||
140 | mailmime_encoded_word_free(word); | ||
141 | res = MAILIMF_ERROR_MEMORY; | ||
142 | goto free; | ||
143 | |||
144 | case MAIL_CHARCONV_ERROR_UNKNOWN_CHARSET: | ||
145 | case MAIL_CHARCONV_ERROR_CONV: | ||
146 | mailmime_encoded_word_free(word); | ||
147 | res = MAILIMF_ERROR_PARSE; | ||
148 | goto free; | ||
149 | } | ||
150 | |||
151 | if (wordutf8 != NULL) { | ||
152 | if (mmap_string_append(gphrase, wordutf8) == NULL) { | ||
153 | mailmime_encoded_word_free(word); | ||
154 | free(wordutf8); | ||
155 | res = MAILIMF_ERROR_MEMORY; | ||
156 | goto free; | ||
157 | } | ||
158 | free(wordutf8); | ||
159 | } | ||
160 | mailmime_encoded_word_free(word); | ||
161 | first = FALSE; | ||
162 | } | ||
163 | else if (r == MAILIMF_ERROR_PARSE) { | ||
164 | /* do nothing */ | ||
165 | } | ||
166 | else { | ||
167 | res = r; | ||
168 | goto free; | ||
169 | } | ||
170 | |||
171 | if (r == MAILIMF_ERROR_PARSE) { | ||
172 | char * raw_word; | ||
173 | |||
174 | r = mailmime_non_encoded_word_parse(message, length, | ||
175 | &cur_token, &raw_word); | ||
176 | if (r == MAILIMF_NO_ERROR) { | ||
177 | if (!first) { | ||
178 | if (mmap_string_append_c(gphrase, ' ') == NULL) { | ||
179 | free(raw_word); | ||
180 | res = MAILIMF_ERROR_MEMORY; | ||
181 | goto free; | ||
182 | } | ||
183 | } | ||
184 | type = TYPE_WORD; | ||
185 | |||
186 | wordutf8 = NULL; | ||
187 | r = charconv(tocode, default_fromcode, raw_word, | ||
188 | strlen(raw_word), &wordutf8); | ||
189 | |||
190 | switch (r) { | ||
191 | case MAIL_CHARCONV_ERROR_MEMORY: | ||
192 | free(raw_word); | ||
193 | res = MAILIMF_ERROR_MEMORY; | ||
194 | goto free; | ||
195 | |||
196 | case MAIL_CHARCONV_ERROR_UNKNOWN_CHARSET: | ||
197 | case MAIL_CHARCONV_ERROR_CONV: | ||
198 | free(raw_word); | ||
199 | res = MAILIMF_ERROR_PARSE; | ||
200 | goto free; | ||
201 | } | ||
202 | |||
203 | if (mmap_string_append(gphrase, wordutf8) == NULL) { | ||
204 | free(wordutf8); | ||
205 | free(raw_word); | ||
206 | res = MAILIMF_ERROR_MEMORY; | ||
207 | goto free; | ||
208 | } | ||
209 | |||
210 | free(wordutf8); | ||
211 | free(raw_word); | ||
212 | first = FALSE; | ||
213 | } | ||
214 | else if (r == MAILIMF_ERROR_PARSE) { | ||
215 | break; | ||
216 | } | ||
217 | else { | ||
218 | res = r; | ||
219 | goto free; | ||
220 | } | ||
221 | } | ||
222 | } | ||
223 | |||
224 | if (first) { | ||
225 | res = MAILIMF_ERROR_PARSE; | ||
226 | goto free; | ||
227 | } | ||
228 | |||
229 | str = strdup(gphrase->str); | ||
230 | if (str == NULL) { | ||
231 | res = MAILIMF_ERROR_MEMORY; | ||
232 | goto free; | ||
233 | } | ||
234 | mmap_string_free(gphrase); | ||
235 | |||
236 | * result = str; | ||
237 | * index = cur_token; | ||
238 | |||
239 | return MAILIMF_NO_ERROR; | ||
240 | |||
241 | free: | ||
242 | mmap_string_free(gphrase); | ||
243 | err: | ||
244 | return res; | ||
245 | } | ||
246 | |||
247 | static int | ||
248 | mailmime_non_encoded_word_parse(const char * message, size_t length, | ||
249 | size_t * index, | ||
250 | char ** result) | ||
251 | { | ||
252 | int end; | ||
253 | size_t cur_token; | ||
254 | int res; | ||
255 | char * text; | ||
256 | int r; | ||
257 | size_t begin; | ||
258 | |||
259 | cur_token = * index; | ||
260 | |||
261 | r = mailimf_fws_parse(message, length, &cur_token); | ||
262 | if ((r != MAILIMF_NO_ERROR) && (r != MAILIMF_ERROR_PARSE)) { | ||
263 | res = r; | ||
264 | goto err; | ||
265 | } | ||
266 | |||
267 | begin = cur_token; | ||
268 | |||
269 | end = FALSE; | ||
270 | while (1) { | ||
271 | if (cur_token >= length) | ||
272 | break; | ||
273 | |||
274 | switch (message[cur_token]) { | ||
275 | case ' ': | ||
276 | case '\t': | ||
277 | case '\r': | ||
278 | case '\n': | ||
279 | end = TRUE; | ||
280 | break; | ||
281 | } | ||
282 | |||
283 | if (end) | ||
284 | break; | ||
285 | |||
286 | cur_token ++; | ||
287 | } | ||
288 | |||
289 | if (cur_token - begin == 0) { | ||
290 | res = MAILIMF_ERROR_PARSE; | ||
291 | goto err; | ||
292 | } | ||
293 | |||
294 | text = malloc(cur_token - begin + 1); | ||
295 | if (text == NULL) { | ||
296 | res = MAILIMF_ERROR_MEMORY; | ||
297 | goto err; | ||
298 | } | ||
299 | |||
300 | memcpy(text, message + begin, cur_token - begin); | ||
301 | text[cur_token - begin] = '\0'; | ||
302 | |||
303 | * index = cur_token; | ||
304 | * result = text; | ||
305 | |||
306 | return MAILIMF_NO_ERROR; | ||
307 | |||
308 | err: | ||
309 | return res; | ||
310 | } | ||
311 | |||
312 | static int mailmime_encoded_word_parse(const char * message, size_t length, | ||
313 | size_t * index, | ||
314 | struct mailmime_encoded_word ** result) | ||
315 | { | ||
316 | size_t cur_token; | ||
317 | char * charset; | ||
318 | int encoding; | ||
319 | char * text; | ||
320 | size_t end_encoding; | ||
321 | char * decoded; | ||
322 | size_t decoded_len; | ||
323 | struct mailmime_encoded_word * ew; | ||
324 | int r; | ||
325 | int res; | ||
326 | int opening_quote; | ||
327 | int end; | ||
328 | |||
329 | cur_token = * index; | ||
330 | |||
331 | r = mailimf_fws_parse(message, length, &cur_token); | ||
332 | if ((r != MAILIMF_NO_ERROR) && (r != MAILIMF_ERROR_PARSE)) { | ||
333 | res = r; | ||
334 | goto err; | ||
335 | } | ||
336 | |||
337 | opening_quote = FALSE; | ||
338 | r = mailimf_char_parse(message, length, &cur_token, '\"'); | ||
339 | if (r == MAILIMF_NO_ERROR) { | ||
340 | opening_quote = TRUE; | ||
341 | } | ||
342 | else if (r == MAILIMF_ERROR_PARSE) { | ||
343 | /* do nothing */ | ||
344 | } | ||
345 | else { | ||
346 | res = r; | ||
347 | goto err; | ||
348 | } | ||
349 | |||
350 | r = mailimf_token_case_insensitive_parse(message, length, &cur_token, "=?"); | ||
351 | if (r != MAILIMF_NO_ERROR) { | ||
352 | res = r; | ||
353 | goto err; | ||
354 | } | ||
355 | |||
356 | r = mailmime_charset_parse(message, length, &cur_token, &charset); | ||
357 | if (r != MAILIMF_NO_ERROR) { | ||
358 | res = r; | ||
359 | goto err; | ||
360 | } | ||
361 | |||
362 | r = mailimf_char_parse(message, length, &cur_token, '?'); | ||
363 | if (r != MAILIMF_NO_ERROR) { | ||
364 | res = r; | ||
365 | goto free_charset; | ||
366 | } | ||
367 | |||
368 | r = mailmime_encoding_parse(message, length, &cur_token, &encoding); | ||
369 | if (r != MAILIMF_NO_ERROR) { | ||
370 | res = r; | ||
371 | goto free_charset; | ||
372 | } | ||
373 | |||
374 | r = mailimf_char_parse(message, length, &cur_token, '?'); | ||
375 | if (r != MAILIMF_NO_ERROR) { | ||
376 | res = r; | ||
377 | goto free_charset; | ||
378 | } | ||
379 | |||
380 | end = FALSE; | ||
381 | end_encoding = cur_token; | ||
382 | while (1) { | ||
383 | if (end_encoding >= length) | ||
384 | break; | ||
385 | |||
386 | switch (message[end_encoding]) { | ||
387 | case '?': | ||
388 | #if 0 | ||
389 | case ' ': | ||
390 | #endif | ||
391 | end = TRUE; | ||
392 | break; | ||
393 | } | ||
394 | |||
395 | if (end) | ||
396 | break; | ||
397 | |||
398 | end_encoding ++; | ||
399 | } | ||
400 | |||
401 | decoded_len = 0; | ||
402 | decoded = NULL; | ||
403 | switch (encoding) { | ||
404 | case MAILMIME_ENCODING_B: | ||
405 | r = mailmime_base64_body_parse(message, end_encoding, | ||
406 | &cur_token, &decoded, | ||
407 | &decoded_len); | ||
408 | |||
409 | if (r != MAILIMF_NO_ERROR) { | ||
410 | res = r; | ||
411 | goto free_charset; | ||
412 | } | ||
413 | break; | ||
414 | case MAILMIME_ENCODING_Q: | ||
415 | r = mailmime_quoted_printable_body_parse(message, end_encoding, | ||
416 | &cur_token, &decoded, | ||
417 | &decoded_len, TRUE); | ||
418 | |||
419 | if (r != MAILIMF_NO_ERROR) { | ||
420 | res = r; | ||
421 | goto free_charset; | ||
422 | } | ||
423 | |||
424 | break; | ||
425 | } | ||
426 | |||
427 | text = malloc(decoded_len + 1); | ||
428 | if (text == NULL) { | ||
429 | res = MAILIMF_ERROR_MEMORY; | ||
430 | goto free_charset; | ||
431 | } | ||
432 | |||
433 | if (decoded_len > 0) | ||
434 | memcpy(text, decoded, decoded_len); | ||
435 | text[decoded_len] = '\0'; | ||
436 | |||
437 | mailmime_decoded_part_free(decoded); | ||
438 | |||
439 | r = mailimf_token_case_insensitive_parse(message, length, &cur_token, "?="); | ||
440 | if (r != MAILIMF_NO_ERROR) { | ||
441 | res = r; | ||
442 | goto free_encoded_text; | ||
443 | } | ||
444 | |||
445 | if (opening_quote) { | ||
446 | r = mailimf_char_parse(message, length, &cur_token, '\"'); | ||
447 | if ((r != MAILIMF_NO_ERROR) && (r != MAILIMF_ERROR_PARSE)) { | ||
448 | res = r; | ||
449 | goto free_encoded_text; | ||
450 | } | ||
451 | } | ||
452 | |||
453 | ew = mailmime_encoded_word_new(charset, text); | ||
454 | if (ew == NULL) { | ||
455 | res = MAILIMF_ERROR_MEMORY; | ||
456 | goto free_encoded_text; | ||
457 | } | ||
458 | |||
459 | * result = ew; | ||
460 | * index = cur_token; | ||
461 | |||
462 | return MAILIMF_NO_ERROR; | ||
463 | |||
464 | free_encoded_text: | ||
465 | mailmime_encoded_text_free(text); | ||
466 | free_charset: | ||
467 | mailmime_charset_free(charset); | ||
468 | err: | ||
469 | return res; | ||
470 | } | ||
471 | |||
/*
 * Parses the charset name of an encoded word.  The work is delegated to
 * mailmime_etoken_parse(), whose status is returned unchanged.
 */
static int mailmime_charset_parse(const char * message, size_t length,
    size_t * index, char ** charset)
{
  int r;

  r = mailmime_etoken_parse(message, length, index, charset);

  return r;
}
477 | |||
478 | static int mailmime_encoding_parse(const char * message, size_t length, | ||
479 | size_t * index, int * result) | ||
480 | { | ||
481 | size_t cur_token; | ||
482 | int encoding; | ||
483 | |||
484 | cur_token = * index; | ||
485 | |||
486 | if (cur_token >= length) | ||
487 | return MAILIMF_ERROR_PARSE; | ||
488 | |||
489 | switch ((char) toupper((unsigned char) message[cur_token])) { | ||
490 | case 'Q': | ||
491 | encoding = MAILMIME_ENCODING_Q; | ||
492 | break; | ||
493 | case 'B': | ||
494 | encoding = MAILMIME_ENCODING_B; | ||
495 | break; | ||
496 | default: | ||
497 | return MAILIMF_ERROR_INVAL; | ||
498 | } | ||
499 | |||
500 | cur_token ++; | ||
501 | |||
502 | * result = encoding; | ||
503 | * index = cur_token; | ||
504 | |||
505 | return MAILIMF_NO_ERROR; | ||
506 | } | ||
507 | |||
/*
 * Tells whether ch may appear in an RFC 2047 token (the charset or
 * encoding part of an encoded word).
 *
 * RFC 2047 defines a token as any character except SPACE, control
 * characters and the "especials" listed below.  Control characters are
 * codes 0 to 31 and DEL (127); all of them are rejected here, including
 * code 31 and DEL.
 *
 * Bytes with the high bit set are accepted.
 *
 * Returns 1 when ch is allowed and 0 otherwise.
 */
int is_etoken_char(char ch)
{
  /* RFC 2047 especials; SPACE is covered by the range test below */
  static const char especials[] = "()<>@,;:\"/[]?.=";
  unsigned char uch = (unsigned char) ch;

  /*
   * uch > ' ' rejects codes 0 to 32 and guarantees a non-zero value, so
   * strchr() can never match the terminator of especials.
   */
  return (uch > ' ') && (uch != 127) && (strchr(especials, uch) == NULL);
}
537 | |||
538 | static int mailmime_etoken_parse(const char * message, size_t length, | ||
539 | size_t * index, char ** result) | ||
540 | { | ||
541 | return mailimf_custom_string_parse(message, length, | ||
542 | index, result, | ||
543 | is_etoken_char); | ||
544 | } | ||