summaryrefslogtreecommitdiffabout
path: root/libetpan/src/low-level/mime/mailmime_decode.c
Unidiff
Diffstat (limited to 'libetpan/src/low-level/mime/mailmime_decode.c') (more/less context) (ignore whitespace changes)
-rw-r--r--libetpan/src/low-level/mime/mailmime_decode.c544
1 files changed, 544 insertions, 0 deletions
diff --git a/libetpan/src/low-level/mime/mailmime_decode.c b/libetpan/src/low-level/mime/mailmime_decode.c
new file mode 100644
index 0000000..715ddad
--- a/dev/null
+++ b/libetpan/src/low-level/mime/mailmime_decode.c
@@ -0,0 +1,544 @@
1/*
2 * libEtPan! -- a mail stuff library
3 *
4 * Copyright (C) 2001, 2005 - DINH Viet Hoa
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the libEtPan! project nor the names of its
16 * contributors may be used to endorse or promote products derived
17 * from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32/*
33 * $Id$
34 */
35
36/*
37 RFC 2047 : MIME (Multipurpose Internet Mail Extensions) Part Three:
38 Message Header Extensions for Non-ASCII Text
39*/
40
41#include "mailmime_decode.h"
42
43#include <ctype.h>
44#include <unistd.h>
45#include <sys/mman.h>
46#include <string.h>
47#include <stdlib.h>
48
49#include "mailmime_content.h"
50
51#include "charconv.h"
52#include "mmapstring.h"
53#include "mailimf.h"
54
55#ifndef TRUE
56#define TRUE 1
57#endif
58
59#ifndef FALSE
60#define FALSE 0
61#endif
62
63static int mailmime_charset_parse(const char * message, size_t length,
64 size_t * index, char ** charset);
65
/*
  Transfer encodings of an encoded-word, as produced by
  mailmime_encoding_parse() and consumed by mailmime_encoded_word_parse().
*/
enum {
  MAILMIME_ENCODING_B = 0, /* "B": decoded with mailmime_base64_body_parse() */
  MAILMIME_ENCODING_Q = 1  /* "Q": decoded with
                              mailmime_quoted_printable_body_parse() */
};
70
71static int mailmime_encoding_parse(const char * message, size_t length,
72 size_t * index, int * result);
73
74static int mailmime_etoken_parse(const char * message, size_t length,
75 size_t * index, char ** result);
76
77static int
78mailmime_non_encoded_word_parse(const char * message, size_t length,
79 size_t * index,
80 char ** result);
81
82static int
83mailmime_encoded_word_parse(const char * message, size_t length,
84 size_t * index,
85 struct mailmime_encoded_word ** result);
86
87
/*
  Kind of the last token appended by mailmime_encoded_phrase_parse();
  used there to decide whether a separating space is needed.
*/
enum {
  TYPE_ERROR = 0,       /* placeholder before any token has been parsed */
  TYPE_WORD = 1,        /* last token was a plain (non-encoded) word */
  TYPE_ENCODED_WORD = 2 /* last token was an encoded-word */
};
93
94int mailmime_encoded_phrase_parse(const char * default_fromcode,
95 const char * message, size_t length,
96 size_t * index, const char * tocode,
97 char ** result)
98{
99 MMAPString * gphrase;
100 struct mailmime_encoded_word * word;
101 int first;
102 size_t cur_token;
103 int r;
104 int res;
105 char * str;
106 char * wordutf8;
107 int type;
108
109 cur_token = * index;
110
111 gphrase = mmap_string_new("");
112 if (gphrase == NULL) {
113 res = MAILIMF_ERROR_MEMORY;
114 goto err;
115 }
116
117 first = TRUE;
118
119 type = TYPE_ERROR; /* XXX - removes a gcc warning */
120
121 while (1) {
122
123 r = mailmime_encoded_word_parse(message, length, &cur_token, &word);
124 if (r == MAILIMF_NO_ERROR) {
125 if (!first) {
126 if (type != TYPE_ENCODED_WORD) {
127 if (mmap_string_append_c(gphrase, ' ') == NULL) {
128 mailmime_encoded_word_free(word);
129 res = MAILIMF_ERROR_MEMORY;
130 goto free;
131 }
132 }
133 }
134 type = TYPE_ENCODED_WORD;
135 wordutf8 = NULL;
136 r = charconv(tocode, word->wd_charset, word->wd_text,
137 strlen(word->wd_text), &wordutf8);
138 switch (r) {
139 case MAIL_CHARCONV_ERROR_MEMORY:
140 mailmime_encoded_word_free(word);
141 res = MAILIMF_ERROR_MEMORY;
142 goto free;
143
144 case MAIL_CHARCONV_ERROR_UNKNOWN_CHARSET:
145 case MAIL_CHARCONV_ERROR_CONV:
146 mailmime_encoded_word_free(word);
147 res = MAILIMF_ERROR_PARSE;
148 goto free;
149 }
150
151 if (wordutf8 != NULL) {
152 if (mmap_string_append(gphrase, wordutf8) == NULL) {
153 mailmime_encoded_word_free(word);
154 free(wordutf8);
155 res = MAILIMF_ERROR_MEMORY;
156 goto free;
157 }
158 free(wordutf8);
159 }
160 mailmime_encoded_word_free(word);
161 first = FALSE;
162 }
163 else if (r == MAILIMF_ERROR_PARSE) {
164 /* do nothing */
165 }
166 else {
167 res = r;
168 goto free;
169 }
170
171 if (r == MAILIMF_ERROR_PARSE) {
172 char * raw_word;
173
174 r = mailmime_non_encoded_word_parse(message, length,
175 &cur_token, &raw_word);
176 if (r == MAILIMF_NO_ERROR) {
177 if (!first) {
178 if (mmap_string_append_c(gphrase, ' ') == NULL) {
179 free(raw_word);
180 res = MAILIMF_ERROR_MEMORY;
181 goto free;
182 }
183 }
184 type = TYPE_WORD;
185
186 wordutf8 = NULL;
187 r = charconv(tocode, default_fromcode, raw_word,
188 strlen(raw_word), &wordutf8);
189
190 switch (r) {
191 case MAIL_CHARCONV_ERROR_MEMORY:
192 free(raw_word);
193 res = MAILIMF_ERROR_MEMORY;
194 goto free;
195
196 case MAIL_CHARCONV_ERROR_UNKNOWN_CHARSET:
197 case MAIL_CHARCONV_ERROR_CONV:
198 free(raw_word);
199 res = MAILIMF_ERROR_PARSE;
200 goto free;
201 }
202
203 if (mmap_string_append(gphrase, wordutf8) == NULL) {
204 free(wordutf8);
205 free(raw_word);
206 res = MAILIMF_ERROR_MEMORY;
207 goto free;
208 }
209
210 free(wordutf8);
211 free(raw_word);
212 first = FALSE;
213 }
214 else if (r == MAILIMF_ERROR_PARSE) {
215 break;
216 }
217 else {
218 res = r;
219 goto free;
220 }
221 }
222 }
223
224 if (first) {
225 res = MAILIMF_ERROR_PARSE;
226 goto free;
227 }
228
229 str = strdup(gphrase->str);
230 if (str == NULL) {
231 res = MAILIMF_ERROR_MEMORY;
232 goto free;
233 }
234 mmap_string_free(gphrase);
235
236 * result = str;
237 * index = cur_token;
238
239 return MAILIMF_NO_ERROR;
240
241 free:
242 mmap_string_free(gphrase);
243 err:
244 return res;
245}
246
247static int
248mailmime_non_encoded_word_parse(const char * message, size_t length,
249 size_t * index,
250 char ** result)
251{
252 int end;
253 size_t cur_token;
254 int res;
255 char * text;
256 int r;
257 size_t begin;
258
259 cur_token = * index;
260
261 r = mailimf_fws_parse(message, length, &cur_token);
262 if ((r != MAILIMF_NO_ERROR) && (r != MAILIMF_ERROR_PARSE)) {
263 res = r;
264 goto err;
265 }
266
267 begin = cur_token;
268
269 end = FALSE;
270 while (1) {
271 if (cur_token >= length)
272 break;
273
274 switch (message[cur_token]) {
275 case ' ':
276 case '\t':
277 case '\r':
278 case '\n':
279 end = TRUE;
280 break;
281 }
282
283 if (end)
284 break;
285
286 cur_token ++;
287 }
288
289 if (cur_token - begin == 0) {
290 res = MAILIMF_ERROR_PARSE;
291 goto err;
292 }
293
294 text = malloc(cur_token - begin + 1);
295 if (text == NULL) {
296 res = MAILIMF_ERROR_MEMORY;
297 goto err;
298 }
299
300 memcpy(text, message + begin, cur_token - begin);
301 text[cur_token - begin] = '\0';
302
303 * index = cur_token;
304 * result = text;
305
306 return MAILIMF_NO_ERROR;
307
308 err:
309 return res;
310}
311
312static int mailmime_encoded_word_parse(const char * message, size_t length,
313 size_t * index,
314 struct mailmime_encoded_word ** result)
315{
316 size_t cur_token;
317 char * charset;
318 int encoding;
319 char * text;
320 size_t end_encoding;
321 char * decoded;
322 size_t decoded_len;
323 struct mailmime_encoded_word * ew;
324 int r;
325 int res;
326 int opening_quote;
327 int end;
328
329 cur_token = * index;
330
331 r = mailimf_fws_parse(message, length, &cur_token);
332 if ((r != MAILIMF_NO_ERROR) && (r != MAILIMF_ERROR_PARSE)) {
333 res = r;
334 goto err;
335 }
336
337 opening_quote = FALSE;
338 r = mailimf_char_parse(message, length, &cur_token, '\"');
339 if (r == MAILIMF_NO_ERROR) {
340 opening_quote = TRUE;
341 }
342 else if (r == MAILIMF_ERROR_PARSE) {
343 /* do nothing */
344 }
345 else {
346 res = r;
347 goto err;
348 }
349
350 r = mailimf_token_case_insensitive_parse(message, length, &cur_token, "=?");
351 if (r != MAILIMF_NO_ERROR) {
352 res = r;
353 goto err;
354 }
355
356 r = mailmime_charset_parse(message, length, &cur_token, &charset);
357 if (r != MAILIMF_NO_ERROR) {
358 res = r;
359 goto err;
360 }
361
362 r = mailimf_char_parse(message, length, &cur_token, '?');
363 if (r != MAILIMF_NO_ERROR) {
364 res = r;
365 goto free_charset;
366 }
367
368 r = mailmime_encoding_parse(message, length, &cur_token, &encoding);
369 if (r != MAILIMF_NO_ERROR) {
370 res = r;
371 goto free_charset;
372 }
373
374 r = mailimf_char_parse(message, length, &cur_token, '?');
375 if (r != MAILIMF_NO_ERROR) {
376 res = r;
377 goto free_charset;
378 }
379
380 end = FALSE;
381 end_encoding = cur_token;
382 while (1) {
383 if (end_encoding >= length)
384 break;
385
386 switch (message[end_encoding]) {
387 case '?':
388#if 0
389 case ' ':
390#endif
391 end = TRUE;
392 break;
393 }
394
395 if (end)
396 break;
397
398 end_encoding ++;
399 }
400
401 decoded_len = 0;
402 decoded = NULL;
403 switch (encoding) {
404 case MAILMIME_ENCODING_B:
405 r = mailmime_base64_body_parse(message, end_encoding,
406 &cur_token, &decoded,
407 &decoded_len);
408
409 if (r != MAILIMF_NO_ERROR) {
410 res = r;
411 goto free_charset;
412 }
413 break;
414 case MAILMIME_ENCODING_Q:
415 r = mailmime_quoted_printable_body_parse(message, end_encoding,
416 &cur_token, &decoded,
417 &decoded_len, TRUE);
418
419 if (r != MAILIMF_NO_ERROR) {
420 res = r;
421 goto free_charset;
422 }
423
424 break;
425 }
426
427 text = malloc(decoded_len + 1);
428 if (text == NULL) {
429 res = MAILIMF_ERROR_MEMORY;
430 goto free_charset;
431 }
432
433 if (decoded_len > 0)
434 memcpy(text, decoded, decoded_len);
435 text[decoded_len] = '\0';
436
437 mailmime_decoded_part_free(decoded);
438
439 r = mailimf_token_case_insensitive_parse(message, length, &cur_token, "?=");
440 if (r != MAILIMF_NO_ERROR) {
441 res = r;
442 goto free_encoded_text;
443 }
444
445 if (opening_quote) {
446 r = mailimf_char_parse(message, length, &cur_token, '\"');
447 if ((r != MAILIMF_NO_ERROR) && (r != MAILIMF_ERROR_PARSE)) {
448 res = r;
449 goto free_encoded_text;
450 }
451 }
452
453 ew = mailmime_encoded_word_new(charset, text);
454 if (ew == NULL) {
455 res = MAILIMF_ERROR_MEMORY;
456 goto free_encoded_text;
457 }
458
459 * result = ew;
460 * index = cur_token;
461
462 return MAILIMF_NO_ERROR;
463
464 free_encoded_text:
465 mailmime_encoded_text_free(text);
466 free_charset:
467 mailmime_charset_free(charset);
468 err:
469 return res;
470}
471
/*
  mailmime_charset_parse()

  Parses the charset part of an encoded-word. A charset is an etoken, so
  this simply forwards to mailmime_etoken_parse() and returns its result;
  * index and * charset are updated by that function.
*/
static int mailmime_charset_parse(const char * message, size_t length,
                                  size_t * index, char ** charset)
{
  int r;

  r = mailmime_etoken_parse(message, length, index, charset);

  return r;
}
477
478static int mailmime_encoding_parse(const char * message, size_t length,
479 size_t * index, int * result)
480{
481 size_t cur_token;
482 int encoding;
483
484 cur_token = * index;
485
486 if (cur_token >= length)
487 return MAILIMF_ERROR_PARSE;
488
489 switch ((char) toupper((unsigned char) message[cur_token])) {
490 case 'Q':
491 encoding = MAILMIME_ENCODING_Q;
492 break;
493 case 'B':
494 encoding = MAILMIME_ENCODING_B;
495 break;
496 default:
497 return MAILIMF_ERROR_INVAL;
498 }
499
500 cur_token ++;
501
502 * result = encoding;
503 * index = cur_token;
504
505 return MAILIMF_NO_ERROR;
506}
507
/*
  is_etoken_char()

  Tells whether ch may appear in the charset / encoding token of an
  encoded-word.

  Rejected: the ASCII control characters (0 to 31 and DEL, 127), the space,
  and the "especials"  ( ) < > @ , ; : " / [ ] ? . =
  Every other byte, including bytes above 127, is accepted.

  Returns 1 when ch is accepted, 0 otherwise.
*/
int is_etoken_char(char ch)
{
  /* space followed by the especials */
  static const char excluded[] = " ()<>@,;:\"/[]?.=";
  unsigned char uch = (unsigned char) ch;

  /* the range test comes first so that strchr() is never asked for '\0',
     which it would find at the terminator of excluded[] */
  return (uch >= 32) && (uch != 127) && (strchr(excluded, uch) == NULL);
}
537
538static int mailmime_etoken_parse(const char * message, size_t length,
539 size_t * index, char ** result)
540{
541 return mailimf_custom_string_parse(message, length,
542 index, result,
543 is_etoken_char);
544}