summaryrefslogtreecommitdiffabout
path: root/libetpan/src/low-level/mbox/mailmbox_parse.c
Unidiff
Diffstat (limited to 'libetpan/src/low-level/mbox/mailmbox_parse.c') (more/less context) (ignore whitespace changes)
-rw-r--r--libetpan/src/low-level/mbox/mailmbox_parse.c620
1 files changed, 620 insertions, 0 deletions
diff --git a/libetpan/src/low-level/mbox/mailmbox_parse.c b/libetpan/src/low-level/mbox/mailmbox_parse.c
new file mode 100644
index 0000000..65642ac
--- a/dev/null
+++ b/libetpan/src/low-level/mbox/mailmbox_parse.c
@@ -0,0 +1,620 @@
1/*
2 * libEtPan! -- a mail stuff library
3 *
4 * Copyright (C) 2001, 2005 - DINH Viet Hoa
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the libEtPan! project nor the names of its
16 * contributors may be used to endorse or promote products derived
17 * from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32/*
33 * $Id$
34 */
35
36#include "mailmbox_parse.h"
37
38#include "mailmbox.h"
39
40#include <sys/types.h>
41#include <sys/stat.h>
42#include <string.h>
43#include <stdlib.h>
44
45#define UID_HEADER "X-LibEtPan-UID:"
46
47#ifndef TRUE
48#define TRUE 1
49#endif
50
51#ifndef FALSE
52#define FALSE 0
53#endif
54
/*
  State constants named UNSTRUCTURED_*.

  The values are the default 0..4 sequence, spelled out explicitly.
  Nothing in the visible part of this file refers to them.
*/
enum {
  UNSTRUCTURED_START = 0,
  UNSTRUCTURED_CR    = 1,
  UNSTRUCTURED_LF    = 2,
  UNSTRUCTURED_WSP   = 3,
  UNSTRUCTURED_OUT   = 4
};
62
63static inline int
64mailmbox_fields_parse(char * str, size_t length,
65 size_t * index,
66 uint32_t * puid,
67 size_t * phlen)
68{
69 size_t cur_token;
70 int r;
71 size_t hlen;
72 size_t uid;
73 int end;
74
75 cur_token = * index;
76
77 end = FALSE;
78 uid = 0;
79 while (!end) {
80 size_t begin;
81
82 begin = cur_token;
83
84 r = mailimf_ignore_field_parse(str, length, &cur_token);
85 switch (r) {
86 case MAILIMF_NO_ERROR:
87 if (str[begin] == 'X') {
88
89 if (strncasecmp(str + begin, UID_HEADER, strlen(UID_HEADER)) == 0) {
90 begin += strlen(UID_HEADER);
91
92 while (str[begin] == ' ')
93 begin ++;
94
95 uid = strtoul(str + begin, NULL, 10);
96 }
97 }
98
99 break;
100 case MAILIMF_ERROR_PARSE:
101 default:
102 end = TRUE;
103 break;
104 }
105 }
106
107 hlen = cur_token - * index;
108
109 * phlen = hlen;
110 * puid = uid;
111 * index = cur_token;
112
113 return MAILMBOX_NO_ERROR;
114}
115
/*
  States of the scanner in mailmbox_single_parse() that looks for the
  end of the current message.  The values are the default 0..9 sequence,
  spelled out explicitly.
*/
enum {
  IN_MAIL   = 0,  /* inside a line of the message */
  FIRST_CR  = 1,  /* previous character was a CR ending a line */
  FIRST_LF  = 2,  /* previous character was a LF ending a line */
  SECOND_CR = 3,  /* CR seen right after a line ending */
  SECOND_LF = 4,  /* LF seen right after a line ending */
  PARSING_F = 5,  /* 'F' matched */
  PARSING_R = 6,  /* "Fr" matched */
  PARSING_O = 7,  /* "Fro" matched */
  PARSING_M = 8,  /* "From" matched */
  OUT_MAIL  = 9   /* "From " matched, scan is over */
};
128
129
130
131
132static inline int
133mailmbox_single_parse(char * str, size_t length,
134 size_t * index,
135 size_t * pstart,
136 size_t * pstart_len,
137 size_t * pheaders,
138 size_t * pheaders_len,
139 size_t * pbody,
140 size_t * pbody_len,
141 size_t * psize,
142 size_t * ppadding,
143 uint32_t * puid)
144{
145 size_t cur_token;
146 size_t start;
147 size_t start_len;
148 size_t headers;
149 size_t headers_len;
150 size_t body;
151 size_t end;
152 size_t next;
153 size_t message_length;
154 uint32_t uid;
155 int r;
156#if 0
157 int in_mail_data;
158#endif
159#if 0
160 size_t begin;
161#endif
162
163 int state;
164
165 cur_token = * index;
166
167 if (cur_token >= length)
168 return MAILMBOX_ERROR_PARSE;
169
170 start = cur_token;
171 start_len = 0;
172 headers = cur_token;
173
174 if (cur_token + 5 < length) {
175 if (strncmp(str + cur_token, "From ", 5) == 0) {
176 cur_token += 5;
177 while (str[cur_token] != '\n') {
178 cur_token ++;
179 if (cur_token >= length)
180 break;
181 }
182 if (cur_token < length) {
183 cur_token ++;
184 headers = cur_token;
185 start_len = headers - start;
186 }
187 }
188 }
189
190 next = length;
191
192 r = mailmbox_fields_parse(str, length, &cur_token,
193 &uid, &headers_len);
194 if (r != MAILMBOX_NO_ERROR)
195 return r;
196
197 /* save position */
198#if 0
199 begin = cur_token;
200#endif
201
202 mailimf_crlf_parse(str, length, &cur_token);
203
204#if 0
205 if (str[cur_token] == 'F') {
206 printf("start !\n");
207 printf("%50.50s\n", str + cur_token);
208 getchar();
209 }
210#endif
211
212 body = cur_token;
213
214 /* restore position */
215 /* cur_token = begin; */
216
217 state = FIRST_LF;
218
219 end = length;
220
221#if 0
222 in_mail_data = 0;
223#endif
224 while (state != OUT_MAIL) {
225
226 if (cur_token >= length) {
227 if (state == IN_MAIL)
228 end = length;
229 next = length;
230 break;
231 }
232
233 switch(state) {
234 case IN_MAIL:
235 switch(str[cur_token]) {
236 case '\r':
237 state = FIRST_CR;
238 break;
239 case '\n':
240 state = FIRST_LF;
241 break;
242 case 'F':
243 if (cur_token == body) {
244 end = cur_token;
245 next = cur_token;
246 state = PARSING_F;
247 }
248 break;
249#if 0
250 default:
251 in_mail_data = 1;
252 break;
253#endif
254 }
255 break;
256
257 case FIRST_CR:
258 end = cur_token;
259 switch(str[cur_token]) {
260 case '\r':
261 state = SECOND_CR;
262 break;
263 case '\n':
264 state = FIRST_LF;
265 break;
266 default:
267 state = IN_MAIL;
268#if 0
269 in_mail_data = 1;
270#endif
271 break;
272 }
273 break;
274
275 case FIRST_LF:
276 end = cur_token;
277 switch(str[cur_token]) {
278 case '\r':
279 state = SECOND_CR;
280 break;
281 case '\n':
282 state = SECOND_LF;
283 break;
284 default:
285 state = IN_MAIL;
286#if 0
287 in_mail_data = 1;
288#endif
289 break;
290 }
291 break;
292
293 case SECOND_CR:
294 switch(str[cur_token]) {
295 case '\r':
296 end = cur_token;
297 break;
298 case '\n':
299 state = SECOND_LF;
300 break;
301 case 'F':
302 next = cur_token;
303 state = PARSING_F;
304 break;
305 default:
306 state = IN_MAIL;
307#if 0
308 in_mail_data = 1;
309#endif
310 break;
311 }
312 break;
313
314 case SECOND_LF:
315 switch(str[cur_token]) {
316 case '\r':
317 state = SECOND_CR;
318 break;
319 case '\n':
320 end = cur_token;
321 break;
322 case 'F':
323 next = cur_token;
324 state = PARSING_F;
325 break;
326 default:
327 state = IN_MAIL;
328#if 0
329 in_mail_data = 1;
330#endif
331 break;
332 }
333 break;
334
335 case PARSING_F:
336 switch(str[cur_token]) {
337 case 'r':
338 state = PARSING_R;
339 break;
340 default:
341 state = IN_MAIL;
342#if 0
343 in_mail_data = 1;
344#endif
345 break;
346 }
347 break;
348
349 case PARSING_R:
350 switch(str[cur_token]) {
351 case 'o':
352 state = PARSING_O;
353 break;
354 default:
355 state = IN_MAIL;
356#if 0
357 in_mail_data = 1;
358#endif
359 break;
360 }
361 break;
362
363 case PARSING_O:
364 switch(str[cur_token]) {
365 case 'm':
366 state = PARSING_M;
367 break;
368 default:
369 state = IN_MAIL;
370#if 0
371 in_mail_data = 1;
372#endif
373 break;
374 }
375 break;
376
377 case PARSING_M:
378 switch(str[cur_token]) {
379 case ' ':
380 state = OUT_MAIL;
381 break;
382 default:
383 state = IN_MAIL;
384 break;
385 }
386 break;
387 }
388
389 cur_token ++;
390 }
391
392 message_length = end - start;
393
394 * pstart = start;
395 * pstart_len = start_len;
396 * pheaders = headers;
397 * pheaders_len = headers_len;
398 * pbody = body;
399 * pbody_len = end - body;
400 * psize = message_length;
401 * ppadding = next - end;
402 * puid = uid;
403
404 * index = next;
405
406 return MAILMBOX_NO_ERROR;
407}
408
409
410int
411mailmbox_parse_additionnal(struct mailmbox_folder * folder,
412 size_t * index)
413{
414 size_t cur_token;
415
416 size_t start;
417 size_t start_len;
418 size_t headers;
419 size_t headers_len;
420 size_t body;
421 size_t body_len;
422 size_t size;
423 size_t padding;
424 uint32_t uid;
425 int r;
426 int res;
427
428 uint32_t max_uid;
429 uint32_t first_index;
430 unsigned int i;
431 unsigned int j;
432
433 cur_token = * index;
434
435 /* remove temporary UID that we will parse */
436
437 first_index = carray_count(folder->mb_tab);
438
439 for(i = 0 ; i < carray_count(folder->mb_tab) ; i++) {
440 struct mailmbox_msg_info * info;
441
442 info = carray_get(folder->mb_tab, i);
443
444 if (info->msg_start < cur_token) {
445 continue;
446 }
447
448 if (!info->msg_written_uid) {
449 chashdatum key;
450
451 key.data = &info->msg_uid;
452 key.len = sizeof(info->msg_uid);
453
454 chash_delete(folder->mb_hash, &key, NULL);
455 carray_delete_fast(folder->mb_tab, i);
456 mailmbox_msg_info_free(info);
457 if (i < first_index)
458 first_index = i;
459 }
460 }
461
462 /* make a sequence in the table */
463
464 max_uid = folder->mb_written_uid;
465
466 i = 0;
467 j = 0;
468 while (i < carray_count(folder->mb_tab)) {
469 struct mailmbox_msg_info * info;
470
471 info = carray_get(folder->mb_tab, i);
472 if (info != NULL) {
473 carray_set(folder->mb_tab, j, info);
474
475 if (info->msg_uid > max_uid)
476 max_uid = info->msg_uid;
477
478 info->msg_index = j;
479 j ++;
480 }
481 i ++;
482 }
483 carray_set_size(folder->mb_tab, j);
484
485 /* parse content */
486
487 first_index = j;
488
489 while (1) {
490 struct mailmbox_msg_info * info;
491 chashdatum key;
492 chashdatum data;
493
494 r = mailmbox_single_parse(folder->mb_mapping, folder->mb_mapping_size,
495 &cur_token,
496 &start, &start_len,
497 &headers, &headers_len,
498 &body, &body_len,
499 &size, &padding, &uid);
500 if (r == MAILMBOX_NO_ERROR) {
501 /* do nothing */
502 }
503 else if (r == MAILMBOX_ERROR_PARSE)
504 break;
505 else {
506 res = r;
507 goto err;
508 }
509
510 key.data = &uid;
511 key.len = sizeof(uid);
512
513 r = chash_get(folder->mb_hash, &key, &data);
514 if (r == 0) {
515 info = data.data;
516
517 if (!info->msg_written_uid) {
518 /* some new mail has been written and override an
519 existing temporary UID */
520
521 chash_delete(folder->mb_hash, &key, NULL);
522 info->msg_uid = 0;
523
524 if (info->msg_index < first_index)
525 first_index = info->msg_index;
526 }
527 else
528 uid = 0;
529 }
530
531 if (uid > max_uid)
532 max_uid = uid;
533
534 r = mailmbox_msg_info_update(folder,
535 start, start_len, headers, headers_len,
536 body, body_len, size, padding, uid);
537 if (r != MAILMBOX_NO_ERROR) {
538 res = r;
539 goto err;
540 }
541 }
542
543 * index = cur_token;
544
545 folder->mb_written_uid = max_uid;
546
547 /* attribute uid */
548
549 for(i = first_index ; i < carray_count(folder->mb_tab) ; i ++) {
550 struct mailmbox_msg_info * info;
551 chashdatum key;
552 chashdatum data;
553
554 info = carray_get(folder->mb_tab, i);
555
556 if (info->msg_uid != 0) {
557 continue;
558 }
559
560 max_uid ++;
561 info->msg_uid = max_uid;
562
563 key.data = &info->msg_uid;
564 key.len = sizeof(info->msg_uid);
565 data.data = info;
566 data.len = 0;
567
568 r = chash_set(folder->mb_hash, &key, &data, NULL);
569 if (r < 0) {
570 res = MAILMBOX_ERROR_MEMORY;
571 goto err;
572 }
573 }
574
575 folder->mb_max_uid = max_uid;
576
577 return MAILMBOX_NO_ERROR;
578
579 err:
580 return res;
581}
582
583static void flush_uid(struct mailmbox_folder * folder)
584{
585 unsigned int i;
586
587 for(i = 0 ; i < carray_count(folder->mb_tab) ; i++) {
588 struct mailmbox_msg_info * info;
589
590 info = carray_get(folder->mb_tab, i);
591 if (info != NULL)
592 mailmbox_msg_info_free(info);
593 }
594
595 chash_clear(folder->mb_hash);
596 carray_set_size(folder->mb_tab, 0);
597}
598
/*
  mailmbox_parse()

  Discards the message table and UID hash of folder with flush_uid(),
  then parses the mailbox again from offset 0 with
  mailmbox_parse_additionnal().

  @return MAILMBOX_NO_ERROR on success, otherwise the error code
    returned by mailmbox_parse_additionnal().
*/
int mailmbox_parse(struct mailmbox_folder * folder)
{
  size_t offset;

  flush_uid(folder);

  offset = 0;

  /* the result of the parse is the result of this function, whether it
     is MAILMBOX_NO_ERROR or an error code */
  return mailmbox_parse_additionnal(folder, &offset);
}