summaryrefslogtreecommitdiffabout
path: root/libetpan/src/driver/tools/mailthread.c
Side-by-side diff
Diffstat (limited to 'libetpan/src/driver/tools/mailthread.c') (more/less context) (ignore whitespace changes)
-rw-r--r--libetpan/src/driver/tools/mailthread.c1742
1 files changed, 1742 insertions, 0 deletions
diff --git a/libetpan/src/driver/tools/mailthread.c b/libetpan/src/driver/tools/mailthread.c
new file mode 100644
index 0000000..32f73cd
--- a/dev/null
+++ b/libetpan/src/driver/tools/mailthread.c
@@ -0,0 +1,1742 @@
+/*
+ * libEtPan! -- a mail stuff library
+ *
+ * Copyright (C) 2001, 2005 - DINH Viet Hoa
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the libEtPan! project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $Id$
+ */
+
+#include "mailthread.h"
+#include "mailthread_types.h"
+
+#include <string.h>
+#include <time.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "mail.h"
+#include "chash.h"
+#include "carray.h"
+#include "clist.h"
+#include "mailmessage.h"
+
+static inline char * get_msg_id(mailmessage * msg)
+{
+ if (msg->msg_single_fields.fld_message_id != NULL)
+ return msg->msg_single_fields.fld_message_id->mid_value;
+ else
+ return NULL;
+}
+
+static inline clist * get_ref(mailmessage * msg)
+{
+ if (msg->msg_single_fields.fld_references != NULL)
+ return msg->msg_single_fields.fld_references->mid_list;
+ else
+ return NULL;
+}
+
+static inline clist * get_in_reply_to(mailmessage * msg)
+{
+ if (msg->msg_single_fields.fld_in_reply_to != NULL)
+ return msg->msg_single_fields.fld_in_reply_to->mid_list;
+ else
+ return NULL;
+}
+
+static inline int skip_subj_blob(char * subj, size_t * begin,
+ size_t length)
+{
+ /* subj-blob = "[" *BLOBCHAR "]" *WSP */
+ size_t cur_token;
+
+ cur_token = * begin;
+
+ if (subj[cur_token] != '[')
+ return FALSE;
+
+ cur_token ++;
+
+ while (1) {
+ if (cur_token >= length)
+ return FALSE;
+
+ if (subj[cur_token] == '[')
+ return FALSE;
+
+ if (subj[cur_token] == ']')
+ break;
+
+ cur_token ++;
+ }
+
+ cur_token ++;
+
+ while (1) {
+ if (cur_token >= length)
+ break;
+
+ if (subj[cur_token] != ' ')
+ break;
+
+ cur_token ++;
+ }
+
+ * begin = cur_token;
+
+ return TRUE;
+}
+
+static inline int skip_subj_refwd(char * subj, size_t * begin,
+ size_t length)
+{
+ /* subj-refwd = ("re" / ("fw" ["d"])) *WSP [subj-blob] ":" */
+ size_t cur_token;
+ int prefix;
+
+ cur_token = * begin;
+
+ prefix = FALSE;
+ if (length >= 3) {
+ if (strncasecmp(subj + cur_token, "fwd", 3) == 0) {
+ cur_token += 3;
+ prefix = TRUE;
+ }
+ }
+ if (!prefix) {
+ if (length >= 2) {
+ if (strncasecmp(subj + cur_token, "fw", 2) == 0) {
+ cur_token += 2;
+ prefix = TRUE;
+ }
+ else if (strncasecmp(subj + cur_token, "re", 2) == 0) {
+ cur_token += 2;
+ prefix = TRUE;
+ }
+ }
+ }
+
+ if (!prefix)
+ return FALSE;
+
+ while (1) {
+ if (cur_token >= length)
+ break;
+
+ if (subj[cur_token] != ' ')
+ break;
+
+ cur_token ++;
+ }
+
+ skip_subj_blob(subj, &cur_token, length);
+
+ if (subj[cur_token] != ':')
+ return FALSE;
+
+ cur_token ++;
+
+ * begin = cur_token;
+
+ return TRUE;
+}
+
+static inline int skip_subj_leader(struct mailmessage_tree * tree,
+ char * subj, size_t * begin,
+ size_t length)
+{
+ size_t cur_token;
+
+ cur_token = * begin;
+
+ /* subj-leader = (*subj-blob subj-refwd) / WSP */
+
+ if (subj[cur_token] == ' ') {
+ cur_token ++;
+ }
+ else {
+ while (cur_token < length) {
+ if (!skip_subj_blob(subj, &cur_token, length))
+ break;
+ }
+ if (!skip_subj_refwd(subj, &cur_token, length))
+ return FALSE;
+ tree->node_is_reply = TRUE;
+ }
+
+ * begin = cur_token;
+
+ return TRUE;
+}
+
+
+static char * extract_subject(char * default_from,
+ struct mailmessage_tree * tree,
+ char * str)
+{
+ char * subj;
+ char * cur;
+ char * write_pos;
+ size_t len;
+ size_t begin;
+
+ char * decoded;
+ size_t cur_token;
+
+ int do_repeat_5;
+ int do_repeat_6;
+ int r;
+
+ /*
+ (1) Convert any RFC 2047 encoded-words in the subject to
+ UTF-8.
+ */
+
+ decoded = NULL;
+
+ cur_token = 0;
+ r = mailmime_encoded_phrase_parse(default_from, str, strlen(str),
+ &cur_token, "utf-8",
+ &decoded);
+
+ if (r == MAILIMF_NO_ERROR) {
+ subj = decoded;
+ }
+ else
+ subj = strdup(str);
+
+ len = strlen(subj);
+
+ /*
+ Convert all tabs and continuations to space.
+ Convert all multiple spaces to a single space.
+ */
+
+ cur = subj;
+ write_pos = subj;
+ while (* cur != '\0') {
+ int cont;
+
+ switch (* cur) {
+ case '\t':
+ case '\r':
+ case '\n':
+ cont = TRUE;
+
+ cur ++;
+ while (* cur && cont) {
+ switch (* cur) {
+ case '\t':
+ case '\r':
+ case '\n':
+ cont = TRUE;
+ break;
+ default:
+ cont = FALSE;
+ break;
+ }
+ cur ++;
+ }
+
+ * write_pos = ' ';
+ write_pos ++;
+
+ break;
+
+ default:
+ * write_pos = * cur;
+ write_pos ++;
+
+ cur ++;
+
+ break;
+ }
+ }
+ * write_pos = '\0';
+
+ begin = 0;
+
+ do {
+ do_repeat_6 = FALSE;
+
+ /*
+ (2) Remove all trailing text of the subject that matches
+ the subj-trailer ABNF, repeat until no more matches are
+ possible.
+ */
+
+ while (len > 0) {
+ int chg;
+
+ chg = FALSE;
+
+ /* subj-trailer = "(fwd)" / WSP */
+ if (subj[len - 1] == ' ') {
+ subj[len - 1] = '\0';
+ len --;
+ }
+ else {
+ if (len < 5)
+ break;
+
+ if (strncasecmp(subj + len - 5, "(fwd)", 5) != 0)
+ break;
+
+ subj[len - 5] = '\0';
+ len -= 5;
+ tree->node_is_reply = TRUE;
+ }
+ }
+
+ do {
+ size_t saved_begin;
+
+ do_repeat_5 = FALSE;
+
+ /*
+ (3) Remove all prefix text of the subject that matches the
+ subj-leader ABNF.
+ */
+
+ if (skip_subj_leader(tree, subj, &begin, len))
+ do_repeat_5 = TRUE;
+
+ /*
+ (4) If there is prefix text of the subject that matches the
+ subj-blob ABNF, and removing that prefix leaves a non-empty
+ subj-base, then remove the prefix text.
+ */
+
+ saved_begin = begin;
+ if (skip_subj_blob(subj, &begin, len)) {
+ if (begin == len) {
+ /* this will leave a empty subject base */
+ begin = saved_begin;
+ }
+ else
+ do_repeat_5 = TRUE;
+ }
+
+ /*
+ (5) Repeat (3) and (4) until no matches remain.
+ Note: it is possible to defer step (2) until step (6),
+ but this requires checking for subj-trailer in step (4).
+ */
+
+ }
+ while (do_repeat_5);
+
+ /*
+ (6) If the resulting text begins with the subj-fwd-hdr ABNF
+ and ends with the subj-fwd-trl ABNF, remove the
+ subj-fwd-hdr and subj-fwd-trl and repeat from step (2).
+ */
+
+ if (len >= 5) {
+ size_t saved_begin;
+
+ saved_begin = begin;
+ if (strncasecmp(subj + begin, "[fwd:", 5) == 0) {
+ begin += 5;
+
+ if (subj[len - 1] != ']')
+ saved_begin = begin;
+ else {
+ tree->node_is_reply = TRUE;
+
+ subj[len - 1] = '\0';
+ len --;
+ do_repeat_6 = TRUE;
+ }
+ }
+ }
+
+ }
+ while (do_repeat_6);
+
+ /*
+ (7) The resulting text is the "base subject" used in
+ threading.
+ */
+
+ /* convert to upper case */
+
+ cur = subj + begin;
+ write_pos = subj;
+
+ while (* cur != '\0') {
+ * write_pos = (char) toupper((unsigned char) * cur);
+ cur ++;
+ write_pos ++;
+ }
+ * write_pos = '\0';
+
+ return subj;
+}
+
+static int get_extracted_subject(char * default_from,
+ struct mailmessage_tree * tree,
+ char ** result)
+{
+ if (tree->node_msg->msg_single_fields.fld_subject != NULL) {
+ char * subj;
+
+ subj = extract_subject(default_from,
+ tree, tree->node_msg->msg_single_fields.fld_subject->sbj_value);
+ if (subj == NULL)
+ return MAIL_ERROR_MEMORY;
+
+ * result = subj;
+
+ return MAIL_NO_ERROR;
+ }
+
+ return MAIL_ERROR_SUBJECT_NOT_FOUND;
+}
+
+static int get_thread_subject(char * default_from,
+ struct mailmessage_tree * tree,
+ char ** result)
+{
+ char * thread_subject;
+ int r;
+ unsigned int i;
+
+ if (tree->node_msg != NULL) {
+ if (tree->node_msg->msg_fields != NULL) {
+ r = get_extracted_subject(default_from, tree, &thread_subject);
+
+ if (r != MAIL_NO_ERROR)
+ return r;
+
+ * result = thread_subject;
+ return MAIL_NO_ERROR;
+ }
+ }
+
+ for(i = 0 ; i < carray_count(tree->node_children) ; i ++) {
+ struct mailmessage_tree * child;
+
+ child = carray_get(tree->node_children, i);
+
+ r = get_thread_subject(default_from, child, &thread_subject);
+
+ switch (r) {
+ case MAIL_NO_ERROR:
+ * result = thread_subject;
+ return MAIL_NO_ERROR;
+
+ case MAIL_ERROR_SUBJECT_NOT_FOUND:
+ /* do nothing */
+ break;
+
+ default:
+ return r;
+ }
+ }
+
+ return MAIL_ERROR_SUBJECT_NOT_FOUND;
+}
+
+
+
+#ifndef WRONG
+#define WRONG (-1)
+#endif /* !defined WRONG */
+
+static int tmcomp(struct tm * atmp, struct tm * btmp)
+{
+ register int result;
+
+ if ((result = (atmp->tm_year - btmp->tm_year)) == 0 &&
+ (result = (atmp->tm_mon - btmp->tm_mon)) == 0 &&
+ (result = (atmp->tm_mday - btmp->tm_mday)) == 0 &&
+ (result = (atmp->tm_hour - btmp->tm_hour)) == 0 &&
+ (result = (atmp->tm_min - btmp->tm_min)) == 0)
+ result = atmp->tm_sec - btmp->tm_sec;
+ return result;
+}
+
+static time_t mkgmtime(struct tm * tmp)
+{
+ register int dir;
+ register int bits;
+ register int saved_seconds;
+ time_t t;
+ struct tm yourtm, *mytm;
+
+ yourtm = *tmp;
+ saved_seconds = yourtm.tm_sec;
+ yourtm.tm_sec = 0;
+ /*
+ ** Calculate the number of magnitude bits in a time_t
+ ** (this works regardless of whether time_t is
+ ** signed or unsigned, though lint complains if unsigned).
+ */
+ for (bits = 0, t = 1; t > 0; ++bits, t <<= 1)
+ ;
+ /*
+ ** If time_t is signed, then 0 is the median value,
+ ** if time_t is unsigned, then 1 << bits is median.
+ */
+ t = (t < 0) ? 0 : ((time_t) 1 << bits);
+ for ( ; ; ) {
+ mytm = gmtime(&t);
+ dir = tmcomp(mytm, &yourtm);
+ if (dir != 0) {
+ if (bits-- < 0)
+ return WRONG;
+ if (bits < 0)
+ --t;
+ else if (dir > 0)
+ t -= (time_t) 1 << bits;
+ else t += (time_t) 1 << bits;
+ continue;
+ }
+ break;
+ }
+ t += saved_seconds;
+ return t;
+}
+
+static inline time_t get_date(mailmessage * msg)
+{
+ struct tm tmval;
+ time_t timeval;
+ struct mailimf_date_time * date_time;
+
+ if (msg->msg_single_fields.fld_orig_date == NULL)
+ return (time_t) -1;
+
+ date_time = msg->msg_single_fields.fld_orig_date->dt_date_time;
+
+ tmval.tm_sec = date_time->dt_sec;
+ tmval.tm_min = date_time->dt_min;
+ tmval.tm_hour = date_time->dt_hour;
+ tmval.tm_sec = date_time->dt_sec;
+ tmval.tm_mday = date_time->dt_day;
+ tmval.tm_mon = date_time->dt_month - 1;
+ tmval.tm_year = date_time->dt_year - 1900;
+
+ timeval = mkgmtime(&tmval);
+
+ timeval -= date_time->dt_zone * 36;
+
+ return timeval;
+}
+
+static inline int is_descendant(struct mailmessage_tree * node,
+ struct mailmessage_tree * maybe_child)
+{
+ unsigned int i;
+
+ for(i = 0 ; i < carray_count(node->node_children) ; i++) {
+ struct mailmessage_tree * tree;
+
+ tree = carray_get(node->node_children, i);
+ if (tree == maybe_child)
+ return TRUE;
+ if (carray_count(tree->node_children) != 0)
+ if (is_descendant(tree, maybe_child))
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static int delete_dummy(carray * rootlist, carray * sibling_list,
+ unsigned int cur, unsigned int * pnext)
+{
+ struct mailmessage_tree * env_tree;
+ int res;
+ int r;
+ unsigned int cur_child;
+ unsigned int next;
+
+ env_tree = carray_get(sibling_list, cur);
+
+ cur_child = 0;
+ while (cur_child < carray_count(env_tree->node_children)) {
+ delete_dummy(rootlist, env_tree->node_children, cur_child, &cur_child);
+ }
+
+ if (env_tree->node_msg == NULL) {
+ if (carray_count(env_tree->node_children) == 0) {
+
+ /* If it is a dummy message with NO children, delete it. */
+ mailmessage_tree_free(env_tree);
+ carray_delete(sibling_list, cur);
+ next = cur;
+ }
+ else {
+ /* If it is a dummy message with children, delete it, but
+ promote its children to the current level. */
+
+ /*
+ Do not promote the children if doing so would make them
+ children of the root, unless there is only one child.
+ */
+
+ cur_child = 0;
+ if ((sibling_list != rootlist) ||
+ (carray_count(env_tree->node_children) == 1)) {
+ while (cur_child < carray_count(env_tree->node_children)) {
+ struct mailmessage_tree * child;
+
+ child = carray_get(env_tree->node_children, cur_child);
+ r = carray_add(sibling_list, child, NULL);
+ if (r < 0) {
+ res = MAIL_ERROR_MEMORY;
+ goto err;
+ }
+ /* set new parent of the children */
+ child->node_parent = env_tree->node_parent;
+
+ carray_delete(env_tree->node_children, cur_child);
+ }
+ mailmessage_tree_free(env_tree);
+ carray_delete(sibling_list, cur);
+ next = cur;
+ }
+ else
+ next = cur + 1;
+ }
+ }
+ else
+ next = cur + 1;
+
+ * pnext = next;
+
+ return MAIL_NO_ERROR;
+
+ err:
+ return res;
+}
+
+static inline time_t tree_get_date(struct mailmessage_tree * tree)
+{
+ if (tree->node_msg != NULL) {
+ return tree->node_date;
+ }
+ else {
+ struct mailmessage_tree * subtree;
+
+ if (carray_count(tree->node_children) == 0)
+ return (time_t) -1;
+
+ subtree = carray_get(tree->node_children, 0);
+
+ return subtree->node_date;
+ }
+}
+
+static inline uint32_t tree_get_index(struct mailmessage_tree * tree)
+{
+ if (tree->node_msg == NULL)
+ return 0;
+
+ return tree->node_msg->msg_index;
+}
+
+int mailthread_tree_timecomp(struct mailmessage_tree ** ptree1,
+ struct mailmessage_tree ** ptree2)
+{
+ time_t date1;
+ time_t date2;
+
+ date1 = tree_get_date(* ptree1);
+ date2 = tree_get_date(* ptree2);
+
+ if ((date1 == (time_t) -1) || (date2 == (time_t) -1)) {
+ uint32_t index1;
+ uint32_t index2;
+
+ index1 = tree_get_index(* ptree1);
+ index2 = tree_get_index(* ptree2);
+ return (int) ((long) index1 - (long) index2);
+ }
+
+ return (int) ((long) date1 - (long) date2);
+}
+
+static int tree_subj_time_comp(struct mailmessage_tree ** ptree1,
+ struct mailmessage_tree ** ptree2)
+{
+ char * subj1;
+ char * subj2;
+ time_t date1;
+ time_t date2;
+ int r;
+
+ subj1 = (* ptree1)->node_base_subject;
+ subj2 = (* ptree2)->node_base_subject;
+
+ if ((subj1 != NULL) && (subj2 != NULL))
+ r = strcmp(subj1, subj2);
+ else {
+ if ((subj1 == NULL) && (subj2 == NULL))
+ r = 0;
+ else if (subj1 == NULL)
+ r = -1;
+ else /* subj2 == NULL */
+ r = 1;
+ }
+
+ if (r != 0)
+ return r;
+
+ date1 = (* ptree1)->node_date;
+ date2 = (* ptree2)->node_date;
+
+ if ((date1 == (time_t) -1) || (date2 == (time_t) -1))
+ return ((int32_t) (* ptree1)->node_msg->msg_index) -
+ ((int32_t) (* ptree2)->node_msg->msg_index);
+
+ return (int) ((long) date1 - (long) date2);
+}
+
+
+
+int mail_thread_sort(struct mailmessage_tree * tree,
+ int (* comp_func)(struct mailmessage_tree **,
+ struct mailmessage_tree **),
+ int sort_sub)
+{
+ unsigned int cur;
+ int r;
+ int res;
+
+ for(cur = 0 ; cur < carray_count(tree->node_children) ; cur ++) {
+ struct mailmessage_tree * subtree;
+
+ subtree = carray_get(tree->node_children, cur);
+
+ if (sort_sub) {
+ r = mail_thread_sort(subtree, comp_func, sort_sub);
+ if (r != MAIL_NO_ERROR) {
+ res = r;
+ goto err;
+ }
+ }
+ }
+
+ qsort(carray_data(tree->node_children), carray_count(tree->node_children),
+ sizeof(struct mailmessage_tree *),
+ (int (*)(const void *, const void *)) comp_func);
+
+ return MAIL_NO_ERROR;
+
+ err:
+ return res;
+}
+
+
+static int
+mail_build_thread_references(char * default_from,
+ struct mailmessage_list * env_list,
+ struct mailmessage_tree ** result,
+ int use_subject,
+ int (* comp_func)(struct mailmessage_tree **,
+ struct mailmessage_tree **))
+{
+ int r;
+ int res;
+ chash * msg_id_hash;
+ unsigned int cur;
+ struct mailmessage_tree * root;
+ carray * rootlist;
+ carray * msg_list;
+ unsigned int i;
+ chash * subject_hash;
+
+ msg_id_hash = chash_new(128, CHASH_COPYNONE);
+ if (msg_id_hash == NULL) {
+ res = MAIL_ERROR_MEMORY;
+ goto err;
+ }
+
+ root = mailmessage_tree_new(NULL, (time_t) -1, NULL);
+ if (root == NULL) {
+ res = MAIL_ERROR_MEMORY;
+ goto free_hash;
+ }
+ rootlist = root->node_children;
+
+ msg_list = carray_new(128);
+ if (msg_list == NULL) {
+ res = MAIL_ERROR_MEMORY;
+ goto free_root;
+ }
+
+ /* collect message-ID */
+ for(i = 0 ; i < carray_count(env_list->msg_tab) ; i ++) {
+ mailmessage * msg;
+ char * msgid;
+ struct mailmessage_tree * env_tree;
+ chashdatum hashkey;
+ chashdatum hashdata;
+ chashdatum hashold;
+ time_t date;
+
+ msg = carray_get(env_list->msg_tab, i);
+
+ if (msg == NULL)
+ continue;
+
+ if (msg->msg_fields != NULL) {
+ msgid = get_msg_id(msg);
+
+ if (msgid == NULL) {
+ msgid = mailimf_get_message_id();
+ }
+ else {
+ hashkey.data = msgid;
+ hashkey.len = strlen(msgid);
+
+ if (chash_get(msg_id_hash, &hashkey, &hashdata) == 0)
+ msgid = mailimf_get_message_id();
+ else
+ msgid = strdup(msgid);
+ }
+
+ if (msgid == NULL) {
+ res = MAIL_ERROR_MEMORY;
+ goto free_list;
+ }
+
+ date = get_date(msg);
+
+ env_tree = mailmessage_tree_new(msgid, date, msg);
+ if (env_tree == NULL) {
+ res = MAIL_ERROR_MEMORY;
+ goto free_list;
+ }
+
+ r = carray_add(msg_list, env_tree, NULL);
+ if (r < 0) {
+ mailmessage_tree_free(env_tree);
+ res = MAIL_ERROR_MEMORY;
+ goto free_list;
+ }
+
+ hashkey.data = msgid;
+ hashkey.len = strlen(msgid);
+
+ hashdata.data = env_tree;
+ hashdata.len = 0;
+
+ r = chash_set(msg_id_hash, &hashkey, &hashdata, &hashold);
+ if (r < 0) {
+ res = MAIL_ERROR_MEMORY;
+ goto free_list;
+ }
+ }
+ }
+
+ /* (1) for all messages */
+
+ for(cur = 0 ; cur < carray_count(msg_list) ; cur ++) {
+ struct mailmessage_tree * env_tree;
+ mailmessage * msg;
+ clist * ref;
+
+ env_tree = carray_get(msg_list, cur);
+
+ msg = env_tree->node_msg;
+
+ ref = NULL;
+ if (msg != NULL) {
+ ref = get_ref(msg);
+ if (ref == NULL)
+ ref = get_in_reply_to(msg);
+ }
+
+ /* (A) Using the Message IDs in the message's references, link
+ the corresponding messages (those whose Message-ID header
+ line contains the given reference Message ID) together as
+ parent/child.
+ */
+
+ if (ref != NULL) {
+ /* try to start a tree */
+
+ clistiter * cur_ref;
+ chashdatum hashkey;
+ chashdatum hashdata;
+ chashdatum hashold;
+ struct mailmessage_tree * env_cur_tree;
+ struct mailmessage_tree * last_env_cur_tree;
+
+ env_cur_tree = NULL;
+ for(cur_ref = clist_begin(ref) ; cur_ref != NULL ;
+ cur_ref = clist_next(cur_ref)) {
+ char * msgid;
+
+ last_env_cur_tree = env_cur_tree;
+
+ msgid = clist_content(cur_ref);
+
+ hashkey.data = msgid;
+ hashkey.len = strlen(msgid);
+
+ r = chash_get(msg_id_hash, &hashkey, &hashdata);
+ if (r < 0) {
+ /* not found, create a dummy message */
+ msgid = strdup(msgid);
+ if (msgid == NULL) {
+ res = MAIL_ERROR_MEMORY;
+ goto free_list;
+ }
+
+ env_cur_tree = mailmessage_tree_new(msgid, (time_t) -1, NULL);
+ if (env_cur_tree == NULL) {
+ free(msgid);
+ res = MAIL_ERROR_MEMORY;
+ goto free_list;
+ }
+
+ r = carray_add(msg_list, env_cur_tree, NULL);
+ if (r < 0) {
+ mailmessage_tree_free(env_cur_tree);
+ res = MAIL_ERROR_MEMORY;
+ goto free_list;
+ }
+
+ hashkey.data = msgid;
+ hashkey.len = strlen(msgid);
+
+ hashdata.data = env_cur_tree;
+ hashdata.len = 0;
+
+ r = chash_set(msg_id_hash, &hashkey, &hashdata, &hashold);
+ if (r < 0) {
+ res = MAIL_ERROR_MEMORY;
+ goto free_list;
+ }
+ }
+ else {
+ env_cur_tree = hashdata.data;
+ }
+
+ if (last_env_cur_tree != NULL) {
+ if (env_cur_tree->node_parent == NULL) {
+ /* make it one child */
+ if (env_cur_tree != last_env_cur_tree) {
+ if (!is_descendant(env_cur_tree, last_env_cur_tree)) {
+ /* set parent */
+ env_cur_tree->node_parent = last_env_cur_tree;
+ r = carray_add(last_env_cur_tree->node_children,
+ env_cur_tree, NULL);
+ if (r < 0) {
+ res = MAIL_ERROR_MEMORY;
+ goto free_list;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /* (B) Create a parent/child link between the last reference
+ (or NIL if there are no references) and the current message.
+ If the current message already has a parent, it is probably
+ the result of a truncated References header line, so break
+ the current parent/child link before creating the new
+ correct one.
+ */
+
+ last_env_cur_tree = env_cur_tree;
+
+ if (last_env_cur_tree != NULL) {
+ if (env_tree->node_parent == NULL) {
+ if (last_env_cur_tree != env_tree) {
+ if (!is_descendant(env_tree, last_env_cur_tree)) {
+ /* set parent */
+ env_tree->node_parent = last_env_cur_tree;
+ r = carray_add(last_env_cur_tree->node_children, env_tree, NULL);
+ if (r < 0) {
+ res = MAIL_ERROR_MEMORY;
+ goto free_list;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ chash_free(msg_id_hash);
+ msg_id_hash = NULL;
+
+ /* (2) Gather together all of the messages that have no parents
+ and make them all children (siblings of one another) of a dummy
+ parent (the "root").
+ */
+
+ for(cur = 0 ; cur < carray_count(msg_list) ; cur ++) {
+ struct mailmessage_tree * env_tree;
+
+ env_tree = carray_get(msg_list, cur);
+ if (env_tree->node_parent == NULL) {
+ r = carray_add(rootlist, env_tree, NULL);
+ if (r < 0) {
+ res = MAIL_ERROR_MEMORY;
+ goto free_list;
+ }
+ /* set parent */
+ env_tree->node_parent = root;
+ }
+ }
+
+ carray_free(msg_list);
+ msg_list = NULL;
+
+ /* (3) Prune dummy messages from the thread tree.
+ */
+
+ cur = 0;
+ while (cur < carray_count(rootlist)) {
+ r = delete_dummy(rootlist, rootlist, cur, &cur);
+ if (r != MAIL_NO_ERROR) {
+ res = r;
+ goto free_list;
+ }
+ }
+
+ /* (4) Sort the messages under the root (top-level siblings only)
+ by sent date.
+ */
+
+ r = mail_thread_sort(root, mailthread_tree_timecomp, FALSE);
+ if (r != MAIL_NO_ERROR) {
+ res = r;
+ goto free_list;
+ }
+
+ if (use_subject) {
+
+ /* (5) Gather together messages under the root that have the same
+ extracted subject text.
+
+ (A) Create a table for associating extracted subjects with
+ messages.
+ */
+
+ subject_hash = chash_new(128, CHASH_COPYVALUE);
+ if (subject_hash == NULL) {
+ res = MAIL_ERROR_MEMORY;
+ goto free_list;
+ }
+
+ /*
+ (B) Populate the subject table with one message per
+ extracted subject. For each child of the root:
+ */
+
+ for(cur = 0 ; cur < carray_count(rootlist) ; cur ++) {
+ struct mailmessage_tree * env_tree;
+ chashdatum key;
+ chashdatum data;
+ char * base_subject;
+ int r;
+
+ env_tree = carray_get(rootlist, cur);
+
+ /*
+ (i) Find the subject of this thread by extracting the
+ base subject from the current message, or its first child
+ if the current message is a dummy.
+ */
+
+ r = get_thread_subject(default_from, env_tree, &base_subject);
+
+ /*
+ (ii) If the extracted subject is empty, skip this
+ message.
+ */
+
+ if (r == MAIL_ERROR_SUBJECT_NOT_FOUND) {
+ /* no subject found */
+ continue;
+ }
+ else if (r == MAIL_NO_ERROR) {
+ if (* base_subject == '\0') {
+ /* subject empty */
+ free(base_subject);
+ continue;
+ }
+ else {
+ /* do nothing */
+ }
+ }
+ else {
+ res = r;
+ goto free_subject_hash;
+ }
+
+ env_tree->node_base_subject = base_subject;
+
+ /*
+ (iii) Lookup the message associated with this extracted
+ subject in the table.
+ */
+
+ key.data = base_subject;
+ key.len = strlen(base_subject);
+
+ r = chash_get(subject_hash, &key, &data);
+
+ if (r < 0) {
+ /*
+ (iv) If there is no message in the table with this
+ subject, add the current message and the extracted
+ subject to the subject table.
+ */
+
+ data.data = &cur;
+ data.len = sizeof(cur);
+
+ r = chash_set(subject_hash, &key, &data, NULL);
+ if (r < 0) {
+ res = MAIL_ERROR_MEMORY;
+ goto free_subject_hash;
+ }
+ }
+ else {
+ /*
+ Otherwise, replace the message in the table with the
+ current message if the message in the table is not a
+ dummy AND either of the following criteria are true:
+ The current message is a dummy, OR
+ The message in the table is a reply or forward (its
+ original subject contains a subj-refwd part and/or a
+ "(fwd)" subj-trailer) and the current message is not.
+ */
+ struct mailmessage_tree * msg_in_table;
+ unsigned int * iter_in_table;
+ int replace;
+
+ iter_in_table = data.data;
+ msg_in_table = carray_get(rootlist, cur);
+
+ replace = FALSE;
+ /* message is dummy if info is NULL */
+ if (msg_in_table->node_msg != NULL) {
+
+ if (env_tree->node_msg == NULL)
+ replace = TRUE;
+ else {
+ if (env_tree->node_is_reply && !env_tree->node_is_reply)
+ replace = TRUE;
+ }
+ }
+
+ if (replace) {
+ data.data = &cur;
+ data.len = sizeof(cur);
+
+ r = chash_set(subject_hash, &key, &data, NULL);
+ if (r < 0) {
+ res = MAIL_ERROR_MEMORY;
+ goto free_subject_hash;
+ }
+ }
+ }
+ }
+
+ /*
+ (C) Merge threads with the same subject. For each child of
+ the root:
+ */
+
+ cur = 0;
+ while (cur < carray_count(rootlist)) {
+ struct mailmessage_tree * env_tree;
+ chashdatum key;
+ chashdatum data;
+ int r;
+ struct mailmessage_tree * main_tree;
+ unsigned int * main_cur;
+
+ env_tree = carray_get(rootlist, cur);
+
+ if (env_tree == NULL)
+ goto next_msg;
+
+ /*
+ (i) Find the subject of this thread as in step 4.B.i
+ above.
+ */
+
+ /* already done in tree->node_base_subject */
+
+ /*
+ (ii) If the extracted subject is empty, skip this
+ message.
+ */
+
+ if (env_tree->node_base_subject == NULL)
+ goto next_msg;
+
+ if (* env_tree->node_base_subject == '\0')
+ goto next_msg;
+
+ /*
+ (iii) Lookup the message associated with this extracted
+ subject in the table.
+ */
+
+ key.data = env_tree->node_base_subject;
+ key.len = strlen(env_tree->node_base_subject);
+
+ r = chash_get(subject_hash, &key, &data);
+ if (r < 0)
+ goto next_msg;
+
+ /*
+ (iv) If the message in the table is the current message,
+ skip this message.
+ */
+
+ main_cur = data.data;
+ if (* main_cur == cur)
+ goto next_msg;
+
+ /*
+ Otherwise, merge the current message with the one in the
+ table using the following rules:
+ */
+
+ main_tree = carray_get(rootlist, * main_cur);
+
+ /*
+ If both messages are dummies, append the current
+ message's children to the children of the message in
+ the table (the children of both messages become
+ siblings), and then delete the current message.
+ */
+
+ if ((env_tree->node_msg == NULL) && (main_tree->node_msg == NULL)) {
+ unsigned int old_size;
+
+ old_size = carray_count(main_tree->node_children);
+
+ r = carray_set_size(main_tree->node_children, old_size +
+ carray_count(env_tree->node_children));
+ if (r < 0) {
+ res = MAIL_ERROR_MEMORY;
+ goto free_subject_hash;
+ }
+
+ for(i = 0 ; i < carray_count(env_tree->node_children) ; i ++) {
+ struct mailmessage_tree * child;
+
+ child = carray_get(env_tree->node_children, i);
+ carray_set(main_tree->node_children, old_size + i, child);
+ /* set parent */
+ child->node_parent = main_tree;
+ }
+ carray_set_size(env_tree->node_children, 0);
+ /* this is the only case where children can be NULL,
+ this is before freeing it */
+ mailmessage_tree_free(env_tree);
+ carray_delete_fast(rootlist, cur);
+ }
+
+ /*
+ If the message in the table is a dummy and the current
+ message is not, make the current message a child of
+ the message in the table (a sibling of it's children).
+ */
+
+ else if (main_tree->node_msg == NULL) {
+ r = carray_add(main_tree->node_children, env_tree, NULL);
+ if (r < 0) {
+ res = MAIL_ERROR_MEMORY;
+ goto free_subject_hash;
+ }
+ /* set parent */
+ env_tree->node_parent = main_tree;
+
+ carray_delete_fast(rootlist, cur);
+ }
+
+ /*
+ If the current message is a reply or forward and the
+ message in the table is not, make the current message
+ a child of the message in the table (a sibling of it's
+ children).
+ */
+
+ else if (env_tree->node_is_reply && !main_tree->node_is_reply) {
+ r = carray_add(main_tree->node_children, env_tree, NULL);
+ if (r < 0) {
+ res = MAIL_ERROR_MEMORY;
+ goto free_subject_hash;
+ }
+ /* set parent */
+ env_tree->node_parent = main_tree;
+
+ carray_delete_fast(rootlist, cur);
+ }
+
+ /*
+ Otherwise, create a new dummy message and make both
+ the current message and the message in the table
+ children of the dummy. Then replace the message in
+ the table with the dummy message.
+ Note: Subject comparisons are case-insensitive, as
+ described under "Internationalization
+ Considerations."
+ */
+
+ else {
+ struct mailmessage_tree * new_main_tree;
+ char * base_subject;
+ unsigned int last;
+
+ new_main_tree = mailmessage_tree_new(NULL, (time_t) -1, NULL);
+ if (new_main_tree == NULL) {
+ res = MAIL_ERROR_MEMORY;
+ goto free_subject_hash;
+ }
+
+ /* main_tree->node_base_subject is never NULL */
+
+ base_subject = strdup(main_tree->node_base_subject);
+ if (base_subject == NULL) {
+ mailmessage_tree_free(new_main_tree);
+ res = MAIL_ERROR_MEMORY;
+ goto free_subject_hash;
+ }
+
+ new_main_tree->node_base_subject = base_subject;
+
+ r = carray_add(rootlist, new_main_tree, &last);
+ if (r < 0) {
+ mailmessage_tree_free(new_main_tree);
+ res = MAIL_ERROR_MEMORY;
+ goto free_subject_hash;
+ }
+
+ r = carray_add(new_main_tree->node_children, main_tree, NULL);
+ if (r < 0) {
+ res = MAIL_ERROR_MEMORY;
+ goto free_subject_hash;
+ }
+ /* set parent */
+ main_tree->node_parent = new_main_tree;
+
+ carray_delete_fast(rootlist, * main_cur);
+
+ r = carray_add(new_main_tree->node_children, env_tree, NULL);
+ if (r < 0) {
+ res = MAIL_ERROR_MEMORY;
+ goto free_subject_hash;
+ }
+ /* set parent */
+ env_tree->node_parent = new_main_tree;
+
+ carray_delete_fast(rootlist, cur);
+
+ data.data = &last;
+ data.len = sizeof(last);
+
+ r = chash_set(subject_hash, &key, &data, NULL);
+
+ if (r < 0) {
+ res = MAIL_ERROR_MEMORY;
+ goto free_subject_hash;
+ }
+ }
+
+ continue;
+
+ next_msg:
+ cur ++;
+ continue;
+ }
+
+ i = 0;
+ for(cur = 0 ; cur < carray_count(rootlist) ; cur ++) {
+ struct mailmessage_tree * env_tree;
+
+ env_tree = carray_get(rootlist, cur);
+ if (env_tree == NULL)
+ continue;
+
+ carray_set(rootlist, i, env_tree);
+ i ++;
+ }
+ carray_set_size(rootlist, i);
+
+ chash_free(subject_hash);
+ }
+
+ /*
+ (6) Traverse the messages under the root and sort each set of
+ siblings by sent date. Traverse the messages in such a way
+ that the "youngest" set of siblings are sorted first, and the
+ "oldest" set of siblings are sorted last (grandchildren are
+ sorted before children, etc).
+
+ In the case of an exact match on
+ sent date or if either of the Date: headers used in a
+ comparison can not be parsed, use the order in which the
+ messages appear in the mailbox (that is, by sequence number) to
+ determine the order. In the case of a dummy message (which can
+ only occur with top-level siblings), use its first child for
+ sorting.
+ */
+
+#if 0
+ if (comp_func != NULL) {
+ r = mail_thread_sort(root, comp_func, TRUE);
+ if (r != MAIL_NO_ERROR) {
+ res = r;
+ goto free_list;
+ }
+ }
+#endif
+ if (comp_func == NULL)
+ comp_func = mailthread_tree_timecomp;
+
+ r = mail_thread_sort(root, comp_func, TRUE);
+ if (r != MAIL_NO_ERROR) {
+ res = r;
+ goto free_list;
+ }
+
+ * result = root;
+
+ return MAIL_NO_ERROR;
+
+ free_subject_hash:
+ chash_free(subject_hash);
+ free_list:
+ if (msg_list != NULL) {
+ for(i = 0 ; i < carray_count(msg_list) ; i ++)
+ mailmessage_tree_free(carray_get(msg_list, i));
+ carray_free(msg_list);
+ }
+ free_root:
+ mailmessage_tree_free_recursive(root);
+ free_hash:
+ if (msg_id_hash != NULL)
+ chash_free(msg_id_hash);
+ err:
+ return res;
+}
+
+
+
+static int
+mail_build_thread_orderedsubject(char * default_from,
+ struct mailmessage_list * env_list,
+ struct mailmessage_tree ** result,
+ int (* comp_func)(struct mailmessage_tree **,
+ struct mailmessage_tree **))
+{
+ unsigned int i;
+ carray * rootlist;
+ unsigned int cur;
+ struct mailmessage_tree * root;
+ int res;
+ int r;
+ struct mailmessage_tree * current_thread;
+
+ root = mailmessage_tree_new(NULL, (time_t) -1, NULL);
+ if (root == NULL) {
+ res = MAIL_ERROR_MEMORY;
+ goto err;
+ }
+ rootlist = root->node_children;
+
+ /*
+ The ORDEREDSUBJECT threading algorithm is also referred to as
+ "poor man's threading."
+ */
+
+ for(i = 0 ; i < carray_count(env_list->msg_tab) ; i ++) {
+ mailmessage * msg;
+ struct mailmessage_tree * env_tree;
+ char * base_subject;
+ time_t date;
+
+ msg = carray_get(env_list->msg_tab, i);
+
+ if (msg == NULL)
+ continue;
+
+ if (msg->msg_fields != NULL) {
+
+ date = get_date(msg);
+
+ env_tree = mailmessage_tree_new(NULL, date, msg);
+ if (env_tree == NULL) {
+ res = MAIL_ERROR_MEMORY;
+ goto free;
+ }
+
+ /* set parent */
+ env_tree->node_parent = root;
+ r = carray_add(rootlist, env_tree, NULL);
+ if (r < 0) {
+ mailmessage_tree_free(env_tree);
+ res = MAIL_ERROR_MEMORY;
+ goto free;
+ }
+
+ r = get_extracted_subject(default_from, env_tree, &base_subject);
+ switch (r) {
+ case MAIL_NO_ERROR:
+ env_tree->node_base_subject = base_subject;
+ break;
+
+ case MAIL_ERROR_SUBJECT_NOT_FOUND:
+ break;
+
+ default:
+ res = r;
+ goto free;
+ }
+ }
+ }
+
+ /*
+ The searched messages are sorted by
+ subject and then by the sent date.
+ */
+
+ r = mail_thread_sort(root, tree_subj_time_comp, FALSE);
+ if (r != MAIL_NO_ERROR) {
+ res = r;
+ goto free;
+ }
+
+ /*
+ The messages are then split
+ into separate threads, with each thread containing messages
+ with the same extracted subject text.
+ */
+
+ current_thread = NULL;
+
+ cur = 0;
+ while (cur < carray_count(rootlist)) {
+ struct mailmessage_tree * cur_env_tree;
+
+ cur_env_tree = carray_get(rootlist, cur);
+ if (current_thread == NULL) {
+ current_thread = cur_env_tree;
+ cur ++;
+ continue;
+ }
+
+ if ((cur_env_tree->node_base_subject == NULL) ||
+ (current_thread->node_base_subject == NULL)) {
+ current_thread = cur_env_tree;
+ cur ++;
+ continue;
+ }
+
+ if (strcmp(cur_env_tree->node_base_subject,
+ current_thread->node_base_subject) == 0) {
+
+ /* set parent */
+ cur_env_tree->node_parent = current_thread;
+ r = carray_add(current_thread->node_children, cur_env_tree, NULL);
+ if (r < 0) {
+ res = MAIL_ERROR_MEMORY;
+ goto free;
+ }
+
+ carray_delete(rootlist, cur);
+ }
+ else
+ cur ++;
+ current_thread = cur_env_tree;
+ }
+
+ /*
+ Finally, the threads are
+ sorted by the sent date of the first message in the thread.
+ Note that each message in a thread is a child (as opposed to a
+ sibling) of the previous message.
+ */
+
+#if 0
+ if (comp_func != NULL) {
+ r = mail_thread_sort(root, comp_func, FALSE);
+ if (r != MAIL_NO_ERROR) {
+ res = r;
+ goto free;
+ }
+ }
+#endif
+
+ if (comp_func == NULL)
+ comp_func = mailthread_tree_timecomp;
+
+ r = mail_thread_sort(root, comp_func, FALSE);
+ if (r != MAIL_NO_ERROR) {
+ res = r;
+ goto free;
+ }
+
+ * result = root;
+
+ return MAIL_NO_ERROR;
+
+ free:
+ mailmessage_tree_free_recursive(root);
+ err:
+ return res;
+}
+
+
+static int
+mail_build_thread_none(char * default_from,
+ struct mailmessage_list * env_list,
+ struct mailmessage_tree ** result,
+ int (* comp_func)(struct mailmessage_tree **,
+ struct mailmessage_tree **))
+{
+ unsigned int i;
+ carray * rootlist;
+ struct mailmessage_tree * root;
+ int res;
+ int r;
+
+ root = mailmessage_tree_new(NULL, (time_t) -1, NULL);
+ if (root == NULL) {
+ res = MAIL_ERROR_MEMORY;
+ goto err;
+ }
+ rootlist = root->node_children;
+
+ for(i = 0 ; i < carray_count(env_list->msg_tab) ; i ++) {
+ mailmessage * msg;
+ struct mailmessage_tree * env_tree;
+ char * base_subject;
+ time_t date;
+
+ msg = carray_get(env_list->msg_tab, i);
+
+ if (msg == NULL)
+ continue;
+
+ if (msg->msg_fields != NULL) {
+
+ date = get_date(msg);
+
+ env_tree = mailmessage_tree_new(NULL, date, msg);
+ if (env_tree == NULL) {
+ res = MAIL_ERROR_MEMORY;
+ goto free;
+ }
+
+ /* set parent */
+ env_tree->node_parent = root;
+ r = carray_add(rootlist, env_tree, NULL);
+ if (r < 0) {
+ mailmessage_tree_free(env_tree);
+ res = MAIL_ERROR_MEMORY;
+ goto free;
+ }
+
+ r = get_extracted_subject(default_from, env_tree, &base_subject);
+ switch (r) {
+ case MAIL_NO_ERROR:
+ env_tree->node_base_subject = base_subject;
+ break;
+
+ case MAIL_ERROR_SUBJECT_NOT_FOUND:
+ break;
+
+ default:
+ res = r;
+ goto free;
+ }
+ }
+ }
+
+ if (comp_func == NULL)
+ comp_func = mailthread_tree_timecomp;
+
+ r = mail_thread_sort(root, comp_func, FALSE);
+ if (r != MAIL_NO_ERROR) {
+ res = r;
+ goto free;
+ }
+
+ * result = root;
+
+ return MAIL_NO_ERROR;
+
+ free:
+ mailmessage_tree_free_recursive(root);
+ err:
+ return res;
+}
+
+
+int mail_build_thread(int type, char * default_from,
+ struct mailmessage_list * env_list,
+ struct mailmessage_tree ** result,
+ int (* comp_func)(struct mailmessage_tree **,
+ struct mailmessage_tree **))
+{
+ unsigned int i;
+
+ for(i = 0 ; i < carray_count(env_list->msg_tab) ; i ++)
+ mailmessage_resolve_single_fields(carray_get(env_list->msg_tab, i));
+
+ switch (type) {
+ case MAIL_THREAD_REFERENCES:
+ return mail_build_thread_references(default_from,
+ env_list, result, TRUE, comp_func);
+
+ case MAIL_THREAD_REFERENCES_NO_SUBJECT:
+ return mail_build_thread_references(default_from,
+ env_list, result, FALSE, comp_func);
+
+ case MAIL_THREAD_ORDEREDSUBJECT:
+ return mail_build_thread_orderedsubject(default_from,
+ env_list, result, comp_func);
+
+ case MAIL_THREAD_NONE:
+ return mail_build_thread_none(default_from,
+ env_list, result, comp_func);
+
+ default:
+ return MAIL_ERROR_NOT_IMPLEMENTED;
+ }
+}