19 package org.sleuthkit.autopsy.thunderbirdparser;
21 import java.util.ArrayList;
22 import java.util.HashMap;
23 import java.util.HashSet;
24 import java.util.Iterator;
25 import java.util.List;
28 import java.util.UUID;
39 final class EmailMessageThreader {
41 private int bogus_id_count = 0;
43 private EmailMessageThreader(){}
45 public static void threadMessages(List<EmailMessage> emailMessages) {
46 EmailMessageThreader instance =
new EmailMessageThreader();
48 Map<String, EmailContainer> id_table = instance.createIDTable(emailMessages);
49 Set<EmailContainer> rootSet = instance.getRootSet(id_table);
51 instance.pruneEmptyContainers(rootSet);
53 Set<EmailContainer> finalRootSet = instance.groupBySubject(rootSet);
55 instance.assignThreadIDs(finalRootSet);
67 private Map<String, EmailContainer> createIDTable(List<EmailMessage> emailMessages) {
68 HashMap<String, EmailContainer> id_table =
new HashMap<>();
70 for (EmailMessage message : emailMessages) {
71 String messageID = message.getMessageID();
74 EmailContainer container = id_table.get(messageID);
77 if (container != null) {
81 if (container.hasMessage()) {
82 messageID = String.format(
"<Bogus-id: %d >", bogus_id_count++);
85 container.setMessage(message);
89 if (container == null) {
90 container =
new EmailContainer(message);
91 id_table.put(messageID, container);
94 processMessageReferences(message, container, id_table);
108 void processMessageReferences(EmailMessage message, EmailContainer container, Map<String, EmailContainer> id_table) {
109 List<String> referenceList = message.getReferences();
112 String inReplyToID = message.getInReplyToID();
113 if (inReplyToID != null && !inReplyToID.isEmpty()) {
114 if (referenceList == null) {
115 referenceList =
new ArrayList<>();
118 referenceList.add(inReplyToID);
122 if (referenceList == null) {
126 EmailContainer parent_ref = null;
129 for (String refID : referenceList) {
132 ref = id_table.get(refID);
135 ref =
new EmailContainer();
136 id_table.put(refID, ref);
140 if (parent_ref != null
142 && !parent_ref.equals(ref)
143 && !parent_ref.isChild(ref)) {
144 ref.setParent(parent_ref);
145 parent_ref.addChild(ref);
153 if (parent_ref != null
154 && (parent_ref.equals(container)
155 || container.isChild(parent_ref))) {
163 if (container.hasParent()) {
164 container.getParent().removeChild(container);
165 container.setParent(null);
168 if (parent_ref != null) {
169 container.setParent(container);
170 parent_ref.addChild(container);
182 Set<EmailContainer> getRootSet(Map<?, EmailContainer> id_table) {
183 HashSet<EmailContainer> rootSet =
new HashSet<>();
185 id_table.values().stream().filter((container)
186 -> (!container.hasParent())).forEachOrdered((container) -> {
187 rootSet.add(container);
199 void pruneEmptyContainers(Set<EmailContainer> containerSet) {
200 Set<EmailContainer> containersToRemove =
new HashSet<>();
201 containerSet.forEach((container) -> {
202 if (!container.hasMessage() && !container.hasChildren()) {
203 containersToRemove.add(container);
205 pruneChildren(container);
209 containerSet.removeAll(containersToRemove);
220 boolean pruneChildren(EmailContainer parent) {
221 if (parent == null) {
225 Set<EmailContainer> children = parent.getChildren();
227 if (children == null) {
231 EmailContainer grandParent = parent.getParent();
232 Set<EmailContainer>
remove =
new HashSet<>();
233 Set<EmailContainer> add =
new HashSet<>();
234 for (EmailContainer child : parent.getChildren()) {
235 if (pruneChildren(child)) {
237 add.addAll(child.getChildren());
238 child.setParent(null);
239 child.clearChildren();
244 parent.addChildren(add);
245 parent.removeChildren(
remove);
247 if (!parent.hasMessage() && grandParent != null) {
248 children.forEach((child) -> {
249 child.setParent(grandParent);
275 Set<EmailContainer> groupBySubject(Set<EmailContainer> rootSet) {
276 Map<String, EmailContainer> subject_table = createSubjectTable(rootSet);
278 Set<EmailContainer> finalSet =
new HashSet<>();
280 for (EmailContainer rootSetContainer : rootSet) {
281 String rootSubject = rootSetContainer.getSimplifiedSubject();
283 EmailContainer tableContainer = subject_table.get(rootSubject);
284 if (tableContainer == null || tableContainer.equals(rootSetContainer)) {
285 finalSet.add(rootSetContainer);
290 if (tableContainer.getMessage() == null && rootSetContainer.getMessage() == null) {
291 tableContainer.addChildren(rootSetContainer.getChildren());
292 rootSetContainer.clearChildren();
298 if ((tableContainer.getMessage() == null && rootSetContainer.getMessage() != null)
299 || (tableContainer.getMessage() != null && rootSetContainer.getMessage() == null)) {
301 if (tableContainer.getMessage() == null) {
302 tableContainer.addChild(rootSetContainer);
305 rootSetContainer.addChild(tableContainer);
306 subject_table.remove(rootSubject, tableContainer);
307 subject_table.put(rootSubject, rootSetContainer);
309 finalSet.add(rootSetContainer);
318 if (tableContainer.getMessage() != null
319 && !tableContainer.isReplySubject()
320 && rootSetContainer.isReplySubject()) {
321 tableContainer.addChild(rootSetContainer);
328 if (tableContainer.getMessage() != null
329 && tableContainer.isReplySubject()
330 && !rootSetContainer.isReplySubject()) {
331 rootSetContainer.addChild(tableContainer);
332 subject_table.put(rootSubject, rootSetContainer);
333 finalSet.add(rootSetContainer);
340 EmailContainer newParent =
new EmailContainer();
341 newParent.addChild(tableContainer);
342 newParent.addChild(rootSetContainer);
343 subject_table.remove(rootSubject, tableContainer);
344 subject_table.put(rootSubject, newParent);
346 finalSet.add(newParent);
359 Map<String, EmailContainer> createSubjectTable(Set<EmailContainer> rootSet) {
360 HashMap<String, EmailContainer> subject_table =
new HashMap<>();
362 for (EmailContainer rootSetContainer : rootSet) {
364 boolean reSubject =
false;
366 if (rootSetContainer.hasMessage()) {
367 subject = rootSetContainer.getMessage().getSimplifiedSubject();
368 reSubject = rootSetContainer.getMessage().isReplySubject();
369 }
else if (rootSetContainer.hasChildren()) {
370 Iterator<EmailContainer> childrenIterator = rootSetContainer.getChildren().iterator();
371 while (childrenIterator.hasNext()) {
372 EmailMessage childMessage = childrenIterator.next().getMessage();
373 if (childMessage != null) {
374 subject = childMessage.getSimplifiedSubject();
375 if (!subject.isEmpty()) {
376 reSubject = childMessage.isReplySubject();
383 if (subject == null || subject.isEmpty()) {
387 EmailContainer tableContainer = subject_table.get(subject);
395 if (tableContainer == null ||
396 (tableContainer.getMessage() != null && rootSetContainer.getMessage() == null) ||
397 (!reSubject && (tableContainer.getMessage() != null && tableContainer.getMessage().isReplySubject()))) {
398 subject_table.put(subject, rootSetContainer);
403 return subject_table;
417 private void assignThreadIDs(Set<EmailContainer> containerSet) {
418 for(EmailContainer container: containerSet) {
420 String threadID = UUID.randomUUID().toString();
422 addThreadID(container, threadID);
434 private void addThreadID(EmailContainer container, String threadID) {
435 if(container == null) {
439 EmailMessage message = container.getMessage();
440 if(message != null) {
441 message.setMessageThreadID(threadID);
444 if(container.hasChildren()) {
445 for(EmailContainer child: container.getChildren()) {
446 addThreadID(child, threadID);
455 final class EmailContainer {
457 private EmailMessage message;
458 private EmailContainer parent;
459 private Set<EmailContainer> children;
474 EmailContainer(EmailMessage message) {
475 this.message = message;
483 EmailMessage getMessage() {
492 void setMessage(EmailMessage message) {
493 this.message = message;
501 boolean hasMessage() {
502 return message != null;
512 String getSimplifiedSubject() {
514 if (message != null) {
515 subject = message.getSimplifiedSubject();
516 }
else if (children != null) {
517 for (EmailContainer child : children) {
518 if (child.hasMessage()) {
519 subject = child.getSimplifiedSubject();
522 if (subject != null && !subject.isEmpty()) {
537 boolean isReplySubject() {
538 if (message != null) {
539 return message.isReplySubject();
540 }
else if (children != null) {
541 for (EmailContainer child : children) {
542 if (child.hasMessage()) {
543 boolean isReply = child.isReplySubject();
560 EmailContainer getParent() {
569 void setParent(EmailContainer container) {
578 boolean hasParent() {
579 return parent != null;
589 boolean addChild(EmailContainer child) {
590 if (children == null) {
591 children =
new HashSet<>();
594 return children.add(child);
607 boolean addChildren(Set<EmailContainer> children) {
608 if (children == null || children.isEmpty()) {
612 if (this.children == null) {
613 this.children =
new HashSet<>();
616 return this.children.addAll(children);
628 boolean removeChildren(Set<EmailContainer> children) {
629 if (children != null) {
630 return this.children.removeAll(children);
640 void clearChildren() {
641 if( children != null ) {
654 boolean removeChild(EmailContainer child) {
655 if(children != null) {
656 return children.remove(child);
667 boolean hasChildren() {
668 return children != null && !children.isEmpty();
676 Set<EmailContainer> getChildren() {
689 boolean isChild(EmailContainer container) {
690 return isChild(container,
new HashSet<>());
703 private boolean isChild(EmailContainer container, Set<EmailContainer> processedContainers) {
704 processedContainers.add(
this);
705 if (children == null || children.isEmpty()) {
707 }
else if (children.contains(container)) {
710 for (EmailContainer child : children) {
713 if ((!processedContainers.contains(child)) && child.isChild(container, processedContainers)) {