Autopsy  4.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
MboxParser.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2014 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.thunderbirdparser;
20 
21 import java.io.BufferedInputStream;
22 import java.io.BufferedReader;
23 import java.io.CharConversionException;
24 import java.io.File;
25 import java.io.FileInputStream;
26 import java.io.FileNotFoundException;
27 import java.io.FileOutputStream;
28 import java.io.IOException;
29 import java.io.InputStream;
30 import java.nio.charset.Charset;
31 import java.nio.charset.CharsetEncoder;
32 import java.nio.charset.IllegalCharsetNameException;
33 import java.nio.charset.StandardCharsets;
34 import java.nio.charset.UnsupportedCharsetException;
35 import java.util.ArrayList;
36 import java.util.List;
37 import java.util.UUID;
38 import java.util.logging.Level;
40 import org.apache.james.mime4j.dom.BinaryBody;
41 import org.apache.james.mime4j.dom.Body;
42 import org.apache.james.mime4j.dom.Entity;
43 import org.apache.james.mime4j.dom.Message;
44 import org.apache.james.mime4j.dom.Multipart;
45 import org.apache.james.mime4j.dom.TextBody;
46 import org.apache.james.mime4j.dom.address.AddressList;
47 import org.apache.james.mime4j.dom.address.Mailbox;
48 import org.apache.james.mime4j.dom.address.MailboxList;
49 import org.apache.james.mime4j.dom.field.ContentDispositionField;
50 import org.apache.james.mime4j.dom.field.ContentTypeField;
51 import org.apache.james.mime4j.mboxiterator.CharBufferWrapper;
52 import org.apache.james.mime4j.mboxiterator.MboxIterator;
53 import org.apache.james.mime4j.message.DefaultMessageBuilder;
54 import org.apache.james.mime4j.stream.Field;
55 import org.apache.james.mime4j.stream.MimeConfig;
56 import org.apache.tika.parser.txt.CharsetDetector;
57 import org.apache.tika.parser.txt.CharsetMatch;
58 import org.openide.util.NbBundle;
62 
69 class MboxParser {
70 
71  private static final Logger logger = Logger.getLogger(MboxParser.class.getName());
72  private DefaultMessageBuilder messageBuilder;
73  private IngestServices services;
74  private StringBuilder errors;
75 
79  private static final String HTML_TYPE = "text/html"; //NON-NLS
80 
84  private String localPath;
85 
86  MboxParser(IngestServices services, String localPath) {
87  this.services = services;
88  this.localPath = localPath;
89  messageBuilder = new DefaultMessageBuilder();
90  MimeConfig config = MimeConfig.custom().setMaxLineLen(-1).build();
91  // disable line length checks.
92  messageBuilder.setMimeEntityConfig(config);
93  errors = new StringBuilder();
94  }
95 
96  static boolean isValidMimeTypeMbox(byte[] buffer) {
97  return (new String(buffer)).startsWith("From "); //NON-NLS
98  }
99 
107  List<EmailMessage> parse(File mboxFile, long fileID) {
108  // Detect possible charsets
109  List<CharsetEncoder> encoders = getPossibleEncoders(mboxFile);
110 
111  CharsetEncoder theEncoder = null;
112  Iterable<CharBufferWrapper> mboxIterator = null;
113  // Loop through the possible encoders and find the first one that works.
114  // That will usually be one of the first ones.
115  for (CharsetEncoder encoder : encoders) {
116  try {
117  mboxIterator = MboxIterator.fromFile(mboxFile).charset(encoder.charset()).build();
118  theEncoder = encoder;
119  break;
120  } catch (CharConversionException | UnsupportedCharsetException ex) {
121  // Not the right encoder
122  } catch (IllegalArgumentException ex) {
123  // Not the right encoder
124  } catch (IOException ex) {
125  logger.log(Level.WARNING, "couldn't find mbox file.", ex); //NON-NLS
126  addErrorMessage(NbBundle.getMessage(this.getClass(), "MboxParser.parse.errMsg.failedToReadFile"));
127  return new ArrayList<>();
128  }
129  }
130 
131  // If no encoders work, post an error message and return.
132  if (mboxIterator == null || theEncoder == null) {
133  addErrorMessage(NbBundle.getMessage(this.getClass(), "MboxParser.parse.errMsg.couldntFindCharset"));
134  return new ArrayList<>();
135  }
136 
137  List<EmailMessage> emails = new ArrayList<>();
138  long failCount = 0;
139 
140  // Parse each message and extract an EmailMessage structure
141  for (CharBufferWrapper message : mboxIterator) {
142  try {
143  Message msg = messageBuilder.parseMessage(message.asInputStream(theEncoder.charset()));
144  emails.add(extractEmail(msg, fileID));
145  } catch (RuntimeException | IOException ex) {
146  logger.log(Level.WARNING, "Failed to get message from mbox: {0}", ex.getMessage()); //NON-NLS
147  failCount++;
148  }
149  }
150 
151  if (failCount > 0) {
152  addErrorMessage(
153  NbBundle.getMessage(this.getClass(), "MboxParser.parse.errMsg.failedToParseNMsgs", failCount));
154  }
155  return emails;
156  }
157 
158  String getErrors() {
159  return errors.toString();
160  }
161 
170  private EmailMessage extractEmail(Message msg, long fileID) {
171  EmailMessage email = new EmailMessage();
172  // Basic Info
173  email.setSender(getAddresses(msg.getFrom()));
174  email.setRecipients(getAddresses(msg.getTo()));
175  email.setBcc(getAddresses(msg.getBcc()));
176  email.setCc(getAddresses(msg.getCc()));
177  email.setSubject(msg.getSubject());
178  email.setSentDate(msg.getDate());
179  email.setLocalPath(localPath);
180 
181  // Body
182  if (msg.isMultipart()) {
183  handleMultipart(email, (Multipart) msg.getBody(), fileID);
184  } else {
185  handleTextBody(email, (TextBody) msg.getBody(), msg.getMimeType(), msg.getHeader().getFields());
186  }
187 
188  return email;
189  }
190 
199  private void handleMultipart(EmailMessage email, Multipart multi, long fileID) {
200  List<Entity> entities = multi.getBodyParts();
201  for (int index = 0; index < entities.size(); index++) {
202  Entity e = entities.get(index);
203  if (e.isMultipart()) {
204  handleMultipart(email, (Multipart) e.getBody(), fileID);
205  } else if (e.getDispositionType() != null
206  && e.getDispositionType().equals(ContentDispositionField.DISPOSITION_TYPE_ATTACHMENT)) {
207  handleAttachment(email, e, fileID, index);
208  } else if (e.getMimeType().equals(HTML_TYPE)
209  || e.getMimeType().equals(ContentTypeField.TYPE_TEXT_PLAIN)) {
210  handleTextBody(email, (TextBody) e.getBody(), e.getMimeType(), e.getHeader().getFields());
211  } else {
212  // Ignore other types.
213  }
214  }
215  }
216 
227  private void handleTextBody(EmailMessage email, TextBody tb, String type, List<Field> fields) {
228  BufferedReader r;
229  try {
230  r = new BufferedReader(tb.getReader());
231  StringBuilder bodyString = new StringBuilder();
232  String line;
233  while ((line = r.readLine()) != null) {
234  bodyString.append(line).append("\n");
235  }
236  bodyString.append("\n-----HEADERS-----\n");
237  for(Field field: fields) {
238  String nextLine = field.getName() + ": " + field.getBody();
239  bodyString.append("\n").append(nextLine);
240  }
241  bodyString.append("\n\n---END HEADERS--\n\n");
242 
243  switch (type) {
244  case ContentTypeField.TYPE_TEXT_PLAIN:
245  email.setTextBody(bodyString.toString());
246  break;
247  case HTML_TYPE:
248  email.setHtmlBody(bodyString.toString());
249  break;
250  default:
251  // Not interested in other text types.
252  break;
253  }
254  } catch (IOException ex) {
255  logger.log(Level.WARNING, "Error getting text body of mbox message", ex); //NON-NLS
256  }
257  }
258 
266  private void handleAttachment(EmailMessage email, Entity e, long fileID, int index) {
267  String outputDirPath = ThunderbirdMboxFileIngestModule.getModuleOutputPath() + File.separator;
268  String filename = e.getFilename();
269 
270  // sanitize name. Had an attachment with a Japanese encoded path that
271  // invalid characters and attachment could not be saved.
272  filename = filename.replaceAll("\\?", "_");
273  filename = filename.replaceAll("<", "_");
274  filename = filename.replaceAll(">", "_");
275  filename = filename.replaceAll(":", "_");
276  filename = filename.replaceAll("\"", "_");
277  filename = filename.replaceAll("/", "_");
278  filename = filename.replaceAll("\\\\", "_");
279  filename = filename.replaceAll("|", "_");
280  filename = filename.replaceAll("\\*", "_");
281 
282  // also had some crazy long names, so make random one if we get those.
283  // also from Japanese image that had encoded name
284  if (filename.length() > 64) {
285  filename = UUID.randomUUID().toString();
286  }
287 
288  String uniqueFilename = fileID + "-" + index + "-" + email.getSentDate() + "-" + filename;
289  String outPath = outputDirPath + uniqueFilename;
290  EncodedFileOutputStream fos;
291  BinaryBody bb;
292  try {
293  fos = new EncodedFileOutputStream(new FileOutputStream(outPath), TskData.EncodingType.XOR1);
294  } catch (IOException ex) {
295  addErrorMessage(
296  NbBundle.getMessage(this.getClass(),
297  "MboxParser.handleAttch.errMsg.failedToCreateOnDisk", outPath));
298  logger.log(Level.INFO, "Failed to create file output stream for: " + outPath, ex); //NON-NLS
299  return;
300  }
301 
302  try {
303  Body b = e.getBody();
304  if (b instanceof BinaryBody) {
305  bb = (BinaryBody) b;
306  bb.writeTo(fos);
307  } else {
308  // This could potentially be other types. Only seen this once.
309  }
310  } catch (IOException ex) {
311  logger.log(Level.INFO, "Failed to write mbox email attachment to disk.", ex); //NON-NLS
312  addErrorMessage(NbBundle.getMessage(this.getClass(), "MboxParser.handleAttch.failedWriteToDisk", filename));
313  return;
314  } finally {
315  try {
316  fos.close();
317  } catch (IOException ex) {
318  logger.log(Level.INFO, "Failed to close file output stream", ex); //NON-NLS
319  }
320  }
321 
322  EmailMessage.Attachment attach = new EmailMessage.Attachment();
323  attach.setName(filename);
324  attach.setLocalPath(ThunderbirdMboxFileIngestModule.getRelModuleOutputPath()
325  + File.separator + uniqueFilename);
326  attach.setSize(new File(outPath).length());
327  attach.setEncodingType(TskData.EncodingType.XOR1);
328  email.addAttachment(attach);
329  }
330 
339  private String getAddresses(MailboxList mailboxList) {
340  if (mailboxList == null) {
341  return "";
342  }
343  StringBuilder addresses = new StringBuilder();
344  for (Mailbox m : mailboxList) {
345  addresses.append(m.toString()).append("; ");
346  }
347  return addresses.toString();
348  }
349 
358  private String getAddresses(AddressList addressList) {
359  return (addressList == null) ? "" : getAddresses(addressList.flatten());
360  }
361 
370  private List<CharsetEncoder> getPossibleEncoders(File mboxFile) {
371  InputStream is;
372  List<CharsetEncoder> possibleEncoders = new ArrayList<>();
373 
374  possibleEncoders.add(StandardCharsets.ISO_8859_1.newEncoder());
375  possibleEncoders.add(StandardCharsets.US_ASCII.newEncoder());
376  possibleEncoders.add(StandardCharsets.UTF_16.newEncoder());
377  possibleEncoders.add(StandardCharsets.UTF_16BE.newEncoder());
378  possibleEncoders.add(StandardCharsets.UTF_16LE.newEncoder());
379  possibleEncoders.add(StandardCharsets.UTF_8.newEncoder());
380 
381  try {
382  is = new BufferedInputStream(new FileInputStream(mboxFile));
383  } catch (FileNotFoundException ex) {
384  logger.log(Level.WARNING, "Failed to find mbox file while detecting charset"); //NON-NLS
385  return possibleEncoders;
386  }
387 
388  try {
389  CharsetDetector detector = new CharsetDetector();
390  detector.setText(is);
391  CharsetMatch[] matches = detector.detectAll();
392  for (CharsetMatch match : matches) {
393  try {
394  possibleEncoders.add(Charset.forName(match.getName()).newEncoder());
395  } catch (UnsupportedCharsetException | IllegalCharsetNameException ex) {
396  // Don't add unsupported charsets to the list
397  }
398  }
399  return possibleEncoders;
400  } catch (IOException | IllegalArgumentException ex) {
401  logger.log(Level.WARNING, "Failed to detect charset of mbox file.", ex); //NON-NLS
402  return possibleEncoders;
403  } finally {
404  try {
405  is.close();
406  } catch (IOException ex) {
407  logger.log(Level.INFO, "Failed to close input stream"); //NON-NLS
408  }
409  }
410  }
411 
412  private void addErrorMessage(String msg) {
413  errors.append("<li>").append(msg).append("</li>"); //NON-NLS
414  }
415 }

Copyright © 2012-2016 Basis Technology. Generated on: Mon Apr 24 2017
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.