Autopsy  4.17.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
MboxParser.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2019 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.thunderbirdparser;
20 
21 import java.io.BufferedInputStream;
22 import java.io.CharConversionException;
23 import java.io.File;
24 import java.io.FileInputStream;
25 import java.io.FileNotFoundException;
26 import java.io.IOException;
27 import java.io.InputStream;
28 import java.nio.charset.Charset;
29 import java.nio.charset.CharsetEncoder;
30 import java.nio.charset.IllegalCharsetNameException;
31 import java.nio.charset.StandardCharsets;
32 import java.nio.charset.UnsupportedCharsetException;
33 import java.util.ArrayList;
34 import java.util.Iterator;
35 import java.util.List;
36 import java.util.logging.Level;
39 import org.apache.james.mime4j.dom.Message;
40 import org.apache.james.mime4j.mboxiterator.CharBufferWrapper;
41 import org.apache.james.mime4j.mboxiterator.MboxIterator;
42 import org.apache.tika.parser.txt.CharsetDetector;
43 import org.apache.tika.parser.txt.CharsetMatch;
44 import org.apache.commons.validator.routines.EmailValidator;
45 import org.apache.james.mime4j.mboxiterator.MboxIterator.Builder;
46 import org.openide.util.NbBundle;
47 import org.sleuthkit.datamodel.AbstractFile;
48 
52 class MboxParser extends MimeJ4MessageParser implements Iterator<EmailMessage> {
53 
54  private static final Logger logger = Logger.getLogger(MboxParser.class.getName());
55 
56  private Iterator<EmailMessage> emailIterator = null;
57 
58  private MboxIterator mboxIterable;
59 
/**
 * Creates a parser for the given local path. The constructor is private;
 * instances are handed out by the static iterator factory methods of this
 * class.
 *
 * @param localPath Value passed on to setLocalPath for this parser.
 */
private MboxParser(final String localPath) {
    this.setLocalPath(localPath);
}
63 
64  static boolean isValidMimeTypeMbox(byte[] buffer, AbstractFile abstractFile) {
65  String mboxHeaderLine = new String(buffer);
66  if (mboxHeaderLine.startsWith("From ")) {
67  String mimeType = abstractFile.getMIMEType();
68 
69  // if it is not present, attempt to use the FileTypeDetector to determine
70  if (mimeType == null || mimeType.isEmpty()) {
71  FileTypeDetector fileTypeDetector = null;
72  try {
73  fileTypeDetector = new FileTypeDetector();
74  } catch (FileTypeDetector.FileTypeDetectorInitException ex) {
75  logger.log(Level.WARNING, String.format("Unable to create file type detector for determining MIME type for file %s with id of %d", abstractFile.getName(), abstractFile.getId()));
76  return false;
77  }
78  mimeType = fileTypeDetector.getMIMEType(abstractFile);
79  }
80  if (mimeType.equalsIgnoreCase("application/mbox")) {
81  return true;
82  }
83  }
84  return false; //NON-NLS
85  }
86 
97  static MboxParser getThreadInfoIterator(String localPath, File mboxFile) {
98  MboxParser parser = new MboxParser(localPath);
99  parser.createIterator(mboxFile, 0, false);
100  return parser;
101  }
102 
113  static MboxParser getEmailIterator(String localPath, File mboxFile, long fileID) {
114  MboxParser parser = new MboxParser(localPath);
115  parser.createIterator(mboxFile, fileID, true);
116 
117  return parser;
118  }
119 
128  private void createIterator(File mboxFile, long fileID, boolean wholeMsg) {
129  // Detect possible charsets
130  List<CharsetEncoder> encoders = getPossibleEncoders(mboxFile);
131 
132  // Loop through the possible encoders and find the first one that works.
133  // That will usually be one of the first ones.
134  for (CharsetEncoder encoder : encoders) {
135  try {
136  mboxIterable = MboxIterator.fromFile(mboxFile).charset(encoder.charset()).build();
137  if (mboxIterable != null) {
138  emailIterator = new MBoxEmailIterator(mboxIterable.iterator(), encoder, fileID, wholeMsg);
139  }
140  break;
141  } catch (CharConversionException | UnsupportedCharsetException ex) {
142  // Not the right encoder
143  } catch (IllegalArgumentException ex) {
144  // Not the right encoder
145  } catch (IOException ex) {
146  logger.log(Level.WARNING, String.format("Failed to open mbox file: %s %d", mboxFile.getName(), fileID), ex); //NON-NLS
147  addErrorMessage(NbBundle.getMessage(this.getClass(), "MboxParser.parse.errMsg.failedToReadFile"));
148  }
149  }
150  }
151 
152  @Override
153  public boolean hasNext() {
154  return emailIterator != null && emailIterator.hasNext();
155  }
156 
157  @Override
158  public EmailMessage next() {
159  return emailIterator != null ? emailIterator.next() : null;
160  }
161 
162  @Override
163  public void close() throws Exception {
164  if(mboxIterable != null) {
165  mboxIterable.close();
166  }
167  }
168 
177  private List<CharsetEncoder> getPossibleEncoders(File mboxFile) {
178  InputStream is;
179  List<CharsetEncoder> possibleEncoders = new ArrayList<>();
180 
181  possibleEncoders.add(StandardCharsets.ISO_8859_1.newEncoder());
182  possibleEncoders.add(StandardCharsets.US_ASCII.newEncoder());
183  possibleEncoders.add(StandardCharsets.UTF_16.newEncoder());
184  possibleEncoders.add(StandardCharsets.UTF_16BE.newEncoder());
185  possibleEncoders.add(StandardCharsets.UTF_16LE.newEncoder());
186  possibleEncoders.add(StandardCharsets.UTF_8.newEncoder());
187 
188  try {
189  is = new BufferedInputStream(new FileInputStream(mboxFile));
190  } catch (FileNotFoundException ex) {
191  logger.log(Level.WARNING, "Failed to find mbox file while detecting charset"); //NON-NLS
192  return possibleEncoders;
193  }
194 
195  try {
196  CharsetDetector detector = new CharsetDetector();
197  detector.setText(is);
198  CharsetMatch[] matches = detector.detectAll();
199  for (CharsetMatch match : matches) {
200  try {
201  possibleEncoders.add(Charset.forName(match.getName()).newEncoder());
202  } catch (UnsupportedCharsetException | IllegalCharsetNameException ex) {
203  // Don't add unsupported charsets to the list
204  }
205  }
206  return possibleEncoders;
207  } catch (IOException | IllegalArgumentException ex) {
208  logger.log(Level.WARNING, "Failed to detect charset of mbox file.", ex); //NON-NLS
209  return possibleEncoders;
210  } finally {
211  try {
212  is.close();
213  } catch (IOException ex) {
214  logger.log(Level.WARNING, "Failed to close input stream"); //NON-NLS
215  }
216  }
217  }
218 
222  final class MBoxEmailIterator implements Iterator<EmailMessage> {
223 
224  private final Iterator<CharBufferWrapper> mboxIterator;
225  private final CharsetEncoder encoder;
226  private final long fileID;
227  private final boolean wholeMsg;
228 
229  MBoxEmailIterator(Iterator<CharBufferWrapper> mboxIter, CharsetEncoder encoder, long fileID, boolean wholeMsg) {
230  mboxIterator = mboxIter;
231  this.encoder = encoder;
232  this.fileID = fileID;
233  this.wholeMsg = wholeMsg;
234  }
235 
236  @Override
237  public boolean hasNext() {
238  return (mboxIterator != null && encoder != null) && mboxIterator.hasNext();
239  }
240 
241  @Override
242  public EmailMessage next() {
243  CharBufferWrapper messageBuffer = mboxIterator.next();
244 
245  try {
246  Message msg = getMessageBuilder().parseMessage(messageBuffer.asInputStream(encoder.charset()));
247  if (wholeMsg) {
248  return extractEmail(msg, getLocalPath(), fileID);
249  } else {
250  return extractPartialEmail(msg);
251  }
252  } catch (RuntimeException | IOException ex) {
253  logger.log(Level.WARNING, "Failed to get message from mbox: {0}", ex.getMessage()); //NON-NLS
254  }
255  return null;
256  }
257 
258  }
259 }

Copyright © 2012-2021 Basis Technology. Generated on: Tue Jan 19 2021
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.