Autopsy  4.20.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
MboxParser.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2019 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.thunderbirdparser;
20 
21 import java.io.BufferedInputStream;
22 import java.io.CharConversionException;
23 import java.io.File;
24 import java.io.FileInputStream;
25 import java.io.FileNotFoundException;
26 import java.io.IOException;
27 import java.io.InputStream;
28 import java.nio.charset.Charset;
29 import java.nio.charset.CharsetEncoder;
30 import java.nio.charset.IllegalCharsetNameException;
31 import java.nio.charset.StandardCharsets;
32 import java.nio.charset.UnsupportedCharsetException;
33 import java.util.ArrayList;
34 import java.util.Iterator;
35 import java.util.List;
36 import java.util.logging.Level;
39 import org.apache.james.mime4j.dom.Message;
40 import org.apache.james.mime4j.mboxiterator.CharBufferWrapper;
41 import org.apache.james.mime4j.mboxiterator.MboxIterator;
42 import org.apache.tika.parser.txt.CharsetDetector;
43 import org.apache.tika.parser.txt.CharsetMatch;
44 import org.apache.commons.validator.routines.EmailValidator;
45 import org.apache.james.mime4j.mboxiterator.MboxIterator.Builder;
46 import org.openide.util.NbBundle;
47 import org.sleuthkit.datamodel.AbstractFile;
48 
52 class MboxParser extends MimeJ4MessageParser implements Iterator<EmailMessage> {
53 
54  private static final Logger logger = Logger.getLogger(MboxParser.class.getName());
55 
56  private Iterator<EmailMessage> emailIterator = null;
57 
58  private MboxIterator mboxIterable;
59 
60  private MboxParser(String localPath) {
61  setLocalPath(localPath);
62  }
63 
64  static boolean isValidMimeTypeMbox(byte[] buffer, AbstractFile abstractFile) {
65  String mboxHeaderLine = new String(buffer);
66  if (mboxHeaderLine.startsWith("From ")) {
67  String mimeType = abstractFile.getMIMEType();
68 
69  // if it is not present, attempt to use the FileTypeDetector to determine
70  if (mimeType == null || mimeType.isEmpty()) {
71  FileTypeDetector fileTypeDetector = null;
72  try {
73  fileTypeDetector = new FileTypeDetector();
74  } catch (FileTypeDetector.FileTypeDetectorInitException ex) {
75  logger.log(Level.WARNING, String.format("Unable to create file type detector for determining MIME type for file %s with id of %d", abstractFile.getName(), abstractFile.getId()));
76  return false;
77  }
78  mimeType = fileTypeDetector.getMIMEType(abstractFile);
79  }
80  if (mimeType.equalsIgnoreCase("application/mbox")) {
81  return true;
82  }
83  }
84  return false; //NON-NLS
85  }
86 
97  static MboxParser getThreadInfoIterator(String localPath, File mboxFile) {
98  MboxParser parser = new MboxParser(localPath);
99  parser.createIterator(mboxFile, 0, false);
100  return parser;
101  }
102 
113  static MboxParser getEmailIterator(String localPath, File mboxFile, long fileID) {
114  MboxParser parser = new MboxParser(localPath);
115  parser.createIterator(mboxFile, fileID, true);
116 
117  return parser;
118  }
119 
128  private void createIterator(File mboxFile, long fileID, boolean wholeMsg) {
129  // Detect possible charsets
130  List<CharsetEncoder> encoders = getPossibleEncoders(mboxFile);
131 
132  // Loop through the possible encoders and find the first one that works.
133  // That will usually be one of the first ones.
134  for (CharsetEncoder encoder : encoders) {
135  try {
136  mboxIterable = MboxIterator
137  .fromFile(mboxFile)
138  // use more permissive from line from mbox iterator 0.8.0, but handling CRLF/LF
139  .fromLine("^From .*\r?\n")
140  .charset(encoder.charset())
141  .build();
142  if (mboxIterable != null) {
143  emailIterator = new MBoxEmailIterator(mboxIterable.iterator(), encoder, fileID, wholeMsg);
144  }
145  break;
146  } catch (CharConversionException | UnsupportedCharsetException ex) {
147  // Not the right encoder
148  } catch (IllegalArgumentException ex) {
149  // Not the right encoder
150  } catch (IOException ex) {
151  logger.log(Level.WARNING, String.format("Failed to open mbox file: %s %d", mboxFile.getName(), fileID), ex); //NON-NLS
152  addErrorMessage(NbBundle.getMessage(this.getClass(), "MboxParser.parse.errMsg.failedToReadFile"));
153  }
154  }
155  }
156 
157  @Override
158  public boolean hasNext() {
159  return emailIterator != null && emailIterator.hasNext();
160  }
161 
162  @Override
163  public EmailMessage next() {
164  return emailIterator != null ? emailIterator.next() : null;
165  }
166 
167  @Override
168  public void close() throws IOException{
169  if(mboxIterable != null) {
170  mboxIterable.close();
171  }
172  }
173 
182  private List<CharsetEncoder> getPossibleEncoders(File mboxFile) {
183  InputStream is;
184  List<CharsetEncoder> possibleEncoders = new ArrayList<>();
185 
186  possibleEncoders.add(StandardCharsets.ISO_8859_1.newEncoder());
187  possibleEncoders.add(StandardCharsets.US_ASCII.newEncoder());
188  possibleEncoders.add(StandardCharsets.UTF_16.newEncoder());
189  possibleEncoders.add(StandardCharsets.UTF_16BE.newEncoder());
190  possibleEncoders.add(StandardCharsets.UTF_16LE.newEncoder());
191  possibleEncoders.add(StandardCharsets.UTF_8.newEncoder());
192 
193  try {
194  is = new BufferedInputStream(new FileInputStream(mboxFile));
195  } catch (FileNotFoundException ex) {
196  logger.log(Level.WARNING, "Failed to find mbox file while detecting charset"); //NON-NLS
197  return possibleEncoders;
198  }
199 
200  try {
201  CharsetDetector detector = new CharsetDetector();
202  detector.setText(is);
203  CharsetMatch[] matches = detector.detectAll();
204  for (CharsetMatch match : matches) {
205  try {
206  possibleEncoders.add(Charset.forName(match.getName()).newEncoder());
207  } catch (UnsupportedCharsetException | IllegalCharsetNameException ex) {
208  // Don't add unsupported charsets to the list
209  }
210  }
211  return possibleEncoders;
212  } catch (IOException | IllegalArgumentException ex) {
213  logger.log(Level.WARNING, "Failed to detect charset of mbox file.", ex); //NON-NLS
214  return possibleEncoders;
215  } finally {
216  try {
217  is.close();
218  } catch (IOException ex) {
219  logger.log(Level.WARNING, "Failed to close input stream"); //NON-NLS
220  }
221  }
222  }
223 
227  final class MBoxEmailIterator implements Iterator<EmailMessage> {
228 
229  private final Iterator<CharBufferWrapper> mboxIterator;
230  private final CharsetEncoder encoder;
231  private final long fileID;
232  private final boolean wholeMsg;
233 
234  MBoxEmailIterator(Iterator<CharBufferWrapper> mboxIter, CharsetEncoder encoder, long fileID, boolean wholeMsg) {
235  mboxIterator = mboxIter;
236  this.encoder = encoder;
237  this.fileID = fileID;
238  this.wholeMsg = wholeMsg;
239  }
240 
241  @Override
242  public boolean hasNext() {
243  return (mboxIterator != null && encoder != null) && mboxIterator.hasNext();
244  }
245 
246  @Override
247  public EmailMessage next() {
248  CharBufferWrapper messageBuffer = mboxIterator.next();
249 
250  try {
251  Message msg = getMessageBuilder().parseMessage(messageBuffer.asInputStream(encoder.charset()));
252  if (wholeMsg) {
253  return extractEmail(msg, getLocalPath(), fileID);
254  } else {
255  return extractPartialEmail(msg);
256  }
257  } catch (RuntimeException | IOException ex) {
258  logger.log(Level.WARNING, "Failed to get message from mbox: {0}", ex.getMessage()); //NON-NLS
259  }
260  return null;
261  }
262 
263  }
264 }

Copyright © 2012-2022 Basis Technology. Generated on: Tue Aug 1 2023
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.