19 package org.sleuthkit.autopsy.thunderbirdparser;
21 import java.io.BufferedInputStream;
22 import java.io.CharConversionException;
24 import java.io.FileInputStream;
25 import java.io.FileNotFoundException;
26 import java.io.IOException;
27 import java.io.InputStream;
28 import java.nio.charset.Charset;
29 import java.nio.charset.CharsetEncoder;
30 import java.nio.charset.IllegalCharsetNameException;
31 import java.nio.charset.StandardCharsets;
32 import java.nio.charset.UnsupportedCharsetException;
33 import java.util.ArrayList;
34 import java.util.Iterator;
35 import java.util.List;
36 import java.util.logging.Level;
39 import org.apache.james.mime4j.dom.Message;
40 import org.apache.james.mime4j.mboxiterator.CharBufferWrapper;
41 import org.apache.james.mime4j.mboxiterator.MboxIterator;
42 import org.apache.tika.parser.txt.CharsetDetector;
43 import org.apache.tika.parser.txt.CharsetMatch;
44 import org.apache.commons.validator.routines.EmailValidator;
45 import org.apache.james.mime4j.mboxiterator.MboxIterator.Builder;
46 import org.openide.util.NbBundle;
52 class MboxParser
extends MimeJ4MessageParser implements Iterator<EmailMessage> {
// Class-wide logger, named after MboxParser.
54 private static final Logger logger = Logger.getLogger(MboxParser.class.getName());
// Source of parsed messages that hasNext()/next() delegate to; stays null
// until createIterator assigns an MBoxEmailIterator to it.
56 private Iterator<EmailMessage> emailIterator = null;
// Underlying mime4j mbox iterator; assigned in createIterator and released in close().
58 private MboxIterator mboxIterable;
/**
 * Creates a parser and records the given path through setLocalPath.
 * The constructor is private; within this class it is only called from the
 * static getThreadInfoIterator and getEmailIterator factories.
 *
 * @param localPath value handed to setLocalPath (presumably the local path
 *                  of the mbox file being parsed; confirm against the
 *                  superclass)
 */
60 private MboxParser(String localPath) {
61 setLocalPath(localPath);
/**
 * Checks whether a file looks like an mbox file, using both a sample of its
 * content and its MIME type.
 *
 * The buffer must decode to text that begins with "From ". The MIME type is
 * then taken from the file itself or, when the file reports none, from a
 * newly constructed FileTypeDetector, and compared case-insensitively with
 * "application/mbox".
 *
 * NOTE(review): the return statements are not visible in this view of the
 * file; presumably the result is true only when both checks pass. Confirm.
 *
 * @param buffer       bytes read from the file (presumably its first bytes;
 *                     confirm with the caller)
 * @param abstractFile file whose name, id and MIME type are consulted
 */
64 static boolean isValidMimeTypeMbox(byte[] buffer, AbstractFile abstractFile) {
// NOTE(review): new String(byte[]) decodes with the JVM default charset.
// Only the ASCII prefix "From " is inspected, but an explicit charset
// would make the result independent of the platform.
65 String mboxHeaderLine =
new String(buffer);
66 if (mboxHeaderLine.startsWith(
"From ")) {
67 String mimeType = abstractFile.getMIMEType();
// The file has no MIME type recorded: fall back to detecting one.
70 if (mimeType == null || mimeType.isEmpty()) {
71 FileTypeDetector fileTypeDetector = null;
73 fileTypeDetector =
new FileTypeDetector();
74 }
catch (FileTypeDetector.FileTypeDetectorInitException ex) {
// NOTE(review): ex is not passed to the logger, so the cause of the
// initialization failure is lost from the log.
75 logger.log(Level.WARNING, String.format(
"Unable to create file type detector for determining MIME type for file %s with id of %d", abstractFile.getName(), abstractFile.getId()));
// NOTE(review): fileTypeDetector is still null at this point if its
// construction failed, unless the catch block above returns early (not
// visible in this view). Confirm.
78 mimeType = fileTypeDetector.getMIMEType(abstractFile);
80 if (mimeType.equalsIgnoreCase(
"application/mbox")) {
/**
 * Builds an MboxParser for the given mbox file, calling createIterator with
 * a file ID of 0 and wholeMsg set to false.
 *
 * NOTE(review): judging by the method name, this variant is presumably used
 * when only threading information is needed rather than whole messages.
 * Confirm with callers.
 *
 * @param localPath passed to the MboxParser constructor
 * @param mboxFile  mbox file handed to createIterator
 * @return presumably the configured parser (the return statement is not
 *         visible in this view)
 */
97 static MboxParser getThreadInfoIterator(String localPath, File mboxFile) {
98 MboxParser parser =
new MboxParser(localPath);
99 parser.createIterator(mboxFile, 0,
false);
/**
 * Builds an MboxParser for the given mbox file, calling createIterator with
 * the supplied file ID and wholeMsg set to true.
 *
 * @param localPath passed to the MboxParser constructor
 * @param mboxFile  mbox file handed to createIterator
 * @param fileID    identifier forwarded to createIterator
 * @return presumably the configured parser (the return statement is not
 *         visible in this view)
 */
113 static MboxParser getEmailIterator(String localPath, File mboxFile,
long fileID) {
114 MboxParser parser =
new MboxParser(localPath);
115 parser.createIterator(mboxFile, fileID,
true);
/**
 * Sets up emailIterator for the given mbox file.
 *
 * Candidate charsets come from getPossibleEncoders. For each candidate an
 * MboxIterator is configured with a "From " separator-line pattern and the
 * candidate's charset; a non-null result is wrapped in an MBoxEmailIterator
 * and stored in emailIterator.
 *
 * @param mboxFile mbox file to iterate over
 * @param fileID   forwarded to MBoxEmailIterator and included in the
 *                 failure log message
 * @param wholeMsg forwarded to MBoxEmailIterator
 */
128 private void createIterator(File mboxFile,
long fileID,
boolean wholeMsg) {
130 List<CharsetEncoder> encoders = getPossibleEncoders(mboxFile);
// Candidates are tried in list order.
// NOTE(review): whether the loop stops after the first encoder that works
// is not visible in this view. Confirm.
134 for (CharsetEncoder encoder : encoders) {
// NOTE(review): parts of this builder chain (presumably the file source
// and the final build call) are not visible in this view.
136 mboxIterable = MboxIterator
139 .fromLine(
"^From .*\r?\n")
140 .charset(encoder.charset())
142 if (mboxIterable != null) {
143 emailIterator =
new MBoxEmailIterator(mboxIterable.iterator(), encoder, fileID, wholeMsg);
146 }
catch (CharConversionException | UnsupportedCharsetException ex) {
// No logging or error reporting is visible for this failure; presumably
// it means this encoder does not fit the file.
148 }
catch (IllegalArgumentException ex) {
// As above: nothing is visibly logged or reported for this failure.
150 }
catch (IOException ex) {
// I/O failure: log it with the file name and ID, then add the bundled
// "failedToReadFile" error message through addErrorMessage.
151 logger.log(Level.WARNING, String.format(
"Failed to open mbox file: %s %d", mboxFile.getName(), fileID), ex);
152 addErrorMessage(NbBundle.getMessage(
this.getClass(),
"MboxParser.parse.errMsg.failedToReadFile"));
/**
 * Reports whether another message is available.
 *
 * @return true only if emailIterator has been set and it has a next element
 */
158 public boolean hasNext() {
159 return emailIterator != null && emailIterator.hasNext();
/**
 * Returns the next message from emailIterator.
 *
 * NOTE(review): when no emailIterator is set this returns null instead of
 * throwing NoSuchElementException, which departs from the usual
 * java.util.Iterator contract; callers need to null-check the result.
 *
 * @return the value of emailIterator.next(), or null when emailIterator is
 *         null
 */
163 public EmailMessage next() {
164 return emailIterator != null ? emailIterator.next() : null;
/**
 * Closes the underlying MboxIterator if one has been assigned; does nothing
 * when mboxIterable is null.
 *
 * @throws IOException if closing mboxIterable fails
 */
168 public void close() throws IOException{
169 if(mboxIterable != null) {
170 mboxIterable.close();
/**
 * Builds the list of charset encoders to try when reading an mbox file.
 *
 * The list starts with encoders for a fixed set of standard charsets
 * (ISO-8859-1, US-ASCII, UTF-16, UTF-16BE, UTF-16LE, UTF-8, in that order).
 * Encoders for the charsets that Tika's CharsetDetector matches against the
 * file content are appended after them.
 *
 * @param mboxFile file whose content is given to the charset detector
 * @return the candidate encoders; only the fixed defaults when the file
 *         cannot be opened, and whatever has been collected so far when
 *         detection fails with an IOException or IllegalArgumentException
 */
182 private List<CharsetEncoder> getPossibleEncoders(File mboxFile) {
184 List<CharsetEncoder> possibleEncoders =
new ArrayList<>();
// Fixed defaults; they precede any detected charsets in the list.
186 possibleEncoders.add(StandardCharsets.ISO_8859_1.newEncoder());
187 possibleEncoders.add(StandardCharsets.US_ASCII.newEncoder());
188 possibleEncoders.add(StandardCharsets.UTF_16.newEncoder());
189 possibleEncoders.add(StandardCharsets.UTF_16BE.newEncoder());
190 possibleEncoders.add(StandardCharsets.UTF_16LE.newEncoder());
191 possibleEncoders.add(StandardCharsets.UTF_8.newEncoder());
// Open the file so its content can be given to the detector.
194 is =
new BufferedInputStream(
new FileInputStream(mboxFile));
195 }
catch (FileNotFoundException ex) {
// NOTE(review): ex is not passed to the logger, so the log lacks the
// path and cause.
196 logger.log(Level.WARNING,
"Failed to find mbox file while detecting charset");
197 return possibleEncoders;
201 CharsetDetector detector =
new CharsetDetector();
202 detector.setText(is);
203 CharsetMatch[] matches = detector.detectAll();
204 for (CharsetMatch match : matches) {
// Charset.forName can reject a name reported by the detector; those
// exceptions are caught just below (the handler body is not visible in
// this view).
206 possibleEncoders.add(Charset.forName(match.getName()).newEncoder());
207 }
catch (UnsupportedCharsetException | IllegalCharsetNameException ex) {
211 return possibleEncoders;
212 }
catch (IOException | IllegalArgumentException ex) {
213 logger.log(Level.WARNING,
"Failed to detect charset of mbox file.", ex);
214 return possibleEncoders;
218 }
catch (IOException ex) {
// NOTE(review): ex is not passed to the logger here either.
219 logger.log(Level.WARNING,
"Failed to close input stream");
227 final class MBoxEmailIterator
implements Iterator<EmailMessage> {
// Source of raw message buffers, one per call to next().
229 private final Iterator<CharBufferWrapper> mboxIterator;
// Its charset is used in next() to turn each buffer into an input stream.
230 private final CharsetEncoder encoder;
// Passed to extractEmail in next().
231 private final long fileID;
// Stored by the constructor; the condition that reads it is not visible in
// this view (presumably it selects extractEmail vs. extractPartialEmail).
232 private final boolean wholeMsg;
/**
 * Stores the values next() uses to turn raw mbox entries into EmailMessage
 * objects.
 *
 * @param mboxIter iterator over the raw message buffers
 * @param encoder  encoder whose charset is used to read each buffer as an
 *                 input stream
 * @param fileID   identifier passed to extractEmail
 * @param wholeMsg stored for next(); presumably selects between
 *                 extractEmail and extractPartialEmail (the condition is
 *                 not visible in this view, so confirm)
 */
234 MBoxEmailIterator(Iterator<CharBufferWrapper> mboxIter, CharsetEncoder encoder,
long fileID,
boolean wholeMsg) {
235 mboxIterator = mboxIter;
236 this.encoder = encoder;
237 this.fileID = fileID;
238 this.wholeMsg = wholeMsg;
/**
 * Reports whether another raw message is available.
 *
 * @return true only when both mboxIterator and encoder are non-null and
 *         mboxIterator has another element
 */
242 public boolean hasNext() {
243 return (mboxIterator != null && encoder != null) && mboxIterator.hasNext();
247 public EmailMessage next() {
248 CharBufferWrapper messageBuffer = mboxIterator.next();
251 Message msg = getMessageBuilder().parseMessage(messageBuffer.asInputStream(encoder.charset()));
253 return extractEmail(msg, getLocalPath(), fileID);
255 return extractPartialEmail(msg);
257 }
catch (RuntimeException | IOException ex) {
258 logger.log(Level.WARNING,
"Failed to get message from mbox: {0}", ex.getMessage());