19 package org.sleuthkit.autopsy.thunderbirdparser;
21 import java.io.BufferedInputStream;
22 import java.io.CharConversionException;
24 import java.io.FileInputStream;
25 import java.io.FileNotFoundException;
26 import java.io.IOException;
27 import java.io.InputStream;
28 import java.nio.charset.Charset;
29 import java.nio.charset.CharsetEncoder;
30 import java.nio.charset.IllegalCharsetNameException;
31 import java.nio.charset.StandardCharsets;
32 import java.nio.charset.UnsupportedCharsetException;
33 import java.util.ArrayList;
34 import java.util.Iterator;
35 import java.util.List;
36 import java.util.logging.Level;
39 import org.apache.james.mime4j.dom.Message;
40 import org.apache.james.mime4j.mboxiterator.CharBufferWrapper;
41 import org.apache.james.mime4j.mboxiterator.MboxIterator;
42 import org.apache.tika.parser.txt.CharsetDetector;
43 import org.apache.tika.parser.txt.CharsetMatch;
44 import org.apache.commons.validator.routines.EmailValidator;
45 import org.apache.james.mime4j.mboxiterator.MboxIterator.Builder;
46 import org.openide.util.NbBundle;
52 class MboxParser
extends MimeJ4MessageParser implements Iterator<EmailMessage> {
54 private static final Logger logger = Logger.getLogger(MboxParser.class.getName());
56 private Iterator<EmailMessage> emailIterator = null;
58 private MboxIterator mboxIterable;
/**
 * Creates a parser bound to the given local path. The constructor is private; within
 * this class the static factory methods are what call it.
 *
 * @param localPath value handed to {@code setLocalPath}
 */
private MboxParser(String localPath) {
    super();
    this.setLocalPath(localPath);
}
64 static boolean isValidMimeTypeMbox(byte[] buffer, AbstractFile abstractFile) {
65 String mboxHeaderLine =
new String(buffer);
66 if (mboxHeaderLine.startsWith(
"From ")) {
67 String mimeType = abstractFile.getMIMEType();
70 if (mimeType == null || mimeType.isEmpty()) {
71 FileTypeDetector fileTypeDetector = null;
73 fileTypeDetector =
new FileTypeDetector();
74 }
catch (FileTypeDetector.FileTypeDetectorInitException ex) {
75 logger.log(Level.WARNING, String.format(
"Unable to create file type detector for determining MIME type for file %s with id of %d", abstractFile.getName(), abstractFile.getId()));
78 mimeType = fileTypeDetector.getMIMEType(abstractFile);
80 if (mimeType.equalsIgnoreCase(
"application/mbox")) {
97 static MboxParser getThreadInfoIterator(String localPath, File mboxFile) {
98 MboxParser parser =
new MboxParser(localPath);
99 parser.createIterator(mboxFile, 0,
false);
113 static MboxParser getEmailIterator(String localPath, File mboxFile,
long fileID) {
114 MboxParser parser =
new MboxParser(localPath);
115 parser.createIterator(mboxFile, fileID,
true);
128 private void createIterator(File mboxFile,
long fileID,
boolean wholeMsg) {
130 List<CharsetEncoder> encoders = getPossibleEncoders(mboxFile);
134 for (CharsetEncoder encoder : encoders) {
136 mboxIterable = MboxIterator.fromFile(mboxFile).charset(encoder.charset()).build();
137 if (mboxIterable != null) {
138 emailIterator =
new MBoxEmailIterator(mboxIterable.iterator(), encoder, fileID, wholeMsg);
141 }
catch (CharConversionException | UnsupportedCharsetException ex) {
143 }
catch (IllegalArgumentException ex) {
145 }
catch (IOException ex) {
146 logger.log(Level.WARNING, String.format(
"Failed to open mbox file: %s %d", mboxFile.getName(), fileID), ex);
147 addErrorMessage(NbBundle.getMessage(
this.getClass(),
"MboxParser.parse.errMsg.failedToReadFile"));
153 public boolean hasNext() {
154 return emailIterator != null && emailIterator.hasNext();
158 public EmailMessage next() {
159 return emailIterator != null ? emailIterator.next() : null;
163 public void close() throws IOException{
164 if(mboxIterable != null) {
165 mboxIterable.close();
177 private List<CharsetEncoder> getPossibleEncoders(File mboxFile) {
179 List<CharsetEncoder> possibleEncoders =
new ArrayList<>();
181 possibleEncoders.add(StandardCharsets.ISO_8859_1.newEncoder());
182 possibleEncoders.add(StandardCharsets.US_ASCII.newEncoder());
183 possibleEncoders.add(StandardCharsets.UTF_16.newEncoder());
184 possibleEncoders.add(StandardCharsets.UTF_16BE.newEncoder());
185 possibleEncoders.add(StandardCharsets.UTF_16LE.newEncoder());
186 possibleEncoders.add(StandardCharsets.UTF_8.newEncoder());
189 is =
new BufferedInputStream(
new FileInputStream(mboxFile));
190 }
catch (FileNotFoundException ex) {
191 logger.log(Level.WARNING,
"Failed to find mbox file while detecting charset");
192 return possibleEncoders;
196 CharsetDetector detector =
new CharsetDetector();
197 detector.setText(is);
198 CharsetMatch[] matches = detector.detectAll();
199 for (CharsetMatch match : matches) {
201 possibleEncoders.add(Charset.forName(match.getName()).newEncoder());
202 }
catch (UnsupportedCharsetException | IllegalCharsetNameException ex) {
206 return possibleEncoders;
207 }
catch (IOException | IllegalArgumentException ex) {
208 logger.log(Level.WARNING,
"Failed to detect charset of mbox file.", ex);
209 return possibleEncoders;
213 }
catch (IOException ex) {
214 logger.log(Level.WARNING,
"Failed to close input stream");
222 final class MBoxEmailIterator
implements Iterator<EmailMessage> {
/** Source of raw per-message character buffers. */
private final Iterator<CharBufferWrapper> mboxIterator;

/** Encoder whose charset is used when a message buffer is turned into a stream. */
private final CharsetEncoder encoder;

/** File ID passed along when a full email is extracted. */
private final long fileID;

/**
 * NOTE(review): presumably selects between full and partial extraction in
 * {@code next()}; the branch is not visible here, so confirm.
 */
private final boolean wholeMsg;

/**
 * Stores the collaborators used while iterating; no validation is performed.
 *
 * @param mboxIter iterator over the raw message buffers
 * @param encoder  encoder supplying the charset for message streams
 * @param fileID   file ID to remember
 * @param wholeMsg extraction flag to remember
 */
MBoxEmailIterator(Iterator<CharBufferWrapper> mboxIter, CharsetEncoder encoder, long fileID, boolean wholeMsg) {
    this.mboxIterator = mboxIter;
    this.encoder = encoder;
    this.fileID = fileID;
    this.wholeMsg = wholeMsg;
}
237 public boolean hasNext() {
238 return (mboxIterator != null && encoder != null) && mboxIterator.hasNext();
242 public EmailMessage next() {
243 CharBufferWrapper messageBuffer = mboxIterator.next();
246 Message msg = getMessageBuilder().parseMessage(messageBuffer.asInputStream(encoder.charset()));
248 return extractEmail(msg, getLocalPath(), fileID);
250 return extractPartialEmail(msg);
252 }
catch (RuntimeException | IOException ex) {
253 logger.log(Level.WARNING,
"Failed to get message from mbox: {0}", ex.getMessage());