19 package org.sleuthkit.autopsy.thunderbirdparser;
21 import java.io.BufferedInputStream;
22 import java.io.CharConversionException;
24 import java.io.FileInputStream;
25 import java.io.FileNotFoundException;
26 import java.io.IOException;
27 import java.io.InputStream;
28 import java.nio.charset.Charset;
29 import java.nio.charset.CharsetEncoder;
30 import java.nio.charset.IllegalCharsetNameException;
31 import java.nio.charset.StandardCharsets;
32 import java.nio.charset.UnsupportedCharsetException;
33 import java.util.ArrayList;
34 import java.util.Iterator;
35 import java.util.List;
36 import java.util.logging.Level;
38 import org.apache.james.mime4j.dom.Message;
39 import org.apache.james.mime4j.mboxiterator.CharBufferWrapper;
40 import org.apache.james.mime4j.mboxiterator.MboxIterator;
41 import org.apache.tika.parser.txt.CharsetDetector;
42 import org.apache.tika.parser.txt.CharsetMatch;
43 import org.openide.util.NbBundle;
/**
 * Iterator over the EmailMessage objects of an mbox file.
 *
 * Instances are obtained through the static factory methods
 * getThreadInfoIterator and getEmailIterator; hasNext() and next() delegate
 * to the iterator stored in emailIterator.
 */
48 class MboxParser
extends MimeJ4MessageParser implements Iterator<EmailMessage> {
// Logger named after this class.
50 private static final Logger logger = Logger.getLogger(MboxParser.class.getName());
// Backing iterator; stays null until createIterator assigns one, in which
// case hasNext() reports false and next() returns null.
52 private Iterator<EmailMessage> emailIterator = null;
/**
 * Creates a parser bound to the given local path. Private: instances are
 * handed out by the static factory methods of this class.
 *
 * @param localPath path recorded on this parser via setLocalPath
 */
private MboxParser(String localPath) {
    this.setLocalPath(localPath);
}
58 static boolean isValidMimeTypeMbox(byte[] buffer) {
59 return (
new String(buffer)).startsWith(
"From ");
72 static MboxParser getThreadInfoIterator(String localPath, File mboxFile) {
73 MboxParser parser =
new MboxParser(localPath);
74 parser.createIterator(mboxFile, 0,
false);
88 static MboxParser getEmailIterator(String localPath, File mboxFile,
long fileID) {
89 MboxParser parser =
new MboxParser(localPath);
90 parser.createIterator(mboxFile, fileID,
true);
/**
 * Builds the message iterator for the given mbox file and stores it in
 * emailIterator.
 *
 * For each candidate encoder returned by getPossibleEncoders(mboxFile), an
 * MboxIterator is built over the file using that encoder's charset; a
 * non-null result is wrapped in an MBoxEmailIterator.
 *
 * NOTE(review): the statements between the assignment and the first catch
 * clause are not visible here - confirm whether the loop exits after the
 * first encoder that succeeds.
 *
 * @param mboxFile the mbox file to iterate over
 * @param fileID   handed on to MBoxEmailIterator and included in the
 *                 warning logged when the file cannot be opened
 * @param wholeMsg handed on to MBoxEmailIterator
 */
103 private void createIterator(File mboxFile,
long fileID,
boolean wholeMsg) {
// Candidate encoders, tried in list order.
105 List<CharsetEncoder> encoders = getPossibleEncoders(mboxFile);
109 for (CharsetEncoder encoder : encoders) {
// Open the mbox file with this encoder's charset.
111 Iterable<CharBufferWrapper> mboxIterable = MboxIterator.fromFile(mboxFile).charset(encoder.charset()).build();
112 if (mboxIterable != null) {
113 emailIterator =
new MBoxEmailIterator(mboxIterable.iterator(), encoder, fileID, wholeMsg);
116 }
catch (CharConversionException | UnsupportedCharsetException ex) {
// No handling is visible for this case - presumably the encoder did not
// fit the file and the next candidate is tried; TODO confirm.
118 }
catch (IllegalArgumentException ex) {
// No handling is visible for this case either; TODO confirm it is
// treated the same way as the charset failures above.
120 }
catch (IOException ex) {
// An I/O failure is logged with the file name and ID, and a localized
// error message is recorded through addErrorMessage.
121 logger.log(Level.WARNING, String.format(
"Failed to open mbox file: %s %d", mboxFile.getName(), fileID), ex);
122 addErrorMessage(NbBundle.getMessage(
this.getClass(),
"MboxParser.parse.errMsg.failedToReadFile"));
128 public boolean hasNext() {
129 return emailIterator != null && emailIterator.hasNext();
133 public EmailMessage next() {
134 return emailIterator != null ? emailIterator.next() : null;
/**
 * Collects the charset encoders to try when reading the given mbox file.
 *
 * The list always starts with encoders for six standard charsets
 * (ISO-8859-1, US-ASCII, UTF-16, UTF-16BE, UTF-16LE, UTF-8). Encoders for
 * the charsets that CharsetDetector reports for the file content are
 * appended after them, in the order detectAll() returns the matches.
 *
 * NOTE(review): the declaration of the stream variable and the code that
 * closes it are not visible here; only the handler that logs a failed close
 * is.
 *
 * @param mboxFile the mbox file whose charset should be detected
 *
 * @return the candidate encoders; the six defaults are always present, also
 *         when the file is not found or detection fails
 */
145 private List<CharsetEncoder> getPossibleEncoders(File mboxFile) {
147 List<CharsetEncoder> possibleEncoders =
new ArrayList<>();
// Defaults that are offered regardless of what detection finds.
149 possibleEncoders.add(StandardCharsets.ISO_8859_1.newEncoder());
150 possibleEncoders.add(StandardCharsets.US_ASCII.newEncoder());
151 possibleEncoders.add(StandardCharsets.UTF_16.newEncoder());
152 possibleEncoders.add(StandardCharsets.UTF_16BE.newEncoder());
153 possibleEncoders.add(StandardCharsets.UTF_16LE.newEncoder());
154 possibleEncoders.add(StandardCharsets.UTF_8.newEncoder());
// Open the file for detection; without it only the defaults are returned.
157 is =
new BufferedInputStream(
new FileInputStream(mboxFile));
158 }
catch (FileNotFoundException ex) {
159 logger.log(Level.WARNING,
"Failed to find mbox file while detecting charset");
160 return possibleEncoders;
// Let the detector inspect the stream and append an encoder per match.
164 CharsetDetector detector =
new CharsetDetector();
165 detector.setText(is);
166 CharsetMatch[] matches = detector.detectAll();
167 for (CharsetMatch match : matches) {
169 possibleEncoders.add(Charset.forName(match.getName()).newEncoder());
170 }
catch (UnsupportedCharsetException | IllegalCharsetNameException ex) {
// No handling is visible for a charset that cannot be resolved -
// presumably that match is skipped; TODO confirm.
174 return possibleEncoders;
175 }
catch (IOException | IllegalArgumentException ex) {
// Detection failed: log it and return what has been collected so far.
176 logger.log(Level.WARNING,
"Failed to detect charset of mbox file.", ex);
177 return possibleEncoders;
181 }
catch (IOException ex) {
// A failed close is only logged; the exception itself is not passed to
// the logger.
182 logger.log(Level.WARNING,
"Failed to close input stream");
/**
 * Iterator that reads raw message buffers from an mbox iterator and returns
 * them as EmailMessage objects.
 */
190 final class MBoxEmailIterator
implements Iterator<EmailMessage> {
// Source of the raw message buffers.
192 private final Iterator<CharBufferWrapper> mboxIterator;
// Encoder whose charset is used to read each message buffer as a stream.
193 private final CharsetEncoder encoder;
// File ID passed to extractEmail when a message is extracted.
194 private final long fileID;
// NOTE(review): presumably selects between extractEmail and
// extractPartialEmail in next(); the condition is not visible here - confirm.
195 private final boolean wholeMsg;
/**
 * Stores the collaborators this iterator works with; nothing is read from
 * the mbox iterator at construction time.
 *
 * @param mboxIter iterator over the raw message buffers
 * @param encoder  encoder whose charset is used to read the buffers
 * @param fileID   file ID kept for message extraction
 * @param wholeMsg flag kept for message extraction
 */
MBoxEmailIterator(Iterator<CharBufferWrapper> mboxIter, CharsetEncoder encoder, long fileID, boolean wholeMsg) {
    this.wholeMsg = wholeMsg;
    this.fileID = fileID;
    this.encoder = encoder;
    this.mboxIterator = mboxIter;
}
205 public boolean hasNext() {
206 return (mboxIterator != null && encoder != null) && mboxIterator.hasNext();
210 public EmailMessage next() {
211 CharBufferWrapper messageBuffer = mboxIterator.next();
214 Message msg = getMessageBuilder().parseMessage(messageBuffer.asInputStream(encoder.charset()));
216 return extractEmail(msg, getLocalPath(), fileID);
218 return extractPartialEmail(msg);
220 }
catch (RuntimeException | IOException ex) {
221 logger.log(Level.WARNING,
"Failed to get message from mbox: {0}", ex.getMessage());