19 package org.sleuthkit.autopsy.thunderbirdparser;
21 import java.io.BufferedInputStream;
22 import java.io.BufferedReader;
23 import java.io.CharConversionException;
25 import java.io.FileInputStream;
26 import java.io.FileNotFoundException;
27 import java.io.FileOutputStream;
28 import java.io.IOException;
29 import java.io.InputStream;
30 import java.nio.charset.Charset;
31 import java.nio.charset.CharsetEncoder;
32 import java.nio.charset.IllegalCharsetNameException;
33 import java.nio.charset.StandardCharsets;
34 import java.nio.charset.UnsupportedCharsetException;
35 import java.util.ArrayList;
36 import java.util.List;
37 import java.util.UUID;
38 import java.util.logging.Level;
40 import org.apache.james.mime4j.dom.BinaryBody;
41 import org.apache.james.mime4j.dom.Body;
42 import org.apache.james.mime4j.dom.Entity;
43 import org.apache.james.mime4j.dom.Message;
44 import org.apache.james.mime4j.dom.Multipart;
45 import org.apache.james.mime4j.dom.TextBody;
46 import org.apache.james.mime4j.dom.address.AddressList;
47 import org.apache.james.mime4j.dom.address.Mailbox;
48 import org.apache.james.mime4j.dom.address.MailboxList;
49 import org.apache.james.mime4j.dom.field.ContentDispositionField;
50 import org.apache.james.mime4j.dom.field.ContentTypeField;
51 import org.apache.james.mime4j.mboxiterator.CharBufferWrapper;
52 import org.apache.james.mime4j.mboxiterator.MboxIterator;
53 import org.apache.james.mime4j.message.DefaultMessageBuilder;
54 import org.apache.james.mime4j.stream.Field;
55 import org.apache.james.mime4j.stream.MimeConfig;
56 import org.apache.tika.parser.txt.CharsetDetector;
57 import org.apache.tika.parser.txt.CharsetMatch;
58 import org.openide.util.NbBundle;
/** Logger for this class, looked up under the class name of MboxParser. */
69 private static final Logger logger = Logger.
getLogger(MboxParser.class.getName());
/** mime4j message builder; created and configured in the constructor and used by parse(). */
70 private DefaultMessageBuilder messageBuilder;
/** Ingest services handle passed to the constructor; the code visible here only stores it. */
71 private IngestServices services;
/** Collects error messages; addErrorMessage() wraps each one in an HTML li element. */
72 private StringBuilder errors;
/** MIME type string that handleMultipart() compares against to recognise HTML parts. */
77 private static final String HTML_TYPE =
"text/html";
/** Path passed to the constructor; extractEmail() copies it onto every EmailMessage it builds. */
82 private String localPath;
84 MboxParser(IngestServices services, String localPath) {
85 this.services = services;
86 this.localPath = localPath;
87 messageBuilder =
new DefaultMessageBuilder();
88 MimeConfig config = MimeConfig.custom().setMaxLineLen(-1).build();
90 messageBuilder.setMimeEntityConfig(config);
91 errors =
new StringBuilder();
94 static boolean isValidMimeTypeMbox(byte[] buffer) {
95 return (
new String(buffer)).startsWith(
"From ");
/**
 * Reads the given mbox file and converts the messages in it to EmailMessage
 * objects.
 *
 * Candidate charsets come from getPossibleEncoders(); an MboxIterator is built
 * with a candidate's charset and that encoder is remembered for decoding the
 * individual messages. Problems are recorded through addErrorMessage() and
 * the logger rather than thrown.
 *
 * NOTE(review): several lines of this method are not visible in this view
 * (for example the openers of the try blocks, how the encoder loop exits, the
 * declaration and update of failCount, and the final return) - confirm
 * against the full file before relying on the details below.
 *
 * @param mboxFile the mbox file to read
 * @param fileID   id passed through to extractEmail() for every message
 *
 * @return an empty list when reading the file fails with an IOException or
 *         when no iterator/encoder could be set up; otherwise presumably the
 *         list of extracted messages (the final return is not visible here)
 */
105 List<EmailMessage> parse(File mboxFile,
long fileID) {
// Candidate encoders for this file.
107 List<CharsetEncoder> encoders = getPossibleEncoders(mboxFile);
// Both stay null until an iterator has been built successfully.
109 CharsetEncoder theEncoder = null;
110 Iterable<CharBufferWrapper> mboxIterator = null;
// Try to build an iterator with each candidate encoder's charset.
113 for (CharsetEncoder encoder : encoders) {
115 mboxIterator = MboxIterator.fromFile(mboxFile).charset(encoder.charset()).build();
116 theEncoder = encoder;
118 }
catch (CharConversionException | UnsupportedCharsetException ex) {
// No handling is visible here: this candidate charset is passed over.
120 }
catch (IllegalArgumentException ex) {
// No handling is visible here: this candidate charset is passed over.
122 }
catch (IOException ex) {
// The file itself could not be read: log, record the error and give up.
123 logger.log(Level.WARNING,
"couldn't find mbox file.", ex);
124 addErrorMessage(NbBundle.getMessage(
this.getClass(),
"MboxParser.parse.errMsg.failedToReadFile"));
125 return new ArrayList<>();
// Without an iterator and its encoder there is nothing to parse.
130 if (mboxIterator == null || theEncoder == null) {
131 addErrorMessage(NbBundle.getMessage(
this.getClass(),
"MboxParser.parse.errMsg.couldntFindCharset"));
132 return new ArrayList<>();
135 List<EmailMessage> emails =
new ArrayList<>();
// Parse every raw message and collect the extracted EmailMessage.
139 for (CharBufferWrapper message : mboxIterator) {
141 Message msg = messageBuilder.parseMessage(message.asInputStream(theEncoder.charset()));
142 emails.add(extractEmail(msg, fileID));
143 }
catch (RuntimeException | IOException ex) {
// Only the exception message is logged, not the stack trace.
144 logger.log(Level.WARNING,
"Failed to get message from mbox: {0}", ex.getMessage());
// Builds the "failed to parse N messages" text from failCount.
// NOTE(review): the statement that consumes this value is not visible here.
151 NbBundle.getMessage(
this.getClass(),
"MboxParser.parse.errMsg.failedToParseNMsgs", failCount));
157 return errors.toString();
168 private EmailMessage extractEmail(Message msg,
long fileID) {
169 EmailMessage email =
new EmailMessage();
171 email.setSender(getAddresses(msg.getFrom()));
172 email.setRecipients(getAddresses(msg.getTo()));
173 email.setBcc(getAddresses(msg.getBcc()));
174 email.setCc(getAddresses(msg.getCc()));
175 email.setSubject(msg.getSubject());
176 email.setSentDate(msg.getDate());
177 email.setLocalPath(localPath);
180 if (msg.isMultipart()) {
181 handleMultipart(email, (Multipart) msg.getBody(), fileID);
183 handleTextBody(email, (TextBody) msg.getBody(), msg.getMimeType(), msg.getHeader().getFields());
197 private void handleMultipart(EmailMessage email, Multipart multi,
long fileID) {
198 List<Entity> entities = multi.getBodyParts();
199 for (
int index = 0; index < entities.size(); index++) {
200 Entity e = entities.get(index);
201 if (e.isMultipart()) {
202 handleMultipart(email, (Multipart) e.getBody(), fileID);
203 }
else if (e.getDispositionType() != null
204 && e.getDispositionType().equals(ContentDispositionField.DISPOSITION_TYPE_ATTACHMENT)) {
205 handleAttachment(email, e, fileID, index);
206 }
else if (e.getMimeType().equals(HTML_TYPE)
207 || e.getMimeType().equals(ContentTypeField.TYPE_TEXT_PLAIN)) {
208 handleTextBody(email, (TextBody) e.getBody(), e.getMimeType(), e.getHeader().getFields());
/**
 * Reads a text body line by line, appends a dump of the supplied header
 * fields, and stores the combined text on the email.
 *
 * NOTE(review): parts of this method are not visible in this view (the
 * declarations of r and line, the try opener, and the selection on type
 * around the two setters). No close of the reader is visible either -
 * confirm that it is released.
 *
 * @param email  message that receives the body text
 * @param tb     text body to read from
 * @param type   MIME type of the body; TYPE_TEXT_PLAIN leads to setTextBody()
 * @param fields header fields appended after the body text
 */
225 private void handleTextBody(EmailMessage email, TextBody tb, String type, List<Field> fields) {
228 r =
new BufferedReader(tb.getReader());
229 StringBuilder bodyString =
new StringBuilder();
// readLine() drops the line terminator, so each line is re-joined with "\n".
231 while ((line = r.readLine()) != null) {
232 bodyString.append(line).append(
"\n");
// After the body, append every header field as "name: body" between two marker lines.
234 bodyString.append(
"\n-----HEADERS-----\n");
235 for(Field field: fields) {
236 String nextLine = field.getName() +
": " + field.getBody();
237 bodyString.append(
"\n").append(nextLine);
239 bodyString.append(
"\n\n---END HEADERS--\n\n");
// Plain text is stored as the text body.
242 case ContentTypeField.TYPE_TEXT_PLAIN:
243 email.setTextBody(bodyString.toString());
// NOTE(review): presumably the HTML_TYPE case - its label is not visible here.
246 email.setHtmlBody(bodyString.toString());
252 }
catch (IOException ex) {
// A read failure is logged; no error message is recorded for the user in the visible code.
253 logger.log(Level.WARNING,
"Error getting text body of mbox message", ex);
264 private void handleAttachment(EmailMessage email, Entity e,
long fileID,
int index) {
265 String outputDirPath = ThunderbirdMboxFileIngestModule.getModuleOutputPath() + File.separator;
266 String filename = e.getFilename();
270 filename = filename.replaceAll(
"\\?",
"_");
271 filename = filename.replaceAll(
"<",
"_");
272 filename = filename.replaceAll(
">",
"_");
273 filename = filename.replaceAll(
":",
"_");
274 filename = filename.replaceAll(
"\"",
"_");
275 filename = filename.replaceAll(
"/",
"_");
276 filename = filename.replaceAll(
"\\\\",
"_");
277 filename = filename.replaceAll(
"|",
"_");
278 filename = filename.replaceAll(
"\\*",
"_");
282 if (filename.length() > 64) {
283 filename = UUID.randomUUID().toString();
286 String uniqueFilename = fileID +
"-" + index +
"-" + email.getSentDate() +
"-" + filename;
287 String outPath = outputDirPath + uniqueFilename;
288 FileOutputStream fos;
291 fos =
new FileOutputStream(outPath);
292 }
catch (FileNotFoundException ex) {
294 NbBundle.getMessage(
this.getClass(),
295 "MboxParser.handleAttch.errMsg.failedToCreateOnDisk", outPath));
296 logger.log(Level.INFO,
"Failed to create file output stream for: " + outPath, ex);
301 Body b = e.getBody();
302 if (b instanceof BinaryBody) {
308 }
catch (IOException ex) {
309 logger.log(Level.INFO,
"Failed to write mbox email attachment to disk.", ex);
310 addErrorMessage(NbBundle.getMessage(
this.getClass(),
"MboxParser.handleAttch.failedWriteToDisk", filename));
315 }
catch (IOException ex) {
316 logger.log(Level.INFO,
"Failed to close file output stream", ex);
320 EmailMessage.Attachment attach =
new EmailMessage.Attachment();
321 attach.setName(filename);
322 attach.setLocalPath(ThunderbirdMboxFileIngestModule.getRelModuleOutputPath()
323 + File.separator + uniqueFilename);
324 attach.setSize(
new File(outPath).length());
325 email.addAttachment(attach);
336 private String getAddresses(MailboxList mailboxList) {
337 if (mailboxList == null) {
340 StringBuilder addresses =
new StringBuilder();
341 for (Mailbox m : mailboxList) {
342 addresses.append(m.toString()).append(
"; ");
344 return addresses.toString();
355 private String getAddresses(AddressList addressList) {
356 return (addressList == null) ?
"" : getAddresses(addressList.flatten());
/**
 * Builds the list of charset encoders that can be tried for an mbox file.
 *
 * Six standard charsets are always added first. The file content is then fed
 * to a CharsetDetector, and an encoder is appended for each detected match
 * whose charset name resolves through Charset.forName().
 *
 * NOTE(review): parts of this method are not visible in this view (the
 * declaration of is, the try openers and the block around the stream close) -
 * confirm against the full file.
 *
 * @param mboxFile file whose content is inspected for charset detection
 *
 * @return the candidate encoders; the six standard ones are always present
 *         because every visible return hands back the same list
 */
367 private List<CharsetEncoder> getPossibleEncoders(File mboxFile) {
369 List<CharsetEncoder> possibleEncoders =
new ArrayList<>();
// Fixed candidates, added regardless of what detection finds.
371 possibleEncoders.add(StandardCharsets.ISO_8859_1.newEncoder());
372 possibleEncoders.add(StandardCharsets.US_ASCII.newEncoder());
373 possibleEncoders.add(StandardCharsets.UTF_16.newEncoder());
374 possibleEncoders.add(StandardCharsets.UTF_16BE.newEncoder());
375 possibleEncoders.add(StandardCharsets.UTF_16LE.newEncoder());
376 possibleEncoders.add(StandardCharsets.UTF_8.newEncoder());
// Open the file for detection, wrapped in a BufferedInputStream.
379 is =
new BufferedInputStream(
new FileInputStream(mboxFile));
380 }
catch (FileNotFoundException ex) {
// Without the file only the fixed candidates can be offered.
381 logger.log(Level.WARNING,
"Failed to find mbox file while detecting charset");
382 return possibleEncoders;
// Ask the detector for every charset it considers a match for the content.
386 CharsetDetector detector =
new CharsetDetector();
387 detector.setText(is);
388 CharsetMatch[] matches = detector.detectAll();
389 for (CharsetMatch match : matches) {
391 possibleEncoders.add(Charset.forName(match.getName()).newEncoder());
392 }
catch (UnsupportedCharsetException | IllegalCharsetNameException ex) {
// No handling is visible here: a match whose name cannot be resolved is passed over.
396 return possibleEncoders;
397 }
catch (IOException | IllegalArgumentException ex) {
// Detection failed: fall back to whatever has been collected so far.
398 logger.log(Level.WARNING,
"Failed to detect charset of mbox file.", ex);
399 return possibleEncoders;
403 }
catch (IOException ex) {
// A failed close is logged without the exception.
404 logger.log(Level.INFO,
"Failed to close input stream");
409 private void addErrorMessage(String msg) {
410 errors.append(
"<li>").append(msg).append(
"</li>");
Logger getLogger(String moduleDisplayName)