19 package org.sleuthkit.autopsy.thunderbirdparser;
21 import java.io.BufferedInputStream;
22 import java.io.BufferedReader;
23 import java.io.CharConversionException;
25 import java.io.FileInputStream;
26 import java.io.FileNotFoundException;
27 import java.io.FileOutputStream;
28 import java.io.IOException;
29 import java.io.InputStream;
30 import java.nio.charset.Charset;
31 import java.nio.charset.CharsetEncoder;
32 import java.nio.charset.IllegalCharsetNameException;
33 import java.nio.charset.StandardCharsets;
34 import java.nio.charset.UnsupportedCharsetException;
35 import java.util.ArrayList;
36 import java.util.List;
37 import java.util.UUID;
38 import java.util.logging.Level;
40 import org.apache.james.mime4j.dom.BinaryBody;
41 import org.apache.james.mime4j.dom.Body;
42 import org.apache.james.mime4j.dom.Entity;
43 import org.apache.james.mime4j.dom.Message;
44 import org.apache.james.mime4j.dom.Multipart;
45 import org.apache.james.mime4j.dom.TextBody;
46 import org.apache.james.mime4j.dom.address.AddressList;
47 import org.apache.james.mime4j.dom.address.Mailbox;
48 import org.apache.james.mime4j.dom.address.MailboxList;
49 import org.apache.james.mime4j.dom.field.ContentDispositionField;
50 import org.apache.james.mime4j.dom.field.ContentTypeField;
51 import org.apache.james.mime4j.mboxiterator.CharBufferWrapper;
52 import org.apache.james.mime4j.mboxiterator.MboxIterator;
53 import org.apache.james.mime4j.message.DefaultMessageBuilder;
54 import org.apache.james.mime4j.stream.Field;
55 import org.apache.james.mime4j.stream.MimeConfig;
56 import org.apache.tika.parser.txt.CharsetDetector;
57 import org.apache.tika.parser.txt.CharsetMatch;
58 import org.openide.util.NbBundle;
71 private static final Logger logger = Logger.getLogger(MboxParser.class.getName());
72 private DefaultMessageBuilder messageBuilder;
73 private IngestServices services;
74 private StringBuilder errors;
79 private static final String HTML_TYPE =
"text/html";
84 private String localPath;
86 MboxParser(IngestServices services, String localPath) {
87 this.services = services;
88 this.localPath = localPath;
89 messageBuilder =
new DefaultMessageBuilder();
90 MimeConfig config = MimeConfig.custom().setMaxLineLen(-1).build();
92 messageBuilder.setMimeEntityConfig(config);
93 errors =
new StringBuilder();
96 static boolean isValidMimeTypeMbox(byte[] buffer) {
97 return (
new String(buffer)).startsWith(
"From ");
/**
 * Reads an mbox file and converts the messages it contains into EmailMessage objects.
 *
 * Candidate charsets come from getPossibleEncoders(). For each candidate an MboxIterator
 * is built over the file; the encoder that was used is remembered so the same charset can
 * be applied when each entry is handed to the mime4j message builder. Every parsed
 * Message is converted with extractEmail() and collected in a list.
 *
 * NOTE(review): several statements of this method are not visible in this listing (the
 * try openers, the bodies of the first two catch clauses, the declaration and updates of
 * failCount, and the final return). The comments below describe only what is visible.
 *
 * @param mboxFile the mbox file to read
 * @param fileID   id forwarded to extractEmail() for every message
 * @return an empty list on the two visible early-exit paths (file could not be read, or
 *         no iterator/encoder could be established)
 */
107 List<EmailMessage> parse(File mboxFile,
long fileID) {
109 List<CharsetEncoder> encoders = getPossibleEncoders(mboxFile);
111 CharsetEncoder theEncoder = null;
112 Iterable<CharBufferWrapper> mboxIterator = null;
// Try the candidate encoders in list order, building an iterator with each charset.
115 for (CharsetEncoder encoder : encoders) {
117 mboxIterator = MboxIterator.fromFile(mboxFile).charset(encoder.charset()).build();
118 theEncoder = encoder;
// NOTE(review): what happens after a successful build, and inside the next two catch
// clauses, is not visible here; confirm whether the loop stops at the first success.
120 }
catch (CharConversionException | UnsupportedCharsetException ex) {
122 }
catch (IllegalArgumentException ex) {
124 }
catch (IOException ex) {
// An I/O failure while opening the file aborts the whole parse with an empty result.
125 logger.log(Level.WARNING,
"couldn't find mbox file.", ex);
126 addErrorMessage(NbBundle.getMessage(
this.getClass(),
"MboxParser.parse.errMsg.failedToReadFile"));
127 return new ArrayList<>();
// No usable iterator or encoder was established: report it and return an empty result.
132 if (mboxIterator == null || theEncoder == null) {
133 addErrorMessage(NbBundle.getMessage(
this.getClass(),
"MboxParser.parse.errMsg.couldntFindCharset"));
134 return new ArrayList<>();
137 List<EmailMessage> emails =
new ArrayList<>();
// Parse each mbox entry with the charset of the remembered encoder.
141 for (CharBufferWrapper message : mboxIterator) {
143 Message msg = messageBuilder.parseMessage(message.asInputStream(theEncoder.charset()));
144 emails.add(extractEmail(msg, fileID));
145 }
catch (RuntimeException | IOException ex) {
// Only the exception message is logged here, not the exception object itself.
146 logger.log(Level.WARNING,
"Failed to get message from mbox: {0}", ex.getMessage());
// Builds the localized text that reports failCount.
// NOTE(review): the enclosing call and the declaration of failCount are not visible here.
153 NbBundle.getMessage(
this.getClass(),
"MboxParser.parse.errMsg.failedToParseNMsgs", failCount));
159 return errors.toString();
170 private EmailMessage extractEmail(Message msg,
long fileID) {
171 EmailMessage email =
new EmailMessage();
173 email.setSender(getAddresses(msg.getFrom()));
174 email.setRecipients(getAddresses(msg.getTo()));
175 email.setBcc(getAddresses(msg.getBcc()));
176 email.setCc(getAddresses(msg.getCc()));
177 email.setSubject(msg.getSubject());
178 email.setSentDate(msg.getDate());
179 email.setLocalPath(localPath);
182 if (msg.isMultipart()) {
183 handleMultipart(email, (Multipart) msg.getBody(), fileID);
185 handleTextBody(email, (TextBody) msg.getBody(), msg.getMimeType(), msg.getHeader().getFields());
199 private void handleMultipart(EmailMessage email, Multipart multi,
long fileID) {
200 List<Entity> entities = multi.getBodyParts();
201 for (
int index = 0; index < entities.size(); index++) {
202 Entity e = entities.get(index);
203 if (e.isMultipart()) {
204 handleMultipart(email, (Multipart) e.getBody(), fileID);
205 }
else if (e.getDispositionType() != null
206 && e.getDispositionType().equals(ContentDispositionField.DISPOSITION_TYPE_ATTACHMENT)) {
207 handleAttachment(email, e, fileID, index);
208 }
else if (e.getMimeType().equals(HTML_TYPE)
209 || e.getMimeType().equals(ContentTypeField.TYPE_TEXT_PLAIN)) {
210 handleTextBody(email, (TextBody) e.getBody(), e.getMimeType(), e.getHeader().getFields());
/**
 * Reads the text of a body part, appends the supplied header fields to it, and stores
 * the combined string on the email.
 *
 * NOTE(review): the declarations of r and line, the try opener, the switch header and
 * one case label are not visible in this listing. The comments below describe only what
 * is visible.
 *
 * @param email  message object that receives the assembled text
 * @param tb     text body whose reader supplies the content
 * @param type   MIME type of the body; ContentTypeField.TYPE_TEXT_PLAIN leads to setTextBody
 * @param fields header fields appended after the body text, one name and body pair per line
 */
227 private void handleTextBody(EmailMessage email, TextBody tb, String type, List<Field> fields) {
// NOTE(review): no close() call on this reader is visible anywhere in the method;
// confirm that the reader is released.
230 r =
new BufferedReader(tb.getReader());
231 StringBuilder bodyString =
new StringBuilder();
// Copy the body line by line; readLine strips the line terminator and a newline is re-added.
233 while ((line = r.readLine()) != null) {
234 bodyString.append(line).append(
"\n");
// Append the header fields between a start marker and an end marker.
236 bodyString.append(
"\n-----HEADERS-----\n");
237 for(Field field: fields) {
238 String nextLine = field.getName() +
": " + field.getBody();
239 bodyString.append(
"\n").append(nextLine);
241 bodyString.append(
"\n\n---END HEADERS--\n\n");
// Plain-text content is stored as the text body of the email.
244 case ContentTypeField.TYPE_TEXT_PLAIN:
245 email.setTextBody(bodyString.toString());
// NOTE(review): the case label guarding this call is not visible here; presumably HTML_TYPE, confirm.
248 email.setHtmlBody(bodyString.toString());
254 }
catch (IOException ex) {
// A read failure is logged as a warning together with the exception.
255 logger.log(Level.WARNING,
"Error getting text body of mbox message", ex);
/**
 * Saves one attachment entity under the module output directory and records it on the email.
 *
 * The file name reported by the entity is sanitized, then combined with the file id, the
 * part index and the sent date of the email to form the on-disk name. An
 * EncodedFileOutputStream using TskData.EncodingType.XOR1 is opened on that path. Finally
 * an EmailMessage.Attachment carrying the name, relative path, size and encoding type is
 * added to the email.
 *
 * NOTE(review): parts of this method are not visible in this listing (the try openers, the
 * body of the BinaryBody branch, and whatever follows each catch clause), so the exact
 * failure flow cannot be confirmed from here.
 *
 * @param email  message that receives the attachment record
 * @param e      entity holding the attachment
 * @param fileID id used as the first component of the on-disk file name
 * @param index  second component of the on-disk file name; handleMultipart passes the
 *               position of the entity in its part list
 */
266 private void handleAttachment(EmailMessage email, Entity e,
long fileID,
int index) {
267 String outputDirPath = ThunderbirdMboxFileIngestModule.getModuleOutputPath() + File.separator;
268 String filename = e.getFilename();
// NOTE(review): no null check on filename is visible before the first replaceAll call;
// confirm that getFilename cannot return null for an attachment part.
// Replace characters that are problematic in file names with an underscore.
// replaceAll interprets its first argument as a regular expression, hence the escapes.
272 filename = filename.replaceAll(
"\\?",
"_");
273 filename = filename.replaceAll(
"<",
"_");
274 filename = filename.replaceAll(
">",
"_");
275 filename = filename.replaceAll(
":",
"_");
276 filename = filename.replaceAll(
"\"",
"_");
277 filename = filename.replaceAll(
"/",
"_");
278 filename = filename.replaceAll(
"\\\\",
"_");
// NOTE(review): the pattern below is a lone, unescaped pipe. As a regular expression that
// is an alternation of two empty patterns, which matches the empty string at every
// position, so this call inserts an underscore before, between and after all characters
// instead of replacing pipe characters. An escaped pipe is presumably what was intended.
// A side effect is that names longer than about 32 characters then exceed the 64
// character limit checked below and are replaced by a random UUID.
279 filename = filename.replaceAll(
"|",
"_");
280 filename = filename.replaceAll(
"\\*",
"_");
// Over-long names are replaced by a random UUID string.
284 if (filename.length() > 64) {
285 filename = UUID.randomUUID().toString();
// The on-disk name is fileID-index-sentDate-filename.
288 String uniqueFilename = fileID +
"-" + index +
"-" + email.getSentDate() +
"-" + filename;
289 String outPath = outputDirPath + uniqueFilename;
290 EncodedFileOutputStream fos;
// Open the encoded output stream on the target path.
293 fos =
new EncodedFileOutputStream(
new FileOutputStream(outPath), TskData.EncodingType.XOR1);
294 }
catch (IOException ex) {
// Creating the output file failed: build the localized message and log the failure.
296 NbBundle.getMessage(
this.getClass(),
297 "MboxParser.handleAttch.errMsg.failedToCreateOnDisk", outPath));
298 logger.log(Level.INFO,
"Failed to create file output stream for: " + outPath, ex);
303 Body b = e.getBody();
// NOTE(review): the statements executed for a BinaryBody are not visible in this listing.
304 if (b instanceof BinaryBody) {
310 }
catch (IOException ex) {
// Writing the attachment failed: log it and record an error that names the file.
311 logger.log(Level.INFO,
"Failed to write mbox email attachment to disk.", ex);
312 addErrorMessage(NbBundle.getMessage(
this.getClass(),
"MboxParser.handleAttch.failedWriteToDisk", filename));
317 }
catch (IOException ex) {
// A failure while closing the stream is only logged.
318 logger.log(Level.INFO,
"Failed to close file output stream", ex);
// Record the attachment: sanitized name, path relative to the module output, size on disk
// and the encoding type used when writing.
322 EmailMessage.Attachment attach =
new EmailMessage.Attachment();
323 attach.setName(filename);
324 attach.setLocalPath(ThunderbirdMboxFileIngestModule.getRelModuleOutputPath()
325 + File.separator + uniqueFilename);
326 attach.setSize(
new File(outPath).length());
327 attach.setEncodingType(TskData.EncodingType.XOR1);
328 email.addAttachment(attach);
/**
 * Renders a mailbox list as a single string, with each mailbox followed by a semicolon
 * and a space (the last entry keeps that trailing separator).
 *
 * @param mailboxList mailboxes to render; null is handled by the guard below
 * @return the concatenated string for a non-null list
 */
339 private String getAddresses(MailboxList mailboxList) {
// NOTE(review): the statement inside this null guard is not visible in this listing.
// The AddressList overload returns an empty string for null; presumably this one does too.
340 if (mailboxList == null) {
343 StringBuilder addresses =
new StringBuilder();
// Append the toString form of every mailbox, each followed by the separator.
344 for (Mailbox m : mailboxList) {
345 addresses.append(m.toString()).append(
"; ");
347 return addresses.toString();
358 private String getAddresses(AddressList addressList) {
359 return (addressList == null) ?
"" : getAddresses(addressList.flatten());
/**
 * Builds the list of charset encoders to try when reading an mbox file.
 *
 * The list always starts with encoders for six standard charsets, in the order
 * ISO-8859-1, US-ASCII, UTF-16, UTF-16BE, UTF-16LE, UTF-8. Encoders for the charsets
 * reported by the Tika CharsetDetector for the file content are appended after them.
 *
 * NOTE(review): the declaration of the input stream, the try and finally structure and
 * the close call are not visible in this listing. The comments below describe only what
 * is visible.
 *
 * @param mboxFile file whose content is sampled for charset detection
 * @return the candidate encoders; only the six defaults when the file cannot be opened
 *         or detection fails
 */
370 private List<CharsetEncoder> getPossibleEncoders(File mboxFile) {
372 List<CharsetEncoder> possibleEncoders =
new ArrayList<>();
// Default candidates, added before any detected charset.
374 possibleEncoders.add(StandardCharsets.ISO_8859_1.newEncoder());
375 possibleEncoders.add(StandardCharsets.US_ASCII.newEncoder());
376 possibleEncoders.add(StandardCharsets.UTF_16.newEncoder());
377 possibleEncoders.add(StandardCharsets.UTF_16BE.newEncoder());
378 possibleEncoders.add(StandardCharsets.UTF_16LE.newEncoder());
379 possibleEncoders.add(StandardCharsets.UTF_8.newEncoder());
// Open the file for detection; a missing file falls back to the defaults.
382 is =
new BufferedInputStream(
new FileInputStream(mboxFile));
383 }
catch (FileNotFoundException ex) {
// The exception object is not passed to the logger here.
384 logger.log(Level.WARNING,
"Failed to find mbox file while detecting charset");
385 return possibleEncoders;
// Ask the detector for all charset matches and add an encoder for each one.
389 CharsetDetector detector =
new CharsetDetector();
390 detector.setText(is);
391 CharsetMatch[] matches = detector.detectAll();
392 for (CharsetMatch match : matches) {
394 possibleEncoders.add(Charset.forName(match.getName()).newEncoder());
395 }
catch (UnsupportedCharsetException | IllegalCharsetNameException ex) {
// NOTE(review): the body of the catch above is not visible here; presumably a charset
// that this JVM cannot provide is simply left out of the list.
399 return possibleEncoders;
400 }
catch (IOException | IllegalArgumentException ex) {
// Detection failed: log it and fall back to whatever has been collected so far.
401 logger.log(Level.WARNING,
"Failed to detect charset of mbox file.", ex);
402 return possibleEncoders;
406 }
catch (IOException ex) {
// A failure while closing the stream is logged without the exception object.
407 logger.log(Level.INFO,
"Failed to close input stream");
412 private void addErrorMessage(String msg) {
413 errors.append(
"<li>").append(msg).append(
"</li>");