19 package org.sleuthkit.autopsy.thunderbirdparser;
21 import java.io.BufferedInputStream;
22 import java.io.BufferedReader;
23 import java.io.CharConversionException;
25 import java.io.FileInputStream;
26 import java.io.FileNotFoundException;
27 import java.io.FileOutputStream;
28 import java.io.IOException;
29 import java.io.InputStream;
30 import java.nio.charset.Charset;
31 import java.nio.charset.CharsetEncoder;
32 import java.nio.charset.IllegalCharsetNameException;
33 import java.nio.charset.StandardCharsets;
34 import java.nio.charset.UnsupportedCharsetException;
35 import java.util.ArrayList;
36 import java.util.List;
37 import java.util.UUID;
38 import java.util.logging.Level;
40 import org.apache.james.mime4j.dom.BinaryBody;
41 import org.apache.james.mime4j.dom.Body;
42 import org.apache.james.mime4j.dom.Entity;
43 import org.apache.james.mime4j.dom.Message;
44 import org.apache.james.mime4j.dom.Multipart;
45 import org.apache.james.mime4j.dom.TextBody;
46 import org.apache.james.mime4j.dom.address.AddressList;
47 import org.apache.james.mime4j.dom.address.Mailbox;
48 import org.apache.james.mime4j.dom.address.MailboxList;
49 import org.apache.james.mime4j.dom.field.ContentDispositionField;
50 import org.apache.james.mime4j.dom.field.ContentTypeField;
51 import org.apache.james.mime4j.mboxiterator.CharBufferWrapper;
52 import org.apache.james.mime4j.mboxiterator.MboxIterator;
53 import org.apache.james.mime4j.message.DefaultMessageBuilder;
54 import org.apache.james.mime4j.stream.Field;
55 import org.apache.james.mime4j.stream.MimeConfig;
56 import org.apache.tika.parser.txt.CharsetDetector;
57 import org.apache.tika.parser.txt.CharsetMatch;
58 import org.openide.util.NbBundle;
72 private static final Logger logger = Logger.getLogger(MboxParser.class.getName());
73 private DefaultMessageBuilder messageBuilder;
74 private IngestServices services;
75 private StringBuilder errors;
80 private static final String HTML_TYPE =
"text/html";
85 private String localPath;
87 MboxParser(IngestServices services, String localPath) {
88 this.services = services;
89 this.localPath = localPath;
90 messageBuilder =
new DefaultMessageBuilder();
91 MimeConfig config = MimeConfig.custom().setMaxLineLen(-1).build();
93 messageBuilder.setMimeEntityConfig(config);
94 errors =
new StringBuilder();
97 static boolean isValidMimeTypeMbox(byte[] buffer) {
98 return (
new String(buffer)).startsWith(
"From ");
108 List<EmailMessage> parse(File mboxFile,
long fileID) {
110 List<CharsetEncoder> encoders = getPossibleEncoders(mboxFile);
112 CharsetEncoder theEncoder = null;
113 Iterable<CharBufferWrapper> mboxIterator = null;
116 for (CharsetEncoder encoder : encoders) {
118 mboxIterator = MboxIterator.fromFile(mboxFile).charset(encoder.charset()).build();
119 theEncoder = encoder;
121 }
catch (CharConversionException | UnsupportedCharsetException ex) {
123 }
catch (IllegalArgumentException ex) {
125 }
catch (IOException ex) {
126 logger.log(Level.WARNING,
"couldn't find mbox file.", ex);
127 addErrorMessage(NbBundle.getMessage(
this.getClass(),
"MboxParser.parse.errMsg.failedToReadFile"));
128 return new ArrayList<>();
133 if (mboxIterator == null || theEncoder == null) {
134 addErrorMessage(NbBundle.getMessage(
this.getClass(),
"MboxParser.parse.errMsg.couldntFindCharset"));
135 return new ArrayList<>();
138 List<EmailMessage> emails =
new ArrayList<>();
142 for (CharBufferWrapper message : mboxIterator) {
144 Message msg = messageBuilder.parseMessage(message.asInputStream(theEncoder.charset()));
145 emails.add(extractEmail(msg, fileID));
146 }
catch (RuntimeException | IOException ex) {
147 logger.log(Level.WARNING,
"Failed to get message from mbox: {0}", ex.getMessage());
154 NbBundle.getMessage(
this.getClass(),
"MboxParser.parse.errMsg.failedToParseNMsgs", failCount));
160 return errors.toString();
171 private EmailMessage extractEmail(Message msg,
long fileID) {
172 EmailMessage email =
new EmailMessage();
174 email.setSender(getAddresses(msg.getFrom()));
175 email.setRecipients(getAddresses(msg.getTo()));
176 email.setBcc(getAddresses(msg.getBcc()));
177 email.setCc(getAddresses(msg.getCc()));
178 email.setSubject(msg.getSubject());
179 email.setSentDate(msg.getDate());
180 email.setLocalPath(localPath);
183 if (msg.isMultipart()) {
184 handleMultipart(email, (Multipart) msg.getBody(), fileID);
186 handleTextBody(email, (TextBody) msg.getBody(), msg.getMimeType(), msg.getHeader().getFields());
200 private void handleMultipart(EmailMessage email, Multipart multi,
long fileID) {
201 List<Entity> entities = multi.getBodyParts();
202 for (
int index = 0; index < entities.size(); index++) {
203 Entity e = entities.get(index);
204 if (e.isMultipart()) {
205 handleMultipart(email, (Multipart) e.getBody(), fileID);
206 }
else if (e.getDispositionType() != null
207 && e.getDispositionType().equals(ContentDispositionField.DISPOSITION_TYPE_ATTACHMENT)) {
208 handleAttachment(email, e, fileID, index);
209 }
else if (e.getMimeType().equals(HTML_TYPE)
210 || e.getMimeType().equals(ContentTypeField.TYPE_TEXT_PLAIN)) {
211 handleTextBody(email, (TextBody) e.getBody(), e.getMimeType(), e.getHeader().getFields());
228 private void handleTextBody(EmailMessage email, TextBody tb, String type, List<Field> fields) {
231 r =
new BufferedReader(tb.getReader());
232 StringBuilder bodyString =
new StringBuilder();
233 StringBuilder headersString =
new StringBuilder();
235 while ((line = r.readLine()) != null) {
236 bodyString.append(line).append(
"\n");
239 headersString.append(
"\n-----HEADERS-----\n");
240 for(Field field: fields) {
241 String nextLine = field.getName() +
": " + field.getBody();
242 headersString.append(
"\n").append(nextLine);
244 headersString.append(
"\n\n---END HEADERS--\n\n");
246 email.setHeaders(headersString.toString());
249 case ContentTypeField.TYPE_TEXT_PLAIN:
250 email.setTextBody(bodyString.toString());
253 email.setHtmlBody(bodyString.toString());
259 }
catch (IOException ex) {
260 logger.log(Level.WARNING,
"Error getting text body of mbox message", ex);
271 @NbBundle.Messages ({
"MboxParser.handleAttch.noOpenCase.errMsg=Exception while getting open case."})
272 private void handleAttachment(EmailMessage email, Entity e,
long fileID,
int index) {
273 String outputDirPath;
274 String relModuleOutputPath;
276 outputDirPath = ThunderbirdMboxFileIngestModule.getModuleOutputPath() + File.separator;
277 relModuleOutputPath = ThunderbirdMboxFileIngestModule.getRelModuleOutputPath() + File.separator;
278 }
catch (NoCurrentCaseException ex) {
279 addErrorMessage(Bundle.MboxParser_handleAttch_noOpenCase_errMsg());
280 logger.log(Level.SEVERE, Bundle.MboxParser_handleAttch_noOpenCase_errMsg(), ex);
283 String filename = e.getFilename();
287 filename = filename.replaceAll(
"\\?",
"_");
288 filename = filename.replaceAll(
"<",
"_");
289 filename = filename.replaceAll(
">",
"_");
290 filename = filename.replaceAll(
":",
"_");
291 filename = filename.replaceAll(
"\"",
"_");
292 filename = filename.replaceAll(
"/",
"_");
293 filename = filename.replaceAll(
"\\\\",
"_");
294 filename = filename.replaceAll(
"|",
"_");
295 filename = filename.replaceAll(
"\\*",
"_");
299 if (filename.length() > 64) {
300 filename = UUID.randomUUID().toString();
303 String uniqueFilename = fileID +
"-" + index +
"-" + email.getSentDate() +
"-" + filename;
304 String outPath = outputDirPath + uniqueFilename;
305 EncodedFileOutputStream fos;
308 fos =
new EncodedFileOutputStream(
new FileOutputStream(outPath), TskData.EncodingType.XOR1);
309 }
catch (IOException ex) {
311 NbBundle.getMessage(
this.getClass(),
312 "MboxParser.handleAttch.errMsg.failedToCreateOnDisk", outPath));
313 logger.log(Level.INFO,
"Failed to create file output stream for: " + outPath, ex);
318 Body b = e.getBody();
319 if (b instanceof BinaryBody) {
325 }
catch (IOException ex) {
326 logger.log(Level.INFO,
"Failed to write mbox email attachment to disk.", ex);
327 addErrorMessage(NbBundle.getMessage(
this.getClass(),
"MboxParser.handleAttch.failedWriteToDisk", filename));
332 }
catch (IOException ex) {
333 logger.log(Level.INFO,
"Failed to close file output stream", ex);
337 EmailMessage.Attachment attach =
new EmailMessage.Attachment();
338 attach.setName(filename);
339 attach.setLocalPath(relModuleOutputPath + uniqueFilename);
340 attach.setSize(
new File(outPath).length());
341 attach.setEncodingType(TskData.EncodingType.XOR1);
342 email.addAttachment(attach);
353 private String getAddresses(MailboxList mailboxList) {
354 if (mailboxList == null) {
357 StringBuilder addresses =
new StringBuilder();
358 for (Mailbox m : mailboxList) {
359 addresses.append(m.toString()).append(
"; ");
361 return addresses.toString();
372 private String getAddresses(AddressList addressList) {
373 return (addressList == null) ?
"" : getAddresses(addressList.flatten());
384 private List<CharsetEncoder> getPossibleEncoders(File mboxFile) {
386 List<CharsetEncoder> possibleEncoders =
new ArrayList<>();
388 possibleEncoders.add(StandardCharsets.ISO_8859_1.newEncoder());
389 possibleEncoders.add(StandardCharsets.US_ASCII.newEncoder());
390 possibleEncoders.add(StandardCharsets.UTF_16.newEncoder());
391 possibleEncoders.add(StandardCharsets.UTF_16BE.newEncoder());
392 possibleEncoders.add(StandardCharsets.UTF_16LE.newEncoder());
393 possibleEncoders.add(StandardCharsets.UTF_8.newEncoder());
396 is =
new BufferedInputStream(
new FileInputStream(mboxFile));
397 }
catch (FileNotFoundException ex) {
398 logger.log(Level.WARNING,
"Failed to find mbox file while detecting charset");
399 return possibleEncoders;
403 CharsetDetector detector =
new CharsetDetector();
404 detector.setText(is);
405 CharsetMatch[] matches = detector.detectAll();
406 for (CharsetMatch match : matches) {
408 possibleEncoders.add(Charset.forName(match.getName()).newEncoder());
409 }
catch (UnsupportedCharsetException | IllegalCharsetNameException ex) {
413 return possibleEncoders;
414 }
catch (IOException | IllegalArgumentException ex) {
415 logger.log(Level.WARNING,
"Failed to detect charset of mbox file.", ex);
416 return possibleEncoders;
420 }
catch (IOException ex) {
421 logger.log(Level.INFO,
"Failed to close input stream");
426 private void addErrorMessage(String msg) {
427 errors.append(
"<li>").append(msg).append(
"</li>");