19 package org.sleuthkit.autopsy.modules.embeddedfileextractor;
22 import java.io.FileOutputStream;
23 import java.io.IOException;
24 import java.nio.file.Paths;
25 import java.util.ArrayList;
26 import java.util.List;
27 import java.util.logging.Level;
28 import org.apache.poi.hslf.model.Picture;
29 import org.apache.poi.hslf.usermodel.PictureData;
30 import org.apache.poi.hslf.usermodel.SlideShow;
31 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
32 import org.apache.poi.hwpf.HWPFDocument;
33 import org.apache.poi.hwpf.model.PicturesTable;
34 import org.apache.poi.ss.usermodel.Workbook;
35 import org.apache.poi.xslf.usermodel.XMLSlideShow;
36 import org.apache.poi.xslf.usermodel.XSLFPictureData;
37 import org.apache.poi.xssf.usermodel.XSSFWorkbook;
38 import org.apache.poi.xwpf.usermodel.XWPFDocument;
39 import org.apache.poi.xwpf.usermodel.XWPFPictureData;
40 import org.openide.util.NbBundle;
54 class ImageExtractor {
56 private final FileManager fileManager;
57 private final IngestServices services;
58 private static final Logger logger = Logger.getLogger(ImageExtractor.class.getName());
59 private final IngestJobContext context;
60 private String parentFileName;
61 private final String UNKNOWN_NAME_PREFIX =
"image_";
62 private final FileTypeDetector fileTypeDetector;
64 private String moduleDirRelative;
65 private String moduleDirAbsolute;
/**
 * Document formats handled by this extractor. Every constant carries the
 * MIME type string it is constructed with.
 */
70 enum SupportedImageExtractionFormats {
72 DOC(
"application/msword"),
73 DOCX(
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"),
74 PPT(
"application/vnd.ms-powerpoint"),
75 PPTX(
"application/vnd.openxmlformats-officedocument.presentationml.presentation"),
76 XLS(
"application/vnd.ms-excel"),
77 XLSX(
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
// MIME type string passed to the constructor of the constant.
79 private final String mimeType;
81 SupportedImageExtractionFormats(
final String mimeType) {
82 this.mimeType = mimeType;
// NOTE(review): the body of toString() is not visible in this listing.
// isImageExtractionSupported() compares its result with the detected MIME
// type, so it presumably returns mimeType - confirm.
86 public String toString() {
// Assigned by isImageExtractionSupported() when a format matches; extractImage()
// switches on it to pick the format-specific extraction method.
91 private SupportedImageExtractionFormats abstractFileExtractionFormat;
/**
 * Creates an image extractor for one ingest job.
 *
 * @param context           ingest job context that later receives the derived files
 * @param fileTypeDetector  detector used to determine the MIME type of a file
 * @param moduleDirRelative module output directory used for relative image paths
 * @param moduleDirAbsolute module output directory used for absolute folder paths
 */
ImageExtractor(IngestJobContext context, FileTypeDetector fileTypeDetector, String moduleDirRelative, String moduleDirAbsolute) {
    // Services looked up from the running application.
    Case currentCase = Case.getCurrentCase();
    this.fileManager = currentCase.getServices().getFileManager();
    this.services = IngestServices.getInstance();

    // Collaborators and directories handed in by the caller.
    this.moduleDirAbsolute = moduleDirAbsolute;
    this.moduleDirRelative = moduleDirRelative;
    this.fileTypeDetector = fileTypeDetector;
    this.context = context;
}
112 boolean isImageExtractionSupported(AbstractFile abstractFile) {
114 String abstractFileMimeType = fileTypeDetector.getFileType(abstractFile);
115 for (SupportedImageExtractionFormats s : SupportedImageExtractionFormats.values()) {
116 if (s.toString().equals(abstractFileMimeType)) {
117 abstractFileExtractionFormat = s;
122 }
catch (TskCoreException ex) {
123 logger.log(Level.WARNING,
"Error executing FileTypeDetector.getFileType()", ex);
137 void extractImage(AbstractFile abstractFile) {
142 List<ExtractedImage> listOfExtractedImages = null;
143 List<AbstractFile> listOfExtractedImageAbstractFiles = null;
144 this.parentFileName = EmbeddedFileExtractorIngestModule.getUniqueName(abstractFile);
147 if (abstractFile.hasChildren()) {
149 if (
new File(getOutputFolderPath(parentFileName)).exists()) {
150 logger.log(Level.INFO,
"File already has been processed as it has children and local unpacked file, skipping: {0}", abstractFile.getName());
154 }
catch (TskCoreException e) {
155 logger.log(Level.WARNING, String.format(
"Error checking if file already has been processed, skipping: %s", parentFileName), e);
158 switch (abstractFileExtractionFormat) {
160 listOfExtractedImages = extractImagesFromDoc(abstractFile);
163 listOfExtractedImages = extractImagesFromDocx(abstractFile);
166 listOfExtractedImages = extractImagesFromPpt(abstractFile);
169 listOfExtractedImages = extractImagesFromPptx(abstractFile);
172 listOfExtractedImages = extractImagesFromXls(abstractFile);
175 listOfExtractedImages = extractImagesFromXlsx(abstractFile);
181 if (listOfExtractedImages == null) {
185 listOfExtractedImageAbstractFiles =
new ArrayList<>();
186 for (ExtractedImage extractedImage : listOfExtractedImages) {
188 listOfExtractedImageAbstractFiles.add(fileManager.addDerivedFile(extractedImage.getFileName(), extractedImage.getLocalPath(), extractedImage.getSize(),
189 extractedImage.getCtime(), extractedImage.getCrtime(), extractedImage.getAtime(), extractedImage.getAtime(),
190 true, abstractFile, null, EmbeddedFileExtractorModuleFactory.getModuleName(), null, null, TskData.EncodingType.XOR1));
191 }
catch (TskCoreException ex) {
192 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.extractImage.addToDB.exception.msg"), ex);
195 if (!listOfExtractedImages.isEmpty()) {
196 services.fireModuleContentEvent(
new ModuleContentEvent(abstractFile));
197 context.addFilesToJob(listOfExtractedImageAbstractFiles);
/**
 * Extracts the pictures embedded in a DOC file using Apache POI's
 * HWPFDocument. The content of every picture is written below the output
 * folder of the parent file and described by an ExtractedImage (file name,
 * relative path, size, parent file).
 *
 * @param af the DOC file to process
 *
 * @return the ExtractedImage descriptors of the pictures that were written.
 *         NOTE(review): the values returned on the failure paths are not
 *         visible in this listing - confirm against the full source.
 */
209 private List<ExtractedImage> extractImagesFromDoc(AbstractFile af) {
210 List<ExtractedImage> listOfExtractedImages;
211 HWPFDocument doc = null;
// Open the document. Throwable is caught, so any failure while parsing is
// logged together with the file name.
213 doc =
new HWPFDocument(
new ReadContentInputStream(af));
214 }
catch (Throwable ex) {
216 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.docContainer.init.err", af.getName()), ex);
220 PicturesTable pictureTable = null;
221 List<
org.apache.poi.hwpf.usermodel.Picture> listOfAllPictures = null;
// Collect all pictures of the document; failures are logged as warnings.
223 pictureTable = doc.getPicturesTable();
224 listOfAllPictures = pictureTable.getAllPictures();
225 }
catch (Exception ex) {
227 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex);
231 String outputFolderPath;
// NOTE(review): listOfAllPictures starts out as null, so this dereference is
// only safe if the catch block above leaves the method - confirm.
232 if (listOfAllPictures.isEmpty()) {
235 outputFolderPath = getOutputFolderPath(this.parentFileName);
// NOTE(review): the other extractImagesFrom* methods log a warning when the
// output folder path is null; no such warning appears here - confirm.
237 if (outputFolderPath == null) {
240 listOfExtractedImages =
new ArrayList<>();
// NOTE(review): the declaration of data is not visible in this listing.
242 for (
org.apache.poi.hwpf.usermodel.Picture picture : listOfAllPictures) {
// The picture supplies its own file name.
243 String fileName = picture.suggestFullFileName();
245 data = picture.getContent();
246 }
catch (Exception ex) {
248 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex);
// Write the picture into the output folder, then record it with the size
// reported by the picture.
251 writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), data);
253 listOfExtractedImages.add(
new ExtractedImage(fileName, getFileRelativePath(fileName), picture.getSize(), af));
256 return listOfExtractedImages;
/**
 * Extracts the pictures embedded in a DOCX file using Apache POI's
 * XWPFDocument. The data of every picture is written below the output
 * folder of the parent file and described by an ExtractedImage.
 *
 * @param af the DOCX file to process
 *
 * @return the ExtractedImage descriptors of the pictures that were written.
 *         NOTE(review): the values returned on the failure paths are not
 *         visible in this listing - confirm against the full source.
 */
267 private List<ExtractedImage> extractImagesFromDocx(AbstractFile af) {
268 List<ExtractedImage> listOfExtractedImages;
269 XWPFDocument docx = null;
// Open the document. Throwable is caught, so any failure while parsing is
// logged together with the file name.
271 docx =
new XWPFDocument(
new ReadContentInputStream(af));
272 }
catch (Throwable ex) {
274 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.docxContainer.init.err", af.getName()), ex);
277 List<XWPFPictureData> listOfAllPictures = null;
// Collect all pictures of the document; failures are logged as warnings.
279 listOfAllPictures = docx.getAllPictures();
280 }
catch (Exception ex) {
282 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex);
288 String outputFolderPath;
// NOTE(review): listOfAllPictures starts out as null, so this dereference is
// only safe if the catch block above leaves the method - confirm.
289 if (listOfAllPictures.isEmpty()) {
292 outputFolderPath = getOutputFolderPath(this.parentFileName);
// A null path means the output folder is unavailable; this is logged.
294 if (outputFolderPath == null) {
295 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName()));
298 listOfExtractedImages =
new ArrayList<>();
// NOTE(review): the declaration of data is not visible in this listing.
300 for (XWPFPictureData xwpfPicture : listOfAllPictures) {
// The picture supplies its own file name.
301 String fileName = xwpfPicture.getFileName();
303 data = xwpfPicture.getData();
304 }
catch (Exception ex) {
306 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex);
309 writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), data);
// NOTE(review): getData() is called a second time here only for its length,
// and outside the try block above; data.length would avoid the repeated call.
310 listOfExtractedImages.add(
new ExtractedImage(fileName, getFileRelativePath(fileName), xwpfPicture.getData().length, af));
312 return listOfExtractedImages;
/**
 * Extracts the pictures embedded in a PPT file using Apache POI's
 * SlideShow. Every picture gets a generated name (UNKNOWN_NAME_PREFIX, a
 * counter and an extension), is written below the output folder of the
 * parent file and is described by an ExtractedImage.
 *
 * @param af the PPT file to process
 *
 * @return the ExtractedImage descriptors of the pictures that were written.
 *         NOTE(review): the values returned on the failure paths are not
 *         visible in this listing - confirm against the full source.
 */
323 private List<ExtractedImage> extractImagesFromPpt(AbstractFile af) {
324 List<ExtractedImage> listOfExtractedImages;
325 SlideShow ppt = null;
// Open the slide show. Throwable is caught, so any failure while parsing is
// logged together with the file name.
327 ppt =
new SlideShow(
new ReadContentInputStream(af));
328 }
catch (Throwable ex) {
330 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.pptContainer.init.err", af.getName()), ex);
335 PictureData[] listOfAllPictures = null;
// Collect all pictures of the slide show; failures are logged as warnings.
337 listOfAllPictures = ppt.getPictureData();
338 }
catch (Exception ex) {
340 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex);
346 String outputFolderPath;
// NOTE(review): listOfAllPictures starts out as null, so this dereference is
// only safe if the catch block above leaves the method - confirm.
347 if (listOfAllPictures.length == 0) {
350 outputFolderPath = getOutputFolderPath(this.parentFileName);
// A null path means the output folder is unavailable; this is logged.
352 if (outputFolderPath == null) {
353 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName()));
360 listOfExtractedImages =
new ArrayList<>();
362 for (PictureData pictureData : listOfAllPictures) {
// NOTE(review): how the picture type is turned into ext, and the declarations
// and updates of i, ext and data, are not visible in this listing.
366 int type = pictureData.getType();
387 String imageName = UNKNOWN_NAME_PREFIX + i + ext;
389 data = pictureData.getData();
390 }
catch (Exception ex) {
392 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex);
395 writeExtractedImage(Paths.get(outputFolderPath, imageName).toString(), data);
// NOTE(review): getData() is called a second time here only for its length,
// and outside the try block above; data.length would avoid the repeated call.
396 listOfExtractedImages.add(
new ExtractedImage(imageName, getFileRelativePath(imageName), pictureData.getData().length, af));
399 return listOfExtractedImages;
/**
 * Extracts the pictures embedded in a PPTX file using Apache POI's
 * XMLSlideShow. The data of every picture is written below the output
 * folder of the parent file and described by an ExtractedImage.
 *
 * @param af the PPTX file to process
 *
 * @return the ExtractedImage descriptors of the pictures that were written.
 *         NOTE(review): the values returned on the failure paths are not
 *         visible in this listing - confirm against the full source.
 */
410 private List<ExtractedImage> extractImagesFromPptx(AbstractFile af) {
411 List<ExtractedImage> listOfExtractedImages;
// NOTE(review): the declaration of pptx is not visible in this listing.
// Throwable is caught, so any failure while parsing is logged together with
// the file name.
414 pptx =
new XMLSlideShow(
new ReadContentInputStream(af));
415 }
catch (Throwable ex) {
417 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.pptxContainer.init.err", af.getName()), ex);
420 List<XSLFPictureData> listOfAllPictures = null;
// Collect all pictures of the slide show; failures are logged as warnings.
422 listOfAllPictures = pptx.getAllPictures();
423 }
catch (Exception ex) {
425 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex);
431 String outputFolderPath;
// NOTE(review): listOfAllPictures starts out as null, so this dereference is
// only safe if the catch block above leaves the method - confirm.
432 if (listOfAllPictures.isEmpty()) {
435 outputFolderPath = getOutputFolderPath(this.parentFileName);
// A null path means the output folder is unavailable; this is logged.
437 if (outputFolderPath == null) {
438 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName()));
442 listOfExtractedImages =
new ArrayList<>();
// NOTE(review): the declaration of data is not visible in this listing.
444 for (XSLFPictureData xslsPicture : listOfAllPictures) {
// The picture supplies its own file name.
448 String fileName = xslsPicture.getFileName();
450 data = xslsPicture.getData();
451 }
catch (Exception ex) {
453 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex);
456 writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), data);
// NOTE(review): getData() is called a second time here only for its length,
// and outside the try block above; data.length would avoid the repeated call.
457 listOfExtractedImages.add(
new ExtractedImage(fileName, getFileRelativePath(fileName), xslsPicture.getData().length, af));
461 return listOfExtractedImages;
/**
 * Extracts the pictures embedded in an XLS file using Apache POI's
 * HSSFWorkbook. Every picture gets a generated name (UNKNOWN_NAME_PREFIX, a
 * counter and the extension suggested by POI), is written below the output
 * folder of the parent file and is described by an ExtractedImage.
 *
 * @param af the XLS file to process
 *
 * @return the ExtractedImage descriptors of the pictures that were written.
 *         NOTE(review): the values returned on the failure paths are not
 *         visible in this listing - confirm against the full source.
 */
473 private List<ExtractedImage> extractImagesFromXls(AbstractFile af) {
474 List<ExtractedImage> listOfExtractedImages;
// NOTE(review): the declaration of xls is not visible in this listing.
// Throwable is caught, so any failure while parsing is logged.
478 xls =
new HSSFWorkbook(
new ReadContentInputStream(af));
479 }
catch (Throwable ex) {
// NOTE(review): af.getName() is passed to the bundle message and then appended
// again through "%s%s"; the sibling methods pass it to the bundle message only,
// so the file name may be logged twice here - confirm.
481 logger.log(Level.WARNING, String.format(
"%s%s", NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.xlsContainer.init.err", af.getName()), af.getName()), ex);
485 List<? extends
org.apache.poi.ss.usermodel.PictureData> listOfAllPictures = null;
// Collect all pictures of the workbook; failures are logged as warnings.
487 listOfAllPictures = xls.getAllPictures();
488 }
catch (Exception ex) {
490 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex);
496 String outputFolderPath;
// NOTE(review): listOfAllPictures starts out as null, so this dereference is
// only safe if the catch block above leaves the method - confirm.
497 if (listOfAllPictures.isEmpty()) {
500 outputFolderPath = getOutputFolderPath(this.parentFileName);
// A null path means the output folder is unavailable; this is logged.
502 if (outputFolderPath == null) {
503 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName()));
508 listOfExtractedImages =
new ArrayList<>();
// NOTE(review): the declarations and updates of i and data are not visible in
// this listing.
510 for (
org.apache.poi.ss.usermodel.PictureData pictureData : listOfAllPictures) {
511 String imageName = UNKNOWN_NAME_PREFIX + i +
"." + pictureData.suggestFileExtension();
513 data = pictureData.getData();
514 }
catch (Exception ex) {
516 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex);
519 writeExtractedImage(Paths.get(outputFolderPath, imageName).toString(), data);
// NOTE(review): getData() is called a second time here only for its length,
// and outside the try block above; data.length would avoid the repeated call.
520 listOfExtractedImages.add(
new ExtractedImage(imageName, getFileRelativePath(imageName), pictureData.getData().length, af));
523 return listOfExtractedImages;
/**
 * Extracts the pictures embedded in an XLSX file using Apache POI's
 * XSSFWorkbook. Every picture gets a generated name (UNKNOWN_NAME_PREFIX, a
 * counter and the extension suggested by POI), is written below the output
 * folder of the parent file and is described by an ExtractedImage.
 *
 * @param af the XLSX file to process
 *
 * @return the ExtractedImage descriptors of the pictures that were written.
 *         NOTE(review): the values returned on the failure paths are not
 *         visible in this listing - confirm against the full source.
 */
535 private List<ExtractedImage> extractImagesFromXlsx(AbstractFile af) {
536 List<ExtractedImage> listOfExtractedImages;
// NOTE(review): the declaration of xlsx is not visible in this listing.
// Throwable is caught, so any failure while parsing is logged together with
// the file name.
539 xlsx =
new XSSFWorkbook(
new ReadContentInputStream(af));
540 }
catch (Throwable ex) {
542 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.xlsxContainer.init.err", af.getName()), ex);
546 List<? extends
org.apache.poi.ss.usermodel.PictureData> listOfAllPictures = null;
// Collect all pictures of the workbook; failures are logged as warnings.
548 listOfAllPictures = xlsx.getAllPictures();
549 }
catch (Exception ex) {
551 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex);
557 String outputFolderPath;
// NOTE(review): listOfAllPictures starts out as null, so this dereference is
// only safe if the catch block above leaves the method - confirm.
558 if (listOfAllPictures.isEmpty()) {
561 outputFolderPath = getOutputFolderPath(this.parentFileName);
// A null path means the output folder is unavailable; this is logged.
563 if (outputFolderPath == null) {
564 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName()));
569 listOfExtractedImages =
new ArrayList<>();
// NOTE(review): the declarations and updates of i and data are not visible in
// this listing.
571 for (
org.apache.poi.ss.usermodel.PictureData pictureData : listOfAllPictures) {
572 String imageName = UNKNOWN_NAME_PREFIX + i +
"." + pictureData.suggestFileExtension();
574 data = pictureData.getData();
575 }
catch (Exception ex) {
577 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex);
580 writeExtractedImage(Paths.get(outputFolderPath, imageName).toString(), data);
// NOTE(review): getData() is called a second time here only for its length,
// and outside the try block above; data.length would avoid the repeated call.
581 listOfExtractedImages.add(
new ExtractedImage(imageName, getFileRelativePath(imageName), pictureData.getData().length, af));
584 return listOfExtractedImages;
/**
 * Opens an EncodedFileOutputStream with XOR1 encoding over the file at
 * outputPath. An IOException is not propagated; it is logged as a warning
 * together with the path.
 *
 * @param outputPath path of the file to create
 * @param data       bytes of the extracted image (NOTE(review): the
 *                   statement that writes them is not visible in this
 *                   listing - confirm)
 */
595 private void writeExtractedImage(String outputPath, byte[] data) {
// NOTE(review): only the wrapping stream is a managed resource. If the
// EncodedFileOutputStream constructor can throw, the inner FileOutputStream
// would stay open - consider declaring it as a resource of its own.
596 try (EncodedFileOutputStream fos =
new EncodedFileOutputStream(
new FileOutputStream(outputPath), TskData.EncodingType.XOR1)) {
598 }
catch (IOException ex) {
599 logger.log(Level.WARNING,
"Could not write to the provided location: " + outputPath, ex);
612 private String getOutputFolderPath(String parentFileName) {
613 String outputFolderPath = moduleDirAbsolute + File.separator + parentFileName;
614 File outputFilePath =
new File(outputFolderPath);
615 if (!outputFilePath.exists()) {
617 outputFilePath.mkdirs();
618 }
catch (SecurityException ex) {
619 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.getOutputFolderPath.exception.msg", parentFileName), ex);
623 return outputFolderPath;
635 private String getFileRelativePath(String fileName) {
637 return "/" + moduleDirRelative +
"/" + this.parentFileName +
"/" + fileName;
658 ExtractedImage(String fileName, String localPath,
long size, AbstractFile parentFile) {
662 ExtractedImage(String fileName, String localPath,
long size,
long ctime,
long crtime,
long atime,
long mtime, AbstractFile parentFile) {