19 package org.sleuthkit.autopsy.modules.embeddedfileextractor;
 
   22 import java.io.FileOutputStream;
 
   23 import java.io.IOException;
 
   24 import java.nio.file.Paths;
 
   25 import java.util.ArrayList;
 
   26 import java.util.List;
 
   27 import java.util.logging.Level;
 
   28 import org.apache.poi.hslf.model.Picture;
 
   29 import org.apache.poi.hslf.usermodel.PictureData;
 
   30 import org.apache.poi.hslf.usermodel.SlideShow;
 
   31 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
 
   32 import org.apache.poi.hwpf.HWPFDocument;
 
   33 import org.apache.poi.hwpf.model.PicturesTable;
 
   34 import org.apache.poi.ss.usermodel.Workbook;
 
   35 import org.apache.poi.xslf.usermodel.XMLSlideShow;
 
   36 import org.apache.poi.xslf.usermodel.XSLFPictureData;
 
   37 import org.apache.poi.xssf.usermodel.XSSFWorkbook;
 
   38 import org.apache.poi.xwpf.usermodel.XWPFDocument;
 
   39 import org.apache.poi.xwpf.usermodel.XWPFPictureData;
 
   40 import org.openide.util.NbBundle;
 
   54 class ImageExtractor {
 
   56     private final FileManager fileManager;
 
   57     private final IngestServices services;
 
   58     private static final Logger logger = Logger.getLogger(ImageExtractor.class.getName());
 
   59     private final IngestJobContext context;
 
   60     private String parentFileName;
 
   61     private final String UNKNOWN_NAME_PREFIX = 
"image_"; 
 
   62     private final FileTypeDetector fileTypeDetector;
 
   64     private String moduleDirRelative;
 
   65     private String moduleDirAbsolute;
 
   /**
    * Document formats from which embedded images can be extracted, each
    * paired with the MIME type that identifies it.
    */
   enum SupportedImageExtractionFormats {

       DOC("application/msword"),
       DOCX("application/vnd.openxmlformats-officedocument.wordprocessingml.document"),
       PPT("application/vnd.ms-powerpoint"),
       PPTX("application/vnd.openxmlformats-officedocument.presentationml.presentation"),
       XLS("application/vnd.ms-excel"),
       XLSX("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");

       private final String mimeType;

       /**
        * @param mimeType MIME type string associated with this format
        */
       SupportedImageExtractionFormats(final String mimeType) {
           this.mimeType = mimeType;
       }

       /**
        * Returns the MIME type of this format. Callers compare this value
        * directly against the MIME type reported by the file type detector.
        *
        * @return the MIME type string
        */
       @Override
       public String toString() {
           return this.mimeType;
       }
   }
 
   91     private SupportedImageExtractionFormats abstractFileExtractionFormat;
 
   93     ImageExtractor(IngestJobContext context, FileTypeDetector fileTypeDetector, String moduleDirRelative, String moduleDirAbsolute) {
 
   95         this.fileManager = Case.getCurrentCase().getServices().getFileManager();
 
   96         this.services = IngestServices.getInstance();
 
   97         this.context = context;
 
   98         this.fileTypeDetector = fileTypeDetector;
 
   99         this.moduleDirRelative = moduleDirRelative;
 
  100         this.moduleDirAbsolute = moduleDirAbsolute;
 
  /**
   * Checks whether the MIME type detected for the given file equals the MIME
   * type of one of the SupportedImageExtractionFormats values. On a match the
   * matching format is stored in abstractFileExtractionFormat.
   *
   * A TskCoreException raised by the detector is logged as a warning.
   *
   * NOTE(review): the return statements are not visible in this listing;
   * presumably true on a match and false otherwise (including after a
   * detection error) - confirm against the full source.
   * NOTE(review): nothing visible here clears abstractFileExtractionFormat
   * when no format matches, so a value from an earlier file may remain.
   *
   * @param abstractFile the file whose detected MIME type is tested
   */
  112     boolean isImageExtractionSupported(AbstractFile abstractFile) {
 
  114             String abstractFileMimeType = fileTypeDetector.getFileType(abstractFile);
 
  // Compare the detected type against every supported format's MIME string.
  115             for (SupportedImageExtractionFormats s : SupportedImageExtractionFormats.values()) {
 
  116                 if (s.toString().equals(abstractFileMimeType)) {
 
  117                     abstractFileExtractionFormat = s;
 
  122         } 
catch (TskCoreException ex) {
 
  123             logger.log(Level.WARNING, 
"Error executing FileTypeDetector.getFileType()", ex); 
 
  137     void extractImage(AbstractFile abstractFile) {
 
  142         List<ExtractedImage> listOfExtractedImages = null;
 
  143         List<AbstractFile> listOfExtractedImageAbstractFiles = null;
 
  144         this.parentFileName = EmbeddedFileExtractorIngestModule.getUniqueName(abstractFile);
 
  147             if (abstractFile.hasChildren()) {
 
  149                 if (
new File(getOutputFolderPath(parentFileName)).exists()) {
 
  150                     logger.log(Level.INFO, 
"File already has been processed as it has children and local unpacked file, skipping: {0}", abstractFile.getName()); 
 
  154         } 
catch (TskCoreException e) {
 
  155             logger.log(Level.WARNING, String.format(
"Error checking if file already has been processed, skipping: %s", parentFileName), e); 
 
  158         switch (abstractFileExtractionFormat) {
 
  160                 listOfExtractedImages = extractImagesFromDoc(abstractFile);
 
  163                 listOfExtractedImages = extractImagesFromDocx(abstractFile);
 
  166                 listOfExtractedImages = extractImagesFromPpt(abstractFile);
 
  169                 listOfExtractedImages = extractImagesFromPptx(abstractFile);
 
  172                 listOfExtractedImages = extractImagesFromXls(abstractFile);
 
  175                 listOfExtractedImages = extractImagesFromXlsx(abstractFile);
 
  181         if (listOfExtractedImages == null) {
 
  185         listOfExtractedImageAbstractFiles = 
new ArrayList<>();
 
  186         for (ExtractedImage extractedImage : listOfExtractedImages) {
 
  188                 listOfExtractedImageAbstractFiles.add(fileManager.addDerivedFile(extractedImage.getFileName(), extractedImage.getLocalPath(), extractedImage.getSize(),
 
  189                         extractedImage.getCtime(), extractedImage.getCrtime(), extractedImage.getAtime(), extractedImage.getAtime(),
 
  190                         true, abstractFile, null, EmbeddedFileExtractorModuleFactory.getModuleName(), null, null, TskData.EncodingType.XOR1));
 
  191             } 
catch (TskCoreException ex) {
 
  192                 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(), 
"EmbeddedFileExtractorIngestModule.ImageExtractor.extractImage.addToDB.exception.msg"), ex); 
 
  195         if (!listOfExtractedImages.isEmpty()) {
 
  196             services.fireModuleContentEvent(
new ModuleContentEvent(abstractFile));
 
  197             context.addFilesToJob(listOfExtractedImageAbstractFiles);
 
  /**
   * Extracts the pictures stored in a DOC file. The document is opened as an
   * HWPFDocument, every picture from its pictures table is written into the
   * output folder of the current parent file, and one ExtractedImage is
   * recorded per picture using the picture's suggested full file name.
   *
   * Failures while opening the document, reading the pictures table or
   * reading a picture's content are logged as warnings.
   *
   * NOTE(review): the statements that follow each logged failure, the body of
   * the isEmpty() / null-path branches and the declaration of "data" are not
   * visible in this listing; presumably the method returns null in those
   * cases - confirm against the full source.
   *
   * @param af the DOC file to read
   * @return the list of images that were extracted
   */
  209     private List<ExtractedImage> extractImagesFromDoc(AbstractFile af) {
 
  210         List<ExtractedImage> listOfExtractedImages;
 
  211         HWPFDocument doc = null;
 
  213             doc = 
new HWPFDocument(
new ReadContentInputStream(af));
 
  214         } 
catch (Throwable ex) {
 
  216             logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(), 
"EmbeddedFileExtractorIngestModule.ImageExtractor.docContainer.init.err", af.getName()), ex); 
 
  220         PicturesTable pictureTable = null;
 
  221         List<
org.apache.poi.hwpf.usermodel.Picture> listOfAllPictures = null;
 
  223             pictureTable = doc.getPicturesTable();
 
  224             listOfAllPictures = pictureTable.getAllPictures();
 
  225         } 
catch (Exception ex) {
 
  227             logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(), 
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex); 
 
  231         String outputFolderPath;
 
  232         if (listOfAllPictures.isEmpty()) {
 
  235             outputFolderPath = getOutputFolderPath(this.parentFileName);
 
  237         if (outputFolderPath == null) {
 
  240         listOfExtractedImages = 
new ArrayList<>();
 
  242         for (
org.apache.poi.hwpf.usermodel.Picture picture : listOfAllPictures) {
 
  // NOTE(review): this name comes from the document and is joined to the output
  // folder below without any visible sanitising - verify it cannot escape the folder.
  243             String fileName = picture.suggestFullFileName();
 
  245                 data = picture.getContent();
 
  246             } 
catch (Exception ex) {
 
  248                 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(), 
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex); 
 
  251             writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), data);
 
  253             listOfExtractedImages.add(
new ExtractedImage(fileName, getFileRelativePath(fileName), picture.getSize(), af));
 
  256         return listOfExtractedImages;
 
  /**
   * Extracts the pictures stored in a DOCX file. The document is opened as an
   * XWPFDocument, every entry of getAllPictures() is written into the output
   * folder of the current parent file under the picture's own file name, and
   * one ExtractedImage is recorded per picture.
   *
   * Failures while opening the document, listing the pictures or reading a
   * picture's data are logged as warnings; a null output folder path is
   * logged as well.
   *
   * NOTE(review): the statements that follow each logged failure, the body of
   * the isEmpty() branch and the declaration of "data" are not visible in
   * this listing; presumably the method returns null in those cases - confirm.
   *
   * @param af the DOCX file to read
   * @return the list of images that were extracted
   */
  267     private List<ExtractedImage> extractImagesFromDocx(AbstractFile af) {
 
  268         List<ExtractedImage> listOfExtractedImages;
 
  269         XWPFDocument docx = null;
 
  271             docx = 
new XWPFDocument(
new ReadContentInputStream(af));
 
  272         } 
catch (Throwable ex) {
 
  274             logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(), 
"EmbeddedFileExtractorIngestModule.ImageExtractor.docxContainer.init.err", af.getName()), ex); 
 
  277         List<XWPFPictureData> listOfAllPictures = null;
 
  279             listOfAllPictures = docx.getAllPictures();
 
  280         } 
catch (Exception ex) {
 
  282             logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(), 
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex); 
 
  288         String outputFolderPath;
 
  289         if (listOfAllPictures.isEmpty()) {
 
  292             outputFolderPath = getOutputFolderPath(this.parentFileName);
 
  294         if (outputFolderPath == null) {
 
  295             logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(), 
"EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName())); 
 
  298         listOfExtractedImages = 
new ArrayList<>();
 
  300         for (XWPFPictureData xwpfPicture : listOfAllPictures) {
 
  // NOTE(review): this name comes from the document and is joined to the output
  // folder below without any visible sanitising - verify it cannot escape the folder.
  301             String fileName = xwpfPicture.getFileName();
 
  303                 data = xwpfPicture.getData();
 
  304             } 
catch (Exception ex) {
 
  306                 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(), 
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex); 
 
  309             writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), data);
 
  // NOTE(review): getData() is called a second time here, outside the guarded
  // call above, although "data" already holds the bytes; data.length looks sufficient.
  310             listOfExtractedImages.add(
new ExtractedImage(fileName, getFileRelativePath(fileName), xwpfPicture.getData().length, af));
 
  312         return listOfExtractedImages;
 
  /**
   * Extracts the pictures stored in a PPT file. The presentation is opened as
   * a SlideShow, every entry of getPictureData() is written into the output
   * folder of the current parent file, and one ExtractedImage is recorded per
   * picture. Image names are generated as UNKNOWN_NAME_PREFIX + i + ext.
   *
   * Failures while opening the presentation, listing the pictures or reading
   * a picture's data are logged as warnings; a null output folder path is
   * logged as well.
   *
   * NOTE(review): the code that declares and advances "i", derives "ext" from
   * the picture type, and declares "data" is not visible in this listing, nor
   * are the statements that follow each logged failure (presumably the method
   * returns null there) - confirm against the full source.
   *
   * @param af the PPT file to read
   * @return the list of images that were extracted
   */
  323     private List<ExtractedImage> extractImagesFromPpt(AbstractFile af) {
 
  324         List<ExtractedImage> listOfExtractedImages;
 
  325         SlideShow ppt = null;
 
  327             ppt = 
new SlideShow(
new ReadContentInputStream(af));
 
  328         } 
catch (Throwable ex) {
 
  330             logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(), 
"EmbeddedFileExtractorIngestModule.ImageExtractor.pptContainer.init.err", af.getName()), ex); 
 
  335         PictureData[] listOfAllPictures = null;
 
  337             listOfAllPictures = ppt.getPictureData();
 
  338         } 
catch (Exception ex) {
 
  340             logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(), 
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex); 
 
  346         String outputFolderPath;
 
  347         if (listOfAllPictures.length == 0) {
 
  350             outputFolderPath = getOutputFolderPath(this.parentFileName);
 
  352         if (outputFolderPath == null) {
 
  353             logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(), 
"EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName())); 
 
  360         listOfExtractedImages = 
new ArrayList<>();
 
  362         for (PictureData pictureData : listOfAllPictures) {
 
  // The numeric picture type is read here; presumably it selects "ext" on the
  // lines missing from this listing - confirm.
  366             int type = pictureData.getType();
 
  387             String imageName = UNKNOWN_NAME_PREFIX + i + ext; 
 
  389                 data = pictureData.getData();
 
  390             } 
catch (Exception ex) {
 
  392                 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(), 
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex); 
 
  395             writeExtractedImage(Paths.get(outputFolderPath, imageName).toString(), data);
 
  // NOTE(review): getData() is called a second time here, outside the guarded
  // call above, although "data" already holds the bytes; data.length looks sufficient.
  396             listOfExtractedImages.add(
new ExtractedImage(imageName, getFileRelativePath(imageName), pictureData.getData().length, af));
 
  399         return listOfExtractedImages;
 
  /**
   * Extracts the pictures stored in a PPTX file. The presentation is opened
   * as an XMLSlideShow, every entry of getAllPictures() is written into the
   * output folder of the current parent file under the picture's own file
   * name, and one ExtractedImage is recorded per picture.
   *
   * Failures while opening the presentation, listing the pictures or reading
   * a picture's data are logged as warnings; a null output folder path is
   * logged as well.
   *
   * NOTE(review): the declarations of "pptx" and "data", the body of the
   * isEmpty() branch and the statements that follow each logged failure are
   * not visible in this listing; presumably the method returns null in those
   * cases - confirm against the full source.
   *
   * @param af the PPTX file to read
   * @return the list of images that were extracted
   */
  410     private List<ExtractedImage> extractImagesFromPptx(AbstractFile af) {
 
  411         List<ExtractedImage> listOfExtractedImages;
 
  414             pptx = 
new XMLSlideShow(
new ReadContentInputStream(af));
 
  415         } 
catch (Throwable ex) {
 
  417             logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(), 
"EmbeddedFileExtractorIngestModule.ImageExtractor.pptxContainer.init.err", af.getName()), ex); 
 
  420         List<XSLFPictureData> listOfAllPictures = null;
 
  422             listOfAllPictures = pptx.getAllPictures();
 
  423         } 
catch (Exception ex) {
 
  425             logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(), 
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex); 
 
  431         String outputFolderPath;
 
  432         if (listOfAllPictures.isEmpty()) {
 
  435             outputFolderPath = getOutputFolderPath(this.parentFileName);
 
  437         if (outputFolderPath == null) {
 
  438             logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(), 
"EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName())); 
 
  442         listOfExtractedImages = 
new ArrayList<>();
 
  444         for (XSLFPictureData xslsPicture : listOfAllPictures) {
 
  // NOTE(review): this name comes from the document and is joined to the output
  // folder below without any visible sanitising - verify it cannot escape the folder.
  448             String fileName = xslsPicture.getFileName();
 
  450                 data = xslsPicture.getData();
 
  451             } 
catch (Exception ex) {
 
  453                 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(), 
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex); 
 
  456             writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), data);
 
  // NOTE(review): getData() is called a second time here, outside the guarded
  // call above, although "data" already holds the bytes; data.length looks sufficient.
  457             listOfExtractedImages.add(
new ExtractedImage(fileName, getFileRelativePath(fileName), xslsPicture.getData().length, af));
 
  461         return listOfExtractedImages;
 
  /**
   * Extracts the pictures stored in an XLS file. The workbook is opened as an
   * HSSFWorkbook, every entry of getAllPictures() is written into the output
   * folder of the current parent file, and one ExtractedImage is recorded per
   * picture. Image names are generated as UNKNOWN_NAME_PREFIX + i + "." +
   * the extension suggested by the picture.
   *
   * Failures while opening the workbook, listing the pictures or reading a
   * picture's data are logged as warnings; a null output folder path is
   * logged as well.
   *
   * NOTE(review): the declarations of "xls", "i" and "data", the code that
   * advances "i", the body of the isEmpty() branch and the statements that
   * follow each logged failure are not visible in this listing; presumably
   * the method returns null in the failure cases - confirm.
   *
   * @param af the XLS file to read
   * @return the list of images that were extracted
   */
  473     private List<ExtractedImage> extractImagesFromXls(AbstractFile af) {
 
  474         List<ExtractedImage> listOfExtractedImages;
 
  478             xls = 
new HSSFWorkbook(
new ReadContentInputStream(af));
 
  479         } 
catch (Throwable ex) {
 
  // NOTE(review): unlike the sibling methods, this message is wrapped in
  // String.format("%s%s", ...), which appends af.getName() a second time after
  // the bundle message that already receives it as an argument.
  481             logger.log(Level.WARNING, String.format(
"%s%s", NbBundle.getMessage(
this.getClass(), 
"EmbeddedFileExtractorIngestModule.ImageExtractor.xlsContainer.init.err", af.getName()), af.getName()), ex); 
 
  485         List<? extends 
org.apache.poi.ss.usermodel.PictureData> listOfAllPictures = null;
 
  487             listOfAllPictures = xls.getAllPictures();
 
  488         } 
catch (Exception ex) {
 
  490             logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(), 
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex); 
 
  496         String outputFolderPath;
 
  497         if (listOfAllPictures.isEmpty()) {
 
  500             outputFolderPath = getOutputFolderPath(this.parentFileName);
 
  502         if (outputFolderPath == null) {
 
  503             logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(), 
"EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName())); 
 
  508         listOfExtractedImages = 
new ArrayList<>();
 
  510         for (
org.apache.poi.ss.usermodel.PictureData pictureData : listOfAllPictures) {
 
  511             String imageName = UNKNOWN_NAME_PREFIX + i + 
"." + pictureData.suggestFileExtension(); 
 
  513                 data = pictureData.getData();
 
  514             } 
catch (Exception ex) {
 
  516                 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(), 
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex); 
 
  519             writeExtractedImage(Paths.get(outputFolderPath, imageName).toString(), data);
 
  // NOTE(review): getData() is called a second time here, outside the guarded
  // call above, although "data" already holds the bytes; data.length looks sufficient.
  520             listOfExtractedImages.add(
new ExtractedImage(imageName, getFileRelativePath(imageName), pictureData.getData().length, af));
 
  523         return listOfExtractedImages;
 
  /**
   * Extracts the pictures stored in an XLSX file. The workbook is opened as
   * an XSSFWorkbook, every entry of getAllPictures() is written into the
   * output folder of the current parent file, and one ExtractedImage is
   * recorded per picture. Image names are generated as UNKNOWN_NAME_PREFIX +
   * i + "." + the extension suggested by the picture.
   *
   * Failures while opening the workbook, listing the pictures or reading a
   * picture's data are logged as warnings; a null output folder path is
   * logged as well.
   *
   * NOTE(review): the declarations of "xlsx", "i" and "data", the code that
   * advances "i", the body of the isEmpty() branch and the statements that
   * follow each logged failure are not visible in this listing; presumably
   * the method returns null in the failure cases - confirm.
   *
   * @param af the XLSX file to read
   * @return the list of images that were extracted
   */
  535     private List<ExtractedImage> extractImagesFromXlsx(AbstractFile af) {
 
  536         List<ExtractedImage> listOfExtractedImages;
 
  539             xlsx = 
new XSSFWorkbook(
new ReadContentInputStream(af));
 
  540         } 
catch (Throwable ex) {
 
  542             logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(), 
"EmbeddedFileExtractorIngestModule.ImageExtractor.xlsxContainer.init.err", af.getName()), ex); 
 
  546         List<? extends 
org.apache.poi.ss.usermodel.PictureData> listOfAllPictures = null;
 
  548             listOfAllPictures = xlsx.getAllPictures();
 
  549         } 
catch (Exception ex) {
 
  551             logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(), 
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex); 
 
  557         String outputFolderPath;
 
  558         if (listOfAllPictures.isEmpty()) {
 
  561             outputFolderPath = getOutputFolderPath(this.parentFileName);
 
  563         if (outputFolderPath == null) {
 
  564             logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(), 
"EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName())); 
 
  569         listOfExtractedImages = 
new ArrayList<>();
 
  571         for (
org.apache.poi.ss.usermodel.PictureData pictureData : listOfAllPictures) {
 
  572             String imageName = UNKNOWN_NAME_PREFIX + i + 
"." + pictureData.suggestFileExtension();
 
  574                 data = pictureData.getData();
 
  575             } 
catch (Exception ex) {
 
  577                 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(), 
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex); 
 
  580             writeExtractedImage(Paths.get(outputFolderPath, imageName).toString(), data);
 
  // NOTE(review): getData() is called a second time here, outside the guarded
  // call above, although "data" already holds the bytes; data.length looks sufficient.
  581             listOfExtractedImages.add(
new ExtractedImage(imageName, getFileRelativePath(imageName), pictureData.getData().length, af));
 
  584         return listOfExtractedImages;
 
  /**
   * Writes one extracted image to disk. The target file is opened through an
   * EncodedFileOutputStream using TskData.EncodingType.XOR1, wrapped around a
   * FileOutputStream for outputPath; the try-with-resources statement closes
   * the stream. An IOException is logged as a warning and not rethrown.
   *
   * NOTE(review): the statement inside the try block is not visible in this
   * listing; presumably it writes "data" to the stream - confirm.
   *
   * @param outputPath path of the file to create
   * @param data       image bytes
   */
  595     private void writeExtractedImage(String outputPath, byte[] data) {
 
  596         try (EncodedFileOutputStream fos = 
new EncodedFileOutputStream(
new FileOutputStream(outputPath), TskData.EncodingType.XOR1)) {
 
  598         } 
catch (IOException ex) {
 
  599             logger.log(Level.WARNING, 
"Could not write to the provided location: " + outputPath, ex); 
 
  612     private String getOutputFolderPath(String parentFileName) {
 
  613         String outputFolderPath = moduleDirAbsolute + File.separator + parentFileName;
 
  614         File outputFilePath = 
new File(outputFolderPath);
 
  615         if (!outputFilePath.exists()) {
 
  617                 outputFilePath.mkdirs();
 
  618             } 
catch (SecurityException ex) {
 
  619                 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(), 
"EmbeddedFileExtractorIngestModule.ImageExtractor.getOutputFolderPath.exception.msg", parentFileName), ex);
 
  623         return outputFolderPath;
 
  635     private String getFileRelativePath(String fileName) {
 
  637         return "/" + moduleDirRelative + 
"/" + this.parentFileName + 
"/" + fileName; 
 
  658         ExtractedImage(String fileName, String localPath, 
long size, AbstractFile parentFile) {
 
  662         ExtractedImage(String fileName, String localPath, 
long size, 
long ctime, 
long crtime, 
long atime, 
long mtime, AbstractFile parentFile) {