19 package org.sleuthkit.autopsy.modules.embeddedfileextractor;
22 import java.io.FileOutputStream;
23 import java.io.IOException;
24 import java.nio.file.Paths;
25 import java.util.ArrayList;
26 import java.util.List;
27 import java.util.logging.Level;
28 import org.apache.poi.hslf.model.Picture;
29 import org.apache.poi.hslf.usermodel.PictureData;
30 import org.apache.poi.hslf.usermodel.SlideShow;
31 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
32 import org.apache.poi.hwpf.HWPFDocument;
33 import org.apache.poi.hwpf.model.PicturesTable;
34 import org.apache.poi.ss.usermodel.Workbook;
35 import org.apache.poi.xslf.usermodel.XMLSlideShow;
36 import org.apache.poi.xslf.usermodel.XSLFPictureData;
37 import org.apache.poi.xssf.usermodel.XSSFWorkbook;
38 import org.apache.poi.xwpf.usermodel.XWPFDocument;
39 import org.apache.poi.xwpf.usermodel.XWPFPictureData;
40 import org.openide.util.NbBundle;
/**
 * Pulls embedded pictures out of office documents (the MIME types listed in
 * SupportedImageExtractionFormats), writes them below the module output
 * directory, and adds them to the case as derived files of the parent file.
 *
 * NOTE(review): this listing skips source lines (the embedded line numbers
 * are not contiguous), so some statements of this class are not visible here.
 */
52 class ImageExtractor {
// Case file manager; used by extractImage() to register extracted pictures via addDerivedFile().
54 private final FileManager fileManager;
// Ingest services; used to fire a ModuleContentEvent once new derived files exist.
55 private final IngestServices services;
56 private static final Logger logger = Logger.getLogger(ImageExtractor.class.getName());
// Ingest job the extracted files are handed back to (context.addFilesToJob).
57 private final IngestJobContext context;
// Unique name of the file currently being processed; assigned at the start of
// extractImage() and read by the extractImagesFrom*() helpers and getFileRelativePath().
58 private String parentFileName;
// Prefix for generated picture names in the PPT, XLS and XLSX extractors.
// NOTE(review): a per-instance constant; could be static final - confirm nothing relies on it being an instance field.
59 private final String UNKNOWN_NAME_PREFIX =
"image_";
// Used by isImageExtractionSupported() to obtain the MIME type of a file.
60 private final FileTypeDetector fileTypeDetector;
// Module output directory in relative form; used to build the local path stored for each derived file.
62 private String moduleDirRelative;
// Module output directory in absolute form; used to build the folder the pictures are written to.
63 private String moduleDirAbsolute;
/**
 * Document formats this extractor handles, each paired with the MIME type
 * string it is recognized by.
 */
68 enum SupportedImageExtractionFormats {
70 DOC(
"application/msword"),
71 DOCX(
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"),
72 PPT(
"application/vnd.ms-powerpoint"),
73 PPTX(
"application/vnd.openxmlformats-officedocument.presentationml.presentation"),
74 XLS(
"application/vnd.ms-excel"),
75 XLSX(
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
// MIME type string associated with this format.
77 private final String mimeType;
/**
 * @param mimeType MIME type string to associate with the constant
 */
79 SupportedImageExtractionFormats(
final String mimeType) {
80 this.mimeType = mimeType;
// isImageExtractionSupported() compares toString() with the detected MIME type.
// NOTE(review): the body is not visible in this listing; presumably it returns mimeType - confirm.
84 public String toString() {
// Format of the most recently matched file: written by isImageExtractionSupported()
// and switched on by extractImage(), so extractImage() depends on that earlier call.
89 private SupportedImageExtractionFormats abstractFileExtractionFormat;
/**
 * Creates an extractor bound to one ingest job.
 *
 * @param context           ingest job context that extracted files are added to
 * @param fileTypeDetector  detector used to determine the MIME type of candidate files
 * @param moduleDirRelative module output directory, relative form (used for stored local paths)
 * @param moduleDirAbsolute module output directory, absolute form (used for writing files)
 */
91 ImageExtractor(IngestJobContext context, FileTypeDetector fileTypeDetector, String moduleDirRelative, String moduleDirAbsolute) {
// The file manager is taken from the currently open case.
93 this.fileManager = Case.getCurrentCase().getServices().getFileManager();
94 this.services = IngestServices.getInstance();
95 this.context = context;
96 this.fileTypeDetector = fileTypeDetector;
97 this.moduleDirRelative = moduleDirRelative;
98 this.moduleDirAbsolute = moduleDirAbsolute;
/**
 * Checks whether the MIME type detected for the given file matches one of
 * the SupportedImageExtractionFormats. On a match the format is remembered
 * in abstractFileExtractionFormat for a later extractImage() call.
 *
 * NOTE(review): the return statements are on lines not shown in this
 * listing; presumably true on a match and false otherwise (including when
 * type detection throws) - confirm.
 *
 * @param abstractFile file to test
 * @return whether image extraction is supported for the file
 */
110 boolean isImageExtractionSupported(AbstractFile abstractFile) {
112 String abstractFileMimeType = fileTypeDetector.getFileType(abstractFile);
113 for (SupportedImageExtractionFormats s : SupportedImageExtractionFormats.values()) {
// The comparison goes through toString() of the enum constant.
114 if (s.toString().equals(abstractFileMimeType)) {
// Side effect: cache the matched format for extractImage().
115 abstractFileExtractionFormat = s;
120 }
catch (TskCoreException ex) {
// Detection failure is logged and not rethrown.
121 logger.log(Level.WARNING,
"Error executing FileTypeDetector.getFileType()", ex);
/**
 * Extracts the pictures embedded in the given document, registers each one
 * as a derived file of that document, and hands the new files to the ingest
 * job. The extractor is picked from abstractFileExtractionFormat, which is
 * set by isImageExtractionSupported().
 *
 * NOTE(review): case labels, break/return statements and try openers are on
 * lines not shown in this listing.
 *
 * @param abstractFile document to extract pictures from
 */
135 void extractImage(AbstractFile abstractFile) {
140 List<ExtractedImage> listOfExtractedImages = null;
141 List<AbstractFile> listOfExtractedImageAbstractFiles = null;
// Remember the unique name; the helpers use it for the output folder and relative paths.
142 this.parentFileName = EmbeddedFileExtractorIngestModule.getUniqueName(abstractFile);
// Skip files that already have children and an unpacked folder on disk.
// NOTE(review): getOutputFolderPath() creates the folder when it is missing, so
// exists() looks like it will be true whenever creation succeeds - confirm the
// check distinguishes "already processed" from "just created".
145 if (abstractFile.hasChildren()) {
147 if (
new File(getOutputFolderPath(parentFileName)).exists()) {
148 logger.log(Level.INFO,
"File already has been processed as it has children and local unpacked file, skipping: {0}", abstractFile.getName());
152 }
catch (TskCoreException e) {
153 logger.log(Level.INFO,
"Error checking if file already has been processed, skipping: {0}", parentFileName);
// Dispatch to the extractor for the cached format.
156 switch (abstractFileExtractionFormat) {
158 listOfExtractedImages = extractImagesFromDoc(abstractFile);
161 listOfExtractedImages = extractImagesFromDocx(abstractFile);
164 listOfExtractedImages = extractImagesFromPpt(abstractFile);
167 listOfExtractedImages = extractImagesFromPptx(abstractFile);
170 listOfExtractedImages = extractImagesFromXls(abstractFile);
173 listOfExtractedImages = extractImagesFromXlsx(abstractFile);
// A null list is tested here; the action taken is on a line not shown.
179 if (listOfExtractedImages == null) {
183 listOfExtractedImageAbstractFiles =
new ArrayList<>();
184 for (ExtractedImage extractedImage : listOfExtractedImages) {
// NOTE(review): getAtime() is passed twice in a row below. ExtractedImage has a
// constructor taking an mtime, so the second one presumably should be the
// modified time - confirm against the addDerivedFile parameter order.
186 listOfExtractedImageAbstractFiles.add(fileManager.addDerivedFile(extractedImage.getFileName(), extractedImage.getLocalPath(), extractedImage.getSize(),
187 extractedImage.getCtime(), extractedImage.getCrtime(), extractedImage.getAtime(), extractedImage.getAtime(),
188 true, abstractFile, null, EmbeddedFileExtractorModuleFactory.getModuleName(), null, null));
189 }
catch (TskCoreException ex) {
190 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.extractImage.addToDB.exception.msg"), ex);
// When at least one picture was extracted, notify listeners and queue the derived files.
193 if (!listOfExtractedImages.isEmpty()) {
194 services.fireModuleContentEvent(
new ModuleContentEvent(abstractFile));
195 context.addFilesToJob(listOfExtractedImageAbstractFiles);
/**
 * Extracts the pictures of a DOC file through POI's HWPFDocument and writes
 * each one into the output folder of the parent file.
 *
 * NOTE(review): try openers, the declaration of data and the early-exit
 * statements are on lines not shown in this listing; the caller tests the
 * result for null, so failures presumably return null - confirm.
 *
 * @param af the DOC file
 * @return the extracted pictures
 */
207 private List<ExtractedImage> extractImagesFromDoc(AbstractFile af) {
208 List<ExtractedImage> listOfExtractedImages;
209 HWPFDocument doc = null;
211 doc =
new HWPFDocument(
new ReadContentInputStream(af));
212 }
catch (Throwable ignore) {
// Throwable is caught here; only a message is logged, the caught object is not passed to the logger.
214 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.docContainer.init.err", af.getName()));
218 PicturesTable pictureTable = null;
219 List<
org.apache.poi.hwpf.usermodel.Picture> listOfAllPictures = null;
221 pictureTable = doc.getPicturesTable();
222 listOfAllPictures = pictureTable.getAllPictures();
223 }
catch (Exception ignore) {
225 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()));
229 String outputFolderPath;
230 if (listOfAllPictures.isEmpty()) {
233 outputFolderPath = getOutputFolderPath(this.parentFileName);
// NOTE(review): no warning is visible for a null output path here, unlike the sibling extractors.
235 if (outputFolderPath == null) {
238 listOfExtractedImages =
new ArrayList<>();
240 for (
org.apache.poi.hwpf.usermodel.Picture picture : listOfAllPictures) {
// The file name comes from the picture itself.
241 String fileName = picture.suggestFullFileName();
243 data = picture.getContent();
244 }
catch (Exception ignore) {
246 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()));
249 writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), data);
// The recorded size is picture.getSize(); the stored path is the relative one.
251 listOfExtractedImages.add(
new ExtractedImage(fileName, getFileRelativePath(fileName), picture.getSize(), af));
254 return listOfExtractedImages;
/**
 * Extracts the pictures of a DOCX file through POI's XWPFDocument and writes
 * each one into the output folder of the parent file.
 *
 * NOTE(review): try openers, the declaration of data and the early-exit
 * statements are on lines not shown in this listing; the caller tests the
 * result for null, so failures presumably return null - confirm.
 *
 * @param af the DOCX file
 * @return the extracted pictures
 */
265 private List<ExtractedImage> extractImagesFromDocx(AbstractFile af) {
266 List<ExtractedImage> listOfExtractedImages;
267 XWPFDocument docx = null;
269 docx =
new XWPFDocument(
new ReadContentInputStream(af));
270 }
catch (Throwable ignore) {
// Throwable is caught here; only a message is logged, the caught object is not passed to the logger.
272 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.docxContainer.init.err", af.getName()));
275 List<XWPFPictureData> listOfAllPictures = null;
277 listOfAllPictures = docx.getAllPictures();
278 }
catch (Exception ignore) {
280 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()));
286 String outputFolderPath;
287 if (listOfAllPictures.isEmpty()) {
290 outputFolderPath = getOutputFolderPath(this.parentFileName);
292 if (outputFolderPath == null) {
293 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName()));
296 listOfExtractedImages =
new ArrayList<>();
298 for (XWPFPictureData xwpfPicture : listOfAllPictures) {
// The file name comes from the picture part itself.
299 String fileName = xwpfPicture.getFileName();
301 data = xwpfPicture.getData();
302 }
catch (Exception ignore) {
304 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()));
307 writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), data);
// NOTE(review): getData() is called a second time just for the length; data from above is already available.
308 listOfExtractedImages.add(
new ExtractedImage(fileName, getFileRelativePath(fileName), xwpfPicture.getData().length, af));
310 return listOfExtractedImages;
/**
 * Extracts the pictures of a PPT file through POI's SlideShow and writes
 * each one into the output folder of the parent file under a generated name.
 *
 * NOTE(review): try openers, the declarations of i, ext and data, the code
 * that derives ext from the picture type, and the early-exit statements are
 * on lines not shown in this listing; the caller tests the result for null,
 * so failures presumably return null - confirm.
 *
 * @param af the PPT file
 * @return the extracted pictures
 */
321 private List<ExtractedImage> extractImagesFromPpt(AbstractFile af) {
322 List<ExtractedImage> listOfExtractedImages;
323 SlideShow ppt = null;
325 ppt =
new SlideShow(
new ReadContentInputStream(af));
326 }
catch (Throwable ignore) {
// Throwable is caught here; only a message is logged, the caught object is not passed to the logger.
328 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.pptContainer.init.err", af.getName()));
333 PictureData[] listOfAllPictures = null;
335 listOfAllPictures = ppt.getPictureData();
336 }
catch (Exception ignore) {
338 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()));
344 String outputFolderPath;
345 if (listOfAllPictures.length == 0) {
348 outputFolderPath = getOutputFolderPath(this.parentFileName);
350 if (outputFolderPath == null) {
351 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName()));
358 listOfExtractedImages =
new ArrayList<>();
360 for (PictureData pictureData : listOfAllPictures) {
// Numeric picture type; presumably mapped to the file extension ext on the lines not shown.
364 int type = pictureData.getType();
// Generated name: fixed prefix + counter + extension.
385 String imageName = UNKNOWN_NAME_PREFIX + i + ext;
387 data = pictureData.getData();
388 }
catch (Exception ignore) {
390 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()));
393 writeExtractedImage(Paths.get(outputFolderPath, imageName).toString(), data);
// NOTE(review): getData() is called a second time just for the length; data from above is already available.
394 listOfExtractedImages.add(
new ExtractedImage(imageName, getFileRelativePath(imageName), pictureData.getData().length, af));
397 return listOfExtractedImages;
/**
 * Extracts the pictures of a PPTX file through POI's XMLSlideShow and writes
 * each one into the output folder of the parent file.
 *
 * NOTE(review): the declarations of pptx and data, try openers and the
 * early-exit statements are on lines not shown in this listing; the caller
 * tests the result for null, so failures presumably return null - confirm.
 *
 * @param af the PPTX file
 * @return the extracted pictures
 */
408 private List<ExtractedImage> extractImagesFromPptx(AbstractFile af) {
409 List<ExtractedImage> listOfExtractedImages;
412 pptx =
new XMLSlideShow(
new ReadContentInputStream(af));
413 }
catch (Throwable ignore) {
// Throwable is caught here; only a message is logged, the caught object is not passed to the logger.
415 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.pptxContainer.init.err", af.getName()));
418 List<XSLFPictureData> listOfAllPictures = null;
420 listOfAllPictures = pptx.getAllPictures();
421 }
catch (Exception ignore) {
423 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()));
429 String outputFolderPath;
430 if (listOfAllPictures.isEmpty()) {
433 outputFolderPath = getOutputFolderPath(this.parentFileName);
435 if (outputFolderPath == null) {
436 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName()));
440 listOfExtractedImages =
new ArrayList<>();
442 for (XSLFPictureData xslsPicture : listOfAllPictures) {
// The file name comes from the picture part itself.
446 String fileName = xslsPicture.getFileName();
448 data = xslsPicture.getData();
449 }
catch (Exception ignore) {
451 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()));
454 writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), data);
// NOTE(review): getData() is called a second time just for the length; data from above is already available.
455 listOfExtractedImages.add(
new ExtractedImage(fileName, getFileRelativePath(fileName), xslsPicture.getData().length, af));
459 return listOfExtractedImages;
/**
 * Extracts the pictures of an XLS file through POI's HSSFWorkbook and writes
 * each one into the output folder of the parent file under a generated name.
 *
 * NOTE(review): the declarations of xls, i and data, try openers and the
 * early-exit statements are on lines not shown in this listing; the caller
 * tests the result for null, so failures presumably return null - confirm.
 *
 * @param af the XLS file
 * @return the extracted pictures
 */
471 private List<ExtractedImage> extractImagesFromXls(AbstractFile af) {
472 List<ExtractedImage> listOfExtractedImages;
476 xls =
new HSSFWorkbook(
new ReadContentInputStream(af));
477 }
catch (Throwable ignore) {
// NOTE(review): the other extractors log the bundle message alone. Here the
// message (already built with af.getName()) is followed by af.getName() again
// via "{0}{1}", which presumably prints the file name twice - confirm intent.
479 logger.log(Level.WARNING,
"{0}{1}",
new Object[]{NbBundle.getMessage(this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.xlsContainer.init.err", af.getName()), af.getName()});
483 List<? extends
org.apache.poi.ss.usermodel.PictureData> listOfAllPictures = null;
485 listOfAllPictures = xls.getAllPictures();
486 }
catch (Exception ignore) {
488 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()));
494 String outputFolderPath;
495 if (listOfAllPictures.isEmpty()) {
498 outputFolderPath = getOutputFolderPath(this.parentFileName);
500 if (outputFolderPath == null) {
501 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName()));
506 listOfExtractedImages =
new ArrayList<>();
508 for (
org.apache.poi.ss.usermodel.PictureData pictureData : listOfAllPictures) {
// Generated name: fixed prefix + counter + "." + extension suggested by POI.
509 String imageName = UNKNOWN_NAME_PREFIX + i +
"." + pictureData.suggestFileExtension();
511 data = pictureData.getData();
512 }
catch (Exception ignore) {
514 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()));
517 writeExtractedImage(Paths.get(outputFolderPath, imageName).toString(), data);
// NOTE(review): getData() is called a second time just for the length; data from above is already available.
518 listOfExtractedImages.add(
new ExtractedImage(imageName, getFileRelativePath(imageName), pictureData.getData().length, af));
521 return listOfExtractedImages;
/**
 * Extracts the pictures of an XLSX file through POI's XSSFWorkbook and writes
 * each one into the output folder of the parent file under a generated name.
 *
 * NOTE(review): the declarations of xlsx, i and data, try openers and the
 * early-exit statements are on lines not shown in this listing; the caller
 * tests the result for null, so failures presumably return null - confirm.
 *
 * @param af the XLSX file
 * @return the extracted pictures
 */
533 private List<ExtractedImage> extractImagesFromXlsx(AbstractFile af) {
534 List<ExtractedImage> listOfExtractedImages;
537 xlsx =
new XSSFWorkbook(
new ReadContentInputStream(af));
538 }
catch (Throwable ignore) {
// Throwable is caught here; only a message is logged, the caught object is not passed to the logger.
540 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.xlsxContainer.init.err", af.getName()));
544 List<? extends
org.apache.poi.ss.usermodel.PictureData> listOfAllPictures = null;
546 listOfAllPictures = xlsx.getAllPictures();
547 }
catch (Exception ignore) {
549 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()));
555 String outputFolderPath;
556 if (listOfAllPictures.isEmpty()) {
559 outputFolderPath = getOutputFolderPath(this.parentFileName);
561 if (outputFolderPath == null) {
562 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName()));
567 listOfExtractedImages =
new ArrayList<>();
569 for (
org.apache.poi.ss.usermodel.PictureData pictureData : listOfAllPictures) {
// Generated name: fixed prefix + counter + "." + extension suggested by POI.
570 String imageName = UNKNOWN_NAME_PREFIX + i +
"." + pictureData.suggestFileExtension();
572 data = pictureData.getData();
573 }
catch (Exception ignore) {
575 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()));
578 writeExtractedImage(Paths.get(outputFolderPath, imageName).toString(), data);
// NOTE(review): getData() is called a second time just for the length; data from above is already available.
579 listOfExtractedImages.add(
new ExtractedImage(imageName, getFileRelativePath(imageName), pictureData.getData().length, af));
582 return listOfExtractedImages;
/**
 * Opens the given path for writing in a try-with-resources block, so the
 * stream is closed on exit. An IOException is logged as a warning and not
 * rethrown, so callers are not told about a failed write.
 *
 * NOTE(review): the statement that writes data to the stream is on a line
 * not shown in this listing.
 *
 * @param outputPath full path of the file to create
 * @param data       picture bytes
 */
593 private void writeExtractedImage(String outputPath, byte[] data) {
594 try (FileOutputStream fos =
new FileOutputStream(outputPath)) {
596 }
catch (IOException ex) {
597 logger.log(Level.WARNING,
"Could not write to the provided location: " + outputPath, ex);
/**
 * Builds the absolute output folder for a parent file (module directory +
 * separator + parent name) and creates it when it does not exist yet.
 *
 * NOTE(review): callers compare the result with null, so the
 * SecurityException branch presumably returns null on a line not shown in
 * this listing - confirm.
 *
 * @param parentFileName unique name of the file the pictures belong to
 * @return the absolute folder path
 */
610 private String getOutputFolderPath(String parentFileName) {
611 String outputFolderPath = moduleDirAbsolute + File.separator + parentFileName;
612 File outputFilePath =
new File(outputFolderPath);
613 if (!outputFilePath.exists()) {
// NOTE(review): the boolean result of mkdirs() is discarded; only a
// SecurityException is handled, so a plain creation failure goes unnoticed here.
615 outputFilePath.mkdirs();
616 }
catch (SecurityException ex) {
617 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.getOutputFolderPath.exception.msg", parentFileName), ex);
621 return outputFolderPath;
/**
 * Builds the path that is stored as the local path of an extracted picture:
 * "/" + module relative directory + "/" + current parent file name + "/" +
 * file name. Forward slashes are hard-coded rather than File.separator.
 *
 * @param fileName name of the extracted picture file
 * @return the slash-separated relative path
 */
633 private String getFileRelativePath(String fileName) {
635 return "/" + moduleDirRelative +
"/" + this.parentFileName +
"/" + fileName;
656 ExtractedImage(String fileName, String localPath,
long size, AbstractFile parentFile) {
660 ExtractedImage(String fileName, String localPath,
long size,
long ctime,
long crtime,
long atime,
long mtime, AbstractFile parentFile) {