19 package org.sleuthkit.autopsy.modules.embeddedfileextractor;
22 import java.io.FileOutputStream;
23 import java.io.IOException;
24 import java.nio.file.Paths;
25 import java.util.ArrayList;
26 import java.util.List;
27 import java.util.logging.Level;
28 import org.apache.poi.OldFileFormatException;
29 import org.apache.poi.hslf.model.Picture;
30 import org.apache.poi.hslf.usermodel.PictureData;
31 import org.apache.poi.hslf.usermodel.SlideShow;
32 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
33 import org.apache.poi.hwpf.HWPFDocument;
34 import org.apache.poi.hwpf.model.PicturesTable;
35 import org.apache.poi.ss.usermodel.Workbook;
36 import org.apache.poi.xslf.usermodel.XMLSlideShow;
37 import org.apache.poi.xslf.usermodel.XSLFPictureData;
38 import org.apache.poi.xssf.usermodel.XSSFWorkbook;
39 import org.apache.poi.xwpf.usermodel.XWPFDocument;
40 import org.apache.poi.xwpf.usermodel.XWPFPictureData;
41 import org.openide.util.NbBundle;
53 class ImageExtractor {
55 private final FileManager fileManager;
56 private final IngestServices services;
57 private static final Logger logger = Logger.getLogger(ImageExtractor.class.getName());
58 private final IngestJobContext context;
59 private String parentFileName;
60 private final String UNKNOWN_NAME_PREFIX =
"image_";
61 private final FileTypeDetector fileTypeDetector;
63 private String moduleDirRelative;
64 private String moduleDirAbsolute;
/**
 * Document formats from which embedded images can be extracted, each paired
 * with the MIME type string it is recognised by.
 */
enum SupportedImageExtractionFormats {

    DOC("application/msword"),
    DOCX("application/vnd.openxmlformats-officedocument.wordprocessingml.document"),
    PPT("application/vnd.ms-powerpoint"),
    PPTX("application/vnd.openxmlformats-officedocument.presentationml.presentation"),
    XLS("application/vnd.ms-excel"),
    XLSX("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");

    /** MIME type string associated with this format. */
    private final String mimeTypeName;

    /**
     * @param type the MIME type string associated with the format
     */
    SupportedImageExtractionFormats(final String type) {
        mimeTypeName = type;
    }

    /**
     * @return the MIME type string of this format (not the constant's name)
     */
    @Override
    public String toString() {
        return mimeTypeName;
    }
}
89 private SupportedImageExtractionFormats abstractFileExtractionFormat;
91 ImageExtractor(IngestJobContext context, FileTypeDetector fileTypeDetector, String moduleDirRelative, String moduleDirAbsolute) {
93 this.fileManager = Case.getCurrentCase().getServices().getFileManager();
94 this.services = IngestServices.getInstance();
95 this.context = context;
96 this.fileTypeDetector = fileTypeDetector;
97 this.moduleDirRelative = moduleDirRelative;
98 this.moduleDirAbsolute = moduleDirAbsolute;
109 boolean isImageExtractionSupported(AbstractFile abstractFile) {
111 String abstractFileMimeType = fileTypeDetector.getFileType(abstractFile);
112 for (SupportedImageExtractionFormats s : SupportedImageExtractionFormats.values()) {
113 if (s.toString().equals(abstractFileMimeType)) {
114 abstractFileExtractionFormat = s;
119 }
catch (TskCoreException ex) {
120 logger.log(Level.WARNING,
"Error executing FileTypeDetector.getFileType()", ex);
134 void extractImage(AbstractFile abstractFile) {
139 List<ExtractedImage> listOfExtractedImages = null;
140 List<AbstractFile> listOfExtractedImageAbstractFiles = null;
141 this.parentFileName = EmbeddedFileExtractorIngestModule.getUniqueName(abstractFile);
144 if (abstractFile.hasChildren()) {
146 if (
new File(getOutputFolderPath(parentFileName)).exists()) {
147 logger.log(Level.INFO,
"File already has been processed as it has children and local unpacked file, skipping: {0}", abstractFile.getName());
151 }
catch (TskCoreException e) {
152 logger.log(Level.INFO,
"Error checking if file already has been processed, skipping: {0}", parentFileName);
155 switch (abstractFileExtractionFormat) {
157 listOfExtractedImages = extractImagesFromDoc(abstractFile);
160 listOfExtractedImages = extractImagesFromDocx(abstractFile);
163 listOfExtractedImages = extractImagesFromPpt(abstractFile);
166 listOfExtractedImages = extractImagesFromPptx(abstractFile);
169 listOfExtractedImages = extractImagesFromXls(abstractFile);
172 listOfExtractedImages = extractImagesFromXlsx(abstractFile);
178 if (listOfExtractedImages == null) {
182 listOfExtractedImageAbstractFiles =
new ArrayList<>();
183 for (ExtractedImage extractedImage : listOfExtractedImages) {
185 listOfExtractedImageAbstractFiles.add(fileManager.addDerivedFile(extractedImage.getFileName(), extractedImage.getLocalPath(), extractedImage.getSize(),
186 extractedImage.getCtime(), extractedImage.getCrtime(), extractedImage.getAtime(), extractedImage.getAtime(),
187 true, abstractFile, null, EmbeddedFileExtractorModuleFactory.getModuleName(), null, null));
188 }
catch (TskCoreException ex) {
189 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.extractImage.addToDB.exception.msg"), ex);
192 if (!listOfExtractedImages.isEmpty()) {
193 services.fireModuleContentEvent(
new ModuleContentEvent(abstractFile));
194 context.addFilesToJob(listOfExtractedImageAbstractFiles);
205 private List<ExtractedImage> extractImagesFromDoc(AbstractFile af) {
206 List<ExtractedImage> listOfExtractedImages;
207 HWPFDocument doc = null;
209 doc =
new HWPFDocument(
new ReadContentInputStream(af));
210 }
catch (Throwable ex) {
212 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.docContainer.init.err", af.getName()));
215 PicturesTable pictureTable = doc.getPicturesTable();
216 List<
org.apache.poi.hwpf.usermodel.Picture> listOfAllPictures = pictureTable.getAllPictures();
217 String outputFolderPath;
218 if (listOfAllPictures.isEmpty()) {
221 outputFolderPath = getOutputFolderPath(this.parentFileName);
223 if (outputFolderPath == null) {
226 listOfExtractedImages =
new ArrayList<>();
227 for (
org.apache.poi.hwpf.usermodel.Picture picture : listOfAllPictures) {
228 String fileName = picture.suggestFullFileName();
229 writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), picture.getContent());
231 listOfExtractedImages.add(
new ExtractedImage(fileName, getFileRelativePath(fileName), picture.getSize(), af));
234 return listOfExtractedImages;
244 private List<ExtractedImage> extractImagesFromDocx(AbstractFile af) {
245 List<ExtractedImage> listOfExtractedImages;
246 XWPFDocument docx = null;
248 docx =
new XWPFDocument(
new ReadContentInputStream(af));
249 }
catch (Throwable ex) {
251 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.docxContainer.init.err", af.getName()));
254 List<XWPFPictureData> listOfAllPictures = docx.getAllPictures();
258 String outputFolderPath;
259 if (listOfAllPictures.isEmpty()) {
262 outputFolderPath = getOutputFolderPath(this.parentFileName);
264 if (outputFolderPath == null) {
265 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName()));
268 listOfExtractedImages =
new ArrayList<>();
269 for (XWPFPictureData xwpfPicture : listOfAllPictures) {
270 String fileName = xwpfPicture.getFileName();
271 writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), xwpfPicture.getData());
272 listOfExtractedImages.add(
new ExtractedImage(fileName, getFileRelativePath(fileName), xwpfPicture.getData().length, af));
274 return listOfExtractedImages;
/**
 * Extracts the pictures of a PPT slide show and writes each one to the
 * output folder of the current parent file, named with UNKNOWN_NAME_PREFIX
 * followed by a number and an extension.
 *
 * NOTE(review): several statements of this method are not visible in this
 * excerpt (the try header, the bodies of the guard branches, and the code
 * that declares and maintains `i` and `ext`); the comments below describe
 * only what the visible statements do.
 *
 * @param af the file to read as a PPT slide show
 * @return the list of images that were written and wrapped as ExtractedImage
 */
284 private List<ExtractedImage> extractImagesFromPpt(AbstractFile af) {
285 List<ExtractedImage> listOfExtractedImages;
286 SlideShow ppt = null;
// Open the file content as a slide show.
288 ppt =
new SlideShow(
new ReadContentInputStream(af));
289 }
// Any failure while opening the container is logged as a warning with the file name.
catch (Throwable ex) {
291 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.pptContainer.init.err", af.getName()));
// All picture records of the presentation.
296 PictureData[] listOfAllPictures = ppt.getPictureData();
300 String outputFolderPath;
// Guard for a presentation without pictures (branch body not visible here).
301 if (listOfAllPictures.length == 0) {
// Folder of the current parent file below the module output directory.
304 outputFolderPath = getOutputFolderPath(this.parentFileName);
// A null path means the output folder is not available; this is logged as a warning.
306 if (outputFolderPath == null) {
307 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName()));
314 listOfExtractedImages =
new ArrayList<>();
315 for (PictureData pictureData : listOfAllPictures) {
// Numeric picture type code; presumably used to choose `ext` in lines not shown - TODO confirm.
319 int type = pictureData.getType();
// File name is prefix + i + ext; `i` and `ext` are defined in lines not visible here.
340 String imageName = UNKNOWN_NAME_PREFIX + i + ext;
// Write the picture bytes to the output folder, then record the image with its relative path and size.
341 writeExtractedImage(Paths.get(outputFolderPath, imageName).toString(), pictureData.getData());
342 listOfExtractedImages.add(
new ExtractedImage(imageName, getFileRelativePath(imageName), pictureData.getData().length, af));
345 return listOfExtractedImages;
355 private List<ExtractedImage> extractImagesFromPptx(AbstractFile af) {
356 List<ExtractedImage> listOfExtractedImages;
359 pptx =
new XMLSlideShow(
new ReadContentInputStream(af));
360 }
catch (Throwable ex) {
362 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.pptxContainer.init.err", af.getName()));
365 List<XSLFPictureData> listOfAllPictures = pptx.getAllPictures();
369 String outputFolderPath;
370 if (listOfAllPictures.isEmpty()) {
373 outputFolderPath = getOutputFolderPath(this.parentFileName);
375 if (outputFolderPath == null) {
376 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName()));
380 listOfExtractedImages =
new ArrayList<>();
381 for (XSLFPictureData xslsPicture : listOfAllPictures) {
385 String fileName = xslsPicture.getFileName();
386 writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), xslsPicture.getData());
387 listOfExtractedImages.add(
new ExtractedImage(fileName, getFileRelativePath(fileName), xslsPicture.getData().length, af));
391 return listOfExtractedImages;
402 private List<ExtractedImage> extractImagesFromXls(AbstractFile af) {
403 List<ExtractedImage> listOfExtractedImages;
407 xls =
new HSSFWorkbook(
new ReadContentInputStream(af));
408 }
catch (Throwable ex) {
410 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.xlsContainer.init.err", af.getName()) + af.getName());
414 List<? extends
org.apache.poi.ss.usermodel.PictureData> listOfAllPictures = xls.getAllPictures();
417 String outputFolderPath;
418 if (listOfAllPictures.isEmpty()) {
421 outputFolderPath = getOutputFolderPath(this.parentFileName);
423 if (outputFolderPath == null) {
424 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName()));
429 listOfExtractedImages =
new ArrayList<>();
430 for (
org.apache.poi.ss.usermodel.PictureData pictureData : listOfAllPictures) {
431 String imageName = UNKNOWN_NAME_PREFIX + i +
"." + pictureData.suggestFileExtension();
432 writeExtractedImage(Paths.get(outputFolderPath, imageName).toString(), pictureData.getData());
433 listOfExtractedImages.add(
new ExtractedImage(imageName, getFileRelativePath(imageName), pictureData.getData().length, af));
436 return listOfExtractedImages;
447 private List<ExtractedImage> extractImagesFromXlsx(AbstractFile af) {
448 List<ExtractedImage> listOfExtractedImages;
451 xlsx =
new XSSFWorkbook(
new ReadContentInputStream(af));
452 }
catch (Throwable ex) {
454 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.xlsxContainer.init.err", af.getName()));
458 List<? extends
org.apache.poi.ss.usermodel.PictureData> listOfAllPictures = xlsx.getAllPictures();
461 String outputFolderPath;
462 if (listOfAllPictures.isEmpty()) {
465 outputFolderPath = getOutputFolderPath(this.parentFileName);
467 if (outputFolderPath == null) {
468 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName()));
473 listOfExtractedImages =
new ArrayList<>();
474 for (
org.apache.poi.ss.usermodel.PictureData pictureData : listOfAllPictures) {
475 String imageName = UNKNOWN_NAME_PREFIX + i +
"." + pictureData.suggestFileExtension();
476 writeExtractedImage(Paths.get(outputFolderPath, imageName).toString(), pictureData.getData());
477 listOfExtractedImages.add(
new ExtractedImage(imageName, getFileRelativePath(imageName), pictureData.getData().length, af));
480 return listOfExtractedImages;
491 private void writeExtractedImage(String outputPath, byte[] data) {
492 try (FileOutputStream fos =
new FileOutputStream(outputPath)) {
494 }
catch (IOException ex) {
495 logger.log(Level.WARNING,
"Could not write to the provided location: " + outputPath, ex);
507 private String getOutputFolderPath(String parentFileName) {
508 String outputFolderPath = moduleDirAbsolute + File.separator + parentFileName;
509 File outputFilePath =
new File(outputFolderPath);
510 if (!outputFilePath.exists()) {
512 outputFilePath.mkdirs();
513 }
catch (SecurityException ex) {
514 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.getOutputFolderPath.exception.msg", parentFileName), ex);
518 return outputFolderPath;
529 private String getFileRelativePath(String fileName) {
531 return "/" + moduleDirRelative +
"/" + this.parentFileName +
"/" + fileName;
556 ExtractedImage(String fileName, String localPath,
long size,
long ctime,
long crtime,
long atime,
long mtime,
AbstractFile parentFile) {