Autopsy  4.16.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
EmbeddedFileExtractorIngestModule.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2015-2018 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.modules.embeddedfileextractor;
20 
21 import java.io.File;
22 import java.nio.file.Paths;
23 import java.util.concurrent.ConcurrentHashMap;
24 import org.openide.util.NbBundle;
26 import org.sleuthkit.datamodel.AbstractFile;
27 import org.sleuthkit.datamodel.TskData;
31 import net.sf.sevenzipjbinding.SevenZipNativeInitializationException;
36 
41 @NbBundle.Messages({
42  "CannotCreateOutputFolder=Unable to create output folder.",
43  "CannotRunFileTypeDetection=Unable to run file type detection.",
44  "UnableToInitializeLibraries=Unable to initialize 7Zip libraries.",
45  "EmbeddedFileExtractorIngestModule.NoOpenCase.errMsg=No open case available.",
46  "EmbeddedFileExtractorIngestModule.UnableToGetMSOfficeExtractor.errMsg=Unable to get MSOfficeEmbeddedContentExtractor."
47 })
49 
50  // Depth-tracking state shared by all instances of this module: the outer ConcurrentHashMap is keyed by ingest job ID, each inner ConcurrentHashMap is keyed by object ID.
51  private static final ConcurrentHashMap<Long, ConcurrentHashMap<Long, Archive>> mapOfDepthTrees = new ConcurrentHashMap<>();
    // Per-job instance counter: startUp() creates the job's map entry on the first increment, shutDown() removes it when the count returns to zero.
52  private static final IngestModuleReferenceCounter refCounter = new IngestModuleReferenceCounter();
    // Extractor for content embedded in documents; created in startUp() and used by process().
53  private DocumentEmbeddedContentExtractor documentExtractor;
    // Extractor for archive files; created in startUp() and used by process().
54  private SevenZipExtractor archiveExtractor;
    // ID of the ingest job this instance belongs to; assigned in startUp() from the job context.
56  private long jobId;
57 
63  }
64 
65  @Override
66  public void startUp(IngestJobContext context) throws IngestModuleException {
67  /*
68  * Construct absolute and relative paths to the output directory. The
69  * relative path is relative to the case folder, and will be used in the
70  * case database for extracted (derived) file paths. The absolute path
71  * is used to write the extracted (derived) files to local storage.
72  */
73  jobId = context.getJobId();
74  String moduleDirRelative = null;
75  String moduleDirAbsolute = null;
76 
77  try {
78  final Case currentCase = Case.getCurrentCaseThrows();
79  moduleDirRelative = Paths.get(currentCase.getModuleOutputDirectoryRelativePath(), EmbeddedFileExtractorModuleFactory.getModuleName()).toString();
80  moduleDirAbsolute = Paths.get(currentCase.getModuleDirectory(), EmbeddedFileExtractorModuleFactory.getModuleName()).toString();
81  } catch (NoCurrentCaseException ex) {
82  throw new IngestModuleException(Bundle.EmbeddedFileExtractorIngestModule_NoOpenCase_errMsg(), ex);
83  }
84  /*
85  * Create the output directory.
86  */
87  File extractionDirectory = new File(moduleDirAbsolute);
88  if (!extractionDirectory.exists()) {
89  try {
90  extractionDirectory.mkdirs();
91  } catch (SecurityException ex) {
92  throw new IngestModuleException(Bundle.CannotCreateOutputFolder(), ex);
93  }
94  }
95 
96  /*
97  * Construct a file type detector.
98  */
99  try {
100  fileTypeDetector = new FileTypeDetector();
102  throw new IngestModuleException(Bundle.CannotRunFileTypeDetection(), ex);
103  }
104  try {
105  this.archiveExtractor = new SevenZipExtractor(context, fileTypeDetector, moduleDirRelative, moduleDirAbsolute);
106  } catch (SevenZipNativeInitializationException ex) {
107  throw new IngestModuleException(Bundle.UnableToInitializeLibraries(), ex);
108  }
109  if (refCounter.incrementAndGet(jobId) == 1) {
110  /*
111  * Construct a concurrentHashmap to keep track of depth in archives
112  * while processing archive files.
113  */
114  mapOfDepthTrees.put(jobId, new ConcurrentHashMap<>());
115  }
116  /*
117  * Construct an embedded content extractor for processing Microsoft
118  * Office documents and PDF documents.
119  */
120  try {
121  this.documentExtractor = new DocumentEmbeddedContentExtractor(context, fileTypeDetector, moduleDirRelative, moduleDirAbsolute);
122  } catch (NoCurrentCaseException ex) {
123  throw new IngestModuleException(Bundle.EmbeddedFileExtractorIngestModule_UnableToGetMSOfficeExtractor_errMsg(), ex);
124  }
125 
126  }
127 
128  @Override
129  public ProcessResult process(AbstractFile abstractFile) {
130  /*
131  * Skip unallocated space files.
132  */
133  if ((abstractFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS))
134  || (abstractFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.SLACK))) {
135  return ProcessResult.OK;
136  }
137 
138  /*
139  * Skip known files.
140  */
141  if (abstractFile.getKnown().equals(TskData.FileKnown.KNOWN)) {
142  return ProcessResult.OK;
143  }
144 
145  /*
146  * Skip directories, etc.
147  */
148  if (!abstractFile.isFile()) {
149  return ProcessResult.OK;
150  }
151 
152  /*
153  * Attempt embedded file extraction for the file if it is a supported
154  * type/format.
155  */
156  if (archiveExtractor.isSevenZipExtractionSupported(abstractFile)) {
157  archiveExtractor.unpack(abstractFile, mapOfDepthTrees.get(jobId));
158  } else if (documentExtractor.isContentExtractionSupported(abstractFile)) {
159  documentExtractor.extractEmbeddedContent(abstractFile);
160  }
161  return ProcessResult.OK;
162  }
163 
164  @Override
165  public void shutDown() {
166  if (refCounter.decrementAndGet(jobId) == 0) {
167  mapOfDepthTrees.remove(jobId);
168  }
169  }
170 
179  static String getUniqueName(AbstractFile file) {
180  return file.getName() + "_" + file.getId();
181  }
182 
183 }

Copyright © 2012-2020 Basis Technology. Generated on: Tue Sep 22 2020
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.