Autopsy  4.20.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
EmbeddedFileExtractorIngestModule.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2015-2020 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.modules.embeddedfileextractor;
20 
21 import java.io.File;
22 import java.nio.file.Paths;
23 import java.util.HashMap;
24 import java.util.Map;
25 import java.util.concurrent.ConcurrentHashMap;
26 import java.util.logging.Level;
27 import javax.annotation.concurrent.GuardedBy;
28 import org.openide.util.NbBundle;
30 import org.sleuthkit.datamodel.AbstractFile;
31 import org.sleuthkit.datamodel.TskData;
35 import net.sf.sevenzipjbinding.SevenZipNativeInitializationException;
38 import java.util.logging.Logger;
43 
48 @NbBundle.Messages({
49  "CannotCreateOutputFolder=Unable to create output folder.",
50  "CannotRunFileTypeDetection=Unable to run file type detection.",
51  "UnableToInitializeLibraries=Unable to initialize 7Zip libraries.",
52  "EmbeddedFileExtractorIngestModule.NoOpenCase.errMsg=No open case available.",
53  "EmbeddedFileExtractorIngestModule.UnableToGetMSOfficeExtractor.errMsg=Unable to get MSOfficeEmbeddedContentExtractor."
54 })
56 
57  private static final String TASK_RETRY_STATS_LOG_NAME = "task_retry_stats";
58  private static final Logger taskStatsLogger = ApplicationLoggers.getLogger(TASK_RETRY_STATS_LOG_NAME);
59  private static final Object execMapLock = new Object();
60  @GuardedBy("execMapLock")
61  private static final Map<Long, FileTaskExecutor> fileTaskExecsByJob = new HashMap<>();
62  //Outer concurrent hashmap with keys of JobID, inner concurrentHashmap with keys of objectID
63  private static final ConcurrentHashMap<Long, ConcurrentHashMap<Long, Archive>> mapOfDepthTrees = new ConcurrentHashMap<>();
64  private static final IngestModuleReferenceCounter refCounter = new IngestModuleReferenceCounter();
65  private DocumentEmbeddedContentExtractor documentExtractor;
66  private SevenZipExtractor archiveExtractor;
67  private FileTypeDetector fileTypeDetector;
68  private long jobId;
69 
75  }
76 
77  @Override
78  @NbBundle.Messages({
79  "EmbeddedFileExtractor_make_output_dir_err=Failed to create module output directory for Embedded File Extractor"
80  })
81  public void startUp(IngestJobContext context) throws IngestModuleException {
82  jobId = context.getJobId();
83 
84  /*
85  * Construct absolute and relative paths to the output directory. The
86  * output directory is a subdirectory of the ModuleOutput folder in the
87  * case directory and is named for the module.
88  *
89  * The absolute path is used to write the extracted (derived) files to
90  * local storage.
91  *
92  * The relative path is relative to the case folder and is used in the
93  * case database for extracted (derived) file paths.
94  *
95  */
96  Case currentCase = Case.getCurrentCase();
97  String moduleDirAbsolute = Paths.get(currentCase.getModuleDirectory(), EmbeddedFileExtractorModuleFactory.getOutputFolderName()).toString();
98  String moduleDirRelative = Paths.get(currentCase.getModuleOutputDirectoryRelativePath(), EmbeddedFileExtractorModuleFactory.getOutputFolderName()).toString();
99 
100  if (refCounter.incrementAndGet(jobId) == 1) {
101 
102  /*
103  * Construct a per ingest job executor that will be used for calling
104  * java.io.File methods as tasks with retries. Retries are employed
105  * here due to observed issues with hangs when attempting these
106  * operations on case directories stored on a certain type of
107  * network file system. See the FileTaskExecutor class header docs
108  * for more details.
109  */
110  FileTaskExecutor fileTaskExecutor = new FileTaskExecutor(context);
111  synchronized (execMapLock) {
112  fileTaskExecsByJob.put(jobId, fileTaskExecutor);
113  }
114 
115  try {
116  File extractionDirectory = new File(moduleDirAbsolute);
117  if (!fileTaskExecutor.exists(extractionDirectory)) {
118  fileTaskExecutor.mkdirs(extractionDirectory);
119  }
120  } catch (FileTaskExecutor.FileTaskFailedException | InterruptedException ex) {
121  /*
122  * The exception message is localized because ingest module
123  * start up exceptions are displayed to the user when running
124  * with the RCP GUI.
125  */
126  throw new IngestModuleException(Bundle.EmbeddedFileExtractor_make_output_dir_err(), ex);
127  }
128 
129  /*
130  * Construct a hash map to keep track of depth in archives while
131  * processing archive files.
132  *
133  * TODO (Jira-7119): A ConcurrentHashMap of ConcurrentHashMaps is
134  * almost certainly the wrong data structure here. ConcurrentHashMap
135  * is intended to efficiently provide snapshots to multiple threads.
136  * A thread may not see the current state.
137  */
138  mapOfDepthTrees.put(jobId, new ConcurrentHashMap<>());
139  }
140 
141  try {
142  fileTypeDetector = new FileTypeDetector();
144  throw new IngestModuleException(Bundle.CannotRunFileTypeDetection(), ex);
145  }
146 
147  try {
148  archiveExtractor = new SevenZipExtractor(context, fileTypeDetector, moduleDirRelative, moduleDirAbsolute, fileTaskExecsByJob.get(jobId));
149  } catch (SevenZipNativeInitializationException ex) {
150  /*
151  * The exception message is localized because ingest module start up
152  * exceptions are displayed to the user when running with the RCP
153  * GUI.
154  */
155  throw new IngestModuleException(Bundle.UnableToInitializeLibraries(), ex);
156  }
157 
158  try {
159  documentExtractor = new DocumentEmbeddedContentExtractor(context, fileTypeDetector, moduleDirRelative, moduleDirAbsolute, fileTaskExecsByJob.get(jobId));
160  } catch (NoCurrentCaseException ex) {
161  /*
162  * The exception message is localized because ingest module start up
163  * exceptions are displayed to the user when running with the RCP
164  * GUI.
165  */
166  throw new IngestModuleException(Bundle.EmbeddedFileExtractorIngestModule_UnableToGetMSOfficeExtractor_errMsg(), ex);
167  }
168  }
169 
170  @Override
171  public ProcessResult process(AbstractFile abstractFile) {
172  /*
173  * Skip unallocated space files.
174  */
175  if ((abstractFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS))
176  || (abstractFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.SLACK))) {
177  return ProcessResult.OK;
178  }
179 
180  /*
181  * Skip known files.
182  */
183  if (abstractFile.getKnown().equals(TskData.FileKnown.KNOWN)) {
184  return ProcessResult.OK;
185  }
186 
187  /*
188  * Skip directories, etc.
189  */
190  if (!abstractFile.isFile()) {
191  return ProcessResult.OK;
192  }
193 
194  /*
195  * Attempt embedded file extraction for the file if it is a supported
196  * type/format.
197  */
198  if (archiveExtractor.isSevenZipExtractionSupported(abstractFile)) {
199  archiveExtractor.unpack(abstractFile, mapOfDepthTrees.get(jobId));
200  } else if (documentExtractor.isContentExtractionSupported(abstractFile)) {
201  documentExtractor.extractEmbeddedContent(abstractFile);
202  }
203  return ProcessResult.OK;
204  }
205 
206  @Override
207  public void shutDown() {
208  if (refCounter.decrementAndGet(jobId) == 0) {
209  mapOfDepthTrees.remove(jobId);
210  FileTaskExecutor fileTaskExecutor;
211  synchronized (execMapLock) {
212  fileTaskExecutor = fileTaskExecsByJob.remove(jobId);
213  }
214  fileTaskExecutor.shutDown();
215  taskStatsLogger.log(Level.INFO, String.format("total tasks: %d, total task timeouts: %d, total task retries: %d, total task failures: %d (ingest job ID = %d)", TaskRetryUtil.getTotalTasksCount(), TaskRetryUtil.getTotalTaskAttemptTimeOutsCount(), TaskRetryUtil.getTotalTaskRetriesCount(), TaskRetryUtil.getTotalFailedTasksCount(), jobId));
216  }
217  }
218 
227  static String getUniqueName(AbstractFile file) {
228  return Long.toString(file.getId());
229  }
230 
231 }
synchronized static Logger getLogger(String logName)

Copyright © 2012-2022 Basis Technology. Generated on: Tue Aug 1 2023
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.