Autopsy  4.10.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
FileTypeDetector.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2018 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.modules.filetypeid;
20 
21 import java.util.ArrayList;
22 import java.util.Collections;
23 import java.util.List;
24 import java.util.SortedSet;
25 import java.util.TreeSet;
26 import java.util.logging.Level;
27 import java.util.stream.Collectors;
28 import org.apache.tika.Tika;
29 import org.apache.tika.io.TikaInputStream;
30 import org.apache.tika.mime.MimeTypes;
32 import org.sleuthkit.datamodel.AbstractFile;
33 import org.sleuthkit.datamodel.ReadContentInputStream;
34 import org.sleuthkit.datamodel.TskCoreException;
35 import org.sleuthkit.datamodel.TskData;
36 
44 public class FileTypeDetector {
45 
46  private static final Logger logger = Logger.getLogger(FileTypeDetector.class.getName());
47  private static final Tika tika = new Tika();
48  private static final int SLACK_FILE_THRESHOLD = 4096;
49  private final List<FileType> userDefinedFileTypes;
50  private final List<FileType> autopsyDefinedFileTypes;
51  private static SortedSet<String> tikaDetectedTypes;
52 
63  public static synchronized SortedSet<String> getDetectedTypes() throws FileTypeDetectorInitException {
64  TreeSet<String> detectedTypes = new TreeSet<>((String string1, String string2) -> {
65  int result = String.CASE_INSENSITIVE_ORDER.compare(string1, string2);
66  if (result == 0) {
67  result = string1.compareTo(string2);
68  }
69  return result;
70  });
71  detectedTypes.addAll(FileTypeDetector.getTikaDetectedTypes());
72  try {
73  for (FileType fileType : CustomFileTypesManager.getInstance().getAutopsyDefinedFileTypes()) {
74  detectedTypes.add(fileType.getMimeType());
75  }
76  } catch (CustomFileTypesManager.CustomFileTypesException ex) {
77  throw new FileTypeDetectorInitException("Error loading Autopsy custom file types", ex);
78  }
79  try {
80  for (FileType fileType : CustomFileTypesManager.getInstance().getUserDefinedFileTypes()) {
81  detectedTypes.add(fileType.getMimeType());
82  }
83  } catch (CustomFileTypesManager.CustomFileTypesException ex) {
84  throw new FileTypeDetectorInitException("Error loading user custom file types", ex);
85  }
86  return detectedTypes;
87  }
88 
96  private static SortedSet<String> getTikaDetectedTypes() {
97  if (null == tikaDetectedTypes) {
98  tikaDetectedTypes = org.apache.tika.mime.MimeTypes.getDefaultMimeTypes().getMediaTypeRegistry().getTypes()
99  .stream().filter(t -> !t.hasParameters()).map(s -> s.toString().replace("tika-", "")).collect(Collectors.toCollection(TreeSet::new));
100  }
101  return Collections.unmodifiableSortedSet(tikaDetectedTypes);
102  }
103 
/**
 * Constructs a file type detector, loading the user-defined and the
 * Autopsy-defined custom file types from the CustomFileTypesManager.
 *
 * NOTE(review): the declaration line of this constructor was absent from
 * the listing this file was recovered from. The no-argument signature and
 * the throws clause follow from the body (it assigns the final instance
 * fields, uses no parameters and throws a checked exception); the public
 * visibility is assumed - confirm against the original source.
 *
 * @throws FileTypeDetectorInitException If the custom file type definitions
 *                                       cannot be loaded.
 */
public FileTypeDetector() throws FileTypeDetectorInitException {
    try {
        userDefinedFileTypes = CustomFileTypesManager.getInstance().getUserDefinedFileTypes();
        autopsyDefinedFileTypes = CustomFileTypesManager.getInstance().getAutopsyDefinedFileTypes();
    } catch (CustomFileTypesManager.CustomFileTypesException ex) {
        throw new FileTypeDetectorInitException("Error loading custom file types", ex); //NON-NLS
    }
}
122 
131  public boolean isDetectable(String mimeType) {
132  return isDetectableAsCustomType(userDefinedFileTypes, mimeType)
133  || isDetectableAsCustomType(autopsyDefinedFileTypes, mimeType)
134  || isDetectableByTika(mimeType);
135  }
136 
146  private boolean isDetectableAsCustomType(List<FileType> customTypes, String mimeType) {
147  for (FileType fileType : customTypes) {
148  if (fileType.getMimeType().equals(mimeType)) {
149  return true;
150  }
151  }
152  return false;
153  }
154 
162  private boolean isDetectableByTika(String mimeType) {
164  }
165 
177  public String getMIMEType(AbstractFile file) {
178  /*
179  * Check to see if the file has already been typed.
180  */
181  String mimeType = file.getMIMEType();
182  if (null != mimeType) {
183  // We remove the optional parameter to allow this method to work
184  // with legacy databases that may contain MIME types with the
185  // optional parameter attached.
186  return removeOptionalParameter(mimeType);
187  }
188 
189  /*
190  * Mark non-regular files (refer to TskData.TSK_FS_META_TYPE_ENUM),
191  * zero-sized files, unallocated space, and unused blocks (refer to
192  * TskData.TSK_DB_FILES_TYPE_ENUM) as octet-stream.
193  */
194  if (!file.isFile() || file.getSize() <= 0
195  || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS)
196  || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS)
197  || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.VIRTUAL_DIR)
198  || ((file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.SLACK) && file.getSize() < SLACK_FILE_THRESHOLD)) {
199  mimeType = MimeTypes.OCTET_STREAM;
200  }
201 
202  /*
203  * If the file is a regular file, give precedence to user-defined custom
204  * file types.
205  */
206  if (null == mimeType) {
207  mimeType = detectUserDefinedType(file);
208  }
209 
210  /*
211  * If the file does not match a user-defined type, give precedence to
212  * custom file types defined by Autopsy.
213  */
214  if (null == mimeType) {
215  mimeType = detectAutopsyDefinedType(file);
216  }
217 
218  /*
219  * If the file does not match a user-defined type, send the initial
220  * bytes to Tika.
221  */
222  if (null == mimeType) {
223  ReadContentInputStream stream = new ReadContentInputStream(file);
224 
225  try (TikaInputStream tikaInputStream = TikaInputStream.get(stream)) {
226  String tikaType = tika.detect(tikaInputStream, file.getName());
227 
228  /*
229  * Remove the Tika suffix from the MIME type name.
230  */
231  mimeType = tikaType.replace("tika-", ""); //NON-NLS
232  /*
233  * Remove the optional parameter from the MIME type.
234  */
235  mimeType = removeOptionalParameter(mimeType);
236 
242  if (mimeType.contains("audio/mpeg")) {
243  try {
244  byte[] header = getNBytes(file, 0, 2);
245  if (byteIs0xFF(header[0]) && byteIs0xFF(header[1])) {
246  mimeType = MimeTypes.OCTET_STREAM;
247  }
248  } catch (TskCoreException ex) {
249  //Oh well, the mimetype is what it is.
250  logger.log(Level.WARNING, String.format("Could not verify audio/mpeg mimetype for file %s with id=%d", file.getName(), file.getId()), ex);
251  }
252  }
253  } catch (Exception ignored) {
254  /*
255  * This exception is swallowed and not logged rather than
256  * propagated because files in data sources are not always
257  * consistent with their file system metadata, making for read
258  * errors. Also, Tika can be a bit flaky at times, making this a
259  * best effort endeavor. Default to octet-stream.
260  */
261  mimeType = MimeTypes.OCTET_STREAM;
262  }
263  }
264 
265  /*
266  * Documented side effect: write the result to the AbstractFile object.
267  */
268  file.setMIMEType(mimeType);
269 
270  return mimeType;
271  }
272 
280  private boolean byteIs0xFF(byte x) {
281  return (x & 0x0F) == 0x0F && (x & 0xF0) == 0xF0;
282  }
283 
294  private byte[] getNBytes(AbstractFile file, int offset, int n) throws TskCoreException {
295  byte[] headerCache = new byte[n];
296  file.read(headerCache, offset, n);
297  return headerCache;
298  }
299 
307  private String removeOptionalParameter(String mimeType) {
308  int indexOfSemicolon = mimeType.indexOf(';');
309  if (indexOfSemicolon != -1) {
310  return mimeType.substring(0, indexOfSemicolon).trim();
311  } else {
312  return mimeType;
313  }
314  }
315 
323  private String detectUserDefinedType(AbstractFile file) {
324  String retValue = null;
325 
326  for (FileType fileType : userDefinedFileTypes) {
327  if (fileType.matches(file)) {
328  retValue = fileType.getMimeType();
329  break;
330  }
331  }
332  return retValue;
333  }
334 
343  private String detectAutopsyDefinedType(AbstractFile file) {
344  for (FileType fileType : autopsyDefinedFileTypes) {
345  if (fileType.matches(file)) {
346  return fileType.getMimeType();
347  }
348  }
349  return null;
350  }
351 
352  /*
353  * Exception thrown if an initialization error occurs, e.g., user-defined
354  * file type definitions exist but cannot be loaded.
355  */
356  public static class FileTypeDetectorInitException extends Exception {
357 
358  private static final long serialVersionUID = 1L;
359 
366  FileTypeDetectorInitException(String message) {
367  super(message);
368  }
369 
377  FileTypeDetectorInitException(String message, Throwable throwable) {
378  super(message, throwable);
379  }
380 
381  }
382 
391  @Deprecated
392  public List<String> getUserDefinedTypes() {
393  List<String> customFileTypes = new ArrayList<>();
394  userDefinedFileTypes.forEach((fileType) -> {
395  customFileTypes.add(fileType.getMimeType());
396  });
397  autopsyDefinedFileTypes.forEach((fileType) -> {
398  customFileTypes.add(fileType.getMimeType());
399  });
400  return customFileTypes;
401  }
402 
417  @Deprecated
418  public String detectAndPostToBlackboard(AbstractFile file) throws TskCoreException {
419  String fileType = getMIMEType(file);
420  file.setMIMEType(fileType);
421  file.save();
422  return fileType;
423  }
424 
441  @Deprecated
442  public String getFileType(AbstractFile file) throws TskCoreException {
443  String fileType = getMIMEType(file);
444  file.setMIMEType(fileType);
445  file.save();
446  return fileType;
447  }
448 
461  @Deprecated
462  public String detect(AbstractFile file) throws TskCoreException {
463  String fileType = getMIMEType(file);
464  return fileType;
465  }
466 
467 }
byte[] getNBytes(AbstractFile file, int offset, int n)
boolean isDetectableAsCustomType(List< FileType > customTypes, String mimeType)
synchronized static Logger getLogger(String name)
Definition: Logger.java:124
static synchronized SortedSet< String > getDetectedTypes()

Copyright © 2012-2018 Basis Technology. Generated on: Fri Mar 22 2019
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.