Autopsy  4.15.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
FileTypeDetector.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2020 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.modules.filetypeid;
20 
21 import java.nio.charset.Charset;
22 import java.util.ArrayList;
23 import java.util.Collections;
24 import java.util.List;
25 import java.util.SortedSet;
26 import java.util.TreeSet;
27 import java.util.logging.Level;
28 import java.util.stream.Collectors;
29 import org.apache.tika.Tika;
30 import org.apache.tika.io.TikaInputStream;
31 import org.apache.tika.mime.MimeTypes;
34 import org.sleuthkit.datamodel.AbstractFile;
35 import org.sleuthkit.datamodel.ReadContentInputStream;
36 import org.sleuthkit.datamodel.TskCoreException;
37 import org.sleuthkit.datamodel.TskData;
38 
46 public class FileTypeDetector {
47 
/* Logger for this class. */
48  private static final Logger logger = Logger.getLogger(FileTypeDetector.class.getName());
/* Shared Tika instance used by getMIMEType() for content-based detection. */
49  private static final Tika tika = new Tika();
/*
 * Size in bytes below which a slack file is reported as octet-stream by
 * getMIMEType() without any further inspection.
 */
50  private static final int SLACK_FILE_THRESHOLD = 4096;
/* Custom file types defined by the user; obtained from CustomFileTypesManager. */
51  private final List<FileType> userDefinedFileTypes;
/* Custom file types defined by Autopsy; obtained from CustomFileTypesManager. */
52  private final List<FileType> autopsyDefinedFileTypes;
/*
 * Cache of the MIME type names known to Tika. It starts out null and is
 * filled in on first use by getTikaDetectedTypes().
 */
53  private static SortedSet<String> tikaDetectedTypes;
54 
65  public static synchronized SortedSet<String> getDetectedTypes() throws FileTypeDetectorInitException {
66  TreeSet<String> detectedTypes = new TreeSet<>((String string1, String string2) -> {
67  int result = String.CASE_INSENSITIVE_ORDER.compare(string1, string2);
68  if (result == 0) {
69  result = string1.compareTo(string2);
70  }
71  return result;
72  });
73  detectedTypes.addAll(FileTypeDetector.getTikaDetectedTypes());
74  try {
75  for (FileType fileType : CustomFileTypesManager.getInstance().getAutopsyDefinedFileTypes()) {
76  detectedTypes.add(fileType.getMimeType());
77  }
78  } catch (CustomFileTypesManager.CustomFileTypesException ex) {
79  throw new FileTypeDetectorInitException("Error loading Autopsy custom file types", ex);
80  }
81  try {
82  for (FileType fileType : CustomFileTypesManager.getInstance().getUserDefinedFileTypes()) {
83  detectedTypes.add(fileType.getMimeType());
84  }
85  } catch (CustomFileTypesManager.CustomFileTypesException ex) {
86  throw new FileTypeDetectorInitException("Error loading user custom file types", ex);
87  }
88  return detectedTypes;
89  }
90 
98  private static SortedSet<String> getTikaDetectedTypes() {
99  if (null == tikaDetectedTypes) {
100  tikaDetectedTypes = org.apache.tika.mime.MimeTypes.getDefaultMimeTypes().getMediaTypeRegistry().getTypes()
101  .stream().filter(t -> !t.hasParameters()).map(s -> s.toString().replace("tika-", "")).collect(Collectors.toCollection(TreeSet::new));
102  }
103  return Collections.unmodifiableSortedSet(tikaDetectedTypes);
104  }
105 
119  try {
120  userDefinedFileTypes = CustomFileTypesManager.getInstance().getUserDefinedFileTypes();
121  autopsyDefinedFileTypes = CustomFileTypesManager.getInstance().getAutopsyDefinedFileTypes();
122  } catch (CustomFileTypesManager.CustomFileTypesException ex) {
123  throw new FileTypeDetectorInitException("Error loading custom file types", ex); //NON-NLS
124  }
125  }
126 
135  public boolean isDetectable(String mimeType) {
136  return isDetectableAsCustomType(userDefinedFileTypes, mimeType)
137  || isDetectableAsCustomType(autopsyDefinedFileTypes, mimeType)
138  || isDetectableByTika(mimeType);
139  }
140 
150  private boolean isDetectableAsCustomType(List<FileType> customTypes, String mimeType) {
151  for (FileType fileType : customTypes) {
152  if (fileType.getMimeType().equals(mimeType)) {
153  return true;
154  }
155  }
156  return false;
157  }
158 
166  private boolean isDetectableByTika(String mimeType) {
168  }
169 
181  public String getMIMEType(AbstractFile file) {
182  /*
183  * Check to see if the file has already been typed.
184  */
185  String mimeType = file.getMIMEType();
186  if (null != mimeType) {
187  // We remove the optional parameter to allow this method to work
188  // with legacy databases that may contain MIME types with the
189  // optional parameter attached.
190  return removeOptionalParameter(mimeType);
191  }
192 
193  /*
194  * Mark non-regular files (refer to TskData.TSK_FS_META_TYPE_ENUM),
195  * zero-sized files, unallocated space, and unused blocks (refer to
196  * TskData.TSK_DB_FILES_TYPE_ENUM) as octet-stream.
197  */
198  if (!file.isFile() || file.getSize() <= 0
199  || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS)
200  || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS)
201  || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.VIRTUAL_DIR)
202  || ((file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.SLACK) && file.getSize() < SLACK_FILE_THRESHOLD)) {
203  mimeType = MimeTypes.OCTET_STREAM;
204  }
205 
206  /*
207  * If the file is a regular file, give precedence to user-defined custom
208  * file types.
209  */
210  if (null == mimeType) {
211  mimeType = detectUserDefinedType(file);
212  }
213 
214  /*
215  * If the file does not match a user-defined type, give precedence to
216  * custom file types defined by Autopsy.
217  */
218  if (null == mimeType) {
219  mimeType = detectAutopsyDefinedType(file);
220  }
221 
222  /*
223  * If the file does not match a user-defined type, send the initial
224  * bytes to Tika.
225  */
226  if (null == mimeType) {
227  ReadContentInputStream stream = new ReadContentInputStream(file);
228 
229  try (TikaInputStream tikaInputStream = TikaInputStream.get(stream)) {
230  String tikaType = tika.detect(tikaInputStream);
231 
232  /*
233  * Remove the Tika suffix from the MIME type name.
234  */
235  mimeType = tikaType.replace("tika-", ""); //NON-NLS
236  /*
237  * Remove the optional parameter from the MIME type.
238  */
239  mimeType = removeOptionalParameter(mimeType);
240 
241  /*
242  * If Tika recognizes the file signature, then use the file
243  * name to refine the type. In short, this is to exclude the
244  * mime types that are determined solely by file extension.
245  * More details in JIRA-4871.
246  */
247  if (!mimeType.equals(MimeTypes.OCTET_STREAM)) {
248  ReadContentInputStream secondPassStream = new ReadContentInputStream(file);
249  try (TikaInputStream secondPassTikaStream = TikaInputStream.get(secondPassStream)) {
250  tikaType = tika.detect(secondPassTikaStream, file.getName());
251  mimeType = tikaType.replace("tika-", ""); //NON-NLS
252  mimeType = removeOptionalParameter(mimeType);
253  }
254  } else {
255  /*
256  * If the file was marked as an octet stream and the extension is .txt, try to detect a text
257  * encoding
258  */
259  if (file.getNameExtension().equals("txt")) {
260  Charset detectedCharset = EncodingUtils.getEncoding(file);
261  if (detectedCharset != EncodingUtils.UNKNOWN_CHARSET) {
262  mimeType = MimeTypes.PLAIN_TEXT;
263  }
264  }
265  }
266 
272  if (mimeType.contains("audio/mpeg")) {
273  try {
274  byte[] header = getNBytes(file, 0, 2);
275  if (byteIs0xFF(header[0]) && byteIs0xFF(header[1])) {
276  mimeType = MimeTypes.OCTET_STREAM;
277  }
278  } catch (TskCoreException ex) {
279  //Oh well, the mimetype is what it is.
280  logger.log(Level.WARNING, String.format("Could not verify audio/mpeg mimetype for file %s with id=%d", file.getName(), file.getId()), ex);
281  }
282  }
283  } catch (Exception ignored) {
284  /*
285  * This exception is swallowed and not logged rather than
286  * propagated because files in data sources are not always
287  * consistent with their file system metadata, making for read
288  * errors. Also, Tika can be a bit flaky at times, making this a
289  * best effort endeavor. Default to octet-stream.
290  */
291  mimeType = MimeTypes.OCTET_STREAM;
292  }
293  }
294 
295  /*
296  * Documented side effect: write the result to the AbstractFile object.
297  */
298  file.setMIMEType(mimeType);
299 
300  return mimeType;
301  }
302 
311  private boolean byteIs0xFF(byte x) {
312  return (x & 0x0F) == 0x0F && (x & 0xF0) == 0xF0;
313  }
314 
326  private byte[] getNBytes(AbstractFile file, int offset, int n) throws TskCoreException {
327  byte[] headerCache = new byte[n];
328  file.read(headerCache, offset, n);
329  return headerCache;
330  }
331 
339  private String removeOptionalParameter(String mimeType) {
340  int indexOfSemicolon = mimeType.indexOf(';');
341  if (indexOfSemicolon != -1) {
342  return mimeType.substring(0, indexOfSemicolon).trim();
343  } else {
344  return mimeType;
345  }
346  }
347 
355  private String detectUserDefinedType(AbstractFile file) {
356  String retValue = null;
357 
358  for (FileType fileType : userDefinedFileTypes) {
359  if (fileType.matches(file)) {
360  retValue = fileType.getMimeType();
361  break;
362  }
363  }
364  return retValue;
365  }
366 
375  private String detectAutopsyDefinedType(AbstractFile file) {
376  for (FileType fileType : autopsyDefinedFileTypes) {
377  if (fileType.matches(file)) {
378  return fileType.getMimeType();
379  }
380  }
381  return null;
382  }
383 
384  /*
385  * Exception thrown if an initialization error occurs, e.g., user-defined
386  * file type definitions exist but cannot be loaded.
387  */
388  public static class FileTypeDetectorInitException extends Exception {
389 
390  private static final long serialVersionUID = 1L;
391 
398  FileTypeDetectorInitException(String message) {
399  super(message);
400  }
401 
409  FileTypeDetectorInitException(String message, Throwable throwable) {
410  super(message, throwable);
411  }
412 
413  }
414 
423  @Deprecated
424  public List<String> getUserDefinedTypes() {
425  List<String> customFileTypes = new ArrayList<>();
426  userDefinedFileTypes.forEach((fileType) -> {
427  customFileTypes.add(fileType.getMimeType());
428  });
429  autopsyDefinedFileTypes.forEach((fileType) -> {
430  customFileTypes.add(fileType.getMimeType());
431  });
432  return customFileTypes;
433  }
434 
449  @Deprecated
450  public String detectAndPostToBlackboard(AbstractFile file) throws TskCoreException {
451  String fileType = getMIMEType(file);
452  file.setMIMEType(fileType);
453  file.save();
454  return fileType;
455  }
456 
473  @Deprecated
474  public String getFileType(AbstractFile file) throws TskCoreException {
475  String fileType = getMIMEType(file);
476  file.setMIMEType(fileType);
477  file.save();
478  return fileType;
479  }
480 
493  @Deprecated
494  public String detect(AbstractFile file) throws TskCoreException {
495  String fileType = getMIMEType(file);
496  return fileType;
497  }
498 
499 }
byte[] getNBytes(AbstractFile file, int offset, int n)
boolean isDetectableAsCustomType(List< FileType > customTypes, String mimeType)
synchronized static Logger getLogger(String name)
Definition: Logger.java:124
static synchronized SortedSet< String > getDetectedTypes()

Copyright © 2012-2020 Basis Technology. Generated on: Mon Jul 6 2020
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.