19 package org.sleuthkit.autopsy.modules.filetypeid;
21 import java.nio.charset.Charset;
22 import java.util.ArrayList;
23 import java.util.Collections;
24 import java.util.List;
25 import java.util.SortedSet;
26 import java.util.TreeSet;
27 import java.util.logging.Level;
28 import java.util.stream.Collectors;
29 import org.apache.tika.Tika;
30 import org.apache.tika.io.TikaInputStream;
31 import org.apache.tika.mime.MimeTypes;
// Single Tika instance held in a static final field; it is the object whose
// detect(...) is invoked on file content streams further down in this class.
49 private static final Tika
tika =
new Tika();
// Sorted, duplicate-free collection of MIME type names. The comparator first
// compares case-insensitively and also contains an exact compareTo.
// NOTE(review): the condition between the two comparisons is not visible in
// this excerpt - presumably compareTo only breaks ties when the first result
// is 0; confirm against the full source.
67 TreeSet<String> detectedTypes =
new TreeSet<>((String string1, String string2) -> {
68 int result = String.CASE_INSENSITIVE_ORDER.compare(string1, string2);
70 result = string1.compareTo(string2);
// Add the MIME type of each Autopsy-defined custom file type.
76 for (FileType fileType : CustomFileTypesManager.getInstance().getAutopsyDefinedFileTypes()) {
77 detectedTypes.add(fileType.getMimeType());
79 }
catch (CustomFileTypesManager.CustomFileTypesException ex) {
// Rethrow as FileTypeDetectorInitException, keeping ex as the cause.
80 throw new FileTypeDetectorInitException(
"Error loading Autopsy custom file types", ex);
// Then add the MIME type of each user-defined custom file type, with the
// same wrap-and-rethrow handling but a distinct message.
83 for (FileType fileType : CustomFileTypesManager.getInstance().getUserDefinedFileTypes()) {
84 detectedTypes.add(fileType.getMimeType());
86 }
catch (CustomFileTypesManager.CustomFileTypesException ex) {
87 throw new FileTypeDetectorInitException(
"Error loading user custom file types", ex);
// Populate the static tikaDetectedTypes field only when it is still null:
// take Tika's default media type registry, drop types that carry parameters,
// remove the "tika-" substring from each remaining name, and collect the
// names into a TreeSet.
// NOTE(review): this check-then-assign on a static field has no guard visible
// in this excerpt - confirm whether the enclosing method or its callers
// synchronize.
100 if (null == tikaDetectedTypes) {
101 tikaDetectedTypes =
org.apache.tika.mime.MimeTypes.getDefaultMimeTypes().getMediaTypeRegistry().getTypes()
102 .stream().filter(t -> !t.hasParameters()).map(s -> s.toString().replace(
"tika-",
"")).collect(Collectors.toCollection(TreeSet::new));
// Return a read-only view so callers cannot modify the cached set.
104 return Collections.unmodifiableSortedSet(tikaDetectedTypes);
// Fetch both lists of custom file type definitions from the
// CustomFileTypesManager singleton. A CustomFileTypesException from either
// call is rethrown as FileTypeDetectorInitException with the original
// exception attached as the cause.
121 userDefinedFileTypes = CustomFileTypesManager.getInstance().getUserDefinedFileTypes();
122 autopsyDefinedFileTypes = CustomFileTypesManager.getInstance().getAutopsyDefinedFileTypes();
123 }
catch (CustomFileTypesManager.CustomFileTypesException ex) {
124 throw new FileTypeDetectorInitException(
"Error loading custom file types", ex);
// Walk the supplied custom types and compare each one's MIME type with the
// requested mimeType using String.equals (exact, case-sensitive match).
// The action taken on a match is not visible in this excerpt.
152 for (FileType fileType : customTypes) {
153 if (fileType.getMimeType().equals(mimeType)) {
// Ask the file for its MIME type first. What happens in the non-null branch
// is not visible in this excerpt.
186 String mimeType = file.getMIMEType();
187 if (null != mimeType) {
// Assign MimeTypes.OCTET_STREAM without further inspection when the object
// is not a file, has size <= 0, is of type UNALLOC_BLOCKS, UNUSED_BLOCKS or
// VIRTUAL_DIR, or is a SLACK file smaller than SLACK_FILE_THRESHOLD.
198 if (!file.isFile() || file.getSize() <= 0
199 || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS)
200 || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS)
201 || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.VIRTUAL_DIR)
202 || ((file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.SLACK) && file.getSize() <
SLACK_FILE_THRESHOLD)) {
203 mimeType = MimeTypes.OCTET_STREAM;
// Read up to defaultBufferSize bytes from offset 0 into buf; bufLen receives
// the value returned by read. The TskCoreException handler body is not
// visible in this excerpt.
212 bufLen = file.read(buf, 0, defaultBufferSize);
213 }
catch (TskCoreException ex) {
// Each null check below gates a further detection step that only runs while
// no type has been chosen yet. The bodies of the first two checks are not
// visible in this excerpt.
222 if (null == mimeType) {
230 if (null == mimeType) {
// Tika content-based detection: wrap the file in a stream, let Tika detect
// from the content alone, then remove the "tika-" substring from the result.
// try-with-resources closes the TikaInputStream.
238 if (null == mimeType) {
239 ReadContentInputStream stream =
new ReadContentInputStream(file);
241 try (TikaInputStream tikaInputStream = TikaInputStream.get(stream)) {
242 String tikaType = tika.detect(tikaInputStream);
247 mimeType = tikaType.replace(
"tika-",
"");
// Second pass, only when the first pass produced something other than
// OCTET_STREAM: open a fresh stream and detect again, this time also passing
// the file name to Tika; the result overwrites mimeType.
259 if (!mimeType.equals(MimeTypes.OCTET_STREAM)) {
260 ReadContentInputStream secondPassStream =
new ReadContentInputStream(file);
261 try (TikaInputStream secondPassTikaStream = TikaInputStream.get(secondPassStream)) {
262 tikaType = tika.detect(secondPassTikaStream, file.getName());
263 mimeType = tikaType.replace(
"tika-",
"");
// A file whose name extension equals "txt" exactly is set to
// MimeTypes.PLAIN_TEXT. Any enclosing condition is not visible in this
// excerpt.
271 if (file.getNameExtension().equals(
"txt")) {
274 mimeType = MimeTypes.PLAIN_TEXT;
// Types containing "audio/mpeg" get extra verification and can be reset to
// OCTET_STREAM; the check that decides this is not visible in this excerpt.
284 if (mimeType.contains(
"audio/mpeg")) {
288 mimeType = MimeTypes.OCTET_STREAM;
290 }
catch (TskCoreException ex) {
// A failed verification is logged at WARNING with the file name, id and the
// exception.
292 logger.log(Level.WARNING, String.format(
"Could not verify audio/mpeg mimetype for file %s with id=%d", file.getName(), file.getId()), ex);
295 }
catch (Exception ignored) {
// Any other exception falls back to OCTET_STREAM (the lines between the
// catch and this assignment are not visible in this excerpt).
303 mimeType = MimeTypes.OCTET_STREAM;
// Store the chosen type on the file object.
310 file.setMIMEType(mimeType);
// True only when all four bits of the low nibble and all four bits of the
// high nibble are set in x, i.e. the low eight bits of x are 0xFF.
324 return (x & 0x0F) == 0x0F && (x & 0xF0) == 0xF0;
/**
 * Allocates an n-byte array and reads from the file into it. The remainder
 * of the method (including its return statement) is not visible in this
 * excerpt.
 *
 * @param file   The file to read from.
 * @param offset The offset passed to file.read.
 * @param n      The size of the array and the length passed to file.read.
 *
 * @throws TskCoreException Declared by the signature; presumably raised by
 *                          file.read - confirm.
 */
338 private byte[]
getNBytes(AbstractFile file,
int offset,
int n)
throws TskCoreException {
339 byte[] headerCache =
new byte[n];
// NOTE(review): the value returned by file.read is discarded on this line, so
// a read shorter than n would go unnoticed here - confirm whether callers
// rely on all n bytes being filled.
340 file.read(headerCache, offset, n);
// Locate the first ';' in the MIME type. When one exists, return only the
// text before it, trimmed of surrounding whitespace. The value returned when
// there is no semicolon is not visible in this excerpt.
352 int indexOfSemicolon = mimeType.indexOf(
';');
353 if (indexOfSemicolon != -1) {
354 return mimeType.substring(0, indexOfSemicolon).trim();
// retValue stays null unless a user-defined type matches. On a match it is
// set to that type's MIME type. What follows the assignment (e.g. whether
// the loop stops) is not visible in this excerpt.
370 String retValue = null;
372 for (FileType fileType : userDefinedFileTypes) {
373 if (fileType.matches(file, startOfFileBuffer, bufLen)) {
374 retValue = fileType.getMimeType();
// Return the MIME type of the first Autopsy-defined type, in list order,
// whose matches(...) accepts the file and the buffered start-of-file bytes.
392 for (FileType fileType : autopsyDefinedFileTypes) {
393 if (fileType.matches(file, startOfFileBuffer, bufLen)) {
394 return fileType.getMimeType();
// Forward both the message and the causing throwable to the superclass
// constructor so the cause is preserved.
426 super(message, throwable);
// Build a new ArrayList holding the MIME type of every user-defined type
// followed by the MIME type of every Autopsy-defined type, in that order.
// Duplicates are not removed, and the list returned is the mutable ArrayList
// itself.
441 List<String> customFileTypes =
new ArrayList<>();
442 userDefinedFileTypes.forEach((fileType) -> {
443 customFileTypes.add(fileType.getMimeType());
445 autopsyDefinedFileTypes.forEach((fileType) -> {
446 customFileTypes.add(fileType.getMimeType());
448 return customFileTypes;
// Store fileType on the file object via setMIMEType. How fileType was
// obtained is not visible in this excerpt.
468 file.setMIMEType(fileType);
/**
 * Only the header and one statement of this method are visible in this
 * excerpt: it stores a value named fileType on the file via setMIMEType.
 * NOTE(review): presumably fileType is the detected MIME type and is also
 * the value returned - confirm against the full source.
 *
 * @param file The file whose MIME type is set.
 *
 * @throws TskCoreException Declared by the signature; the throwing call is
 *                          not visible here.
 */
490 public String
getFileType(AbstractFile file)
throws TskCoreException {
492 file.setMIMEType(fileType);
510 public String
detect(AbstractFile file)
throws TskCoreException {
String removeOptionalParameter(String mimeType)
static final long serialVersionUID
final List< FileType > userDefinedFileTypes
final int defaultBufferSize
static final Charset UNKNOWN_CHARSET
String detectAutopsyDefinedType(AbstractFile file, byte[] startOfFileBuffer, int bufLen)
static final int SLACK_FILE_THRESHOLD
boolean isDetectable(String mimeType)
byte[] getNBytes(AbstractFile file, int offset, int n)
String getMIMEType(AbstractFile file)
boolean isDetectableAsCustomType(List< FileType > customTypes, String mimeType)
final List< FileType > autopsyDefinedFileTypes
static SortedSet< String > tikaDetectedTypes
static Charset getEncoding(AbstractFile file)
String detectUserDefinedType(AbstractFile file, byte[] startOfFileBuffer, int bufLen)
String detect(AbstractFile file)
synchronized static Logger getLogger(String name)
static final Logger logger
List< String > getUserDefinedTypes()
static SortedSet< String > getTikaDetectedTypes()
String getFileType(AbstractFile file)
static synchronized SortedSet< String > getDetectedTypes()
boolean byteIs0xFF(byte x)
boolean isDetectableByTika(String mimeType)
String detectAndPostToBlackboard(AbstractFile file)