Autopsy  4.17.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
TextFileExtractor.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2018-2020 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.textextractors;
20 
21 import java.io.BufferedInputStream;
22 import java.io.IOException;
23 import java.io.InputStreamReader;
24 import java.io.Reader;
25 import java.nio.charset.Charset;
26 import java.nio.charset.StandardCharsets;
27 import java.util.logging.Level;
28 import org.apache.commons.lang.StringUtils;
32 import org.sleuthkit.datamodel.AbstractFile;
33 import org.sleuthkit.datamodel.ReadContentInputStream;
34 import org.sleuthkit.datamodel.TskCoreException;
35 
39 public final class TextFileExtractor implements TextExtractor {
40 
41  private static final Logger logger = Logger.getLogger(TextFileExtractor.class.getName());
42  private final AbstractFile file;
43  private static final String PLAIN_TEXT_MIME_TYPE = "text/plain";
44 
45  private Charset encoding = null;
46 
52  public TextFileExtractor(AbstractFile file) {
53  this.file = file;
54  }
55 
56  @Override
57  public Reader getReader() throws InitReaderException {
58  if(encoding == null) {
59  try {
60  encoding = EncodingUtils.getEncoding(file);
61  if(encoding == EncodingUtils.UNKNOWN_CHARSET) {
62  encoding = StandardCharsets.UTF_8;
63  }
64  } catch (TskCoreException | IOException ex) {
65  logger.log(Level.WARNING, String.format("Error detecting the "
66  + "encoding for %s (objID=%d)", file.getName(), file.getId()), ex);
67  encoding = StandardCharsets.UTF_8;
68  }
69  }
70 
71  return getReader(encoding);
72  }
73 
74  private Reader getReader(Charset encoding) {
75  return new InputStreamReader(new BufferedInputStream(new ReadContentInputStream(file)), encoding);
76  }
77 
78  @Override
79  public boolean isSupported() {
80  // get the MIME type
81  String mimeType = file.getMIMEType();
82 
83  // if it is not present, attempt to use the FileTypeDetector to determine
84  if (StringUtils.isEmpty(mimeType)) {
85  FileTypeDetector fileTypeDetector = null;
86  try {
87  fileTypeDetector = new FileTypeDetector();
89  logger.log(Level.SEVERE, "Unable to create file type detector for determining MIME type", ex);
90  return false;
91  }
92  mimeType = fileTypeDetector.getMIMEType(file);
93  }
94 
95  return PLAIN_TEXT_MIME_TYPE.equals(mimeType);
96  }
97 }
synchronized static Logger getLogger(String name)
Definition: Logger.java:124

Copyright © 2012-2021 Basis Technology. Generated on: Tue Jan 19 2021
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.