19 package org.sleuthkit.autopsy.textextractors;
21 import com.ethteck.decodetect.core.Decodetect;
22 import com.ethteck.decodetect.core.DecodetectResult;
23 import java.io.BufferedInputStream;
24 import java.io.IOException;
25 import java.io.InputStream;
26 import java.io.InputStreamReader;
27 import java.io.Reader;
28 import java.nio.charset.Charset;
29 import java.nio.charset.CharsetDecoder;
30 import java.nio.charset.CharsetEncoder;
31 import java.nio.charset.StandardCharsets;
32 import java.nio.charset.UnsupportedCharsetException;
33 import java.util.List;
34 import java.util.logging.Level;
35 import org.apache.tika.parser.txt.CharsetDetector;
36 import org.apache.tika.parser.txt.CharsetMatch;
53 public boolean contains(Charset cs) {
58 public CharsetDecoder newDecoder() {
63 public CharsetEncoder newEncoder() {
85 private final AbstractFile
file;
// If the charset held in `enc` equals the UNKNOWN_CHARSET sentinel, use
// UTF-8 as the default instead.
101 if (enc.equals(UNKNOWN_CHARSET)) {
102 enc = StandardCharsets.UTF_8;
// Build a character reader over the file's content: ReadContentInputStream
// supplies the file's bytes as a stream, BufferedInputStream buffers them,
// and InputStreamReader decodes them using the supplied `encoding`.
108 return new InputStreamReader(
new BufferedInputStream(
new ReadContentInputStream(file)), encoding);
// True only when the file's MIME type is exactly "text/plain".
// NOTE(review): this dereferences the result of getMIMEType(); if that can
// be null (e.g. file type not yet determined - confirm against AbstractFile),
// the call throws NullPointerException. "text/plain".equals(...) would be
// the null-safe form.
113 return file.getMIMEType().equals(
"text/plain");
// `encoding` already holds a value in this branch. The branch body is outside
// the visible excerpt - presumably it returns the cached value; confirm.
122 if (encoding != null) {
// First detection attempt: hand a buffered stream over the file's content to
// Tika's CharsetDetector. The try-with-resources closes the stream on exit.
129 try (InputStream stream =
new BufferedInputStream(
new ReadContentInputStream(file))) {
130 CharsetDetector detector =
new CharsetDetector();
131 detector.setText(stream);
132 CharsetMatch tikaResult = detector.detect();
// Convert the detector's charset name into a java.nio Charset.
// NOTE(review): any null/confidence check on tikaResult is not visible in
// this excerpt - confirm one exists before this dereference.
135 encoding = Charset.forName(tikaResult.getName());
137 }
// The name reported by the detector is not a charset this JVM supports;
// the failure is logged as a warning together with the file name and object ID.
catch (UnsupportedCharsetException ex) {
138 logger.log(Level.WARNING, String.format(
"Error converting CharsetDetector result for %s (objID=%d)", file.getName(), file.getId()), ex);
141 }
// Reading the file's content for the detector failed; logged as a warning.
catch (IOException ex) {
142 logger.log(Level.WARNING, String.format(
"Error setting CharsetDetector stream for %s (objID=%d)", file.getName(), file.getId()), ex);
// Detection with Decodetect works on a sample of the file: at most
// maxBytes (100000) bytes, or the whole file when it is smaller than that.
149 int maxBytes = 100000;
150 int numBytes = maxBytes;
151 if (file.getSize() < maxBytes) {
// The cast cannot overflow: the size is known to be below maxBytes here.
152 numBytes = (int) file.getSize();
155 byte[] targetArray =
new byte[numBytes];
// Fill the sample buffer from the start of the file.
// NOTE(review): the value returned by file.read(...) is discarded, so a
// short read would leave trailing zero bytes in the sample - confirm
// whether that can happen and whether it matters for detection.
156 file.read(targetArray, 0, numBytes);
157 List<DecodetectResult> results = Decodetect.DECODETECT.getResults(targetArray);
// Use the first result when any were returned (assumes the list is ordered
// best match first - confirm against the Decodetect documentation).
158 if (!results.isEmpty()) {
159 DecodetectResult topResult = results.get(0);
161 encoding = topResult.getEncoding();
165 }
// Reading the file's content failed; logged as a warning with the file
// name and object ID.
catch (TskCoreException ex) {
166 logger.log(Level.WARNING, String.format(
"Error reading content from %s (objID=%d)", file.getName(), file.getId()), ex);
synchronized static Logger getLogger(String name)