19 package org.sleuthkit.autopsy.keywordsearch;
21 import java.io.IOException;
22 import java.io.InputStream;
23 import java.io.InputStreamReader;
24 import java.util.ArrayList;
25 import java.util.HashMap;
26 import java.util.List;
28 import java.util.logging.Level;
/**
 * File text extractor whose reader is backed by one of two string-producing
 * input streams: EnglishOnlyStream when SCRIPT.LATIN_1 is the only configured
 * script, InternationalStream otherwise (see getInputStream).
 */
39 class StringsTextExtractor
extends FileTextExtractor {
// Class-wide logger; used by logWarning().
41 static final private Logger logger = Logger.getLogger(StringsTextExtractor.class.getName());
// Scripts to extract. The list instance itself is never replaced; setScripts() clears and refills it.
51 private final List<SCRIPT> extractScripts =
new ArrayList<>();
// Extraction options keyed by ExtractOptions names; values are read with Boolean.parseBoolean.
// Starts out empty, so every option reads as false until setOptions() is called.
52 private Map<String, String> extractOptions =
new HashMap<>();
/**
 * Creates an extractor whose script list initially contains only
 * SCRIPT.LATIN_2. With that default, getInputStream() does not take the
 * LATIN_1-only branch unless setScripts() is called.
 */
54 public StringsTextExtractor() {
56 extractScripts.add(SCRIPT.LATIN_2);
/**
 * Replaces the configured scripts with the elements of the given list.
 * The elements are copied into the internal list, so later changes to the
 * caller's list do not affect this extractor.
 *
 * @param extractScripts scripts to use; a null argument makes addAll throw
 *                       after the internal list has already been cleared
 */
64 public void setScripts(List<SCRIPT> extractScripts) {
65 this.extractScripts.clear();
66 this.extractScripts.addAll(extractScripts);
/**
 * Returns the currently configured scripts.
 *
 * @return a new list holding the same elements; modifying it does not
 *         change this extractor's configuration
 */
74 public List<SCRIPT> getScripts() {
75 return new ArrayList<>(extractScripts);
/**
 * Returns the extraction options.
 *
 * @return the internal options map itself (not a copy), so changes made
 *         by the caller are seen by this extractor
 */
84 public Map<String, String> getOptions() {
85 return extractOptions;
/**
 * Replaces the extraction options with the given map. The reference is
 * stored as-is (no copy), so the caller and this extractor share the map.
 *
 * NOTE(review): a null argument is accepted here, but isDisabled() and
 * getInputStream() call extractOptions.get(...) unconditionally and would
 * then throw NullPointerException - confirm callers never pass null.
 *
 * @param options option names mapped to "true"/"false"-style values
 */
93 public void setOptions(Map<String, String> options) {
94 this.extractOptions = options;
/**
 * Logs the message and exception at WARNING level on this class's logger.
 *
 * @param msg message to log
 * @param ex  exception to attach to the log record
 */
98 public void logWarning(
final String msg, Exception ex) {
99 logger.log(Level.WARNING, msg, ex);
/**
 * Reports whether extraction is switched off.
 *
 * Boolean.parseBoolean returns false for a null or non-"true" value, so an
 * option that is missing from the map counts as switched off.
 *
 * @return true when neither the EXTRACT_UTF8 nor the EXTRACT_UTF16 option
 *         parses as true
 */
103 public boolean isDisabled() {
104 boolean extractUTF8 = Boolean.parseBoolean(extractOptions.get(ExtractOptions.EXTRACT_UTF8.toString()));
105 boolean extractUTF16 = Boolean.parseBoolean(extractOptions.get(ExtractOptions.EXTRACT_UTF16.toString()));
107 return extractUTF8 ==
false && extractUTF16 ==
false;
/**
 * Opens a character reader over the strings extracted from the file.
 * The byte stream from getInputStream() is decoded with
 * Server.DEFAULT_INDEXED_TEXT_CHARSET.
 *
 * @param sourceFile file to extract strings from
 * @return reader over the extracted text
 * @throws TextExtractorException declared by the signature; none of the
 *         statements below throws it directly
 */
111 public InputStreamReader getReader(AbstractFile sourceFile)
throws TextExtractorException {
112 InputStream stringStream = getInputStream(sourceFile);
113 return new InputStreamReader(stringStream, Server.DEFAULT_INDEXED_TEXT_CHARSET);
/**
 * Picks the stream implementation used to extract strings from the file.
 *
 * @param sourceFile file to extract strings from
 * @return an EnglishOnlyStream when LATIN_1 is the single configured
 *         script; otherwise an InternationalStream configured with the
 *         script list and the UTF-8 / UTF-16 option flags
 */
116 InputStream getInputStream(AbstractFile sourceFile) {
// Exactly one script and it is LATIN_1: use the English-only stream.
118 if (extractScripts.size() == 1 && extractScripts.get(0).equals(SCRIPT.LATIN_1)) {
119 return new EnglishOnlyStream(sourceFile);
// Otherwise read the encoding switches; a missing option parses as false.
121 boolean extractUTF8 = Boolean.parseBoolean(extractOptions.get(ExtractOptions.EXTRACT_UTF8.toString()));
122 boolean extractUTF16 = Boolean.parseBoolean(extractOptions.get(ExtractOptions.EXTRACT_UTF16.toString()));
124 return new InternationalStream(sourceFile, extractScripts, extractUTF8, extractUTF16);
// NOTE(review): presumably reports whether this extractor is limited to
// particular content types - confirm against the FileTextExtractor contract.
129 public boolean isContentTypeSpecific() {
// NOTE(review): presumably decides whether this extractor can handle the given
// file; detectedFormat looks like a detected type/MIME string - confirm against
// the FileTextExtractor contract.
134 public boolean isSupported(AbstractFile file, String detectedFormat) {
// One-character string holding code point 10 (line feed); appended to a
// candidate string before it is moved into the output.
154 private static final String
NLS = Character.toString((
char) 10);
/**
 * Reads extracted strings into the caller's buffer.
 *
 * Bytes are pulled from the content through curReadBuf and accumulated in
 * tempString (the candidate currently being built). Candidates are moved
 * into curString (the output for this call), whose bytes are finally
 * copied into b starting at off.
 *
 * @param b   destination buffer
 * @param off index in b at which to start writing
 * @param len maximum number of bytes to write
 * @throws NullPointerException      presumably when b is null - confirm
 * @throws IndexOutOfBoundsException when off or len is negative, or len
 *                                   exceeds the space left in b after off
 * @throws IOException               declared by the signature
 */
190 public int read(byte[] b,
int off,
int len)
throws IOException {
// Argument validation.
192 throw new NullPointerException();
193 }
else if (off < 0 || len < 0 || len > b.length - off) {
194 throw new IndexOutOfBoundsException();
195 }
else if (len == 0) {
// Total size reported by the content being read.
198 long fileSize = content.
getSize();
// The flag is raised further down when tempString had to be split because
// the caller's buffer filled up; it is reset here on the following call.
205 if (stringAtTempBoundary) {
209 stringAtTempBoundary =
false;
// Set after a lone zero byte has been seen and cleared again by the byte
// that follows it.
// NOTE(review): presumably lets a single NUL between characters pass
// without ending the string - confirm.
212 boolean singleConsecZero =
false;
// Keep scanning until enough output for this call has been accumulated.
214 while (newCurLen < len) {
// Read buffer fully consumed: refill it from the content.
216 if (readBufOffset > bytesInReadBuf - 1) {
220 bytesInReadBuf = content.
read(curReadBuf, contentOffset, READ_BUF_SIZE);
222 if (curStringLen > 0 || tempStringLen >= MIN_PRINTABLE_CHARS) {
// The refill produced no bytes.
232 if (bytesInReadBuf < 1) {
233 if (curStringLen > 0 || tempStringLen >= MIN_PRINTABLE_CHARS) {
// Consume the next byte from the read buffer as a char.
249 char c = (char) curReadBuf[readBufOffset++];
// Track whether this byte is a first (single) zero.
250 if (c == 0 && singleConsecZero ==
false) {
252 singleConsecZero =
true;
254 singleConsecZero =
false;
// Extend the candidate string with the current char.
257 tempString.append(c);
259 if (tempStringLen >= MIN_PRINTABLE_CHARS) {
263 }
else if (!singleConsecZero) {
// A candidate qualifies when it is long enough, or when stringAtBufBoundary
// is set; it is terminated with a line feed and moved into the output.
265 if (tempStringLen >= MIN_PRINTABLE_CHARS || stringAtBufBoundary) {
267 tempString.append(NLS);
269 curString.append(tempString);
271 stringAtBufBoundary =
false;
// Start a fresh candidate.
274 tempString =
new StringBuilder();
// NOTE(review): presumably marks that a string runs up to the end of the
// read buffer, so its continuation skips the minimum-length test - confirm.
283 stringAtBufBoundary =
true;
288 if (tempStringLen >= MIN_PRINTABLE_CHARS) {
289 if (newCurLen > len) {
// The candidate does not fit entirely: emit its first appendChars chars now
// and keep the remainder in tempString for a later call.
293 String toAppend = tempString.substring(0, appendChars);
294 String newTemp = tempString.substring(appendChars);
295 curString.append(toAppend);
296 curStringLen += appendChars;
297 tempString =
new StringBuilder(newTemp);
298 tempStringLen = newTemp.length();
299 stringAtTempBoundary =
true;
// Otherwise the whole candidate is emitted and the candidate buffer reset.
302 curString.append(tempString);
305 tempString =
new StringBuilder();
// Flush a remaining candidate that is long enough.
321 if (tempStringLen >= MIN_PRINTABLE_CHARS) {
322 curString.append(tempString);
324 tempString =
new StringBuilder();
332 final String curStringS = curString.toString();
// Copy at most len bytes of the accumulated output into the caller's buffer.
335 System.arraycopy(stringBytes, 0, b, off, Math.min(curStringLen, (
int) len));
// Reset the output accumulator for the next call.
338 curString =
new StringBuilder();
/**
 * Single-byte read implemented on top of read(byte[], int, int) using the
 * one-element buffer oneCharBuf.
 *
 * NOTE(review): oneCharBuf[0] is returned without masking. If oneCharBuf is
 * a byte[], values of 0x80 and above come back as negative ints, whereas
 * InputStream.read() is specified to return 0..255 or -1 - confirm and
 * consider returning oneCharBuf[0] & 0xFF.
 *
 * @throws IOException declared by the signature
 */
345 public int read() throws IOException {
346 final int read =
read(oneCharBuf, 0, 1);
348 return oneCharBuf[0];
/**
 * Skips input by delegating to the superclass implementation.
 *
 * @param n number of bytes the caller asks to skip
 * @return whatever super.skip(n) reports
 * @throws IOException declared by the signature
 */
361 public long skip(
long n)
throws IOException {
364 return super.skip(n);
// There is nothing to do when both UTF-8 and UTF-16 extraction are switched off.
408 this.nothingToDo = extractUTF8 ==
false && extractUTF16 ==
false;
/**
 * Single-byte read implemented on top of read(byte[], int, int) using the
 * one-element buffer oneCharBuf.
 *
 * NOTE(review): oneCharBuf[0] is returned without masking. If oneCharBuf is
 * a byte[], values of 0x80 and above come back as negative ints, whereas
 * InputStream.read() is specified to return 0..255 or -1 - confirm and
 * consider returning oneCharBuf[0] & 0xFF.
 *
 * @throws IOException declared by the signature
 */
414 public int read() throws IOException {
418 final int read =
read(oneCharBuf, 0, 1);
420 return oneCharBuf[0];
/**
 * Reads converted bytes into the caller's buffer.
 *
 * Chunks of the content are read into fileReadBuff; converted output is
 * held in convertBuff and copied into b until the request is satisfied or
 * no converted data remains.
 *
 * @param b   destination buffer
 * @param off index in b at which to start writing
 * @param len maximum number of bytes to write
 * @return on the path taken when no converted data remains: the number of
 *         bytes copied so far, or -1 when none were copied
 * @throws NullPointerException      presumably when b is null - confirm
 * @throws IndexOutOfBoundsException when off or len is negative, or len
 *                                   exceeds the space left in b after off
 * @throws IOException               declared by the signature
 */
427 public int read(byte[] b,
int off,
int len)
throws IOException {
// Argument validation.
429 throw new NullPointerException();
430 }
else if (off < 0 || len < 0 || len > b.length - off) {
431 throw new IndexOutOfBoundsException();
432 }
else if (len == 0) {
// Total size reported by the content being read.
438 long fileSize = content.
getSize();
// Write position inside the caller's buffer.
445 int offsetUser = off;
// NOTE(review): offsetUser starts at off but is compared against len here
// (and subtracted from len when toCopy is computed below). With off > 0
// that copies fewer than len bytes, and nothing at all when off >= len.
// The intended bounds look like off + len / len - bytesToUser - confirm.
446 while (bytesToUser < len && offsetUser < len) {
// No converted data pending and more of the file left: read the next chunk.
449 if ((convertBuff == null || convertBuffRemain == 0) && !fileEOF && fileReadOffset < fileSize) {
// Never request more than one buffer's worth or more than what remains.
455 toRead = Math.min(FILE_BUF_SIZE, fileSize - fileReadOffset);
457 int read = content.
read(fileReadBuff, fileReadOffset, toRead);
// The read yielded no bytes (-1 or 0).
458 if (read == -1 || read == 0) {
// Advance the file position by the number of bytes read.
461 fileReadOffset +=
read;
// The file position has reached the end of the content.
462 if (fileReadOffset >= fileSize) {
// Still no converted data available: report what was copied so far,
// or -1 if nothing was.
475 if (convertBuff == null || convertBuffRemain == 0) {
477 return bytesToUser > 0 ? bytesToUser : -1;
// Copy as much converted data as is available and permitted by the bound.
484 final int toCopy = Math.min(convertBuffRemain, len - offsetUser);
485 System.arraycopy(convertBuff, convertBuffOffset, b, offsetUser, toCopy);
// Advance the source position, the destination position and the running total.
487 convertBuffOffset += toCopy;
488 offsetUser += toCopy;
489 bytesToUser += toCopy;
// Convert-buffer bookkeeping: the byte count is set either to 0 or to the
// length of convertBuff, and the read position is rewound to the start.
// NOTE(review): presumably the 0 case applies when conversion produced no
// output - confirm against the surrounding branch.
506 bytesInConvertBuff = 0;
508 bytesInConvertBuff = convertBuff.length;
510 convertBuffOffset = 0;
static final Charset DEFAULT_INDEXED_TEXT_CHARSET
default Charset to index text as
synchronized static Logger getLogger(String name)
final int read(byte[] buf, long offset, long len)