19 package org.sleuthkit.autopsy.keywordsearch;
 
   21 import java.io.IOException;
 
   22 import java.io.InputStream;
 
   23 import java.io.InputStreamReader;
 
   24 import java.util.ArrayList;
 
   25 import java.util.HashMap;
 
   26 import java.util.List;
 
   28 import java.util.logging.Level;
 
   39 class StringsTextExtractor 
extends FileTextExtractor {
 
   // Logger for this class, obtained by class name.
   41     static final private Logger logger = Logger.getLogger(StringsTextExtractor.class.getName());
 
   // Scripts to extract. The list instance is final; its contents are replaced by setScripts().
   51     private final List<SCRIPT> extractScripts = 
new ArrayList<>();
 
   // Extraction options, looked up by ExtractOptions names (e.g. EXTRACT_UTF8, EXTRACT_UTF16).
   // Not final: setOptions() swaps in the caller's map.
   52     private Map<String, String> extractOptions = 
new HashMap<>();
 
   /**
    * Creates an extractor whose script list is seeded with SCRIPT.LATIN_2.
    */
   54     public StringsTextExtractor() {
 
   56         extractScripts.add(SCRIPT.LATIN_2);
 
   /**
    * Replaces the configured scripts with the contents of the given list.
    * The internal list is cleared and refilled, so the caller's list instance
    * is not retained.
    *
    * @param extractScripts scripts to use from now on; its elements are copied
    */
   64     public void setScripts(List<SCRIPT> extractScripts) {
 
   65         this.extractScripts.clear();
 
   66         this.extractScripts.addAll(extractScripts);
 
   /**
    * Returns the configured scripts as a new ArrayList, so changes made to the
    * returned list do not affect this extractor.
    *
    * @return a copy of the current script list
    */
   74     public List<SCRIPT> getScripts() {
 
   75         return new ArrayList<>(extractScripts);
 
   /**
    * Returns the extraction options map.
    * Unlike getScripts(), this hands out the internal map itself rather than a
    * copy, so modifications made by the caller are visible to this extractor.
    *
    * @return the internal options map
    */
   84     public Map<String, String> getOptions() {
 
   85         return extractOptions;
 
   /**
    * Replaces the extraction options with the given map.
    * The reference is stored as-is (no copy, no null check).
    * NOTE(review): a null argument would make later extractOptions.get(...)
    * calls (e.g. in isDisabled()) throw NullPointerException - confirm callers
    * never pass null.
    *
    * @param options map of option name to option value
    */
   93     public void setOptions(Map<String, String> options) {
 
   94         this.extractOptions = options;
 
   /**
    * Logs the given message and exception at Level.WARNING using this class's logger.
    *
    * @param msg message to log
    * @param ex  exception to attach to the log record
    */
   98     public void logWarning(
final String msg, Exception ex) {
 
   99         logger.log(Level.WARNING, msg, ex); 
 
  /**
   * Reports whether string extraction is effectively turned off.
   * Both flags are read from the options map with Boolean.parseBoolean, which
   * yields false for a missing (null) value or any text other than "true"
   * (case-insensitive).
   *
   * @return true when neither the EXTRACT_UTF8 nor the EXTRACT_UTF16 option is enabled
   */
  103     public boolean isDisabled() {
 
  104         boolean extractUTF8 = Boolean.parseBoolean(extractOptions.get(ExtractOptions.EXTRACT_UTF8.toString()));
 
  105         boolean extractUTF16 = Boolean.parseBoolean(extractOptions.get(ExtractOptions.EXTRACT_UTF16.toString()));
 
  107         return extractUTF8 == 
false && extractUTF16 == 
false;
 
  /**
   * Builds a character reader over the string-extraction stream for a file.
   * The stream comes from getInputStream(sourceFile) and is decoded with
   * Server.DEFAULT_INDEXED_TEXT_CHARSET.
   *
   * @param sourceFile file to extract strings from
   * @return reader over the extracted strings
   * @throws TextExtractorException declared by the signature
   */
  111     public InputStreamReader getReader(AbstractFile sourceFile) 
throws TextExtractorException {
 
  112         InputStream stringStream = getInputStream(sourceFile);
 
  113         return new InputStreamReader(stringStream, Server.DEFAULT_INDEXED_TEXT_CHARSET);
 
  /**
   * Chooses the extraction stream implementation for a file based on the
   * configured scripts and options.
   *
   * @param sourceFile file to extract strings from
   * @return an EnglishOnlyStream when SCRIPT.LATIN_1 is the only configured
   *         script, otherwise an InternationalStream
   */
  116     InputStream getInputStream(AbstractFile sourceFile) {
 
          // Exactly one configured script, and it is SCRIPT.LATIN_1: use the English-only stream.
          // NOTE(review): the constructor seeds the list with SCRIPT.LATIN_2, so a freshly
          // constructed extractor does not take this branch - confirm that is intended.
  118         if (extractScripts.size() == 1 && extractScripts.get(0).equals(SCRIPT.LATIN_1)) {
 
  119             return new EnglishOnlyStream(sourceFile);
 
          // Otherwise parse the UTF-8 / UTF-16 flags from the options (missing values parse
          // as false) and pass them, with the script list, to the international stream.
  121             boolean extractUTF8 = Boolean.parseBoolean(extractOptions.get(ExtractOptions.EXTRACT_UTF8.toString()));
 
  122             boolean extractUTF16 = Boolean.parseBoolean(extractOptions.get(ExtractOptions.EXTRACT_UTF16.toString()));
 
  124             return new InternationalStream(sourceFile, extractScripts, extractUTF8, extractUTF16);
 
  129     public boolean isContentTypeSpecific() {
 
  134     public boolean isSupported(AbstractFile file, String detectedFormat) {
 
  // One-character string holding code point 10 (line feed); appended after each extracted string.
  154         private static final String 
NLS = Character.toString((
char) 10); 
 
  /**
   * Bulk read for this stream.
   *
   * Arguments are validated first: off < 0, len < 0 or len > b.length - off
   * raises IndexOutOfBoundsException, and len == 0 is handled as its own case.
   * Source bytes are fetched from content in chunks of READ_BUF_SIZE starting
   * at contentOffset. Characters are collected in tempString; when a run ends,
   * it is moved to curString with NLS appended if it holds at least
   * MIN_PRINTABLE_CHARS characters or stringAtBufBoundary is set, and
   * tempString is reset. After the loop, a sufficiently long pending run is
   * either appended whole or split so that the remainder is kept in
   * tempString (with stringAtTempBoundary set) for a later call.
   *
   * @param b   destination buffer
   * @param off start offset in b
   * @param len maximum number of bytes requested
   * @throws IOException declared by the signature
   */
  190         public int read(byte[] b, 
int off, 
int len) 
throws IOException {
 
  192                 throw new NullPointerException();
 
  193             } 
else if (off < 0 || len < 0 || len > b.length - off) {
 
  194                 throw new IndexOutOfBoundsException();
 
  195             } 
else if (len == 0) {
 
  198             long fileSize = content.
getSize();
 
              // A previous call left part of a string in tempString; the flag is cleared here.
  205             if (stringAtTempBoundary) {
 
  209                 stringAtTempBoundary = 
false;
 
              // Tracks whether the previous byte was a lone zero.
  212             boolean singleConsecZero = 
false; 
 
  214             while (newCurLen < len) {
 
                  // Read buffer exhausted: fetch the next chunk from content.
  216                 if (readBufOffset > bytesInReadBuf - 1) {
 
  220                         bytesInReadBuf = content.
read(curReadBuf, contentOffset, READ_BUF_SIZE);
 
  222                         if (curStringLen > 0 || tempStringLen >= MIN_PRINTABLE_CHARS) {
 
                      // Nothing was read.
  232                     if (bytesInReadBuf < 1) {
 
  233                         if (curStringLen > 0 || tempStringLen >= MIN_PRINTABLE_CHARS) {
 
                  // NOTE(review): one buffer element is cast straight to char; presumably
                  // curReadBuf is a byte[], in which case values >= 0x80 sign-extend - confirm.
  249                 char c = (char) curReadBuf[readBufOffset++];
 
                  // The first zero only sets the flag; the run-ending branch below is skipped
                  // while the flag is set.
  250                 if (c == 0 && singleConsecZero == 
false) {
 
  252                     singleConsecZero = 
true;
 
  254                     singleConsecZero = 
false;
 
  257                     tempString.append(c);
 
  259                     if (tempStringLen >= MIN_PRINTABLE_CHARS) {
 
  263                 } 
else if (!singleConsecZero) {
 
                      // Move the pending run to curString with a trailing NLS when it is long
                      // enough or continues a string that crossed a buffer boundary.
  265                     if (tempStringLen >= MIN_PRINTABLE_CHARS || stringAtBufBoundary) {
 
  267                         tempString.append(NLS);
 
  269                         curString.append(tempString);
 
  271                         stringAtBufBoundary = 
false;
 
  274                     tempString = 
new StringBuilder();
 
  283                 stringAtBufBoundary = 
true; 
 
  288             if (tempStringLen >= MIN_PRINTABLE_CHARS) {
 
                  // The pending run does not fit: append the first appendChars characters and
                  // keep the rest in tempString for a later call.
  289                 if (newCurLen > len) {
 
  293                     String toAppend = tempString.substring(0, appendChars);
 
  294                     String newTemp = tempString.substring(appendChars);
 
  295                     curString.append(toAppend);
 
  296                     curStringLen += appendChars;
 
  297                     tempString = 
new StringBuilder(newTemp);
 
  298                     tempStringLen = newTemp.length();
 
  299                     stringAtTempBoundary = 
true;
 
  302                     curString.append(tempString);
 
  305                     tempString = 
new StringBuilder();
 
  321             if (tempStringLen >= MIN_PRINTABLE_CHARS) {
 
  322                 curString.append(tempString);
 
  324                 tempString = 
new StringBuilder();
 
  332             final String curStringS = curString.toString();
 
  335             System.arraycopy(stringBytes, 0, b, off, Math.min(curStringLen, (
int) len));
 
  338             curString = 
new StringBuilder();
 
  /**
   * Single-byte read, implemented by delegating to read(oneCharBuf, 0, 1) and
   * returning the first element of oneCharBuf.
   * NOTE(review): if oneCharBuf is a byte[], returning the element directly
   * sign-extends it, so bytes >= 0x80 come back negative (0xFF as -1, the
   * end-of-stream marker). InputStream.read() is specified to return 0-255;
   * masking with 0xFF looks necessary - confirm.
   *
   * @throws IOException declared by the signature
   */
  345         public int read() throws IOException {
 
  346             final int read = 
read(oneCharBuf, 0, 1);
 
  348                 return oneCharBuf[0];
 
  /**
   * Skips input by delegating to the superclass implementation.
   *
   * @param n number of bytes requested to skip
   * @return the value returned by super.skip(n)
   * @throws IOException declared by the signature
   */
  361         public long skip(
long n) 
throws IOException {
 
  364             return super.skip(n);
 
  408             this.nothingToDo = extractUTF8 == 
false && extractUTF16 == 
false;
 
  /**
   * Single-byte read, implemented by delegating to read(oneCharBuf, 0, 1) and
   * returning the first element of oneCharBuf.
   * NOTE(review): if oneCharBuf is a byte[], returning the element directly
   * sign-extends it, so bytes >= 0x80 come back negative (0xFF as -1, the
   * end-of-stream marker). InputStream.read() is specified to return 0-255;
   * masking with 0xFF looks necessary - confirm.
   *
   * @throws IOException declared by the signature
   */
  414         public int read() throws IOException {
 
  418             final int read = 
read(oneCharBuf, 0, 1);
 
  420                 return oneCharBuf[0];
 
  /**
   * Bulk read for this stream.
   *
   * Arguments are validated first: off < 0, len < 0 or len > b.length - off
   * raises IndexOutOfBoundsException, and len == 0 is handled as its own case.
   * Each loop pass refills from the file when the conversion buffer is absent
   * or drained (reading at most FILE_BUF_SIZE bytes at fileReadOffset), then
   * copies up to convertBuffRemain bytes from convertBuff into b and advances
   * the offsets and the count of bytes delivered.
   *
   * @param b   destination buffer
   * @param off start offset in b
   * @param len maximum number of bytes requested
   * @throws IOException declared by the signature
   */
  427         public int read(byte[] b, 
int off, 
int len) 
throws IOException {
 
  429                 throw new NullPointerException();
 
  430             } 
else if (off < 0 || len < 0 || len > b.length - off) {
 
  431                 throw new IndexOutOfBoundsException();
 
  432             } 
else if (len == 0) {
 
  438             long fileSize = content.
getSize();
 
              // NOTE(review): offsetUser starts at off (an index into b) but is compared with
              // len in the loop condition and used in len - offsetUser below. For off != 0 that
              // mixes an absolute index with a length; off + len looks intended - confirm.
  445             int offsetUser = off;
 
  446             while (bytesToUser < len && offsetUser < len) {
 
                  // No converted data left, file not exhausted: read the next chunk.
  449                 if ((convertBuff == null || convertBuffRemain == 0) && !fileEOF && fileReadOffset < fileSize) {
 
  455                         toRead = Math.min(FILE_BUF_SIZE, fileSize - fileReadOffset);
 
  457                         int read = content.
read(fileReadBuff, fileReadOffset, toRead);
 
  458                         if (read == -1 || read == 0) {
 
  461                             fileReadOffset += 
read;
 
  462                             if (fileReadOffset >= fileSize) {
 
                  // Still nothing to hand out: report what was delivered so far, or -1 if nothing.
  475                 if (convertBuff == null || convertBuffRemain == 0) {
 
  477                         return bytesToUser > 0 ? bytesToUser : -1;
 
  484                 final int toCopy = Math.min(convertBuffRemain, len - offsetUser);
 
  485                 System.arraycopy(convertBuff, convertBuffOffset, b, offsetUser, toCopy);
 
  487                 convertBuffOffset += toCopy;
 
  488                 offsetUser += toCopy;
 
  489                 bytesToUser += toCopy;
 
  506                 bytesInConvertBuff = 0;
 
  508                 bytesInConvertBuff = convertBuff.length;
 
  510             convertBuffOffset = 0;
 
static final Charset DEFAULT_INDEXED_TEXT_CHARSET
The default Charset used when indexing text.
 
synchronized static Logger getLogger(String name)
 
final int read(byte[] buf, long offset, long len)