19 package org.sleuthkit.autopsy.keywordsearch;
 
   21 import java.io.IOException;
 
   22 import java.io.InputStream;
 
   23 import java.nio.charset.Charset;
 
   24 import java.util.ArrayList;
 
   25 import java.util.HashMap;
 
   26 import java.util.List;
 
   28 import java.util.logging.Level;
 
   38 class StringsTextExtractor 
implements TextExtractor {
 
   // Ingester handle shared by all instances; it is (re)assigned from
   // Server.getIngester() each time the constructor runs.
   40     private static Ingester ingester;    
 
   // Logger named after this class.
   41     private static final Logger logger = Logger.getLogger(StringsTextExtractor.class.getName());
 
   // 1 * 1024 * 1024 bytes: size of the read buffer allocated in index() and the
   // upper bound on the number of bytes requested from the stream per read.
   42     private static final long MAX_STRING_CHUNK_SIZE = 1 * 1024 * 1024L;        
 
   // Offset at which index() starts filling the chunk buffer, also added to the
   // reported chunk length. It is 0, so no leading bytes are reserved.
   // NOTE(review): the name suggests a byte-order-mark length - confirm.
   44     private static final int BOM_LEN = 0;  
 
   // Charset handed to the string streams and to AbstractFileChunk.index().
   45     private static final Charset INDEX_CHARSET = Server.DEFAULT_INDEXED_TEXT_CHARSET;
 
   // Script placed in extractScripts by the constructor.
   46     private static final SCRIPT DEFAULT_SCRIPT = SCRIPT.LATIN_2;
 
   // NOTE(review): no assignment to this field is visible in this excerpt;
   // presumably set from the constructor argument - confirm.
   47     private KeywordSearchIngestModule module;
 
   // File most recently passed to index().
   48     private AbstractFile sourceFile;
 
   // Value returned by getNumChunks(); index() uses numChunks + 1 as the id of
   // the next chunk. The statement that advances it is not visible in this excerpt.
   49     private int numChunks = 0;
 
   // Scripts to extract; contents are replaced by setScripts() and copied out by
   // getScripts().
   50     private final List<SCRIPT> extractScripts = 
new ArrayList<>();
 
   // Extraction options; index() reads the EXTRACT_UTF8 and EXTRACT_UTF16 entries.
   // Replaced wholesale by setOptions() and returned as-is by getOptions().
   51     private Map<String, String> extractOptions = 
new HashMap<>();   
 
   /**
    * Creates a strings extractor.
    *
    * The visible statements fetch the ingester from {@code Server} into the
    * static {@code ingester} field and seed the script list with
    * {@code DEFAULT_SCRIPT}.
    *
    * @param module the keyword search ingest module. NOTE(review): this excerpt
    *               does not show the parameter being stored or used; confirm
    *               against the full source.
    */
   60     public StringsTextExtractor(KeywordSearchIngestModule module) {
 
   // Overwrites the static field shared by every instance of this class.
   62         ingester = Server.getIngester();
 
   63         extractScripts.add(DEFAULT_SCRIPT);
 
   /**
    * Replaces the scripts used for extraction with the contents of the given list.
    *
    * The elements are copied into the internal list, so later changes to the
    * caller's list do not affect this extractor. The internal list is cleared
    * before the copy, so a {@code null} argument leaves it empty and then fails
    * with a NullPointerException in {@code addAll}.
    *
    * @param extractScripts scripts to use from now on
    * @return NOTE(review): the return statement is not visible in this excerpt;
    *         document the value from the full source.
    */
   67     public boolean setScripts(List<SCRIPT> extractScripts) {
 
   68         this.extractScripts.clear();
 
   69         this.extractScripts.addAll(extractScripts);
 
   /**
    * Returns the scripts currently configured for extraction.
    *
    * @return a new list holding the same elements as the internal script list;
    *         modifying the returned list does not change this extractor
    */
   74     public List<SCRIPT> getScripts() {
 
   75         return new ArrayList<>(extractScripts);
 
   /**
    * Returns the current value of the chunk counter.
    *
    * @return the {@code numChunks} field, which starts at 0
    */
   79     public int getNumChunks() {
 
   80         return this.numChunks;
 
   /**
    * Returns an {@code AbstractFile}.
    *
    * NOTE(review): the method body is not visible in this excerpt; presumably
    * it returns the file last passed to index() - confirm against the full source.
    */
   84     public AbstractFile getSourceFile() {
 
   /**
    * Returns the extraction options.
    *
    * @return the internal options map itself, not a copy; changes made through
    *         the returned reference are seen by this extractor
    */
   89     public Map<String, String> getOptions() {
 
   90         return extractOptions;
 
   /**
    * Replaces the extraction options.
    *
    * The given map is stored by reference, not copied. index() calls
    * {@code get} on the stored map, so storing {@code null} here makes a later
    * index() call fail with a NullPointerException.
    *
    * @param options option names mapped to string values; index() looks up the
    *                EXTRACT_UTF8 and EXTRACT_UTF16 entries
    */
   94     public void setOptions(Map<String, String> options) {
 
   95         this.extractOptions = options;
 
   /**
    * Extracts strings from the given file and sends them to the index in chunks.
    *
    * Flow visible in this excerpt (several lines, including control-flow
    * keywords and closing braces, are missing from it):
    * 1. Remember the file in {@code sourceFile}.
    * 2. Read the EXTRACT_UTF8 and EXTRACT_UTF16 flags from the options map.
    * 3. Open a string stream over the file; the stream type depends on the
    *    configured scripts.
    * 4. Read the stream into a buffer of MAX_STRING_CHUNK_SIZE bytes and index
    *    each filled buffer as an AbstractFileChunk.
    * 5. Call {@code ingester.ingest(this)}.
    * 6. Close the stream.
    *
    * I/O errors and per-chunk ingest errors are logged at WARNING level.
    *
    * @param sourceFile file to extract strings from
    * @return NOTE(review): {@code success} starts as false; the statements that
    *         update and return it are not visible in this excerpt.
    * @throws IngesterException declared by the signature. The per-chunk
    *         IngesterException is caught and logged below, so this presumably
    *         originates elsewhere (e.g. {@code ingester.ingest(this)}) - confirm.
    */
   99     public boolean index(AbstractFile sourceFile) 
throws IngesterException {
 
  100         this.sourceFile = sourceFile;
 
  102         boolean success = 
false;
 
   // Boolean.parseBoolean yields false for a missing (null) entry and for any
   // value other than "true" (case-insensitive).
  105         final boolean extractUTF8 =
 
  106                 Boolean.parseBoolean(extractOptions.get(TextExtractor.ExtractOptions.EXTRACT_UTF8.toString()));
 
  108         final boolean extractUTF16 =
 
  109                 Boolean.parseBoolean(extractOptions.get(TextExtractor.ExtractOptions.EXTRACT_UTF16.toString()));
 
   // Neither encoding requested. NOTE(review): the body of this branch is not
   // visible in this excerpt.
  111         if (extractUTF8 == 
false && extractUTF16 == 
false) {
 
  116         InputStream stringStream;
 
   // Exactly one configured script and it is LATIN_1: use AbstractFileStringStream.
  118         if (extractScripts.size() == 1 && extractScripts.get(0).equals(SCRIPT.LATIN_1)) {
 
  120             stringStream = 
new AbstractFileStringStream(sourceFile, INDEX_CHARSET);
 
   // Presumably the alternative branch (the else keyword is not visible in this
   // excerpt): a stream that takes the script list and both encoding flags.
  122             stringStream = 
new AbstractFileStringIntStream(
 
  123                     sourceFile, extractScripts, extractUTF8, extractUTF16, INDEX_CHARSET);
 
   // Buffer reused for every chunk.
  131             final byte[] stringChunkBuf = 
new byte[(int) MAX_STRING_CHUNK_SIZE];
 
   // Fill the buffer starting at offset BOM_LEN until read() reports end of
   // stream (-1). The declaration of readSize is not visible in this excerpt.
  133             while ((readSize = stringStream.read(stringChunkBuf, BOM_LEN, (
int) MAX_STRING_CHUNK_SIZE - BOM_LEN)) != -1) {
 
   // The chunk id is one past the current counter. NOTE(review): the statement
   // that advances numChunks is not visible in this excerpt.
  137                 AbstractFileChunk chunk = 
new AbstractFileChunk(
this, this.numChunks + 1);
 
   // Length passed is the bytes read plus the BOM_LEN offset.
  140                     chunk.index(ingester, stringChunkBuf, readSize + BOM_LEN, INDEX_CHARSET);
 
  142                 } 
catch (IngesterException ingEx) {
 
   // Failure to index one chunk is logged with the file name and id. Whether
   // the loop then continues or stops is not visible in this excerpt.
  144                     logger.log(Level.WARNING, 
"Ingester had a problem with extracted strings from file '" + sourceFile.getName() + 
"' (id: " + sourceFile.getId() + 
").", ingEx); 
 
   // Indented at the level of the statements outside the read loop, so this
   // presumably runs once after all chunks have been read - confirm.
  153             ingester.ingest(
this);
 
  155         } 
catch (IOException ex) {
 
  156             logger.log(Level.WARNING, 
"Unable to read input stream to divide and send to Solr, file: " + sourceFile.getName(), ex); 
 
   // Stream cleanup; a failure to close is logged, not propagated.
   // NOTE(review): presumably inside a finally block - the keyword is not
   // visible in this excerpt.
  160                 stringStream.close();
 
  161             } 
catch (IOException ex) {
 
  162                 logger.log(Level.WARNING, 
"Error closing input stream stream, file: " + sourceFile.getName(), ex); 
 
   /**
    * Returns a boolean flag about this extractor.
    *
    * NOTE(review): the method body is not visible in this excerpt. The name
    * suggests it reports whether the extractor targets specific content types -
    * confirm the returned value against the full source.
    */
  171     public boolean isContentTypeSpecific() {
 
  176     public boolean isSupported(AbstractFile file, String detectedFormat) {