19 package org.sleuthkit.autopsy.keywordsearch;
21 import com.google.common.collect.ImmutableList;
22 import java.io.Reader;
23 import java.util.HashMap;
24 import java.util.List;
26 import java.util.concurrent.atomic.AtomicInteger;
27 import java.util.logging.Level;
28 import org.openide.util.Lookup;
29 import org.openide.util.NbBundle;
30 import org.openide.util.NbBundle.Messages;
31 import org.openide.util.lookup.Lookups;
65 "# {0} - Reason for not starting Solr",
"KeywordSearchIngestModule.init.tryStopSolrMsg={0}<br />Please try stopping Java Solr processes if any exist and restart the application.",
66 "KeywordSearchIngestModule.init.badInitMsg=Keyword search server was not properly initialized, cannot run keyword search ingest.",
67 "SolrConnectionCheck.Port=Invalid port number.",
68 "# {0} - Reason for not connecting to Solr",
"KeywordSearchIngestModule.init.exception.errConnToSolr.msg=Error connecting to SOLR server: {0}.",
69 "KeywordSearchIngestModule.startUp.noOpenCore.msg=The index could not be opened or does not exist.",
70 "CannotRunFileTypeDetection=Unable to run file type detection."
78 private static final List<String> ARCHIVE_MIME_TYPES
81 "application/x-7z-compressed",
82 "application/x-ace-compressed",
83 "application/x-alz-compressed",
85 "application/vnd.ms-cab-compressed",
86 "application/x-cfs-compressed",
87 "application/x-dgc-compressed",
88 "application/x-apple-diskimage",
89 "application/x-gca-compressed",
93 "application/x-rar-compressed",
94 "application/x-stuffit",
95 "application/x-stuffitx",
97 "application/x-archive",
98 "application/x-executable",
102 "application/x-cpio",
103 "application/x-shar",
105 "application/x-bzip",
106 "application/x-bzip2",
107 "application/x-lzip",
108 "application/x-lzma",
109 "application/x-lzop",
111 "application/x-compress");
116 enum StringsExtractOptions {
121 enum UpdateFrequency {
127 NONE(Integer.MAX_VALUE),
129 private final int time;
131 UpdateFrequency(
int time) {
141 private Ingester ingester = null;
147 private boolean startedSearching =
false;
150 private boolean initialized =
false;
152 private static final AtomicInteger instanceCount =
new AtomicInteger(0);
153 private int instanceNum = 0;
166 private static final Map<Long, Map<Long, IngestStatus>> ingestStatus =
new HashMap<>();
177 synchronized (ingestStatus) {
178 Map<Long, IngestStatus> ingestStatusForJob = ingestStatus.get(ingestJobId);
179 if (ingestStatusForJob == null) {
180 ingestStatusForJob =
new HashMap<>();
181 ingestStatus.put(ingestJobId, ingestStatusForJob);
183 ingestStatusForJob.put(fileId, status);
184 ingestStatus.put(ingestJobId, ingestStatusForJob);
189 this.settings = settings;
190 instanceNum = instanceCount.getAndIncrement();
199 "KeywordSearchIngestModule.startupMessage.failedToGetIndexSchema=Failed to get schema version for text index.",
200 "# {0} - Solr version number",
"KeywordSearchIngestModule.startupException.indexSolrVersionNotSupported=Adding text no longer supported for Solr version {0} of the text index.",
201 "# {0} - schema version number",
"KeywordSearchIngestModule.startupException.indexSchemaNotSupported=Adding text no longer supported for schema version {0} of the text index.",
202 "KeywordSearchIngestModule.noOpenCase.errMsg=No open case available."
210 if (server.coreIsOpen() ==
false) {
215 Index indexInfo = server.getIndexInfo();
216 if (!IndexFinder.getCurrentSolrVersion().equals(indexInfo.getSolrVersion())) {
217 throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startupException_indexSolrVersionNotSupported(indexInfo.getSolrVersion()));
219 if (!indexInfo.isCompatible(IndexFinder.getCurrentSchemaVersion())) {
220 throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startupException_indexSchemaNotSupported(indexInfo.getSchemaVersion()));
223 throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startupMessage_failedToGetIndexSchema(), ex);
232 ingester = Ingester.getDefault();
233 this.context = context;
250 port = Integer.parseInt(properties.getPort());
251 }
catch (NumberFormatException ex) {
253 throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_badInitMsg() +
" " + Bundle.SolrConnectionCheck_Port(), ex);
256 kwsService.
tryConnect(properties.getHost(), port);
263 if (!server.isRunning()) {
264 throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_tryStopSolrMsg(Bundle.KeywordSearchIngestModule_init_badInitMsg()));
268 throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_tryStopSolrMsg(Bundle.KeywordSearchIngestModule_init_badInitMsg()), ex);
275 throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_exception_errConnToSolr_msg(ex.getMessage()), ex);
279 List<KeywordList> keywordLists = XmlKeywordSearchList.getCurrent().getListsL();
280 boolean hasKeywordsForSearch =
false;
282 if (settings.keywordListIsEnabled(keywordList.getName()) && !keywordList.getKeywords().isEmpty()) {
283 hasKeywordsForSearch =
true;
287 if (!hasKeywordsForSearch) {
289 NbBundle.getMessage(this.getClass(),
"KeywordSearchIngestModule.init.onlyIdxKwSkipMsg")));
295 Map<String, String> stringsOptions = KeywordSearchSettings.getStringExtractOptions();
296 stringsConfig.
setExtractUTF8(Boolean.parseBoolean(stringsOptions.get(StringsExtractOptions.EXTRACT_UTF8.toString())));
297 stringsConfig.
setExtractUTF16(Boolean.parseBoolean(stringsOptions.get(StringsExtractOptions.EXTRACT_UTF16.toString())));
300 stringsExtractionContext = Lookups.fixed(stringsConfig);
308 if (initialized ==
false)
310 logger.log(Level.SEVERE,
"Skipping processing, module not initialized, file: {0}", abstractFile.getName());
315 if (abstractFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.VIRTUAL_DIR)) {
320 if (KeywordSearchSettings.getSkipKnown() && abstractFile.getKnown().equals(FileKnown.KNOWN)) {
336 if (!startedSearching) {
340 List<String> keywordListNames = settings.getNamesOfEnabledKeyWordLists();
341 IngestSearchRunner.getInstance().startJob(context, keywordListNames);
342 startedSearching =
true;
354 logger.log(Level.INFO,
"Keyword search ingest module instance {0} shutting down", instanceNum);
356 if ((initialized ==
false) || (context == null)) {
361 logger.log(Level.INFO,
"Keyword search ingest module instance {0} stopping search job due to ingest cancellation", instanceNum);
362 IngestSearchRunner.getInstance().stopJob(jobId);
368 IngestSearchRunner.getInstance().endJob(jobId);
374 logger.log(Level.INFO,
"Indexed files count: {0}", numIndexedFiles);
376 logger.log(Level.INFO,
"Indexed file chunks count: {0}", numIndexedChunks);
378 logger.log(Level.SEVERE,
"Error executing Solr queries to check number of indexed files and file chunks", ex);
381 synchronized (ingestStatus) {
382 ingestStatus.remove(jobId);
393 stringsExtractionContext = null;
401 int text_ingested = 0;
402 int metadata_ingested = 0;
403 int strings_ingested = 0;
408 synchronized (ingestStatus) {
409 Map<Long, IngestStatus> ingestStatusForJob = ingestStatus.get(jobId);
410 if (ingestStatusForJob == null) {
418 case METADATA_INGESTED:
421 case STRINGS_INGESTED:
424 case SKIPPED_ERROR_TEXTEXTRACT:
427 case SKIPPED_ERROR_INDEXING:
430 case SKIPPED_ERROR_IO:
439 StringBuilder msg =
new StringBuilder();
440 msg.append(
"<table border=0><tr><td>").append(NbBundle.getMessage(
this.getClass(),
"KeywordSearchIngestModule.postIndexSummary.knowFileHeaderLbl")).append(
"</td><td>").append(text_ingested).append(
"</td></tr>");
441 msg.append(
"<tr><td>").append(NbBundle.getMessage(
this.getClass(),
"KeywordSearchIngestModule.postIndexSummary.fileGenStringsHead")).append(
"</td><td>").append(strings_ingested).append(
"</td></tr>");
442 msg.append(
"<tr><td>").append(NbBundle.getMessage(
this.getClass(),
"KeywordSearchIngestModule.postIndexSummary.mdOnlyLbl")).append(
"</td><td>").append(metadata_ingested).append(
"</td></tr>");
443 msg.append(
"<tr><td>").append(NbBundle.getMessage(
this.getClass(),
"KeywordSearchIngestModule.postIndexSummary.idxErrLbl")).append(
"</td><td>").append(error_index).append(
"</td></tr>");
444 msg.append(
"<tr><td>").append(NbBundle.getMessage(
this.getClass(),
"KeywordSearchIngestModule.postIndexSummary.errTxtLbl")).append(
"</td><td>").append(error_text).append(
"</td></tr>");
445 msg.append(
"<tr><td>").append(NbBundle.getMessage(
this.getClass(),
"KeywordSearchIngestModule.postIndexSummary.errIoLbl")).append(
"</td><td>").append(error_io).append(
"</td></tr>");
446 msg.append(
"</table>");
447 String indexStats = msg.toString();
448 logger.log(Level.INFO,
"Keyword Indexing Completed: {0}", indexStats);
450 if (error_index > 0) {
452 NbBundle.getMessage(
this.getClass(),
"KeywordSearchIngestModule.postIndexSummary.kwIdxErrMsgFiles", error_index));
453 }
else if (error_io + error_text > 0) {
454 MessageNotifyUtil.
Notify.
warn(NbBundle.getMessage(
this.getClass(),
"KeywordSearchIngestModule.postIndexSummary.kwIdxWarnMsgTitle"),
455 NbBundle.getMessage(
this.getClass(),
"KeywordSearchIngestModule.postIndexSummary.idxErrReadFilesMsg"));
482 imageConfig.
setOCREnabled(KeywordSearchSettings.getOcrOption());
484 Lookup extractionContext = Lookups.fixed(imageConfig, terminator);
488 Reader extractedTextReader = extractor.
getReader();
490 return Ingester.getDefault().indexText(extractedTextReader, aFile.getId(), aFile.getName(), aFile, context);
511 Reader extractedTextReader = stringsExtractor.
getReader();
516 logger.log(Level.WARNING,
"Failed to extract strings and ingest, file ''{0}'' (id: {1}).",
new Object[]{aFile.getName(), aFile.getId()});
521 logger.log(Level.WARNING,
"Failed to extract strings and ingest, file '" + aFile.getName() +
"' (id: " + aFile.getId() +
").", ex);
534 private void indexFile(AbstractFile aFile,
boolean indexContent) {
537 TskData.TSK_DB_FILES_TYPE_ENUM aType = aFile.getType();
540 if ((aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS) || aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS))) {
544 extractStringsAndIndex(aFile);
548 final long size = aFile.getSize();
551 if ((indexContent ==
false || aFile.isDir() || size == 0)) {
556 ingester.indexMetaDataOnly(aFile);
558 }
catch (IngesterException ex) {
560 logger.log(Level.WARNING,
"Unable to index meta-data for file: " + aFile.getId(), ex);
568 String fileType = fileTypeDetector.
getMIMEType(aFile);
572 if (ARCHIVE_MIME_TYPES.contains(fileType)) {
577 ingester.indexMetaDataOnly(aFile);
579 }
catch (IngesterException ex) {
581 logger.log(Level.WARNING,
"Unable to index meta-data for file: " + aFile.getId(), ex);
586 boolean wasTextAdded =
false;
594 if (fileType.equals(
"application/octet-stream")) {
595 extractStringsAndIndex(aFile);
598 if (!extractTextAndIndex(aFile, fileType)) {
606 }
catch (IngesterException e) {
607 logger.log(Level.INFO,
"Could not extract text with Tika, " + aFile.getId() +
", "
608 + aFile.getName(), e);
610 }
catch (Exception e) {
611 logger.log(Level.WARNING,
"Error extracting text with Tika, " + aFile.getId() +
", "
612 + aFile.getName(), e);
616 if ((wasTextAdded ==
false) && (aFile.getNameExtension().equalsIgnoreCase(
"txt") && !(aFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.CARVED)))) {
620 TextFileExtractor textFileExtractor =
new TextFileExtractor();
621 Reader textReader = textFileExtractor.getReader(aFile);
622 if (textReader == null) {
623 logger.log(Level.INFO,
"Unable to extract with TextFileExtractor, Reader was null for file: {0}", aFile.getName());
624 }
else if (Ingester.getDefault().indexText(textReader, aFile.getId(), aFile.getName(), aFile, context)) {
628 }
catch (IngesterException ex) {
629 logger.log(Level.WARNING,
"Unable to index as unicode", ex);
631 logger.log(Level.INFO,
"Could not extract text with TextFileExtractor", ex);
636 if (wasTextAdded ==
false) {
637 extractStringsAndIndex(aFile);
int queryNumIndexedFiles()
FileTypeDetector fileTypeDetector
synchronized long decrementAndGet(long jobId)
int queryNumIndexedChunks()
void tryConnect(String host, int port)
static IndexingServerProperties getMultiUserServerProperties(String caseDirectory)
METADATA_INGESTED
The file had no content, so only its metadata was indexed.
String getCaseDirectory()
boolean extractTextAndIndex(AbstractFile aFile, String detectedFormat)
void startUp(IngestJobContext context)
static synchronized Server getServer()
synchronized long incrementAndGet(long jobId)
static IngestMessage createMessage(MessageType messageType, String source, String subject, String detailsHtml)
String getMIMEType(AbstractFile file)
final KeywordSearchJobSettings settings
SKIPPED_ERROR_INDEXING
File was skipped because the indexing engine encountered problems.
boolean extractStringsAndIndex(AbstractFile aFile)
int queryNumIndexedDocuments()
void postMessage(final IngestMessage message)
boolean fileIngestIsCancelled()
SKIPPED_ERROR_TEXTEXTRACT
File was skipped because of text extraction issues.
static void putIngestStatus(long ingestJobId, long fileId, IngestStatus status)
ProcessResult process(AbstractFile abstractFile)
static void error(String title, String message)
void indexFile(AbstractFile aFile, boolean indexContent)
synchronized static Logger getLogger(String name)
static Case getCurrentCaseThrows()
static IngestMessage createWarningMessage(String source, String subject, String detailsHtml)
Lookup stringsExtractionContext
static void warn(String title, String message)
static synchronized IngestServices getInstance()
STRINGS_INGESTED
Text was extracted using the detected file type and was indexed.