19 package org.sleuthkit.autopsy.keywordsearch;
 
   21 import java.util.ArrayList;
 
   22 import java.util.HashMap;
 
   23 import java.util.List;
 
   25 import java.util.concurrent.atomic.AtomicInteger;
 
   26 import java.util.logging.Level;
 
   27 import org.openide.util.NbBundle;
 
   54     enum UpdateFrequency {
 
   60         NONE(Integer.MAX_VALUE),
 
   62         private final int time;
 
   64         UpdateFrequency(
int time) {
 
   83     private final KeywordSearchJobSettings 
settings;
 
   87     private static final AtomicInteger 
instanceCount = 
new AtomicInteger(0); 
 
  101     private static final Map<Long, Map<Long, IngestStatus>> 
ingestStatus = 
new HashMap<>(); 
 
  105             Map<Long, IngestStatus> ingestStatusForJob = ingestStatus.get(ingestJobId);                       
 
  106             if (ingestStatusForJob == null) {
 
  107                 ingestStatusForJob = 
new HashMap<>();
 
  108                 ingestStatus.put(ingestJobId, ingestStatusForJob);
 
  111             ingestStatusForJob.put(fileId, status);
 
  112             ingestStatus.put(ingestJobId, ingestStatusForJob);
 
  118         instanceNum = instanceCount.getAndIncrement();
 
  128         logger.log(Level.INFO, 
"Initializing instance {0}", instanceNum); 
 
  136             throw new IngestModuleException(NbBundle.getMessage(
this.getClass(), 
"KeywordSearchIngestModule.startUp.fileTypeDetectorInitializationException.msg"));
 
  146                 if (!server.isRunning()) {
 
  147                     String msg = NbBundle.getMessage(this.getClass(), 
"KeywordSearchIngestModule.init.badInitMsg");
 
  148                     logger.log(Level.SEVERE, msg);
 
  149                     String details = NbBundle.getMessage(this.getClass(), 
"KeywordSearchIngestModule.init.tryStopSolrMsg", msg);
 
  154                 logger.log(Level.WARNING, 
"Error checking if Solr server is running while initializing ingest", ex); 
 
  156                 String msg = NbBundle.getMessage(this.getClass(), 
"KeywordSearchIngestModule.init.badInitMsg");
 
  157                 String details = NbBundle.getMessage(this.getClass(), 
"KeywordSearchIngestModule.init.tryStopSolrMsg", msg);
 
  167                         NbBundle.getMessage(
this.getClass(), 
"KeywordSearchIngestModule.init.exception.errConnToSolr.msg",
 
  172             List<KeywordList> keywordLists = XmlKeywordSearchList.getCurrent().getListsL();
 
  173             boolean hasKeywordsForSearch = 
false;
 
  175                 if (settings.keywordListIsEnabled(keywordList.getName()) && !keywordList.getKeywords().isEmpty()) {
 
  176                     hasKeywordsForSearch = 
true;
 
  180             if (!hasKeywordsForSearch) {
 
  182                         NbBundle.getMessage(this.getClass(), 
"KeywordSearchIngestModule.init.onlyIdxKwSkipMsg")));
 
  187         stringExtractor = 
new StringsTextExtractor(
this);
 
  188         stringExtractor.setScripts(KeywordSearchSettings.getStringExtractScripts());
 
  189         stringExtractor.setOptions(KeywordSearchSettings.getStringExtractOptions());
 
  192         final StringBuilder sbScripts = 
new StringBuilder();
 
  193         for (
SCRIPT s : KeywordSearchSettings.getStringExtractScripts()) {
 
  194             sbScripts.append(s.name()).append(
" ");
 
  196         logger.log(Level.INFO, 
"Using string extract scripts: {0}", sbScripts.toString()); 
 
  198         textExtractors = 
new ArrayList<>();
 
  200         textExtractors.add(
new HtmlTextExtractor(
this));
 
  201         textExtractors.add(
new TikaTextExtractor(
this));
 
  209         if (initialized == 
false) 
 
  211             logger.log(Level.WARNING, 
"Skipping processing, module not initialized, file: {0}", abstractFile.
getName());  
 
  231         if (!startedSearching) {
 
  232             List<String> keywordListNames = settings.getNamesOfEnabledKeyWordLists();
 
  234             startedSearching = 
true;
 
  246         logger.log(Level.INFO, 
"Instance {0}", instanceNum); 
 
  248         if (initialized == 
false) {
 
  264                 ingestStatus.remove(jobId);
 
  273             logger.log(Level.INFO, 
"Indexed files count: {0}", numIndexedFiles); 
 
  274             logger.log(Level.INFO, 
"Indexed file chunks count: {0}", numIndexedChunks); 
 
  276             logger.log(Level.WARNING, 
"Error executing Solr query to check number of indexed files/chunks: ", ex); 
 
  286         logger.log(Level.INFO, 
"stop()"); 
 
  297         textExtractors.clear();
 
  298         textExtractors = null;
 
  299         stringExtractor = null;
 
  308         int text_ingested = 0;
 
  309         int metadata_ingested = 0;
 
  310         int strings_ingested = 0;
 
  316             Map<Long, IngestStatus> ingestStatusForJob = ingestStatus.get(jobId);
 
  322                     case METADATA_INGESTED:
 
  325                     case STRINGS_INGESTED:
 
  328                     case SKIPPED_ERROR_TEXTEXTRACT:
 
  331                     case SKIPPED_ERROR_INDEXING:
 
  334                     case SKIPPED_ERROR_IO:
 
  343         StringBuilder msg = 
new StringBuilder();
 
  344         msg.append(
"<table border=0><tr><td>").append(NbBundle.getMessage(
this.getClass(), 
"KeywordSearchIngestModule.postIndexSummary.knowFileHeaderLbl")).append(
"</td><td>").append(text_ingested).append(
"</td></tr>"); 
 
  345         msg.append(
"<tr><td>").append(NbBundle.getMessage(
this.getClass(), 
"KeywordSearchIngestModule.postIndexSummary.fileGenStringsHead")).append(
"</td><td>").append(strings_ingested).append(
"</td></tr>"); 
 
  346         msg.append(
"<tr><td>").append(NbBundle.getMessage(
this.getClass(), 
"KeywordSearchIngestModule.postIndexSummary.mdOnlyLbl")).append(
"</td><td>").append(metadata_ingested).append(
"</td></tr>"); 
 
  347         msg.append(
"<tr><td>").append(NbBundle.getMessage(
this.getClass(), 
"KeywordSearchIngestModule.postIndexSummary.idxErrLbl")).append(
"</td><td>").append(error_index).append(
"</td></tr>"); 
 
  348         msg.append(
"<tr><td>").append(NbBundle.getMessage(
this.getClass(), 
"KeywordSearchIngestModule.postIndexSummary.errTxtLbl")).append(
"</td><td>").append(error_text).append(
"</td></tr>"); 
 
  349         msg.append(
"<tr><td>").append(NbBundle.getMessage(
this.getClass(), 
"KeywordSearchIngestModule.postIndexSummary.errIoLbl")).append(
"</td><td>").append(error_io).append(
"</td></tr>"); 
 
  350         msg.append(
"</table>"); 
 
  351         String indexStats = msg.toString();
 
  352         logger.log(Level.INFO, 
"Keyword Indexing Completed: {0}", indexStats); 
 
  354         if (error_index > 0) {
 
  356                     NbBundle.getMessage(
this.getClass(), 
"KeywordSearchIngestModule.postIndexSummary.kwIdxErrMsgFiles", error_index));
 
  357         } 
else if (error_io + error_text > 0) {
 
  358             MessageNotifyUtil.
Notify.
warn(NbBundle.getMessage(
this.getClass(), 
"KeywordSearchIngestModule.postIndexSummary.kwIdxWarnMsgTitle"),
 
  359                     NbBundle.getMessage(
this.getClass(), 
"KeywordSearchIngestModule.postIndexSummary.idxErrReadFilesMsg"));
 
  383             TextExtractor fileExtract = null;
 
  386             for (TextExtractor fe : textExtractors) {
 
  387                 if (fe.isSupported(aFile, detectedFormat)) {
 
  393             if (fileExtract == null) {
 
  394                 logger.log(Level.INFO, 
"No text extractor found for file id:{0}, name: {1}, detected format: {2}", 
new Object[]{aFile.getId(), aFile.getName(), detectedFormat}); 
 
  401             return fileExtract.index(aFile);
 
  413                 if (stringExtractor.index(aFile)) {
 
  417                     logger.log(Level.WARNING, 
"Failed to extract strings and ingest, file ''{0}'' (id: {1}).", 
new Object[]{aFile.getName(), aFile.getId()});  
 
  421             } 
catch (IngesterException ex) {
 
  422                 logger.log(Level.WARNING, 
"Failed to extract strings and ingest, file '" + aFile.
getName() + 
"' (id: " + aFile.
getId() + 
").", ex);  
 
  438             for (TextExtractor extractor : textExtractors) {
 
  439                 if (extractor.isContentTypeSpecific() == 
true 
  440                         && extractor.isSupported(aFile, detectedFormat)) {
 
  465             final long size = aFile.
getSize();
 
  467             if ((indexContent == 
false || aFile.
isDir() || size == 0)) {
 
  469                     ingester.ingest(aFile, 
false); 
 
  471                 } 
catch (IngesterException ex) {
 
  473                     logger.log(Level.WARNING, 
"Unable to index meta-data for file: " + aFile.
getId(), ex); 
 
  478             String detectedFormat;
 
  480                 detectedFormat = fileTypeDetector.
getFileType(aFile);
 
  482                 logger.log(Level.SEVERE, String.format(
"Could not detect format using fileTypeDetector for file: %s", aFile), ex); 
 
  488             if (TextExtractor.ARCHIVE_MIME_TYPES.contains(detectedFormat)) {
 
  490                     ingester.ingest(aFile, 
false); 
 
  492                 } 
catch (IngesterException ex) {
 
  494                     logger.log(Level.WARNING, 
"Unable to index meta-data for file: " + aFile.
getId(), ex); 
 
  499             boolean wasTextAdded = 
false;
 
  505                         logger.log(Level.WARNING, 
"Failed to extract text and ingest, file ''{0}'' (id: {1}).", 
new Object[]{aFile.getName(), aFile.getId()}); 
 
  512                 } 
catch (IngesterException e) {
 
  513                     logger.log(Level.INFO, 
"Could not extract text with Tika, " + aFile.
getId() + 
", "  
  516                 } 
catch (Exception e) {
 
  517                     logger.log(Level.WARNING, 
"Error extracting text with Tika, " + aFile.
getId() + 
", "  
  524             if (wasTextAdded == 
false) {
 
int queryNumIndexedFiles()
 
boolean isTextExtractSupported(AbstractFile aFile, String detectedFormat)
 
FileTypeDetector fileTypeDetector
 
synchronized long decrementAndGet(long jobId)
 
int queryNumIndexedChunks()
 
List< TextExtractor > textExtractors
 
static IngestMessage createErrorMessage(String source, String subject, String detailsHtml)
 
TskData.TSK_DB_FILES_TYPE_ENUM getType()
 
METADATA_INGESTED
No content, so we just ingested metadata. 
 
boolean extractTextAndIndex(AbstractFile aFile, String detectedFormat)
 
StringsTextExtractor stringExtractor
 
void startUp(IngestJobContext context)
 
synchronized void startJob(long jobId, long dataSourceId, List< String > keywordListNames)
 
static synchronized Server getServer()
 
synchronized long incrementAndGet(long jobId)
 
static IngestMessage createMessage(MessageType messageType, String source, String subject, String detailsHtml)
 
static final AtomicInteger instanceCount
 
final KeywordSearchJobSettings settings
 
SKIPPED_ERROR_INDEXING
File was skipped because index engine had problems. 
 
boolean extractStringsAndIndex(AbstractFile aFile)
 
static final IngestModuleReferenceCounter refCounter
 
TskData.FileKnown getKnown()
 
static synchronized SearchRunner getInstance()
 
int queryNumIndexedDocuments()
 
void postMessage(final IngestMessage message)
 
boolean fileIngestIsCancelled()
 
SKIPPED_ERROR_TEXTEXTRACT
File was skipped because of text extraction issues. 
 
static void putIngestStatus(long ingestJobId, long fileId, IngestStatus status)
 
ProcessResult process(AbstractFile abstractFile)
 
static void error(String title, String message)
 
void indexFile(AbstractFile aFile, boolean indexContent)
 
static IngestMessage createWarningMessage(String source, String subject, String detailsHtml)
 
String getFileType(AbstractFile file)
 
static Ingester getIngester()
 
static void warn(String title, String message)
 
static Logger getLogger(String name)
 
static final Map< Long, Map< Long, IngestStatus > > ingestStatus
 
final IngestServices services
 
static final Logger logger
 
static synchronized IngestServices getInstance()
 
STRINGS_INGESTED
Text was extracted by knowing file type and ingested.