Autopsy  4.10.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
KeywordSearchIngestModule.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2019 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import com.google.common.collect.ImmutableList;
22 import java.io.Reader;
23 import java.util.HashMap;
24 import java.util.List;
25 import java.util.Map;
26 import java.util.concurrent.atomic.AtomicInteger;
27 import java.util.logging.Level;
28 import org.openide.util.Lookup;
29 import org.openide.util.NbBundle;
30 import org.openide.util.NbBundle.Messages;
31 import org.openide.util.lookup.Lookups;
52 import org.sleuthkit.datamodel.AbstractFile;
53 import org.sleuthkit.datamodel.TskData;
54 import org.sleuthkit.datamodel.TskData.FileKnown;
55 
64 @NbBundle.Messages({
65  "# {0} - Reason for not starting Solr", "KeywordSearchIngestModule.init.tryStopSolrMsg={0}<br />Please try stopping Java Solr processes if any exist and restart the application.",
66  "KeywordSearchIngestModule.init.badInitMsg=Keyword search server was not properly initialized, cannot run keyword search ingest.",
67  "SolrConnectionCheck.Port=Invalid port number.",
68  "# {0} - Reason for not connecting to Solr", "KeywordSearchIngestModule.init.exception.errConnToSolr.msg=Error connecting to SOLR server: {0}.",
69  "KeywordSearchIngestModule.startUp.noOpenCore.msg=The index could not be opened or does not exist.",
70  "CannotRunFileTypeDetection=Unable to run file type detection."
71 })
72 public final class KeywordSearchIngestModule implements FileIngestModule {
73 
/**
 * MIME types that Indexer.indexFile does not run text extraction on: a file
 * whose detected MIME type is in this list only has its metadata indexed.
 * The list is immutable (ImmutableList).
 *
 * NOTE(review): "application/x-executable" is included although it is not an
 * archive format - presumably intentional (see the inline comment about
 * unstructured binary data); confirm.
 */
78  private static final List<String> ARCHIVE_MIME_TYPES
79  = ImmutableList.of(
80  //ignore unstructured binary and compressed data, for which string extraction or unzipper works better
81  "application/x-7z-compressed", //NON-NLS
82  "application/x-ace-compressed", //NON-NLS
83  "application/x-alz-compressed", //NON-NLS
84  "application/x-arj", //NON-NLS
85  "application/vnd.ms-cab-compressed", //NON-NLS
86  "application/x-cfs-compressed", //NON-NLS
87  "application/x-dgc-compressed", //NON-NLS
88  "application/x-apple-diskimage", //NON-NLS
89  "application/x-gca-compressed", //NON-NLS
90  "application/x-dar", //NON-NLS
91  "application/x-lzx", //NON-NLS
92  "application/x-lzh", //NON-NLS
93  "application/x-rar-compressed", //NON-NLS
94  "application/x-stuffit", //NON-NLS
95  "application/x-stuffitx", //NON-NLS
96  "application/x-gtar", //NON-NLS
97  "application/x-archive", //NON-NLS
98  "application/x-executable", //NON-NLS
99  "application/x-gzip", //NON-NLS
100  "application/zip", //NON-NLS
101  "application/x-zoo", //NON-NLS
102  "application/x-cpio", //NON-NLS
103  "application/x-shar", //NON-NLS
104  "application/x-tar", //NON-NLS
105  "application/x-bzip", //NON-NLS
106  "application/x-bzip2", //NON-NLS
107  "application/x-lzip", //NON-NLS
108  "application/x-lzma", //NON-NLS
109  "application/x-lzop", //NON-NLS
110  "application/x-z", //NON-NLS
111  "application/x-compress"); //NON-NLS
112 
/**
 * Names of the strings-extraction options. startUp() uses each constant's
 * toString() value as a key into the map returned by
 * KeywordSearchSettings.getStringExtractOptions().
 */
enum StringsExtractOptions {
    EXTRACT_UTF16,
    EXTRACT_UTF8
}
120 
/**
 * Named update-frequency presets, each carrying an integer time value.
 * NOTE(review): the unit of the value is not visible here (presumably
 * minutes between updates) - confirm against the code that reads getTime().
 */
enum UpdateFrequency {

    FAST(20),
    AVG(10),
    SLOW(5),
    SLOWEST(1),
    NONE(Integer.MAX_VALUE),
    // DEFAULT carries the same value as SLOW.
    DEFAULT(5);

    /** Time value associated with this preset. */
    private final int timeValue;

    UpdateFrequency(int timeValue) {
        this.timeValue = timeValue;
    }

    /**
     * @return the time value associated with this preset
     */
    int getTime() {
        return this.timeValue;
    }
}
    // Class-wide logger.
139  private static final Logger logger = Logger.getLogger(KeywordSearchIngestModule.class.getName());
    // Used to post ingest messages (startUp warning, index summary).
140  private final IngestServices services = IngestServices.getInstance();
    // Set to Ingester.getDefault() in startUp(); used by Indexer for metadata-only indexing.
141  private Ingester ingester = null;
    // Created at the end of startUp(); performs the per-file indexing in process().
142  private Indexer indexer;
    // NOTE(review): the two comments below appear to describe a field whose
    // declaration (listing line 143) is not present in this listing.
144 //only search images from current ingest, not images previously ingested/indexed
145  //accessed read-only by searcher thread
146 
    // Set to true by process() once the IngestSearchRunner job has been started.
147  private boolean startedSearching = false;
    // Lookup holding the StringsConfig built in startUp(); cleared by cleanup().
148  private Lookup stringsExtractionContext;
    // Per-job settings supplied to the constructor.
149  private final KeywordSearchJobSettings settings;
    // True only after startUp() has completed; process() skips files while false.
150  private boolean initialized = false;
    // Ingest job id taken from the context in startUp().
151  private long jobId;
152  private static final AtomicInteger instanceCount = new AtomicInteger(0); //just used for logging
    // This instance's sequence number, taken from instanceCount in the constructor.
153  private int instanceNum = 0;
    // Per-job count of module instances: the first instance runs the server checks
    // in startUp(), the last one posts the summary in shutDown().
    // NOTE(review): the fields 'context' and 'fileTypeDetector' are used by this
    // class but their declarations are not present in this listing.
154  private static final IngestModuleReferenceCounter refCounter = new IngestModuleReferenceCounter();
156 
/**
 * Outcome of indexing a single file, recorded via putIngestStatus() and
 * tallied by postIndexSummary().
 *
 * NOTE(review): only SKIPPED_ERROR_IO is visible in this listing; the rest of
 * the class also references TEXT_INGESTED, METADATA_INGESTED,
 * STRINGS_INGESTED, SKIPPED_ERROR_TEXTEXTRACT and SKIPPED_ERROR_INDEXING,
 * whose declarations (listing lines 159-163) appear to have been dropped.
 */
157  private enum IngestStatus {
158 
164  SKIPPED_ERROR_IO
165  };
    // Maps ingest job id -> (file id -> status). All access synchronizes on the map itself.
166  private static final Map<Long, Map<Long, IngestStatus>> ingestStatus = new HashMap<>(); //guarded by itself
167 
176  private static void putIngestStatus(long ingestJobId, long fileId, IngestStatus status) {
177  synchronized (ingestStatus) {
178  Map<Long, IngestStatus> ingestStatusForJob = ingestStatus.get(ingestJobId);
179  if (ingestStatusForJob == null) {
180  ingestStatusForJob = new HashMap<>();
181  ingestStatus.put(ingestJobId, ingestStatusForJob);
182  }
183  ingestStatusForJob.put(fileId, status);
184  ingestStatus.put(ingestJobId, ingestStatusForJob);
185  }
186  }
187 
/**
 * Creates a module instance with the given job settings and assigns it the
 * next instance number (the number is only used in log messages).
 *
 * @param settings keyword search settings for the ingest job
 */
KeywordSearchIngestModule(KeywordSearchJobSettings settings) {
    this.instanceNum = instanceCount.getAndIncrement();
    this.settings = settings;
}
192 
/**
 * Prepares this module instance for an ingest job.
 *
 * Verifies that a text index core is open and that its Solr and schema
 * versions are supported, creates the file type detector and obtains the
 * default Ingester. For the first instance of this module in the job (per
 * refCounter) it additionally checks that the Solr server can be reached.
 * Finally it builds the strings-extraction context, creates the Indexer and
 * marks the instance as initialized.
 *
 * @param context the ingest job context; its job id is recorded and the
 *                context is kept for later cancellation checks
 * @throws IngestModuleException if the core is not open, the index version is
 *                               unsupported, there is no open case, or the
 *                               Solr server cannot be verified
 */
198  @Messages({
199  "KeywordSearchIngestModule.startupMessage.failedToGetIndexSchema=Failed to get schema version for text index.",
200  "# {0} - Solr version number", "KeywordSearchIngestModule.startupException.indexSolrVersionNotSupported=Adding text no longer supported for Solr version {0} of the text index.",
201  "# {0} - schema version number", "KeywordSearchIngestModule.startupException.indexSchemaNotSupported=Adding text no longer supported for schema version {0} of the text index.",
202  "KeywordSearchIngestModule.noOpenCase.errMsg=No open case available."
203  })
204  @Override
205  public void startUp(IngestJobContext context) throws IngestModuleException {
206  initialized = false;
207  jobId = context.getJobId();
208 
209  Server server = KeywordSearch.getServer();
210  if (server.coreIsOpen() == false) {
211  throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startUp_noOpenCore_msg());
212  }
213 
    // Refuse to add text to an index created with a different Solr version or
    // an incompatible schema version.
214  try {
215  Index indexInfo = server.getIndexInfo();
216  if (!IndexFinder.getCurrentSolrVersion().equals(indexInfo.getSolrVersion())) {
217  throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startupException_indexSolrVersionNotSupported(indexInfo.getSolrVersion()));
218  }
219  if (!indexInfo.isCompatible(IndexFinder.getCurrentSchemaVersion())) {
220  throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startupException_indexSchemaNotSupported(indexInfo.getSchemaVersion()));
221  }
222  } catch (NoOpenCoreException ex) {
223  throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startupMessage_failedToGetIndexSchema(), ex);
224  }
225 
226  try {
227  fileTypeDetector = new FileTypeDetector();
    // NOTE(review): the catch clause for this try (listing line 228) is missing
    // from this listing; the throw below belongs to it.
229  throw new IngestModuleException(Bundle.CannotRunFileTypeDetection(), ex);
230  }
231 
232  ingester = Ingester.getDefault();
233  this.context = context;
234 
235  // increment the module reference count
236  // if first instance of this module for this job then check the server and existence of keywords
237  Case openCase;
238  try {
239  openCase = Case.getCurrentCaseThrows();
240  } catch (NoCurrentCaseException ex) {
241  throw new IngestModuleException(Bundle.KeywordSearchIngestModule_noOpenCase_errMsg(), ex);
242  }
243  if (refCounter.incrementAndGet(jobId) == 1) {
244  if (openCase.getCaseType() == Case.CaseType.MULTI_USER_CASE) {
245  // for multi-user cases need to verify connection to remote SOLR server
246  KeywordSearchService kwsService = new SolrSearchService();
    // NOTE(review): the declaration of 'properties' (listing line 247) is
    // missing from this listing.
248  int port;
249  try {
250  port = Integer.parseInt(properties.getPort());
251  } catch (NumberFormatException ex) {
252  // if there is an error parsing the port number
253  throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_badInitMsg() + " " + Bundle.SolrConnectionCheck_Port(), ex);
254  }
255  try {
256  kwsService.tryConnect(properties.getHost(), port);
257  } catch (KeywordSearchServiceException ex) {
258  throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_badInitMsg(), ex);
259  }
260  } else {
261  // for single-user cases need to verify connection to local SOLR service
262  try {
263  if (!server.isRunning()) {
264  throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_tryStopSolrMsg(Bundle.KeywordSearchIngestModule_init_badInitMsg()));
265  }
266  } catch (KeywordSearchModuleException ex) {
267  //this means Solr is not properly initialized
268  throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_tryStopSolrMsg(Bundle.KeywordSearchIngestModule_init_badInitMsg()), ex);
269  }
270  try {
271  // make an actual query to verify that server is responding
272  // we had cases where getStatus was OK, but the connection resulted in a 404
273  server.queryNumIndexedDocuments();
    // NOTE(review): the catch clause for this try (listing line 274) is missing
    // from this listing; the throw below belongs to it.
275  throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_exception_errConnToSolr_msg(ex.getMessage()), ex);
276  }
277 
    // NOTE(review): as the braces read in this listing, the keyword check below
    // sits inside the single-user branch only, so multi-user jobs never get the
    // "no keywords" warning - confirm this is intended.
278  // check if this job has any searchable keywords
279  List<KeywordList> keywordLists = XmlKeywordSearchList.getCurrent().getListsL();
280  boolean hasKeywordsForSearch = false;
281  for (KeywordList keywordList : keywordLists) {
282  if (settings.keywordListIsEnabled(keywordList.getName()) && !keywordList.getKeywords().isEmpty()) {
283  hasKeywordsForSearch = true;
284  break;
285  }
286  }
287  if (!hasKeywordsForSearch) {
288  services.postMessage(IngestMessage.createWarningMessage(KeywordSearchModuleFactory.getModuleName(), NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.init.noKwInLstMsg"),
289  NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.init.onlyIdxKwSkipMsg")));
290  }
291  }
292  }
293 
    // Build the strings-extraction configuration from the saved settings; it is
    // handed to the strings extractor through a fixed Lookup.
294  StringsConfig stringsConfig = new StringsConfig();
295  Map<String, String> stringsOptions = KeywordSearchSettings.getStringExtractOptions();
296  stringsConfig.setExtractUTF8(Boolean.parseBoolean(stringsOptions.get(StringsExtractOptions.EXTRACT_UTF8.toString())));
297  stringsConfig.setExtractUTF16(Boolean.parseBoolean(stringsOptions.get(StringsExtractOptions.EXTRACT_UTF16.toString())));
298  stringsConfig.setLanguageScripts(KeywordSearchSettings.getStringExtractScripts());
299 
300  stringsExtractionContext = Lookups.fixed(stringsConfig);
301 
302  indexer = new Indexer();
303  initialized = true;
304  }
305 
306  @Override
307  public ProcessResult process(AbstractFile abstractFile) {
308  if (initialized == false) //error initializing indexing/Solr
309  {
310  logger.log(Level.SEVERE, "Skipping processing, module not initialized, file: {0}", abstractFile.getName()); //NON-NLS
311  putIngestStatus(jobId, abstractFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
312  return ProcessResult.OK;
313  }
314 
315  if (abstractFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.VIRTUAL_DIR)) {
316  //skip indexing of virtual dirs (no content, no real name) - will index children files
317  return ProcessResult.OK;
318  }
319 
320  if (KeywordSearchSettings.getSkipKnown() && abstractFile.getKnown().equals(FileKnown.KNOWN)) {
321  //index meta-data only
322  if (context.fileIngestIsCancelled()) {
323  return ProcessResult.OK;
324  }
325  indexer.indexFile(abstractFile, false);
326  return ProcessResult.OK;
327  }
328 
329  //index the file and content (if the content is supported)
330  if (context.fileIngestIsCancelled()) {
331  return ProcessResult.OK;
332  }
333  indexer.indexFile(abstractFile, true);
334 
335  // Start searching if it hasn't started already
336  if (!startedSearching) {
337  if (context.fileIngestIsCancelled()) {
338  return ProcessResult.OK;
339  }
340  List<String> keywordListNames = settings.getNamesOfEnabledKeyWordLists();
341  IngestSearchRunner.getInstance().startJob(context, keywordListNames);
342  startedSearching = true;
343  }
344 
345  return ProcessResult.OK;
346  }
347 
/**
 * Ends this instance's participation in the ingest job.
 *
 * Does nothing beyond logging if the instance was never initialized or has no
 * context. If file ingest was cancelled, the search job is stopped and the
 * instance is cleaned up. Otherwise the search job is ended and, when this is
 * the last instance for the job (refCounter reaches 0), the indexed file and
 * chunk counts are logged, the index summary is posted and the job's status
 * map is removed.
 */
352  @Override
353  public void shutDown() {
354  logger.log(Level.INFO, "Keyword search ingest module instance {0} shutting down", instanceNum); //NON-NLS
355 
356  if ((initialized == false) || (context == null)) {
357  return;
358  }
359 
    // NOTE(review): this cancellation path returns without decrementing
    // refCounter or removing the job's entry from ingestStatus - confirm that
    // this is intended and not a leak of per-job state.
360  if (context.fileIngestIsCancelled()) {
361  logger.log(Level.INFO, "Keyword search ingest module instance {0} stopping search job due to ingest cancellation", instanceNum); //NON-NLS
362  IngestSearchRunner.getInstance().stopJob(jobId);
363  cleanup();
364  return;
365  }
366 
367  // Remove from the search list and trigger final commit and final search
368  IngestSearchRunner.getInstance().endJob(jobId);
369 
370  // We only need to post the summary msg from the last module per job
371  if (refCounter.decrementAndGet(jobId) == 0) {
372  try {
373  final int numIndexedFiles = KeywordSearch.getServer().queryNumIndexedFiles();
374  logger.log(Level.INFO, "Indexed files count: {0}", numIndexedFiles); //NON-NLS
375  final int numIndexedChunks = KeywordSearch.getServer().queryNumIndexedChunks();
376  logger.log(Level.INFO, "Indexed file chunks count: {0}", numIndexedChunks); //NON-NLS
    // NOTE(review): the catch clause for this try (listing line 377) is missing
    // from this listing; the log call below belongs to it.
378  logger.log(Level.SEVERE, "Error executing Solr queries to check number of indexed files and file chunks", ex); //NON-NLS
379  }
380  postIndexSummary();
381  synchronized (ingestStatus) {
382  ingestStatus.remove(jobId);
383  }
384  }
385 
386  cleanup();
387  }
388 
392  private void cleanup() {
393  stringsExtractionContext = null;
394  initialized = false;
395  }
396 
400  private void postIndexSummary() {
401  int text_ingested = 0;
402  int metadata_ingested = 0;
403  int strings_ingested = 0;
404  int error_text = 0;
405  int error_index = 0;
406  int error_io = 0;
407 
408  synchronized (ingestStatus) {
409  Map<Long, IngestStatus> ingestStatusForJob = ingestStatus.get(jobId);
410  if (ingestStatusForJob == null) {
411  return;
412  }
413  for (IngestStatus s : ingestStatusForJob.values()) {
414  switch (s) {
415  case TEXT_INGESTED:
416  text_ingested++;
417  break;
418  case METADATA_INGESTED:
419  metadata_ingested++;
420  break;
421  case STRINGS_INGESTED:
422  strings_ingested++;
423  break;
424  case SKIPPED_ERROR_TEXTEXTRACT:
425  error_text++;
426  break;
427  case SKIPPED_ERROR_INDEXING:
428  error_index++;
429  break;
430  case SKIPPED_ERROR_IO:
431  error_io++;
432  break;
433  default:
434  ;
435  }
436  }
437  }
438 
439  StringBuilder msg = new StringBuilder();
440  msg.append("<table border=0><tr><td>").append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.postIndexSummary.knowFileHeaderLbl")).append("</td><td>").append(text_ingested).append("</td></tr>"); //NON-NLS
441  msg.append("<tr><td>").append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.postIndexSummary.fileGenStringsHead")).append("</td><td>").append(strings_ingested).append("</td></tr>"); //NON-NLS
442  msg.append("<tr><td>").append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.postIndexSummary.mdOnlyLbl")).append("</td><td>").append(metadata_ingested).append("</td></tr>"); //NON-NLS
443  msg.append("<tr><td>").append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.postIndexSummary.idxErrLbl")).append("</td><td>").append(error_index).append("</td></tr>"); //NON-NLS
444  msg.append("<tr><td>").append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.postIndexSummary.errTxtLbl")).append("</td><td>").append(error_text).append("</td></tr>"); //NON-NLS
445  msg.append("<tr><td>").append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.postIndexSummary.errIoLbl")).append("</td><td>").append(error_io).append("</td></tr>"); //NON-NLS
446  msg.append("</table>"); //NON-NLS
447  String indexStats = msg.toString();
448  logger.log(Level.INFO, "Keyword Indexing Completed: {0}", indexStats); //NON-NLS
449  services.postMessage(IngestMessage.createMessage(MessageType.INFO, KeywordSearchModuleFactory.getModuleName(), NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.postIndexSummary.kwIdxResultsLbl"), indexStats));
450  if (error_index > 0) {
451  MessageNotifyUtil.Notify.error(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.postIndexSummary.kwIdxErrsTitle"),
452  NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.postIndexSummary.kwIdxErrMsgFiles", error_index));
453  } else if (error_io + error_text > 0) {
454  MessageNotifyUtil.Notify.warn(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.postIndexSummary.kwIdxWarnMsgTitle"),
455  NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.postIndexSummary.idxErrReadFilesMsg"));
456  }
457  }
458 
463  private class Indexer {
464 
465  private final Logger logger = Logger.getLogger(Indexer.class.getName());
466 
/**
 * Extracts text from a file with the extractor chosen by
 * TextExtractorFactory and indexes it through the default Ingester.
 *
 * The extraction context carries an ImageConfig (OCR enabled according to
 * KeywordSearchSettings.getOcrOption()) and a ProcessTerminator that reports
 * whether file ingest has been cancelled.
 *
 * @param aFile          file to extract text from and index
 * @param detectedFormat MIME type detected for the file (not used in the
 *                       code visible in this listing)
 * @return the result of Ingester.indexText, or false from the catch branch
 *         (the inline comment suggests: no text extractor found)
 * @throws IngesterException declared by this method; presumably raised by
 *                           the indexing call - confirm
 */
480  private boolean extractTextAndIndex(AbstractFile aFile, String detectedFormat) throws IngesterException {
481  ImageConfig imageConfig = new ImageConfig();
482  imageConfig.setOCREnabled(KeywordSearchSettings.getOcrOption());
483  ProcessTerminator terminator = () -> context.fileIngestIsCancelled();
484  Lookup extractionContext = Lookups.fixed(imageConfig, terminator);
485 
486  try {
487  TextExtractor extractor = TextExtractorFactory.getExtractor(aFile, extractionContext);
488  Reader extractedTextReader = extractor.getReader();
489  //divide into chunks and index
490  return Ingester.getDefault().indexText(extractedTextReader, aFile.getId(), aFile.getName(), aFile, context);
    // NOTE(review): the catch clause for this try (listing line 491) is missing
    // from this listing; the comment and return below belong to it.
492  //No text extractor found... run the default instead
493  return false;
494  }
495  }
496 
505  private boolean extractStringsAndIndex(AbstractFile aFile) {
506  try {
507  if (context.fileIngestIsCancelled()) {
508  return true;
509  }
510  TextExtractor stringsExtractor = TextExtractorFactory.getStringsExtractor(aFile, stringsExtractionContext);
511  Reader extractedTextReader = stringsExtractor.getReader();
512  if (Ingester.getDefault().indexText(extractedTextReader, aFile.getId(), aFile.getName(), aFile, KeywordSearchIngestModule.this.context)) {
513  putIngestStatus(jobId, aFile.getId(), IngestStatus.STRINGS_INGESTED);
514  return true;
515  } else {
516  logger.log(Level.WARNING, "Failed to extract strings and ingest, file ''{0}'' (id: {1}).", new Object[]{aFile.getName(), aFile.getId()}); //NON-NLS
517  putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT);
518  return false;
519  }
520  } catch (IngesterException | TextExtractor.InitReaderException ex) {
521  logger.log(Level.WARNING, "Failed to extract strings and ingest, file '" + aFile.getName() + "' (id: " + aFile.getId() + ").", ex); //NON-NLS
522  putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
523  return false;
524  }
525  }
526 
534  private void indexFile(AbstractFile aFile, boolean indexContent) {
535  //logger.log(Level.INFO, "Processing AbstractFile: " + abstractFile.getName());
536 
537  TskData.TSK_DB_FILES_TYPE_ENUM aType = aFile.getType();
538 
539  // unallocated and unused blocks can only have strings extracted from them.
540  if ((aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS) || aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS))) {
541  if (context.fileIngestIsCancelled()) {
542  return;
543  }
544  extractStringsAndIndex(aFile);
545  return;
546  }
547 
548  final long size = aFile.getSize();
549  //if not to index content, or a dir, or 0 content, index meta data only
550 
551  if ((indexContent == false || aFile.isDir() || size == 0)) {
552  try {
553  if (context.fileIngestIsCancelled()) {
554  return;
555  }
556  ingester.indexMetaDataOnly(aFile);
557  putIngestStatus(jobId, aFile.getId(), IngestStatus.METADATA_INGESTED);
558  } catch (IngesterException ex) {
559  putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
560  logger.log(Level.WARNING, "Unable to index meta-data for file: " + aFile.getId(), ex); //NON-NLS
561  }
562  return;
563  }
564 
565  if (context.fileIngestIsCancelled()) {
566  return;
567  }
568  String fileType = fileTypeDetector.getMIMEType(aFile);
569 
570  // we skip archive formats that are opened by the archive module.
571  // @@@ We could have a check here to see if the archive module was enabled though...
572  if (ARCHIVE_MIME_TYPES.contains(fileType)) {
573  try {
574  if (context.fileIngestIsCancelled()) {
575  return;
576  }
577  ingester.indexMetaDataOnly(aFile);
578  putIngestStatus(jobId, aFile.getId(), IngestStatus.METADATA_INGESTED);
579  } catch (IngesterException ex) {
580  putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
581  logger.log(Level.WARNING, "Unable to index meta-data for file: " + aFile.getId(), ex); //NON-NLS
582  }
583  return;
584  }
585 
586  boolean wasTextAdded = false;
587 
588  //extract text with one of the extractors, divide into chunks and index with Solr
589  try {
590  //logger.log(Level.INFO, "indexing: " + aFile.getName());
591  if (context.fileIngestIsCancelled()) {
592  return;
593  }
594  if (fileType.equals("application/octet-stream")) {
595  extractStringsAndIndex(aFile);
596  return;
597  }
598  if (!extractTextAndIndex(aFile, fileType)) {
599  // Text extractor not found for file. Extract string only.
600  putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT);
601  } else {
602  putIngestStatus(jobId, aFile.getId(), IngestStatus.TEXT_INGESTED);
603  wasTextAdded = true;
604  }
605 
606  } catch (IngesterException e) {
607  logger.log(Level.INFO, "Could not extract text with Tika, " + aFile.getId() + ", " //NON-NLS
608  + aFile.getName(), e);
609  putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
610  } catch (Exception e) {
611  logger.log(Level.WARNING, "Error extracting text with Tika, " + aFile.getId() + ", " //NON-NLS
612  + aFile.getName(), e);
613  putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT);
614  }
615 
616  if ((wasTextAdded == false) && (aFile.getNameExtension().equalsIgnoreCase("txt") && !(aFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.CARVED)))) {
617  //Carved Files should be the only type of unallocated files capable of a txt extension and
618  //should be ignored by the TextFileExtractor because they may contain more than one text encoding
619  try {
620  TextFileExtractor textFileExtractor = new TextFileExtractor();
621  Reader textReader = textFileExtractor.getReader(aFile);
622  if (textReader == null) {
623  logger.log(Level.INFO, "Unable to extract with TextFileExtractor, Reader was null for file: {0}", aFile.getName());
624  } else if (Ingester.getDefault().indexText(textReader, aFile.getId(), aFile.getName(), aFile, context)) {
625  putIngestStatus(jobId, aFile.getId(), IngestStatus.TEXT_INGESTED);
626  wasTextAdded = true;
627  }
628  } catch (IngesterException ex) {
629  logger.log(Level.WARNING, "Unable to index as unicode", ex);
630  } catch (TextFileExtractorException ex) {
631  logger.log(Level.INFO, "Could not extract text with TextFileExtractor", ex);
632  }
633  }
634 
635  // if it wasn't supported or had an error, default to strings
636  if (wasTextAdded == false) {
637  extractStringsAndIndex(aFile);
638  }
639  }
640  }
641 }
static IndexingServerProperties getMultiUserServerProperties(String caseDirectory)
Definition: Server.java:852
boolean extractTextAndIndex(AbstractFile aFile, String detectedFormat)
static IngestMessage createMessage(MessageType messageType, String source, String subject, String detailsHtml)
static TextExtractor getStringsExtractor(Content content, Lookup context)
static TextExtractor getExtractor(Content content, Lookup context)
void postMessage(final IngestMessage message)
static void putIngestStatus(long ingestJobId, long fileId, IngestStatus status)
static void error(String title, String message)
synchronized static Logger getLogger(String name)
Definition: Logger.java:124
static IngestMessage createWarningMessage(String source, String subject, String detailsHtml)
static void warn(String title, String message)
static synchronized IngestServices getInstance()
STRINGS_INGESTED
Text was extracted based on the known file type and then ingested.

Copyright © 2012-2018 Basis Technology. Generated on: Fri Mar 22 2019
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.