Autopsy  4.19.2
Graphical digital forensics platform for The Sleuth Kit and other tools.
IngestSearchRunner.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2014 - 2021 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import com.google.common.util.concurrent.ThreadFactoryBuilder;
22 import java.util.ArrayList;
23 import java.util.Collections;
24 import java.util.HashMap;
25 import java.util.HashSet;
26 import java.util.Iterator;
27 import java.util.List;
28 import java.util.Map;
29 import java.util.Map.Entry;
30 import java.util.Set;
31 import java.util.concurrent.CancellationException;
32 import java.util.concurrent.ConcurrentHashMap;
33 import java.util.concurrent.ExecutionException;
34 import java.util.concurrent.Future;
35 import java.util.concurrent.ScheduledThreadPoolExecutor;
36 import static java.util.concurrent.TimeUnit.MILLISECONDS;
37 import java.util.concurrent.atomic.AtomicLong;
38 import java.util.logging.Level;
39 import javax.annotation.concurrent.GuardedBy;
40 import javax.swing.SwingUtilities;
41 import javax.swing.SwingWorker;
42 import org.netbeans.api.progress.ProgressHandle;
43 import org.openide.util.Cancellable;
44 import org.openide.util.NbBundle;
45 import org.openide.util.NbBundle.Messages;
54 
62 final class IngestSearchRunner {
63 
64  private static final Logger logger = Logger.getLogger(IngestSearchRunner.class.getName());
65  private static IngestSearchRunner instance = null;
66  private final IngestServices services = IngestServices.getInstance();
67  private Ingester ingester = null;
68  private long currentUpdateIntervalMs;
69  private volatile boolean periodicSearchTaskRunning;
70  private volatile Future<?> periodicSearchTaskHandle;
71  private final ScheduledThreadPoolExecutor periodicSearchTaskExecutor;
72  private static final int NUM_SEARCH_SCHEDULING_THREADS = 1;
73  private static final String SEARCH_SCHEDULER_THREAD_NAME = "periodic-search-scheduling-%d";
74  private final Map<Long, SearchJobInfo> jobs = new ConcurrentHashMap<>(); // Ingest job ID to search job info
75  private final boolean usingNetBeansGUI = RuntimeProperties.runningWithGUI();
76 
77  /*
78  * Constructs a singleton object that performs periodic and final keyword
79  * searches for ingest jobs. Periodic searches are done in background tasks.
80  * This represents a careful working around of the contract for
81  * IngestModule.process(). Final searches are done synchronously in the
82  * calling thread, as required by the contract for IngestModule.shutDown().
83  */
84  private IngestSearchRunner() {
85  currentUpdateIntervalMs = ((long) KeywordSearchSettings.getUpdateFrequency().getTime()) * 60 * 1000;
86  ingester = Ingester.getDefault();
87  periodicSearchTaskExecutor = new ScheduledThreadPoolExecutor(NUM_SEARCH_SCHEDULING_THREADS, new ThreadFactoryBuilder().setNameFormat(SEARCH_SCHEDULER_THREAD_NAME).build());
88  }
89 
95  public static synchronized IngestSearchRunner getInstance() {
96  if (instance == null) {
97  instance = new IngestSearchRunner();
98  }
99  return instance;
100  }
101 
109  public synchronized void startJob(IngestJobContext jobContext, List<String> keywordListNames) {
110  long jobId = jobContext.getJobId();
111  if (jobs.containsKey(jobId) == false) {
112  SearchJobInfo jobData = new SearchJobInfo(jobContext, keywordListNames);
113  jobs.put(jobId, jobData);
114  }
115 
116  /*
117  * Keep track of the number of keyword search file ingest modules that
118  * are doing analysis for the ingest job, i.e., that have called this
119  * method. This is needed by endJob().
120  */
121  jobs.get(jobId).incrementModuleReferenceCount();
122 
123  /*
124  * Start a periodic search task in the
125  */
126  if ((jobs.size() > 0) && (periodicSearchTaskRunning == false)) {
127  currentUpdateIntervalMs = ((long) KeywordSearchSettings.getUpdateFrequency().getTime()) * 60 * 1000;
128  periodicSearchTaskHandle = periodicSearchTaskExecutor.schedule(new PeriodicSearchTask(), currentUpdateIntervalMs, MILLISECONDS);
129  periodicSearchTaskRunning = true;
130  }
131  }
132 
138  public synchronized void endJob(long jobId) {
139  /*
140  * Only complete the job if this is the last keyword search file ingest
141  * module doing annalysis for this job.
142  */
143  SearchJobInfo job;
144  job = jobs.get(jobId);
145  if (job == null) {
146  return; // RJCTODO: SEVERE
147  }
148  if (job.decrementModuleReferenceCount() != 0) {
149  jobs.remove(jobId);
150  }
151 
152  /*
153  * Commit the index and do the final search. The final search is done in
154  * the ingest thread that shutDown() on the keyword search file ingest
155  * module, per the contract of IngestModule.shutDwon().
156  */
157  logger.log(Level.INFO, "Commiting search index before final search for search job {0}", job.getJobId()); //NON-NLS
158  commit();
159  logger.log(Level.INFO, "Starting final search for search job {0}", job.getJobId()); //NON-NLS
160  doFinalSearch(job);
161  logger.log(Level.INFO, "Final search for search job {0} completed", job.getJobId()); //NON-NLS
162 
163  if (jobs.isEmpty()) {
164  cancelPeriodicSearchSchedulingTask();
165  }
166  }
167 
173  public synchronized void stopJob(long jobId) {
174  logger.log(Level.INFO, "Stopping search job {0}", jobId); //NON-NLS
175  commit();
176 
177  SearchJobInfo job;
178  job = jobs.get(jobId);
179  if (job == null) {
180  return;
181  }
182 
183  /*
184  * Request cancellation of the current keyword search, whether it is a
185  * preiodic search or a final search.
186  */
187  IngestSearchRunner.Searcher currentSearcher = job.getCurrentSearcher();
188  if ((currentSearcher != null) && (!currentSearcher.isDone())) {
189  logger.log(Level.INFO, "Cancelling search job {0}", jobId); //NON-NLS
190  currentSearcher.cancel(true);
191  }
192 
193  jobs.remove(jobId);
194 
195  if (jobs.isEmpty()) {
196  cancelPeriodicSearchSchedulingTask();
197  }
198  }
199 
207  public synchronized void addKeywordListsToAllJobs(List<String> keywordListNames) {
208  for (String listName : keywordListNames) {
209  logger.log(Level.INFO, "Adding keyword list {0} to all jobs", listName); //NON-NLS
210  for (SearchJobInfo j : jobs.values()) {
211  j.addKeywordListName(listName);
212  }
213  }
214  }
215 
221  private void commit() {
222  ingester.commit();
223 
224  /*
225  * Publish an event advertising the number of indexed items. Note that
226  * this is no longer the number of indexed files, since the text of many
227  * items in addition to files is indexed.
228  */
229  try {
230  final int numIndexedFiles = KeywordSearch.getServer().queryNumIndexedFiles();
231  KeywordSearch.fireNumIndexedFilesChange(null, numIndexedFiles);
232  } catch (NoOpenCoreException | KeywordSearchModuleException ex) {
233  logger.log(Level.SEVERE, "Error executing Solr query for number of indexed files", ex); //NON-NLS
234  }
235  }
236 
243  private void doFinalSearch(SearchJobInfo job) {
244  if (!job.getKeywordListNames().isEmpty()) {
245  try {
246  /*
247  * Wait for any periodic searches being done in a SwingWorker
248  * pool thread to finish.
249  */
250  job.waitForCurrentWorker();
251  IngestSearchRunner.Searcher finalSearcher = new IngestSearchRunner.Searcher(job, true);
252  job.setCurrentSearcher(finalSearcher);
253  /*
254  * Do the final search synchronously on the current ingest
255  * thread, per the contract specified
256  */
257  finalSearcher.doInBackground();
258  } catch (InterruptedException | CancellationException ex) {
259  logger.log(Level.INFO, "Final search for search job {0} interrupted or cancelled", job.getJobId()); //NON-NLS
260  } catch (Exception ex) {
261  logger.log(Level.SEVERE, String.format("Final search for search job %d failed", job.getJobId()), ex); //NON-NLS
262  }
263  }
264  }
265 
269  private synchronized void cancelPeriodicSearchSchedulingTask() {
270  if (periodicSearchTaskHandle != null) {
271  logger.log(Level.INFO, "No more search jobs, stopping periodic search scheduling"); //NON-NLS
272  periodicSearchTaskHandle.cancel(true);
273  periodicSearchTaskRunning = false;
274  }
275  }
276 
283  private final class PeriodicSearchTask implements Runnable {
284 
285  @Override
286  public void run() {
287  /*
288  * If there are no more jobs or this task has been cancelled, exit.
289  */
290  if (jobs.isEmpty() || periodicSearchTaskHandle.isCancelled()) {
291  logger.log(Level.INFO, "Periodic search scheduling task has been cancelled, exiting"); //NON-NLS
292  periodicSearchTaskRunning = false;
293  return;
294  }
295 
296  /*
297  * Commit the Solr index for the current case before doing the
298  * searches.
299  */
300  commit();
301 
302  /*
303  * Do a keyword search for each ingest job in progress. When the
304  * searches are done, recalculate the "hold off" time between
305  * searches to prevent back-to-back periodic searches and schedule
306  * the nect periodic search task.
307  */
308  final StopWatch stopWatch = new StopWatch();
309  stopWatch.start();
310  for (Iterator<Entry<Long, SearchJobInfo>> iterator = jobs.entrySet().iterator(); iterator.hasNext();) {
311  SearchJobInfo job = iterator.next().getValue();
312 
313  if (periodicSearchTaskHandle.isCancelled()) {
314  logger.log(Level.INFO, "Periodic search scheduling task has been cancelled, exiting"); //NON-NLS
315  periodicSearchTaskRunning = false;
316  return;
317  }
318 
319  if (!job.getKeywordListNames().isEmpty() && !job.isWorkerRunning()) {
320  logger.log(Level.INFO, "Starting periodic search for search job {0}", job.getJobId());
321  Searcher searcher = new Searcher(job, false);
322  job.setCurrentSearcher(searcher);
323  searcher.execute();
324  job.setWorkerRunning(true);
325  try {
326  searcher.get();
327  } catch (InterruptedException | ExecutionException ex) {
328  logger.log(Level.SEVERE, String.format("Error performing keyword search for ingest job %d", job.getJobId()), ex); //NON-NLS
329  services.postMessage(IngestMessage.createErrorMessage(
330  KeywordSearchModuleFactory.getModuleName(),
331  NbBundle.getMessage(this.getClass(), "SearchRunner.Searcher.done.err.msg"), ex.getMessage()));
332  } catch (java.util.concurrent.CancellationException ex) {
333  logger.log(Level.SEVERE, String.format("Keyword search for ingest job %d cancelled", job.getJobId()), ex); //NON-NLS
334  }
335  }
336  }
337  stopWatch.stop();
338  logger.log(Level.INFO, "Periodic searches for all ingest jobs cumulatively took {0} secs", stopWatch.getElapsedTimeSecs()); //NON-NLS
339  recalculateUpdateIntervalTime(stopWatch.getElapsedTimeSecs()); // ELDEBUG
340  periodicSearchTaskHandle = periodicSearchTaskExecutor.schedule(new PeriodicSearchTask(), currentUpdateIntervalMs, MILLISECONDS);
341  }
342 
351  private void recalculateUpdateIntervalTime(long lastSerchTimeSec) {
352  if (lastSerchTimeSec * 1000 < currentUpdateIntervalMs / 4) {
353  return;
354  }
355  currentUpdateIntervalMs *= 2;
356  logger.log(Level.WARNING, "Last periodic search took {0} sec. Increasing search interval to {1} sec", new Object[]{lastSerchTimeSec, currentUpdateIntervalMs / 1000});
357  }
358  }
359 
364  private class SearchJobInfo {
365 
367  private final long jobId;
368  private final long dataSourceId;
369  private volatile boolean workerRunning;
370  @GuardedBy("this")
371  private final List<String> keywordListNames;
372  @GuardedBy("this")
373  private final Map<Keyword, Set<Long>> currentResults; // Keyword to object IDs of items with hits
374  private IngestSearchRunner.Searcher currentSearcher;
375  private final AtomicLong moduleReferenceCount = new AtomicLong(0);
376  private final Object finalSearchLock = new Object();
377 
/**
 * Constructs the keyword search bookkeeping for one ingest job: the job
 * and data source IDs taken from the job context, a private copy of the
 * keyword list names, and an initially empty record of hits seen so far.
 *
 * @param jobContext       The ingest job context.
 * @param keywordListNames The names of the keyword lists to search; the
 *                         list is copied.
 */
private SearchJobInfo(IngestJobContext jobContext, List<String> keywordListNames) {
    this.jobContext = jobContext;
    this.jobId = jobContext.getJobId();
    this.dataSourceId = jobContext.getDataSource().getId();
    this.keywordListNames = new ArrayList<>(keywordListNames);
    this.currentResults = new HashMap<>();
    this.workerRunning = false;
    this.currentSearcher = null;
}
387 
389  return jobContext;
390  }
391 
392  private long getJobId() {
393  return jobId;
394  }
395 
396  private long getDataSourceId() {
397  return dataSourceId;
398  }
399 
400  private synchronized List<String> getKeywordListNames() {
401  return new ArrayList<>(keywordListNames);
402  }
403 
404  private synchronized void addKeywordListName(String keywordListName) {
405  if (!keywordListNames.contains(keywordListName)) {
406  keywordListNames.add(keywordListName);
407  }
408  }
409 
410  private synchronized Set<Long> currentKeywordResults(Keyword k) {
411  return currentResults.get(k);
412  }
413 
414  private synchronized void addKeywordResults(Keyword k, Set<Long> resultsIDs) {
415  currentResults.put(k, resultsIDs);
416  }
417 
418  private boolean isWorkerRunning() {
419  return workerRunning;
420  }
421 
422  private void setWorkerRunning(boolean flag) {
423  workerRunning = flag;
424  }
425 
426  private synchronized IngestSearchRunner.Searcher getCurrentSearcher() {
427  return currentSearcher;
428  }
429 
430  private synchronized void setCurrentSearcher(IngestSearchRunner.Searcher searchRunner) {
431  currentSearcher = searchRunner;
432  }
433 
435  moduleReferenceCount.incrementAndGet();
436  }
437 
439  return moduleReferenceCount.decrementAndGet();
440  }
441 
447  private void waitForCurrentWorker() throws InterruptedException {
448  synchronized (finalSearchLock) {
449  while (workerRunning) {
450  logger.log(Level.INFO, String.format("Waiting for previous search task for job %d to finish", jobId)); //NON-NLS
451  finalSearchLock.wait();
452  logger.log(Level.INFO, String.format("Notified previous search task for job %d to finish", jobId)); //NON-NLS
453  }
454  }
455  }
456 
460  private void searchNotify() {
461  synchronized (finalSearchLock) {
462  workerRunning = false;
463  finalSearchLock.notify();
464  }
465  }
466  }
467 
468  /*
469  * A SwingWorker responsible for searching the Solr index of the current
470  * case for the keywords for an ingest job. Keyword hit analysis results are
471  * created and posted to the blackboard and notifications are sent to the
472  * ingest inbox.
473  */
474  private final class Searcher extends SwingWorker<Object, Void> {
475 
476  /*
477  * Searcher has private copies/snapshots of the lists and keywords
478  */
479  private final SearchJobInfo job;
480  private final List<Keyword> keywords; //keywords to search
481  private final List<String> keywordListNames; // lists currently being searched
482  private final List<KeywordList> keywordLists;
483  private final Map<Keyword, KeywordList> keywordToList; //keyword to list name mapping
485  private ProgressHandle progressIndicator;
486  private boolean finalRun = false;
487 
/**
 * Constructs a search worker for a job. The job's keyword list names are
 * captured at construction time; the keyword collections start out empty
 * and are filled in by updateKeywords().
 *
 * @param job      The search job to search for.
 * @param finalRun True if this is the final search for the job, false if
 *                 it is a periodic search.
 */
Searcher(SearchJobInfo job, boolean finalRun) {
    this.job = job;
    this.finalRun = finalRun;
    this.keywordListNames = job.getKeywordListNames();
    this.keywords = new ArrayList<>();
    this.keywordToList = new HashMap<>();
    this.keywordLists = new ArrayList<>();
}
496 
497  @Override
498  @Messages("SearchRunner.query.exception.msg=Error performing query:")
499  protected Object doInBackground() throws Exception {
500  try {
501  if (usingNetBeansGUI) {
502  /*
503  * If running in the NetBeans thick client application
504  * version of Autopsy, NetBeans progress handles (i.e.,
505  * progress bars) are used to display search progress in the
506  * lower right hand corner of the main application window.
507  *
508  * A layer of abstraction to allow alternate representations
509  * of progress could be used here, as it is in other places
510  * in the application (see implementations and usage of
511  * org.sleuthkit.autopsy.progress.ProgressIndicator
512  * interface), to better decouple keyword search from the
513  * application's presentation layer.
514  */
515  SwingUtilities.invokeAndWait(() -> {
516  final String displayName = NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.displayName")
517  + (finalRun ? (" - " + NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.finalizeMsg")) : "");
518  progressIndicator = ProgressHandle.createHandle(displayName, new Cancellable() {
519  @Override
520  public boolean cancel() {
521  if (progressIndicator != null) {
522  progressIndicator.setDisplayName(displayName + " " + NbBundle.getMessage(this.getClass(), "SearchRunner.doInBackGround.cancelMsg"));
523  }
524  logger.log(Level.INFO, "Search cancelled by user"); //NON-NLS
525  new Thread(() -> {
526  IngestSearchRunner.Searcher.this.cancel(true);
527  }).start();
528  return true;
529  }
530  });
531  progressIndicator.start();
532  progressIndicator.switchToIndeterminate();
533  });
534  }
535 
536  updateKeywords();
537  for (Keyword keyword : keywords) {
538  if (isCancelled() || job.getJobContext().fileIngestIsCancelled()) {
539  logger.log(Level.INFO, "Cancellation requested, exiting before new keyword processed: {0}", keyword.getSearchTerm()); //NON-NLS
540  return null;
541  }
542 
543  KeywordList keywordList = keywordToList.get(keyword);
544  if (usingNetBeansGUI) {
545  String searchTermStr = keyword.getSearchTerm();
546  if (searchTermStr.length() > 50) {
547  searchTermStr = searchTermStr.substring(0, 49) + "...";
548  }
549  final String progressMessage = keywordList.getName() + ": " + searchTermStr;
550  SwingUtilities.invokeLater(() -> {
551  progressIndicator.progress(progressMessage);
552  });
553  }
554 
555  // Filtering
556  //limit search to currently ingested data sources
557  //set up a filter with 1 or more image ids OR'ed
558  KeywordSearchQuery keywordSearchQuery = KeywordSearchUtil.getQueryForKeyword(keyword, keywordList);
559  KeywordQueryFilter dataSourceFilter = new KeywordQueryFilter(KeywordQueryFilter.FilterType.DATA_SOURCE, job.getDataSourceId());
560  keywordSearchQuery.addFilter(dataSourceFilter);
561 
562  // Do the actual search
563  QueryResults queryResults;
564  try {
565  queryResults = keywordSearchQuery.performQuery();
567  logger.log(Level.SEVERE, "Error performing query: " + keyword.getSearchTerm(), ex); //NON-NLS
568  if (usingNetBeansGUI) {
569  final String userMessage = Bundle.SearchRunner_query_exception_msg() + keyword.getSearchTerm();
570  SwingUtilities.invokeLater(() -> {
571  MessageNotifyUtil.Notify.error(userMessage, ex.getCause().getMessage());
572  });
573  }
574  //no reason to continue with next query if recovery failed
575  //or wait for recovery to kick in and run again later
576  //likely case has closed and threads are being interrupted
577  return null;
578  } catch (CancellationException e) {
579  logger.log(Level.INFO, "Cancellation requested, exiting during keyword query: {0}", keyword.getSearchTerm()); //NON-NLS
580  return null;
581  }
582 
583  // Reduce the results of the query to only those hits we
584  // have not already seen.
585  QueryResults newResults = filterResults(queryResults);
586 
587  if (!newResults.getKeywords().isEmpty()) {
588  // Create blackboard artifacts
589  newResults.process(this, keywordList.getIngestMessages(), true, job.getJobId());
590  }
591  }
592  } catch (Exception ex) {
593  logger.log(Level.SEVERE, String.format("Error performing keyword search for ingest job %d", job.getJobId()), ex); //NON-NLS
594  } finally {
595  if (progressIndicator != null) {
596  SwingUtilities.invokeLater(new Runnable() {
597  @Override
598  public void run() {
599  progressIndicator.finish();
600  progressIndicator = null;
601  }
602  });
603  }
604  // In case a thread is waiting on this worker to be done
605  job.searchNotify();
606  }
607 
608  return null;
609  }
610 
614  private void updateKeywords() {
615  XmlKeywordSearchList loader = XmlKeywordSearchList.getCurrent();
616 
617  keywords.clear();
618  keywordToList.clear();
619  keywordLists.clear();
620 
621  for (String name : keywordListNames) {
622  KeywordList list = loader.getList(name);
623  keywordLists.add(list);
624  for (Keyword k : list.getKeywords()) {
625  keywords.add(k);
626  keywordToList.put(k, list);
627  }
628  }
629  }
630 
646  private QueryResults filterResults(QueryResults queryResult) {
647 
648  // Create a new (empty) QueryResults object to hold the most recently
649  // found hits.
650  QueryResults newResults = new QueryResults(queryResult.getQuery());
651 
652  // For each keyword represented in the results.
653  for (Keyword keyword : queryResult.getKeywords()) {
654  // These are all of the hits across all objects for the most recent search.
655  // This may well include duplicates of hits we've seen in earlier periodic searches.
656  List<KeywordHit> queryTermResults = queryResult.getResults(keyword);
657 
658  // Sort the hits for this keyword so that we are always
659  // guaranteed to return the hit for the lowest chunk.
660  Collections.sort(queryTermResults);
661 
662  // This will be used to build up the hits we haven't seen before
663  // for this keyword.
664  List<KeywordHit> newUniqueHits = new ArrayList<>();
665 
666  // Get the set of object ids seen in the past by this searcher
667  // for the given keyword.
668  Set<Long> curTermResults = job.currentKeywordResults(keyword);
669  if (curTermResults == null) {
670  // We create a new empty set if we haven't seen results for
671  // this keyword before.
672  curTermResults = new HashSet<>();
673  }
674 
675  // For each hit for this keyword.
676  for (KeywordHit hit : queryTermResults) {
677  if (curTermResults.contains(hit.getSolrObjectId())) {
678  // Skip the hit if we've already seen a hit for
679  // this keyword in the object.
680  continue;
681  }
682 
683  // We haven't seen the hit before so add it to list of new
684  // unique hits.
685  newUniqueHits.add(hit);
686 
687  // Add the object id to the results we've seen for this
688  // keyword.
689  curTermResults.add(hit.getSolrObjectId());
690  }
691 
692  // Update the job with the list of objects for which we have
693  // seen hits for the current keyword.
694  job.addKeywordResults(keyword, curTermResults);
695 
696  // Add the new hits for the current keyword into the results
697  // to be returned.
698  newResults.addResult(keyword, newUniqueHits);
699  }
700 
701  return newResults;
702  }
703  }
704 
705 }
static IngestMessage createErrorMessage(String source, String subject, String detailsHtml)
synchronized void setCurrentSearcher(IngestSearchRunner.Searcher searchRunner)
Logger getLogger(String moduleDisplayName)
synchronized void addKeywordResults(Keyword k, Set< Long > resultsIDs)
static void error(String title, String message)

Copyright © 2012-2021 Basis Technology. Generated on: Tue Feb 22 2022
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.