Autopsy  4.21.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
ExtractedTextViewer.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2023 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import java.awt.Component;
22 import java.awt.Cursor;
23 import java.awt.event.ActionEvent;
24 import java.awt.event.ActionListener;
25 import java.beans.PropertyChangeEvent;
26 import java.util.ArrayList;
27 import java.util.Collection;
28 import java.util.EnumSet;
29 import java.util.LinkedHashMap;
30 import java.util.List;
31 import java.util.Map;
32 import java.util.logging.Level;
33 import org.apache.tika.mime.MimeTypes;
34 import org.openide.nodes.Node;
35 import org.openide.util.Lookup;
36 import org.openide.util.NbBundle;
37 import org.openide.util.lookup.ServiceProvider;
46 import org.sleuthkit.datamodel.AbstractFile;
47 import org.sleuthkit.datamodel.Account;
48 import org.sleuthkit.datamodel.BlackboardArtifact;
49 import static org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE.TSK_ACCOUNT;
50 import static org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE.TSK_KEYWORD_HIT;
51 import org.sleuthkit.datamodel.BlackboardAttribute;
52 import static org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT;
53 import org.sleuthkit.datamodel.Content;
54 import org.sleuthkit.datamodel.Report;
55 import org.sleuthkit.datamodel.TskCoreException;
56 import org.sleuthkit.datamodel.TskData;
57 
62 @ServiceProvider(service = TextViewer.class, position = 2)
63 public class ExtractedTextViewer implements TextViewer {
64 
65  private static final Logger logger = Logger.getLogger(ExtractedTextViewer.class.getName());
66 
67  private static final BlackboardAttribute.Type TSK_ASSOCIATED_ARTIFACT_TYPE = new BlackboardAttribute.Type(TSK_ASSOCIATED_ARTIFACT);
68  private static final BlackboardAttribute.Type TSK_ACCOUNT_TYPE = new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_ACCOUNT_TYPE);
69 
70  private ExtractedContentPanel panel;
71  private volatile Node currentNode = null;
72  private ExtractedText currentSource = null;
73  private FileTypeDetector fileTypeDetector = null;
74 
75  // cache of last 10 solrHasFullyIndexedContent() requests sent to Solr.
76  private SolrIsFullyIndexedCache solrCache = null;
77 
85  try {
86  fileTypeDetector = new FileTypeDetector();
88  logger.log(Level.SEVERE, "Failed to initialize FileTypeDetector", ex); //NON-NLS
89  }
90 
91  solrCache = new SolrIsFullyIndexedCache();
92  // clear the cache when case opens or closes
93  Case.addEventTypeSubscriber(EnumSet.of(Case.Events.CURRENT_CASE), (PropertyChangeEvent evt) -> {
94  solrCache.clearCache();
95  });
96  }
97 
103  @Override
104  public void setNode(final Node node) {
105  // Clear the viewer.
106  if (node == null) {
107  currentNode = null;
108  resetComponent();
109  return;
110  }
111 
112  /*
113  * This deals with the known bug with an unknown cause where setNode is
114  * sometimes called twice for the same node.
115  */
116  if (node.equals(currentNode)) {
117  return;
118  } else {
119  currentNode = node;
120  }
121 
122  /*
123  * Assemble a collection of all of the indexed text "sources" for the
124  * node.
125  */
126  List<ExtractedText> sources = new ArrayList<>();
127  Lookup nodeLookup = node.getLookup();
128 
133  AdHocQueryResult adHocQueryResult = nodeLookup.lookup(AdHocQueryResult.class);
134  AbstractFile file = nodeLookup.lookup(AbstractFile.class);
135  BlackboardArtifact artifact = nodeLookup.lookup(BlackboardArtifact.class);
136  Report report = nodeLookup.lookup(Report.class);
137 
138  /*
139  * First, get text with highlighted hits if this node is for a search
140  * result.
141  */
142  ExtractedText highlightedHitText = null;
143  if (adHocQueryResult != null) {
144  /*
145  * The node is an ad hoc search result node.
146  */
147  highlightedHitText = new HighlightedText(adHocQueryResult.getSolrObjectId(), adHocQueryResult.getResults());
148  } else if (artifact != null) {
149  if (artifact.getArtifactTypeID() == TSK_KEYWORD_HIT.getTypeID()) {
150  /*
151  * The node is a keyword hit artifact node.
152  */
153  try {
154  highlightedHitText = new HighlightedText(artifact);
155  } catch (TskCoreException ex) {
156  logger.log(Level.SEVERE, "Failed to create HighlightedText for " + artifact, ex); //NON-NLS
157  }
158  } else if (artifact.getArtifactTypeID() == TSK_ACCOUNT.getTypeID() && file != null) {
159  try {
160  BlackboardAttribute attribute = artifact.getAttribute(TSK_ACCOUNT_TYPE);
161  if (attribute != null && Account.Type.CREDIT_CARD.getTypeName().equals(attribute.getValueString())) {
162  /*
163  * The node is a credit card account node.
164  */
165  highlightedHitText = getAccountsText(file, nodeLookup);
166  }
167  } catch (TskCoreException ex) {
168  logger.log(Level.SEVERE, "Failed to create AccountsText for " + file, ex); //NON-NLS
169  }
170  }
171  }
172  if (highlightedHitText != null) {
173  sources.add(highlightedHitText);
174  }
175 
176  /*
177  * Next, add the "raw" (not highlighted) text, if any, for any file
178  * associated with the node.
179  */
180  ExtractedText rawContentText = null;
181  if (file != null) {
182 
183  // see if Solr has fully indexed this file
184  if (solrHasFullyIndexedContent(file.getId())) {
185  rawContentText = new SolrIndexedText(file, file.getId());
186  sources.add(rawContentText);
187  } else {
188  // Solr does not have fully indexed content.
189  // see if it's a file type for which we can extract text
190  if (ableToExtractTextFromFile(file)) {
191  try {
192  rawContentText = new FileReaderExtractedText(file);
193  sources.add(rawContentText);
195  // do nothing
196  }
197  }
198  }
199  }
200 
201  /*
202  * Add the "raw" (not highlighted) text, if any, for any report
203  * associated with the node.
204  */
205  if (report != null) {
206  // see if Solr has fully indexed this file
207  if (solrHasFullyIndexedContent(report.getId())) {
208  rawContentText = new SolrIndexedText(report, report.getId());
209  sources.add(rawContentText);
210  }
211  }
212 
213  /*
214  * Finally, add the "raw" (not highlighted) text, if any, for any
215  * artifact associated with the node.
216  */
217  ExtractedText rawArtifactText = null;
218  try {
219  rawArtifactText = getRawArtifactText(artifact);
220  if (rawArtifactText != null) {
221  sources.add(rawArtifactText);
222  }
223  } catch (TskCoreException | NoCurrentCaseException ex) {
224  logger.log(Level.SEVERE, "Error creating RawText for " + file, ex); //NON-NLS
225  }
226 
227  // Now set the default source to be displayed.
228  if (highlightedHitText != null) {
229  currentSource = highlightedHitText;
230  } else if (rawArtifactText != null) {
231  currentSource = rawArtifactText;
232  } else {
233  currentSource = rawContentText;
234  }
235 
236  // Push the text sources into the panel.
237  for (ExtractedText source : sources) {
238  int currentPage = source.getCurrentPage();
239  if (currentPage == 0 && source.hasNextPage()) {
240  source.nextPage();
241  }
242  }
243  panel.updateControls(currentSource);
244 
245  String contentName = "";
246  if (file != null) {
247  contentName = file.getName();
248  }
249  setPanel(contentName, sources);
250 
251  }
252 
253  private ExtractedText getRawArtifactText(BlackboardArtifact artifact) throws TskCoreException, NoCurrentCaseException {
254  ExtractedText rawArtifactText = null;
255  if (null != artifact) {
256  /*
257  * For keyword hit artifacts, add the text of the artifact that hit,
258  * not the hit artifact; otherwise add the text for the artifact.
259  */
260  if (artifact.getArtifactTypeID() == TSK_KEYWORD_HIT.getTypeID()
261  || artifact.getArtifactTypeID() == TSK_ACCOUNT.getTypeID()) {
262 
263  BlackboardAttribute attribute = artifact.getAttribute(TSK_ASSOCIATED_ARTIFACT_TYPE);
264  if (attribute != null) {
265  long artifactId = attribute.getValueLong();
266  BlackboardArtifact associatedArtifact = Case.getCurrentCaseThrows().getSleuthkitCase().getBlackboardArtifact(artifactId);
267  if (solrHasFullyIndexedContent(associatedArtifact.getArtifactID())) {
268  rawArtifactText = new SolrIndexedText(associatedArtifact, associatedArtifact.getArtifactID());
269  }
270  }
271 
272  } else {
273  if (solrHasFullyIndexedContent(artifact.getArtifactID())) {
274  rawArtifactText = new SolrIndexedText(artifact, artifact.getArtifactID());
275  }
276  }
277  }
278  return rawArtifactText;
279  }
280 
281  static private ExtractedText getAccountsText(Content content, Lookup nodeLookup) throws TskCoreException {
282  /*
283  * get all the credit card artifacts
284  */
285  //if the node had artifacts in the lookup use them, other wise look up all credit card artifacts for the content.
286  Collection<? extends BlackboardArtifact> artifacts = nodeLookup.lookupAll(BlackboardArtifact.class);
287  artifacts = (artifacts == null || artifacts.isEmpty())
288  ? content.getArtifacts(TSK_ACCOUNT)
289  : artifacts;
290 
291  return new AccountsText(content.getId(), artifacts);
292  }
293 
294  private void scrollToCurrentHit() {
295  final ExtractedText source = panel.getSelectedSource();
296  if (source == null || !source.isSearchable()) {
297  return;
298  }
299 
300  panel.scrollToAnchor(source.getAnchorPrefix() + Integer.toString(source.currentItem()));
301  }
302 
303  @Override
304  public String getTitle() {
305  return NbBundle.getMessage(this.getClass(), "ExtractedContentViewer.getTitle");
306  }
307 
308  @Override
309  public String getToolTip() {
310  return NbBundle.getMessage(this.getClass(), "ExtractedContentViewer.toolTip");
311  }
312 
313  @Override
314  public TextViewer createInstance() {
315  return new ExtractedTextViewer();
316  }
317 
318  @Override
319  public synchronized Component getComponent() {
320  if (panel == null) {
321  panel = new ExtractedContentPanel();
322  panel.addPrevMatchControlListener(new PrevFindActionListener());
323  panel.addNextMatchControlListener(new NextFindActionListener());
324  panel.addPrevPageControlListener(new PrevPageActionListener());
325  panel.addNextPageControlListener(new NextPageActionListener());
326  panel.addSourceComboControlListener(new SourceChangeActionListener());
327  }
328  return panel;
329  }
330 
331  @Override
332  public void resetComponent() {
333  panel.resetDisplay();
334  currentNode = null;
335  currentSource = null;
336  panel.updateControls(currentSource);
337  }
338 
339  @Override
340  public boolean isSupported(Node node) {
341  if (node == null) {
342  return false;
343  }
344 
345  /*
346  * If the lookup of the node contains an ad hoc search result object,
347  * then there must be indexed text that produced the hit.
348  */
349  AdHocQueryResult adHocQueryResult = node.getLookup().lookup(AdHocQueryResult.class);
350  if (adHocQueryResult != null) {
351  return true;
352  }
353 
354  /*
355  * If the lookup of the node contains either a keyword hit artifact or a
356  * credit card account artifact from a credit card account numbers
357  * search, then there must be indexed text that produced the hit(s).
358  */
359  BlackboardArtifact artifact = node.getLookup().lookup(BlackboardArtifact.class);
360  if (artifact != null) {
361  final int artifactTypeID = artifact.getArtifactTypeID();
362  if (artifactTypeID == TSK_KEYWORD_HIT.getTypeID()) {
363  return true;
364  } else if (artifactTypeID == TSK_ACCOUNT.getTypeID()) {
365  try {
366  BlackboardAttribute attribute = artifact.getAttribute(TSK_ACCOUNT_TYPE);
367  if (attribute != null && Account.Type.CREDIT_CARD.getTypeName().equals(attribute.getValueString())) {
368  return true;
369  }
370  } catch (TskCoreException ex) {
371  /*
372  * If there was an error checking the account type, fall
373  * back to the check below for the file associated with the
374  * account (if there is one).
375  */
376  logger.log(Level.SEVERE, "Error getting TSK_ACCOUNT_TYPE attribute from artifact " + artifact.getArtifactID(), ex);
377  }
378  }
379  }
380 
381  /*
382  * If the lookup of the node contains a file, check to see if there is
383  * indexed text for the file. Note that there should be a file in the
384  * lookup of all nodes except artifact nodes that are associated with a
385  * data source instead of a file.
386  */
387  AbstractFile file = node.getLookup().lookup(AbstractFile.class);
388  if (file != null) {
389 
390  // see if Solr has fully indexed this file
391  if (solrHasFullyIndexedContent(file.getId())) {
392  return true;
393  }
394 
395  // Solr does not have fully indexed content.
396  // see if it's a file type for which we can extract text
397  if (ableToExtractTextFromFile(file)) {
398  return true;
399  }
400  }
401 
402  /*
403  * If the lookup of the node contains an artifact that is neither a
404  * keyword hit artifact nor a credit card account artifact, and the
405  * artifact is not associated with a file, check to see if there is
406  * indexed text for the artifact.
407  */
408  if (artifact != null) {
409  return solrHasFullyIndexedContent(artifact.getArtifactID());
410  }
411 
412  /*
413  * If the lookup of the node contains no artifacts or file but does
414  * contain a report, check to see if there is indexed text for the
415  * report.
416  */
417  Report report = node.getLookup().lookup(Report.class);
418  if (report != null) {
419  return solrHasFullyIndexedContent(report.getId());
420  }
421 
422  /*
423  * If the lookup of the node contains neither ad hoc search results, nor
424  * artifacts, nor a file, nor a report, there is no indexed text.
425  */
426  return false;
427  }
428 
429  @Override
430  public int isPreferred(Node node) {
431  return 4;
432  }
433 
442  private void setPanel(String contentName, List<ExtractedText> sources) {
443  if (panel != null) {
444  panel.setSources(contentName, sources);
445  }
446  }
447 
460  private boolean solrHasFullyIndexedContent(Long objectId) {
461 
462  // check if we have cached this decision
463  if (solrCache.containsKey(objectId)) {
464  return solrCache.getCombination(objectId);
465  }
466 
467  final Server solrServer = KeywordSearch.getServer();
468  if (solrServer.coreIsOpen() == false) {
469  solrCache.putCombination(objectId, false);
470  return false;
471  }
472 
473  // verify that all of the chunks in the file have been indexed.
474  try {
475  boolean isFullyIndexed = solrServer.queryIsFullyIndexed(objectId);
476  solrCache.putCombination(objectId, isFullyIndexed);
477  return isFullyIndexed;
479  logger.log(Level.SEVERE, "Error querying Solr server", ex); //NON-NLS
480  solrCache.putCombination(objectId, false);
481  return false;
482  }
483  }
484 
494  private boolean ableToExtractTextFromFile(AbstractFile file) {
495 
496  TskData.TSK_DB_FILES_TYPE_ENUM fileType = file.getType();
497 
498  if (fileType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.VIRTUAL_DIR)) {
499  return false;
500  }
501 
502  if ((fileType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS)
503  || fileType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS))
504  || (fileType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.CARVED))) {
505  return false;
506  }
507 
508  final long size = file.getSize();
509  if (file.isDir() || size == 0) {
510  return false;
511  }
512 
513  String mimeType = fileTypeDetector.getMIMEType(file).trim().toLowerCase();
514 
515  if (KeywordSearchIngestModule.ARCHIVE_MIME_TYPES.contains(mimeType)) {
516  return false;
517  }
518 
519  if (MimeTypes.OCTET_STREAM.equals(mimeType)) {
520  return false;
521  }
522 
523  // Often times there is an exception when trying to initialize a reader,
524  // thus making that specific file "unsupported". The only way to identify
525  // this situation is to initialize the reader.
526  try {
527  FileReaderExtractedText tmp = new FileReaderExtractedText(file);
529  return false;
530  }
531 
532  return true;
533  }
534 
538  private class NextFindActionListener implements ActionListener {
539 
540  @Override
541  public void actionPerformed(ActionEvent e) {
542  ExtractedText source = panel.getSelectedSource();
543  if (source == null) {
544  // reset
545  panel.updateControls(null);
546  return;
547  }
548  final boolean hasNextItem = source.hasNextItem();
549  final boolean hasNextPage = source.hasNextPage();
550  int indexVal;
551  if (hasNextItem || hasNextPage) {
552  if (!hasNextItem) {
553  //flip the page
554  nextPage();
555  indexVal = source.currentItem();
556  } else {
557  indexVal = source.nextItem();
558  }
559 
560  //scroll
561  panel.scrollToAnchor(source.getAnchorPrefix() + Integer.toString(indexVal));
562 
563  //update display
564  panel.updateCurrentMatchDisplay(source.currentItem());
565  panel.updateTotaMatcheslDisplay(source.getNumberHits());
566 
567  //update controls if needed
568  if (!source.hasNextItem() && !source.hasNextPage()) {
569  panel.enableNextMatchControl(false);
570  }
571  if (source.hasPreviousItem() || source.hasPreviousPage()) {
572  panel.enablePrevMatchControl(true);
573  }
574  }
575  }
576  }
577 
581  private class PrevFindActionListener implements ActionListener {
582 
583  @Override
584  public void actionPerformed(ActionEvent e) {
585  ExtractedText source = panel.getSelectedSource();
586  final boolean hasPreviousItem = source.hasPreviousItem();
587  final boolean hasPreviousPage = source.hasPreviousPage();
588  int indexVal;
589  if (hasPreviousItem || hasPreviousPage) {
590  if (!hasPreviousItem) {
591  //flip the page
592  previousPage();
593  indexVal = source.currentItem();
594  } else {
595  indexVal = source.previousItem();
596  }
597 
598  //scroll
599  panel.scrollToAnchor(source.getAnchorPrefix() + Integer.toString(indexVal));
600 
601  //update display
602  panel.updateCurrentMatchDisplay(source.currentItem());
603  panel.updateTotaMatcheslDisplay(source.getNumberHits());
604 
605  //update controls if needed
606  if (!source.hasPreviousItem() && !source.hasPreviousPage()) {
607  panel.enablePrevMatchControl(false);
608  }
609  if (source.hasNextItem() || source.hasNextPage()) {
610  panel.enableNextMatchControl(true);
611  }
612  }
613  }
614  }
615 
619  private class SourceChangeActionListener implements ActionListener {
620 
621  @Override
622  public void actionPerformed(ActionEvent e) {
623  currentSource = panel.getSelectedSource();
624 
625  if (currentSource == null) {
626  //TODO might need to reset something
627  return;
628  }
629 
630  panel.updateControls(currentSource);
631  }
632  }
633 
634  private void nextPage() {
635  // we should never have gotten here -- reset
636  if (currentSource == null) {
637  panel.updateControls(null);
638  return;
639  }
640 
641  if (currentSource.hasNextPage()) {
642  currentSource.nextPage();
643 
644  //set new text
645  panel.setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));
646  panel.refreshCurrentMarkup();
647  panel.setCursor(null);
648 
649  //update display
650  panel.updateCurrentPageDisplay(currentSource.getCurrentPage());
651 
652  //scroll to current selection
653  scrollToCurrentHit();
654 
655  //update controls if needed
656  if (!currentSource.hasNextPage()) {
657  panel.enableNextPageControl(false);
658  }
659  if (currentSource.hasPreviousPage()) {
660  panel.enablePrevPageControl(true);
661  }
662 
663  panel.updateSearchControls(currentSource);
664  }
665  }
666 
667  private void previousPage() {
668  // reset, we should have never gotten here if null
669  if (currentSource == null) {
670  panel.updateControls(null);
671  return;
672  }
673 
674  if (currentSource.hasPreviousPage()) {
675  currentSource.previousPage();
676 
677  //set new text
678  panel.setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));
679  panel.refreshCurrentMarkup();
680  panel.setCursor(null);
681 
682  //update display
683  panel.updateCurrentPageDisplay(currentSource.getCurrentPage());
684 
685  //scroll to current selection
686  scrollToCurrentHit();
687 
688  //update controls if needed
689  if (!currentSource.hasPreviousPage()) {
690  panel.enablePrevPageControl(false);
691  }
692  if (currentSource.hasNextPage()) {
693  panel.enableNextPageControl(true);
694  }
695 
696  panel.updateSearchControls(currentSource);
697 
698  }
699  }
700 
704  private class NextPageActionListener implements ActionListener {
705 
706  @Override
707  public void actionPerformed(ActionEvent e) {
708  nextPage();
709  }
710  }
711 
715  private class PrevPageActionListener implements ActionListener {
716 
717  @Override
718  public void actionPerformed(ActionEvent e) {
719  previousPage();
720  }
721  }
722 
727  private class SolrIsFullyIndexedCache {
728 
729  private static final int CACHE_SIZE = 10;
730  private final LinkedHashMap<Long, Boolean> cache;
731 
733  this.cache = new LinkedHashMap<Long, Boolean>(CACHE_SIZE, 0.75f, true) {
734  @Override
735  protected boolean removeEldestEntry(Map.Entry<Long, Boolean> eldest) {
736  return size() > CACHE_SIZE;
737  }
738  };
739  }
740 
741  public void putCombination(long key, boolean value) {
742  cache.put(key, value);
743  }
744 
745  public Boolean getCombination(long key) {
746  return cache.get(key);
747  }
748 
749  public void clearCache() {
750  cache.clear();
751  }
752 
753  public boolean containsKey(long key) {
754  return cache.containsKey(key);
755  }
756  }
757 }
boolean queryIsFullyIndexed(long contentID)
Definition: Server.java:1652
void setPanel(String contentName, List< ExtractedText > sources)
static ExtractedText getAccountsText(Content content, Lookup nodeLookup)
ExtractedText getRawArtifactText(BlackboardArtifact artifact)
synchronized static Logger getLogger(String name)
Definition: Logger.java:124
static void addEventTypeSubscriber(Set< Events > eventTypes, PropertyChangeListener subscriber)
Definition: Case.java:708

Copyright © 2012-2022 Basis Technology. Generated on: Tue Feb 6 2024
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.