Autopsy  4.19.3
Graphical digital forensics platform for The Sleuth Kit and other tools.
TextExtractorFactory.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2018-2018 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.textextractors;
20 
21 import java.util.Arrays;
22 import java.util.List;
23 import org.openide.util.Lookup;
24 import org.sleuthkit.datamodel.AbstractFile;
25 import org.sleuthkit.datamodel.BlackboardArtifact;
26 import org.sleuthkit.datamodel.Content;
27 import org.sleuthkit.datamodel.Report;
28 
37 public class TextExtractorFactory {
38 
57  public static TextExtractor getExtractor(Content content, Lookup context) throws NoTextExtractorFound {
58  if (content instanceof AbstractFile) {
59  for (TextExtractor extractor : getFileExtractors((AbstractFile) content, context)) {
60  if (extractor.isSupported()) {
61  return extractor;
62  }
63  }
64  } else if (content instanceof BlackboardArtifact) {
65  TextExtractor artifactExtractor = new ArtifactTextExtractor((BlackboardArtifact) content);
66  artifactExtractor.setExtractionSettings(context);
67  return artifactExtractor;
68  } else if (content instanceof Report) {
69  TextExtractor reportExtractor = new TikaTextExtractor(content);
70  reportExtractor.setExtractionSettings(context);
71  return reportExtractor;
72  }
73 
74  throw new NoTextExtractorFound(
75  String.format("Could not find a suitable reader for "
76  + "content with name [%s] and id=[%d].",
77  content.getName(), content.getId())
78  );
79  }
80 
90  private static List<TextExtractor> getFileExtractors(AbstractFile content, Lookup context) {
91  List<TextExtractor> fileExtractors = Arrays.asList(
92  new TextFileExtractor(content),
93  new HtmlTextExtractor(content),
94  new SqliteTextExtractor(content),
95  new TikaTextExtractor(content));
96 
97  fileExtractors.forEach((fileExtractor) -> {
98  fileExtractor.setExtractionSettings(context);
99  });
100 
101  return fileExtractors;
102  }
103 
116  public static TextExtractor getExtractor(Content content) throws NoTextExtractorFound {
117  return TextExtractorFactory.getExtractor(content, null);
118  }
119 
136  public static TextExtractor getStringsExtractor(Content content, Lookup context) {
137  StringsTextExtractor stringsInstance = new StringsTextExtractor(content);
138  stringsInstance.setExtractionSettings(context);
139  return stringsInstance;
140  }
141 
146  public static class NoTextExtractorFound extends Exception {
147 
148  public NoTextExtractorFound(String msg) {
149  super(msg);
150  }
151 
152  public NoTextExtractorFound(Throwable ex) {
153  super(ex);
154  }
155 
156  private NoTextExtractorFound(String msg, Throwable ex) {
157  super(msg, ex);
158  }
159  }
160 }
default void setExtractionSettings(Lookup context)
static TextExtractor getStringsExtractor(Content content, Lookup context)
static TextExtractor getExtractor(Content content, Lookup context)
static List< TextExtractor > getFileExtractors(AbstractFile content, Lookup context)

Copyright © 2012-2022 Basis Technology. Generated on: Sat Sep 24 2022
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.