Autopsy  3.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
TextExtractor.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2012 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import java.util.Arrays;
22 import java.util.List;
23 import java.util.Map;
26 
31 interface TextExtractor { // package-private interface; every method below is declared without a body
32 
37  // NOTE(review): the opening lines of the declaration closed by the next line are missing from this listing - presumably the options enum whose EXTRACT_UTF16 / EXTRACT_UTF8 members are described after the listing; confirm against the original file
40  };
41 
42  //generally text extractors should ignore archives
43  //and let unpacking modules take care of them
44  static final List<String> ARCHIVE_MIME_TYPES =
45  Arrays.asList(
46  //ignore unstructured binary and compressed data, for which string extraction or unzipper works better
47  "application/x-7z-compressed", //NON-NLS
48  "application/x-ace-compressed", //NON-NLS
49  "application/x-alz-compressed", //NON-NLS
50  "application/x-arj", //NON-NLS
51  "application/vnd.ms-cab-compressed", //NON-NLS
52  "application/x-cfs-compressed", //NON-NLS
53  "application/x-dgc-compressed", //NON-NLS
54  "application/x-apple-diskimage", //NON-NLS
55  "application/x-gca-compressed", //NON-NLS
56  "application/x-dar", //NON-NLS
57  "application/x-lzx", //NON-NLS
58  "application/x-lzh", //NON-NLS
59  "application/x-rar-compressed", //NON-NLS
60  "application/x-stuffit", //NON-NLS
61  "application/x-stuffitx", //NON-NLS
62  "application/x-gtar", //NON-NLS
63  "application/x-archive", //NON-NLS
64  "application/x-executable", //NON-NLS
65  "application/x-gzip", //NON-NLS
66  "application/zip", //NON-NLS
67  "application/x-zoo", //NON-NLS
68  "application/x-cpio", //NON-NLS
69  "application/x-shar", //NON-NLS
70  "application/x-tar", //NON-NLS
71  "application/x-bzip", //NON-NLS
72  "application/x-bzip2", //NON-NLS
73  "application/x-lzip", //NON-NLS
74  "application/x-lzma", //NON-NLS
75  "application/x-lzop", //NON-NLS
76  "application/x-z", //NON-NLS
77  "application/x-compress"); //NON-NLS
78  // Returns a chunk count as an int. NOTE(review): presumably the chunks produced while processing the source file - confirm
84  int getNumChunks();
85  // Returns an AbstractFile. NOTE(review): presumably the file most recently handed to index() - confirm
91  AbstractFile getSourceFile();
92  // Takes the file to process, returns a boolean, and may throw Ingester.IngesterException. NOTE(review): presumably true means success - confirm
100  boolean index(AbstractFile sourceFile) throws Ingester.IngesterException;
101  // Takes a list of SCRIPT values and returns a boolean. NOTE(review): meaning of the return value is not visible here - confirm
109  boolean setScripts(List<SCRIPT> extractScript);
110  // Returns a list of SCRIPT values. NOTE(review): presumably those supplied via setScripts() - confirm
116  List<SCRIPT> getScripts();
117  // Returns a String-to-String option map. NOTE(review): presumably keyed by option names such as EXTRACT_UTF16 / EXTRACT_UTF8 - confirm
124  Map<String, String> getOptions();
125  // Takes a String-to-String option map; declared with no return value
131  void setOptions(Map<String, String> options);
132  // Returns a boolean. NOTE(review): presumably whether this extractor targets particular content types - confirm
140  boolean isContentTypeSpecific();
141  // Takes a file plus a detected-format string and returns a boolean. NOTE(review): presumably whether this extractor can handle that file - confirm
151  boolean isSupported(AbstractFile file, String detectedFormat);
152 }
EXTRACT_UTF16
extract UTF16 text, possible values Boolean.TRUE.toString(), Boolean.FALSE.toString() ...
EXTRACT_UTF8
extract UTF8 text, possible values Boolean.TRUE.toString(), Boolean.FALSE.toString() ...

Copyright © 2012-2015 Basis Technology. Generated on: Mon Oct 19 2015
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.