Autopsy  4.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
Ingester.java
Go to the documentation of this file.
/*
 * Autopsy Forensic Browser
 *
 * Copyright 2011-2017 Basis Technology Corp.
 * Contact: carrier <at> sleuthkit <dot> org
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import java.io.BufferedReader;
22 import java.util.HashMap;
23 import java.util.Map;
24 import java.util.logging.Level;
25 import org.apache.solr.client.solrj.SolrServerException;
26 import org.apache.solr.common.SolrInputDocument;
27 import org.openide.util.NbBundle;
43 
47 //JMTODO: Should this class really be a singleton?
48 class Ingester {
49 
50  private static final Logger logger = Logger.getLogger(Ingester.class.getName());
51  private volatile boolean uncommitedIngests = false;
52  private final Server solrServer = KeywordSearch.getServer();
53  private static final SolrFieldsVisitor SOLR_FIELDS_VISITOR = new SolrFieldsVisitor();
54  private static Ingester instance;
55  private static final int SINGLE_READ_CHARS = 512;
56 
57  private Ingester() {
58  }
59 
60  public static synchronized Ingester getDefault() {
61  if (instance == null) {
62  instance = new Ingester();
63  }
64  return instance;
65  }
66 
67  //JMTODO: this is probably useless
68  @Override
69  @SuppressWarnings("FinalizeDeclaration")
70  protected void finalize() throws Throwable {
71  super.finalize();
72 
73  // Warn if files might have been left uncommited.
74  if (uncommitedIngests) {
75  logger.warning("Ingester was used to add files that it never committed."); //NON-NLS
76  }
77  }
78 
89  void indexMetaDataOnly(AbstractFile file) throws IngesterException {
90  indexChunk("", file.getName(), getContentFields(file));
91  }
92 
103  void indexMetaDataOnly(BlackboardArtifact artifact) throws IngesterException {
104  indexChunk("", new ArtifactTextExtractor().getName(artifact), getContentFields(artifact));
105  }
106 
115  private Map<String, String> getContentFields(SleuthkitVisitableItem item) {
116  return item.accept(SOLR_FIELDS_VISITOR);
117  }
118 
139  < T extends SleuthkitVisitableItem> boolean indexText(TextExtractor< T> extractor, T source, IngestJobContext context) throws Ingester.IngesterException {
140  final long sourceID = extractor.getID(source);
141  final String sourceName = extractor.getName(source);
142 
143  int numChunks = 0; //unknown until chunking is done
144 
145  if (extractor.isDisabled()) {
146  /* some Extractors, notable the strings extractor, have options
147  * which can be configured such that no extraction should be done */
148  return true;
149  }
150 
151  Map<String, String> fields = getContentFields(source);
152  //Get a reader for the content of the given source
153  try (BufferedReader reader = new BufferedReader(extractor.getReader(source));) {
154  Chunker chunker = new Chunker(reader);
155  for (Chunk chunk : chunker) {
156  String chunkId = Server.getChunkIdString(sourceID, numChunks + 1);
157  fields.put(Server.Schema.ID.toString(), chunkId);
158  fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength()));
159  try {
160  //add the chunk text to Solr index
161  indexChunk(chunk.toString(), sourceName, fields);
162  numChunks++;
163  } catch (Ingester.IngesterException ingEx) {
164  extractor.logWarning("Ingester had a problem with extracted string from file '" //NON-NLS
165  + sourceName + "' (id: " + sourceID + ").", ingEx);//NON-NLS
166 
167  throw ingEx; //need to rethrow to signal error and move on
168  }
169  }
170  if (chunker.hasException()) {
171  extractor.logWarning("Error chunking content from " + sourceID + ": " + sourceName, chunker.getException());
172  return false;
173  }
174  } catch (Exception ex) {
175  extractor.logWarning("Unexpected error, can't read content stream from " + sourceID + ": " + sourceName, ex);//NON-NLS
176  return false;
177  } finally {
178  //after all chunks, index just the meta data, including the numChunks, of the parent file
179  fields.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(numChunks));
180  //reset id field to base document id
181  fields.put(Server.Schema.ID.toString(), Long.toString(sourceID));
182  //"parent" docs don't have chunk_size
183  fields.remove(Server.Schema.CHUNK_SIZE.toString());
184  indexChunk(null, sourceName, fields);
185  }
186 
187  return true;
188  }
189 
203  private void indexChunk(String chunk, String sourceName, Map<String, String> fields) throws IngesterException {
204  if (fields.get(Server.Schema.IMAGE_ID.toString()) == null) {
205  //JMTODO: actually if the we couldn't get the image id it is set to -1,
206  // but does this really mean we don't want to index it?
207 
208  //skip the file, image id unknown
209  //JMTODO: does this need to ne internationalized?
210  String msg = NbBundle.getMessage(Ingester.class,
211  "Ingester.ingest.exception.unknownImgId.msg", sourceName); //JMTODO: does this need to ne internationalized?
212  logger.log(Level.SEVERE, msg);
213  throw new IngesterException(msg);
214  }
215 
216  //Make a SolrInputDocument out of the field map
217  SolrInputDocument updateDoc = new SolrInputDocument();
218  for (String key : fields.keySet()) {
219  updateDoc.addField(key, fields.get(key));
220  }
221  //add the content to the SolrInputDocument
222  //JMTODO: can we just add it to the field map before passing that in?
223  updateDoc.addField(Server.Schema.CONTENT.toString(), chunk);
224 
225  try {
226  //TODO: consider timeout thread, or vary socket timeout based on size of indexed content
227  solrServer.addDocument(updateDoc);
228  uncommitedIngests = true;
229 
230  } catch (KeywordSearchModuleException | NoOpenCoreException ex) {
231  //JMTODO: does this need to be internationalized?
232  throw new IngesterException(
233  NbBundle.getMessage(Ingester.class, "Ingester.ingest.exception.err.msg", sourceName), ex);
234  }
235  }
236 
241  void commit() {
242  try {
243  solrServer.commit();
244  uncommitedIngests = false;
245  } catch (NoOpenCoreException | SolrServerException ex) {
246  logger.log(Level.WARNING, "Error commiting index", ex); //NON-NLS
247 
248  }
249  }
250 
254  static private class SolrFieldsVisitor extends SleuthkitItemVisitor.Default<Map<String, String>> {
255 
256  @Override
257  protected Map<String, String> defaultVisit(SleuthkitVisitableItem svi) {
258  return new HashMap<>();
259  }
260 
261  @Override
262  public Map<String, String> visit(File f) {
263  return getCommonAndMACTimeFields(f);
264  }
265 
266  @Override
267  public Map<String, String> visit(DerivedFile df) {
268  return getCommonAndMACTimeFields(df);
269  }
270 
271  @Override
272  public Map<String, String> visit(Directory d) {
273  return getCommonAndMACTimeFields(d);
274  }
275 
276  @Override
277  public Map<String, String> visit(LayoutFile lf) {
278  // layout files do not have times
279  return getCommonFields(lf);
280  }
281 
282  @Override
283  public Map<String, String> visit(LocalFile lf) {
284  return getCommonAndMACTimeFields(lf);
285  }
286 
287  @Override
288  public Map<String, String> visit(SlackFile f) {
289  return getCommonAndMACTimeFields(f);
290  }
291 
301  private Map<String, String> getCommonAndMACTimeFields(AbstractFile file) {
302  Map<String, String> params = getCommonFields(file);
303  params.put(Server.Schema.CTIME.toString(), ContentUtils.getStringTimeISO8601(file.getCtime(), file));
304  params.put(Server.Schema.ATIME.toString(), ContentUtils.getStringTimeISO8601(file.getAtime(), file));
305  params.put(Server.Schema.MTIME.toString(), ContentUtils.getStringTimeISO8601(file.getMtime(), file));
306  params.put(Server.Schema.CRTIME.toString(), ContentUtils.getStringTimeISO8601(file.getCrtime(), file));
307  return params;
308  }
309 
318  private Map<String, String> getCommonFields(AbstractFile af) {
319  Map<String, String> params = new HashMap<>();
320  params.put(Server.Schema.ID.toString(), Long.toString(af.getId()));
321  try {
322  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(af.getDataSource().getId()));
323  } catch (TskCoreException ex) {
324  logger.log(Level.SEVERE, "Could not get data source id to properly index the file " + af.getId(), ex); //NON-NLS
325  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
326  }
327  params.put(Server.Schema.FILE_NAME.toString(), af.getName());
328  return params;
329  }
330 
338  @Override
339  public Map<String, String> visit(BlackboardArtifact artifact) {
340  Map<String, String> params = new HashMap<>();
341  params.put(Server.Schema.ID.toString(), Long.toString(artifact.getArtifactID()));
342  try {
343  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(ArtifactTextExtractor.getDataSource(artifact).getId()));
344  } catch (TskCoreException ex) {
345  logger.log(Level.SEVERE, "Could not get data source id to properly index the artifact " + artifact.getArtifactID(), ex); //NON-NLS
346  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
347  }
348  return params;
349  }
350  }
351 
356  static class IngesterException extends Exception {
357 
358  private static final long serialVersionUID = 1L;
359 
360  IngesterException(String message, Throwable ex) {
361  super(message, ex);
362  }
363 
364  IngesterException(String message) {
365  super(message);
366  }
367  }
368 }
Map< String, String > getCommonAndMACTimeFields(AbstractFile file)
Definition: Ingester.java:301
static String getStringTimeISO8601(long epochSeconds, TimeZone tzone)
Map< String, String > getCommonFields(AbstractFile af)
Definition: Ingester.java:318
Map< String, String > visit(BlackboardArtifact artifact)
Definition: Ingester.java:339
Map< String, String > defaultVisit(SleuthkitVisitableItem svi)
Definition: Ingester.java:257

Copyright © 2012-2016 Basis Technology. Generated on: Mon Apr 24 2017
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.