Autopsy  4.8.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
Ingester.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2018 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import java.io.BufferedReader;
22 import java.util.HashMap;
23 import java.util.Map;
24 import java.util.logging.Level;
25 import org.apache.commons.lang3.math.NumberUtils;
26 import org.apache.solr.client.solrj.SolrServerException;
27 import org.apache.solr.common.SolrInputDocument;
28 import org.openide.util.NbBundle;
35 import org.sleuthkit.datamodel.AbstractFile;
36 import org.sleuthkit.datamodel.BlackboardArtifact;
37 import org.sleuthkit.datamodel.Content;
38 import org.sleuthkit.datamodel.DerivedFile;
39 import org.sleuthkit.datamodel.Directory;
40 import org.sleuthkit.datamodel.File;
41 import org.sleuthkit.datamodel.LayoutFile;
42 import org.sleuthkit.datamodel.LocalDirectory;
43 import org.sleuthkit.datamodel.LocalFile;
44 import org.sleuthkit.datamodel.Report;
45 import org.sleuthkit.datamodel.SlackFile;
46 import org.sleuthkit.datamodel.SleuthkitItemVisitor;
47 import org.sleuthkit.datamodel.SleuthkitVisitableItem;
48 import org.sleuthkit.datamodel.TskCoreException;
49 
53 //JMTODO: Should this class really be a singleton?
54 class Ingester {
55 
56  private static final Logger logger = Logger.getLogger(Ingester.class.getName());
57  private volatile boolean uncommitedIngests = false;
58  private final Server solrServer = KeywordSearch.getServer();
59  private static final SolrFieldsVisitor SOLR_FIELDS_VISITOR = new SolrFieldsVisitor();
60  private static Ingester instance;
61  private static final int SINGLE_READ_CHARS = 512;
62 
63  private Ingester() {
64  }
65 
66  public static synchronized Ingester getDefault() {
67  if (instance == null) {
68  instance = new Ingester();
69  }
70  return instance;
71  }
72 
73  //JMTODO: this is probably useless
74  @Override
75  @SuppressWarnings("FinalizeDeclaration")
76  protected void finalize() throws Throwable {
77  super.finalize();
78 
79  // Warn if files might have been left uncommited.
80  if (uncommitedIngests) {
81  logger.warning("Ingester was used to add files that it never committed."); //NON-NLS
82  }
83  }
84 
95  void indexMetaDataOnly(AbstractFile file) throws IngesterException {
96  indexChunk("", file.getName().toLowerCase(), getContentFields(file));
97  }
98 
109  void indexMetaDataOnly(BlackboardArtifact artifact) throws IngesterException {
110  indexChunk("", new ArtifactTextExtractor().getName(artifact), getContentFields(artifact));
111  }
112 
121  private Map<String, String> getContentFields(SleuthkitVisitableItem item) {
122  return item.accept(SOLR_FIELDS_VISITOR);
123  }
124 
144  // TODO (JIRA-3118): Cancelled text indexing does not propagate cancellation to clients
145  < T extends SleuthkitVisitableItem> boolean indexText(TextExtractor< T> extractor, T source, IngestJobContext context) throws Ingester.IngesterException {
146  final long sourceID = extractor.getID(source);
147  final String sourceName = extractor.getName(source);
148 
149  int numChunks = 0; //unknown until chunking is done
150 
151  if (extractor.isDisabled()) {
152  /*
153  * some Extractors, notable the strings extractor, have options
154  * which can be configured such that no extraction should be done
155  */
156  return true;
157  }
158 
159  Map<String, String> fields = getContentFields(source);
160  //Get a reader for the content of the given source
161  try (BufferedReader reader = new BufferedReader(extractor.getReader(source));) {
162  Chunker chunker = new Chunker(reader);
163  for (Chunk chunk : chunker) {
164  if (context != null && context.fileIngestIsCancelled()) {
165  logger.log(Level.INFO, "File ingest cancelled. Cancelling keyword search indexing of {0}", sourceName);
166  return false;
167  }
168  String chunkId = Server.getChunkIdString(sourceID, numChunks + 1);
169  fields.put(Server.Schema.ID.toString(), chunkId);
170  fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength()));
171  try {
172  //add the chunk text to Solr index
173  indexChunk(chunk.toString(), sourceName, fields);
174  numChunks++;
175  } catch (Ingester.IngesterException ingEx) {
176  extractor.logWarning("Ingester had a problem with extracted string from file '" //NON-NLS
177  + sourceName + "' (id: " + sourceID + ").", ingEx);//NON-NLS
178 
179  throw ingEx; //need to rethrow to signal error and move on
180  }
181  }
182  if (chunker.hasException()) {
183  extractor.logWarning("Error chunking content from " + sourceID + ": " + sourceName, chunker.getException());
184  return false;
185  }
186  } catch (Exception ex) {
187  extractor.logWarning("Unexpected error, can't read content stream from " + sourceID + ": " + sourceName, ex);//NON-NLS
188  return false;
189  } finally {
190  if (context != null && context.fileIngestIsCancelled()) {
191  return false;
192  } else {
193  //after all chunks, index just the meta data, including the numChunks, of the parent file
194  fields.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(numChunks));
195  //reset id field to base document id
196  fields.put(Server.Schema.ID.toString(), Long.toString(sourceID));
197  //"parent" docs don't have chunk_size
198  fields.remove(Server.Schema.CHUNK_SIZE.toString());
199  indexChunk(null, sourceName, fields);
200  }
201  }
202  return true;
203  }
204 
218  private void indexChunk(String chunk, String sourceName, Map<String, String> fields) throws IngesterException {
219  if (fields.get(Server.Schema.IMAGE_ID.toString()) == null) {
220  //JMTODO: actually if the we couldn't get the image id it is set to -1,
221  // but does this really mean we don't want to index it?
222 
223  //skip the file, image id unknown
224  String msg = NbBundle.getMessage(Ingester.class,
225  "Ingester.ingest.exception.unknownImgId.msg", sourceName); //JMTODO: does this need to ne internationalized?
226  logger.log(Level.SEVERE, msg);
227  throw new IngesterException(msg);
228  }
229 
230  //Make a SolrInputDocument out of the field map
231  SolrInputDocument updateDoc = new SolrInputDocument();
232  for (String key : fields.keySet()) {
233  updateDoc.addField(key, fields.get(key));
234  }
235 
236  try {
237  //TODO: consider timeout thread, or vary socket timeout based on size of indexed content
238 
239  //add the content to the SolrInputDocument
240  //JMTODO: can we just add it to the field map before passing that in?
241  updateDoc.addField(Server.Schema.CONTENT.toString(), chunk);
242 
243  // We also add the content (if present) in lowercase form to facilitate case
244  // insensitive substring/regular expression search.
245  double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
246  if (indexSchemaVersion >= 2.1) {
247  updateDoc.addField(Server.Schema.CONTENT_STR.toString(), ((chunk == null) ? "" : chunk.toLowerCase()));
248  }
249 
250  TimingMetric metric = HealthMonitor.getTimingMetric("Solr: Index chunk");
251 
252  solrServer.addDocument(updateDoc);
253  HealthMonitor.submitTimingMetric(metric);
254  uncommitedIngests = true;
255 
256  } catch (KeywordSearchModuleException | NoOpenCoreException ex) {
257  //JMTODO: does this need to be internationalized?
258  throw new IngesterException(
259  NbBundle.getMessage(Ingester.class, "Ingester.ingest.exception.err.msg", sourceName), ex);
260  }
261  }
262 
267  void commit() {
268  try {
269  solrServer.commit();
270  uncommitedIngests = false;
271  } catch (NoOpenCoreException | SolrServerException ex) {
272  logger.log(Level.WARNING, "Error commiting index", ex); //NON-NLS
273 
274  }
275  }
276 
280  static private class SolrFieldsVisitor extends SleuthkitItemVisitor.Default<Map<String, String>> {
281 
282  @Override
283  protected Map<String, String> defaultVisit(SleuthkitVisitableItem svi) {
284  return new HashMap<>();
285  }
286 
287  @Override
288  public Map<String, String> visit(File f) {
289  return getCommonAndMACTimeFields(f);
290  }
291 
292  @Override
293  public Map<String, String> visit(DerivedFile df) {
294  return getCommonAndMACTimeFields(df);
295  }
296 
297  @Override
298  public Map<String, String> visit(Directory d) {
299  return getCommonAndMACTimeFields(d);
300  }
301 
302  @Override
303  public Map<String, String> visit(LocalDirectory ld) {
304  return getCommonAndMACTimeFields(ld);
305  }
306 
307  @Override
308  public Map<String, String> visit(LayoutFile lf) {
309  // layout files do not have times
310  return getCommonFields(lf);
311  }
312 
313  @Override
314  public Map<String, String> visit(LocalFile lf) {
315  return getCommonAndMACTimeFields(lf);
316  }
317 
318  @Override
319  public Map<String, String> visit(SlackFile f) {
320  return getCommonAndMACTimeFields(f);
321  }
322 
332  private Map<String, String> getCommonAndMACTimeFields(AbstractFile file) {
333  Map<String, String> params = getCommonFields(file);
334  params.put(Server.Schema.CTIME.toString(), ContentUtils.getStringTimeISO8601(file.getCtime(), file));
335  params.put(Server.Schema.ATIME.toString(), ContentUtils.getStringTimeISO8601(file.getAtime(), file));
336  params.put(Server.Schema.MTIME.toString(), ContentUtils.getStringTimeISO8601(file.getMtime(), file));
337  params.put(Server.Schema.CRTIME.toString(), ContentUtils.getStringTimeISO8601(file.getCrtime(), file));
338  return params;
339  }
340 
349  private Map<String, String> getCommonFields(AbstractFile file) {
350  Map<String, String> params = new HashMap<>();
351  params.put(Server.Schema.ID.toString(), Long.toString(file.getId()));
352  try {
353  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(file.getDataSource().getId()));
354  } catch (TskCoreException ex) {
355  logger.log(Level.SEVERE, "Could not get data source id to properly index the file " + file.getId(), ex); //NON-NLS
356  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
357  }
358  params.put(Server.Schema.FILE_NAME.toString(), file.getName().toLowerCase());
359  return params;
360  }
361 
369  @Override
370  public Map<String, String> visit(BlackboardArtifact artifact) {
371  Map<String, String> params = new HashMap<>();
372  params.put(Server.Schema.ID.toString(), Long.toString(artifact.getArtifactID()));
373  try {
374  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(ArtifactTextExtractor.getDataSource(artifact).getId()));
375  } catch (TskCoreException ex) {
376  logger.log(Level.SEVERE, "Could not get data source id to properly index the artifact " + artifact.getArtifactID(), ex); //NON-NLS
377  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
378  }
379  return params;
380  }
381 
389  @Override
390  public Map<String, String> visit(Report report) {
391  Map<String, String> params = new HashMap<>();
392  params.put(Server.Schema.ID.toString(), Long.toString(report.getId()));
393  try {
394  Content dataSource = report.getDataSource();
395  if (null == dataSource) {
396  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
397  } else {
398  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(dataSource.getId()));
399  }
400  } catch (TskCoreException ex) {
401  logger.log(Level.SEVERE, "Could not get data source id to properly index the report, using default value. Id: " + report.getId(), ex); //NON-NLS
402  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
403  }
404  return params;
405  }
406  }
407 
412  static class IngesterException extends Exception {
413 
414  private static final long serialVersionUID = 1L;
415 
416  IngesterException(String message, Throwable ex) {
417  super(message, ex);
418  }
419 
420  IngesterException(String message) {
421  super(message);
422  }
423  }
424 }
Map< String, String > visit(LocalDirectory ld)
Definition: Ingester.java:303
Map< String, String > getCommonAndMACTimeFields(AbstractFile file)
Definition: Ingester.java:332
static String getStringTimeISO8601(long epochSeconds, TimeZone tzone)
Map< String, String > getCommonFields(AbstractFile file)
Definition: Ingester.java:349
Map< String, String > visit(BlackboardArtifact artifact)
Definition: Ingester.java:370
Map< String, String > defaultVisit(SleuthkitVisitableItem svi)
Definition: Ingester.java:283

Copyright © 2012-2018 Basis Technology. Generated on: Thu Oct 4 2018
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.