Autopsy 4.10.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
Ingester.java
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2018 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import java.io.BufferedReader;
22 import java.io.Reader;
23 import java.util.HashMap;
24 import java.util.Map;
25 import java.util.logging.Level;
26 import org.apache.commons.lang3.math.NumberUtils;
27 import org.apache.solr.client.solrj.SolrServerException;
28 import org.apache.solr.common.SolrInputDocument;
29 import org.openide.util.NbBundle;
30 import org.sleuthkit.autopsy.coreutils.Logger;
31 import org.sleuthkit.autopsy.datamodel.ContentUtils;
32 import org.sleuthkit.autopsy.healthmonitor.HealthMonitor;
33 import org.sleuthkit.autopsy.healthmonitor.TimingMetric;
34 import org.sleuthkit.autopsy.ingest.IngestJobContext;
35 import org.sleuthkit.autopsy.keywordsearch.Chunker.Chunk;
36 import org.sleuthkit.datamodel.AbstractFile;
37 import org.sleuthkit.datamodel.BlackboardArtifact;
38 import org.sleuthkit.datamodel.Content;
39 import org.sleuthkit.datamodel.DerivedFile;
40 import org.sleuthkit.datamodel.Directory;
41 import org.sleuthkit.datamodel.File;
42 import org.sleuthkit.datamodel.LayoutFile;
43 import org.sleuthkit.datamodel.LocalDirectory;
44 import org.sleuthkit.datamodel.LocalFile;
45 import org.sleuthkit.datamodel.Report;
46 import org.sleuthkit.datamodel.SlackFile;
47 import org.sleuthkit.datamodel.SleuthkitItemVisitor;
48 import org.sleuthkit.datamodel.SleuthkitVisitableItem;
49 import org.sleuthkit.datamodel.TskCoreException;
50 
54 //JMTODO: Should this class really be a singleton?
55 class Ingester {
56 
57  private static final Logger logger = Logger.getLogger(Ingester.class.getName());
58  private volatile boolean uncommitedIngests = false;
59  private final Server solrServer = KeywordSearch.getServer();
60  private static final SolrFieldsVisitor SOLR_FIELDS_VISITOR = new SolrFieldsVisitor();
61  private static Ingester instance;
62 
63  private Ingester() {
64  }
65 
66  public static synchronized Ingester getDefault() {
67  if (instance == null) {
68  instance = new Ingester();
69  }
70  return instance;
71  }
72 
73  //JMTODO: this is probably useless
74  @Override
75  @SuppressWarnings("FinalizeDeclaration")
76  protected void finalize() throws Throwable {
77  super.finalize();
78 
79  // Warn if files might have been left uncommitted.
80  if (uncommitedIngests) {
81  logger.warning("Ingester was used to add files that it never committed."); //NON-NLS
82  }
83  }
84 
95  void indexMetaDataOnly(AbstractFile file) throws IngesterException {
96  indexChunk("", file.getName().toLowerCase(), getContentFields(file));
97  }
98 
109  void indexMetaDataOnly(BlackboardArtifact artifact, String sourceName) throws IngesterException {
110  indexChunk("", sourceName, getContentFields(artifact));
111  }
112 
121  private Map<String, String> getContentFields(SleuthkitVisitableItem item) {
122  return item.accept(SOLR_FIELDS_VISITOR);
123  }
124 
144  // TODO (JIRA-3118): Cancelled text indexing does not propagate cancellation to clients
145  <T extends SleuthkitVisitableItem> boolean indexText(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context) throws Ingester.IngesterException {
146  int numChunks = 0; //unknown until chunking is done
147 
148  Map<String, String> fields = getContentFields(source);
149  //Get a reader for the content of the given source
150  try (BufferedReader reader = new BufferedReader(sourceReader)) {
151  Chunker chunker = new Chunker(reader);
152  for (Chunk chunk : chunker) {
153  if (context != null && context.fileIngestIsCancelled()) {
154  logger.log(Level.INFO, "File ingest cancelled. Cancelling keyword search indexing of {0}", sourceName);
155  return false;
156  }
157  String chunkId = Server.getChunkIdString(sourceID, numChunks + 1);
158  fields.put(Server.Schema.ID.toString(), chunkId);
159  fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength()));
160  try {
161  //add the chunk text to Solr index
162  indexChunk(chunk.toString(), sourceName, fields);
163  numChunks++;
164  } catch (Ingester.IngesterException ingEx) {
165  logger.log(Level.WARNING, "Ingester had a problem with extracted string from file '" //NON-NLS
166  + sourceName + "' (id: " + sourceID + ").", ingEx);//NON-NLS
167 
168  throw ingEx; //need to rethrow to signal error and move on
169  }
170  }
171  if (chunker.hasException()) {
172  logger.log(Level.WARNING, "Error chunking content from " + sourceID + ": " + sourceName, chunker.getException());
173  return false;
174  }
175  } catch (Exception ex) {
176  logger.log(Level.WARNING, "Unexpected error, can't read content stream from " + sourceID + ": " + sourceName, ex);//NON-NLS
177  return false;
178  } finally {
179  if (context != null && context.fileIngestIsCancelled()) {
180  return false;
181  } else {
182  //after all chunks, index just the meta data, including the numChunks, of the parent file
183  fields.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(numChunks));
184  //reset id field to base document id
185  fields.put(Server.Schema.ID.toString(), Long.toString(sourceID));
186  //"parent" docs don't have chunk_size
187  fields.remove(Server.Schema.CHUNK_SIZE.toString());
188  indexChunk(null, sourceName, fields);
189  }
190  }
191  return true;
192  }
193 
207  private void indexChunk(String chunk, String sourceName, Map<String, String> fields) throws IngesterException {
208  if (fields.get(Server.Schema.IMAGE_ID.toString()) == null) {
209  //JMTODO: actually if we couldn't get the image id it is set to -1,
210  // but does this really mean we don't want to index it?
211 
212  //skip the file, image id unknown
213  String msg = NbBundle.getMessage(Ingester.class,
214  "Ingester.ingest.exception.unknownImgId.msg", sourceName); //JMTODO: does this need to be internationalized?
215  logger.log(Level.SEVERE, msg);
216  throw new IngesterException(msg);
217  }
218 
219  //Make a SolrInputDocument out of the field map
220  SolrInputDocument updateDoc = new SolrInputDocument();
221  for (String key : fields.keySet()) {
222  updateDoc.addField(key, fields.get(key));
223  }
224 
225  try {
226  //TODO: consider timeout thread, or vary socket timeout based on size of indexed content
227 
228  //add the content to the SolrInputDocument
229  //JMTODO: can we just add it to the field map before passing that in?
230  updateDoc.addField(Server.Schema.CONTENT.toString(), chunk);
231 
232  // We also add the content (if present) in lowercase form to facilitate case
233  // insensitive substring/regular expression search.
234  double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
235  if (indexSchemaVersion >= 2.1) {
236  updateDoc.addField(Server.Schema.CONTENT_STR.toString(), ((chunk == null) ? "" : chunk.toLowerCase()));
237  }
238 
239  TimingMetric metric = HealthMonitor.getTimingMetric("Solr: Index chunk");
240 
241  solrServer.addDocument(updateDoc);
242  HealthMonitor.submitTimingMetric(metric);
243  uncommitedIngests = true;
244 
245  } catch (KeywordSearchModuleException | NoOpenCoreException ex) {
246  //JMTODO: does this need to be internationalized?
247  throw new IngesterException(
248  NbBundle.getMessage(Ingester.class, "Ingester.ingest.exception.err.msg", sourceName), ex);
249  }
250  }
251 
256  void commit() {
257  try {
258  solrServer.commit();
259  uncommitedIngests = false;
260  } catch (NoOpenCoreException | SolrServerException ex) {
261  logger.log(Level.WARNING, "Error committing index", ex); //NON-NLS
262 
263  }
264  }
265 
269  static private class SolrFieldsVisitor extends SleuthkitItemVisitor.Default<Map<String, String>> {
270 
271  @Override
272  protected Map<String, String> defaultVisit(SleuthkitVisitableItem svi) {
273  return new HashMap<>();
274  }
275 
276  @Override
277  public Map<String, String> visit(File f) {
278  return getCommonAndMACTimeFields(f);
279  }
280 
281  @Override
282  public Map<String, String> visit(DerivedFile df) {
283  return getCommonAndMACTimeFields(df);
284  }
285 
286  @Override
287  public Map<String, String> visit(Directory d) {
288  return getCommonAndMACTimeFields(d);
289  }
290 
291  @Override
292  public Map<String, String> visit(LocalDirectory ld) {
293  return getCommonAndMACTimeFields(ld);
294  }
295 
296  @Override
297  public Map<String, String> visit(LayoutFile lf) {
298  // layout files do not have times
299  return getCommonFields(lf);
300  }
301 
302  @Override
303  public Map<String, String> visit(LocalFile lf) {
304  return getCommonAndMACTimeFields(lf);
305  }
306 
307  @Override
308  public Map<String, String> visit(SlackFile f) {
309  return getCommonAndMACTimeFields(f);
310  }
311 
321  private Map<String, String> getCommonAndMACTimeFields(AbstractFile file) {
322  Map<String, String> params = getCommonFields(file);
323  params.put(Server.Schema.CTIME.toString(), ContentUtils.getStringTimeISO8601(file.getCtime(), file));
324  params.put(Server.Schema.ATIME.toString(), ContentUtils.getStringTimeISO8601(file.getAtime(), file));
325  params.put(Server.Schema.MTIME.toString(), ContentUtils.getStringTimeISO8601(file.getMtime(), file));
326  params.put(Server.Schema.CRTIME.toString(), ContentUtils.getStringTimeISO8601(file.getCrtime(), file));
327  return params;
328  }
329 
338  private Map<String, String> getCommonFields(AbstractFile file) {
339  Map<String, String> params = new HashMap<>();
340  params.put(Server.Schema.ID.toString(), Long.toString(file.getId()));
341  try {
342  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(file.getDataSource().getId()));
343  } catch (TskCoreException ex) {
344  logger.log(Level.SEVERE, "Could not get data source id to properly index the file " + file.getId(), ex); //NON-NLS
345  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
346  }
347  params.put(Server.Schema.FILE_NAME.toString(), file.getName().toLowerCase());
348  return params;
349  }
350 
358  @Override
359  public Map<String, String> visit(BlackboardArtifact artifact) {
360  Map<String, String> params = new HashMap<>();
361  params.put(Server.Schema.ID.toString(), Long.toString(artifact.getArtifactID()));
362  try {
363  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(artifact.getDataSource().getId()));
364  } catch (TskCoreException ex) {
365  logger.log(Level.SEVERE, "Could not get data source id to properly index the artifact " + artifact.getArtifactID(), ex); //NON-NLS
366  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
367  }
368  return params;
369  }
370 
378  @Override
379  public Map<String, String> visit(Report report) {
380  Map<String, String> params = new HashMap<>();
381  params.put(Server.Schema.ID.toString(), Long.toString(report.getId()));
382  try {
383  Content dataSource = report.getDataSource();
384  if (null == dataSource) {
385  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
386  } else {
387  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(dataSource.getId()));
388  }
389  } catch (TskCoreException ex) {
390  logger.log(Level.SEVERE, "Could not get data source id to properly index the report, using default value. Id: " + report.getId(), ex); //NON-NLS
391  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
392  }
393  return params;
394  }
395  }
396 
401  static class IngesterException extends Exception {
402 
403  private static final long serialVersionUID = 1L;
404 
405  IngesterException(String message, Throwable ex) {
406  super(message, ex);
407  }
408 
409  IngesterException(String message) {
410  super(message);
411  }
412  }
413 }
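
For orientation, here is a minimal usage sketch of the class above. It is not part of Ingester.java: the wrapper class IngesterUsageSketch, the indexOneFile method, and the extractedText parameter are hypothetical, and only the calls to getDefault(), indexText(), indexMetaDataOnly(), and commit() come from the listing. Because Ingester and its methods are package-private, such a caller would have to live in the org.sleuthkit.autopsy.keywordsearch package.

// Minimal usage sketch (not part of Ingester.java). Class, method, and
// parameter names here are hypothetical; only the Ingester calls are real.
package org.sleuthkit.autopsy.keywordsearch;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import org.sleuthkit.autopsy.ingest.IngestJobContext;
import org.sleuthkit.datamodel.AbstractFile;

class IngesterUsageSketch {

    // extractedText stands in for text produced by whatever extractor the caller uses.
    void indexOneFile(AbstractFile file, String extractedText, IngestJobContext context) {
        Ingester ingester = Ingester.getDefault();
        try (Reader reader = new StringReader(extractedText)) {
            // Chunk the text and add each chunk, plus a parent metadata document, to Solr.
            // Returns false if ingest was cancelled or the content could not be read/chunked.
            boolean completed = ingester.indexText(reader, file.getId(), file.getName(), file, context);
            if (!completed) {
                // Fall back to indexing only the file's metadata (name, times, ids).
                ingester.indexMetaDataOnly(file);
            }
        } catch (Ingester.IngesterException | IOException ex) {
            // Log or rethrow as appropriate for the calling module.
        }
        // Commit so the newly added documents become searchable.
        ingester.commit();
    }
}

In the real keyword search ingest pipeline, commits are typically batched rather than issued after every file; the per-file commit() here is only for illustration.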