Autopsy 4.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
Graphical digital forensics platform for The Sleuth Kit and other tools.
Ingester.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2016 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import java.io.ByteArrayInputStream;
22 import java.io.IOException;
23 import java.io.InputStream;
24 import java.io.Reader;
25 import java.io.UnsupportedEncodingException;
26 import java.util.HashMap;
27 import java.util.Map;
28 import java.util.logging.Level;
29 import org.apache.solr.client.solrj.SolrServerException;
30 import org.apache.solr.common.util.ContentStream;
31 import org.apache.solr.common.SolrInputDocument;
32 import org.openide.util.NbBundle;
36 import org.sleuthkit.datamodel.AbstractContent;
37 import org.sleuthkit.datamodel.AbstractFile;
38 import org.sleuthkit.datamodel.Content;
39 import org.sleuthkit.datamodel.ContentVisitor;
40 import org.sleuthkit.datamodel.DerivedFile;
41 import org.sleuthkit.datamodel.Directory;
42 import org.sleuthkit.datamodel.File;
43 import org.sleuthkit.datamodel.LayoutFile;
44 import org.sleuthkit.datamodel.LocalFile;
45 import org.sleuthkit.datamodel.ReadContentInputStream;
46 import org.sleuthkit.datamodel.TskCoreException;
47 
51 class Ingester {
52 
53  private static final Logger logger = Logger.getLogger(Ingester.class.getName());
54  private volatile boolean uncommitedIngests = false;
55  private final Server solrServer = KeywordSearch.getServer();
56  private final GetContentFieldsV getContentFieldsV = new GetContentFieldsV();
57  private static Ingester instance;
58 
59  //for ingesting chunk as SolrInputDocument (non-content-streaming, by-pass tika)
60  //TODO use a streaming way to add content to /update handler
61  private static final int MAX_DOC_CHUNK_SIZE = 1024 * 1024;
62  private static final String ENCODING = "UTF-8"; //NON-NLS
63 
64  private Ingester() {
65  }
66 
67  public static synchronized Ingester getDefault() {
68  if (instance == null) {
69  instance = new Ingester();
70  }
71  return instance;
72  }
73 
74  @Override
75  @SuppressWarnings("FinalizeDeclaration")
76  protected void finalize() throws Throwable {
77  super.finalize();
78 
79  // Warn if files might have been left uncommited.
80  if (uncommitedIngests) {
81  logger.warning("Ingester was used to add files that it never committed."); //NON-NLS
82  }
83  }
84 
94  void ingest(AbstractFileStringContentStream afscs) throws IngesterException {
95  Map<String, String> params = getContentFields(afscs.getSourceContent());
96  ingest(afscs, params, afscs.getSourceContent().getSize());
97  }
98 
111  void ingest(TextExtractor fe) throws IngesterException {
112  Map<String, String> params = getContentFields(fe.getSourceFile());
113 
114  params.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(fe.getNumChunks()));
115 
116  ingest(new NullContentStream(fe.getSourceFile()), params, 0);
117  }
118 
131  void ingest(AbstractFileChunk fec, ByteContentStream bcs, int size) throws IngesterException {
132  AbstractContent sourceContent = bcs.getSourceContent();
133  Map<String, String> params = getContentFields(sourceContent);
134 
135  //overwrite id with the chunk id
136  params.put(Server.Schema.ID.toString(),
137  Server.getChunkIdString(sourceContent.getId(), fec.getChunkNumber()));
138 
139  ingest(bcs, params, size);
140  }
141 
155  void ingest(AbstractFile file, boolean ingestContent) throws IngesterException {
156  if (ingestContent == false || file.isDir()) {
157  ingest(new NullContentStream(file), getContentFields(file), 0);
158  } else {
159  ingest(new FscContentStream(file), getContentFields(file), file.getSize());
160  }
161  }
162 
170  private Map<String, String> getContentFields(AbstractContent fsc) {
171  return fsc.accept(getContentFieldsV);
172  }
173 
177  private class GetContentFieldsV extends ContentVisitor.Default<Map<String, String>> {
178 
179  @Override
180  protected Map<String, String> defaultVisit(Content cntnt) {
181  return new HashMap<>();
182  }
183 
184  @Override
185  public Map<String, String> visit(File f) {
186  Map<String, String> params = getCommonFields(f);
187  getCommonFileContentFields(params, f);
188  return params;
189  }
190 
191  @Override
192  public Map<String, String> visit(DerivedFile df) {
193  Map<String, String> params = getCommonFields(df);
194  getCommonFileContentFields(params, df);
195  return params;
196  }
197 
198  @Override
199  public Map<String, String> visit(Directory d) {
200  Map<String, String> params = getCommonFields(d);
201  getCommonFileContentFields(params, d);
202  return params;
203  }
204 
205  @Override
206  public Map<String, String> visit(LayoutFile lf) {
207  // layout files do not have times
208  return getCommonFields(lf);
209  }
210 
211  @Override
212  public Map<String, String> visit(LocalFile lf) {
213  Map<String, String> params = getCommonFields(lf);
214  getCommonFileContentFields(params, lf);
215  return params;
216  }
217 
218  private Map<String, String> getCommonFileContentFields(Map<String, String> params, AbstractFile file) {
219  params.put(Server.Schema.CTIME.toString(), ContentUtils.getStringTimeISO8601(file.getCtime(), file));
220  params.put(Server.Schema.ATIME.toString(), ContentUtils.getStringTimeISO8601(file.getAtime(), file));
221  params.put(Server.Schema.MTIME.toString(), ContentUtils.getStringTimeISO8601(file.getMtime(), file));
222  params.put(Server.Schema.CRTIME.toString(), ContentUtils.getStringTimeISO8601(file.getCrtime(), file));
223  return params;
224  }
225 
226  private Map<String, String> getCommonFields(AbstractFile af) {
227  Map<String, String> params = new HashMap<>();
228  params.put(Server.Schema.ID.toString(), Long.toString(af.getId()));
229  try {
230  long dataSourceId = af.getDataSource().getId();
231  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(dataSourceId));
232  } catch (TskCoreException ex) {
233  logger.log(Level.SEVERE, "Could not get data source id to properly index the file {0}", af.getId()); //NON-NLS
234  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
235  }
236 
237  params.put(Server.Schema.FILE_NAME.toString(), af.getName());
238  return params;
239  }
240  }
241 
258  void ingest(ContentStream cs, Map<String, String> fields, final long size) throws IngesterException {
259 
260  if (fields.get(Server.Schema.IMAGE_ID.toString()) == null) {
261  //skip the file, image id unknown
262  String msg = NbBundle.getMessage(this.getClass(),
263  "Ingester.ingest.exception.unknownImgId.msg", cs.getName());
264  logger.log(Level.SEVERE, msg);
265  throw new IngesterException(msg);
266  }
267 
268  final byte[] docChunkContentBuf = new byte[MAX_DOC_CHUNK_SIZE];
269  SolrInputDocument updateDoc = new SolrInputDocument();
270 
271  for (String key : fields.keySet()) {
272  updateDoc.addField(key, fields.get(key));
273  }
274 
275  //using size here, but we are no longer ingesting entire files
276  //size is normally a chunk size, up to 1MB
277  if (size > 0) {
278  // TODO (RC): Use try with resources, adjust exception messages
279  InputStream is = null;
280  int read = 0;
281  try {
282  is = cs.getStream();
283  read = is.read(docChunkContentBuf);
284  } catch (IOException ex) {
285  throw new IngesterException(
286  NbBundle.getMessage(this.getClass(), "Ingester.ingest.exception.cantReadStream.msg",
287  cs.getName()));
288  } finally {
289  if (null != is) {
290  try {
291  is.close();
292  } catch (IOException ex) {
293  logger.log(Level.WARNING, "Could not close input stream after reading content, " + cs.getName(), ex); //NON-NLS
294  }
295  }
296  }
297 
298  if (read != 0) {
299  String s = "";
300  try {
301  s = new String(docChunkContentBuf, 0, read, ENCODING);
302  // Sanitize by replacing non-UTF-8 characters with caret '^' before adding to index
303  char[] chars = null;
304  for (int i = 0; i < s.length(); i++) {
305  if (!TextUtil.isValidSolrUTF8(s.charAt(i))) {
306  // only convert string to char[] if there is a non-UTF8 character
307  if (chars == null) {
308  chars = s.toCharArray();
309  }
310  chars[i] = '^';
311  }
312  }
313  // check if the string was modified (i.e. there was a non-UTF8 character found)
314  if (chars != null) {
315  s = new String(chars);
316  }
317  } catch (UnsupportedEncodingException ex) {
318  logger.log(Level.SEVERE, "Unsupported encoding", ex); //NON-NLS
319  }
320  updateDoc.addField(Server.Schema.CONTENT.toString(), s);
321  } else {
322  updateDoc.addField(Server.Schema.CONTENT.toString(), "");
323  }
324  } else {
325  //no content, such as case when 0th chunk indexed
326  updateDoc.addField(Server.Schema.CONTENT.toString(), "");
327  }
328 
329  try {
330  //TODO consider timeout thread, or vary socket timeout based on size of indexed content
331  solrServer.addDocument(updateDoc);
332  uncommitedIngests = true;
333  } catch (KeywordSearchModuleException ex) {
334  throw new IngesterException(
335  NbBundle.getMessage(this.getClass(), "Ingester.ingest.exception.err.msg", cs.getName()), ex);
336  }
337 
338  }
339 
347  static int getTimeout(long size) {
348  if (size < 1024 * 1024L) //1MB
349  {
350  return 60;
351  } else if (size < 10 * 1024 * 1024L) //10MB
352  {
353  return 1200;
354  } else if (size < 100 * 1024 * 1024L) //100MB
355  {
356  return 3600;
357  } else {
358  return 3 * 3600;
359  }
360 
361  }
362 
367  void commit() {
368  try {
369  solrServer.commit();
370  uncommitedIngests = false;
371  } catch (NoOpenCoreException | SolrServerException ex) {
372  logger.log(Level.WARNING, "Error commiting index", ex); //NON-NLS
373  }
374  }
375 
379  private static class FscContentStream implements ContentStream {
380 
381  private AbstractFile f;
382 
383  FscContentStream(AbstractFile f) {
384  this.f = f;
385  }
386 
387  @Override
388  public String getName() {
389  return f.getName();
390  }
391 
392  @Override
393  public String getSourceInfo() {
394  return NbBundle.getMessage(this.getClass(), "Ingester.FscContentStream.getSrcInfo", f.getId());
395  }
396 
397  @Override
398  public String getContentType() {
399  return null;
400  }
401 
402  @Override
403  public Long getSize() {
404  return f.getSize();
405  }
406 
407  @Override
408  public InputStream getStream() throws IOException {
409  return new ReadContentInputStream(f);
410  }
411 
412  @Override
413  public Reader getReader() throws IOException {
414  throw new UnsupportedOperationException(
415  NbBundle.getMessage(this.getClass(), "Ingester.FscContentStream.getReader"));
416  }
417  }
418 
422  private static class NullContentStream implements ContentStream {
423 
424  AbstractContent aContent;
425 
426  NullContentStream(AbstractContent aContent) {
427  this.aContent = aContent;
428  }
429 
430  @Override
431  public String getName() {
432  return aContent.getName();
433  }
434 
435  @Override
436  public String getSourceInfo() {
437  return NbBundle.getMessage(this.getClass(), "Ingester.NullContentStream.getSrcInfo.text", aContent.getId());
438  }
439 
440  @Override
441  public String getContentType() {
442  return null;
443  }
444 
445  @Override
446  public Long getSize() {
447  return 0L;
448  }
449 
450  @Override
451  public InputStream getStream() throws IOException {
452  return new ByteArrayInputStream(new byte[0]);
453  }
454 
455  @Override
456  public Reader getReader() throws IOException {
457  throw new UnsupportedOperationException(
458  NbBundle.getMessage(this.getClass(), "Ingester.NullContentStream.getReader"));
459  }
460  }
461 
466  static class IngesterException extends Exception {
467 
468  private static final long serialVersionUID = 1L;
469 
470  IngesterException(String message, Throwable ex) {
471  super(message, ex);
472  }
473 
474  IngesterException(String message) {
475  super(message);
476  }
477  }
478 }
static String getStringTimeISO8601(long epochSeconds, TimeZone tzone)
Map< String, String > getCommonFields(AbstractFile af)
Definition: Ingester.java:226
Map< String, String > getCommonFileContentFields(Map< String, String > params, AbstractFile file)
Definition: Ingester.java:218

Copyright © 2012-2016 Basis Technology. Generated on: Tue Oct 25 2016
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.