api-docs/4.3/_ingester_8java_source.html

 /*

  * Autopsy Forensic Browser

  *

  * Copyright 2011-2017 Basis Technology Corp.

  * Contact: carrier <at> sleuthkit <dot> org

  *

  * Licensed under the Apache License, Version 2.0 (the "License");

  * you may not use this file except in compliance with the License.

  * You may obtain a copy of the License at

  *

  *     http://www.apache.org/licenses/LICENSE-2.0

  *

  * Unless required by applicable law or agreed to in writing, software

  * distributed under the License is distributed on an "AS IS" BASIS,

  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

  * See the License for the specific language governing permissions and

  * limitations under the License.

  */

 package org.sleuthkit.autopsy.keywordsearch;


 import java.io.BufferedReader;

 import java.util.HashMap;

 import java.util.Map;

 import java.util.logging.Level;

 import org.apache.solr.client.solrj.SolrServerException;

 import org.apache.solr.common.SolrInputDocument;

 import org.openide.util.NbBundle;

 import org.sleuthkit.autopsy.coreutils.Logger;

 import org.sleuthkit.autopsy.datamodel.ContentUtils;

 import org.sleuthkit.autopsy.ingest.IngestJobContext;

 import org.sleuthkit.autopsy.keywordsearch.Chunker.Chunk;

 import org.sleuthkit.datamodel.AbstractFile;

 import org.sleuthkit.datamodel.BlackboardArtifact;

 import org.sleuthkit.datamodel.DerivedFile;

 import org.sleuthkit.datamodel.Directory;

 import org.sleuthkit.datamodel.File;

 import org.sleuthkit.datamodel.LayoutFile;

 import org.sleuthkit.datamodel.LocalFile;

 import org.sleuthkit.datamodel.SlackFile;

 import org.sleuthkit.datamodel.SleuthkitItemVisitor;

 import org.sleuthkit.datamodel.SleuthkitVisitableItem;

 import org.sleuthkit.datamodel.TskCoreException;


 //JMTODO: Should this class really be a singleton?

 class Ingester {


     private static final Logger logger = Logger.getLogger(Ingester.class.getName());

     private volatile boolean uncommitedIngests = false;

     private final Server solrServer = KeywordSearch.getServer();

     private static final SolrFieldsVisitor SOLR_FIELDS_VISITOR = new SolrFieldsVisitor();

     private static Ingester instance;

     private static final int SINGLE_READ_CHARS = 512;


     private Ingester() {

     }


     public static synchronized Ingester getDefault() {

         if (instance == null) {

             instance = new Ingester();

         }

         return instance;

     }


     //JMTODO: this is probably useless

     @Override

     @SuppressWarnings("FinalizeDeclaration")

     protected void finalize() throws Throwable {

         super.finalize();


         // Warn if files might have been left uncommited.

         if (uncommitedIngests) {

             logger.warning("Ingester was used to add files that it never committed."); //NON-NLS

         }

     }


     void indexMetaDataOnly(AbstractFile file) throws IngesterException {

         indexChunk("", file.getName(), getContentFields(file));

     }


     void indexMetaDataOnly(BlackboardArtifact artifact) throws IngesterException {

         indexChunk("", new ArtifactTextExtractor().getName(artifact), getContentFields(artifact));

     }


     private Map<String, String> getContentFields(SleuthkitVisitableItem item) {

         return item.accept(SOLR_FIELDS_VISITOR);

     }


     < T extends SleuthkitVisitableItem> boolean indexText(TextExtractor< T> extractor, T source, IngestJobContext context) throws Ingester.IngesterException {

         final long sourceID = extractor.getID(source);

         final String sourceName = extractor.getName(source);


         int numChunks = 0; //unknown until chunking is done


         if (extractor.isDisabled()) {

             /* some Extractors, notable the strings extractor, have options

              * which can be configured such that no extraction should be done */

             return true;

         }


         Map<String, String> fields = getContentFields(source);

         //Get a reader for the content of the given source

         try (BufferedReader reader = new BufferedReader(extractor.getReader(source));) {

             Chunker chunker = new Chunker(reader);

             for (Chunk chunk : chunker) {

                 String chunkId = Server.getChunkIdString(sourceID, numChunks + 1);

                 fields.put(Server.Schema.ID.toString(), chunkId);

                 fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength()));

                 try {

                     //add the chunk text to Solr index

                     indexChunk(chunk.toString(), sourceName, fields);

                     numChunks++;

                 } catch (Ingester.IngesterException ingEx) {

                     extractor.logWarning("Ingester had a problem with extracted string from file '" //NON-NLS

                             + sourceName + "' (id: " + sourceID + ").", ingEx);//NON-NLS


                     throw ingEx; //need to rethrow to signal error and move on

                 }

             }

             if (chunker.hasException()) {

                 extractor.logWarning("Error chunking content from " + sourceID + ": " + sourceName, chunker.getException());

                 return false;

             }

         } catch (Exception ex) {

             extractor.logWarning("Unexpected error, can't read content stream from " + sourceID + ": " + sourceName, ex);//NON-NLS

             return false;

         } finally {

             //after all chunks, index just the meta data, including the  numChunks, of the parent file

             fields.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(numChunks));

             //reset id field to base document id

             fields.put(Server.Schema.ID.toString(), Long.toString(sourceID));

             //"parent" docs don't have chunk_size

             fields.remove(Server.Schema.CHUNK_SIZE.toString());

             indexChunk(null, sourceName, fields);

         }


         return true;

     }


     private void indexChunk(String chunk, String sourceName, Map<String, String> fields) throws IngesterException {

         if (fields.get(Server.Schema.IMAGE_ID.toString()) == null) {

             //JMTODO: actually if the we couldn't get the image id it is set to -1,

             // but does this really mean we don't want to index it?


             //skip the file, image id unknown

             //JMTODO: does this need to ne internationalized?

             String msg = NbBundle.getMessage(Ingester.class,

                     "Ingester.ingest.exception.unknownImgId.msg", sourceName); //JMTODO: does this need to ne internationalized?

             logger.log(Level.SEVERE, msg);

             throw new IngesterException(msg);

         }


         //Make a SolrInputDocument out of the field map

         SolrInputDocument updateDoc = new SolrInputDocument();

         for (String key : fields.keySet()) {

             updateDoc.addField(key, fields.get(key));

         }

         //add the content to the SolrInputDocument

         //JMTODO: can we just add it to the field map before passing that in?

         updateDoc.addField(Server.Schema.CONTENT.toString(), chunk);


         try {

             //TODO: consider timeout thread, or vary socket timeout based on size of indexed content

             solrServer.addDocument(updateDoc);

             uncommitedIngests = true;


         } catch (KeywordSearchModuleException | NoOpenCoreException ex) {

             //JMTODO: does this need to be internationalized?

             throw new IngesterException(

                     NbBundle.getMessage(Ingester.class, "Ingester.ingest.exception.err.msg", sourceName), ex);

         }

     }


     void commit() {

         try {

             solrServer.commit();

             uncommitedIngests = false;

         } catch (NoOpenCoreException | SolrServerException ex) {

             logger.log(Level.WARNING, "Error commiting index", ex); //NON-NLS


         }

     }


     static private class SolrFieldsVisitor extends SleuthkitItemVisitor.Default<Map<String, String>> {


         @Override

         protected Map<String, String> defaultVisit(SleuthkitVisitableItem svi) {

             return new HashMap<>();

         }


         @Override

         public Map<String, String> visit(File f) {

             return getCommonAndMACTimeFields(f);

         }


         @Override

         public Map<String, String> visit(DerivedFile df) {

             return getCommonAndMACTimeFields(df);

         }


         @Override

         public Map<String, String> visit(Directory d) {

             return getCommonAndMACTimeFields(d);

         }


         @Override

         public Map<String, String> visit(LayoutFile lf) {

             // layout files do not have times

             return getCommonFields(lf);

         }


         @Override

         public Map<String, String> visit(LocalFile lf) {

             return getCommonAndMACTimeFields(lf);

         }


         @Override

         public Map<String, String> visit(SlackFile f) {

             return getCommonAndMACTimeFields(f);

         }


         private Map<String, String> getCommonAndMACTimeFields(AbstractFile file) {

             Map<String, String> params = getCommonFields(file);

             params.put(Server.Schema.CTIME.toString(), ContentUtils.getStringTimeISO8601(file.getCtime(), file));

             params.put(Server.Schema.ATIME.toString(), ContentUtils.getStringTimeISO8601(file.getAtime(), file));

             params.put(Server.Schema.MTIME.toString(), ContentUtils.getStringTimeISO8601(file.getMtime(), file));

             params.put(Server.Schema.CRTIME.toString(), ContentUtils.getStringTimeISO8601(file.getCrtime(), file));

             return params;

         }


         private Map<String, String> getCommonFields(AbstractFile af) {

             Map<String, String> params = new HashMap<>();

             params.put(Server.Schema.ID.toString(), Long.toString(af.getId()));

             try {

                 params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(af.getDataSource().getId()));

             } catch (TskCoreException ex) {

                 logger.log(Level.SEVERE, "Could not get data source id to properly index the file " + af.getId(), ex); //NON-NLS

                 params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));

             }

             params.put(Server.Schema.FILE_NAME.toString(), af.getName());

             return params;

         }


         @Override

         public Map<String, String> visit(BlackboardArtifact artifact) {

             Map<String, String> params = new HashMap<>();

             params.put(Server.Schema.ID.toString(), Long.toString(artifact.getArtifactID()));

             try {

                 params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(ArtifactTextExtractor.getDataSource(artifact).getId()));

             } catch (TskCoreException ex) {

                 logger.log(Level.SEVERE, "Could not get data source id to properly index the artifact " + artifact.getArtifactID(), ex); //NON-NLS

                 params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));

             }

             return params;

         }

     }


     static class IngesterException extends Exception {


         private static final long serialVersionUID = 1L;


         IngesterException(String message, Throwable ex) {

             super(message, ex);

         }


         IngesterException(String message) {

             super(message);

         }

     }

 }

org::sleuthkit

org::sleuthkit::datamodel::AbstractContent::getName
String getName()

org::sleuthkit.autopsy.keywordsearch.Ingester.SolrFieldsVisitor.visit
Map< String, String > visit(LayoutFile lf)
Definition: Ingester.java:277

org::sleuthkit::datamodel::SleuthkitItemVisitor

org::sleuthkit.autopsy.keywordsearch.Ingester.SolrFieldsVisitor.visit
Map< String, String > visit(File f)
Definition: Ingester.java:262

org::sleuthkit.autopsy.datamodel.ContentUtils
Definition: ContentUtils.java:53

org

org::sleuthkit::datamodel::AbstractFile::getAtime
long getAtime()

org::sleuthkit.autopsy.datamodel
Definition: AbstractAbstractFileNode.java:19

org::sleuthkit::datamodel::AbstractFile::getCtime
long getCtime()

org::sleuthkit::datamodel::BlackboardArtifact

org::sleuthkit::datamodel::AbstractFile::getDataSource
Content getDataSource()

org::sleuthkit.autopsy.keywordsearch.Server.Schema
Definition: Server.java:81

org::sleuthkit.autopsy.keywordsearch.Ingester.SolrFieldsVisitor
Definition: Ingester.java:254

org::sleuthkit::datamodel::SleuthkitVisitableItem

org::sleuthkit.autopsy.coreutils
Definition: AutopsyExceptionHandler.java:19

org::sleuthkit::datamodel::Directory

org::sleuthkit.autopsy.coreutils.Logger
Definition: Logger.java:36

org::sleuthkit.autopsy.keywordsearch.Chunker
Definition: Chunker.java:40

org::sleuthkit.autopsy.keywordsearch.Ingester.SolrFieldsVisitor.getCommonAndMACTimeFields
Map< String, String > getCommonAndMACTimeFields(AbstractFile file)
Definition: Ingester.java:301

org::sleuthkit.autopsy.keywordsearch.Ingester.SolrFieldsVisitor.visit
Map< String, String > visit(SlackFile f)
Definition: Ingester.java:288

org::sleuthkit.autopsy.keywordsearch.Ingester.SolrFieldsVisitor.visit
Map< String, String > visit(Directory d)
Definition: Ingester.java:272

org::sleuthkit.autopsy.datamodel.ContentUtils.getStringTimeISO8601
static String getStringTimeISO8601(long epochSeconds, TimeZone tzone)
Definition: ContentUtils.java:95

org::sleuthkit.autopsy.ingest
Definition: DataSourceIngestCancellationPanel.java:19

org::sleuthkit.autopsy.keywordsearch.Chunker.Chunk
Definition: Chunker.java:322

org::sleuthkit::datamodel::AbstractFile::getCrtime
long getCrtime()

org::sleuthkit::datamodel::DerivedFile

org::sleuthkit::datamodel::TskCoreException

org::sleuthkit.autopsy.keywordsearch.Ingester.SolrFieldsVisitor.getCommonFields
Map< String, String > getCommonFields(AbstractFile af)
Definition: Ingester.java:318

org::sleuthkit::datamodel::Content::getId
long getId()

org::sleuthkit.autopsy.keywordsearch.Server
Definition: Server.java:76

org::sleuthkit::datamodel::File

org::sleuthkit.autopsy.keywordsearch.Ingester.SolrFieldsVisitor.visit
Map< String, String > visit(DerivedFile df)
Definition: Ingester.java:267

org::sleuthkit.autopsy.ingest.IngestJobContext
Definition: IngestJobContext.java:29

org::sleuthkit::datamodel::AbstractContent::getId
long getId()

org::sleuthkit::datamodel::AbstractFile

org::sleuthkit::datamodel::AbstractFile::getMtime
long getMtime()

org::sleuthkit.autopsy.keywordsearch.Ingester.SolrFieldsVisitor.visit
Map< String, String > visit(BlackboardArtifact artifact)
Definition: Ingester.java:339

org::sleuthkit::datamodel::LayoutFile

org::sleuthkit::datamodel

org::sleuthkit::datamodel::BlackboardArtifact::getArtifactID
long getArtifactID()

org::sleuthkit.autopsy

org::sleuthkit.autopsy.keywordsearch
Definition: AccountsText.java:19

org::sleuthkit::datamodel::LocalFile

org::sleuthkit.autopsy.keywordsearch.Ingester.SolrFieldsVisitor.visit
Map< String, String > visit(LocalFile lf)
Definition: Ingester.java:283

org::sleuthkit::datamodel::SlackFile

org::sleuthkit.autopsy.keywordsearch.Ingester.SolrFieldsVisitor.defaultVisit
Map< String, String > defaultVisit(SleuthkitVisitableItem svi)
Definition: Ingester.java:257