package org.sleuthkit.autopsy.keywordsearch;

import java.io.BufferedReader;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Level;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrInputDocument;
import org.openide.util.NbBundle;
 
    private static final Logger logger = Logger.getLogger(Ingester.class.getName());
    private volatile boolean uncommitedIngests = false; //set when a document is added, cleared on commit
    private final Server solrServer = KeywordSearch.getServer();
    private static final SolrFieldsVisitor SOLR_FIELDS_VISITOR = new SolrFieldsVisitor();
    private static Ingester instance;
    private static final int SINGLE_READ_CHARS = 512;
 
    public static synchronized Ingester getDefault() {
        if (instance == null) {
            instance = new Ingester();
        }
        return instance;
    }
 
    @Override
    @SuppressWarnings("FinalizeDeclaration")
    protected void finalize() throws Throwable {
        super.finalize();
        if (uncommitedIngests) {
            logger.warning("Ingester was used to add files that it never committed.");
        }
    }
 
    void indexMetaDataOnly(AbstractFile file) throws IngesterException {
        indexChunk("", file.getName(), getContentFields(file));
    }

    void indexMetaDataOnly(BlackboardArtifact artifact) throws IngesterException {
        indexChunk("", new ArtifactTextExtractor().getName(artifact), getContentFields(artifact));
    }
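
    /*
     * Illustrative usage sketch (not part of this class): a caller in this
     * package could index just the metadata of a file and then commit so the
     * document becomes searchable. `file` is assumed to be an AbstractFile
     * obtained from the ingest pipeline.
     *
     *     Ingester ingester = Ingester.getDefault();
     *     try {
     *         ingester.indexMetaDataOnly(file);
     *     } catch (Ingester.IngesterException ex) {
     *         // the document could not be added; log and move on
     *     }
     *     ingester.commit();
     */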
 
    private Map<String, String> getContentFields(SleuthkitVisitableItem item) {
        return item.accept(SOLR_FIELDS_VISITOR);
    }
 
    /**
     * Extract the text of the given source with the given extractor, break it
     * into chunks, and index each chunk, followed by a parent document that
     * records the total number of chunks.
     */
    <T extends SleuthkitVisitableItem> boolean indexText(TextExtractor<T> extractor, T source, IngestJobContext context) throws Ingester.IngesterException {
        final long sourceID = extractor.getID(source);
        final String sourceName = extractor.getName(source);

        int numChunks = 0; //unknown until chunking is done

        if (extractor.isDisabled()) {
            //some extractors can be configured so that no extraction should be done
            return true;
        }

        Map<String, String> fields = getContentFields(source);
        //get a reader for the content of the given source
        try (BufferedReader reader = new BufferedReader(extractor.getReader(source))) {
            Chunker chunker = new Chunker(reader);
            for (Chunk chunk : chunker) {
                if (context != null && context.fileIngestIsCancelled()) {
                    return false;
                }
                String chunkId = Server.getChunkIdString(sourceID, numChunks + 1);
                fields.put(Server.Schema.ID.toString(), chunkId);
                fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength()));
                try {
                    //add the chunk text to the Solr index
                    indexChunk(chunk.toString(), sourceName, fields);
                    numChunks++;
                } catch (Ingester.IngesterException ingEx) {
                    extractor.logWarning("Ingester had a problem with extracted string from file '"
                            + sourceName + "' (id: " + sourceID + ").", ingEx);
                    throw ingEx; //rethrow to signal the error and move on
                }
            }
            if (chunker.hasException()) {
                extractor.logWarning("Error chunking content from " + sourceID + ": " + sourceName, chunker.getException());
                return false;
            }
        } catch (Exception ex) {
            extractor.logWarning("Unexpected error, can't read content stream from " + sourceID + ": " + sourceName, ex);
            return false;
        } finally {
            //after all chunks, index just the metadata of the parent, including the number of chunks
            fields.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(numChunks));
            //reset the id field to the base document id
            fields.put(Server.Schema.ID.toString(), Long.toString(sourceID));
            //parent documents do not have a chunk size
            fields.remove(Server.Schema.CHUNK_SIZE.toString());
            indexChunk(null, sourceName, fields);
        }
        return true;
    }
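
    /*
     * Illustrative usage sketch (not part of this class): extracting and
     * indexing the text of a blackboard artifact in chunks. `artifact` and
     * `context` are assumed to come from the ingest pipeline;
     * ArtifactTextExtractor is the extractor referenced elsewhere in this class.
     *
     *     Ingester ingester = Ingester.getDefault();
     *     try {
     *         ingester.indexText(new ArtifactTextExtractor(), artifact, context);
     *     } catch (Ingester.IngesterException ex) {
     *         // a chunk could not be sent to Solr
     *     } finally {
     *         ingester.commit(); // make the added documents searchable
     *     }
     */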
 
    private void indexChunk(String chunk, String sourceName, Map<String, String> fields) throws IngesterException {
        if (fields.get(Server.Schema.IMAGE_ID.toString()) == null) {
            //skip the document if the image id is unknown
            String msg = NbBundle.getMessage(Ingester.class,
                    "Ingester.ingest.exception.unknownImgId.msg", sourceName);
            logger.log(Level.SEVERE, msg);
            throw new IngesterException(msg);
        }

        //make a SolrInputDocument out of the field map
        SolrInputDocument updateDoc = new SolrInputDocument();
        for (String key : fields.keySet()) {
            updateDoc.addField(key, fields.get(key));
        }
        //add the chunk text as the content field
        updateDoc.addField(Server.Schema.CONTENT.toString(), chunk);

        try {
            solrServer.addDocument(updateDoc);
            uncommitedIngests = true;
        } catch (KeywordSearchModuleException | NoOpenCoreException ex) {
            throw new IngesterException(
                    NbBundle.getMessage(Ingester.class, "Ingester.ingest.exception.err.msg", sourceName), ex);
        }
    }
 
    void commit() {
        try {
            solrServer.commit();
            uncommitedIngests = false;
        } catch (NoOpenCoreException | SolrServerException ex) {
            logger.log(Level.WARNING, "Error committing index", ex);
        }
    }
 
    // The following methods are from the SolrFieldsVisitor inner class, which
    // maps Sleuth Kit items to the Solr field maps used above.

    @Override
    protected Map<String, String> defaultVisit(SleuthkitVisitableItem svi) {
        //item types without specific handling get an empty field map
        return new HashMap<>();
    }

    private Map<String, String> getCommonFields(AbstractFile af) {
        Map<String, String> params = new HashMap<>();
        try {
            params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(af.getDataSource().getId()));
        } catch (TskCoreException ex) {
            logger.log(Level.SEVERE, "Could not get data source id to properly index the file " + af.getId(), ex);
            params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
        }
        return params;
    }

    @Override
    public Map<String, String> visit(BlackboardArtifact artifact) {
        Map<String, String> params = new HashMap<>();
        try {
            params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(ArtifactTextExtractor.getDataSource(artifact).getId()));
        } catch (TskCoreException ex) {
            logger.log(Level.SEVERE, "Could not get data source id to properly index the artifact " + artifact.getArtifactID(), ex);
            params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
        }
        return params;
    }
 
    static class IngesterException extends Exception {

        private static final long serialVersionUID = 1L;

        IngesterException(String message, Throwable ex) {
            super(message, ex);
        }

        IngesterException(String message) {
            super(message);
        }
    }
 
    /*
     * SolrFieldsVisitor member overview:
     *   Map<String, String> visit(File f)
     *   Map<String, String> visit(DerivedFile df)
     *   Map<String, String> visit(Directory d)
     *   Map<String, String> visit(LayoutFile lf)
     *   Map<String, String> visit(LocalFile lf)
     *   Map<String, String> visit(SlackFile f)
     *   Map<String, String> visit(BlackboardArtifact artifact)
     *   Map<String, String> defaultVisit(SleuthkitVisitableItem svi)
     *   Map<String, String> getCommonFields(AbstractFile af)
     *   Map<String, String> getCommonAndMACTimeFields(AbstractFile file)
     *   static String getStringTimeISO8601(long epochSeconds, TimeZone tzone)
     */