19 package org.sleuthkit.autopsy.keywordsearch;
21 import java.io.BufferedReader;
22 import java.io.Reader;
23 import java.util.HashMap;
25 import java.util.logging.Level;
26 import org.apache.commons.lang3.math.NumberUtils;
27 import org.apache.solr.client.solrj.SolrServerException;
28 import org.apache.solr.common.SolrInputDocument;
29 import org.openide.util.NbBundle;
57 private static final Logger logger = Logger.getLogger(Ingester.class.getName());
58 private volatile boolean uncommitedIngests =
false;
59 private final Server solrServer = KeywordSearch.getServer();
60 private static final SolrFieldsVisitor SOLR_FIELDS_VISITOR =
new SolrFieldsVisitor();
61 private static Ingester instance;
66 public static synchronized Ingester getDefault() {
67 if (instance == null) {
68 instance =
new Ingester();
75 @SuppressWarnings(
"FinalizeDeclaration")
76 protected
void finalize() throws Throwable {
80 if (uncommitedIngests) {
81 logger.warning(
"Ingester was used to add files that it never committed.");
95 void indexMetaDataOnly(AbstractFile file)
throws IngesterException {
96 indexChunk(
"", file.getName().toLowerCase(), getContentFields(file));
109 void indexMetaDataOnly(BlackboardArtifact artifact, String sourceName)
throws IngesterException {
110 indexChunk(
"", sourceName, getContentFields(artifact));
121 private Map<String, String> getContentFields(SleuthkitVisitableItem item) {
122 return item.accept(SOLR_FIELDS_VISITOR);
143 < T extends SleuthkitVisitableItem>
boolean indexText(Reader sourceReader,
long sourceID, String sourceName, T source, IngestJobContext context)
throws Ingester.IngesterException {
146 Map<String, String> fields = getContentFields(source);
148 try (BufferedReader reader =
new BufferedReader(sourceReader)) {
149 Chunker chunker =
new Chunker(reader);
150 for (Chunk chunk : chunker) {
151 if (context != null && context.fileIngestIsCancelled()) {
152 logger.log(Level.INFO,
"File ingest cancelled. Cancelling keyword search indexing of {0}", sourceName);
155 String chunkId = Server.getChunkIdString(sourceID, numChunks + 1);
156 fields.put(Server.Schema.ID.toString(), chunkId);
157 fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength()));
160 indexChunk(chunk.toString(), sourceName, fields);
162 }
catch (Ingester.IngesterException ingEx) {
163 logger.log(Level.WARNING,
"Ingester had a problem with extracted string from file '"
164 + sourceName +
"' (id: " + sourceID +
").", ingEx);
169 if (chunker.hasException()) {
170 logger.log(Level.WARNING,
"Error chunking content from " + sourceID +
": " + sourceName, chunker.getException());
173 }
catch (Exception ex) {
174 logger.log(Level.WARNING,
"Unexpected error, can't read content stream from " + sourceID +
": " + sourceName, ex);
177 if (context != null && context.fileIngestIsCancelled()) {
181 fields.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(numChunks));
183 fields.put(Server.Schema.ID.toString(), Long.toString(sourceID));
185 fields.remove(Server.Schema.CHUNK_SIZE.toString());
186 indexChunk(null, sourceName, fields);
205 private void indexChunk(String chunk, String sourceName, Map<String, String> fields)
throws IngesterException {
206 if (fields.get(Server.Schema.IMAGE_ID.toString()) == null) {
211 String msg = NbBundle.getMessage(Ingester.class,
212 "Ingester.ingest.exception.unknownImgId.msg", sourceName);
213 logger.log(Level.SEVERE, msg);
214 throw new IngesterException(msg);
218 SolrInputDocument updateDoc =
new SolrInputDocument();
219 for (String key : fields.keySet()) {
220 updateDoc.addField(key, fields.get(key));
228 updateDoc.addField(Server.Schema.CONTENT.toString(), chunk);
232 double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
233 if (indexSchemaVersion >= 2.1) {
234 updateDoc.addField(Server.Schema.CONTENT_STR.toString(), ((chunk == null) ?
"" : chunk.toLowerCase()));
237 TimingMetric metric = HealthMonitor.getTimingMetric(
"Solr: Index chunk");
239 solrServer.addDocument(updateDoc);
240 HealthMonitor.submitTimingMetric(metric);
241 uncommitedIngests =
true;
243 }
catch (KeywordSearchModuleException | NoOpenCoreException ex) {
245 throw new IngesterException(
246 NbBundle.getMessage(Ingester.class,
"Ingester.ingest.exception.err.msg", sourceName), ex);
257 uncommitedIngests =
false;
258 }
catch (NoOpenCoreException | SolrServerException ex) {
259 logger.log(Level.WARNING,
"Error commiting index", ex);
// Visitor that maps each Sleuthkit item type to the Solr fields used to index it.
267 static private class SolrFieldsVisitor extends SleuthkitItemVisitor.Default<Map<String, String>> {

// Fallback for item types with no specific mapping: contributes no fields.
270 protected Map<String, String>
defaultVisit(SleuthkitVisitableItem svi) {
271 return new HashMap<>();

// NOTE(review): the bodies of the visit(...) overloads below are not visible in
// this excerpt — only the signatures survive. Presumably they delegate to
// getCommonFields / getCommonAndMACTimeFields; confirm against the full file
// before relying on that.
275 public Map<String, String>
visit(File f) {
280 public Map<String, String>
visit(DerivedFile df) {
285 public Map<String, String>
visit(Directory d) {
290 public Map<String, String>
visit(LocalDirectory ld) {
295 public Map<String, String>
visit(LayoutFile lf) {
301 public Map<String, String>
visit(LocalFile lf) {
306 public Map<String, String>
visit(SlackFile f) {
337 Map<String, String> params =
new HashMap<>();
338 params.put(
Server.
Schema.ID.toString(), Long.toString(file.getId()));
340 params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(file.getDataSource().getId()));
341 }
catch (TskCoreException ex) {
342 logger.log(Level.SEVERE,
"Could not get data source id to properly index the file " + file.getId(), ex);
343 params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(-1));
345 params.put(
Server.
Schema.FILE_NAME.toString(), file.getName().toLowerCase());
357 public Map<String, String>
visit(BlackboardArtifact artifact) {
358 Map<String, String> params =
new HashMap<>();
359 params.put(
Server.
Schema.ID.toString(), Long.toString(artifact.getArtifactID()));
361 params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(artifact.getDataSource().getId()));
362 }
catch (TskCoreException ex) {
363 logger.log(Level.SEVERE,
"Could not get data source id to properly index the artifact " + artifact.getArtifactID(), ex);
364 params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(-1));
377 public Map<String, String>
visit(Report report) {
378 Map<String, String> params =
new HashMap<>();
379 params.put(
Server.
Schema.ID.toString(), Long.toString(report.getId()));
381 Content dataSource = report.getDataSource();
382 if (null == dataSource) {
383 params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(-1));
385 params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(dataSource.getId()));
387 }
catch (TskCoreException ex) {
388 logger.log(Level.SEVERE,
"Could not get data source id to properly index the report, using default value. Id: " + report.getId(), ex);
389 params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(-1));
399 static class IngesterException
extends Exception {
401 private static final long serialVersionUID = 1L;
403 IngesterException(String message, Throwable ex) {
407 IngesterException(String message) {
Map< String, String > visit(Report report)
Map< String, String > visit(LayoutFile lf)
Map< String, String > visit(File f)
Map< String, String > visit(LocalDirectory ld)
Map< String, String > getCommonAndMACTimeFields(AbstractFile file)
Map< String, String > visit(SlackFile f)
Map< String, String > visit(Directory d)
static String getStringTimeISO8601(long epochSeconds, TimeZone tzone)
Map< String, String > getCommonFields(AbstractFile file)
Map< String, String > visit(DerivedFile df)
Map< String, String > visit(BlackboardArtifact artifact)
Map< String, String > visit(LocalFile lf)
Map< String, String > defaultVisit(SleuthkitVisitableItem svi)