19 package org.sleuthkit.autopsy.keywordsearch;
21 import java.io.ByteArrayInputStream;
22 import java.io.IOException;
23 import java.io.InputStream;
24 import java.io.Reader;
25 import java.io.UnsupportedEncodingException;
26 import java.util.HashMap;
28 import java.util.logging.Level;
29 import org.apache.solr.client.solrj.SolrServerException;
30 import org.apache.solr.common.util.ContentStream;
31 import org.apache.solr.common.SolrInputDocument;
32 import org.openide.util.NbBundle;
52 private static final Logger logger = Logger.getLogger(Ingester.class.getName());
53 private volatile boolean uncommitedIngests =
false;
54 private final Server solrServer = KeywordSearch.getServer();
55 private final GetContentFieldsV getContentFieldsV =
new GetContentFieldsV();
56 private static Ingester instance;
60 private static final int MAX_DOC_CHUNK_SIZE = 1024 * 1024;
61 private static final String docContentEncoding =
"UTF-8";
66 public static synchronized Ingester getDefault() {
67 if (instance == null) {
68 instance =
new Ingester();
74 @SuppressWarnings(
"FinalizeDeclaration")
75 protected
void finalize() throws Throwable {
79 if (uncommitedIngests) {
80 logger.warning(
"Ingester was used to add files that it never committed.");
93 void ingest(AbstractFileStringContentStream afscs)
throws IngesterException {
94 Map<String, String> params = getContentFields(afscs.getSourceContent());
95 ingest(afscs, params, afscs.getSourceContent().getSize());
110 void ingest(TextExtractor fe)
throws IngesterException {
111 Map<String, String> params = getContentFields(fe.getSourceFile());
113 params.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(fe.getNumChunks()));
115 ingest(
new NullContentStream(fe.getSourceFile()), params, 0);
130 void ingest(AbstractFileChunk fec, ByteContentStream bcs,
int size)
throws IngesterException {
131 AbstractContent sourceContent = bcs.getSourceContent();
132 Map<String, String> params = getContentFields(sourceContent);
135 params.put(Server.Schema.ID.toString(),
136 Server.getChunkIdString(sourceContent.getId(), fec.getChunkId()));
138 ingest(bcs, params, size);
154 void ingest(AbstractFile file,
boolean ingestContent)
throws IngesterException {
155 if (ingestContent ==
false || file.isDir()) {
156 ingest(
new NullContentStream(file), getContentFields(file), 0);
158 ingest(
new FscContentStream(file), getContentFields(file), file.getSize());
169 private Map<String, String> getContentFields(AbstractContent fsc) {
170 return fsc.accept(getContentFieldsV);
// ---- GetContentFieldsV fragments (Content visitor -> Solr field map) ----
// NOTE(review): this extraction is missing many interior lines (gaps in the
// embedded original line numbers); the visit(...) bodies below are not
// visible — confirm against version control before relying on comments here.
// defaultVisit tail: unknown content kinds contribute no fields.
180 return new HashMap<>();
// Type-specific visit overloads; presumably each delegates to
// getCommonFields / getCommonFileContentFields — TODO confirm, bodies missing.
184 public Map<String, String>
visit(File f) {
191 public Map<String, String>
visit(DerivedFile df) {
198 public Map<String, String>
visit(Directory d) {
205 public Map<String, String>
visit(LayoutFile lf) {
211 public Map<String, String>
visit(LocalFile lf) {
// getCommonFields interior: builds the base field map for an AbstractFile.
226 Map<String, String> params =
new HashMap<>();
// Document id is the file's object id.
227 params.put(
Server.
Schema.ID.toString(), Long.toString(af.getId()));
// Data-source (image) id; the lookup can throw TskCoreException.
229 long dataSourceId = af.getDataSource().getId();
230 params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(dataSourceId));
231 }
catch (TskCoreException ex) {
// NOTE(review): the caught exception `ex` is not passed to the logger, so
// the stack trace is lost — consider logging it.
232 logger.log(Level.SEVERE,
"Could not get data source id to properly index the file {0}", af.getId());
// Sentinel image id when the data source cannot be resolved.
233 params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(-1));
236 params.put(
Server.
Schema.FILE_NAME.toString(), af.getName());
// Core ingest: reads up to MAX_DOC_CHUNK_SIZE bytes from the stream, builds
// a SolrInputDocument from the field map plus decoded content, and adds it
// to the server. NOTE(review): many interior lines (try headers, finally
// block, loop/branch structure) are missing from this extraction — confirm
// control flow against version control.
257 void ingest(ContentStream cs, Map<String, String> fields,
final long size)
throws IngesterException {
// Refuse to index a document whose data-source id is unknown.
259 if (fields.get(
Server.
Schema.IMAGE_ID.toString()) == null) {
261 String msg = NbBundle.getMessage(this.getClass(),
262 "Ingester.ingest.exception.unknownImgId.msg", cs.getName());
263 logger.log(Level.SEVERE, msg);
264 throw new IngesterException(msg);
// Fixed-size buffer for one document's content chunk.
267 final byte[] docChunkContentBuf =
new byte[MAX_DOC_CHUNK_SIZE];
268 SolrInputDocument updateDoc =
new SolrInputDocument();
// Copy every metadata field into the Solr document.
270 for (String key : fields.keySet()) {
271 updateDoc.addField(key, fields.get(key));
// Read the content bytes (the surrounding try is not visible here).
278 InputStream is = null;
282 read = is.read(docChunkContentBuf);
283 }
catch (IOException ex) {
284 throw new IngesterException(
285 NbBundle.getMessage(
this.getClass(),
"Ingester.ingest.exception.cantReadStream.msg",
// Presumably the finally block closing the stream — close failures are
// logged but not fatal.
291 }
catch (IOException ex) {
292 logger.log(Level.WARNING,
"Could not close input stream after reading content, " + cs.getName(), ex);
// Decode the bytes read using the configured encoding (UTF-8).
300 s =
new String(docChunkContentBuf, 0, read, docContentEncoding);
301 }
catch (UnsupportedEncodingException ex) {
302 logger.log(Level.SEVERE,
"Unsupported encoding", ex);
304 updateDoc.addField(Server.Schema.CONTENT.toString(), s);
// Fallback branches: index empty content when decoding failed or nothing
// was read.
306 updateDoc.addField(Server.Schema.CONTENT.toString(),
"");
310 updateDoc.addField(Server.Schema.CONTENT.toString(),
"");
// Send the document; mark that a commit is now pending.
315 solrServer.addDocument(updateDoc);
316 uncommitedIngests =
true;
317 }
catch (KeywordSearchModuleException ex) {
318 throw new IngesterException(
319 NbBundle.getMessage(
this.getClass(),
"Ingester.ingest.exception.err.msg", cs.getName()), ex);
// Picks a Solr request timeout scaled by the document size: thresholds at
// 1 MB, 10 MB and 100 MB. NOTE(review): the actual return values on each
// branch are missing from this extraction — confirm against version control.
331 static int getTimeout(
long size) {
332 if (size < 1024 * 1024L)
335 }
else if (size < 10 * 1024 * 1024L)
338 }
else if (size < 100 * 1024 * 1024L)
// Tail of commit(): on a successful Solr commit the pending-ingest flag is
// cleared; failures are logged but not rethrown (best-effort commit).
// NOTE(review): the method header is missing from this extraction.
354 uncommitedIngests =
false;
355 }
catch (NoOpenCoreException | SolrServerException ex) {
// "commiting" typo is in the original log string; left as-is.
356 logger.log(Level.WARNING,
"Error commiting index", ex);
// ---- FscContentStream fragments: ContentStream adapter over an
// AbstractFile's bytes. Method headers are missing from this extraction —
// confirm structure against version control.
// The wrapped file whose content is streamed.
365 private AbstractFile
f;
// getSourceInfo(): human-readable description including the file id.
378 return NbBundle.getMessage(this.getClass(),
"Ingester.FscContentStream.getSrcInfo", f.getId());
// getStream(): raw bytes of the file via the Sleuth Kit read stream.
393 return new ReadContentInputStream(f);
// getReader(): character access is intentionally unsupported.
398 throw new UnsupportedOperationException(
399 NbBundle.getMessage(
this.getClass(),
"Ingester.FscContentStream.getReader"));
// ---- NullContentStream fragments: ContentStream with no bytes, used for
// metadata-only documents. Method headers are missing from this extraction —
// confirm structure against version control.
// The content whose metadata this empty stream represents.
408 AbstractContent aContent;
// Constructor body: store the wrapped content.
411 this.aContent = aContent;
// getName(): delegates to the wrapped content's name.
416 return aContent.getName();
// getSourceInfo(): description including the content id.
421 return NbBundle.getMessage(this.getClass(),
"Ingester.NullContentStream.getSrcInfo.text", aContent.getId());
// getStream(): always empty — zero-length byte stream.
436 return new ByteArrayInputStream(
new byte[0]);
// getReader(): character access is intentionally unsupported.
441 throw new UnsupportedOperationException(
442 NbBundle.getMessage(
this.getClass(),
"Ingester.NullContentStream.getReader"));
450 static class IngesterException
extends Exception {
452 private static final long serialVersionUID = 1L;
454 IngesterException(String message, Throwable ex) {
458 IngesterException(String message) {
Map<String, String> visit(Directory d)
Map<String, String> defaultVisit(Content cntnt)
Map<String, String> visit(DerivedFile df)
static String getStringTimeISO8601(long epochSeconds, TimeZone tzone)
Map<String, String> visit(File f)
Map<String, String> getCommonFields(AbstractFile af)
Map<String, String> getCommonFileContentFields(Map<String, String> params, AbstractFile file)
Map<String, String> visit(LocalFile lf)
Map<String, String> visit(LayoutFile lf)