19 package org.sleuthkit.autopsy.keywordsearch;
21 import java.io.ByteArrayInputStream;
22 import java.io.IOException;
23 import java.io.InputStream;
24 import java.io.Reader;
25 import java.io.UnsupportedEncodingException;
26 import java.util.HashMap;
28 import java.util.Map.Entry;
29 import java.util.concurrent.ExecutorService;
30 import java.util.concurrent.Executors;
31 import java.util.concurrent.Future;
32 import java.util.concurrent.TimeUnit;
33 import java.util.concurrent.TimeoutException;
34 import java.util.logging.Level;
35 import org.apache.solr.client.solrj.SolrRequest.METHOD;
36 import org.apache.solr.client.solrj.SolrServerException;
37 import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
38 import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
39 import org.apache.solr.common.SolrException;
40 import org.apache.solr.common.SolrException.ErrorCode;
41 import org.apache.solr.common.util.ContentStream;
42 import org.apache.solr.common.SolrInputDocument;
43 import org.openide.util.Exceptions;
44 import org.openide.util.NbBundle;
// NOTE(review): this chunk is a lossy extraction — original file line numbers are
// embedded as junk text and many source lines are missing. Code left byte-identical;
// comments only.
// Class-wide logger for the Ingester.
67 private static final Logger logger = Logger.getLogger(Ingester.class.getName());
// Tracks whether documents were added to Solr without a subsequent commit;
// checked in finalize() below to warn about uncommitted ingests. volatile for
// cross-thread visibility only (writes are simple assignments).
68 private volatile boolean uncommitedIngests =
false;
// Single-threaded executor used to post update requests to Solr so they can be
// bounded with Future.get(timeout) — see ingestExtract().
69 private final ExecutorService upRequestExecutor = Executors.newSingleThreadExecutor();
// Handle to the embedded Solr server wrapper.
70 private final Server solrServer = KeywordSearch.getServer();
// Content visitor that maps an AbstractContent to its Solr field map.
71 private final GetContentFieldsV getContentFieldsV =
new GetContentFieldsV();
// Singleton instance, lazily created in getDefault().
72 private static Ingester instance;
// Maximum bytes read from a content stream into one Solr document chunk (1 MiB).
76 private static final int MAX_DOC_CHUNK_SIZE = 1024*1024;
// Charset used when converting chunk bytes to the indexed CONTENT string.
77 private static final String docContentEncoding =
"UTF-8";
// Lazily creates and returns the singleton Ingester. synchronized guards the
// check-then-create against concurrent callers.
// FRAGMENT(review): the closing braces and the return statement (original lines
// 86-88) are missing from this extraction — presumably `return instance;`, but
// confirm against the full source.
83 public static synchronized Ingester getDefault() {
84 if (instance == null) {
85 instance =
new Ingester();
// Finalizer used only as a safety net: logs a warning if documents were added
// but never committed. NOTE(review): Object.finalize() is deprecated in modern
// Java; a Cleaner or explicit close() would be preferable — flagged, not changed.
// FRAGMENT(review): original lines 93-95 (presumably the super.finalize() call
// and/or braces) are missing from this extraction.
91 @SuppressWarnings(
"FinalizeDeclaration")
92 protected
void finalize() throws Throwable {
96 if (uncommitedIngests) {
97 logger.warning(
"Ingester was used to add files that it never committed.");
// Indexes an extracted-strings stream: builds the Solr field map from the
// source content and delegates to ingest(ContentStream, fields, size) using the
// source content's full size.
// FRAGMENT(review): closing brace (original line 112) missing from this view.
109 void ingest(AbstractFileStringContentStream afscs)
throws IngesterException {
110 Map<String, String> params = getContentFields(afscs.getSourceContent());
111 ingest(afscs, params, afscs.getSourceContent().getSize());
// Indexes only the metadata/field record for a chunked file: records the number
// of chunks the extractor produced and posts a NullContentStream (no content
// bytes, size 0) so the parent document exists alongside its chunk documents.
// FRAGMENT(review): original line 127 and the closing brace are missing here.
125 void ingest(TextExtractor fe)
throws IngesterException {
126 Map<String, String> params = getContentFields(fe.getSourceFile());
128 params.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(fe.getNumChunks()));
130 ingest(
new NullContentStream(fe.getSourceFile()), params, 0);
// Indexes a single chunk of a file's extracted text. Overrides the Solr ID
// field with the composite chunk id (parent object id + chunk number) so each
// chunk is a distinct document.
// FRAGMENT(review): original lines 147-148, 151 and the closing brace are
// missing from this extraction.
144 void ingest(AbstractFileChunk fec, ByteContentStream bcs,
int size)
throws IngesterException {
145 AbstractContent sourceContent = bcs.getSourceContent();
146 Map<String, String> params = getContentFields(sourceContent);
// Chunk documents get a composite id: "<objectId>_<chunkId>".
149 params.put(Server.Schema.ID.toString(),
150 Server.getChunkIdString(sourceContent.getId(), fec.getChunkId()));
152 ingest(bcs, params, size);
// Indexes a file either with its raw content (FscContentStream) or metadata
// only (NullContentStream). Directories, or callers passing ingestContent ==
// false, get the metadata-only path.
// FRAGMENT(review): the `} else {` between the two branches (original line 170)
// and the closing braces are missing from this extraction — the two ingest()
// calls below are presumably mutually exclusive branches; confirm in full source.
167 void ingest(AbstractFile file,
boolean ingestContent)
throws IngesterException {
168 if (ingestContent ==
false || file.isDir()) {
// Metadata-only: no bytes are sent, size recorded as 0.
169 ingest(
new NullContentStream(file), getContentFields(file), 0);
// Full-content path: stream the file bytes with the file's actual size.
171 ingest(
new FscContentStream(file), getContentFields(file), file.getSize());
// Maps a content object to its Solr field map by dispatching through the
// GetContentFieldsV visitor.
181 private Map<String, String> getContentFields(AbstractContent fsc) {
182 return fsc.accept(getContentFieldsV);
// FRAGMENT(review): the line below (original line 192) belongs to a different
// method — likely a visitor default returning an empty field map; the
// intervening lines 183-191 are missing from this extraction.
192 return new HashMap<String, String>();
// FRAGMENT(review): interior of a field-building method (likely
// getCommonFields(AbstractFile af) — `af` is referenced below but its
// declaration is outside this view). The try/catch structure around the
// data-source-id lookup is missing lines 240, 242-243, 245; the second
// params.put appears to be the error-path fallback. Confirm against full source.
239 Map<String, String> params =
new HashMap<String, String>();
// Sentinel meaning "data source id unknown".
241 long dataSourceId = -1;
// Success path: record the resolved data source (image) id.
244 params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(dataSourceId));
// Failure path: log and index with the -1 sentinel so the document still exists.
246 logger.log(Level.SEVERE,
"Could not get data source id to properly index the file " + af.
getId());
247 params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(-1));
// Core ingest: reads up to MAX_DOC_CHUNK_SIZE bytes from the content stream,
// decodes them as UTF-8 into the CONTENT field, copies all supplied fields into
// a SolrInputDocument, and adds it to Solr without committing (commit is
// deferred; uncommitedIngests is set so finalize() can warn).
// FRAGMENT(review): several lines are missing from this extraction — the `try`
// headers matching the `catch` clauses below, the stream-close block, the
// branches that decide between real content and the empty-string CONTENT field,
// and most closing braces. Comments below describe only what is visible.
271 void ingest(ContentStream cs, Map<String, String> fields,
final long size)
throws IngesterException {
// Refuse to index a document whose data source (image) id field is absent.
273 if (fields.get(
Server.
Schema.IMAGE_ID.toString()) == null) {
275 String msg = NbBundle.getMessage(this.getClass(),
276 "Ingester.ingest.exception.unknownImgId.msg", cs.getName());
277 logger.log(Level.SEVERE, msg);
278 throw new IngesterException(msg);
// Fixed-size read buffer: at most one chunk of content per document.
281 final byte[] docChunkContentBuf =
new byte[MAX_DOC_CHUNK_SIZE];
282 SolrInputDocument updateDoc =
new SolrInputDocument();
// Copy every supplied metadata field into the Solr document.
// (entrySet() iteration would avoid the per-key lookup — flagged, not changed.)
284 for (String key : fields.keySet()) {
285 updateDoc.addField(key, fields.get(key));
293 InputStream is = null;
// Single read into the chunk buffer; `read` is declared in a missing line.
297 read = is.read(docChunkContentBuf);
298 }
catch (IOException ex) {
299 throw new IngesterException(
300 NbBundle.getMessage(
this.getClass(),
"Ingester.ingest.exception.cantReadStream.msg",
// Failure to close the stream is non-fatal: logged and ignored.
305 }
catch (IOException ex) {
306 logger.log(Level.WARNING,
"Could not close input stream after reading content, " + cs.getName(), ex);
// Decode the bytes actually read using the fixed docContentEncoding (UTF-8).
313 s =
new String(docChunkContentBuf, 0, read, docContentEncoding);
314 }
catch (UnsupportedEncodingException ex) {
// UTF-8 is guaranteed by the JLS, so this path should be unreachable.
315 Exceptions.printStackTrace(ex);
317 updateDoc.addField(Server.Schema.CONTENT.toString(), s);
// Fallback branches (conditions missing from this view): index an empty
// CONTENT field when no decodable content is available.
319 updateDoc.addField(Server.Schema.CONTENT.toString(),
"");
324 updateDoc.addField(Server.Schema.CONTENT.toString(),
"");
// Post the document; commit is intentionally deferred (see commit()).
330 solrServer.addDocument(updateDoc);
331 uncommitedIngests =
true;
332 }
catch (KeywordSearchModuleException ex) {
333 throw new IngesterException(
334 NbBundle.getMessage(
this.getClass(),
"Ingester.ingest.exception.err.msg", cs.getName()), ex);
// Posts content to Solr's extracting request handler (/update/extract) so Tika
// performs text extraction server-side. All metadata fields are passed as
// literal.* parameters; the request runs on the single-threaded executor and is
// bounded by a size-scaled timeout (getTimeout). On timeout Solr is restarted
// (hardSolrRestart is defined below; the call site is in a missing line).
// FRAGMENT(review): the `try` header for the Future.get, the hardSolrRestart()
// call, and several closing braces are missing from this extraction.
352 private void ingestExtract(ContentStream cs, Map<String, String> fields,
final long size)
throws IngesterException {
353 final ContentStreamUpdateRequest up =
new ContentStreamUpdateRequest(
"/update/extract");
354 up.addContentStream(cs);
// Every metadata field becomes a literal.<name> request parameter.
355 setFields(up, fields);
// waitFlush=true, waitSearcher=true.
356 up.setAction(AbstractUpdateRequest.ACTION.COMMIT,
true,
true);
// Forward the stream's content type when it is known, so Tika picks the
// right parser.
358 final String contentType = cs.getContentType();
359 if (contentType != null && !contentType.trim().equals(
"")) {
360 up.setParam(
"stream.contentType", contentType);
// NOTE(review): commit=false here while ACTION.COMMIT is set above — the
// param presumably overrides the action so commits are batched; confirm
// against SolrJ semantics in the full source.
364 up.setParam(
"commit",
"false");
// Run the request off-thread so it can be abandoned on timeout.
366 final Future<?> f = upRequestExecutor.submit(
new UpRequestTask(up));
369 f.get(getTimeout(size), TimeUnit.SECONDS);
370 }
catch (TimeoutException te) {
371 logger.log(Level.WARNING,
"Solr timeout encountered, trying to restart Solr");
374 throw new IngesterException(
375 NbBundle.getMessage(
this.getClass(),
"Ingester.ingestExtract.exception.solrTimeout.msg",
376 fields.get(
"id"), fields.get(
"file_name")));
377 }
catch (Exception e) {
378 throw new IngesterException(
379 NbBundle.getMessage(
this.getClass(),
"Ingester.ingestExtract.exception.probPostToSolr.msg",
380 fields.get(
"id"), fields.get(
"file_name")), e);
// Reached only on success: mark that a commit is now pending.
382 uncommitedIngests =
true;
// Best-effort Solr restart after a timeout: close core, (re)start the server,
// reopen the core. Each step catches and logs its own failures so a later step
// still runs.
// FRAGMENT(review): the `try` headers, the solrServer.start() call (around
// original line 398), and the trailing braces are missing from this extraction.
388 private void hardSolrRestart() {
390 solrServer.closeCore();
391 }
catch (KeywordSearchModuleException ex) {
392 logger.log(Level.WARNING,
"Cannot close core", ex);
// The server (re)start step lives in missing lines; its failure handlers
// are below.
399 }
catch (KeywordSearchModuleException ex) {
400 logger.log(Level.WARNING,
"Cannot start", ex);
401 }
catch (SolrServerNoPortException ex) {
402 logger.log(Level.WARNING,
"Cannot start server with this port", ex);
406 solrServer.openCore();
407 }
catch (KeywordSearchModuleException ex) {
408 logger.log(Level.WARNING,
"Cannot open core", ex);
// Returns the Solr request timeout in seconds, scaled by content size in
// tiers: <1MB, <10MB, <100MB, and larger.
// FRAGMENT(review): the return statements for each tier (original lines
// 420-421, 423-424, 426+) are missing from this extraction — only the tier
// boundaries are visible.
418 static int getTimeout(
long size) {
419 if (size < 1024 * 1024L)
422 }
else if (size < 10 * 1024 * 1024L)
425 }
else if (size < 100 * 1024 * 1024L)
// FRAGMENT(review): body of the UpRequestTask runnable submitted by
// ingestExtract. The class header, constructor, run() signature, the `try`
// header, and the SERVER_ERROR diagnostics (original lines ~437-444, 447,
// 460-485) are missing from this extraction. It posts the prepared update
// request and converts every failure into a RuntimeException so the Future in
// ingestExtract surfaces it.
436 ContentStreamUpdateRequest up;
445 up.setMethod(METHOD.POST);
446 solrServer.request(up);
// Solr unavailable (catch header in a missing line).
448 throw new RuntimeException(
449 NbBundle.getMessage(
this.getClass(),
"Ingester.UpReqestTask.run.exception.sorlNotAvail.msg"), ex);
450 }
catch (IllegalStateException ex) {
// Typically thrown when the content stream cannot be read.
452 throw new RuntimeException(
453 NbBundle.getMessage(
this.getClass(),
"Ingester.UpRequestTask.run.exception.probReadFile.msg"), ex);
454 }
catch (SolrServerException ex) {
457 throw new RuntimeException(
458 NbBundle.getMessage(
this.getClass(),
"Ingester.UpRequestTask.run.exception.solrProb.msg"), ex);
459 }
catch (SolrException ex) {
// Decode the Solr HTTP error code to special-case server-side errors
// (e.g. Tika parse failures reported as 500s).
461 ErrorCode ec = ErrorCode.getErrorCode(ex.code());
465 if (ec.equals(ErrorCode.SERVER_ERROR)) {
466 throw new RuntimeException(NbBundle.getMessage(
this.getClass(),
467 "Ingester.UpRequestTask.run.exception.probPostToSolr.msg",
// FRAGMENT(review): tail of the commit() method — the signature, the `try`
// header, and the solrServer.commit() call (original lines ~480-485) are
// missing from this extraction. On success the pending-ingest flag is cleared;
// commit failures are logged but not rethrown (best-effort).
486 uncommitedIngests =
false;
487 }
catch (NoOpenCoreException ex) {
488 logger.log(Level.WARNING,
"Error commiting index", ex);
489 }
catch (SolrServerException ex) {
490 logger.log(Level.WARNING,
"Error commiting index", ex);
// Copies every metadata field onto the extracting update request as a
// "literal.<key>" parameter, the SolrJ convention for passing literal field
// values to /update/extract.
// FRAGMENT(review): closing braces (original lines 503-504) missing here.
500 private static void setFields(ContentStreamUpdateRequest up, Map<String, String> fields) {
501 for (Entry<String, String> field : fields.entrySet()) {
502 up.setParam(
"literal." + field.getKey(), field.getValue());
// FRAGMENT(review): pieces of the FscContentStream inner class (a ContentStream
// backed by an AbstractFile `f`). The class header, constructor, and most
// method signatures (original lines ~506-523, 525-543) are missing from this
// extraction. Visible: the source-info string and the unsupported getReader().
524 return NbBundle.getMessage(this.getClass(),
"Ingester.FscContentStream.getSrcInfo", f.
getId());
// getReader() is deliberately unsupported — callers must use the byte stream.
544 throw new UnsupportedOperationException(
545 NbBundle.getMessage(
this.getClass(),
"Ingester.FscContentStream.getReader"));
// FRAGMENT(review): pieces of the NullContentStream inner class — a
// ContentStream carrying metadata for `aContent` but no bytes (getStream
// returns an empty ByteArrayInputStream). The class header, constructor
// signature, and most method signatures are missing from this extraction.
557 this.aContent = aContent;
567 return NbBundle.getMessage(this.getClass(),
"Ingester.NullContentStream.getSrcInfo.text", aContent.
getId());
// Empty stream: this content is indexed with metadata only.
582 return new ByteArrayInputStream(
new byte[0]);
// getReader() is deliberately unsupported, mirroring FscContentStream.
587 throw new UnsupportedOperationException(
588 NbBundle.getMessage(
this.getClass(),
"Ingester.NullContentStream.getReader"));
// Checked exception thrown by the ingest methods to signal indexing failures
// to callers. FRAGMENT(review): the constructor bodies (presumably simple
// super(...) delegations) and closing braces are missing from this extraction.
596 static class IngesterException
extends Exception {
598 IngesterException(String message, Throwable ex) {
602 IngesterException(String message) {
// FRAGMENT(review): this trailing section is an outline of the GetContentFieldsV
// content-visitor's method signatures (plus one static time formatter) with all
// bodies stripped by the extraction. Each visit(...) presumably builds the Solr
// field map for that content type via the getCommon* helpers — confirm against
// the full source; nothing below is executable as-is.
Map< String, String > visit(Directory d)
Map< String, String > defaultVisit(Content cntnt)
Map< String, String > visit(DerivedFile df)
static String getStringTimeISO8601(long epochSeconds, TimeZone tzone)
Map< String, String > visit(File f)
Map< String, String > getCommonFields(AbstractFile af)
Map< String, String > getCommonFileContentFields(Map< String, String > params, AbstractFile file)
Map< String, String > visit(LocalFile lf)
Map< String, String > visit(LayoutFile lf)